38 files changed, 19089 insertions, 0 deletions
diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile
new file mode 100644
index 0000000..85ddf0f
--- /dev/null
+++ b/lib/libpmc/Makefile
@@ -0,0 +1,72 @@
+# $FreeBSD$
+
+LIB=	pmc
+
+SRCS=	libpmc.c pmclog.c
+INCS=	pmc.h pmclog.h
+
+MAN=	pmc.3
+MAN+=	pmc_allocate.3
+MAN+=	pmc_attach.3
+MAN+=	pmc_capabilities.3
+MAN+=	pmc_configure_logfile.3
+MAN+=	pmc_disable.3
+MAN+=	pmc_event_names_of_class.3
+MAN+=	pmc_get_driver_stats.3
+MAN+=	pmc_get_msr.3
+MAN+=	pmc_init.3
+MAN+=	pmc_name_of_capability.3
+MAN+=	pmc_read.3
+MAN+=	pmc_set.3
+MAN+=	pmc_start.3
+MAN+=	pmclog.3
+
+# PMC-dependent manual pages (CPUTYPE may be unset, hence the defined() test)
+.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
+MAN+=	pmc.atom.3
+MAN+=	pmc.core.3
+MAN+=	pmc.core2.3
+MAN+=	pmc.iaf.3
+MAN+=	pmc.ucf.3
+MAN+=	pmc.k7.3
+MAN+=	pmc.k8.3
+MAN+=	pmc.p4.3
+MAN+=	pmc.p5.3
+MAN+=	pmc.p6.3
+MAN+=	pmc.corei7.3
+MAN+=	pmc.corei7uc.3
+MAN+=	pmc.westmere.3
+MAN+=	pmc.westmereuc.3
+MAN+=	pmc.tsc.3
+.elif ${MACHINE_CPUARCH} == "arm" && defined(CPUTYPE) && ${CPUTYPE} == "xscale"
+MAN+=	pmc.xscale.3
+.endif
+
+MLINKS+= \
+	pmc_allocate.3 pmc_release.3 \
+	pmc_attach.3 pmc_detach.3 \
+	pmc_capabilities.3 pmc_ncpu.3 \
+	pmc_capabilities.3 pmc_npmc.3 \
+	pmc_capabilities.3 pmc_pmcinfo.3 \
+	pmc_capabilities.3 pmc_cpuinfo.3 \
+	pmc_capabilities.3 pmc_width.3 \
+	pmc_configure_logfile.3 pmc_flush_logfile.3 \
+	pmc_configure_logfile.3 pmc_writelog.3 \
+	pmc_disable.3 pmc_enable.3 \
+	pmc_name_of_capability.3 pmc_name_of_class.3 \
+	pmc_name_of_capability.3 pmc_name_of_cputype.3 \
+	pmc_name_of_capability.3 pmc_name_of_disposition.3 \
+	pmc_name_of_capability.3 pmc_name_of_event.3 \
+	pmc_name_of_capability.3 pmc_name_of_mode.3 \
+	pmc_name_of_capability.3 pmc_name_of_state.3 \
+	pmc_read.3 pmc_rw.3 \
+	pmc_read.3 pmc_write.3 \
+	pmc_start.3 pmc_stop.3
+
+MLINKS+= \
+	pmclog.3 pmclog_open.3 \
+	pmclog.3 pmclog_close.3 \
+	pmclog.3 pmclog_feed.3 \
+	pmclog.3 pmclog_read.3
+
+.include <bsd.lib.mk>
diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c
new file mode 100644
index 0000000..1d86a82
--- /dev/null
+++ b/lib/libpmc/libpmc.c
@@ -0,0 +1,3127 @@
+/*-
+ * Copyright (c) 2003-2008 Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+#include <sys/module.h>
+#include <sys/pmc.h>
+#include <sys/syscall.h>
+
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <pmc.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include "libpmcinternal.h"
+
+/* Function prototypes */
+#if defined(__i386__)
+static int k7_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+#endif
+#if defined(__amd64__) || defined(__i386__)
+static int iaf_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+static int iap_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+static int ucf_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+static int ucp_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+static int p4_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+#endif
+#if defined(__i386__)
+static int p5_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+static int p6_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+#endif
+#if defined(__amd64__) || defined(__i386__)
+static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+#endif
+#if defined(__XSCALE__)
+static int xscale_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+#endif
+
+#if defined(__mips__)
+static int mips24k_allocate_pmc(enum pmc_event _pe, char* ctrspec,
+			     struct pmc_op_pmcallocate *_pmc_config);
+#endif /* __mips__ */
+
+
+#define PMC_CALL(cmd, params)				\
+	syscall(pmc_syscall, PMC_OP_##cmd, (params))
+
+/*
+ * Event aliases provide a way for the user to ask for generic events
+ * like "cache-misses", or "instructions-retired".  These aliases are
+ * mapped to the appropriate canonical event descriptions using a
+ * lookup table.
+ */
+struct pmc_event_alias {
+	const char	*pm_alias;	/* generic name, e.g. "cycles" */
+	const char	*pm_spec;	/* canonical event specification */
+};
+
+static const struct pmc_event_alias *pmc_mdep_event_aliases;
+
+/*
+ * The pmc_event_descr structure maps symbolic names known to the user
+ * to integer codes used by the PMC KLD.
+ */
+struct pmc_event_descr {
+	const char	*pm_ev_name;	/* event name as a string */
+	enum pmc_event	pm_ev_code;	/* matching PMC_EV_* code */
+};
+
+/*
+ * The pmc_class_descr structure maps class name prefixes for
+ * event names to event tables and other PMC class data.
+ */
+struct pmc_class_descr {
+	const char	*pm_evc_name;	/* class prefix, e.g. "IAP-" */
+	size_t		pm_evc_name_size;	/* prefix length, no NUL */
+	enum pmc_class	pm_evc_class;	/* PMC_CLASS_* code */
+	const struct pmc_event_descr *pm_evc_event_table;
+	size_t		pm_evc_event_table_size; /* entries in the table */
+	int		(*pm_evc_allocate_pmc)(enum pmc_event _pe,
+			    char *_ctrspec, struct pmc_op_pmcallocate *_pa);
+};
+
+#define	PMC_TABLE_SIZE(N)	(sizeof(N)/sizeof(N[0]))
+#define	PMC_EVENT_TABLE_SIZE(N)	PMC_TABLE_SIZE(N##_event_table)
+
+#undef	__PMC_EV
+#define	__PMC_EV(C,N) { #N, PMC_EV_ ## C ## _ ## N },
+
+/*
+ * PMC_CLASSDEP_TABLE(NAME, CLASS)
+ *
+ * Define a table mapping event names and aliases to HWPMC event IDs.
+ */
+#define	PMC_CLASSDEP_TABLE(N, C)				\
+	static const struct pmc_event_descr N##_event_table[] =	\
+	{							\
+		__PMC_EV_##C()					\
+	}
+
+PMC_CLASSDEP_TABLE(iaf, IAF);
+PMC_CLASSDEP_TABLE(k7, K7);
+PMC_CLASSDEP_TABLE(k8, K8);
+PMC_CLASSDEP_TABLE(p4, P4);
+PMC_CLASSDEP_TABLE(p5, P5);
+PMC_CLASSDEP_TABLE(p6, P6);
+PMC_CLASSDEP_TABLE(xscale, XSCALE);
+PMC_CLASSDEP_TABLE(mips24k, MIPS24K);
+PMC_CLASSDEP_TABLE(ucf, UCF);
+
+#undef	__PMC_EV_ALIAS
+#define	__PMC_EV_ALIAS(N,CODE) 	{ N, PMC_EV_##CODE },
+
+static const struct pmc_event_descr atom_event_table[] =
+{
+	__PMC_EV_ALIAS_ATOM()
+};
+
+static const struct pmc_event_descr core_event_table[] =
+{
+	__PMC_EV_ALIAS_CORE()
+};
+
+
+static const struct pmc_event_descr core2_event_table[] =
+{
+	__PMC_EV_ALIAS_CORE2()
+};
+
+static const struct pmc_event_descr corei7_event_table[] =
+{
+	__PMC_EV_ALIAS_COREI7()
+};
+
+static const struct pmc_event_descr westmere_event_table[] =
+{
+	__PMC_EV_ALIAS_WESTMERE()
+};
+
+static const struct pmc_event_descr corei7uc_event_table[] =
+{
+	__PMC_EV_ALIAS_COREI7UC()
+};
+
+static const struct pmc_event_descr westmereuc_event_table[] =
+{
+	__PMC_EV_ALIAS_WESTMEREUC()
+};
+
+/*
+ * PMC_MDEP_TABLE(NAME, PRIMARYCLASS, ADDITIONAL_CLASSES...)
+ *
+ * Map a CPU to the PMC classes it supports.
+ */
+#define	PMC_MDEP_TABLE(N,C,...)				\
+	static const enum pmc_class N##_pmc_classes[] = {	\
+		PMC_CLASS_##C, __VA_ARGS__			\
+	}
+
+PMC_MDEP_TABLE(atom, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(core, IAP, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(core2, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
+PMC_MDEP_TABLE(westmere, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
+PMC_MDEP_TABLE(k7, K7, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(k8, K8, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(p4, P4, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(p5, P5, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(p6, P6, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(xscale, XSCALE, PMC_CLASS_XSCALE);
+PMC_MDEP_TABLE(mips24k, MIPS24K, PMC_CLASS_MIPS24K);
+
+static const struct pmc_event_descr tsc_event_table[] =
+{
+	__PMC_EV_TSC()
+};
+
+#undef	PMC_CLASS_TABLE_DESC
+#define	PMC_CLASS_TABLE_DESC(NAME, CLASS, EVENTS, ALLOCATOR)	\
+static const struct pmc_class_descr NAME##_class_table_descr =	\
+	{							\
+		.pm_evc_name  = #CLASS "-",			\
+		.pm_evc_name_size = sizeof(#CLASS "-") - 1,	\
+		.pm_evc_class = PMC_CLASS_##CLASS ,		\
+		.pm_evc_event_table = EVENTS##_event_table ,	\
+		.pm_evc_event_table_size = 			\
+			PMC_EVENT_TABLE_SIZE(EVENTS),		\
+		.pm_evc_allocate_pmc = ALLOCATOR##_allocate_pmc	\
+	}
+
+#if	defined(__i386__) || defined(__amd64__)
+PMC_CLASS_TABLE_DESC(iaf, IAF, iaf, iaf);
+PMC_CLASS_TABLE_DESC(atom, IAP, atom, iap);
+PMC_CLASS_TABLE_DESC(core, IAP, core, iap);
+PMC_CLASS_TABLE_DESC(core2, IAP, core2, iap);
+PMC_CLASS_TABLE_DESC(corei7, IAP, corei7, iap);
+PMC_CLASS_TABLE_DESC(westmere, IAP, westmere, iap);
+PMC_CLASS_TABLE_DESC(ucf, UCF, ucf, ucf);
+PMC_CLASS_TABLE_DESC(corei7uc, UCP, corei7uc, ucp);
+PMC_CLASS_TABLE_DESC(westmereuc, UCP, westmereuc, ucp);
+#endif
+#if	defined(__i386__)
+PMC_CLASS_TABLE_DESC(k7, K7, k7, k7);
+#endif
+#if	defined(__i386__) || defined(__amd64__)
+PMC_CLASS_TABLE_DESC(k8, K8, k8, k8);
+PMC_CLASS_TABLE_DESC(p4, P4, p4, p4);
+#endif
+#if	defined(__i386__)
+PMC_CLASS_TABLE_DESC(p5, P5, p5, p5);
+PMC_CLASS_TABLE_DESC(p6, P6, p6, p6);
+#endif
+#if	defined(__i386__) || defined(__amd64__)
+PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc);
+#endif
+#if	defined(__XSCALE__)
+PMC_CLASS_TABLE_DESC(xscale, XSCALE, xscale, xscale);
+#endif
+
+#if defined(__mips__)
+PMC_CLASS_TABLE_DESC(mips24k, MIPS24K, mips24k, mips24k);
+#endif /* __mips__ */
+
+#undef	PMC_CLASS_TABLE_DESC
+
+static const struct pmc_class_descr **pmc_class_table;
+#define	PMC_CLASS_TABLE_SIZE	cpu_info.pm_nclass
+
+static const enum pmc_class *pmc_mdep_class_list;
+static size_t pmc_mdep_class_list_size;
+
+/*
+ * Mapping tables, mapping enumeration values to human readable
+ * strings.
+ */
+
+static const char * pmc_capability_names[] = {
+#undef	__PMC_CAP
+#define	__PMC_CAP(N,V,D)	#N ,
+	__PMC_CAPS()
+};
+
+static const char * pmc_class_names[] = {
+#undef	__PMC_CLASS
+#define __PMC_CLASS(C)	#C ,
+	__PMC_CLASSES()
+};
+
+struct pmc_cputype_map {
+	enum pmc_cputype pm_cputype;	/* PMC_CPU_* code */
+	const char	*pm_name;	/* that code's name as a string */
+};
+
+static const struct pmc_cputype_map pmc_cputype_names[] = {
+#undef	__PMC_CPU
+#define	__PMC_CPU(S, V, D) { .pm_cputype = PMC_CPU_##S, .pm_name = #S } ,
+	__PMC_CPUS()
+};
+
+static const char * pmc_disposition_names[] = {
+#undef	__PMC_DISP
+#define	__PMC_DISP(D)	#D ,
+	__PMC_DISPOSITIONS()
+};
+
+static const char * pmc_mode_names[] = {
+#undef  __PMC_MODE
+#define __PMC_MODE(M,N)	#M ,
+	__PMC_MODES()
+};
+
+static const char * pmc_state_names[] = {
+#undef  __PMC_STATE
+#define __PMC_STATE(S) #S ,
+	__PMC_STATES()
+};
+
+static int pmc_syscall = -1;		/* filled in by pmc_init() */
+
+static struct pmc_cpuinfo cpu_info;	/* filled in by pmc_init() */
+
+/* Event mask keyword table entry; tables end with a NULLMASK sentinel. */
+struct pmc_masks {
+	const char	*pm_name;	/* keyword, NULL in the sentinel */
+	const uint32_t	pm_value;	/* bits OR-ed in for the keyword */
+};
+#define	PMCMASK(N,V)	{ .pm_name = #N, .pm_value = (V) }
+#define	NULLMASK	{ .pm_name = NULL }
+/* Parse "KW=a+b+..." in 'p' against 'pmask'; returns keyword count or -1. */
+#if defined(__amd64__) || defined(__i386__)
+static int
+pmc_parse_mask(const struct pmc_masks *pmask, char *p, uint32_t *evmask)
+{
+	const struct pmc_masks *pm;
+	char *q, *r;
+	int c;
+
+	if (pmask == NULL)	/* no mask keywords */
+		return (-1);
+	q = strchr(p, '=');	/* locate the '=' separator */
+	if (q == NULL || *++q == '\0')	/* no '=', or nothing after it */
+		return (-1);
+	c = 0;			/* count of mask keywords seen */
+	while ((r = strsep(&q, "+")) != NULL) {	/* splits 'p' in place */
+		for (pm = pmask; pm->pm_name && strcasecmp(r, pm->pm_name);
+		    pm++)
+			;
+		if (pm->pm_name == NULL) /* not found */
+			return (-1);
+		*evmask |= pm->pm_value;
+		c++;
+	}
+	return (c);
+}
+#endif
+
+#define	KWMATCH(p,kw)		(strcasecmp((p), (kw)) == 0)
+#define	KWPREFIXMATCH(p,kw)	(strncasecmp((p), (kw), sizeof((kw)) - 1) == 0)
+#define	EV_ALIAS(N,S)		{ .pm_alias = N, .pm_spec = S }
+
+#if defined(__i386__)
+
+/*
+ * AMD K7 (Athlon) CPUs.
+ */
+
+static struct pmc_event_alias k7_aliases[] = {
+	EV_ALIAS("branches",		"k7-retired-branches"),
+	EV_ALIAS("branch-mispredicts",	"k7-retired-branches-mispredicted"),
+	EV_ALIAS("cycles",		"tsc"),
+	EV_ALIAS("dc-misses",		"k7-dc-misses"),
+	EV_ALIAS("ic-misses",		"k7-ic-misses"),
+	EV_ALIAS("instructions",	"k7-retired-instructions"),
+	EV_ALIAS("interrupts",		"k7-hardware-interrupts"),
+	EV_ALIAS(NULL, NULL)
+};
+
+#define	K7_KW_COUNT	"count"
+#define	K7_KW_EDGE	"edge"
+#define	K7_KW_INV	"inv"
+#define	K7_KW_OS	"os"
+#define	K7_KW_UNITMASK	"unitmask"
+#define	K7_KW_USR	"usr"
+
+/* Parse K7 qualifiers in 'ctrspec' into 'pmc_config'; returns 0 or -1. */
+static int
+k7_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	char		*e, *p, *q;
+	int		c, has_unitmask;
+	uint32_t	count, unitmask;
+
+	pmc_config->pm_md.pm_amd.pm_amd_config = 0;
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
+
+	if (pe == PMC_EV_K7_DC_REFILLS_FROM_L2 ||
+	    pe == PMC_EV_K7_DC_REFILLS_FROM_SYSTEM ||
+	    pe == PMC_EV_K7_DC_WRITEBACKS) {
+		has_unitmask = 1;
+		unitmask = AMD_PMC_UNITMASK_MOESI;	/* default mask */
+	} else
+		unitmask = has_unitmask = 0;
+
+	while ((p = strsep(&ctrspec, ",")) != NULL) {
+		if (KWPREFIXMATCH(p, K7_KW_COUNT "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+
+			count = strtol(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+
+			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
+			pmc_config->pm_md.pm_amd.pm_amd_config |=
+			    AMD_PMC_TO_COUNTER(count);
+
+		} else if (KWMATCH(p, K7_KW_EDGE)) {
+			pmc_config->pm_caps |= PMC_CAP_EDGE;
+		} else if (KWMATCH(p, K7_KW_INV)) {
+			pmc_config->pm_caps |= PMC_CAP_INVERT;
+		} else if (KWMATCH(p, K7_KW_OS)) {
+			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
+		} else if (KWPREFIXMATCH(p, K7_KW_UNITMASK "=")) {
+			if (has_unitmask == 0)
+				return (-1);
+			unitmask = 0;
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			/* <ctype.h> functions need an unsigned char value */
+			while ((c = tolower((unsigned char)*q++)) != 0)
+				if (c == 'm')
+					unitmask |= AMD_PMC_UNITMASK_M;
+				else if (c == 'o')
+					unitmask |= AMD_PMC_UNITMASK_O;
+				else if (c == 'e')
+					unitmask |= AMD_PMC_UNITMASK_E;
+				else if (c == 's')
+					unitmask |= AMD_PMC_UNITMASK_S;
+				else if (c == 'i')
+					unitmask |= AMD_PMC_UNITMASK_I;
+				else if (c == '+')
+					continue;
+				else
+					return (-1);
+
+			if (unitmask == 0)
+				return (-1);
+
+		} else if (KWMATCH(p, K7_KW_USR)) {
+			pmc_config->pm_caps |= PMC_CAP_USER;
+		} else
+			return (-1);
+	}
+
+	if (has_unitmask) {
+		pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+		pmc_config->pm_md.pm_amd.pm_amd_config |=
+		    AMD_PMC_TO_UNITMASK(unitmask);
+	}
+
+	return (0);
+}
+
+#endif
+
+#if defined(__amd64__) || defined(__i386__)
+
+/*
+ * Intel Core (Family 6, Model E) PMCs.
+ */
+
+static struct pmc_event_alias core_aliases[] = {
+	EV_ALIAS("branches",		"iap-br-instr-ret"),
+	EV_ALIAS("branch-mispredicts",	"iap-br-mispred-ret"),
+	EV_ALIAS("cycles",		"tsc-tsc"),
+	EV_ALIAS("ic-misses",		"iap-icache-misses"),
+	EV_ALIAS("instructions",	"iap-instr-ret"),
+	EV_ALIAS("interrupts",		"iap-core-hw-int-rx"),
+	EV_ALIAS("unhalted-cycles",	"iap-unhalted-core-cycles"),
+	EV_ALIAS(NULL, NULL)
+};
+
+/*
+ * Intel Core2 (Family 6, Model F), Core2Extreme (Family 6, Model 17H)
+ * and Atom (Family 6, model 1CH) PMCs.
+ *
+ * We map aliases to events on the fixed-function counters if these
+ * are present.  Note that not all CPUs in this family contain fixed-function
+ * counters.
+ */
+
+static struct pmc_event_alias core2_aliases[] = {
+	EV_ALIAS("branches",		"iap-br-inst-retired.any"),
+	EV_ALIAS("branch-mispredicts",	"iap-br-inst-retired.mispred"),
+	EV_ALIAS("cycles",		"tsc-tsc"),
+	EV_ALIAS("ic-misses",		"iap-l1i-misses"),
+	EV_ALIAS("instructions",	"iaf-instr-retired.any"),
+	EV_ALIAS("interrupts",		"iap-hw-int-rcv"),
+	EV_ALIAS("unhalted-cycles",	"iaf-cpu-clk-unhalted.core"),
+	EV_ALIAS(NULL, NULL)
+};
+
+static struct pmc_event_alias core2_aliases_without_iaf[] = {
+	EV_ALIAS("branches",		"iap-br-inst-retired.any"),
+	EV_ALIAS("branch-mispredicts",	"iap-br-inst-retired.mispred"),
+	EV_ALIAS("cycles",		"tsc-tsc"),
+	EV_ALIAS("ic-misses",		"iap-l1i-misses"),
+	EV_ALIAS("instructions",	"iap-inst-retired.any_p"),
+	EV_ALIAS("interrupts",		"iap-hw-int-rcv"),
+	EV_ALIAS("unhalted-cycles",	"iap-cpu-clk-unhalted.core_p"),
+	EV_ALIAS(NULL, NULL)
+};
+
+#define	atom_aliases			core2_aliases
+#define	atom_aliases_without_iaf	core2_aliases_without_iaf
+#define corei7_aliases			core2_aliases
+#define corei7_aliases_without_iaf	core2_aliases_without_iaf
+#define westmere_aliases		core2_aliases
+#define westmere_aliases_without_iaf	core2_aliases_without_iaf
+
+#define	IAF_KW_OS		"os"
+#define	IAF_KW_USR		"usr"
+#define	IAF_KW_ANYTHREAD	"anythread"
+
+/*
+ * Intel fixed-function counters: accept only "os", "usr" and "anythread".
+ */
+static int
+iaf_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	char *kw;
+
+	(void) pe;		/* no per-event qualifiers */
+
+	pmc_config->pm_md.pm_iaf.pm_iaf_flags = 0;
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
+
+	for (kw = strsep(&ctrspec, ","); kw != NULL;
+	    kw = strsep(&ctrspec, ",")) {
+		if (KWMATCH(kw, IAF_KW_ANYTHREAD))
+			pmc_config->pm_md.pm_iaf.pm_iaf_flags |= IAF_ANY;
+		else if (KWMATCH(kw, IAF_KW_USR))
+			pmc_config->pm_caps |= PMC_CAP_USER;
+		else if (KWMATCH(kw, IAF_KW_OS))
+			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
+		else
+			return (-1);	/* unknown qualifier */
+	}
+	return (0);
+}
+
+/*
+ * Core/Core2 support.
+ */
+
+#define	IAP_KW_AGENT		"agent"
+#define	IAP_KW_ANYTHREAD	"anythread"
+#define	IAP_KW_CACHESTATE	"cachestate"
+#define	IAP_KW_CMASK		"cmask"
+#define	IAP_KW_CORE		"core"
+#define	IAP_KW_EDGE		"edge"
+#define	IAP_KW_INV		"inv"
+#define	IAP_KW_OS		"os"
+#define	IAP_KW_PREFETCH		"prefetch"
+#define	IAP_KW_SNOOPRESPONSE	"snoopresponse"
+#define	IAP_KW_SNOOPTYPE	"snooptype"
+#define	IAP_KW_TRANSITION	"trans"
+#define	IAP_KW_USR		"usr"
+#define	IAP_KW_RSP		"rsp"
+
+static struct pmc_masks iap_core_mask[] = {
+	PMCMASK(all,	(0x3 << 14)),
+	PMCMASK(this,	(0x1 << 14)),
+	NULLMASK
+};
+
+static struct pmc_masks iap_agent_mask[] = {
+	PMCMASK(this,	0),
+	PMCMASK(any,	(0x1 << 13)),
+	NULLMASK
+};
+
+static struct pmc_masks iap_prefetch_mask[] = {
+	PMCMASK(both,		(0x3 << 12)),
+	PMCMASK(only,		(0x1 << 12)),
+	PMCMASK(exclude,	0),
+	NULLMASK
+};
+
+static struct pmc_masks iap_cachestate_mask[] = {
+	PMCMASK(i,		(1 <<  8)),
+	PMCMASK(s,		(1 <<  9)),
+	PMCMASK(e,		(1 << 10)),
+	PMCMASK(m,		(1 << 11)),
+	NULLMASK
+};
+
+static struct pmc_masks iap_snoopresponse_mask[] = {
+	PMCMASK(clean,		(1 << 8)),
+	PMCMASK(hit,		(1 << 9)),
+	PMCMASK(hitm,		(1 << 11)),
+	NULLMASK
+};
+
+static struct pmc_masks iap_snooptype_mask[] = {
+	PMCMASK(cmp2s,		(1 << 8)),
+	PMCMASK(cmp2i,		(1 << 9)),
+	NULLMASK
+};
+
+static struct pmc_masks iap_transition_mask[] = {
+	PMCMASK(any,		0x00),
+	PMCMASK(frequency,	0x10),
+	NULLMASK
+};
+
+static struct pmc_masks iap_rsp_mask[] = {
+	PMCMASK(DMND_DATA_RD,		(1 <<  0)),
+	PMCMASK(DMND_RFO,		(1 <<  1)),
+	PMCMASK(DMND_IFETCH,		(1 <<  2)),
+	PMCMASK(WB,			(1 <<  3)),
+	PMCMASK(PF_DATA_RD,		(1 <<  4)),
+	PMCMASK(PF_RFO,			(1 <<  5)),
+	PMCMASK(PF_IFETCH,		(1 <<  6)),
+	PMCMASK(OTHER,			(1 <<  7)),
+	PMCMASK(UNCORE_HIT,		(1 <<  8)),
+	PMCMASK(OTHER_CORE_HIT_SNP,	(1 <<  9)),
+	PMCMASK(OTHER_CORE_HITM,	(1 << 10)),
+	PMCMASK(REMOTE_CACHE_FWD,	(1 << 12)),
+	PMCMASK(REMOTE_DRAM,		(1 << 13)),
+	PMCMASK(LOCAL_DRAM,		(1 << 14)),
+	PMCMASK(NON_DRAM,		(1 << 15)),
+	NULLMASK
+};
+
+/* Parse qualifiers for Core/Core2-style IAP events; returns 0 or -1. */
+static int
+iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	char *e, *p, *q;
+	uint32_t cachestate, evmask, rsp;
+	int count, n;
+
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE |
+	    PMC_CAP_QUALIFIER);
+	pmc_config->pm_md.pm_iap.pm_iap_config = 0;
+
+	cachestate = evmask = rsp = 0;
+
+	/* Parse additional modifiers if present */
+	while ((p = strsep(&ctrspec, ",")) != NULL) {
+		n = 0;	/* result of pmc_parse_mask(), if called */
+		if (KWPREFIXMATCH(p, IAP_KW_CMASK "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			count = strtol(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
+			pmc_config->pm_md.pm_iap.pm_iap_config |=
+			    IAP_CMASK(count);
+		} else if (KWMATCH(p, IAP_KW_EDGE)) {
+			pmc_config->pm_caps |= PMC_CAP_EDGE;
+		} else if (KWMATCH(p, IAP_KW_INV)) {
+			pmc_config->pm_caps |= PMC_CAP_INVERT;
+		} else if (KWMATCH(p, IAP_KW_OS)) {
+			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
+		} else if (KWMATCH(p, IAP_KW_USR)) {
+			pmc_config->pm_caps |= PMC_CAP_USER;
+		} else if (KWMATCH(p, IAP_KW_ANYTHREAD)) {
+			pmc_config->pm_md.pm_iap.pm_iap_config |= IAP_ANY;
+		} else if (KWPREFIXMATCH(p, IAP_KW_CORE "=")) {
+			n = pmc_parse_mask(iap_core_mask, p, &evmask);
+			if (n != 1)	/* exactly one keyword allowed */
+				return (-1);
+		} else if (KWPREFIXMATCH(p, IAP_KW_AGENT "=")) {
+			n = pmc_parse_mask(iap_agent_mask, p, &evmask);
+			if (n != 1)
+				return (-1);
+		} else if (KWPREFIXMATCH(p, IAP_KW_PREFETCH "=")) {
+			n = pmc_parse_mask(iap_prefetch_mask, p, &evmask);
+			if (n != 1)
+				return (-1);
+		} else if (KWPREFIXMATCH(p, IAP_KW_CACHESTATE "=")) {
+			n = pmc_parse_mask(iap_cachestate_mask, p, &cachestate);
+		} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_CORE &&
+		    KWPREFIXMATCH(p, IAP_KW_TRANSITION "=")) {
+			n = pmc_parse_mask(iap_transition_mask, p, &evmask);
+			if (n != 1)
+				return (-1);
+		} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_ATOM ||
+		    cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2 ||
+		    cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2EXTREME) {
+			if (KWPREFIXMATCH(p, IAP_KW_SNOOPRESPONSE "=")) {
+				n = pmc_parse_mask(iap_snoopresponse_mask, p,
+				    &evmask);
+			} else if (KWPREFIXMATCH(p, IAP_KW_SNOOPTYPE "=")) {
+				n = pmc_parse_mask(iap_snooptype_mask, p,
+				    &evmask);
+			} else
+				return (-1);
+		} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_COREI7 ||
+		    cpu_info.pm_cputype == PMC_CPU_INTEL_WESTMERE) {
+			if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) {
+				n = pmc_parse_mask(iap_rsp_mask, p, &rsp);
+			} else
+				return (-1);
+		} else
+			return (-1);
+
+		if (n < 0)	/* Parsing failed. */
+			return (-1);
+	}
+
+	pmc_config->pm_md.pm_iap.pm_iap_config |= evmask;
+
+	/*
+	 * If the event requires a 'cachestate' qualifier but was not
+	 * specified by the user, use a sensible default.
+	 */
+	switch (pe) {
+	case PMC_EV_IAP_EVENT_28H: /* Core, Core2, Atom */
+	case PMC_EV_IAP_EVENT_29H: /* Core, Core2, Atom */
+	case PMC_EV_IAP_EVENT_2AH: /* Core, Core2, Atom */
+	case PMC_EV_IAP_EVENT_2BH: /* Atom, Core2 */
+	case PMC_EV_IAP_EVENT_2EH: /* Core, Core2, Atom */
+	case PMC_EV_IAP_EVENT_30H: /* Core, Core2, Atom */
+	case PMC_EV_IAP_EVENT_32H: /* Core */
+	case PMC_EV_IAP_EVENT_40H: /* Core */
+	case PMC_EV_IAP_EVENT_41H: /* Core */
+	case PMC_EV_IAP_EVENT_42H: /* Core, Core2, Atom */
+		if (cachestate == 0)
+			cachestate = (0xF << 8);	/* bits 8-11 set */
+		break;
+	case PMC_EV_IAP_EVENT_77H: /* Atom */
+		/*
+		 * Only Atom accepts a cachestate qualifier on IAP_EVENT_77H.
+		 */
+		if(cpu_info.pm_cputype == PMC_CPU_INTEL_ATOM && cachestate == 0)
+			cachestate = (0xF << 8);
+	    break;
+	default:
+		break;
+	}
+
+	pmc_config->pm_md.pm_iap.pm_iap_config |= cachestate;
+	pmc_config->pm_md.pm_iap.pm_iap_rsp = rsp;	/* 0 unless "rsp=" seen */
+
+	return (0);
+}
+
+/*
+ * Intel Uncore.
+ */
+
+/* UCF events take no qualifiers: 'ctrspec' is accepted but not parsed. */
+static int
+ucf_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	(void) ctrspec;
+	(void) pe;
+
+	pmc_config->pm_md.pm_ucf.pm_ucf_flags = 0;
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
+	return (0);
+}
+
+#define	UCP_KW_CMASK		"cmask"
+#define	UCP_KW_EDGE		"edge"
+#define	UCP_KW_INV		"inv"
+
+/*
+ * Parse UCP qualifiers ("cmask=N", "edge", "inv") from the comma-separated
+ * 'ctrspec' into 'pmc_config'.  Returns 0, or -1 on a malformed qualifier.
+ */
+static int
+ucp_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	char *e, *p, *q;
+	int count;
+
+	(void) pe;
+
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE |
+	    PMC_CAP_QUALIFIER);
+	pmc_config->pm_md.pm_ucp.pm_ucp_config = 0;
+
+	/* Parse additional modifiers if present */
+	while ((p = strsep(&ctrspec, ",")) != NULL) {
+		if (KWPREFIXMATCH(p, UCP_KW_CMASK "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			count = strtol(q, &e, 0);
+			/* reject an empty number or trailing garbage */
+			if (e == q || *e != '\0')
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
+			pmc_config->pm_md.pm_ucp.pm_ucp_config |=
+			    UCP_CMASK(count);
+		} else if (KWMATCH(p, UCP_KW_EDGE)) {
+			pmc_config->pm_caps |= PMC_CAP_EDGE;
+		} else if (KWMATCH(p, UCP_KW_INV)) {
+			pmc_config->pm_caps |= PMC_CAP_INVERT;
+		} else
+			return (-1);
+	}
+
+	return (0);
+}
+
+/*
+ * AMD K8 PMCs.
+ *
+ * These are very similar to AMD K7 PMCs, but support more kinds of
+ * events.
+ */
+
+static struct pmc_event_alias k8_aliases[] = {
+	EV_ALIAS("branches",		"k8-fr-retired-taken-branches"),
+	EV_ALIAS("branch-mispredicts",
+	    "k8-fr-retired-taken-branches-mispredicted"),
+	EV_ALIAS("cycles",		"tsc"),
+	EV_ALIAS("dc-misses",		"k8-dc-miss"),
+	EV_ALIAS("ic-misses",		"k8-ic-miss"),
+	EV_ALIAS("instructions",	"k8-fr-retired-x86-instructions"),
+	EV_ALIAS("interrupts",		"k8-fr-taken-hardware-interrupts"),
+	EV_ALIAS("unhalted-cycles",	"k8-bu-cpu-clk-unhalted"),
+	EV_ALIAS(NULL, NULL)
+};
+
+#define	__K8MASK(N,V) PMCMASK(N,(1 << (V)))
+
+/*
+ * Parsing tables
+ */
+
+/* fp dispatched fpu ops */
+static const struct pmc_masks k8_mask_fdfo[] = {
+	__K8MASK(add-pipe-excluding-junk-ops,	0),
+	__K8MASK(multiply-pipe-excluding-junk-ops,	1),
+	__K8MASK(store-pipe-excluding-junk-ops,	2),
+	__K8MASK(add-pipe-junk-ops,		3),
+	__K8MASK(multiply-pipe-junk-ops,	4),
+	__K8MASK(store-pipe-junk-ops,		5),
+	NULLMASK
+};
+
+/* ls segment register loads */
+static const struct pmc_masks k8_mask_lsrl[] = {
+	__K8MASK(es,	0),
+	__K8MASK(cs,	1),
+	__K8MASK(ss,	2),
+	__K8MASK(ds,	3),
+	__K8MASK(fs,	4),
+	__K8MASK(gs,	5),
+	__K8MASK(hs,	6),
+	NULLMASK
+};
+
+/* ls locked operation */
+static const struct pmc_masks k8_mask_llo[] = {
+	__K8MASK(locked-instructions,	0),
+	__K8MASK(cycles-in-request,	1),
+	__K8MASK(cycles-to-complete,	2),
+	NULLMASK
+};
+
+/* dc refill from {l2,system} and dc copyback */
+static const struct pmc_masks k8_mask_dc[] = {
+	__K8MASK(invalid,	0),
+	__K8MASK(shared,	1),
+	__K8MASK(exclusive,	2),
+	__K8MASK(owner,		3),
+	__K8MASK(modified,	4),
+	NULLMASK
+};
+
+/* dc one bit ecc error */
+static const struct pmc_masks k8_mask_dobee[] = {
+	__K8MASK(scrubber,	0),
+	__K8MASK(piggyback,	1),
+	NULLMASK
+};
+
+/* dc dispatched prefetch instructions */
+static const struct pmc_masks k8_mask_ddpi[] = {
+	__K8MASK(load,	0),
+	__K8MASK(store,	1),
+	__K8MASK(nta,	2),
+	NULLMASK
+};
+
+/* dc dcache accesses by locks */
+static const struct pmc_masks k8_mask_dabl[] = {
+	__K8MASK(accesses,	0),
+	__K8MASK(misses,	1),
+	NULLMASK
+};
+
+/* bu internal l2 request */
+static const struct pmc_masks k8_mask_bilr[] = {
+	__K8MASK(ic-fill,	0),
+	__K8MASK(dc-fill,	1),
+	__K8MASK(tlb-reload,	2),
+	__K8MASK(tag-snoop,	3),
+	__K8MASK(cancelled,	4),
+	NULLMASK
+};
+
+/* bu fill request l2 miss */
+static const struct pmc_masks k8_mask_bfrlm[] = {
+	__K8MASK(ic-fill,	0),
+	__K8MASK(dc-fill,	1),
+	__K8MASK(tlb-reload,	2),
+	NULLMASK
+};
+
+/* bu fill into l2 */
+static const struct pmc_masks k8_mask_bfil[] = {
+	__K8MASK(dirty-l2-victim,	0),
+	__K8MASK(victim-from-l2,	1),
+	NULLMASK
+};
+
+/* fr retired fpu instructions */
+static const struct pmc_masks k8_mask_frfi[] = {
+	__K8MASK(x87,			0),
+	__K8MASK(mmx-3dnow,		1),
+	__K8MASK(packed-sse-sse2,	2),
+	__K8MASK(scalar-sse-sse2,	3),
+	NULLMASK
+};
+
+/* fr retired fastpath double op instructions */
+static const struct pmc_masks k8_mask_frfdoi[] = {
+	__K8MASK(low-op-pos-0,		0),
+	__K8MASK(low-op-pos-1,		1),
+	__K8MASK(low-op-pos-2,		2),
+	NULLMASK
+};
+
+/* fr fpu exceptions */
+static const struct pmc_masks k8_mask_ffe[] = {
+	__K8MASK(x87-reclass-microfaults,	0),
+	__K8MASK(sse-retype-microfaults,	1),
+	__K8MASK(sse-reclass-microfaults,	2),
+	__K8MASK(sse-and-x87-microtraps,	3),
+	NULLMASK
+};
+
+/* nb memory controller page access event */
+static const struct pmc_masks k8_mask_nmcpae[] = {
+	__K8MASK(page-hit,	0),
+	__K8MASK(page-miss,	1),
+	__K8MASK(page-conflict,	2),
+	NULLMASK
+};
+
+/* nb memory controller turnaround */
+static const struct pmc_masks k8_mask_nmct[] = {
+	__K8MASK(dimm-turnaround,		0),
+	__K8MASK(read-to-write-turnaround,	1),
+	__K8MASK(write-to-read-turnaround,	2),
+	NULLMASK
+};
+
+/* nb memory controller bypass saturation */
+static const struct pmc_masks k8_mask_nmcbs[] = {
+	__K8MASK(memory-controller-hi-pri-bypass,	0),
+	__K8MASK(memory-controller-lo-pri-bypass,	1),
+	__K8MASK(dram-controller-interface-bypass,	2),
+	__K8MASK(dram-controller-queue-bypass,		3),
+	NULLMASK
+};
+
+/* nb sized commands */
+static const struct pmc_masks k8_mask_nsc[] = {
+	__K8MASK(nonpostwrszbyte,	0),
+	__K8MASK(nonpostwrszdword,	1),
+	__K8MASK(postwrszbyte,		2),
+	__K8MASK(postwrszdword,		3),
+	__K8MASK(rdszbyte,		4),
+	__K8MASK(rdszdword,		5),
+	__K8MASK(rdmodwr,		6),
+	NULLMASK
+};
+
+/* nb probe result */
+static const struct pmc_masks k8_mask_npr[] = {
+	__K8MASK(probe-miss,		0),
+	__K8MASK(probe-hit,		1),
+	__K8MASK(probe-hit-dirty-no-memory-cancel, 2),
+	__K8MASK(probe-hit-dirty-with-memory-cancel, 3),
+	NULLMASK
+};
+
+/* nb hypertransport bus bandwidth */
+static const struct pmc_masks k8_mask_nhbb[] = { /* HT bus bandwidth */
+	__K8MASK(command,	0),
+	__K8MASK(data,	1),
+	__K8MASK(buffer-release, 2),
+	__K8MASK(nop,	3),
+	NULLMASK
+};
+
+#undef	__K8MASK
+
+#define	K8_KW_COUNT	"count"
+#define	K8_KW_EDGE	"edge"
+#define	K8_KW_INV	"inv"
+#define	K8_KW_MASK	"mask"
+#define	K8_KW_OS	"os"
+#define	K8_KW_USR	"usr"
+
+/*
+ * Parse the comma-separated qualifier list 'ctrspec' for AMD K8 event
+ * 'pe' and record the result in 'pmc_config': capability bits go into
+ * pm_caps, hardware bits into pm_md.pm_amd.pm_amd_config.  'ctrspec' is
+ * consumed by strsep().  Returns 0 on success and -1 on a malformed,
+ * unknown or disallowed qualifier.
+ */
+static int
+k8_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	char		*e, *p, *q;
+	uint32_t	count, evmask;
+	const struct pmc_masks	*pm, *pmask;
+
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
+	pmc_config->pm_md.pm_amd.pm_amd_config = 0;
+	pmask = NULL;
+	evmask = 0;
+
+#define	__K8SETMASK(M) pmask = k8_mask_##M
+	/* setup parsing tables */
+	switch (pe) {
+	case PMC_EV_K8_FP_DISPATCHED_FPU_OPS:
+		__K8SETMASK(fdfo);
+		break;
+	case PMC_EV_K8_LS_SEGMENT_REGISTER_LOAD:
+		__K8SETMASK(lsrl);
+		break;
+	case PMC_EV_K8_LS_LOCKED_OPERATION:
+		__K8SETMASK(llo);
+		break;
+	case PMC_EV_K8_DC_REFILL_FROM_L2:
+	case PMC_EV_K8_DC_REFILL_FROM_SYSTEM:
+	case PMC_EV_K8_DC_COPYBACK:
+		__K8SETMASK(dc);
+		break;
+	case PMC_EV_K8_DC_ONE_BIT_ECC_ERROR:
+		__K8SETMASK(dobee);
+		break;
+	case PMC_EV_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS:
+		__K8SETMASK(ddpi);
+		break;
+	case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS:
+		__K8SETMASK(dabl);
+		break;
+	case PMC_EV_K8_BU_INTERNAL_L2_REQUEST:
+		__K8SETMASK(bilr);
+		break;
+	case PMC_EV_K8_BU_FILL_REQUEST_L2_MISS:
+		__K8SETMASK(bfrlm);
+		break;
+	case PMC_EV_K8_BU_FILL_INTO_L2:
+		__K8SETMASK(bfil);
+		break;
+	case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS:
+		__K8SETMASK(frfi);
+		break;
+	case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS:
+		__K8SETMASK(frfdoi);
+		break;
+	case PMC_EV_K8_FR_FPU_EXCEPTIONS:
+		__K8SETMASK(ffe);
+		break;
+	case PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT:
+		__K8SETMASK(nmcpae);
+		break;
+	case PMC_EV_K8_NB_MEMORY_CONTROLLER_TURNAROUND:
+		__K8SETMASK(nmct);
+		break;
+	case PMC_EV_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION:
+		__K8SETMASK(nmcbs);
+		break;
+	case PMC_EV_K8_NB_SIZED_COMMANDS:
+		__K8SETMASK(nsc);
+		break;
+	case PMC_EV_K8_NB_PROBE_RESULT:
+		__K8SETMASK(npr);
+		break;
+	case PMC_EV_K8_NB_HT_BUS0_BANDWIDTH:
+	case PMC_EV_K8_NB_HT_BUS1_BANDWIDTH:
+	case PMC_EV_K8_NB_HT_BUS2_BANDWIDTH:
+		__K8SETMASK(nhbb);
+		break;
+	default:
+		break;		/* no options defined */
+	}
+
+	while ((p = strsep(&ctrspec, ",")) != NULL) {
+		if (KWPREFIXMATCH(p, K8_KW_COUNT "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			count = strtol(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
+			pmc_config->pm_md.pm_amd.pm_amd_config |=
+			    AMD_PMC_TO_COUNTER(count);
+		} else if (KWMATCH(p, K8_KW_EDGE)) {
+			pmc_config->pm_caps |= PMC_CAP_EDGE;
+		} else if (KWMATCH(p, K8_KW_INV)) {
+			pmc_config->pm_caps |= PMC_CAP_INVERT;
+		} else if (KWPREFIXMATCH(p, K8_KW_MASK "=")) {
+			if (pmc_parse_mask(pmask, p, &evmask) < 0)
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+		} else if (KWMATCH(p, K8_KW_OS)) {
+			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
+		} else if (KWMATCH(p, K8_KW_USR)) {
+			pmc_config->pm_caps |= PMC_CAP_USER;
+		} else
+			return (-1);
+	}
+
+	/* other post processing */
+	switch (pe) {
+	case PMC_EV_K8_FP_DISPATCHED_FPU_OPS:
+	case PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED:
+	case PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS:
+	case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS:
+	case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS:
+	case PMC_EV_K8_FR_FPU_EXCEPTIONS:
+		/* XXX only available in rev B and later */
+		break;
+	case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS:
+		/* XXX only available in rev C and later */
+		break;
+	case PMC_EV_K8_LS_LOCKED_OPERATION:
+		/* XXX CPU Rev A,B evmask is to be zero */
+		if (evmask & (evmask - 1)) /* > 1 bit set */
+			return (-1);
+		if (evmask == 0) {
+			evmask = 0x01; /* Rev C and later: #instrs */
+			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+		}
+		break;
+	default:
+		if (evmask == 0 && pmask != NULL) {
+			for (pm = pmask; pm->pm_name; pm++)
+				evmask |= pm->pm_value;
+			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+		}
+	}
+	/* OR in the unit mask so a "count=" threshold parsed above survives */
+	if (pmc_config->pm_caps & PMC_CAP_QUALIFIER)
+		pmc_config->pm_md.pm_amd.pm_amd_config |=
+		    AMD_PMC_TO_UNITMASK(evmask);
+
+	return (0);
+}
+
+#endif
+
+#if defined(__amd64__) || defined(__i386__)
+
+/*
+ * Intel P4 PMCs
+ */
+
+static struct pmc_event_alias p4_aliases[] = {
+	EV_ALIAS("branches",		"p4-branch-retired,mask=mmtp+mmtm"),
+	EV_ALIAS("branch-mispredicts",	"p4-mispred-branch-retired"),
+	EV_ALIAS("cycles",		"tsc"),
+	EV_ALIAS("instructions",
+	    "p4-instr-retired,mask=nbogusntag+nbogustag"),
+	EV_ALIAS("unhalted-cycles",	"p4-global-power-events"),
+	EV_ALIAS(NULL, NULL)
+};
+
+#define	P4_KW_ACTIVE	"active"
+#define	P4_KW_ACTIVE_ANY "any"
+#define	P4_KW_ACTIVE_BOTH "both"
+#define	P4_KW_ACTIVE_NONE "none"
+#define	P4_KW_ACTIVE_SINGLE "single"
+#define	P4_KW_BUSREQTYPE "busreqtype"
+#define	P4_KW_CASCADE	"cascade"
+#define	P4_KW_EDGE	"edge"
+#define	P4_KW_INV	"complement"
+#define	P4_KW_OS	"os"
+#define	P4_KW_MASK	"mask"
+#define	P4_KW_PRECISE	"precise"
+#define	P4_KW_TAG	"tag"
+#define	P4_KW_THRESHOLD	"threshold"
+#define	P4_KW_USR	"usr"
+
+#define	__P4MASK(N,V) PMCMASK(N, (1 << (V)))
+
+static const struct pmc_masks p4_mask_tcdm[] = { /* tc deliver mode */
+	__P4MASK(dd, 0),
+	__P4MASK(db, 1),
+	__P4MASK(di, 2),
+	__P4MASK(bd, 3),
+	__P4MASK(bb, 4),
+	__P4MASK(bi, 5),
+	__P4MASK(id, 6),
+	__P4MASK(ib, 7),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_bfr[] = { /* bpu fetch request */
+	__P4MASK(tcmiss, 0),
+	NULLMASK,
+};
+
+static const struct pmc_masks p4_mask_ir[] = { /* itlb reference */
+	__P4MASK(hit, 0),
+	__P4MASK(miss, 1),
+	__P4MASK(hit-uc, 2),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_memcan[] = { /* memory cancel */
+	__P4MASK(st-rb-full, 2),
+	__P4MASK(64k-conf, 3),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_memcomp[] = { /* memory complete */
+	__P4MASK(lsc, 0),
+	__P4MASK(ssc, 1),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_lpr[] = { /* load port replay */
+	__P4MASK(split-ld, 1),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_spr[] = { /* store port replay */
+	__P4MASK(split-st, 1),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_mlr[] = { /* mob load replay */
+	__P4MASK(no-sta, 1),
+	__P4MASK(no-std, 3),
+	__P4MASK(partial-data, 4),
+	__P4MASK(unalgn-addr, 5),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_pwt[] = { /* page walk type */
+	__P4MASK(dtmiss, 0),
+	__P4MASK(itmiss, 1),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_bcr[] = { /* bsq cache reference */
+	__P4MASK(rd-2ndl-hits, 0),
+	__P4MASK(rd-2ndl-hite, 1),
+	__P4MASK(rd-2ndl-hitm, 2),
+	__P4MASK(rd-3rdl-hits, 3),
+	__P4MASK(rd-3rdl-hite, 4),
+	__P4MASK(rd-3rdl-hitm, 5),
+	__P4MASK(rd-2ndl-miss, 8),
+	__P4MASK(rd-3rdl-miss, 9),
+	__P4MASK(wr-2ndl-miss, 10),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_ia[] = { /* ioq allocation */
+	__P4MASK(all-read, 5),
+	__P4MASK(all-write, 6),
+	__P4MASK(mem-uc, 7),
+	__P4MASK(mem-wc, 8),
+	__P4MASK(mem-wt, 9),
+	__P4MASK(mem-wp, 10),
+	__P4MASK(mem-wb, 11),
+	__P4MASK(own, 13),
+	__P4MASK(other, 14),
+	__P4MASK(prefetch, 15),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_iae[] = { /* ioq active entries */
+	__P4MASK(all-read, 5),
+	__P4MASK(all-write, 6),
+	__P4MASK(mem-uc, 7),
+	__P4MASK(mem-wc, 8),
+	__P4MASK(mem-wt, 9),
+	__P4MASK(mem-wp, 10),
+	__P4MASK(mem-wb, 11),
+	__P4MASK(own, 13),
+	__P4MASK(other, 14),
+	__P4MASK(prefetch, 15),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_fda[] = { /* fsb data activity */
+	__P4MASK(drdy-drv, 0),
+	__P4MASK(drdy-own, 1),
+	__P4MASK(drdy-other, 2),
+	__P4MASK(dbsy-drv, 3),
+	__P4MASK(dbsy-own, 4),
+	__P4MASK(dbsy-other, 5),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_ba[] = { /* bsq allocation */
+	__P4MASK(req-type0, 0),
+	__P4MASK(req-type1, 1),
+	__P4MASK(req-len0, 2),
+	__P4MASK(req-len1, 3),
+	__P4MASK(req-io-type, 5),
+	__P4MASK(req-lock-type, 6),
+	__P4MASK(req-cache-type, 7),
+	__P4MASK(req-split-type, 8),
+	__P4MASK(req-dem-type, 9),
+	__P4MASK(req-ord-type, 10),
+	__P4MASK(mem-type0, 11),
+	__P4MASK(mem-type1, 12),
+	__P4MASK(mem-type2, 13),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_sia[] = { /* sse input assist */
+	__P4MASK(all, 15),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_psu[] = { /* packed sp uop */
+	__P4MASK(all, 15),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_pdu[] = { /* packed dp uop */
+	__P4MASK(all, 15),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_ssu[] = { /* scalar sp uop */
+	__P4MASK(all, 15),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_sdu[] = { /* scalar dp uop */
+	__P4MASK(all, 15),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_64bmu[] = { /* 64 bit mmx uop */
+	__P4MASK(all, 15),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_128bmu[] = { /* 128 bit mmx uop */
+	__P4MASK(all, 15),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_xfu[] = { /* X87 fp uop */
+	__P4MASK(all, 15),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_xsmu[] = { /* x87 simd moves uop */
+	__P4MASK(allp0, 3),
+	__P4MASK(allp2, 4),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_gpe[] = { /* global power events */
+	__P4MASK(running, 0),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_tmx[] = { /* TC ms xfer */
+	__P4MASK(cisc, 0),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_uqw[] = { /* uop queue writes */
+	__P4MASK(from-tc-build, 0),
+	__P4MASK(from-tc-deliver, 1),
+	__P4MASK(from-rom, 2),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_rmbt[] = {
+	/* retired mispred branch type */
+	__P4MASK(conditional, 1),
+	__P4MASK(call, 2),
+	__P4MASK(return, 3),
+	__P4MASK(indirect, 4),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_rbt[] = { /* retired branch type */
+	__P4MASK(conditional, 1),
+	__P4MASK(call, 2),
+	__P4MASK(return, 3), __P4MASK(retired, 3), /* "retired": old spelling */
+	__P4MASK(indirect, 4),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_rs[] = { /* resource stall */
+	__P4MASK(sbfull, 5),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_wb[] = { /* WC buffer */
+	__P4MASK(wcb-evicts, 0),
+	__P4MASK(wcb-full-evict, 1),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_fee[] = { /* front end event */
+	__P4MASK(nbogus, 0),
+	__P4MASK(bogus, 1),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_ee[] = { /* execution event */
+	__P4MASK(nbogus0, 0),
+	__P4MASK(nbogus1, 1),
+	__P4MASK(nbogus2, 2),
+	__P4MASK(nbogus3, 3),
+	__P4MASK(bogus0, 4),
+	__P4MASK(bogus1, 5),
+	__P4MASK(bogus2, 6),
+	__P4MASK(bogus3, 7),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_re[] = { /* replay event */
+	__P4MASK(nbogus, 0),
+	__P4MASK(bogus, 1),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_insret[] = { /* instr retired */
+	__P4MASK(nbogusntag, 0),
+	__P4MASK(nbogustag, 1),
+	__P4MASK(bogusntag, 2),
+	__P4MASK(bogustag, 3),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_ur[] = { /* uops retired */
+	__P4MASK(nbogus, 0),
+	__P4MASK(bogus, 1),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_ut[] = { /* uop type */
+	__P4MASK(tagloads, 1),
+	__P4MASK(tagstores, 2),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_br[] = { /* branch retired */
+	__P4MASK(mmnp, 0),
+	__P4MASK(mmnm, 1),
+	__P4MASK(mmtp, 2),
+	__P4MASK(mmtm, 3),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_mbr[] = { /* mispred branch retired */
+	__P4MASK(nbogus, 0),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_xa[] = { /* x87 assist */
+	__P4MASK(fpsu, 0),
+	__P4MASK(fpso, 1),
+	__P4MASK(poao, 2),
+	__P4MASK(poau, 3),
+	__P4MASK(prea, 4),
+	NULLMASK
+};
+
+static const struct pmc_masks p4_mask_machclr[] = { /* machine clear */
+	__P4MASK(clear, 0),
+	__P4MASK(moclear, 2),
+	__P4MASK(smclear, 3),
+	NULLMASK
+};
+
+/*
+ * P4 event parser: translate the comma-separated qualifiers in
+ * 'ctrspec' for event 'pe' into capability bits (pm_caps) and the
+ * ESCR/CCCR configuration words in 'pmc_config'.  'ctrspec' is consumed
+ * by strsep().  Returns 0 on success, or -1 for an event not handled by
+ * the switch below or for a malformed, unknown or disallowed qualifier.
+ */
+static int
+p4_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	char	*e, *p, *q;
+	int	count, has_tag, has_busreqtype;
+	uint32_t evmask, cccractivemask;
+	const struct pmc_masks *pm, *pmask;
+
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
+	pmc_config->pm_md.pm_p4.pm_p4_cccrconfig =
+	    pmc_config->pm_md.pm_p4.pm_p4_escrconfig = 0;
+
+	pmask   = NULL;
+	evmask  = 0;
+	cccractivemask = 0x3;
+	/* NOTE(review): has_tag is never set, so "tag=" is always rejected */
+	has_tag = has_busreqtype = 0;
+
+#define	__P4SETMASK(M) do {				\
+	pmask = p4_mask_##M;				\
+} while (0)
+
+	switch (pe) {
+	case PMC_EV_P4_TC_DELIVER_MODE:
+		__P4SETMASK(tcdm);
+		break;
+	case PMC_EV_P4_BPU_FETCH_REQUEST:
+		__P4SETMASK(bfr);
+		break;
+	case PMC_EV_P4_ITLB_REFERENCE:
+		__P4SETMASK(ir);
+		break;
+	case PMC_EV_P4_MEMORY_CANCEL:
+		__P4SETMASK(memcan);
+		break;
+	case PMC_EV_P4_MEMORY_COMPLETE:
+		__P4SETMASK(memcomp);
+		break;
+	case PMC_EV_P4_LOAD_PORT_REPLAY:
+		__P4SETMASK(lpr);
+		break;
+	case PMC_EV_P4_STORE_PORT_REPLAY:
+		__P4SETMASK(spr);
+		break;
+	case PMC_EV_P4_MOB_LOAD_REPLAY:
+		__P4SETMASK(mlr);
+		break;
+	case PMC_EV_P4_PAGE_WALK_TYPE:
+		__P4SETMASK(pwt);
+		break;
+	case PMC_EV_P4_BSQ_CACHE_REFERENCE:
+		__P4SETMASK(bcr);
+		break;
+	case PMC_EV_P4_IOQ_ALLOCATION:
+		__P4SETMASK(ia);
+		has_busreqtype = 1;
+		break;
+	case PMC_EV_P4_IOQ_ACTIVE_ENTRIES:
+		__P4SETMASK(iae);
+		has_busreqtype = 1;
+		break;
+	case PMC_EV_P4_FSB_DATA_ACTIVITY:
+		__P4SETMASK(fda);
+		break;
+	case PMC_EV_P4_BSQ_ALLOCATION:
+		__P4SETMASK(ba);
+		break;
+	case PMC_EV_P4_SSE_INPUT_ASSIST:
+		__P4SETMASK(sia);
+		break;
+	case PMC_EV_P4_PACKED_SP_UOP:
+		__P4SETMASK(psu);
+		break;
+	case PMC_EV_P4_PACKED_DP_UOP:
+		__P4SETMASK(pdu);
+		break;
+	case PMC_EV_P4_SCALAR_SP_UOP:
+		__P4SETMASK(ssu);
+		break;
+	case PMC_EV_P4_SCALAR_DP_UOP:
+		__P4SETMASK(sdu);
+		break;
+	case PMC_EV_P4_64BIT_MMX_UOP:
+		__P4SETMASK(64bmu);
+		break;
+	case PMC_EV_P4_128BIT_MMX_UOP:
+		__P4SETMASK(128bmu);
+		break;
+	case PMC_EV_P4_X87_FP_UOP:
+		__P4SETMASK(xfu);
+		break;
+	case PMC_EV_P4_X87_SIMD_MOVES_UOP:
+		__P4SETMASK(xsmu);
+		break;
+	case PMC_EV_P4_GLOBAL_POWER_EVENTS:
+		__P4SETMASK(gpe);
+		break;
+	case PMC_EV_P4_TC_MS_XFER:
+		__P4SETMASK(tmx);
+		break;
+	case PMC_EV_P4_UOP_QUEUE_WRITES:
+		__P4SETMASK(uqw);
+		break;
+	case PMC_EV_P4_RETIRED_MISPRED_BRANCH_TYPE:
+		__P4SETMASK(rmbt);
+		break;
+	case PMC_EV_P4_RETIRED_BRANCH_TYPE:
+		__P4SETMASK(rbt);
+		break;
+	case PMC_EV_P4_RESOURCE_STALL:
+		__P4SETMASK(rs);
+		break;
+	case PMC_EV_P4_WC_BUFFER:
+		__P4SETMASK(wb);
+		break;
+	case PMC_EV_P4_BSQ_ACTIVE_ENTRIES:
+	case PMC_EV_P4_B2B_CYCLES:
+	case PMC_EV_P4_BNR:
+	case PMC_EV_P4_SNOOP:
+	case PMC_EV_P4_RESPONSE:
+		break;
+	case PMC_EV_P4_FRONT_END_EVENT:
+		__P4SETMASK(fee);
+		break;
+	case PMC_EV_P4_EXECUTION_EVENT:
+		__P4SETMASK(ee);
+		break;
+	case PMC_EV_P4_REPLAY_EVENT:
+		__P4SETMASK(re);
+		break;
+	case PMC_EV_P4_INSTR_RETIRED:
+		__P4SETMASK(insret);
+		break;
+	case PMC_EV_P4_UOPS_RETIRED:
+		__P4SETMASK(ur);
+		break;
+	case PMC_EV_P4_UOP_TYPE:
+		__P4SETMASK(ut);
+		break;
+	case PMC_EV_P4_BRANCH_RETIRED:
+		__P4SETMASK(br);
+		break;
+	case PMC_EV_P4_MISPRED_BRANCH_RETIRED:
+		__P4SETMASK(mbr);
+		break;
+	case PMC_EV_P4_X87_ASSIST:
+		__P4SETMASK(xa);
+		break;
+	case PMC_EV_P4_MACHINE_CLEAR:
+		__P4SETMASK(machclr);
+		break;
+	default:
+		return (-1);
+	}
+
+	/* process additional flags */
+	while ((p = strsep(&ctrspec, ",")) != NULL) {
+		/* match through '=' so the strchr() below cannot return NULL */
+		if (KWPREFIXMATCH(p, P4_KW_ACTIVE "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			if (strcasecmp(q, P4_KW_ACTIVE_NONE) == 0)
+				cccractivemask = 0x0;
+			else if (strcasecmp(q, P4_KW_ACTIVE_SINGLE) == 0)
+				cccractivemask = 0x1;
+			else if (strcasecmp(q, P4_KW_ACTIVE_BOTH) == 0)
+				cccractivemask = 0x2;
+			else if (strcasecmp(q, P4_KW_ACTIVE_ANY) == 0)
+				cccractivemask = 0x3;
+			else
+				return (-1);
+		} else if (KWPREFIXMATCH(p, P4_KW_BUSREQTYPE "=")) {
+			if (has_busreqtype == 0)
+				return (-1);
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			count = strtol(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			evmask = (evmask & ~0x1F) | (count & 0x1F);
+		} else if (KWMATCH(p, P4_KW_CASCADE))
+			pmc_config->pm_caps |= PMC_CAP_CASCADE;
+		else if (KWMATCH(p, P4_KW_EDGE))
+			pmc_config->pm_caps |= PMC_CAP_EDGE;
+		else if (KWMATCH(p, P4_KW_INV))
+			pmc_config->pm_caps |= PMC_CAP_INVERT;
+		else if (KWPREFIXMATCH(p, P4_KW_MASK "=")) {
+			if (pmc_parse_mask(pmask, p, &evmask) < 0)
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+		} else if (KWMATCH(p, P4_KW_OS))
+			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
+		else if (KWMATCH(p, P4_KW_PRECISE))
+			pmc_config->pm_caps |= PMC_CAP_PRECISE;
+		else if (KWPREFIXMATCH(p, P4_KW_TAG "=")) {
+			if (has_tag == 0)
+				return (-1);
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			count = strtol(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_TAGGING;
+			pmc_config->pm_md.pm_p4.pm_p4_escrconfig |=
+			    P4_ESCR_TO_TAG_VALUE(count);
+		} else if (KWPREFIXMATCH(p, P4_KW_THRESHOLD "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			count = strtol(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
+			pmc_config->pm_md.pm_p4.pm_p4_cccrconfig &=
+			    ~P4_CCCR_THRESHOLD_MASK;
+			pmc_config->pm_md.pm_p4.pm_p4_cccrconfig |=
+			    P4_CCCR_TO_THRESHOLD(count);
+		} else if (KWMATCH(p, P4_KW_USR))
+			pmc_config->pm_caps |= PMC_CAP_USER;
+		else
+			return (-1);
+	}
+
+	/* other post processing */
+	if (pe == PMC_EV_P4_IOQ_ALLOCATION ||
+	    pe == PMC_EV_P4_FSB_DATA_ACTIVITY ||
+	    pe == PMC_EV_P4_BSQ_ALLOCATION)
+		pmc_config->pm_caps |= PMC_CAP_EDGE;
+
+	/* fill in thread activity mask */
+	pmc_config->pm_md.pm_p4.pm_p4_cccrconfig |=
+	    P4_CCCR_TO_ACTIVE_THREAD(cccractivemask);
+
+	if (evmask)
+		pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+
+	switch (pe) {
+	case PMC_EV_P4_FSB_DATA_ACTIVITY:
+		/* drdy-own|drdy-other is 0x06, dbsy-own|dbsy-other is 0x30 */
+		if ((evmask & 0x06) == 0x06 ||
+		    (evmask & 0x30) == 0x30)
+			return (-1); /* can't have own+other bits together */
+		if (evmask == 0) /* default:drdy-{drv,own}+dbsy{drv,own} */
+			evmask = 0x1B;
+		break;
+	case PMC_EV_P4_MACHINE_CLEAR:
+		/* only one bit is allowed to be set */
+		if ((evmask & (evmask - 1)) != 0)
+			return (-1);
+		if (evmask == 0) {
+			evmask = 0x1;	/* 'CLEAR' */
+			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+		}
+		break;
+	default:
+		if (evmask == 0 && pmask) {
+			for (pm = pmask; pm->pm_name; pm++)
+				evmask |= pm->pm_value;
+			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+		}
+	}
+
+	/* OR in the event mask so a parsed tag value is not discarded */
+	pmc_config->pm_md.pm_p4.pm_p4_escrconfig |=
+	    P4_ESCR_TO_EVENT_MASK(evmask);
+
+	return (0);
+}
+
+#endif
+
+#if defined(__i386__)
+
+/*
+ * Pentium style PMCs
+ */
+
+static struct pmc_event_alias p5_aliases[] = {
+	EV_ALIAS("branches",		"p5-taken-branches"),
+	EV_ALIAS("cycles",		"tsc"),
+	EV_ALIAS("dc-misses",		"p5-data-read-miss-or-write-miss"),
+	EV_ALIAS("ic-misses",		"p5-code-cache-miss"),
+	EV_ALIAS("instructions",	"p5-instructions-executed"),
+	EV_ALIAS("interrupts",		"p5-hardware-interrupts"),
+	EV_ALIAS("unhalted-cycles",
+	    "p5-number-of-cycles-not-in-halt-state"),
+	EV_ALIAS(NULL, NULL)
+};
+
+static int
+p5_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	return ((void)pe, (void)ctrspec, (void)pmc_config, -1); /* stub */
+}
+
+/*
+ * Pentium Pro style PMCs.  These PMCs are found in Pentium II, Pentium III,
+ * and Pentium M CPUs.
+ */
+
+static struct pmc_event_alias p6_aliases[] = {
+	EV_ALIAS("branches",		"p6-br-inst-retired"),
+	EV_ALIAS("branch-mispredicts",	"p6-br-miss-pred-retired"),
+	EV_ALIAS("cycles",		"tsc"),
+	EV_ALIAS("dc-misses",		"p6-dcu-lines-in"),
+	EV_ALIAS("ic-misses",		"p6-ifu-fetch-miss"),
+	EV_ALIAS("instructions",	"p6-inst-retired"),
+	EV_ALIAS("interrupts",		"p6-hw-int-rx"),
+	EV_ALIAS("unhalted-cycles",	"p6-cpu-clk-unhalted"),
+	EV_ALIAS(NULL, NULL)
+};
+
+#define	P6_KW_CMASK	"cmask"
+#define	P6_KW_EDGE	"edge"
+#define	P6_KW_INV	"inv"
+#define	P6_KW_OS	"os"
+#define	P6_KW_UMASK	"umask"
+#define	P6_KW_USR	"usr"
+
+static struct pmc_masks p6_mask_mesi[] = {
+	PMCMASK(m,	0x01),
+	PMCMASK(e,	0x02),
+	PMCMASK(s,	0x04),
+	PMCMASK(i,	0x08),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_mesihw[] = {
+	PMCMASK(m,	0x01),
+	PMCMASK(e,	0x02),
+	PMCMASK(s,	0x04),
+	PMCMASK(i,	0x08),
+	PMCMASK(nonhw,	0x00),
+	PMCMASK(hw,	0x10),
+	PMCMASK(both,	0x30),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_hw[] = {
+	PMCMASK(nonhw,	0x00),
+	PMCMASK(hw,	0x10),
+	PMCMASK(both,	0x30),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_any[] = {
+	PMCMASK(self,	0x00),
+	PMCMASK(any,	0x20),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_ekp[] = {
+	PMCMASK(nta,	0x00),
+	PMCMASK(t1,	0x01),
+	PMCMASK(t2,	0x02),
+	PMCMASK(wos,	0x03),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_pps[] = {
+	PMCMASK(packed-and-scalar, 0x00),
+	PMCMASK(scalar,	0x01),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_mite[] = {
+	PMCMASK(packed-multiply,	 0x01),
+	PMCMASK(packed-shift,		0x02),
+	PMCMASK(pack,			0x04),
+	PMCMASK(unpack,			0x08),
+	PMCMASK(packed-logical,		0x10),
+	PMCMASK(packed-arithmetic,	0x20),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_fmt[] = {
+	PMCMASK(mmxtofp,	0x00),
+	PMCMASK(fptommx,	0x01),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_sr[] = {
+	PMCMASK(es,	0x01),
+	PMCMASK(ds,	0x02),
+	PMCMASK(fs,	0x04),
+	PMCMASK(gs,	0x08),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_eet[] = {
+	PMCMASK(all,	0x00),
+	PMCMASK(freq,	0x02),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_efur[] = {
+	PMCMASK(all,	0x00),
+	PMCMASK(loadop,	0x01),
+	PMCMASK(stdsta,	0x02),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_essir[] = {
+	PMCMASK(sse-packed-single,	0x00),
+	PMCMASK(sse-packed-single-scalar-single, 0x01),
+	PMCMASK(sse2-packed-double,	0x02),
+	PMCMASK(sse2-scalar-double,	0x03),
+	NULLMASK
+};
+
+static struct pmc_masks p6_mask_esscir[] = {
+	PMCMASK(sse-packed-single,	0x00),
+	PMCMASK(sse-scalar-single,	0x01),
+	PMCMASK(sse2-packed-double,	0x02),
+	PMCMASK(sse2-scalar-double,	0x03),
+	NULLMASK
+};
+
+/*
+ * P6 event parser: fill 'pmc_config' (pm_caps and pm_ppro_config) from
+ * the comma-separated qualifiers in 'ctrspec' for event 'pe'.  Returns
+ * 0 on success and -1 on a malformed or unknown qualifier.
+ */
+static int
+p6_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	char *e, *p, *q;
+	uint32_t evmask;
+	int count, n;
+	const struct pmc_masks *pm, *pmask;
+
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
+	pmc_config->pm_md.pm_ppro.pm_ppro_config = 0;
+	evmask = 0;
+
+#define	P6MASKSET(M)	pmask = p6_mask_ ## M
+	switch(pe) {
+	case PMC_EV_P6_L2_IFETCH:	P6MASKSET(mesi); break;
+	case PMC_EV_P6_L2_LD:		P6MASKSET(mesi); break;
+	case PMC_EV_P6_L2_ST:		P6MASKSET(mesi); break;
+	case PMC_EV_P6_L2_RQSTS:	P6MASKSET(mesi); break;
+	case PMC_EV_P6_BUS_DRDY_CLOCKS:
+	case PMC_EV_P6_BUS_LOCK_CLOCKS:
+	case PMC_EV_P6_BUS_TRAN_BRD:
+	case PMC_EV_P6_BUS_TRAN_RFO:
+	case PMC_EV_P6_BUS_TRANS_WB:
+	case PMC_EV_P6_BUS_TRAN_IFETCH:
+	case PMC_EV_P6_BUS_TRAN_INVAL:
+	case PMC_EV_P6_BUS_TRAN_PWR:
+	case PMC_EV_P6_BUS_TRANS_P:
+	case PMC_EV_P6_BUS_TRANS_IO:
+	case PMC_EV_P6_BUS_TRAN_DEF:
+	case PMC_EV_P6_BUS_TRAN_BURST:
+	case PMC_EV_P6_BUS_TRAN_ANY:
+	case PMC_EV_P6_BUS_TRAN_MEM:
+		P6MASKSET(any);	break;
+	case PMC_EV_P6_EMON_KNI_PREF_DISPATCHED:
+	case PMC_EV_P6_EMON_KNI_PREF_MISS:
+		P6MASKSET(ekp); break;
+	case PMC_EV_P6_EMON_KNI_INST_RETIRED:
+	case PMC_EV_P6_EMON_KNI_COMP_INST_RET:
+		P6MASKSET(pps);	break;
+	case PMC_EV_P6_MMX_INSTR_TYPE_EXEC:
+		P6MASKSET(mite); break;
+	case PMC_EV_P6_FP_MMX_TRANS:
+		P6MASKSET(fmt);	break;
+	case PMC_EV_P6_SEG_RENAME_STALLS:
+	case PMC_EV_P6_SEG_REG_RENAMES:
+		P6MASKSET(sr);	break;
+	case PMC_EV_P6_EMON_EST_TRANS:
+		P6MASKSET(eet);	break;
+	case PMC_EV_P6_EMON_FUSED_UOPS_RET:
+		P6MASKSET(efur); break;
+	case PMC_EV_P6_EMON_SSE_SSE2_INST_RETIRED:
+		P6MASKSET(essir); break;
+	case PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED:
+		P6MASKSET(esscir); break;
+	default:
+		pmask = NULL;
+		break;
+	}
+
+	/* Pentium M PMCs have a few events with different semantics */
+	if (cpu_info.pm_cputype == PMC_CPU_INTEL_PM) {
+		if (pe == PMC_EV_P6_L2_LD ||
+		    pe == PMC_EV_P6_L2_LINES_IN ||
+		    pe == PMC_EV_P6_L2_LINES_OUT)
+			P6MASKSET(mesihw);
+		else if (pe == PMC_EV_P6_L2_M_LINES_OUTM)
+			P6MASKSET(hw);
+	}
+
+	/* Parse additional modifiers if present */
+	while ((p = strsep(&ctrspec, ",")) != NULL) {
+		if (KWPREFIXMATCH(p, P6_KW_CMASK "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			count = strtol(q, &e, 0);
+			if (e == q || *e != '\0')
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
+			pmc_config->pm_md.pm_ppro.pm_ppro_config |=
+			    P6_EVSEL_TO_CMASK(count);
+		} else if (KWMATCH(p, P6_KW_EDGE)) {
+			pmc_config->pm_caps |= PMC_CAP_EDGE;
+		} else if (KWMATCH(p, P6_KW_INV)) {
+			pmc_config->pm_caps |= PMC_CAP_INVERT;
+		} else if (KWMATCH(p, P6_KW_OS)) {
+			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
+		} else if (KWPREFIXMATCH(p, P6_KW_UMASK "=")) {
+			evmask = 0;
+			if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0)
+				return (-1);
+			if ((pe == PMC_EV_P6_BUS_DRDY_CLOCKS ||
+			     pe == PMC_EV_P6_BUS_LOCK_CLOCKS ||
+			     pe == PMC_EV_P6_BUS_TRAN_BRD ||
+			     pe == PMC_EV_P6_BUS_TRAN_RFO ||
+			     pe == PMC_EV_P6_BUS_TRAN_IFETCH ||
+			     pe == PMC_EV_P6_BUS_TRAN_INVAL ||
+			     pe == PMC_EV_P6_BUS_TRAN_PWR ||
+			     pe == PMC_EV_P6_BUS_TRAN_DEF ||
+			     pe == PMC_EV_P6_BUS_TRAN_BURST ||
+			     pe == PMC_EV_P6_BUS_TRAN_ANY ||
+			     pe == PMC_EV_P6_BUS_TRAN_MEM ||
+			     pe == PMC_EV_P6_BUS_TRANS_IO ||
+			     pe == PMC_EV_P6_BUS_TRANS_P ||
+			     pe == PMC_EV_P6_BUS_TRANS_WB ||
+			     pe == PMC_EV_P6_EMON_EST_TRANS ||
+			     pe == PMC_EV_P6_EMON_FUSED_UOPS_RET ||
+			     pe == PMC_EV_P6_EMON_KNI_COMP_INST_RET ||
+			     pe == PMC_EV_P6_EMON_KNI_INST_RETIRED ||
+			     pe == PMC_EV_P6_EMON_KNI_PREF_DISPATCHED ||
+			     pe == PMC_EV_P6_EMON_KNI_PREF_MISS ||
+			     pe == PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED ||
+			     pe == PMC_EV_P6_EMON_SSE_SSE2_INST_RETIRED ||
+			     pe == PMC_EV_P6_FP_MMX_TRANS)
+			    && (n > 1))	/* Only one mask keyword is allowed. */
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+		} else if (KWMATCH(p, P6_KW_USR)) {
+			pmc_config->pm_caps |= PMC_CAP_USER;
+		} else
+			return (-1);
+	}
+
+	/* post processing */
+	switch (pe) {
+		/*
+		 * The following events default to an evmask of 0
+		 */
+		/* default => 'self' */
+	case PMC_EV_P6_BUS_DRDY_CLOCKS:
+	case PMC_EV_P6_BUS_LOCK_CLOCKS:
+	case PMC_EV_P6_BUS_TRAN_BRD:
+	case PMC_EV_P6_BUS_TRAN_RFO:
+	case PMC_EV_P6_BUS_TRANS_WB:
+	case PMC_EV_P6_BUS_TRAN_IFETCH:
+	case PMC_EV_P6_BUS_TRAN_INVAL:
+	case PMC_EV_P6_BUS_TRAN_PWR:
+	case PMC_EV_P6_BUS_TRANS_P:
+	case PMC_EV_P6_BUS_TRANS_IO:
+	case PMC_EV_P6_BUS_TRAN_DEF:
+	case PMC_EV_P6_BUS_TRAN_BURST:
+	case PMC_EV_P6_BUS_TRAN_ANY:
+	case PMC_EV_P6_BUS_TRAN_MEM:
+
+		/* default => 'nta' */
+	case PMC_EV_P6_EMON_KNI_PREF_DISPATCHED:
+	case PMC_EV_P6_EMON_KNI_PREF_MISS:
+
+		/* default => 'packed and scalar' */
+	case PMC_EV_P6_EMON_KNI_INST_RETIRED:
+	case PMC_EV_P6_EMON_KNI_COMP_INST_RET:
+
+		/* default => 'mmx to fp transitions' */
+	case PMC_EV_P6_FP_MMX_TRANS:
+
+		/* default => 'SSE Packed Single' */
+	case PMC_EV_P6_EMON_SSE_SSE2_INST_RETIRED:
+	case PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED:
+
+		/* default => 'all fused micro-ops' */
+	case PMC_EV_P6_EMON_FUSED_UOPS_RET:
+
+		/* default => 'all transitions' */
+	case PMC_EV_P6_EMON_EST_TRANS:
+		break;
+
+	case PMC_EV_P6_MMX_UOPS_EXEC:
+		evmask = 0x0F;		/* only value allowed */
+		pmc_config->pm_caps |= PMC_CAP_QUALIFIER; /* so it is applied */
+		break;
+
+	default:
+		/*
+		 * For all other events, set the default event mask
+		 * to a logical OR of all the allowed event mask bits.
+		 */
+		if (evmask == 0 && pmask) {
+			for (pm = pmask; pm->pm_name; pm++)
+				evmask |= pm->pm_value;
+			pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+		}
+		break;
+	}
+
+	if (pmc_config->pm_caps & PMC_CAP_QUALIFIER)
+		pmc_config->pm_md.pm_ppro.pm_ppro_config |=
+		    P6_EVSEL_TO_UMASK(evmask);
+
+	return (0);
+}
+
+#endif
+
+#if	defined(__i386__) || defined(__amd64__)
+/* Accept only the unqualified TSC event and request PMC_CAP_READ for it. */
+static int
+tsc_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	/*
+	 * Reject anything that is not the TSC event itself, and any
+	 * qualifier: TSC events must be unqualified.
+	 */
+	if (pe != PMC_EV_TSC_TSC || (ctrspec != NULL && *ctrspec != '\0'))
+		return (-1);
+
+	pmc_config->pm_caps |= PMC_CAP_READ;
+	pmc_config->pm_md.pm_amd.pm_amd_config = 0;
+	return (0);
+}
+#endif
+
+#if	defined(__XSCALE__)
+
+static struct pmc_event_alias xscale_aliases[] = {
+	EV_ALIAS("branches",		"BRANCH_RETIRED"),
+	EV_ALIAS("branch-mispredicts",	"BRANCH_MISPRED"),
+	EV_ALIAS("dc-misses",		"DC_MISS"),
+	EV_ALIAS("ic-misses",		"IC_MISS"),
+	EV_ALIAS("instructions",	"INSTR_RETIRED"),
+	EV_ALIAS(NULL, NULL)
+};
+static int
+xscale_allocate_pmc(enum pmc_event pe, char *ctrspec __unused,
+    struct pmc_op_pmcallocate *pmc_config __unused)
+{
+	/*
+	 * Every event is accepted as-is: 'ctrspec' is ignored and
+	 * 'pmc_config' is left untouched.
+	 */
+	(void) pe;
+	return (0);
+}
+#endif
+
+#if defined(__mips__)
+
+static struct pmc_event_alias mips24k_aliases[] = {
+	EV_ALIAS("instructions",	"INSTR_EXECUTED"),
+	EV_ALIAS("branches",		"BRANCH_COMPLETED"),
+	EV_ALIAS("branch-mispredicts",	"BRANCH_MISPRED"),
+	EV_ALIAS(NULL, NULL)
+};
+
+#define	MIPS24K_KW_OS		"os"
+#define	MIPS24K_KW_USR		"usr"
+#define	MIPS24K_KW_ANYTHREAD	"anythread"
+
+/* Parse "os", "usr" and "anythread" from 'ctrspec'; -1 on anything else. */
+static int
+mips24k_allocate_pmc(enum pmc_event pe __unused, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	char *p;
+
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);
+
+	while ((p = strsep(&ctrspec, ",")) != NULL) {
+		if (KWMATCH(p, MIPS24K_KW_OS))
+			pmc_config->pm_caps |= PMC_CAP_SYSTEM;
+		else if (KWMATCH(p, MIPS24K_KW_USR))
+			pmc_config->pm_caps |= PMC_CAP_USER;
+		/* "anythread" requests both the user and the system caps */
+		else if (KWMATCH(p, MIPS24K_KW_ANYTHREAD))
+			pmc_config->pm_caps |= (PMC_CAP_USER | PMC_CAP_SYSTEM);
+		else
+			return (-1);
+	}
+
+	return (0);
+}
+#endif /* __mips__ */
+
+
+/*
+ * Match an event name `name' with its canonical form.
+ *
+ * Matches are case insensitive and spaces, periods, underscores and
+ * hyphen characters are considered to match each other.
+ *
+ * Returns 1 for a match, 0 otherwise.
+ */
+
+static int
+pmc_match_event_name(const char *name, const char *canonicalname)
+{
+	const unsigned char *np = (const unsigned char *) name;
+	const unsigned char *cp = (const unsigned char *) canonicalname;
+
+	/* walk both strings in lockstep until either one ends */
+	while (*np != '\0' && *cp != '\0') {
+		const int nch = *np++;
+		const int cch = *cp++;
+		const int nsep = (nch == ' ' || nch == '_' ||
+		    nch == '-' || nch == '.');
+		const int csep = (cch == ' ' || cch == '_' ||
+		    cch == '-' || cch == '.');
+
+		/* any separator matches any other separator */
+		if (nsep && csep)
+			continue;
+
+		/* everything else is compared case-insensitively */
+		if (toupper(nch) != toupper(cch))
+			return (0);
+	}
+
+	/* a match requires both strings to be exhausted together */
+	return (*np == '\0' && *cp == '\0');
+}
+
+/*
+ * Match an event name against all the event names supported by a
+ * PMC class.
+ *
+ * Returns an event descriptor pointer on match or NULL otherwise.
+ */
+static const struct pmc_event_descr *
+pmc_match_event_class(const char *name,
+    const struct pmc_class_descr *pcd)
+{
+	size_t left;
+	const struct pmc_event_descr *cur;
+
+	/* linear scan of the class's event table; the first match wins */
+	cur = pcd->pm_evc_event_table;
+	for (left = pcd->pm_evc_event_table_size; left > 0; left--, cur++)
+		if (pmc_match_event_name(name, cur->pm_ev_name))
+			return (cur);
+	return (NULL);
+}
+
+static int
+pmc_mdep_is_compatible_class(enum pmc_class pc)
+{
+	size_t left = pmc_mdep_class_list_size;
+
+	while (left-- > 0)	/* membership test; scan order is irrelevant */
+		if (pmc_mdep_class_list[left] == pc)
+			return (1);
+	return (0);
+}
+
+/*
+ * API entry points
+ */
+
+/*
+ * Allocate a PMC for event specification 'ctrspec' (an event name or a
+ * machine-dependent alias, optionally followed by ",qualifier" items).
+ * Returns 0 and stores the new id in '*pmcid', or returns -1; errno is
+ * EINVAL for a bad argument, unknown event or rejected qualifier.
+ */
+int
+pmc_allocate(const char *ctrspec, enum pmc_mode mode,
+    uint32_t flags, int cpu, pmc_id_t *pmcid)
+{
+	size_t n;
+	int retval;
+	char *r, *spec_copy;
+	const char *ctrname;
+	const struct pmc_event_descr *ev;
+	const struct pmc_event_alias *alias;
+	struct pmc_op_pmcallocate pmc_config;
+	const struct pmc_class_descr *pcd;
+
+	spec_copy = NULL;
+	retval    = -1;
+
+	if (ctrspec == NULL || (mode != PMC_MODE_SS && mode != PMC_MODE_TS &&
+	    mode != PMC_MODE_SC && mode != PMC_MODE_TC)) {
+		errno = EINVAL;
+		goto out;
+	}
+
+	/* replace an event alias with the canonical event specifier */
+	if (pmc_mdep_event_aliases)
+		for (alias = pmc_mdep_event_aliases; alias->pm_alias; alias++)
+			if (!strcasecmp(ctrspec, alias->pm_alias)) {
+				ctrspec = alias->pm_spec;
+				break;
+			}
+
+	/* work on a private copy: strsep() modifies the string it parses */
+	if ((spec_copy = strdup(ctrspec)) == NULL)
+		goto out;	/* errno was set by strdup() */
+
+	r = spec_copy;
+	ctrname = strsep(&r, ",");
+
+	/*
+	 * If an explicit class prefix was given by the user, restrict the
+	 * search for the event to the specified PMC class.
+	 */
+	ev = NULL;
+	for (n = 0; n < PMC_CLASS_TABLE_SIZE; n++) {
+		pcd = pmc_class_table[n];
+		if (pmc_mdep_is_compatible_class(pcd->pm_evc_class) &&
+		    strncasecmp(ctrname, pcd->pm_evc_name,
+				pcd->pm_evc_name_size) == 0) {
+			if ((ev = pmc_match_event_class(ctrname +
+			    pcd->pm_evc_name_size, pcd)) == NULL) {
+				errno = EINVAL;
+				goto out;
+			}
+			break;
+		}
+	}
+
+	/*
+	 * Otherwise, search for this event in all compatible PMC
+	 * classes.
+	 */
+	for (n = 0; ev == NULL && n < PMC_CLASS_TABLE_SIZE; n++) {
+		pcd = pmc_class_table[n];
+		if (pmc_mdep_is_compatible_class(pcd->pm_evc_class))
+			ev = pmc_match_event_class(ctrname, pcd);
+	}
+
+	if (ev == NULL) {
+		errno = EINVAL;
+		goto out;
+	}
+
+	bzero(&pmc_config, sizeof(pmc_config));
+	pmc_config.pm_ev    = ev->pm_ev_code;
+	pmc_config.pm_class = pcd->pm_evc_class;
+	pmc_config.pm_cpu   = cpu;
+	pmc_config.pm_mode  = mode;
+	pmc_config.pm_flags = flags;
+	if (PMC_IS_SAMPLING_MODE(mode))
+		pmc_config.pm_caps |= PMC_CAP_INTERRUPT;
+
+	if (pcd->pm_evc_allocate_pmc(ev->pm_ev_code, r, &pmc_config) < 0) {
+		errno = EINVAL;
+		goto out;
+	}
+	if (PMC_CALL(PMCALLOCATE, &pmc_config) < 0)
+		goto out;
+
+	*pmcid = pmc_config.pm_pmcid;
+	retval = 0;
+ out:
+	free(spec_copy);	/* free(NULL) is a no-op */
+	return (retval);
+}
+
+/*
+ * Issue a PMCATTACH request for PMC 'pmc' and target process 'pid'.
+ * The result of the request is returned to the caller as-is.
+ */
+int
+pmc_attach(pmc_id_t pmc, pid_t pid)
+{
+	struct pmc_op_pmcattach req;
+
+	req.pm_pmc = pmc;
+	req.pm_pid = pid;
+
+	return (PMC_CALL(PMCATTACH, &req));
+}
+
+/*
+ * Look up the capabilities of the PMC class that 'pmcid' belongs to.
+ *
+ * The class is derived from 'pmcid' and searched for in the cached
+ * CPU information.  On a match the capability mask is stored in
+ * '*caps' and 0 is returned; otherwise errno is set to EINVAL and -1
+ * is returned.
+ */
+int
+pmc_capabilities(pmc_id_t pmcid, uint32_t *caps)
+{
+	enum pmc_class wanted;
+	unsigned int idx;
+
+	wanted = PMC_ID_TO_CLASS(pmcid);
+
+	idx = 0;
+	while (idx < cpu_info.pm_nclass) {
+		if (cpu_info.pm_classes[idx].pm_class == wanted) {
+			*caps = cpu_info.pm_classes[idx].pm_caps;
+			return (0);
+		}
+		idx++;
+	}
+
+	errno = EINVAL;
+	return (-1);
+}
+
+/*
+ * Issue a CONFIGURELOG request naming 'fd' as the log file descriptor.
+ * Returns 0 when the request succeeds and -1 when it fails.
+ */
+int
+pmc_configure_logfile(int fd)
+{
+	struct pmc_op_configurelog req;
+
+	req.pm_logfd = fd;
+
+	return (PMC_CALL(CONFIGURELOG, &req) < 0 ? -1 : 0);
+}
+
+/*
+ * Hand out a pointer to the library's cached CPU information.
+ *
+ * If the library has no system call number yet (pmc_syscall is -1),
+ * errno is set to ENXIO and -1 is returned.  Otherwise '*pci' is
+ * pointed at the cached structure and 0 is returned.
+ */
+int
+pmc_cpuinfo(const struct pmc_cpuinfo **pci)
+{
+	if (pmc_syscall != -1) {
+		*pci = &cpu_info;
+		return (0);
+	}
+
+	errno = ENXIO;
+	return (-1);
+}
+
+/*
+ * Issue a PMCDETACH request for PMC 'pmc' and target process 'pid'.
+ * The result of the request is returned to the caller as-is.
+ */
+int
+pmc_detach(pmc_id_t pmc, pid_t pid)
+{
+	struct pmc_op_pmcattach req;
+
+	req.pm_pmc = pmc;
+	req.pm_pid = pid;
+
+	return (PMC_CALL(PMCDETACH, &req));
+}
+
+/*
+ * Issue a PMCADMIN request that puts hardware PMC number 'pmc' on CPU
+ * 'cpu' into the PMC_STATE_DISABLED state.  The result of the request
+ * is returned to the caller as-is.
+ */
+int
+pmc_disable(int cpu, int pmc)
+{
+	struct pmc_op_pmcadmin req;
+
+	req.pm_state = PMC_STATE_DISABLED;
+	req.pm_cpu = cpu;
+	req.pm_pmc = pmc;
+
+	return (PMC_CALL(PMCADMIN, &req));
+}
+
+/*
+ * Issue a PMCADMIN request that puts hardware PMC number 'pmc' on CPU
+ * 'cpu' into the PMC_STATE_FREE state.  The result of the request is
+ * returned to the caller as-is.
+ */
+int
+pmc_enable(int cpu, int pmc)
+{
+	struct pmc_op_pmcadmin req;
+
+	req.pm_state = PMC_STATE_FREE;
+	req.pm_cpu = cpu;
+	req.pm_pmc = pmc;
+
+	return (PMC_CALL(PMCADMIN, &req));
+}
+
+/*
+ * Build the list of event names known for PMC class 'cl'.
+ *
+ * On success '*eventnames' points at a malloc(3)ed array of
+ * 'const char *' event name pointers, '*nevents' holds the number of
+ * entries in that array, and 0 is returned.  The caller owns the
+ * array and must free(3) it; the strings it points to belong to the
+ * library's event tables.
+ *
+ * An unrecognized class yields -1 with errno set to EINVAL.  If the
+ * array cannot be allocated -1 is returned.
+ */
+int
+pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames,
+    int *nevents)
+{
+	int i, total;
+	const char **list;
+	const struct pmc_event_descr *table;
+
+	switch (cl) {
+	case PMC_CLASS_IAF:
+		table = iaf_event_table;
+		total = PMC_EVENT_TABLE_SIZE(iaf);
+		break;
+	case PMC_CLASS_IAP:
+		/*
+		 * Pick the event name spellings that best fit the
+		 * current CPU; unknown CPU types get the Atom names.
+		 */
+		switch (cpu_info.pm_cputype) {
+		case PMC_CPU_INTEL_CORE:
+			table = core_event_table;
+			total = PMC_EVENT_TABLE_SIZE(core);
+			break;
+		case PMC_CPU_INTEL_CORE2:
+		case PMC_CPU_INTEL_CORE2EXTREME:
+			table = core2_event_table;
+			total = PMC_EVENT_TABLE_SIZE(core2);
+			break;
+		case PMC_CPU_INTEL_COREI7:
+			table = corei7_event_table;
+			total = PMC_EVENT_TABLE_SIZE(corei7);
+			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			table = westmere_event_table;
+			total = PMC_EVENT_TABLE_SIZE(westmere);
+			break;
+		case PMC_CPU_INTEL_ATOM:
+		default:
+			table = atom_event_table;
+			total = PMC_EVENT_TABLE_SIZE(atom);
+			break;
+		}
+		break;
+	case PMC_CLASS_UCF:
+		table = ucf_event_table;
+		total = PMC_EVENT_TABLE_SIZE(ucf);
+		break;
+	case PMC_CLASS_UCP:
+		/*
+		 * Pick the event name spellings that best fit the
+		 * current CPU; unknown CPU types get the Core i7 names.
+		 */
+		switch (cpu_info.pm_cputype) {
+		case PMC_CPU_INTEL_WESTMERE:
+			table = westmereuc_event_table;
+			total = PMC_EVENT_TABLE_SIZE(westmereuc);
+			break;
+		case PMC_CPU_INTEL_COREI7:
+		default:
+			table = corei7uc_event_table;
+			total = PMC_EVENT_TABLE_SIZE(corei7uc);
+			break;
+		}
+		break;
+	case PMC_CLASS_TSC:
+		table = tsc_event_table;
+		total = PMC_EVENT_TABLE_SIZE(tsc);
+		break;
+	case PMC_CLASS_K7:
+		table = k7_event_table;
+		total = PMC_EVENT_TABLE_SIZE(k7);
+		break;
+	case PMC_CLASS_K8:
+		table = k8_event_table;
+		total = PMC_EVENT_TABLE_SIZE(k8);
+		break;
+	case PMC_CLASS_P4:
+		table = p4_event_table;
+		total = PMC_EVENT_TABLE_SIZE(p4);
+		break;
+	case PMC_CLASS_P5:
+		table = p5_event_table;
+		total = PMC_EVENT_TABLE_SIZE(p5);
+		break;
+	case PMC_CLASS_P6:
+		table = p6_event_table;
+		total = PMC_EVENT_TABLE_SIZE(p6);
+		break;
+	case PMC_CLASS_XSCALE:
+		table = xscale_event_table;
+		total = PMC_EVENT_TABLE_SIZE(xscale);
+		break;
+	case PMC_CLASS_MIPS24K:
+		table = mips24k_event_table;
+		total = PMC_EVENT_TABLE_SIZE(mips24k);
+		break;
+	default:
+		errno = EINVAL;
+		return (-1);
+	}
+
+	list = malloc(total * sizeof(const char *));
+	if (list == NULL)
+		return (-1);
+
+	*eventnames = list;
+	*nevents = total;
+
+	/* one name pointer per entry of the selected event table */
+	for (i = 0; i < total; i++)
+		list[i] = table[i].pm_ev_name;
+
+	return (0);
+}
+
+/*
+ * Issue a FLUSHLOG request, which takes no argument structure, and
+ * return its result.
+ */
+int
+pmc_flush_logfile(void)
+{
+	int rc;
+
+	rc = PMC_CALL(FLUSHLOG,0);
+
+	return (rc);
+}
+
+/*
+ * Fetch driver statistics with a GETDRIVERSTATS request and copy them
+ * field by field into '*ds'.  Returns 0 on success; if the request
+ * fails, '*ds' is left untouched and -1 is returned.
+ */
+int
+pmc_get_driver_stats(struct pmc_driverstats *ds)
+{
+	struct pmc_op_getdriverstats kstats;
+
+	if (PMC_CALL(GETDRIVERSTATS, &kstats) >= 0) {
+		/*
+		 * Translate from the kernel<->library structure to the
+		 * one exposed by the library<->userland interface.
+		 */
+		ds->pm_intr_ignored = kstats.pm_intr_ignored;
+		ds->pm_intr_processed = kstats.pm_intr_processed;
+		ds->pm_intr_bufferfull = kstats.pm_intr_bufferfull;
+		ds->pm_syscalls = kstats.pm_syscalls;
+		ds->pm_syscall_errors = kstats.pm_syscall_errors;
+		ds->pm_buffer_requests = kstats.pm_buffer_requests;
+		ds->pm_buffer_requests_failed =
+		    kstats.pm_buffer_requests_failed;
+		ds->pm_log_sweeps = kstats.pm_log_sweeps;
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Ask for the MSR number associated with PMC 'pmc' using a PMCGETMSR
+ * request.  On success the number is stored in '*msr' and 0 is
+ * returned; on failure '*msr' is left untouched and -1 is returned.
+ */
+int
+pmc_get_msr(pmc_id_t pmc, uint32_t *msr)
+{
+	struct pmc_op_getmsr req;
+
+	req.pm_pmcid = pmc;
+
+	if (PMC_CALL(PMCGETMSR, &req) >= 0) {
+		*msr = req.pm_msr;
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Initialize the library.
+ *
+ * Looks up the PMC kernel module to learn its system call number,
+ * checks that the module's major ABI version matches PMC_VERSION,
+ * caches the CPU information reported by the module and sets up the
+ * event name parser (class table, class list and event aliases) for
+ * the detected CPU type.
+ *
+ * Returns 0 on success, including when the library was already
+ * initialized.  Returns -1 on failure; once the system call number has
+ * been fetched, every failure path resets pmc_syscall to -1 so that a
+ * later call starts over instead of reporting a half-initialized
+ * library as ready.  errno is EPROGMISMATCH on an ABI mismatch and
+ * ENXIO for a CPU type this library does not know about.
+ */
+int
+pmc_init(void)
+{
+	int pmc_mod_id;
+	unsigned int n;
+	uint32_t abi_version;
+	struct module_stat pmc_modstat;
+	struct pmc_op_getcpuinfo op_cpu_info;
+#if defined(__amd64__) || defined(__i386__)
+	int cpu_has_iaf_counters;
+	unsigned int t;
+#endif
+
+	if (pmc_syscall != -1) /* already inited */
+		return (0);
+
+	/* retrieve the system call number from the KLD */
+	if ((pmc_mod_id = modfind(PMC_MODULE_NAME)) < 0)
+		return (-1);
+
+	pmc_modstat.version = sizeof(struct module_stat);
+	if (modstat(pmc_mod_id, &pmc_modstat) < 0)
+		return (-1);
+
+	pmc_syscall = pmc_modstat.data.intval;
+
+	/* check the kernel module's ABI against our compiled-in version */
+	abi_version = PMC_VERSION;
+	if (PMC_CALL(GETMODULEVERSION, &abi_version) < 0)
+		return (pmc_syscall = -1);
+
+	/* ignore patch & minor numbers for the comparison */
+	if ((abi_version & 0xFF000000) != (PMC_VERSION & 0xFF000000)) {
+		errno  = EPROGMISMATCH;
+		return (pmc_syscall = -1);
+	}
+
+	if (PMC_CALL(GETCPUINFO, &op_cpu_info) < 0)
+		return (pmc_syscall = -1);
+
+	cpu_info.pm_cputype = op_cpu_info.pm_cputype;
+	cpu_info.pm_ncpu    = op_cpu_info.pm_ncpu;
+	cpu_info.pm_npmc    = op_cpu_info.pm_npmc;
+	cpu_info.pm_nclass  = op_cpu_info.pm_nclass;
+	for (n = 0; n < cpu_info.pm_nclass; n++)
+		cpu_info.pm_classes[n] = op_cpu_info.pm_classes[n];
+
+	pmc_class_table = malloc(PMC_CLASS_TABLE_SIZE *
+	    sizeof(struct pmc_class_descr *));
+
+	/*
+	 * Without a class table the library is unusable, so do not
+	 * leave pmc_syscall set: that would make the next pmc_init()
+	 * call report success.
+	 */
+	if (pmc_class_table == NULL)
+		return (pmc_syscall = -1);
+
+	for (n = 0; n < PMC_CLASS_TABLE_SIZE; n++)
+		pmc_class_table[n] = NULL;
+
+	/*
+	 * Fill in the class table.
+	 */
+	n = 0;
+#if defined(__amd64__) || defined(__i386__)
+	pmc_class_table[n++] = &tsc_class_table_descr;
+
+	/*
+	 * Check if this CPU has fixed function counters.
+	 */
+	cpu_has_iaf_counters = 0;
+	for (t = 0; t < cpu_info.pm_nclass; t++)
+		if (cpu_info.pm_classes[t].pm_class == PMC_CLASS_IAF &&
+		    cpu_info.pm_classes[t].pm_num > 0)
+			cpu_has_iaf_counters = 1;
+#endif
+
+	/* Select the alias table and class list of CPU family 'C'. */
+#define	PMC_MDEP_INIT(C) do {					\
+		pmc_mdep_event_aliases    = C##_aliases;	\
+		pmc_mdep_class_list  = C##_pmc_classes;		\
+		pmc_mdep_class_list_size =			\
+		    PMC_TABLE_SIZE(C##_pmc_classes);		\
+	} while (0)
+
+	/*
+	 * As PMC_MDEP_INIT(), plus the IAF class and the class of CPU
+	 * family 'C'.  CPUs without fixed function counters get the
+	 * alias table that does not refer to IAF events.
+	 */
+#define	PMC_MDEP_INIT_INTEL_V2(C) do {					\
+		PMC_MDEP_INIT(C);					\
+		pmc_class_table[n++] = &iaf_class_table_descr;		\
+		if (!cpu_has_iaf_counters) 				\
+			pmc_mdep_event_aliases =			\
+				C##_aliases_without_iaf;		\
+		pmc_class_table[n] = &C##_class_table_descr;		\
+	} while (0)
+
+	/* Configure the event name parser. */
+	switch (cpu_info.pm_cputype) {
+#if defined(__i386__)
+	case PMC_CPU_AMD_K7:
+		PMC_MDEP_INIT(k7);
+		pmc_class_table[n] = &k7_class_table_descr;
+		break;
+	case PMC_CPU_INTEL_P5:
+		PMC_MDEP_INIT(p5);
+		pmc_class_table[n]  = &p5_class_table_descr;
+		break;
+	case PMC_CPU_INTEL_P6:		/* P6 ... Pentium M CPUs have */
+	case PMC_CPU_INTEL_PII:		/* similar PMCs. */
+	case PMC_CPU_INTEL_PIII:
+	case PMC_CPU_INTEL_PM:
+		PMC_MDEP_INIT(p6);
+		pmc_class_table[n] = &p6_class_table_descr;
+		break;
+#endif
+#if defined(__amd64__) || defined(__i386__)
+	case PMC_CPU_AMD_K8:
+		PMC_MDEP_INIT(k8);
+		pmc_class_table[n] = &k8_class_table_descr;
+		break;
+	case PMC_CPU_INTEL_ATOM:
+		PMC_MDEP_INIT_INTEL_V2(atom);
+		break;
+	case PMC_CPU_INTEL_CORE:
+		PMC_MDEP_INIT(core);
+		pmc_class_table[n] = &core_class_table_descr;
+		break;
+	case PMC_CPU_INTEL_CORE2:
+	case PMC_CPU_INTEL_CORE2EXTREME:
+		PMC_MDEP_INIT_INTEL_V2(core2);
+		break;
+	case PMC_CPU_INTEL_COREI7:
+		pmc_class_table[n++] = &ucf_class_table_descr;
+		pmc_class_table[n++] = &corei7uc_class_table_descr;
+		PMC_MDEP_INIT_INTEL_V2(corei7);
+		break;
+	case PMC_CPU_INTEL_WESTMERE:
+		pmc_class_table[n++] = &ucf_class_table_descr;
+		pmc_class_table[n++] = &westmereuc_class_table_descr;
+		PMC_MDEP_INIT_INTEL_V2(westmere);
+		break;
+	case PMC_CPU_INTEL_PIV:
+		PMC_MDEP_INIT(p4);
+		pmc_class_table[n] = &p4_class_table_descr;
+		break;
+#endif
+#if defined(__XSCALE__)
+	case PMC_CPU_INTEL_XSCALE:
+		PMC_MDEP_INIT(xscale);
+		pmc_class_table[n] = &xscale_class_table_descr;
+		break;
+#endif
+#if defined(__mips__)
+	case PMC_CPU_MIPS_24K:
+		PMC_MDEP_INIT(mips24k);
+		pmc_class_table[n] = &mips24k_class_table_descr;
+		break;
+#endif /* __mips__ */
+	default:
+		/*
+		 * Some kind of CPU this version of the library knows nothing
+		 * about.  This shouldn't happen since the abi version check
+		 * should have caught this.
+		 *
+		 * Release the class table so that a failed initialization
+		 * does not leak it; errno is set after free(3) so that it
+		 * cannot be disturbed.
+		 */
+		free(pmc_class_table);
+		pmc_class_table = NULL;
+		errno = ENXIO;
+		return (pmc_syscall = -1);
+	}
+
+	return (0);
+}
+
+/*
+ * Return the name of the single capability flag 'cap'.
+ *
+ * 'cap' must have at most one bit set and must lie between
+ * PMC_CAP_FIRST and PMC_CAP_LAST inclusive; anything else yields NULL
+ * with errno set to EINVAL.  The name is looked up by the position of
+ * the set bit.
+ */
+const char *
+pmc_name_of_capability(enum pmc_caps cap)
+{
+	int multiple_bits, in_range;
+
+	multiple_bits = ((cap & (cap - 1)) != 0);
+	in_range = (cap >= PMC_CAP_FIRST && cap <= PMC_CAP_LAST);
+
+	if (multiple_bits || !in_range) {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	/* ffs(3) numbers bits from 1; the name table is indexed from 0 */
+	return (pmc_capability_names[ffs(cap) - 1]);
+}
+
+/*
+ * Return the name of PMC class 'pc', or NULL with errno set to EINVAL
+ * when 'pc' lies outside PMC_CLASS_FIRST..PMC_CLASS_LAST.
+ */
+const char *
+pmc_name_of_class(enum pmc_class pc)
+{
+	if ((int) pc < PMC_CLASS_FIRST || pc > PMC_CLASS_LAST) {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	return (pmc_class_names[pc]);
+}
+
+/*
+ * Return the name recorded for CPU type 'cp' in the CPU type name
+ * table, or NULL with errno set to EINVAL when the table has no entry
+ * for it.
+ */
+const char *
+pmc_name_of_cputype(enum pmc_cputype cp)
+{
+	size_t idx;
+
+	idx = 0;
+	while (idx < PMC_TABLE_SIZE(pmc_cputype_names)) {
+		if (pmc_cputype_names[idx].pm_cputype == cp)
+			return (pmc_cputype_names[idx].pm_name);
+		idx++;
+	}
+
+	errno = EINVAL;
+	return (NULL);
+}
+
+/*
+ * Return the name of PMC disposition 'pd', or NULL with errno set to
+ * EINVAL when 'pd' lies outside PMC_DISP_FIRST..PMC_DISP_LAST.
+ */
+const char *
+pmc_name_of_disposition(enum pmc_disp pd)
+{
+	if ((int) pd < PMC_DISP_FIRST || pd > PMC_DISP_LAST) {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	return (pmc_disposition_names[pd]);
+}
+
+/*
+ * Map event code 'pe' to its name.
+ *
+ * The range that 'pe' falls into selects the event table to search.
+ * For the IAP and UCP ranges the table additionally depends on the CPU
+ * type 'cpu'.  Returns the matching event name, or NULL when the code
+ * belongs to no known range, the CPU type is not handled for that
+ * range, or the selected table has no entry for 'pe'.  errno is not
+ * modified.
+ */
+const char *
+_pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu)
+{
+	const struct pmc_event_descr *cur, *end;
+
+	/* an empty range (both NULL) means "no table selected" */
+	cur = NULL;
+	end = NULL;
+
+	if (pe >= PMC_EV_IAF_FIRST && pe <= PMC_EV_IAF_LAST) {
+		cur = iaf_event_table;
+		end = iaf_event_table + PMC_EVENT_TABLE_SIZE(iaf);
+	} else if (pe >= PMC_EV_IAP_FIRST && pe <= PMC_EV_IAP_LAST) {
+		switch (cpu) {
+		case PMC_CPU_INTEL_ATOM:
+			cur = atom_event_table;
+			end = atom_event_table +
+			    PMC_EVENT_TABLE_SIZE(atom);
+			break;
+		case PMC_CPU_INTEL_CORE:
+			cur = core_event_table;
+			end = core_event_table +
+			    PMC_EVENT_TABLE_SIZE(core);
+			break;
+		case PMC_CPU_INTEL_CORE2:
+		case PMC_CPU_INTEL_CORE2EXTREME:
+			cur = core2_event_table;
+			end = core2_event_table +
+			    PMC_EVENT_TABLE_SIZE(core2);
+			break;
+		case PMC_CPU_INTEL_COREI7:
+			cur = corei7_event_table;
+			end = corei7_event_table +
+			    PMC_EVENT_TABLE_SIZE(corei7);
+			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			cur = westmere_event_table;
+			end = westmere_event_table +
+			    PMC_EVENT_TABLE_SIZE(westmere);
+			break;
+		default:	/* Unknown CPU type. */
+			break;
+		}
+	} else if (pe >= PMC_EV_UCF_FIRST && pe <= PMC_EV_UCF_LAST) {
+		cur = ucf_event_table;
+		end = ucf_event_table + PMC_EVENT_TABLE_SIZE(ucf);
+	} else if (pe >= PMC_EV_UCP_FIRST && pe <= PMC_EV_UCP_LAST) {
+		switch (cpu) {
+		case PMC_CPU_INTEL_COREI7:
+			cur = corei7uc_event_table;
+			end = corei7uc_event_table +
+			    PMC_EVENT_TABLE_SIZE(corei7uc);
+			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			cur = westmereuc_event_table;
+			end = westmereuc_event_table +
+			    PMC_EVENT_TABLE_SIZE(westmereuc);
+			break;
+		default:	/* Unknown CPU type. */
+			break;
+		}
+	} else if (pe >= PMC_EV_K7_FIRST && pe <= PMC_EV_K7_LAST) {
+		cur = k7_event_table;
+		end = k7_event_table + PMC_EVENT_TABLE_SIZE(k7);
+	} else if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) {
+		cur = k8_event_table;
+		end = k8_event_table + PMC_EVENT_TABLE_SIZE(k8);
+	} else if (pe >= PMC_EV_P4_FIRST && pe <= PMC_EV_P4_LAST) {
+		cur = p4_event_table;
+		end = p4_event_table + PMC_EVENT_TABLE_SIZE(p4);
+	} else if (pe >= PMC_EV_P5_FIRST && pe <= PMC_EV_P5_LAST) {
+		cur = p5_event_table;
+		end = p5_event_table + PMC_EVENT_TABLE_SIZE(p5);
+	} else if (pe >= PMC_EV_P6_FIRST && pe <= PMC_EV_P6_LAST) {
+		cur = p6_event_table;
+		end = p6_event_table + PMC_EVENT_TABLE_SIZE(p6);
+	} else if (pe >= PMC_EV_XSCALE_FIRST && pe <= PMC_EV_XSCALE_LAST) {
+		cur = xscale_event_table;
+		end = xscale_event_table + PMC_EVENT_TABLE_SIZE(xscale);
+	} else if (pe >= PMC_EV_MIPS24K_FIRST && pe <= PMC_EV_MIPS24K_LAST) {
+		cur = mips24k_event_table;
+		end = mips24k_event_table + PMC_EVENT_TABLE_SIZE(mips24k);
+	} else if (pe == PMC_EV_TSC_TSC) {
+		cur = tsc_event_table;
+		end = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc);
+	}
+
+	/* linear search of the selected table */
+	while (cur != end) {
+		if (cur->pm_ev_code == pe)
+			return (cur->pm_ev_name);
+		cur++;
+	}
+
+	return (NULL);
+}
+
+/*
+ * Return the name of event 'pe' as spelled for the CPU type recorded
+ * in the cached CPU information.  When no name is found NULL is
+ * returned and errno is set to EINVAL.
+ */
+const char *
+pmc_name_of_event(enum pmc_event pe)
+{
+	const char *evname;
+
+	evname = _pmc_name_of_event(pe, cpu_info.pm_cputype);
+	if (evname == NULL)
+		errno = EINVAL;
+
+	return (evname);
+}
+
+/*
+ * Return the name of PMC mode 'pm', or NULL with errno set to EINVAL
+ * when 'pm' lies outside PMC_MODE_FIRST..PMC_MODE_LAST.
+ */
+const char *
+pmc_name_of_mode(enum pmc_mode pm)
+{
+	if ((int) pm < PMC_MODE_FIRST || pm > PMC_MODE_LAST) {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	return (pmc_mode_names[pm]);
+}
+
+/*
+ * Return the name of PMC state 'ps', or NULL with errno set to EINVAL
+ * when 'ps' lies outside PMC_STATE_FIRST..PMC_STATE_LAST.
+ */
+const char *
+pmc_name_of_state(enum pmc_state ps)
+{
+	if ((int) ps < PMC_STATE_FIRST || ps > PMC_STATE_LAST) {
+		errno = EINVAL;
+		return (NULL);
+	}
+
+	return (pmc_state_names[ps]);
+}
+
+/*
+ * Return the CPU count recorded in the cached CPU information.  If the
+ * library has no system call number yet (pmc_syscall is -1), -1 is
+ * returned with errno set to ENXIO.
+ */
+int
+pmc_ncpu(void)
+{
+	if (pmc_syscall != -1)
+		return (cpu_info.pm_ncpu);
+
+	errno = ENXIO;
+	return (-1);
+}
+
+/*
+ * Return the PMC count recorded in the cached CPU information for CPU
+ * number 'cpu'.
+ *
+ * Fails with -1 and errno ENXIO if the library has no system call
+ * number yet, or with -1 and errno EINVAL if 'cpu' is negative or not
+ * below the cached CPU count.
+ */
+int
+pmc_npmc(int cpu)
+{
+	if (pmc_syscall == -1) {
+		errno = ENXIO;
+		return (-1);
+	}
+
+	if (cpu >= 0 && cpu < (int) cpu_info.pm_ncpu)
+		return (cpu_info.pm_npmc);
+
+	errno = EINVAL;
+	return (-1);
+}
+
+/*
+ * Retrieve information about the PMCs of CPU number 'cpu'.
+ *
+ * A zeroed buffer large enough for the request header plus one
+ * 'struct pmc_info' per PMC is allocated and filled in by a GETPMCINFO
+ * request.  On success '*ppmci' points at that buffer and 0 is
+ * returned; the buffer comes from calloc(3) and is not freed here.  On
+ * failure -1 is returned and nothing is stored in '*ppmci'.
+ */
+int
+pmc_pmcinfo(int cpu, struct pmc_pmcinfo **ppmci)
+{
+	int count, size;
+	struct pmc_op_getpmcinfo *req;
+
+	count = pmc_npmc(cpu);
+	if (count < 0)
+		return (-1);
+
+	size = sizeof(struct pmc_op_getpmcinfo) +
+	    count * sizeof(struct pmc_info);
+
+	req = calloc(1, size);
+	if (req == NULL)
+		return (-1);
+
+	req->pm_cpu = cpu;
+
+	if (PMC_CALL(GETPMCINFO, req) < 0) {
+		free(req);
+		return (-1);
+	}
+
+	/* kernel<->library, library<->userland interfaces are identical */
+	*ppmci = (struct pmc_pmcinfo *) req;
+	return (0);
+}
+
+/*
+ * Read PMC 'pmc' with a PMCRW request that carries only the
+ * PMC_F_OLDVALUE flag.  On success the value handed back by the
+ * request is stored in '*value' and 0 is returned; on failure '*value'
+ * is left untouched and -1 is returned.
+ */
+int
+pmc_read(pmc_id_t pmc, pmc_value_t *value)
+{
+	struct pmc_op_pmcrw req;
+
+	req.pm_pmcid = pmc;
+	req.pm_flags = PMC_F_OLDVALUE;
+	req.pm_value = -1;
+
+	if (PMC_CALL(PMCRW, &req) >= 0) {
+		*value = req.pm_value;
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Issue a PMCRELEASE request for PMC 'pmc' and return its result.
+ */
+int
+pmc_release(pmc_id_t pmc)
+{
+	struct pmc_op_simple req;
+
+	req.pm_pmcid = pmc;
+
+	return (PMC_CALL(PMCRELEASE, &req));
+}
+
+/*
+ * Issue a PMCRW request for PMC 'pmc' with both PMC_F_NEWVALUE and
+ * PMC_F_OLDVALUE set, passing 'newvalue' in.  On success the value
+ * handed back by the request is stored in '*oldvaluep' and 0 is
+ * returned; on failure '*oldvaluep' is left untouched and -1 is
+ * returned.
+ */
+int
+pmc_rw(pmc_id_t pmc, pmc_value_t newvalue, pmc_value_t *oldvaluep)
+{
+	struct pmc_op_pmcrw req;
+
+	req.pm_pmcid = pmc;
+	req.pm_flags = PMC_F_NEWVALUE | PMC_F_OLDVALUE;
+	req.pm_value = newvalue;
+
+	if (PMC_CALL(PMCRW, &req) >= 0) {
+		*oldvaluep = req.pm_value;
+		return (0);
+	}
+
+	return (-1);
+}
+
+/*
+ * Issue a PMCSETCOUNT request that passes 'value' as the count for PMC
+ * 'pmc'.  Returns 0 when the request succeeds and -1 when it fails.
+ */
+int
+pmc_set(pmc_id_t pmc, pmc_value_t value)
+{
+	struct pmc_op_pmcsetcount req;
+
+	req.pm_pmcid = pmc;
+	req.pm_count = value;
+
+	return (PMC_CALL(PMCSETCOUNT, &req) < 0 ? -1 : 0);
+}
+
+/*
+ * Issue a PMCSTART request for PMC 'pmc' and return its result.
+ */
+int
+pmc_start(pmc_id_t pmc)
+{
+	struct pmc_op_simple req;
+
+	req.pm_pmcid = pmc;
+
+	return (PMC_CALL(PMCSTART, &req));
+}
+
+/*
+ * Issue a PMCSTOP request for PMC 'pmc' and return its result.
+ */
+int
+pmc_stop(pmc_id_t pmc)
+{
+	struct pmc_op_simple req;
+
+	req.pm_pmcid = pmc;
+
+	return (PMC_CALL(PMCSTOP, &req));
+}
+
+/*
+ * Look up the counter width of the PMC class that 'pmcid' belongs to.
+ *
+ * The class is derived from 'pmcid' and searched for in the cached
+ * CPU information.  On a match the width is stored in '*width' and 0
+ * is returned; otherwise errno is set to EINVAL and -1 is returned.
+ */
+int
+pmc_width(pmc_id_t pmcid, uint32_t *width)
+{
+	enum pmc_class wanted;
+	unsigned int idx;
+
+	wanted = PMC_ID_TO_CLASS(pmcid);
+
+	idx = 0;
+	while (idx < cpu_info.pm_nclass) {
+		if (cpu_info.pm_classes[idx].pm_class == wanted) {
+			*width = cpu_info.pm_classes[idx].pm_width;
+			return (0);
+		}
+		idx++;
+	}
+
+	errno = EINVAL;
+	return (-1);
+}
+
+/*
+ * Issue a PMCRW request for PMC 'pmc' that carries only the
+ * PMC_F_NEWVALUE flag, with 'value' as the value passed in.  The
+ * result of the request is returned to the caller as-is.
+ */
+int
+pmc_write(pmc_id_t pmc, pmc_value_t value)
+{
+	struct pmc_op_pmcrw req;
+
+	req.pm_value = value;
+	req.pm_flags = PMC_F_NEWVALUE;
+	req.pm_pmcid = pmc;
+
+	return (PMC_CALL(PMCRW, &req));
+}
+
+/*
+ * Issue a WRITELOG request carrying the 32-bit value 'userdata' and
+ * return its result.
+ */
+int
+pmc_writelog(uint32_t userdata)
+{
+	struct pmc_op_writelog req;
+
+	req.pm_userdata = userdata;
+
+	return (PMC_CALL(WRITELOG, &req));
+}
diff --git a/lib/libpmc/libpmcinternal.h b/lib/libpmc/libpmcinternal.h
new file mode 100644
index 0000000..b1c9c86
--- /dev/null
+++ b/lib/libpmc/libpmcinternal.h
@@ -0,0 +1,37 @@
+/*-
+ * Copyright (c) 2008 Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	LIBPMC_INTERNAL_H
+#define	LIBPMC_INTERNAL_H	1
+
+/*
+ * Prototypes.
+ */
+
+/*
+ * Return the name of event code '_ev' using the event tables that
+ * apply to CPU type '_cpu', or NULL if no name is known for it.
+ */
+const char *_pmc_name_of_event(enum pmc_event _ev, enum pmc_cputype _cpu);
+
+#endif	/* LIBPMC_INTERNAL_H */
diff --git a/lib/libpmc/pmc.3 b/lib/libpmc/pmc.3
new file mode 100644
index 0000000..2403e64
--- /dev/null
+++ b/lib/libpmc/pmc.3
@@ -0,0 +1,540 @@
+.\" Copyright (c) 2003-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 24, 2008
+.Dt PMC 3
+.Os
+.Sh NAME
+.Nm pmc
+.Nd library for accessing hardware performance monitoring counters
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+The
+.Lb libpmc
+provides a programming interface that allows applications to use
+hardware performance counters to gather performance data about
+specific processes or for the system as a whole.
+The library is implemented using the lower-level facilities offered by
+the
+.Xr hwpmc 4
+driver.
+.Ss Key Concepts
+Performance monitoring counters (PMCs) are represented by the library
+using a software abstraction.
+These
+.Dq abstract
+PMCs can have two scopes:
+.Bl -bullet
+.It
+System scope.
+These PMCs measure events in a whole-system manner, i.e., independent
+of the currently executing thread.
+System scope PMCs are allocated on specific CPUs and do not
+migrate between CPUs.
+Non-privileged processes are allowed to allocate system scope PMCs if the
+.Xr hwpmc 4
+sysctl tunable:
+.Va security.bsd.unprivileged_syspmcs
+is non-zero.
+.It
+Process scope.
+These PMCs only measure hardware events when the processes they are
+attached to are executing on a CPU.
+In an SMP system, process scope PMCs migrate between CPUs along with
+their target processes.
+.El
+.Pp
+Orthogonal to PMC scope, PMCs may be allocated in one of two
+operational modes:
+.Bl -bullet
+.It
+Counting PMCs measure events according to their scope
+(system or process).
+The application needs to explicitly read these counters
+to retrieve their value.
+.It
+Sampling PMCs cause the CPU to be periodically interrupted
+and information about its state of execution to be collected.
+Sampling PMCs are used to profile specific processes and kernel
+threads or to profile the system as a whole.
+.El
+.Pp
+The scope and operational mode for a software PMC are specified at
+PMC allocation time.
+An application is allowed to allocate multiple PMCs subject
+to availability of hardware resources.
+.Pp
+The library uses human-readable strings to name the event being
+measured by hardware.
+The syntax used for specifying a hardware event along with additional
+event specific qualifiers (if any) is described in detail in section
+.Sx "EVENT SPECIFIERS"
+below.
+.Pp
+PMCs are associated with the process that allocated them and
+will be automatically reclaimed by the system when the process exits.
+Additionally, process-scope PMCs have to be attached to one or more
+target processes before they can perform measurements.
+A process-scope PMC may be attached to those target processes
+that its owner process would otherwise be permitted to debug.
+An owner process may attach PMCs to itself allowing
+it to measure its own behavior.
+Additionally, on some machine architectures, such self-attached PMCs
+may be read cheaply using specialized instructions supported by the
+processor.
+.Pp
+Certain kinds of PMCs require that a log file be configured before
+they may be started.
+These include:
+.Bl -bullet -compact
+.It
+System scope sampling PMCs.
+.It
+Process scope sampling PMCs.
+.It
+Process scope counting PMCs that have been configured to report PMC
+readings on process context switches or process exits.
+.El
+Up to one log file may be configured per owner process.
+Events logged to a log file may be subsequently analyzed using the
+.Xr pmclog 3
+family of functions.
+.Ss Supported CPUs
+The CPUs known to the PMC library are named by the
+.Vt "enum pmc_cputype"
+enumeration.
+Supported CPUs include:
+.Bl -tag -width "Li PMC_CPU_INTEL_CORE2" -compact
+.It Li PMC_CPU_AMD_K7
+.Tn "AMD Athlon"
+CPUs.
+.It Li PMC_CPU_AMD_K8
+.Tn "AMD Athlon64"
+CPUs.
+.It Li PMC_CPU_INTEL_ATOM
+.Tn Intel
+.Tn Atom
+CPUs and other CPUs conforming to version 3 of the
+.Tn Intel
+performance measurement architecture.
+.It Li PMC_CPU_INTEL_CORE
+.Tn Intel
+.Tn Core Solo
+and
+.Tn Core Duo
+CPUs, and other CPUs conforming to version 1 of the
+.Tn Intel
+performance measurement architecture.
+.It Li PMC_CPU_INTEL_CORE2
+.Tn Intel
+.Tn "Core2 Solo" ,
+.Tn "Core2 Duo"
+and
+.Tn "Core2 Extreme"
+CPUs, and other CPUs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+.It Li PMC_CPU_INTEL_P5
+.Tn Intel
+.Tn "Pentium"
+CPUs.
+.It Li PMC_CPU_INTEL_P6
+.Tn Intel
+.Tn "Pentium Pro"
+CPUs.
+.It Li PMC_CPU_INTEL_PII
+.Tn "Intel Pentium II"
+CPUs.
+.It Li PMC_CPU_INTEL_PIII
+.Tn "Intel Pentium III"
+CPUs.
+.It Li PMC_CPU_INTEL_PIV
+.Tn "Intel Pentium 4"
+CPUs.
+.It Li PMC_CPU_INTEL_PM
+.Tn "Intel Pentium M"
+CPUs.
+.El
+.Ss Supported PMCs
+PMCs supported by this library are named by the
+.Vt enum pmc_class
+enumeration.
+Supported PMC kinds include:
+.Bl -tag -width "Li PMC_CLASS_IAF" -compact
+.It Li PMC_CLASS_IAF
+Fixed function hardware counters present in CPUs conforming to the
+.Tn Intel
+performance measurement architecture version 2 and later.
+.It Li PMC_CLASS_IAP
+Programmable hardware counters present in CPUs conforming to the
+.Tn Intel
+performance measurement architecture version 1 and later.
+.It Li PMC_CLASS_K7
+Programmable hardware counters present in
+.Tn "AMD Athlon"
+CPUs.
+.It Li PMC_CLASS_K8
+Programmable hardware counters present in
+.Tn "AMD Athlon64"
+CPUs.
+.It Li PMC_CLASS_P4
+Programmable hardware counters present in
+.Tn "Intel Pentium 4"
+CPUs.
+.It Li PMC_CLASS_P5
+Programmable hardware counters present in
+.Tn Intel
+.Tn Pentium
+CPUs.
+.It Li PMC_CLASS_P6
+Programmable hardware counters present in
+.Tn Intel
+.Tn "Pentium Pro" ,
+.Tn "Pentium II" ,
+.Tn "Pentium III" ,
+.Tn "Celeron" ,
+and
+.Tn "Pentium M"
+CPUs.
+.It Li PMC_CLASS_TSC
+The timestamp counter on i386 and amd64 architecture CPUs.
+.El
+.Ss PMC Capabilities
+.Pp
+Capabilities of performance monitoring hardware are denoted using
+the
+.Vt "enum pmc_caps"
+enumeration.
+Supported capabilities include:
+.Bl -tag -width "Li PMC_CAP_INTERRUPT" -compact
+.It Li PMC_CAP_CASCADE
+The ability to cascade counters.
+.It Li PMC_CAP_EDGE
+The ability to count negated to asserted transitions of the hardware
+conditions being probed for.
+.It Li PMC_CAP_INTERRUPT
+The ability to interrupt the CPU.
+.It Li PMC_CAP_INVERT
+The ability to invert the sense of the hardware conditions being
+measured.
+.It Li PMC_CAP_PRECISE
+The ability to perform precise sampling.
+.It Li PMC_CAP_QUALIFIER
+The hardware allows monitored events to be further qualified in some
+system dependent way.
+.It Li PMC_CAP_READ
+The ability to read from performance counters.
+.It Li PMC_CAP_SYSTEM
+The ability to restrict counting of hardware events to when the CPU is
+running privileged code.
+.It Li PMC_CAP_THRESHOLD
+The ability to ignore simultaneous hardware events below a
+programmable threshold.
+.It Li PMC_CAP_USER
+The ability to restrict counting of hardware events to those when the
+CPU is running unprivileged code.
+.It Li PMC_CAP_WRITE
+The ability to write to performance counters.
+.El
+.Ss CPU Naming Conventions
+CPUs are named using small integers from zero up to, but
+excluding, the value returned by function
+.Fn pmc_ncpu .
+On platforms supporting sparsely numbered CPUs not all the numbers in
+this range will denote valid CPUs.
+Operations on non-existent CPUs will return an error.
+.Ss Functional Grouping of the API
+This section contains a brief overview of the available functionality
+in the PMC library.
+Each function listed here is described further in its own manual page.
+.Bl -tag -width indent
+.It Administration
+.Bl -tag -compact
+.It Fn pmc_disable , Fn pmc_enable
+Administratively disable (enable) specific performance monitoring
+counter hardware.
+Counters that are disabled will not be available to applications to
+use.
+.El
+.It "Convenience Functions"
+.Bl -tag -compact
+.It Fn pmc_event_names_of_class
+Returns a list of event names supported by a given PMC type.
+.It Fn pmc_name_of_capability
+Convert a
+.Dv PMC_CAP_*
+flag to a human-readable string.
+.It Fn pmc_name_of_class
+Convert a
+.Dv PMC_CLASS_*
+constant to a human-readable string.
+.It Fn pmc_name_of_cputype
+Return a human-readable name for a CPU type.
+.It Fn pmc_name_of_disposition
+Return a human-readable string describing a PMC's disposition.
+.It Fn pmc_name_of_event
+Convert a numeric event code to a human-readable string.
+.It Fn pmc_name_of_mode
+Convert a
+.Dv PMC_MODE_*
+constant to a human-readable name.
+.It Fn pmc_name_of_state
+Return a human-readable string describing a PMC's current state.
+.El
+.It "Library Initialization"
+.Bl -tag -compact
+.It Fn pmc_init
+Initialize the library.
+This function must be called before any other library function.
+.El
+.It "Log File Handling"
+.Bl -tag -compact
+.It Fn pmc_configure_logfile
+Configure a log file for
+.Xr hwpmc 4
+to write logged events to.
+.It Fn pmc_flush_logfile
+Flush all pending log data in
+.Xr hwpmc 4 Ns Ap s
+buffers.
+.It Fn pmc_writelog
+Append arbitrary user data to the current log file.
+.El
+.It "PMC Management"
+.Bl -tag -compact
+.It Fn pmc_allocate , Fn pmc_release
+Allocate (free) a PMC.
+.It Fn pmc_attach , Fn pmc_detach
+Attach (detach) a process scope PMC to a target.
+.It Fn pmc_read , Fn pmc_write , Fn pmc_rw
+Read (write) a value from (to) a PMC.
+.It Fn pmc_start , Fn pmc_stop
+Start (stop) a software PMC.
+.It Fn pmc_set
+Set the reload value for a sampling PMC.
+.El
+.It "Queries"
+.Bl -tag -compact
+.It Fn pmc_capabilities
+Retrieve the capabilities for a given PMC.
+.It Fn pmc_cpuinfo
+Retrieve information about the CPUs and PMC hardware present in the
+system.
+.It Fn pmc_get_driver_stats
+Retrieve statistics maintained by
+.Xr hwpmc 4 .
+.It Fn pmc_ncpu
+Determine the greatest possible CPU number on the system.
+.It Fn pmc_npmc
+Return the number of hardware PMCs present in a given CPU.
+.It Fn pmc_pmcinfo
+Return information about the state of a given CPU's PMCs.
+.It Fn pmc_width
+Determine the width of a hardware counter in bits.
+.El
+.It "x86 Architecture Specific API"
+.Bl -tag -compact
+.It Fn pmc_get_msr
+Returns the processor model specific register number
+associated with
+.Fa pmc .
+Applications may then use the x86
+.Ic RDPMC
+instruction to directly read the contents of the PMC.
+.El
+.El
+.Ss Signal Handling Requirements
+Applications using PMCs are required to handle the following signals:
+.Bl -tag -width ".Dv SIGBUS"
+.It Dv SIGBUS
+When the
+.Xr hwpmc 4
+module is unloaded using
+.Xr kldunload 8 ,
+processes that have PMCs allocated to them will be sent a
+.Dv SIGBUS
+signal.
+.It Dv SIGIO
+The
+.Xr hwpmc 4
+driver will send a PMC owning process a
+.Dv SIGIO
+signal if:
+.Bl -bullet
+.It
+If any process-mode PMC allocated by it loses all its
+target processes.
+.It
+If the driver encounters an error when writing log data to a
+configured log file.
+This error may be retrieved by a subsequent call to
+.Fn pmc_flush_logfile .
+.El
+.El
+.Ss Typical Program Flow
+.Bl -enum
+.It
+An application would first invoke function
+.Fn pmc_init
+to allow the library to initialize itself.
+.It
+Signal handling would then be set up.
+.It
+Next the application would allocate the PMCs it desires using function
+.Fn pmc_allocate .
+.It
+Initial values for PMCs may be set using function
+.Fn pmc_set .
+.It
+If a log file is necessary for the PMCs to work, it would
+be configured using function
+.Fn pmc_configure_logfile .
+.It
+Process scope PMCs would then be attached to their target processes
+using function
+.Fn pmc_attach .
+.It
+The PMCs would then be started using function
+.Fn pmc_start .
+.It
+Once started, the values of counting PMCs may be read using function
+.Fn pmc_read .
+For PMCs that write events to the log file, this logged data would be
+read and parsed using the
+.Xr pmclog 3
+family of functions.
+.It
+PMCs are stopped using function
+.Fn pmc_stop ,
+and process scope PMCs are detached from their targets using
+function
+.Fn pmc_detach .
+.It
+Before the process exits, it may release its PMCs using function
+.Fn pmc_release .
+Any configured log file may be closed using function
+.Fn pmc_configure_logfile .
+.El
+.Sh EVENT SPECIFIERS
+Event specifiers are strings consisting of an event name, followed by
+optional parameters modifying the semantics of the hardware event
+being probed.
+Event names are PMC architecture dependent, but the PMC library defines
+machine independent aliases for commonly used events.
+.Pp
+Event specifier spellings are case-insensitive and space characters,
+periods, underscores and hyphens are considered equivalent to each other.
+Thus the event specifiers
+.Qq "Example Event" ,
+.Qq "example-event" ,
+and
+.Qq "EXAMPLE_EVENT"
+are equivalent.
+.Ss PMC Architecture Dependent Events
+PMC architecture dependent event specifiers are described in the
+following manual pages:
+.Bl -column " PMC_CLASS_TSC " "MANUAL PAGE "
+.It Em "PMC Class"      Ta Em "Manual Page"
+.It Li PMC_CLASS_IAF    Ta Xr pmc.iaf 3
+.It Li PMC_CLASS_IAP    Ta Xr pmc.atom 3 , Xr pmc.core 3 , Xr pmc.core2 3
+.It Li PMC_CLASS_K7     Ta Xr pmc.k7 3
+.It Li PMC_CLASS_K8     Ta Xr pmc.k8 3
+.It Li PMC_CLASS_P4     Ta Xr pmc.p4 3
+.It Li PMC_CLASS_P5     Ta Xr pmc.p5 3
+.It Li PMC_CLASS_P6     Ta Xr pmc.p6 3
+.It Li PMC_CLASS_TSC    Ta Xr pmc.tsc 3
+.El
+.Ss Event Name Aliases
+Event name aliases are PMC-independent names for commonly used events.
+The following aliases are known to this version of the
+.Nm pmc
+library:
+.Bl -tag -width indent
+.It Li branches
+Measure the number of branches retired.
+.It Li branch-mispredicts
+Measure the number of retired branches that were mispredicted.
+.It Li cycles
+Measure processor cycles.
+This event is implemented using the processor's Time Stamp Counter
+register.
+.It Li dc-misses
+Measure the number of data cache misses.
+.It Li ic-misses
+Measure the number of instruction cache misses.
+.It Li instructions
+Measure the number of instructions retired.
+.It Li interrupts
+Measure the number of interrupts seen.
+.It Li unhalted-cycles
+Measure the number of cycles the processor is not in a halted
+or sleep state.
+.El
+.Sh COMPATIBILITY
+The interface between the
+.Nm pmc
+library and the
+.Xr hwpmc 4
+driver is intended to be private to the implementation and may
+change.
+In order to ease forward compatibility with future versions of the
+.Xr hwpmc 4
+driver, applications are urged to dynamically link with the
+.Nm pmc
+library.
+.Pp
+The
+.Nm pmc
+API is
+.Ud
+.Sh SEE ALSO
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4 ,
+.Xr pmccontrol 8 ,
+.Xr pmcstat 8
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.atom.3 b/lib/libpmc/pmc.atom.3
new file mode 100644
index 0000000..a54d1db
--- /dev/null
+++ b/lib/libpmc/pmc.atom.3
@@ -0,0 +1,1193 @@
+.\" Copyright (c) 2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 12, 2008
+.Dt PMC.ATOM 3
+.Os
+.Sh NAME
+.Nm pmc.atom
+.Nd measurement events for
+.Tn Intel
+.Tn Atom
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn Atom
+CPUs contain PMCs conforming to version 3 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs contain two classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Atom PMCs are documented in
+.Rs
+.%B "IA-32 Intel(R) Architecture Software Developer's Manual"
+.%T "Volume 3: System Programming Guide"
+.%N "Order Number 253669-027US"
+.%D July 2008
+.%Q "Intel Corporation"
+.Re
+.Ss ATOM FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+.Ss ATOM PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li any
+Count matching events seen on any logical processor in a package.
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Pp
+Events that require core-specificity to be specified use an
+additional qualifier
+.Dq Li core= Ns Ar core ,
+where argument
+.Ar core
+is one of:
+.Bl -tag -width indent
+.It Li all
+Measure event conditions on all cores.
+.It Li this
+Measure event conditions on this core.
+.El
+.Pp
+The default is
+.Dq Li this .
+.Pp
+Events that require an agent qualifier to be specified use an
+additional qualifier
+.Dq Li agent= Ns Ar agent ,
+where argument
+.Ar agent
+is one of:
+.Bl -tag -width indent
+.It Li this
+Measure events associated with this bus agent.
+.It Li any
+Measure events caused by any bus agent.
+.El
+.Pp
+The default is
+.Dq Li this .
+.Pp
+Events that require a hardware prefetch qualifier to be specified use an
+additional qualifier
+.Dq Li prefetch= Ns Ar prefetch ,
+where argument
+.Ar prefetch
+is one of:
+.Bl -tag -width "exclude"
+.It Li both
+Include all prefetches.
+.It Li only
+Only count hardware prefetches.
+.It Li exclude
+Exclude hardware prefetches.
+.El
+.Pp
+The default is
+.Dq Li both .
+.Pp
+Events that require a cache coherence qualifier to be specified use an
+additional qualifier
+.Dq Li cachestate= Ns Ar state ,
+where argument
+.Ar state
+contains one or more of the following letters:
+.Bl -tag -width indent
+.It Li e
+Count cache lines in the exclusive state.
+.It Li i
+Count cache lines in the invalid state.
+.It Li m
+Count cache lines in the modified state.
+.It Li s
+Count cache lines in the shared state.
+.El
+.Pp
+The default is
+.Dq Li eims .
+.Pp
+Events that require a snoop response qualifier to be specified use an
+additional qualifier
+.Dq Li snoopresponse= Ns Ar response ,
+where argument
+.Ar response
+consists of the following keywords separated by
+.Dq +
+signs:
+.Bl -tag -width indent
+.It Li clean
+Measure CLEAN responses.
+.It Li hit
+Measure HIT responses.
+.It Li hitm
+Measure HITM responses.
+.El
+.Pp
+The default is to measure all the above responses.
+.Pp
+Events that require a snoop type qualifier use an additional qualifier
+.Dq Li snooptype= Ns Ar type ,
+where argument
+.Ar type
+is one of the following keywords:
+.Bl -tag -width indent
+.It Li cmp2i
+Measure CMP2I snoops.
+.It Li cmp2s
+Measure CMP2S snoops.
+.El
+.Pp
+The default is to measure both snoops.
+.Ss Event Specifiers (Programmable PMCs)
+Atom programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li BACLEARS
+.Pq Event E6H , Umask 01H
+The number of times the front end is resteered.
+.It Li BOGUS_BR
+.Pq Event E4H , Umask 00H
+The number of byte sequences mistakenly detected as taken branch
+instructions.
+.It Li BR_BAC_MISSP_EXEC
+.Pq Event 8AH , Umask 00H
+The number of branch instructions that were mispredicted when
+decoded.
+.It Li BR_CALL_MISSP_EXEC
+.Pq Event 93H , Umask 00H
+The number of mispredicted
+.Li CALL
+instructions that were executed.
+.It Li BR_CALL_EXEC
+.Pq Event 92H , Umask 00H
+The number of
+.Li CALL
+instructions executed.
+.It Li BR_CND_EXEC
+.Pq Event 8BH , Umask 00H
+The number of conditional branches executed, but not necessarily retired.
+.It Li BR_CND_MISSP_EXEC
+.Pq Event 8CH , Umask 00H
+The number of mispredicted conditional branches executed.
+.It Li BR_IND_CALL_EXEC
+.Pq Event 94H , Umask 00H
+The number of indirect
+.Li CALL
+instructions executed.
+.It Li BR_IND_EXEC
+.Pq Event 8DH , Umask 00H
+The number of indirect branch instructions executed.
+.It Li BR_IND_MISSP_EXEC
+.Pq Event 8EH , Umask 00H
+The number of mispredicted indirect branch instructions executed.
+.It Li BR_INST_DECODED
+.Pq Event E0H , Umask 01H
+The number of branch instructions decoded.
+.It Li BR_INST_EXEC
+.Pq Event 88H , Umask 00H
+The number of branches executed, but not necessarily retired.
+.It Li BR_INST_RETIRED.ANY
+.Pq Event C4H , Umask 00H
+.Pq Alias Qq "Branch Instruction Retired"
+The number of branch instructions retired.
+This is an architectural performance event.
+.It Li BR_INST_RETIRED.ANY1
+.Pq Event C4H , Umask 0FH
+The number of branch instructions retired that were mispredicted.
+.It Li BR_INST_RETIRED.MISPRED
+.Pq Event C5H , Umask 00H
+.Pq Alias Qq "Branch Misses Retired"
+The number of mispredicted branch instructions retired.
+This is an architectural performance event.
+.It Li BR_INST_RETIRED.MISPRED_NOT_TAKEN
+.Pq Event C4H , Umask 02H
+The number of not taken branch instructions retired that were
+mispredicted.
+.It Li BR_INST_RETIRED.MISPRED_TAKEN
+.Pq Event C4H , Umask 08H
+The number of taken branch instructions retired that were mispredicted.
+.It Li BR_INST_RETIRED.PRED_NOT_TAKEN
+.Pq Event C4H , Umask 01H
+The number of not taken branch instructions retired that were
+correctly predicted.
+.It Li BR_INST_RETIRED.PRED_TAKEN
+.Pq Event C4H , Umask 04H
+The number of taken branch instructions retired that were correctly
+predicted.
+.It Li BR_INST_RETIRED.TAKEN
+.Pq Event C4H , Umask 0CH
+The number of taken branch instructions retired.
+.It Li BR_MISSP_EXEC
+.Pq Event 89H , Umask 00H
+The number of mispredicted branch instructions that were executed.
+.It Li BR_RET_MISSP_EXEC
+.Pq Event 90H , Umask 00H
+The number of mispredicted
+.Li RET
+instructions executed.
+.It Li BR_RET_BAC_MISSP_EXEC
+.Pq Event 91H , Umask 00H
+The number of
+.Li RET
+instructions executed that were mispredicted at decode time.
+.It Li BR_RET_EXEC
+.Pq Event 8FH , Umask 00H
+The number of
+.Li RET
+instructions executed.
+.It Li BR_TKN_BUBBLE_1
+.Pq Event 97H , Umask 00H
+The number of branch predicted taken with bubble 1.
+.It Li BR_TKN_BUBBLE_2
+.Pq Event 98H , Umask 00H
+The number of branch predicted taken with bubble 2.
+.It Li BUSQ_EMPTY Op ,core= Ns Ar core
+.Pq Event 7DH
+The number of cycles during which the core did not have any pending
+transactions in the bus queue.
+.It Li BUS_BNR_DRV Op ,agent= Ns Ar agent
+.Pq Event 61H
+The number of Bus Not Ready signals asserted on the bus.
+This event is thread-independent.
+.It Li BUS_DATA_RCV Op ,core= Ns Ar core
+.Pq Event 64H
+The number of bus cycles during which the processor is receiving data.
+This event is thread-independent.
+.It Li BUS_DRDY_CLOCKS Op ,agent= Ns Ar agent
+.Pq Event 62H
+The number of bus cycles during which the Data Ready signal is asserted
+on the bus.
+This event is thread-independent.
+.It Li BUS_HIT_DRV Op ,agent= Ns Ar agent
+.Pq Event 7AH
+The number of bus cycles during which the processor drives the
+.Li HIT#
+pin.
+This event is thread-independent.
+.It Li BUS_HITM_DRV Op ,agent= Ns Ar agent
+.Pq Event 7BH
+The number of bus cycles during which the processor drives the
+.Li HITM#
+pin.
+This event is thread-independent.
+.It Li BUS_IO_WAIT Op ,core= Ns Ar core
+.Pq Event 7FH
+The number of core cycles during which I/O requests wait in the bus
+queue.
+.It Li BUS_LOCK_CLOCKS Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 63H
+The number of bus cycles during which the
+.Li LOCK
+signal was asserted on the bus.
+This event is thread-independent.
+.It Li BUS_REQUEST_OUTSTANDING Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 60H
+The number of pending full cache line read transactions on the bus
+occurring in each cycle.
+This event is thread-independent.
+.It Li BUS_TRANS_P Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6BH
+The number of partial bus transactions.
+.It Li BUS_TRANS_IFETCH Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 68H
+The number of instruction fetch full cache line bus transactions.
+.It Li BUS_TRANS_INVAL Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 69H
+The number of invalidate bus transactions.
+.It Li BUS_TRANS_PWR Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6AH
+The number of partial write bus transactions.
+.It Li BUS_TRANS_DEF Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6DH
+The number of deferred bus transactions.
+.It Li BUS_TRANS_BURST Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6EH
+The number of burst transactions.
+.It Li BUS_TRANS_MEM Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6FH
+The number of memory bus transactions.
+.It Li BUS_TRANS_ANY Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 70H
+The number of bus transactions of any kind.
+.It Li BUS_TRANS_BRD Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 65H
+The number of burst read transactions.
+.It Li BUS_TRANS_IO Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6CH
+The number of completed I/O bus transactions due to
+.Li IN
+and
+.Li OUT
+instructions.
+.It Li BUS_TRANS_RFO Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 66H
+The number of Read For Ownership bus transactions.
+.It Li BUS_TRANS_WB Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 67H
+The number of explicit write-back bus transactions due to dirty line
+evictions.
+.It Li CMP_SNOOP Xo
+.Op ,core= Ns Ar core
+.Op ,snooptype= Ns Ar type
+.Xc
+.Pq Event 78H
+The number of times the L1 data cache is snooped by the other core in
+the same processor.
+.It Li CPU_CLK_UNHALTED.BUS
+.Pq Event 3CH , Umask 01H
+.Pq Alias Qq "Unhalted Reference Cycles"
+The number of bus cycles when the core is not in the halt state.
+This is an architectural performance event.
+.It Li CPU_CLK_UNHALTED.CORE_P
+.Pq Event 3CH , Umask 00H
+.Pq Alias Qq "Unhalted Core Cycles"
+The number of core cycles while the core is not in a halt state.
+This is an architectural performance event.
+.It Li CPU_CLK_UNHALTED.NO_OTHER
+.Pq Event 3CH , Umask 02H
+The number of bus cycles during which the core remains unhalted and
+the other core is halted.
+.It Li CYCLES_DIV_BUSY
+.Pq Event 14H , Umask 01H
+The number of cycles the divider is busy.
+.It Li CYCLES_INT_MASKED.CYCLES_INT_MASKED
+.Pq Event C6H , Umask 01H
+The number of cycles during which interrupts are disabled.
+.It Li CYCLES_INT_MASKED.CYCLES_INT_PENDING_AND_MASKED
+.Pq Event C6H , Umask 02H
+The number of cycles during which there were pending interrupts while
+interrupts were disabled.
+.It Li CYCLES_L1I_MEM_STALLED
+.Pq Event 86H , Umask 00H
+The number of cycles for which an instruction fetch stalls.
+.It Li DATA_TLB_MISSES.DTLB_MISS
+.Pq Event 08H , Umask 07H
+The number of memory accesses that missed the Data TLB.
+.It Li DATA_TLB_MISSES.DTLB_MISS_LD
+.Pq Event 08H , Umask 05H
+The number of loads that missed the Data TLB.
+.It Li DATA_TLB_MISSES.DTLB_MISS_ST
+.Pq Event 08H , Umask 06H
+The number of stores that missed the Data TLB.
+.It Li DATA_TLB_MISSES.UTLB_MISS_LD
+.Pq Event 08H , Umask 09H
+The number of loads that missed the UTLB.
+.It Li DELAYED_BYPASS.FP
+.Pq Event 19H , Umask 00H
+The number of floating point operations that used data immediately
+after the data was generated by a non floating point execution unit.
+.It Li DELAYED_BYPASS.LOAD
+.Pq Event 19H , Umask 01H
+The number of delayed bypass penalty cycles that a load operation incurred.
+.It Li DELAYED_BYPASS.SIMD
+.Pq Event 19H , Umask 02H
+The number of times SIMD operations use data immediately after the data
+was generated by a non-SIMD execution unit.
+.It Li DIV
+.Pq Event 13H , Umask 00H
+The number of divide operations executed.
+This event is only available on PMC1.
+.It Li DIV.AR
+.Pq Event 13H , Umask 81H
+The number of divide operations retired.
+.It Li DIV.S
+.Pq Event 13H , Umask 01H
+The number of divide operations executed.
+.It Li DTLB_MISSES.ANY
+.Pq Event 08H , Umask 01H
+The number of Data TLB misses, including misses that result from
+speculative accesses.
+.It Li DTLB_MISSES.L0_MISS_LD
+.Pq Event 08H , Umask 04H
+The number of level 0 DTLB misses due to load operations.
+.It Li DTLB_MISSES.MISS_LD
+.Pq Event 08H , Umask 02H
+The number of Data TLB misses due to load operations.
+.It Li DTLB_MISSES.MISS_ST
+.Pq Event 08H , Umask 08H
+The number of Data TLB misses due to store operations.
+.It Li EIST_TRANS
+.Pq Event 3AH , Umask 00H
+The number of Enhanced Intel SpeedStep Technology transitions.
+.It Li ESP.ADDITIONS
+.Pq Event ABH , Umask 02H
+The number of automatic additions to the
+.Li %esp
+register.
+.It Li ESP.SYNCH
+.Pq Event ABH , Umask 01H
+The number of times the
+.Li %esp
+register was explicitly used in an address expression after
+it is implicitly used by a
+.Li PUSH
+or
+.Li POP
+instruction.
+.It Li EXT_SNOOP Xo
+.Op ,agent= Ns Ar agent
+.Op ,snoopresponse= Ns Ar response
+.Xc
+.Pq Event 77H
+The number of snoop responses to bus transactions.
+.It Li FP_ASSIST
+.Pq Event 11H , Umask 01H
+The number of floating point operations executed that needed
+a microcode assist, including speculatively executed instructions.
+.It Li FP_ASSIST.AR
+.Pq Event 11H , Umask 81H
+The number of floating point operations retired that needed
+a microcode assist.
+.It Li FP_COMP_OPS_EXE
+.Pq Event 10H , Umask 00H
+The number of floating point computational micro-ops executed.
+The event is available only on PMC0.
+.It Li FP_MMX_TRANS_TO_FP
+.Pq Event CCH , Umask 02H
+The number of transitions from MMX instructions to floating point
+instructions.
+.It Li FP_MMX_TRANS_TO_MMX
+.Pq Event CCH , Umask 01H
+The number of transitions from floating point instructions to MMX
+instructions.
+.It Li HW_INT_RCV
+.Pq Event C8H , Umask 00H
+The number of hardware interrupts received.
+.It Li ICACHE.ACCESSES
+.Pq Event 80H , Umask 03H
+The number of instruction fetches.
+.It Li ICACHE.MISSES
+.Pq Event 80H , Umask 02H
+The number of instruction fetches that miss the instruction cache.
+.It Li IDLE_DURING_DIV
+.Pq Event 18H , Umask 00H
+The number of cycles the divider is busy and no other execution unit
+or load operation was in progress.
+This event is available only on PMC0.
+.It Li ILD_STALL
+.Pq Event 87H , Umask 00H
+The number of cycles the instruction length decoder stalled due to a
+length changing prefix.
+.It Li INST_QUEUE.FULL
+.Pq Event 83H , Umask 02H
+The number of cycles during which the instruction queue is full.
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 00H
+.Pq Alias Qq "Instruction Retired"
+The number of instructions retired.
+This is an architectural performance event.
+.It Li INST_RETIRED.LOADS
+.Pq Event C0H , Umask 01H
+The number of instructions retired that contained a load operation.
+.It Li INST_RETIRED.OTHER
+.Pq Event C0H , Umask 04H
+The number of instructions retired that did not contain a load or a
+store operation.
+.It Li INST_RETIRED.STORES
+.Pq Event C0H , Umask 02H
+The number of instructions retired that contained a store operation.
+.It Li ITLB.FLUSH
+.Pq Event 82H , Umask 04H
+The number of ITLB flushes.
+.It Li ITLB.LARGE_MISS
+.Pq Event 82H , Umask 10H
+The number of instruction fetches from large pages that miss the
+ITLB.
+.It Li ITLB.MISSES
+.Pq Event 82H , Umask 02H
+The number of instruction fetches from both large and small pages that
+miss the ITLB.
+.It Li ITLB.SMALL_MISS
+.Pq Event 82H , Umask 02H
+The number of instruction fetches from small pages that miss the ITLB.
+.It Li ITLB_MISS_RETIRED
+.Pq Event C9H , Umask 00H
+The number of retired instructions that missed the ITLB when they were
+fetched.
+.It Li L1D_ALL_REF
+.Pq Event 43H , Umask 01H
+The number of references to L1 data cache counting loads and stores
+to all memory types.
+.It Li L1D_ALL_CACHE_REF
+.Pq Event 43H , Umask 02H
+The number of data reads and writes to cacheable memory.
+.It Li L1D_CACHE_LOCK Op ,cachestate= Ns Ar state
+.Pq Event 42H
+The number of locked reads from cacheable memory.
+.It Li L1D_CACHE_LOCK_DURATION
+.Pq Event 42H , Umask 10H
+The number of cycles during which any cache line is locked by any
+locking instruction.
+.It Li L1D_CACHE.LD
+.Pq Event 40H , Umask 21H
+The number of data reads from cacheable memory.
+.It Li L1D_CACHE.ST
+.Pq Event 41H , Umask 22H
+The number of data writes to cacheable memory.
+.It Li L1D_M_EVICT
+.Pq Event 47H , Umask 00H
+The number of modified cache lines evicted from L1 data cache.
+.It Li L1D_M_REPL
+.Pq Event 46H , Umask 00H
+The number of modified lines allocated in L1 data cache.
+.It Li L1D_PEND_MISS
+.Pq Event 48H , Umask 00H
+The total number of outstanding L1 data cache misses at any clock.
+.It Li L1D_PREFETCH.REQUESTS
+.Pq Event 4EH , Umask 10H
+The number of times L1 data cache requested to prefetch a data cache
+line.
+.It Li L1D_REPL
+.Pq Event 45H , Umask 0FH
+The number of lines brought into L1 data cache.
+.It Li L1D_SPLIT.LOADS
+.Pq Event 49H , Umask 01H
+The number of load operations that span two cache lines.
+.It Li L1D_SPLIT.STORES
+.Pq Event 49H , Umask 02H
+The number of store operations that span two cache lines.
+.It Li L1I_MISSES
+.Pq Event 81H , Umask 00H
+The number of instruction fetch unit misses.
+.It Li L1I_READS
+.Pq Event 80H , Umask 00H
+The number of instruction fetches.
+.It Li L2_ADS Op ,core= Ns Ar core
+.Pq Event 21H
+The number of cycles that the L2 address bus is in use.
+.It Li L2_DBUS_BUSY_RD Op ,core= Ns Ar core
+.Pq Event 23H
+The number of core cycles during which the L2 data bus is busy
+transferring data to the core.
+.It Li L2_IFETCH Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 28H
+The number of instruction cache line requests from the instruction
+fetch unit.
+.It Li L2_LD Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 29H
+The number of L2 cache read requests from L1 cache and L2
+prefetchers.
+.It Li L2_LINES_IN Xo
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 24H
+The number of cache lines allocated in L2 cache.
+.It Li L2_LINES_OUT Xo
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 26H
+The number of L2 cache lines evicted.
+.It Li L2_LOCK Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 2BH
+The number of locked accesses to cache lines that miss L1 data
+cache.
+.It Li L2_M_LINES_IN Op ,core= Ns Ar core
+.Pq Event 25H
+The number of L2 cache line modifications.
+.It Li L2_M_LINES_OUT Xo
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 27H
+The number of modified lines evicted from L2 cache.
+.It Li L2_NO_REQ Op ,core= Ns Ar core
+.Pq Event 32H
+The number of cycles during which no L2 cache requests were pending
+from a core.
+.It Li L2_REJECT_BUSQ Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 30H
+The number of L2 cache requests that were rejected.
+.It Li L2_RQSTS Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 2EH
+The number of completed L2 cache requests.
+.It Li L2_RQSTS.SELF.DEMAND.I_STATE
+.Pq Event 2EH , Umask 41H
+.Pq Alias Qq "LLC Misses"
+The number of completed L2 cache demand requests from this core that
+missed the L2 cache.
+This is an architectural performance event.
+.It Li L2_RQSTS.SELF.DEMAND.MESI
+.Pq Event 2EH , Umask 4FH
+.Pq Alias Qq "LLC References"
+The number of completed L2 cache demand requests from this core.
+.It Li L2_ST Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 2AH
+The number of store operations that miss the L1 cache and request data
+from the L2 cache.
+.It Li LOAD_BLOCK.L1D
+.Pq Event 03H , Umask 20H
+The number of loads blocked by the L1 data cache.
+.It Li LOAD_BLOCK.OVERLAP_STORE
+.Pq Event 03H , Umask 08H
+The number of loads that partially overlap an earlier store or are
+aliased with a previous store.
+.It Li LOAD_BLOCK.STA
+.Pq Event 03H , Umask 02H
+The number of loads blocked by preceding stores whose address is yet
+to be calculated.
+.It Li LOAD_BLOCK.STD
+.Pq Event 03H , Umask 04H
+The number of loads blocked by preceding stores to the same address
+whose data value is not known.
+.It Li LOAD_BLOCK.UNTIL_RETIRE
+.Pq Event 03H , Umask 10H
+The number of load operations that were blocked until retirement.
+.It Li LOAD_HIT_PRE
+.Pq Event 4CH , Umask 00H
+The number of load operations that conflicted with a prefetch to the
+same cache line.
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 01H
+The number of times a program writes to a code section.
+.It Li MACHINE_NUKES.MEM_ORDER
+.Pq Event C3H , Umask 04H
+The number of times the execution pipeline was restarted due to a
+memory ordering conflict or memory disambiguation misprediction.
+.It Li MACRO_INSTS.ALL_DECODED
+.Pq Event AAH , Umask 03H
+The number of instructions decoded.
+.It Li MACRO_INSTS.CISC_DECODED
+.Pq Event AAH , Umask 02H
+The number of complex instructions decoded.
+.It Li MEMORY_DISAMBIGUATION.RESET
+.Pq Event 09H , Umask 01H
+The number of cycles during which memory disambiguation misprediction
+occurs.
+.It Li MEMORY_DISAMBIGUATION.SUCCESS
+.Pq Event 09H , Umask 02H
+The number of load operations that were successfully disambiguated.
+.It Li MEM_LOAD_RETIRED.DTLB_MISS
+.Pq Event CBH , Umask 04H
+The number of retired load operations that missed the DTLB.
+.It Li MEM_LOAD_RETIRED.L2_MISS
+.Pq Event CBH , Umask 02H
+The number of retired load operations that miss L2 cache.
+.It Li MEM_LOAD_RETIRED.L2_HIT
+.Pq Event CBH , Umask 01H
+The number of retired load operations that hit L2 cache.
+.It Li MEM_LOAD_RETIRED.L2_LINE_MISS
+.Pq Event CBH , Umask 08H
+The number of load operations that missed L2 cache and that caused a
+bus request.
+.It Li MUL
+.Pq Event 12H , Umask 00H
+The number of multiply operations executed.
+This event is only available on PMC1.
+.It Li MUL.AR
+.Pq Event 12H , Umask 81H
+The number of multiply operations retired.
+.It Li MUL.S
+.Pq Event 12H , Umask 01H
+The number of multiply operations executed.
+.It Li PAGE_WALKS.WALKS
+.Pq Event 0CH , Umask 03H
+The number of page walks executed due to an ITLB or DTLB miss.
+.It Li PAGE_WALKS.CYCLES
+.Pq Event 0CH , Umask 03H
+.\" XXX Clarify.  Identical event umask/event numbers.
+The number of cycles spent in a page walk caused by an ITLB or DTLB
+miss.
+.It Li PREF_RQSTS_DN
+.Pq Event F8H , Umask 00H
+The number of downward prefetches issued from the Data Prefetch Logic
+unit to L2 cache.
+.It Li PREF_RQSTS_UP
+.Pq Event F0H , Umask 00H
+The number of upward prefetches issued from the Data Prefetch Logic
+unit to L2 cache.
+.It Li PREFETCH.PREFETCHNTA
+.Pq Event 07H , Umask 08H
+The number of
+.Li PREFETCHNTA
+instructions executed.
+.It Li PREFETCH.PREFETCHT0
+.Pq Event 07H , Umask 01H
+The number of
+.Li PREFETCHT0
+instructions executed.
+.It Li PREFETCH.SW_L2
+.Pq Event 07H , Umask 06H
+The number of
+.Li PREFETCHT1
+and
+.Li PREFETCHT2
+instructions executed.
+.It Li RAT_STALLS.ANY
+.Pq Event D2H , Umask 0FH
+The number of stall cycles due to any of
+.Li RAT_STALLS.FLAGS ,
+.Li RAT_STALLS.FPSW ,
+.Li RAT_STALLS.PARTIAL_CYCLES
+and
+.Li RAT_STALLS.ROB_READ_PORT .
+.It Li RAT_STALLS.FLAGS
+.Pq Event D2H , Umask 04H
+The number of cycles execution stalled due to a flag register induced
+stall.
+.It Li RAT_STALLS.FPSW
+.Pq Event D2H , Umask 08H
+The number of times the floating point status word was written.
+.It Li RAT_STALLS.PARTIAL_CYCLES
+.Pq Event D2H , Umask 02H
+The number of cycles of added instruction execution latency due to the
+use of a register that was partially written by previous instructions.
+.It Li RAT_STALLS.ROB_READ_PORT
+.Pq Event D2H , Umask 01H
+The number of cycles when ROB read port stalls occurred.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event DCH , Umask 1FH
+The number of cycles during which any resource related stall
+occurred.
+.It Li RESOURCE_STALLS.BR_MISS_CLEAR
+.Pq Event DCH , Umask 10H
+The number of cycles stalled due to branch misprediction.
+.It Li RESOURCE_STALLS.FPCW
+.Pq Event DCH , Umask 08H
+The number of cycles stalled due to writing the floating point control
+word.
+.It Li RESOURCE_STALLS.LD_ST
+.Pq Event DCH , Umask 04H
+The number of cycles during which the number of loads and stores in
+the pipeline exceeded their limits.
+.It Li RESOURCE_STALLS.ROB_FULL
+.Pq Event DCH , Umask 01H
+The number of cycles when the reorder buffer was full.
+.It Li RESOURCE_STALLS.RS_FULL
+.Pq Event DCH , Umask 02H
+The number of cycles during which the RS was full.
+.It Li RS_UOPS_DISPATCHED
+.Pq Event A0H , Umask 00H
+The number of micro-ops dispatched for execution.
+.It Li RS_UOPS_DISPATCHED.PORT0
+.Pq Event A1H , Umask 01H
+The number of cycles micro-ops were dispatched for execution on port
+0.
+.It Li RS_UOPS_DISPATCHED.PORT1
+.Pq Event A1H , Umask 02H
+The number of cycles micro-ops were dispatched for execution on port
+1.
+.It Li RS_UOPS_DISPATCHED.PORT2
+.Pq Event A1H , Umask 04H
+The number of cycles micro-ops were dispatched for execution on port
+2.
+.It Li RS_UOPS_DISPATCHED.PORT3
+.Pq Event A1H , Umask 08H
+The number of cycles micro-ops were dispatched for execution on port
+3.
+.It Li RS_UOPS_DISPATCHED.PORT4
+.Pq Event A1H , Umask 10H
+The number of cycles micro-ops were dispatched for execution on port
+4.
+.It Li RS_UOPS_DISPATCHED.PORT5
+.Pq Event A1H , Umask 20H
+The number of cycles micro-ops were dispatched for execution on port
+5.
+.It Li SB_DRAIN_CYCLES
+.Pq Event 04H , Umask 01H
+The number of cycles while the store buffer is draining.
+.It Li SEGMENT_REG_LOADS.ANY
+.Pq Event 06H , Umask 00H
+The number of segment register loads.
+.It Li SEG_REG_RENAMES.ANY
+.Pq Event D5H , Umask 0FH
+The number of times any segment register was renamed.
+.It Li SEG_REG_RENAMES.DS
+.Pq Event D5H , Umask 02H
+The number of times the
+.Li %ds
+register is renamed.
+.It Li SEG_REG_RENAMES.ES
+.Pq Event D5H , Umask 01H
+The number of times the
+.Li %es
+register is renamed.
+.It Li SEG_REG_RENAMES.FS
+.Pq Event D5H , Umask 04H
+The number of times the
+.Li %fs
+register is renamed.
+.It Li SEG_REG_RENAMES.GS
+.Pq Event D5H , Umask 08H
+The number of times the
+.Li %gs
+register is renamed.
+.It Li SEG_RENAME_STALLS.ANY
+.Pq Event D4H , Umask 0FH
+The number of stalls due to lack of resource to rename any segment
+register.
+.It Li SEG_RENAME_STALLS.DS
+.Pq Event D4H , Umask 02H
+The number of stalls due to lack of renaming resources for the
+.Li %ds
+register.
+.It Li SEG_RENAME_STALLS.ES
+.Pq Event D4H , Umask 01H
+The number of stalls due to lack of renaming resources for the
+.Li %es
+register.
+.It Li SEG_RENAME_STALLS.FS
+.Pq Event D4H , Umask 04H
+The number of stalls due to lack of renaming resources for the
+.Li %fs
+register.
+.It Li SEG_RENAME_STALLS.GS
+.Pq Event D4H , Umask 08H
+The number of stalls due to lack of renaming resources for the
+.Li %gs
+register.
+.It Li SIMD_ASSIST
+.Pq Event CDH , Umask 00H
+The number of SIMD assists invoked.
+.It Li SIMD_COMP_INST_RETIRED.PACKED_DOUBLE
+.Pq Event CAH , Umask 04H
+The number of computational SSE2 packed double precision instructions
+retired.
+.It Li SIMD_COMP_INST_RETIRED.PACKED_SINGLE
+.Pq Event CAH , Umask 01H
+The number of computational SSE2 packed single precision instructions
+retired.
+.It Li SIMD_COMP_INST_RETIRED.SCALAR_DOUBLE
+.Pq Event CAH , Umask 08H
+The number of computational SSE2 scalar double precision instructions
+retired.
+.It Li SIMD_COMP_INST_RETIRED.SCALAR_SINGLE
+.Pq Event CAH , Umask 02H
+The number of computational SSE2 scalar single precision instructions
+retired.
+.It Li SIMD_INSTR_RETIRED
+.Pq Event CEH , Umask 00H
+The number of retired SIMD instructions that use MMX registers.
+.It Li SIMD_INST_RETIRED.ANY
+.Pq Event C7H , Umask 1FH
+The number of streaming SIMD instructions retired.
+.It Li SIMD_INST_RETIRED.PACKED_DOUBLE
+.Pq Event C7H , Umask 04H
+The number of SSE2 packed double precision instructions retired.
+.It Li SIMD_INST_RETIRED.PACKED_SINGLE
+.Pq Event C7H , Umask 01H
+The number of SSE packed single precision instructions retired.
+.It Li SIMD_INST_RETIRED.SCALAR_DOUBLE
+.Pq Event C7H , Umask 08H
+The number of SSE2 scalar double precision instructions retired.
+.It Li SIMD_INST_RETIRED.SCALAR_SINGLE
+.Pq Event C7H , Umask 02H
+The number of SSE scalar single precision instructions retired.
+.It Li SIMD_INST_RETIRED.VECTOR
+.Pq Event C7H , Umask 10H
+The number of SSE2 vector instructions retired.
+.It Li SIMD_SAT_INSTR_RETIRED
+.Pq Event CFH , Umask 00H
+The number of saturated arithmetic SIMD instructions retired.
+.It Li SIMD_SAT_UOP_EXEC.AR
+.Pq Event B1H , Umask 80H
+The number of SIMD saturated arithmetic micro-ops retired.
+.It Li SIMD_SAT_UOP_EXEC.S
+.Pq Event B1H , Umask 00H
+The number of SIMD saturated arithmetic micro-ops executed.
+.It Li SIMD_UOPS_EXEC.AR
+.Pq Event B0H , Umask 80H
+The number of SIMD micro-ops retired.
+.It Li SIMD_UOPS_EXEC.S
+.Pq Event B0H , Umask 00H
+The number of SIMD micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.ARITHMETIC.AR
+.Pq Event B3H , Umask A0H
+The number of SIMD packed arithmetic micro-ops retired.
+.It Li SIMD_UOP_TYPE_EXEC.ARITHMETIC.S
+.Pq Event B3H , Umask 20H
+The number of SIMD packed arithmetic micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.LOGICAL.AR
+.Pq Event B3H , Umask 90H
+The number of SIMD packed logical micro-ops retired.
+.It Li SIMD_UOP_TYPE_EXEC.LOGICAL.S
+.Pq Event B3H , Umask 10H
+The number of SIMD packed logical micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.MUL.AR
+.Pq Event B3H , Umask 81H
+The number of SIMD packed multiply micro-ops retired.
+.It Li SIMD_UOP_TYPE_EXEC.MUL.S
+.Pq Event B3H , Umask 01H
+The number of SIMD packed multiply micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.PACK.AR
+.Pq Event B3H , Umask 84H
+The number of SIMD pack micro-ops retired.
+.It Li SIMD_UOP_TYPE_EXEC.PACK.S
+.Pq Event B3H , Umask 04H
+The number of SIMD pack micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.SHIFT.AR
+.Pq Event B3H , Umask 82H
+The number of SIMD packed shift micro-ops retired.
+.It Li SIMD_UOP_TYPE_EXEC.SHIFT.S
+.Pq Event B3H , Umask 02H
+The number of SIMD packed shift micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.UNPACK.AR
+.Pq Event B3H , Umask 88H
+The number of SIMD unpack micro-ops retired.
+.It Li SIMD_UOP_TYPE_EXEC.UNPACK.S
+.Pq Event B3H , Umask 08H
+The number of SIMD unpack micro-ops executed.
+.It Li SNOOP_STALL_DRV Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 7EH
+The number of times the bus stalled for snoops.
+This event is thread-independent.
+.It Li SSE_PRE_EXEC.L2
+.Pq Event 07H , Umask 02H
+The number of
+.Li PREFETCHT1
+instructions executed.
+.It Li SSE_PRE_EXEC.STORES
+.Pq Event 07H , Umask 03H
+The number of times SSE non-temporal store instructions were executed.
+.It Li SSE_PRE_MISS.L1
+.Pq Event 4BH , Umask 01H
+The number of times the
+.Li PREFETCHT0
+instruction executed and missed all cache levels.
+.It Li SSE_PRE_MISS.L2
+.Pq Event 4BH , Umask 02H
+The number of times the
+.Li PREFETCHT1
+instruction executed and missed all cache levels.
+.It Li SSE_PRE_MISS.NTA
+.Pq Event 4BH , Umask 00H
+The number of times the
+.Li PREFETCHNTA
+instruction executed and missed all cache levels.
+.It Li STORE_BLOCK.ORDER
+.Pq Event 04H , Umask 02H
+The number of cycles while a store was waiting for another store to be
+globally observed.
+.It Li STORE_BLOCK.SNOOP
+.Pq Event 04H , Umask 08H
+The number of cycles while a store was blocked due to a conflict with
+an internal or external snoop.
+.It Li STORE_FORWARDS.GOOD
+.Pq Event 02H , Umask 81H
+The number of times stored data was forwarded directly to a load.
+.It Li THERMAL_TRIP
+.Pq Event 3BH , Umask C0H
+The number of thermal trips.
+.It Li UOPS_RETIRED.LD_IND_BR
+.Pq Event C2H , Umask 01H
+The number of micro-ops retired that fused a load with another
+operation.
+.It Li UOPS_RETIRED.STD_STA
+.Pq Event C2H , Umask 02H
+The number of store address calculations that fused into one micro-op.
+.It Li UOPS_RETIRED.MACRO_FUSION
+.Pq Event C2H , Umask 04H
+The number of times retired instruction pairs were fused into one
+micro-op.
+.It Li UOPS_RETIRED.FUSED
+.Pq Event C2H , Umask 07H
+The number of fused micro-ops retired.
+.It Li UOPS_RETIRED.NON_FUSED
+.Pq Event C2H , Umask 08H
+The number of non-fused micro-ops retired.
+.It Li UOPS_RETIRED.ANY
+.Pq Event C2H , Umask 10H
+The number of micro-ops retired.
+.It Li X87_COMP_OPS_EXE.ANY.AR
+.Pq Event 10H , Umask 81H
+The number of x87 floating-point computational micro-ops retired.
+.It Li X87_COMP_OPS_EXE.ANY.S
+.Pq Event 10H , Umask 01H
+The number of x87 floating-point computational micro-ops executed.
+.It Li X87_OPS_RETIRED.ANY
+.Pq Event C1H , Umask FEH
+The number of floating point computational instructions retired.
+.It Li X87_OPS_RETIRED.FXCH
+.Pq Event C1H , Umask 01H
+The number of
+.Li FXCH
+instructions retired.
+.El
+.Ss Event Name Aliases
+The following table shows the mapping between the PMC-independent
+aliases supported by
+.Lb libpmc
+and the underlying hardware events used on these CPUs.
+.Bl -column "branch-mispredicts" "cpu_clk_unhalted.core_p" "PMC Class"
+.It Em Alias Ta Em Event Ta Em PMC Class
+.It Li branches Ta Li BR_INST_RETIRED.ANY Ta Li PMC_CLASS_IAP
+.It Li branch-mispredicts Ta Li BR_INST_RETIRED.MISPRED Ta Li PMC_CLASS_IAP
+.It Li ic-misses Ta Li ICACHE.MISSES Ta Li PMC_CLASS_IAP
+.It Li instructions Ta Li INST_RETIRED.ANY_P Ta Li PMC_CLASS_IAF
+.It Li interrupts Ta Li HW_INT_RCV Ta Li PMC_CLASS_IAP
+.It Li unhalted-cycles Ta Li CPU_CLK_UNHALTED.CORE_P Ta Li PMC_CLASS_IAF
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.core.3 b/lib/libpmc/pmc.core.3
new file mode 100644
index 0000000..d32e62a
--- /dev/null
+++ b/lib/libpmc/pmc.core.3
@@ -0,0 +1,808 @@
+.\" Copyright (c) 2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 12, 2008
+.Dt PMC.CORE 3
+.Os
+.Sh NAME
+.Nm pmc.core
+.Nd measurement events for
+.Tn Intel
+.Tn Core Solo
+and
+.Tn Core Duo
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Core Solo"
+and
+.Tn "Core Duo"
+CPUs contain PMCs conforming to version 1 of the
+.Tn Intel
+performance measurement architecture.
+.Pp
+These PMCs are documented in
+.Rs
+.%B IA-32 Intel\(rg Architecture Software Developer's Manual
+.%T Volume 3: System Programming Guide
+.%N Order Number 253669-027US
+.%D July 2008
+.%Q Intel Corporation
+.Re
+.Ss PMC Features
+CPUs conforming to version 1 of the
+.Tn Intel
+performance measurement architecture contain two programmable PMCs of
+class
+.Li PMC_CLASS_IAP .
+The PMCs are 40 bits wide and offer the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Pp
+Events that require core-specificity to be specified use an
+additional qualifier
+.Dq Li core= Ns Ar value ,
+where argument
+.Ar value
+is one of:
+.Bl -tag -width indent -compact
+.It Li all
+Measure event conditions on all cores.
+.It Li this
+Measure event conditions on this core.
+.El
+The default is
+.Dq Li this .
+.Pp
+Events that require an agent qualifier to be specified use an
+additional qualifier
+.Dq Li agent= Ns Ar value ,
+where argument
+.Ar value
+is one of:
+.Bl -tag -width indent -compact
+.It Li this
+Measure events associated with this bus agent.
+.It Li any
+Measure events caused by any bus agent.
+.El
+The default is
+.Dq Li this .
+.Pp
+Events that require a hardware prefetch qualifier to be specified use an
+additional qualifier
+.Dq Li prefetch= Ns Ar value ,
+where argument
+.Ar value
+is one of:
+.Bl -tag -width "exclude" -compact
+.It Li both
+Include all prefetches.
+.It Li only
+Only count hardware prefetches.
+.It Li exclude
+Exclude hardware prefetches.
+.El
+The default is
+.Dq Li both .
+.Pp
+Events that require a cache coherence qualifier to be specified use an
+additional qualifier
+.Dq Li cachestate= Ns Ar value ,
+where argument
+.Ar value
+contains one or more of the following letters:
+.Bl -tag -width indent -compact
+.It Li e
+Count cache lines in the exclusive state.
+.It Li i
+Count cache lines in the invalid state.
+.It Li m
+Count cache lines in the modified state.
+.It Li s
+Count cache lines in the shared state.
+.El
+The default is
+.Dq Li eims .
+.Ss Event Specifiers
+The following event names are case insensitive.
+Whitespace, hyphens and underscore characters in these names are
+ignored.
+.Pp
+Core PMCs support the following events:
+.Bl -tag -width indent
+.It Li BAClears
+.Pq Event E6H , Umask 00H
+The number of BAClear conditions asserted.
+.It Li BTB_Misses
+.Pq Event E2H , Umask 00H
+The number of branches for which the branch table buffer did not
+produce a prediction.
+.It Li Br_BAC_Missp_Exec
+.Pq Event 8AH , Umask 00H
+The number of branch instructions executed that were mispredicted at
+the front end.
+.It Li Br_Bogus
+.Pq Event E4H , Umask 00H
+The number of bogus branches.
+.It Li Br_Call_Exec
+.Pq Event 92H , Umask 00H
+The number of
+.Li CALL
+instructions executed.
+.It Li Br_Call_Missp_Exec
+.Pq Event 93H , Umask 00H
+The number of
+.Li CALL
+instructions executed that were mispredicted.
+.It Li Br_Cnd_Exec
+.Pq Event 8BH , Umask 00H
+The number of conditional branch instructions executed.
+.It Li Br_Cnd_Missp_Exec
+.Pq Event 8CH , Umask 00H
+The number of conditional branch instructions executed that were mispredicted.
+.It Li Br_Ind_Call_Exec
+.Pq Event 94H , Umask 00H
+The number of indirect
+.Li CALL
+instructions executed.
+.It Li Br_Ind_Exec
+.Pq Event 8DH , Umask 00H
+The number of indirect branches executed.
+.It Li Br_Ind_Missp_Exec
+.Pq Event 8EH , Umask 00H
+The number of indirect branch instructions executed that were mispredicted.
+.It Li Br_Inst_Exec
+.Pq Event 88H , Umask 00H
+The number of branch instructions executed including speculative branches.
+.It Li Br_Instr_Decoded
+.Pq Event E0H , Umask 00H
+The number of branch instructions decoded.
+.It Li Br_Instr_Ret
+.Pq Event C4H , Umask 00H
+.Pq Alias Qq "Branch Instruction Retired"
+The number of branch instructions retired.
+This is an architectural performance event.
+.It Li Br_MisPred_Ret
+.Pq Event C5H , Umask 00H
+.Pq Alias Qq "Branch Misses Retired"
+The number of mispredicted branch instructions retired.
+This is an architectural performance event.
+.It Li Br_MisPred_Taken_Ret
+.Pq Event CAH , Umask 00H
+The number of taken and mispredicted branches retired.
+.It Li Br_Missp_Exec
+.Pq Event 89H , Umask 00H
+The number of branch instructions executed and mispredicted at
+execution including branches that were not predicted.
+.It Li Br_Ret_BAC_Missp_Exec
+.Pq Event 91H , Umask 00H
+The number of return branch instructions that were mispredicted at the
+front end.
+.It Li Br_Ret_Exec
+.Pq Event 8FH , Umask 00H
+The number of return branch instructions executed.
+.It Li Br_Ret_Missp_Exec
+.Pq Event 90H , Umask 00H
+The number of return branch instructions executed that were mispredicted.
+.It Li Br_Taken_Ret
+.Pq Event C9H , Umask 00H
+The number of taken branches retired.
+.It Li Bus_BNR_Clocks
+.Pq Event 61H , Umask 00H
+The number of external bus cycles while BNR (bus not ready) was asserted.
+.It Li Bus_DRDY_Clocks Op ,agent= Ns Ar agent
+.Pq Event 62H , Umask 00H
+The number of external bus cycles while DRDY was asserted.
+.It Li Bus_Data_Rcv
+.Pq Event 64H , Umask 40H
+.\" XXX Using the description in Core2 PMC documentation.
+The number of cycles during which the processor is busy receiving data.
+.It Li Bus_Locks_Clocks Op ,core= Ns Ar core
+.Pq Event 63H
+The number of external bus cycles while the bus lock signal was asserted.
+.It Li Bus_Not_In_Use Op ,core= Ns Ar core
+.Pq Event 7DH
+The number of cycles when there is no transaction from the core.
+.It Li Bus_Req_Outstanding Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 60H
+The weighted cycles of cacheable bus data read requests
+from the data cache unit or hardware prefetcher.
+.It Li Bus_Snoop_Stall
+.Pq Event 7EH , Umask 00H
+The number of bus cycles while a bus snoop is stalled.
+.It Li Bus_Snoops Xo
+.Op ,agent= Ns Ar agent
+.Op ,cachestate= Ns Ar mesi
+.Xc
+.Pq Event 77H
+.\" XXX Using the description in Core2 PMC documentation.
+The number of snoop responses to bus transactions.
+.It Li Bus_Trans_Any Op ,agent= Ns Ar agent
+.Pq Event 70H
+The number of completed bus transactions.
+.It Li Bus_Trans_Brd Op ,core= Ns Ar core
+.Pq Event 65H
+The number of read bus transactions.
+.It Li Bus_Trans_Burst Op ,agent= Ns Ar agent
+.Pq Event 6EH
+The number of completed burst transactions.
+Retried transactions may be counted more than once.
+.It Li Bus_Trans_Def Op ,core= Ns Ar core
+.Pq Event 6DH
+The number of completed deferred transactions.
+.It Li Bus_Trans_IO Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6CH
+The number of completed I/O transactions counting both reads and
+writes.
+.It Li Bus_Trans_Ifetch Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 68H
+The number of completed instruction fetch transactions.
+.It Li Bus_Trans_Inval Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 69H
+The number of completed invalidate transactions.
+.It Li Bus_Trans_Mem Op ,agent= Ns Ar agent
+.Pq Event 6FH
+The number of completed memory transactions.
+.It Li Bus_Trans_P Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6BH
+The number of completed partial transactions.
+.It Li Bus_Trans_Pwr Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6AH
+The number of completed partial write transactions.
+.It Li Bus_Trans_RFO Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 66H
+The number of completed read-for-ownership transactions.
+.It Li Bus_Trans_WB Op ,agent= Ns Ar agent
+.Pq Event 67H
+The number of completed write-back transactions from the data cache
+unit, excluding L2 write-backs.
+.It Li Cycles_Div_Busy
+.Pq Event 14H , Umask 00H
+The number of cycles the divider is busy.
+The event is only available on PMC0.
+.It Li Cycles_Int_Masked
+.Pq Event C6H , Umask 00H
+The number of cycles while interrupts were disabled.
+.It Li Cycles_Int_Pending_Masked
+.Pq Event C7H , Umask 00H
+The number of cycles while interrupts were disabled and interrupts
+were pending.
+.It Li DCU_Snoop_To_Share Op ,core= Ns Ar core
+.Pq Event 78H
+The number of data cache unit snoops to L1 cache lines in the shared
+state.
+.It Li DCache_Cache_Lock Op ,cachestate= Ns Ar mesi
+.\" XXX needs clarification
+.Pq Event 42H
+The number of cacheable locked read operations to invalid state.
+.It Li DCache_Cache_LD Op ,cachestate= Ns Ar mesi
+.Pq Event 40H
+The number of cacheable L1 data read operations.
+.It Li DCache_Cache_ST Op ,cachestate= Ns Ar mesi
+.Pq Event 41H
+The number of cacheable L1 data write operations.
+.It Li DCache_M_Evict
+.Pq Event 47H , Umask 00H
+The number of M state data cache lines that were evicted.
+.It Li DCache_M_Repl
+.Pq Event 46H , Umask 00H
+The number of M state data cache lines that were allocated.
+.It Li DCache_Pend_Miss
+.Pq Event 48H , Umask 00H
+The weighted cycles an L1 miss was outstanding.
+.It Li DCache_Repl
+.Pq Event 45H , Umask 0FH
+The number of data cache line replacements.
+.It Li Data_Mem_Cache_Ref
+.Pq Event 44H , Umask 02H
+The number of cacheable read and write operations to L1 data cache.
+.It Li Data_Mem_Ref
+.Pq Event 43H , Umask 01H
+The number of L1 data reads and writes, both cacheable and
+un-cacheable.
+.It Li Dbus_Busy Op ,core= Ns Ar core
+.Pq Event 22H
+The number of core cycles during which the data bus was busy.
+.It Li Dbus_Busy_Rd Op ,core= Ns Ar core
+.Pq Event 23H
+The number of cycles during which the data bus was busy transferring
+data to a core.
+.It Li Div
+.Pq Event 13H , Umask 00H
+The number of divide operations including speculative operations for
+integer and floating point divides.
+This event can only be counted on PMC1.
+.It Li Dtlb_Miss
+.Pq Event 49H , Umask 00H
+The number of data references that missed the TLB.
+.It Li ESP_Uops
+.Pq Event D7H , Umask 00H
+The number of ESP folding instructions decoded.
+.It Li EST_Trans Op ,trans= Ns Ar transition
+.Pq Event 3AH
+Count the number of Intel Enhanced SpeedStep transitions.
+The argument
+.Ar transition
+can be one of the following values:
+.Bl -tag -width indent -compact
+.It Li any
+(Umask 00H) Count all transitions.
+.It Li frequency
+(Umask 01H) Count frequency transitions.
+.El
+The default is
+.Dq Li any .
+.It Li FP_Assist
+.Pq Event 11H , Umask 00H
+The number of floating point operations that required microcode
+assists.
+The event is only available on PMC1.
+.It Li FP_Comp_Instr_Ret
+.Pq Event C1H , Umask 00H
+The number of X87 floating point compute instructions retired.
+The event is only available on PMC0.
+.It Li FP_Comps_Op_Exe
+.Pq Event 10H , Umask 00H
+The number of floating point computational instructions executed.
+.It Li FP_MMX_Trans
+.Pq Event CCH , Umask 01H
+The number of transitions from X87 to MMX.
+.It Li Fused_Ld_Uops_Ret
+.Pq Event DAH , Umask 01H
+The number of fused load uops retired.
+.It Li Fused_St_Uops_Ret
+.Pq Event DAH , Umask 02H
+The number of fused store uops retired.
+.It Li Fused_Uops_Ret
+.Pq Event DAH , Umask 00H
+The number of fused uops retired.
+.It Li HW_Int_Rx
+.Pq Event C8H , Umask 00H
+The number of hardware interrupts received.
+.It Li ICache_Misses
+.Pq Event 81H , Umask 00H
+The number of instruction fetch misses in the instruction cache and
+streaming buffers.
+.It Li ICache_Reads
+.Pq Event 80H , Umask 00H
+The number of instruction fetches from the instruction cache and
+streaming buffers counting both cacheable and un-cacheable fetches.
+.It Li IFU_Mem_Stall
+.Pq Event 86H , Umask 00H
+The number of cycles the instruction fetch unit was stalled while
+waiting for data from memory.
+.It Li ILD_Stall
+.Pq Event 87H , Umask 00H
+The number of instruction length decoder stalls.
+.It Li ITLB_Misses
+.Pq Event 85H , Umask 00H
+The number of instruction TLB misses.
+.It Li Instr_Decoded
+.Pq Event D0H , Umask 00H
+The number of instructions decoded.
+.It Li Instr_Ret
+.Pq Event C0H , Umask 00H
+.Pq Alias Qq "Instruction Retired"
+The number of instructions retired.
+This is an architectural performance event.
+.It Li L1_Pref_Req
+.Pq Event 4FH , Umask 00H
+The number of L1 prefetch requests due to data cache misses.
+.It Li L2_ADS Op ,core= Ns Ar core
+.Pq Event 21H
+The number of L2 address strobes.
+.It Li L2_IFetch Xo
+.Op ,cachestate= Ns Ar mesi
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 28H
+The number of instruction fetches by the instruction fetch unit from
+L2 cache including speculative fetches.
+.It Li L2_LD Xo
+.Op ,cachestate= Ns Ar mesi
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 29H
+The number of L2 cache reads.
+.It Li L2_Lines_In Xo
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 24H
+The number of L2 cache lines allocated.
+.It Li L2_Lines_Out Xo
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 26H
+The number of L2 cache lines evicted.
+.It Li L2_M_Lines_In Op ,core= Ns Ar core
+.Pq Event 25H
+The number of L2 M state cache lines allocated.
+.It Li L2_M_Lines_Out Xo
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 27H
+The number of L2 M state cache lines evicted.
+.It Li L2_No_Request_Cycles Xo
+.Op ,cachestate= Ns Ar mesi
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 32H
+The number of cycles there was no request to access L2 cache.
+.It Li L2_Reject_Cycles Xo
+.Op ,cachestate= Ns Ar mesi
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 30H
+The number of cycles the L2 cache was busy and rejecting new requests.
+.It Li L2_Rqsts Xo
+.Op ,cachestate= Ns Ar mesi
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 2EH
+The number of L2 cache requests.
+.It Li L2_ST Xo
+.Op ,cachestate= Ns Ar mesi
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 2AH
+The number of L2 cache writes including speculative writes.
+.It Li LD_Blocks
+.Pq Event 03H , Umask 00H
+The number of load operations delayed due to store buffer blocks.
+.It Li LLC_Misses
+.Pq Event 2EH , Umask 41H
+The number of cache misses for references to the last level cache,
+excluding misses due to hardware prefetches.
+This is an architectural performance event.
+.It Li LLC_Reference
+.Pq Event 2EH , Umask 4FH
+The number of references to the last level cache,
+excluding those due to
+hardware prefetches.
+This is an architectural performance event.
+.It Li MMX_Assist
+.Pq Event CDH , Umask 00H
+The number of EMMX instructions executed.
+.It Li MMX_FP_Trans
+.Pq Event CCH , Umask 00H
+The number of transitions from MMX to X87.
+.It Li MMX_Instr_Exec
+.Pq Event B0H , Umask 00H
+The number of MMX instructions executed excluding
+.Li MOVQ
+and
+.Li MOVD
+stores.
+.It Li MMX_Instr_Ret
+.Pq Event CEH , Umask 00H
+The number of MMX instructions retired.
+.It Li Misalign_Mem_Ref
+.Pq Event 05H , Umask 00H
+The number of misaligned data memory references, counting loads and
+stores.
+.It Li Mul
+.Pq Event 12H , Umask 00H
+The number of multiply operations including speculative floating point
+and integer multiplies.
+This event is available on PMC1 only.
+.It Li NonHlt_Ref_Cycles
+.Pq Event 3CH , Umask 01H
+.Pq Alias Qq "Unhalted Reference Cycles"
+The number of non-halted bus cycles.
+This is an architectural performance event.
+.It Li Pref_Rqsts_Dn
+.Pq Event F8H , Umask 00H
+The number of hardware prefetch requests issued in backward streams.
+.It Li Pref_Rqsts_Up
+.Pq Event F0H , Umask 00H
+The number of hardware prefetch requests issued in forward streams.
+.It Li Resource_Stall
+.Pq Event A2H , Umask 00H
+The number of cycles where there is a resource related stall.
+.It Li SD_Drains
+.Pq Event 04H , Umask 00H
+The number of cycles while draining store buffers.
+.It Li SIMD_FP_DP_P_Ret
+.Pq Event D8H , Umask 02H
+The number of SSE/SSE2 packed double precision instructions retired.
+.It Li SIMD_FP_DP_P_Comp_Ret
+.Pq Event D9H , Umask 02H
+The number of SSE/SSE2 packed double precision compute instructions
+retired.
+.It Li SIMD_FP_DP_S_Ret
+.Pq Event D8H , Umask 03H
+The number of SSE/SSE2 scalar double precision instructions retired.
+.It Li SIMD_FP_DP_S_Comp_Ret
+.Pq Event D9H , Umask 03H
+The number of SSE/SSE2 scalar double precision compute instructions
+retired.
+.It Li SIMD_FP_SP_P_Comp_Ret
+.Pq Event D9H , Umask 00H
+The number of SSE/SSE2 packed single precision compute instructions
+retired.
+.It Li SIMD_FP_SP_Ret
+.Pq Event D8H , Umask 00H
+The number of SSE/SSE2 single precision instructions retired,
+both packed and scalar.
+.It Li SIMD_FP_SP_S_Ret
+.Pq Event D8H , Umask 01H
+The number of SSE/SSE2 scalar single precision instructions retired.
+.It Li SIMD_FP_SP_S_Comp_Ret
+.Pq Event D9H , Umask 01H
+The number of SSE/SSE2 scalar single precision compute instructions retired.
+.It Li SIMD_Int_128_Ret
+.Pq Event D8H , Umask 04H
+The number of SSE2 128-bit integer instructions retired.
+.It Li SIMD_Int_Pari_Exec
+.Pq Event B3H , Umask 20H
+The number of SIMD integer packed arithmetic instructions executed.
+.It Li SIMD_Int_Pck_Exec
+.Pq Event B3H , Umask 04H
+The number of SIMD integer pack instructions executed.
+.It Li SIMD_Int_Plog_Exec
+.Pq Event B3H , Umask 10H
+The number of SIMD integer packed logical instructions executed.
+.It Li SIMD_Int_Pmul_Exec
+.Pq Event B3H , Umask 01H
+The number of SIMD integer packed multiply instructions executed.
+.It Li SIMD_Int_Psft_Exec
+.Pq Event B3H , Umask 02H
+The number of SIMD integer packed shift instructions executed.
+.It Li SIMD_Int_Sat_Exec
+.Pq Event B1H , Umask 00H
+The number of SIMD integer saturating instructions executed.
+.It Li SIMD_Int_Upck_Exec
+.Pq Event B3H , Umask 08H
+The number of SIMD integer unpack instructions executed.
+.It Li SMC_Detected
+.Pq Event C3H , Umask 00H
+The number of times self-modifying code was detected.
+.It Li SSE_NTStores_Miss
+.Pq Event 4BH , Umask 03H
+The number of times an SSE streaming store instruction missed all caches.
+.It Li SSE_NTStores_Ret
+.Pq Event 07H , Umask 03H
+The number of SSE streaming store instructions executed.
+.It Li SSE_PrefNta_Miss
+.Pq Event 4BH , Umask 00H
+The number of times
+.Li PREFETCHNTA
+missed all caches.
+.It Li SSE_PrefNta_Ret
+.Pq Event 07H , Umask 00H
+The number of
+.Li PREFETCHNTA
+instructions retired.
+.It Li SSE_PrefT1_Miss
+.Pq Event 4BH , Umask 01H
+The number of times
+.Li PREFETCHT1
+missed all caches.
+.It Li SSE_PrefT1_Ret
+.Pq Event 07H , Umask 01H
+The number of
+.Li PREFETCHT1
+instructions retired.
+.It Li SSE_PrefT2_Miss
+.Pq Event 4BH , Umask 02H
+The number of times
+.Li PREFETCHT2
+missed all caches.
+.It Li SSE_PrefT2_Ret
+.Pq Event 07H , Umask 02H
+The number of
+.Li PREFETCHT2
+instructions retired.
+.It Li Seg_Reg_Loads
+.Pq Event 06H , Umask 00H
+The number of segment register loads.
+.It Li Serial_Execution_Cycles
+.Pq Event 3CH , Umask 02H
+The number of non-halted bus cycles of this core while the other core
+was halted.
+.It Li Thermal_Trip
+.Pq Event 3BH , Umask C0H
+The duration in a thermal trip based on the current core clock.
+.It Li Unfusion
+.Pq Event DBH , Umask 00H
+The number of unfusion events.
+.It Li Unhalted_Core_Cycles
+.Pq Event 3CH , Umask 00H
+The number of core clock cycles when the clock signal on a specific
+core is not halted.
+This is an architectural performance event.
+.It Li Uops_Ret
+.Pq Event C2H , Umask 00H
+The number of micro-ops retired.
+.El
+.Ss Event Name Aliases
+The following table shows the mapping between the PMC-independent
+aliases supported by
+.Lb libpmc
+and the underlying hardware events used.
+.Bl -column "branch-mispredicts" "Description"
+.It Em Alias Ta Em Event
+.It Li branches Ta Li Br_Instr_Ret
+.It Li branch-mispredicts Ta Li Br_MisPred_Ret
+.It Li dc-misses Ta (unsupported)
+.It Li ic-misses Ta Li ICache_Misses
+.It Li instructions Ta Li Instr_Ret
+.It Li interrupts Ta Li HW_Int_Rx
+.It Li unhalted-cycles Ta (unsupported)
+.El
+.Sh PROCESSOR ERRATA
+The following errata affect performance measurement on these
+processors.
+These errata are documented in
+.Rs
+.%B Specification Update
+.%T Intel\(rg Core\(tm Duo Processor and Intel\(rg Core\(tm Solo Processor on 65 nm Process
+.%N Order Number 309222-017
+.%D July 2008
+.%Q Intel Corporation
+.Re
+.Bl -tag -width indent -compact
+.It AE19
+Data prefetch performance monitoring events can only be enabled
+on a single core.
+.It AE25
+Performance monitoring counters that count external bus events
+may report incorrect values after processor power state transitions.
+.It AE28
+Performance monitoring events for retired floating point operations
+(C1H) may not be accurate.
+.It AE29
+DR3 address match on MOVD/MOVQ/MOVNTQ memory store
+instruction may incorrectly increment performance monitoring count
+for saturating SIMD instructions retired (Event CFH).
+.It AE33
+Hardware prefetch performance monitoring events may be counted
+inaccurately.
+.It AE36
+The
+.Li CPU_CLK_UNHALTED
+performance monitoring event (Event 3CH) counts
+clocks when the processor is in the C1/C2 processor power states.
+.It AE39
+Certain performance monitoring counters related to bus, L2 cache
+and power management are inaccurate.
+.It AE51
+Performance monitoring events for retired instructions (Event C0H) may
+not be accurate.
+.It AE67
+Performance monitoring event
+.Li FP_ASSIST
+may not be accurate.
+.It AE78
+Performance monitoring event for hardware prefetch requests (Event
+4EH) and hardware prefetch request cache misses (Event 4FH) may not be
+accurate.
+.It AE82
+Performance monitoring event
+.Li FP_MMX_TRANS_TO_MMX
+may not count some transitions.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.core2.3 b/lib/libpmc/pmc.core2.3
new file mode 100644
index 0000000..3dbc0c8
--- /dev/null
+++ b/lib/libpmc/pmc.core2.3
@@ -0,0 +1,1124 @@
+.\" Copyright (c) 2008,2009 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd June 8, 2009
+.Dt PMC.CORE2 3
+.Os
+.Sh NAME
+.Nm pmc.core2
+.Nd measurement events for
+.Tn Intel
+.Tn Core2
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Core2"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to two classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Core2 PMCs are documented in
+.Rs
+.%B "IA-32 Intel(R) Architecture Software Developer's Manual"
+.%T "Volume 3: System Programming Guide"
+.%N "Order Number 253669-027US"
+.%D July 2008
+.%Q "Intel Corporation"
+.Re
+.Ss CORE2 FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+Not all CPUs in this family implement fixed-function counters.
+.Ss CORE2 PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers is specified, the default is to enable both.
+.Pp
+Events that require core-specificity to be specified use an
+additional qualifier
+.Dq Li core= Ns Ar core ,
+where argument
+.Ar core
+is one of:
+.Bl -tag -width indent
+.It Li all
+Measure event conditions on all cores.
+.It Li this
+Measure event conditions on this core.
+.El
+.Pp
+The default is
+.Dq Li this .
+.Pp
+Events that require an agent qualifier to be specified use an
+additional qualifier
+.Dq Li agent= Ns Ar agent ,
+where argument
+.Ar agent
+is one of:
+.Bl -tag -width indent
+.It Li this
+Measure events associated with this bus agent.
+.It Li any
+Measure events caused by any bus agent.
+.El
+.Pp
+The default is
+.Dq Li this .
+.Pp
+Events that require a hardware prefetch qualifier to be specified use an
+additional qualifier
+.Dq Li prefetch= Ns Ar prefetch ,
+where argument
+.Ar prefetch
+is one of:
+.Bl -tag -width "exclude"
+.It Li both
+Include all prefetches.
+.It Li only
+Only count hardware prefetches.
+.It Li exclude
+Exclude hardware prefetches.
+.El
+.Pp
+The default is
+.Dq Li both .
+.Pp
+Events that require a cache coherence qualifier to be specified use an
+additional qualifier
+.Dq Li cachestate= Ns Ar state ,
+where argument
+.Ar state
+contains one or more of the following letters:
+.Bl -tag -width indent
+.It Li e
+Count cache lines in the exclusive state.
+.It Li i
+Count cache lines in the invalid state.
+.It Li m
+Count cache lines in the modified state.
+.It Li s
+Count cache lines in the shared state.
+.El
+.Pp
+The default is
+.Dq Li eims .
+.Pp
+Events that require a snoop response qualifier to be specified use an
+additional qualifier
+.Dq Li snoopresponse= Ns Ar response ,
+where argument
+.Ar response
+comprises the following keywords separated by
+.Dq +
+signs:
+.Bl -tag -width indent
+.It Li clean
+Measure CLEAN responses.
+.It Li hit
+Measure HIT responses.
+.It Li hitm
+Measure HITM responses.
+.El
+.Pp
+The default is to measure all the above responses.
+.Pp
+Events that require a snoop type qualifier use an additional qualifier
+.Dq Li snooptype= Ns Ar type ,
+where argument
+.Ar type
+is one of the following keywords:
+.Bl -tag -width indent
+.It Li cmp2i
+Measure CMP2I snoops.
+.It Li cmp2s
+Measure CMP2S snoops.
+.El
+.Pp
+The default is to measure both snoops.
+.Ss Event Specifiers (Programmable PMCs)
+Core2 programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li BACLEARS
+.Pq Event E6H , Umask 00H
+The number of times the front end is resteered.
+.It Li BOGUS_BR
+.Pq Event E4H , Umask 00H
+The number of byte sequences mistakenly detected as taken branch
+instructions.
+.It Li BR_BAC_MISSP_EXEC
+.Pq Event 8AH , Umask 00H
+The number of branch instructions that were mispredicted when
+decoded.
+.It Li BR_CALL_MISSP_EXEC
+.Pq Event 93H , Umask 00H
+The number of mispredicted
+.Li CALL
+instructions that were executed.
+.It Li BR_CALL_EXEC
+.Pq Event 92H , Umask 00H
+The number of
+.Li CALL
+instructions executed.
+.It Li BR_CND_EXEC
+.Pq Event 8BH , Umask 00H
+The number of conditional branches executed, but not necessarily retired.
+.It Li BR_CND_MISSP_EXEC
+.Pq Event 8CH , Umask 00H
+The number of mispredicted conditional branches executed.
+.It Li BR_IND_CALL_EXEC
+.Pq Event 94H , Umask 00H
+The number of indirect
+.Li CALL
+instructions executed.
+.It Li BR_IND_EXEC
+.Pq Event 8DH , Umask 00H
+The number of indirect branch instructions executed.
+.It Li BR_IND_MISSP_EXEC
+.Pq Event 8EH , Umask 00H
+The number of mispredicted indirect branch instructions executed.
+.It Li BR_INST_DECODED
+.Pq Event E0H , Umask 00H
+The number of branch instructions decoded.
+.It Li BR_INST_EXEC
+.Pq Event 88H , Umask 00H
+The number of branches executed, but not necessarily retired.
+.It Li BR_INST_RETIRED.ANY
+.Pq Event C4H , Umask 00H
+.Pq Alias Qq "Branch Instruction Retired"
+The number of branch instructions retired.
+This is an architectural performance event.
+.It Li BR_INST_RETIRED.MISPRED
+.Pq Event C5H , Umask 00H
+.Pq Alias Qq "Branch Misses Retired"
+The number of mispredicted branch instructions retired.
+This is an architectural performance event.
+.It Li BR_INST_RETIRED.MISPRED_NOT_TAKEN
+.Pq Event C4H , Umask 02H
+The number of not taken branch instructions retired that were
+mispredicted.
+.It Li BR_INST_RETIRED.MISPRED_TAKEN
+.Pq Event C4H , Umask 08H
+The number of taken branch instructions retired that were mispredicted.
+.It Li BR_INST_RETIRED.PRED_NOT_TAKEN
+.Pq Event C4H , Umask 01H
+The number of not taken branch instructions retired that were
+correctly predicted.
+.It Li BR_INST_RETIRED.PRED_TAKEN
+.Pq Event C4H , Umask 04H
+The number of taken branch instructions retired that were correctly
+predicted.
+.It Li BR_INST_RETIRED.TAKEN
+.Pq Event C4H , Umask 0CH
+The number of taken branch instructions retired.
+.It Li BR_MISSP_EXEC
+.Pq Event 89H , Umask 00H
+The number of mispredicted branch instructions that were executed.
+.It Li BR_RET_MISSP_EXEC
+.Pq Event 90H , Umask 00H
+The number of mispredicted
+.Li RET
+instructions executed.
+.It Li BR_RET_BAC_MISSP_EXEC
+.Pq Event 91H , Umask 00H
+The number of
+.Li RET
+instructions executed that were mispredicted at decode time.
+.It Li BR_RET_EXEC
+.Pq Event 8FH , Umask 00H
+The number of
+.Li RET
+instructions executed.
+.It Li BR_TKN_BUBBLE_1
+.Pq Event 97H , Umask 00H
+The number of branches predicted taken with bubble 1.
+.It Li BR_TKN_BUBBLE_2
+.Pq Event 98H , Umask 00H
+The number of branches predicted taken with bubble 2.
+.It Li BUSQ_EMPTY Op ,core= Ns Ar core
+.Pq Event 7DH
+The number of cycles during which the core did not have any pending
+transactions in the bus queue.
+.It Li BUS_BNR_DRV Op ,agent= Ns Ar agent
+.Pq Event 61H
+The number of Bus Not Ready signals asserted on the bus.
+.It Li BUS_DATA_RCV Op ,core= Ns Ar core
+.Pq Event 64H
+The number of bus cycles during which the processor is receiving data.
+.It Li BUS_DRDY_CLOCKS Op ,agent= Ns Ar agent
+.Pq Event 62H
+The number of bus cycles during which the Data Ready signal is asserted
+on the bus.
+.It Li BUS_HIT_DRV Op ,agent= Ns Ar agent
+.Pq Event 7AH
+The number of bus cycles during which the processor drives the
+.Li HIT#
+pin.
+.It Li BUS_HITM_DRV Op ,agent= Ns Ar agent
+.Pq Event 7BH
+The number of bus cycles during which the processor drives the
+.Li HITM#
+pin.
+.It Li BUS_IO_WAIT Op ,core= Ns Ar core
+.Pq Event 7FH
+The number of core cycles during which I/O requests wait in the bus
+queue.
+.It Li BUS_LOCK_CLOCKS Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 63H
+The number of bus cycles during which the
+.Li LOCK
+signal was asserted on the bus.
+.It Li BUS_REQUEST_OUTSTANDING Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 60H
+The number of pending full cache line read transactions on the bus
+occurring in each cycle.
+.It Li BUS_TRANS_P Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6BH
+The number of partial bus transactions.
+.It Li BUS_TRANS_IFETCH Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 68H
+The number of instruction fetch full cache line bus transactions.
+.It Li BUS_TRANS_INVAL Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 69H
+The number of invalidate bus transactions.
+.It Li BUS_TRANS_PWR Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6AH
+The number of partial write bus transactions.
+.It Li BUS_TRANS_DEF Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6DH
+The number of deferred bus transactions.
+.It Li BUS_TRANS_BURST Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6EH
+The number of burst transactions.
+.It Li BUS_TRANS_MEM Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6FH
+The number of memory bus transactions.
+.It Li BUS_TRANS_ANY Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 70H
+The number of bus transactions of any kind.
+.It Li BUS_TRANS_BRD Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 65H
+The number of burst read transactions.
+.It Li BUS_TRANS_IO Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 6CH
+The number of completed I/O bus transactions due to
+.Li IN
+and
+.Li OUT
+instructions.
+.It Li BUS_TRANS_RFO Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 66H
+The number of Read For Ownership bus transactions.
+.It Li BUS_TRANS_WB Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 67H
+The number of explicit write-back bus transactions due to dirty line
+evictions.
+.It Li CMP_SNOOP Xo
+.Op ,core= Ns Ar core
+.Op ,snooptype= Ns Ar type
+.Xc
+.Pq Event 78H
+The number of times the L1 data cache is snooped by the other core in
+the same processor.
+.It Li CPU_CLK_UNHALTED.BUS
+.Pq Event 3CH , Umask 01H
+.Pq Alias Qq "Unhalted Reference Cycles"
+The number of bus cycles when the core is not in the halt state.
+This is an architectural performance event.
+.It Li CPU_CLK_UNHALTED.CORE_P
+.Pq Event 3CH , Umask 00H
+.Pq Alias Qq "Unhalted Core Cycles"
+The number of core cycles while the core is not in a halt state.
+This is an architectural performance event.
+.It Li CPU_CLK_UNHALTED.NO_OTHER
+.Pq Event 3CH , Umask 02H
+The number of bus cycles during which the core remains unhalted and
+the other core is halted.
+.It Li CYCLES_DIV_BUSY
+.Pq Event 14H , Umask 00H
+The number of cycles the divider is busy.
+This event is only available on PMC0.
+.It Li CYCLES_INT_MASKED
+.Pq Event C6H , Umask 01H
+The number of cycles during which interrupts are disabled.
+.It Li CYCLES_INT_PENDING_AND_MASKED
+.Pq Event C6H , Umask 02H
+The number of cycles during which there were pending interrupts while
+interrupts were disabled.
+.It Li CYCLES_L1I_MEM_STALLED
+.Pq Event 86H , Umask 00H
+The number of cycles for which an instruction fetch stalls.
+.It Li DELAYED_BYPASS.FP
+.Pq Event 19H , Umask 00H
+The number of floating point operations that used data immediately
+after the data was generated by a non floating point execution unit.
+.It Li DELAYED_BYPASS.LOAD
+.Pq Event 19H , Umask 01H
+The number of delayed bypass penalty cycles that a load operation incurred.
+.It Li DELAYED_BYPASS.SIMD
+.Pq Event 19H , Umask 02H
+The number of times SIMD operations use data immediately after the data
+was generated by a non-SIMD execution unit.
+.It Li DIV
+.Pq Event 13H , Umask 00H
+The number of divide operations executed.
+This event is only available on PMC1.
+.It Li DTLB_MISSES.ANY
+.Pq Event 08H , Umask 01H
+The number of Data TLB misses, including misses that result from
+speculative accesses.
+.It Li DTLB_MISSES.L0_MISS_LD
+.Pq Event 08H , Umask 04H
+The number of level 0 DTLB misses due to load operations.
+.It Li DTLB_MISSES.MISS_LD
+.Pq Event 08H , Umask 02H
+The number of Data TLB misses due to load operations.
+.It Li DTLB_MISSES.MISS_ST
+.Pq Event 08H , Umask 08H
+The number of Data TLB misses due to store operations.
+.It Li EIST_TRANS
+.Pq Event 3AH , Umask 00H
+The number of Enhanced Intel SpeedStep Technology transitions.
+.It Li ESP.ADDITIONS
+.Pq Event ABH , Umask 02H
+The number of automatic additions to the
+.Li %esp
+register.
+.It Li ESP.SYNCH
+.Pq Event ABH , Umask 01H
+The number of times the
+.Li %esp
+register was explicitly used in an address expression after
+it is implicitly used by a
+.Li PUSH
+or
+.Li POP
+instruction.
+.It Li EXT_SNOOP Xo
+.Op ,agent= Ns Ar agent
+.Op ,snoopresponse= Ns Ar response
+.Xc
+.Pq Event 77H
+The number of snoop responses to bus transactions.
+.It Li FP_ASSIST
+.Pq Event 11H , Umask 00H
+The number of floating point operations executed that needed
+a microcode assist.
+.It Li FP_COMP_OPS_EXE
+.Pq Event 10H , Umask 00H
+The number of floating point computational micro-ops executed.
+The event is available only on PMC0.
+.It Li FP_MMX_TRANS_TO_FP
+.Pq Event CCH , Umask 02H
+The number of transitions from MMX instructions to floating point
+instructions.
+.It Li FP_MMX_TRANS_TO_MMX
+.Pq Event CCH , Umask 01H
+The number of transitions from floating point instructions to MMX
+instructions.
+.It Li HW_INT_RCV
+.Pq Event C8H , Umask 00H
+The number of hardware interrupts received.
+.It Li IDLE_DURING_DIV
+.Pq Event 18H , Umask 00H
+The number of cycles the divider is busy and no other execution unit
+or load operation was in progress.
+This event is available only on PMC0.
+.It Li ILD_STALL
+.Pq Event 87H , Umask 00H
+The number of cycles the instruction length decoder stalled due to a
+length changing prefix.
+.It Li INST_QUEUE.FULL
+.Pq Event 83H , Umask 02H
+The number of cycles during which the instruction queue is full.
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 00H
+.Pq Alias Qq "Instruction Retired"
+The number of instructions retired.
+This is an architectural performance event.
+.It Li INST_RETIRED.LOADS
+.Pq Event C0H , Umask 01H
+The number of instructions retired that contained a load operation.
+.It Li INST_RETIRED.OTHER
+.Pq Event C0H , Umask 04H
+The number of instructions retired that did not contain a load or a
+store operation.
+.It Li INST_RETIRED.STORES
+.Pq Event C0H , Umask 02H
+The number of instructions retired that contained a store operation.
+.It Li INST_RETIRED.VM_H
+.Pq Event C0H , Umask 08H
+.Pq Tn Core2Extreme
+The number of instructions retired while in VMX root operation.
+.It Li ITLB.FLUSH
+.Pq Event 82H , Umask 40H
+The number of ITLB flushes.
+.It Li ITLB.LARGE_MISS
+.Pq Event 82H , Umask 10H
+The number of instruction fetches from large pages that miss the
+ITLB.
+.It Li ITLB.MISSES
+.Pq Event 82H , Umask 12H
+The number of instruction fetches from both large and small pages that
+miss the ITLB.
+.It Li ITLB.SMALL_MISS
+.Pq Event 82H , Umask 02H
+The number of instruction fetches from small pages that miss the ITLB.
+.It Li ITLB_MISS_RETIRED
+.Pq Event C9H , Umask 00H
+The number of retired instructions that missed the ITLB when they were
+fetched.
+.It Li L1D_ALL_REF
+.Pq Event 43H , Umask 01H
+The number of references to L1 data cache counting loads and stores
+to all memory types.
+.It Li L1D_ALL_CACHE_REF
+.Pq Event 43H , Umask 02H
+The number of data reads and writes to cacheable memory.
+.It Li L1D_CACHE_LOCK Op ,cachestate= Ns Ar state
+.Pq Event 42H
+The number of locked reads from cacheable memory.
+.It Li L1D_CACHE_LOCK_DURATION
+.Pq Event 42H , Umask 10H
+The number of cycles during which any cache line is locked by any
+locking instruction.
+.It Li L1D_CACHE_LD Op ,cachestate= Ns Ar state
+.Pq Event 40H
+The number of data reads from cacheable memory excluding locked
+reads.
+.It Li L1D_CACHE_ST Op ,cachestate= Ns Ar state
+.Pq Event 41H
+The number of data writes to cacheable memory excluding locked
+writes.
+.It Li L1D_M_EVICT
+.Pq Event 47H , Umask 00H
+The number of modified cache lines evicted from L1 data cache.
+.It Li L1D_M_REPL
+.Pq Event 46H , Umask 00H
+The number of modified lines allocated in L1 data cache.
+.It Li L1D_PEND_MISS
+.Pq Event 48H , Umask 00H
+The total number of outstanding L1 data cache misses at any clock.
+.It Li L1D_PREFETCH.REQUESTS
+.Pq Event 4EH , Umask 10H
+The number of times L1 data cache requested to prefetch a data cache
+line.
+.It Li L1D_REPL
+.Pq Event 45H , Umask 0FH
+The number of lines brought into L1 data cache.
+.It Li L1D_SPLIT.LOADS
+.Pq Event 49H , Umask 01H
+The number of load operations that span two cache lines.
+.It Li L1D_SPLIT.STORES
+.Pq Event 49H , Umask 02H
+The number of store operations that span two cache lines.
+.It Li L1I_MISSES
+.Pq Event 81H , Umask 00H
+The number of instruction fetch unit misses.
+.It Li L1I_READS
+.Pq Event 80H , Umask 00H
+The number of instruction fetches.
+.It Li L2_ADS Op ,core= Ns Ar core
+.Pq Event 21H
+The number of cycles that the L2 address bus is in use.
+.It Li L2_DBUS_BUSY_RD Op ,core= Ns Ar core
+.Pq Event 23H
+The number of cycles during which the L2 data bus is busy transferring
+data to the core.
+.It Li L2_IFETCH Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 28H
+The number of instruction cache line requests from the instruction
+fetch unit.
+.It Li L2_LD Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 29H
+The number of L2 cache read requests from L1 cache and L2
+prefetchers.
+.It Li L2_LINES_IN Xo
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 24H
+The number of cache lines allocated in L2 cache.
+.It Li L2_LINES_OUT Xo
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 26H
+The number of L2 cache lines evicted.
+.It Li L2_LOCK Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 2BH
+The number of locked accesses to cache lines that miss L1 data
+cache.
+.It Li L2_M_LINES_IN Op ,core= Ns Ar core
+.Pq Event 25H
+The number of L2 cache line modifications.
+.It Li L2_M_LINES_OUT Xo
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 27H
+The number of modified lines evicted from L2 cache.
+.It Li L2_NO_REQ Op ,core= Ns Ar core
+.Pq Event 32H
+The number of cycles during which no L2 cache requests were pending
+from a core.
+.It Li L2_REJECT_BUSQ Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 30H
+The number of L2 cache requests that were rejected.
+.It Li L2_RQSTS Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Op ,prefetch= Ns Ar prefetch
+.Xc
+.Pq Event 2EH
+The number of completed L2 cache requests.
+.It Li L2_RQSTS.SELF.DEMAND.I_STATE
+.Pq Event 2EH , Umask 41H
+.Pq Alias Qq "LLC Misses"
+The number of completed L2 cache demand requests from this core that
+missed the L2 cache.
+This is an architectural performance event.
+.It Li L2_RQSTS.SELF.DEMAND.MESI
+.Pq Event 2EH , Umask 4FH
+.Pq Alias Qq "LLC References"
+The number of completed L2 cache demand requests from this core.
+This is an architectural performance event.
+.It Li L2_ST Xo
+.Op ,cachestate= Ns Ar state
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 2AH
+The number of store operations that miss the L1 cache and request data
+from the L2 cache.
+.It Li LOAD_BLOCK.L1D
+.Pq Event 03H , Umask 20H
+The number of loads blocked by the L1 data cache.
+.It Li LOAD_BLOCK.OVERLAP_STORE
+.Pq Event 03H , Umask 08H
+The number of loads that partially overlap an earlier store or are
+aliased with a previous store.
+.It Li LOAD_BLOCK.STA
+.Pq Event 03H , Umask 02H
+The number of loads blocked by preceding stores whose address is yet
+to be calculated.
+.It Li LOAD_BLOCK.STD
+.Pq Event 03H , Umask 04H
+The number of loads blocked by preceding stores to the same address
+whose data value is not known.
+.It Li LOAD_BLOCK.UNTIL_RETIRE
+.Pq Event 03H , Umask 10H
+The number of load operations that were blocked until retirement.
+.It Li LOAD_HIT_PRE
+.Pq Event 4CH , Umask 00H
+The number of load operations that conflicted with a prefetch to the
+same cache line.
+.It Li MACHINE_NUKES.SMC
+.Pq Event C3H , Umask 01H
+The number of times a program writes to a code section.
+.It Li MACHINE_NUKES.MEM_ORDER
+.Pq Event C3H , Umask 04H
+The number of times the execution pipeline was restarted due to a
+memory ordering conflict or memory disambiguation misprediction.
+.It Li MACRO_INSTS.CISC_DECODED
+.Pq Event AAH , Umask 08H
+The number of complex instructions decoded.
+.It Li MACRO_INSTS.DECODED
+.Pq Event AAH , Umask 01H
+The number of instructions decoded.
+.It Li MEMORY_DISAMBIGUATION.RESET
+.Pq Event 09H , Umask 01H
+The number of cycles during which memory disambiguation misprediction
+occurs.
+.It Li MEMORY_DISAMBIGUATION.SUCCESS
+.Pq Event 09H , Umask 02H
+The number of load operations that were successfully disambiguated.
+.It Li MEM_LOAD_RETIRED.DTLB_MISS
+.Pq Event CBH , Umask 10H
+The number of retired loads that missed the DTLB.
+.It Li MEM_LOAD_RETIRED.L1D_LINE_MISS
+.Pq Event CBH , Umask 02H
+The number of retired load operations that missed L1 data cache and
+that sent a request to L2 cache.
+This event is only available on PMC0.
+.It Li MEM_LOAD_RETIRED.L1D_MISS
+.Pq Event CBH , Umask 01H
+The number of retired load operations that missed L1 data cache.
+This event is only available on PMC0.
+.It Li MEM_LOAD_RETIRED.L2_LINE_MISS
+.Pq Event CBH , Umask 08H
+The number of load operations that missed L2 cache and that caused a
+bus request.
+.It Li MEM_LOAD_RETIRED.L2_MISS
+.Pq Event CBH , Umask 04H
+The number of load operations that missed L2 cache.
+.It Li MUL
+.Pq Event 12H , Umask 00H
+The number of multiply operations executed.
+This event is only available on PMC1.
+.It Li PAGE_WALKS.COUNT
+.Pq Event 0CH , Umask 01H
+The number of page walks executed due to an ITLB or DTLB miss.
+.It Li PAGE_WALKS.CYCLES
+.Pq Event 0CH , Umask 02H
+The number of cycles spent in a page walk caused by an ITLB or DTLB
+miss.
+.It Li PREF_RQSTS_DN
+.Pq Event F8H , Umask 00H
+The number of downward prefetches issued from the Data Prefetch Logic
+unit to L2 cache.
+.It Li PREF_RQSTS_UP
+.Pq Event F0H , Umask 00H
+The number of upward prefetches issued from the Data Prefetch Logic
+unit to L2 cache.
+.It Li RAT_STALLS.ANY
+.Pq Event D2H , Umask 0FH
+The number of stall cycles due to any of
+.Li RAT_STALLS.FLAGS ,
+.Li RAT_STALLS.FPSW ,
+.Li RAT_STALLS.PARTIAL_CYCLES
+and
+.Li RAT_STALLS.ROB_READ_PORT .
+.It Li RAT_STALLS.FLAGS
+.Pq Event D2H , Umask 04H
+The number of cycles execution stalled due to a flag register induced
+stall.
+.It Li RAT_STALLS.FPSW
+.Pq Event D2H , Umask 08H
+The number of times the floating point status word was written.
+.It Li RAT_STALLS.OTHER_SERIALIZATION_STALLS
+.Pq Event D2H , Umask 10H , Tn Core2Extreme
+The number of stalls due to other RAT resource serialization not
+counted by umask 0FH.
+.It Li RAT_STALLS.PARTIAL_CYCLES
+.Pq Event D2H , Umask 02H
+The number of cycles of added instruction execution latency due to the
+use of a register that was partially written by previous instructions.
+.It Li RAT_STALLS.ROB_READ_PORT
+.Pq Event D2H , Umask 01H
+The number of cycles when ROB read port stalls occurred.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event DCH , Umask 1FH
+The number of cycles during which any resource related stall
+occurred.
+.It Li RESOURCE_STALLS.BR_MISS_CLEAR
+.Pq Event DCH , Umask 10H
+The number of cycles stalled due to branch misprediction.
+.It Li RESOURCE_STALLS.FPCW
+.Pq Event DCH , Umask 08H
+The number of cycles stalled due to writing the floating point control
+word.
+.It Li RESOURCE_STALLS.LD_ST
+.Pq Event DCH , Umask 04H
+The number of cycles during which the number of loads and stores in
+the pipeline exceeded their limits.
+.It Li RESOURCE_STALLS.ROB_FULL
+.Pq Event DCH , Umask 01H
+The number of cycles when the reorder buffer was full.
+.It Li RESOURCE_STALLS.RS_FULL
+.Pq Event DCH , Umask 02H
+The number of cycles during which the RS was full.
+.It Li RS_UOPS_DISPATCHED
+.Pq Event A0H , Umask 00H
+The number of micro-ops dispatched for execution.
+.It Li RS_UOPS_DISPATCHED.PORT0
+.Pq Event A1H , Umask 01H
+The number of cycles micro-ops were dispatched for execution on port
+0.
+.It Li RS_UOPS_DISPATCHED.PORT1
+.Pq Event A1H , Umask 02H
+The number of cycles micro-ops were dispatched for execution on port
+1.
+.It Li RS_UOPS_DISPATCHED.PORT2
+.Pq Event A1H , Umask 04H
+The number of cycles micro-ops were dispatched for execution on port
+2.
+.It Li RS_UOPS_DISPATCHED.PORT3
+.Pq Event A1H , Umask 08H
+The number of cycles micro-ops were dispatched for execution on port
+3.
+.It Li RS_UOPS_DISPATCHED.PORT4
+.Pq Event A1H , Umask 10H
+The number of cycles micro-ops were dispatched for execution on port
+4.
+.It Li RS_UOPS_DISPATCHED.PORT5
+.Pq Event A1H , Umask 20H
+The number of cycles micro-ops were dispatched for execution on port
+5.
+.It Li SB_DRAIN_CYCLES
+.Pq Event 04H , Umask 01H
+The number of cycles while the store buffer is draining.
+.It Li SEGMENT_REG_LOADS
+.Pq Event 06H , Umask 00H
+The number of segment register loads.
+.It Li SEG_REG_RENAMES.ANY
+.Pq Event D5H , Umask 0FH
+The number of times any segment register was renamed.
+.It Li SEG_REG_RENAMES.DS
+.Pq Event D5H , Umask 02H
+The number of times the
+.Li %ds
+register is renamed.
+.It Li SEG_REG_RENAMES.ES
+.Pq Event D5H , Umask 01H
+The number of times the
+.Li %es
+register is renamed.
+.It Li SEG_REG_RENAMES.FS
+.Pq Event D5H , Umask 04H
+The number of times the
+.Li %fs
+register is renamed.
+.It Li SEG_REG_RENAMES.GS
+.Pq Event D5H , Umask 08H
+The number of times the
+.Li %gs
+register is renamed.
+.It Li SEG_RENAME_STALLS.ANY
+.Pq Event D4H , Umask 0FH
+The number of stalls due to lack of resource to rename any segment
+register.
+.It Li SEG_RENAME_STALLS.DS
+.Pq Event D4H , Umask 02H
+The number of stalls due to lack of renaming resources for the
+.Li %ds
+register.
+.It Li SEG_RENAME_STALLS.ES
+.Pq Event D4H , Umask 01H
+The number of stalls due to lack of renaming resources for the
+.Li %es
+register.
+.It Li SEG_RENAME_STALLS.FS
+.Pq Event D4H , Umask 04H
+The number of stalls due to lack of renaming resources for the
+.Li %fs
+register.
+.It Li SEG_RENAME_STALLS.GS
+.Pq Event D4H , Umask 08H
+The number of stalls due to lack of renaming resources for the
+.Li %gs
+register.
+.It Li SIMD_ASSIST
+.Pq Event CDH , Umask 00H
+The number of SIMD assists invoked.
+.It Li SIMD_COMP_INST_RETIRED.PACKED_DOUBLE
+.Pq Event CAH , Umask 04H
+The number of computational SSE2 packed double precision instructions
+retired.
+.It Li SIMD_COMP_INST_RETIRED.PACKED_SINGLE
+.Pq Event CAH , Umask 01H
+The number of computational SSE2 packed single precision instructions
+retired.
+.It Li SIMD_COMP_INST_RETIRED.SCALAR_DOUBLE
+.Pq Event CAH , Umask 08H
+The number of computational SSE2 scalar double precision instructions
+retired.
+.It Li SIMD_COMP_INST_RETIRED.SCALAR_SINGLE
+.Pq Event CAH , Umask 02H
+The number of computational SSE2 scalar single precision instructions
+retired.
+.It Li SIMD_INSTR_RETIRED
+.Pq Event CEH , Umask 00H
+The number of retired SIMD instructions that use MMX registers.
+.It Li SIMD_INST_RETIRED.ANY
+.Pq Event C7H , Umask 1FH
+The number of streaming SIMD instructions retired.
+.It Li SIMD_INST_RETIRED.PACKED_DOUBLE
+.Pq Event C7H , Umask 04H
+The number of SSE2 packed double precision instructions retired.
+.It Li SIMD_INST_RETIRED.PACKED_SINGLE
+.Pq Event C7H , Umask 01H
+The number of SSE packed single precision instructions retired.
+.It Li SIMD_INST_RETIRED.SCALAR_DOUBLE
+.Pq Event C7H , Umask 08H
+The number of SSE2 scalar double precision instructions retired.
+.It Li SIMD_INST_RETIRED.SCALAR_SINGLE
+.Pq Event C7H , Umask 02H
+The number of SSE scalar single precision instructions retired.
+.It Li SIMD_INST_RETIRED.VECTOR
+.Pq Event C7H , Umask 10H
+The number of SSE2 vector instructions retired.
+.It Li SIMD_SAT_INSTR_RETIRED
+.Pq Event CFH , Umask 00H
+The number of saturated arithmetic SIMD instructions retired.
+.It Li SIMD_SAT_UOP_EXEC
+.Pq Event B1H , Umask 00H
+The number of SIMD saturated arithmetic micro-ops executed.
+.It Li SIMD_UOPS_EXEC
+.Pq Event B0H , Umask 00H
+The number of SIMD micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.ARITHMETIC
+.Pq Event B3H , Umask 20H
+The number of SIMD packed arithmetic micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.LOGICAL
+.Pq Event B3H , Umask 10H
+The number of SIMD packed logical micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.MUL
+.Pq Event B3H , Umask 01H
+The number of SIMD packed multiply micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.PACK
+.Pq Event B3H , Umask 04H
+The number of SIMD pack micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.SHIFT
+.Pq Event B3H , Umask 02H
+The number of SIMD packed shift micro-ops executed.
+.It Li SIMD_UOP_TYPE_EXEC.UNPACK
+.Pq Event B3H , Umask 08H
+The number of SIMD unpack micro-ops executed.
+.It Li SNOOP_STALL_DRV Xo
+.Op ,agent= Ns Ar agent
+.Op ,core= Ns Ar core
+.Xc
+.Pq Event 7EH
+The number of times the bus stalled for snoops.
+.It Li SSE_PRE_EXEC.L1
+.Pq Event 07H , Umask 01H
+The number of
+.Li PREFETCHT0
+instructions executed.
+.It Li SSE_PRE_EXEC.L2
+.Pq Event 07H , Umask 02H
+The number of
+.Li PREFETCHT1
+instructions executed.
+.It Li SSE_PRE_EXEC.NTA
+.Pq Event 07H , Umask 00H
+The number of
+.Li PREFETCHNTA
+instructions executed.
+.It Li SSE_PRE_EXEC.STORES
+.Pq Event 07H , Umask 03H
+The number of times SSE non-temporal store instructions were executed.
+.It Li SSE_PRE_MISS.L1
+.Pq Event 4BH , Umask 01H
+The number of times the
+.Li PREFETCHT0
+instruction executed and missed all cache levels.
+.It Li SSE_PRE_MISS.L2
+.Pq Event 4BH , Umask 02H
+The number of times the
+.Li PREFETCHT1
+instruction executed and missed all cache levels.
+.It Li SSE_PRE_MISS.NTA
+.Pq Event 4BH , Umask 00H
+The number of times the
+.Li PREFETCHNTA
+instruction executed and missed all cache levels.
+.It Li STORE_BLOCK.ORDER
+.Pq Event 04H , Umask 02H
+The number of cycles while a store was waiting for another store to be
+globally observed.
+.It Li STORE_BLOCK.SNOOP
+.Pq Event 04H , Umask 08H
+The number of cycles while a store was blocked due to a conflict with
+an internal or external snoop.
+.It Li THERMAL_TRIP
+.Pq Event 3BH , Umask C0H
+The number of thermal trips.
+.It Li UOPS_RETIRED.LD_IND_BR
+.Pq Event C2H , Umask 01H
+The number of micro-ops retired that fused a load with another
+operation.
+.It Li UOPS_RETIRED.STD_STA
+.Pq Event C2H , Umask 02H
+The number of store address calculations that fused into one micro-op.
+.It Li UOPS_RETIRED.MACRO_FUSION
+.Pq Event C2H , Umask 04H
+The number of times retired instruction pairs were fused into one
+micro-op.
+.It Li UOPS_RETIRED.FUSED
+.Pq Event C2H , Umask 07H
+The number of fused micro-ops retired.
+.It Li UOPS_RETIRED.NON_FUSED
+.Pq Event C2H , Umask 08H
+The number of non-fused micro-ops retired.
+.It Li UOPS_RETIRED.ANY
+.Pq Event C2H , Umask 0FH
+The number of micro-ops retired.
+.It Li X87_OPS_RETIRED.ANY
+.Pq Event C1H , Umask FEH
+The number of floating point computational instructions retired.
+.It Li X87_OPS_RETIRED.FXCH
+.Pq Event C1H , Umask 01H
+The number of
+.Li FXCH
+instructions retired.
+.El
+.Ss Event Name Aliases
+The following table shows the mapping between the PMC-independent
+aliases supported by
+.Lb libpmc
+and the underlying hardware events used.
+.Bl -column "branch-mispredicts" "cpu_clk_unhalted.core_p" "PMC Class"
+.It Em Alias Ta Em Event Ta Em PMC Class
+.It Li branches Ta Li BR_INST_RETIRED.ANY Ta Li PMC_CLASS_IAP
+.It Li branch-mispredicts Ta Li BR_INST_RETIRED.MISPRED Ta Li PMC_CLASS_IAP
+.It Li ic-misses Ta Li L1I_MISSES Ta Li PMC_CLASS_IAP
+.It Li instructions Ta Li INST_RETIRED.ANY_P Ta Li PMC_CLASS_IAF
+.It Li interrupts Ta Li HW_INT_RCV Ta Li PMC_CLASS_IAP
+.It Li unhalted-cycles Ta Li CPU_CLK_UNHALTED.CORE_P Ta Li PMC_CLASS_IAF
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.corei7.3 b/lib/libpmc/pmc.corei7.3
new file mode 100644
index 0000000..679313f
--- /dev/null
+++ b/lib/libpmc/pmc.corei7.3
@@ -0,0 +1,1581 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Dt PMC.COREI7 3
+.Os
+.Sh NAME
+.Nm pmc.corei7
+.Nd measurement events for
+.Tn Intel
+.Tn Core i7 and Xeon 5500
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Core i7"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to three classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Core i7 and Xeon 5500 PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss COREI7 AND XEON 5500 FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+Not all CPUs in this family implement fixed-function counters.
+.Ss COREI7 AND XEON 5500 PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full
+and partial cachelines as well as demand data page table entry
+cacheline reads. Does not count L2 data read prefetches or
+instruction fetches.
+.It Li DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership
+(RFO) requests generated by a write to data cacheline. Does not
+count L2 RFO.
+.It Li DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline
+reads. Does not count L2 code read prefetches.
+.It Li WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li PF_RFO
+Counts the number of RFO requests generated by L2 prefetchers.
+.It Li PF_IFETCH
+Counts the number of code reads generated by L2 prefetchers.
+.It Li OTHER
+Counts one of the following transaction types, including L3 invalidate,
+I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences,
+lock, unlock, split lock.
+.It Li UNCORE_HIT
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+with no coherency actions required (snooping).
+.It Li OTHER_CORE_HIT_SNP
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and was serviced by another core with a cross core snoop where no modified
+copies were found (clean).
+.It Li OTHER_CORE_HITM
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and was serviced by another core with a cross core snoop where modified
+copies were found (HITM).
+.It Li REMOTE_CACHE_FWD
+L3 Miss: local homed requests that missed the L3 cache and were serviced
+by forwarded data following a cross package snoop where no modified
+copies were found. (Remote home requests are not counted.)
+.It Li REMOTE_DRAM
+L3 Miss: remote home requests that missed the L3 cache and were serviced
+by remote DRAM.
+.It Li LOCAL_DRAM
+L3 Miss: local home requests that missed the L3 cache and were serviced
+by local DRAM.
+.It Li NON_DRAM
+Non-DRAM requests that were serviced by IOH.
+.El
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li SB_DRAIN.ANY
+.Pq Event 04H , Umask 07H
+Counts the number of store buffer drains.
+.It Li STORE_BLOCKS.AT_RET
+.Pq Event 06H , Umask 04H
+Counts number of loads delayed with at-Retirement block code. The following
+loads need to be executed at retirement and wait for all senior stores on
+the same thread to be drained: load splitting across 4K boundary (page
+split), load accessing uncacheable (UC or USWC) memory, load lock, and load
+with page table in UC or USWC memory region.
+.It Li STORE_BLOCKS.L1D_BLOCK
+.Pq Event 06H , Umask 08H
+Cacheable loads delayed with L1D block code
+.It Li PARTIAL_ADDRESS_ALIAS
+.Pq Event 07H , Umask 01H
+Counts false dependency due to partial address aliasing
+.It Li DTLB_LOAD_MISSES.ANY
+.Pq Event 08H , Umask 01H
+Counts all load misses that cause a page walk
+.It Li DTLB_LOAD_MISSES.WALK_COMPLETED
+.Pq Event 08H , Umask 02H
+Counts number of completed page walks due to load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.STLB_HIT
+.Pq Event 08H , Umask 10H
+Number of cache load STLB hits
+.It Li DTLB_LOAD_MISSES.PDE_MISS
+.Pq Event 08H , Umask 20H
+Number of DTLB cache load misses where the low part of the linear to
+physical address translation was missed.
+.It Li DTLB_LOAD_MISSES.PDP_MISS
+.Pq Event 08H , Umask 40H
+Number of DTLB cache load misses where the high part of the linear to
+physical address translation was missed.
+.It Li DTLB_LOAD_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 08H , Umask 80H
+Counts number of completed large page walks due to load miss in the STLB.
+.It Li MEM_INST_RETIRED.LOADS
+.Pq Event 0BH , Umask 01H
+Counts the number of instructions with an architecturally-visible load
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.STORES
+.Pq Event 0BH , Umask 02H
+Counts the number of instructions with an architecturally-visible store
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD
+.Pq Event 0BH , Umask 10H
+Counts the number of instructions exceeding the latency specified with
+ld_lat facility.
+In conjunction with ld_lat facility
+.It Li MEM_STORE_RETIRED.DTLB_MISS
+.Pq Event 0CH , Umask 01H
+The event counts the number of retired stores that missed the DTLB. The DTLB
+miss is not counted if the store operation causes a fault. Does not count
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH , Umask 01H
+Counts the number of Uops issued by the Register Allocation Table to the
+Reservation Station, i.e. the UOPs issued from the front end to the back
+end.
+.It Li UOPS_ISSUED.STALLED_CYCLES
+.Pq Event 0EH , Umask 01H
+Counts the number of cycles no Uops issued by the Register Allocation Table
+to the Reservation Station, i.e. the UOPs issued from the front end to the
+back end.
+Set invert=1, cmask=1.
+.It Li UOPS_ISSUED.FUSED
+.Pq Event 0EH , Umask 02H
+Counts the number of fused Uops that were issued from the Register
+Allocation Table to the Reservation Station.
+.It Li MEM_UNCORE_RETIRED.L3_DATA_MISS_UNKNOWN
+.Pq Event 0FH , Umask 01H
+Counts number of memory load instructions retired where the memory reference
+missed L3 and data source is unknown.
+Available only for CPUID signature 06_2EH
+.It Li MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM
+.Pq Event 0FH , Umask 02H
+Counts number of memory load instructions retired where the memory reference
+hit modified data in a sibling core residing on the same socket.
+.It Li MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT
+.Pq Event 0FH , Umask 08H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and HIT in a remote socket's cache. Only
+counts locally homed lines.
+.It Li MEM_UNCORE_RETIRED.REMOTE_DRAM
+.Pq Event 0FH , Umask 10H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and was remotely homed. This includes both
+DRAM access and HITM in a remote socket's cache for remotely homed lines.
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM
+.Pq Event 0FH , Umask 20H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and required a local socket memory
+reference. This includes locally homed cachelines that were in a modified
+state in another socket.
+.It Li MEM_UNCORE_RETIRED.UNCACHEABLE
+.Pq Event 0FH , Umask 80H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and to perform I/O.
+Available only for CPUID signature 06_2EH
+.It Li FP_COMP_OPS_EXE.X87
+.Pq Event 10H , Umask 01H
+Counts the number of FP Computational Uops Executed. The number of FADD,
+FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTs, integer
+DIVs, and IDIVs. This event does not distinguish an FADD used in the middle
+of a transcendental flow from a separate FADD instruction.
+.It Li FP_COMP_OPS_EXE.MMX
+.Pq Event 10H , Umask 02H
+Counts number of MMX Uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP
+.Pq Event 10H , Umask 04H
+Counts number of SSE and SSE2 FP uops executed.
+.It Li FP_COMP_OPS_EXE.SSE2_INTEGER
+.Pq Event 10H , Umask 08H
+Counts number of SSE2 integer uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED
+.Pq Event 10H , Umask 10H
+Counts number of SSE FP packed uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR
+.Pq Event 10H , Umask 20H
+Counts number of SSE FP scalar uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION
+.Pq Event 10H , Umask 40H
+Counts number of SSE* FP single precision uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION
+.Pq Event 10H , Umask 80H
+Counts number of SSE* FP double precision uops executed.
+.It Li SIMD_INT_128.PACKED_MPY
+.Pq Event 12H , Umask 01H
+Counts number of 128 bit SIMD integer multiply operations.
+.It Li SIMD_INT_128.PACKED_SHIFT
+.Pq Event 12H , Umask 02H
+Counts number of 128 bit SIMD integer shift operations.
+.It Li SIMD_INT_128.PACK
+.Pq Event 12H , Umask 04H
+Counts number of 128 bit SIMD integer pack operations.
+.It Li SIMD_INT_128.UNPACK
+.Pq Event 12H , Umask 08H
+Counts number of 128 bit SIMD integer unpack operations.
+.It Li SIMD_INT_128.PACKED_LOGICAL
+.Pq Event 12H , Umask 10H
+Counts number of 128 bit SIMD integer logical operations.
+.It Li SIMD_INT_128.PACKED_ARITH
+.Pq Event 12H , Umask 20H
+Counts number of 128 bit SIMD integer arithmetic operations.
+.It Li SIMD_INT_128.SHUFFLE_MOVE
+.Pq Event 12H , Umask 40H
+Counts number of 128 bit SIMD integer shuffle and move operations.
+.It Li LOAD_DISPATCH.RS
+.Pq Event 13H , Umask 01H
+Counts number of loads dispatched from the Reservation Station that bypass
+the Memory Order Buffer.
+.It Li LOAD_DISPATCH.RS_DELAYED
+.Pq Event 13H , Umask 02H
+Counts the number of delayed RS dispatches at the stage latch. If an RS
+dispatch can not bypass to LB, it has another chance to dispatch from the
+one-cycle delayed staging latch before it is written into the LB.
+.It Li LOAD_DISPATCH.MOB
+.Pq Event 13H , Umask 04H
+Counts the number of loads dispatched from the Reservation Station to the
+Memory Order Buffer.
+.It Li LOAD_DISPATCH.ANY
+.Pq Event 13H , Umask 07H
+Counts all loads dispatched from the Reservation Station.
+.It Li ARITH.CYCLES_DIV_BUSY
+.Pq Event 14H , Umask 01H
+Counts the number of cycles the divider is busy executing divide or square
+root operations. The divide can be integer, X87 or Streaming SIMD Extensions
+(SSE). The square root operation can be either X87 or SSE.
+Set 'edge =1, invert=1, cmask=1' to count the number of divides.
+Count may be incorrect when SMT is on.
+.It Li ARITH.MUL
+.Pq Event 14H , Umask 02H
+Counts the number of multiply operations executed. This includes integer as
+well as floating point multiply operations but excludes DPPS mul and MPSAD.
+Count may be incorrect when SMT is on.
+.It Li INST_QUEUE_WRITES
+.Pq Event 17H , Umask 01H
+Counts the number of instructions written into the instruction queue every
+cycle.
+.It Li INST_DECODED.DEC0
+.Pq Event 18H , Umask 01H
+Counts number of instructions that require decoder 0 to be decoded. Usually,
+this means that the instruction maps to more than 1 uop
+.It Li TWO_UOP_INSTS_DECODED
+.Pq Event 19H , Umask 01H
+An instruction that generates two uops was decoded
+.It Li INST_QUEUE_WRITE_CYCLES
+.Pq Event 1EH , Umask 01H
+This event counts the number of cycles during which instructions are written
+to the instruction queue. Dividing this counter by the number of
+instructions written to the instruction queue (INST_QUEUE_WRITES) yields the
+average number of instructions decoded each cycle. If this number is less
+than four and the pipe stalls, this indicates that the decoder is failing to
+decode enough instructions per cycle to sustain the 4-wide pipeline.
+If SSE* instructions that are 6 bytes or longer arrive one after another,
+then front end throughput may limit execution speed.
+.It Li LSD_OVERFLOW
+.Pq Event 20H , Umask 01H
+Counts number of loops that cannot stream from the instruction queue.
+.It Li L2_RQSTS.LD_HIT
+.Pq Event 24H , Umask 01H
+Counts number of loads that hit the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches. L2 loads can be rejected for
+various reasons. Only non rejected loads are counted.
+.It Li L2_RQSTS.LD_MISS
+.Pq Event 24H , Umask 02H
+Counts the number of loads that miss the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches.
+.It Li L2_RQSTS.LOADS
+.Pq Event 24H , Umask 03H
+Counts all L2 load requests. L2 loads include both L1D demand misses as well
+as L1D prefetches.
+.It Li L2_RQSTS.RFO_HIT
+.Pq Event 24H , Umask 04H
+Counts the number of store RFO requests that hit the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+Count includes WC memory requests, where the data is not fetched but the
+permission to write the line is required.
+.It Li L2_RQSTS.RFO_MISS
+.Pq Event 24H , Umask 08H
+Counts the number of store RFO requests that miss the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.RFOS
+.Pq Event 24H , Umask 0CH
+Counts all L2 store RFO requests. L2 RFO requests include both L1D demand
+RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.IFETCH_HIT
+.Pq Event 24H , Umask 10H
+Counts number of instruction fetches that hit the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCH_MISS
+.Pq Event 24H , Umask 20H
+Counts number of instruction fetches that miss the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCHES
+.Pq Event 24H , Umask 30H
+Counts all instruction fetches. L2 instruction fetches include both L1I
+demand misses as well as L1I instruction prefetches.
+.It Li L2_RQSTS.PREFETCH_HIT
+.Pq Event 24H , Umask 40H
+Counts L2 prefetch hits for both code and data.
+.It Li L2_RQSTS.PREFETCH_MISS
+.Pq Event 24H , Umask 80H
+Counts L2 prefetch misses for both code and data.
+.It Li L2_RQSTS.PREFETCHES
+.Pq Event 24H , Umask C0H
+Counts all L2 prefetches for both code and data.
+.It Li L2_RQSTS.MISS
+.Pq Event 24H , Umask AAH
+Counts all L2 misses for both code and data.
+.It Li L2_RQSTS.REFERENCES
+.Pq Event 24H , Umask FFH
+Counts all L2 requests for both code and data.
+.It Li L2_DATA_RQSTS.DEMAND.I_STATE
+.Pq Event 26H , Umask 01H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D
+demand misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.S_STATE
+.Pq Event 26H , Umask 02H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the S (shared) state. L2 demand loads are both L1D demand misses and L1D
+prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.E_STATE
+.Pq Event 26H , Umask 04H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the E (exclusive) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.M_STATE
+.Pq Event 26H , Umask 08H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the M (modified) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.MESI
+.Pq Event 26H , Umask 0FH
+Counts all L2 data demand requests. L2 demand loads are both L1D demand
+misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.PREFETCH.I_STATE
+.Pq Event 26H , Umask 10H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss.
+.It Li L2_DATA_RQSTS.PREFETCH.S_STATE
+.Pq Event 26H , Umask 20H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the S (shared) state. A prefetch RFO will miss on an S state line, while
+a prefetch read will hit on an S state line.
+.It Li L2_DATA_RQSTS.PREFETCH.E_STATE
+.Pq Event 26H , Umask 40H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the E (exclusive) state.
+.It Li L2_DATA_RQSTS.PREFETCH.M_STATE
+.Pq Event 26H , Umask 80H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the M (modified) state.
+.It Li L2_DATA_RQSTS.PREFETCH.MESI
+.Pq Event 26H , Umask F0H
+Counts all L2 prefetch requests.
+.It Li L2_DATA_RQSTS.ANY
+.Pq Event 26H , Umask FFH
+Counts all L2 data requests.
+.It Li L2_WRITE.RFO.I_STATE
+.Pq Event 27H , Umask 01H
+Counts number of L2 demand store RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss. The L1D prefetcher
+does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.S_STATE
+.Pq Event 27H , Umask 02H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.M_STATE
+.Pq Event 27H , Umask 08H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.HIT
+.Pq Event 27H , Umask 0EH
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in either the S, E or M states. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.MESI
+.Pq Event 27H , Umask 0FH
+Counts all L2 store RFO requests. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.LOCK.I_STATE
+.Pq Event 27H , Umask 10H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss.
+.It Li L2_WRITE.LOCK.S_STATE
+.Pq Event 27H , Umask 20H
+Counts number of L2 lock RFO requests where the cache line to be loaded is
+in the S (shared) state.
+.It Li L2_WRITE.LOCK.E_STATE
+.Pq Event 27H , Umask 40H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the E (exclusive) state.
+.It Li L2_WRITE.LOCK.M_STATE
+.Pq Event 27H , Umask 80H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the M (modified) state.
+.It Li L2_WRITE.LOCK.HIT
+.Pq Event 27H , Umask E0H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in either the S, E, or M state.
+.It Li L2_WRITE.LOCK.MESI
+.Pq Event 27H , Umask F0H
+Counts all L2 demand lock RFO requests.
+.It Li L1D_WB_L2.I_STATE
+.Pq Event 28H , Umask 01H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the I (invalid) state, i.e. a cache miss.
+.It Li L1D_WB_L2.S_STATE
+.Pq Event 28H , Umask 02H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the S state.
+.It Li L1D_WB_L2.E_STATE
+.Pq Event 28H , Umask 04H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the E (exclusive) state.
+.It Li L1D_WB_L2.M_STATE
+.Pq Event 28H , Umask 08H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the M (modified) state.
+.It Li L1D_WB_L2.MESI
+.Pq Event 28H , Umask 0FH
+Counts all L1 writebacks to the L2.
+.It Li L3_LAT_CACHE.REFERENCE
+.Pq Event 2EH , Umask 4FH
+This event counts requests originating from the core that reference a cache
+line in the last level cache. The event count includes speculative traffic
+but excludes cache line fills due to a L2 hardware-prefetch. Because of
+differences in cache hierarchy, cache sizes and other implementation-specific
+characteristics, value comparison to estimate performance differences is not
+recommended.
+See Table A-1.
+.It Li L3_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 41H
+This event counts each cache miss condition for references to the last level
+cache. The event count may include speculative traffic but excludes cache
+line fills due to L2 hardware-prefetches. Because of differences in cache
+hierarchy, cache sizes and other implementation-specific characteristics,
+value comparison to estimate performance differences is not recommended.
+See Table A-1.
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH , Umask 00H
+Counts the number of thread cycles while the thread is not in a halt state.
+The thread enters the halt state when it is running the HLT instruction. The
+core frequency may change from time to time due to power or thermal
+throttling.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.REF_P
+.Pq Event 3CH , Umask 01H
+Increments at the frequency of TSC when not halted.
+see Table A-1
+.It Li L1D_CACHE_LD.I_STATE
+.Pq Event 40H , Umask 01H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the I (invalid) state, i.e. the read request missed the cache.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.S_STATE
+.Pq Event 40H , Umask 02H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the S (shared) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.E_STATE
+.Pq Event 40H , Umask 04H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the E (exclusive) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.M_STATE
+.Pq Event 40H , Umask 08H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the M (modified) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.MESI
+.Pq Event 40H , Umask 0FH
+Counts L1 data cache read requests.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.S_STATE
+.Pq Event 41H , Umask 02H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the S (shared) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.E_STATE
+.Pq Event 41H , Umask 04H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the E (exclusive) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.M_STATE
+.Pq Event 41H , Umask 08H
+Counts L1 data cache store RFO requests where cache line to be loaded is in
+the M (modified) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.HIT
+.Pq Event 42H , Umask 01H
+Counts retired load locks that hit in the L1 data cache or hit in an already
+allocated fill buffer. The lock portion of the load lock transaction must
+hit in the L1D.
+The initial load will pull the lock into the L1 data cache. Counter 0, 1
+only
+.It Li L1D_CACHE_LOCK.S_STATE
+.Pq Event 42H , Umask 02H
+Counts L1 data cache retired load locks that hit the target cache line in
+the shared state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.E_STATE
+.Pq Event 42H , Umask 04H
+Counts L1 data cache retired load locks that hit the target cache line in
+the exclusive state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.M_STATE
+.Pq Event 42H , Umask 08H
+Counts L1 data cache retired load locks that hit the target cache line in
+the modified state.
+Counter 0, 1 only
+.It Li L1D_ALL_REF.ANY
+.Pq Event 43H , Umask 01H
+Counts all references (uncached, speculated and retired) to the L1 data
+cache, including all loads and stores with any memory types. The event
+counts memory accesses only when they are actually performed. For example, a
+load blocked by unknown store address and later performed is only counted
+once.
+The event does not include non-memory accesses, such as I/O accesses.
+Counter 0, 1 only
+.It Li L1D_ALL_REF.CACHEABLE
+.Pq Event 43H , Umask 02H
+Counts all data reads and writes (speculated and retired) from cacheable
+memory, including locked operations.
+Counter 0, 1 only
+.It Li L1D_PEND_MISS.LOAD_BUFFERS_FULL
+.Pq Event 48H , Umask 02H
+Counts cycles of L1 data cache load fill buffers full.
+Counter 0, 1 only
+.It Li DTLB_MISSES.ANY
+.Pq Event 49H , Umask 01H
+Counts the number of misses in the STLB which causes a page walk.
+.It Li DTLB_MISSES.WALK_COMPLETED
+.Pq Event 49H , Umask 02H
+Counts number of misses in the STLB which resulted in a completed page walk.
+.It Li DTLB_MISSES.STLB_HIT
+.Pq Event 49H , Umask 10H
+Counts the number of DTLB first level misses that hit in the second level
+TLB. This event is only relevant if the core contains multiple DTLB levels.
+.It Li LOAD_HIT_PRE
+.Pq Event 4CH , Umask 01H
+Counts load operations sent to the L1 data cache while a previous SSE
+prefetch instruction to the same cache line has started prefetching but has
+not yet finished.
+.It Li L1D_PREFETCH.REQUESTS
+.Pq Event 4EH , Umask 01H
+Counts number of hardware prefetch requests dispatched out of the prefetch
+FIFO.
+.It Li L1D_PREFETCH.MISS
+.Pq Event 4EH , Umask 02H
+Counts number of hardware prefetch requests that miss the L1D. There are two
+prefetchers in the L1D. A streamer, which predicts lines sequentially after
+this one should be fetched, and the IP prefetcher that remembers access
+patterns for the current instruction. The streamer prefetcher stops on an
+L1D hit, while the IP prefetcher does not.
+.It Li L1D_PREFETCH.TRIGGERS
+.Pq Event 4EH , Umask 04H
+Counts number of prefetch requests triggered by the Finite State Machine and
+pushed into the prefetch FIFO. Some of the prefetch requests are dropped due
+to overwrites or competition between the IP index prefetcher and streamer
+prefetcher. The prefetch FIFO contains 4 entries.
+.It Li L1D.REPL
+.Pq Event 51H , Umask 01H
+Counts the number of lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_REPL
+.Pq Event 51H , Umask 02H
+Counts the number of modified lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_EVICT
+.Pq Event 51H , Umask 04H
+Counts the number of modified lines evicted from the L1 data cache due to
+replacement.
+Counter 0, 1 only
+.It Li L1D.M_SNOOP_EVICT
+.Pq Event 51H , Umask 08H
+Counts the number of modified lines evicted from the L1 data cache due to
+snoop HITM intervention.
+Counter 0, 1 only
+.It Li L1D_CACHE_PREFETCH_LOCK_FB_HIT
+.Pq Event 52H , Umask 01H
+Counts the number of cacheable load lock speculated instructions accepted
+into the fill buffer.
+.It Li L1D_CACHE_LOCK_FB_HIT
+.Pq Event 53H , Umask 01H
+Counts the number of cacheable load lock speculated or retired instructions
+accepted into the fill buffer.
+.It Li CACHE_LOCK_CYCLES.L1D_L2
+.Pq Event 63H , Umask 01H
+Cycle count during which the L1D and L2 are locked. A lock is asserted when
+there is a locked memory access, due to uncacheable memory, a locked
+operation that spans two cache lines, or a page walk from an uncacheable
+page table.
+Counter 0, 1 only. L1D and L2 locks have a very high performance penalty and
+it is highly recommended to avoid such accesses.
+.It Li CACHE_LOCK_CYCLES.L1D
+.Pq Event 63H , Umask 02H
+Counts the number of cycles that cacheline in the L1 data cache unit is
+locked.
+Counter 0, 1 only.
+.It Li IO_TRANSACTIONS
+.Pq Event 6CH , Umask 01H
+Counts the number of completed I/O transactions.
+.It Li L1I.HITS
+.Pq Event 80H , Umask 01H
+Counts all instruction fetches that hit the L1 instruction cache.
+.It Li L1I.MISSES
+.Pq Event 80H , Umask 02H
+Counts all instruction fetches that miss the L1I cache. This includes
+instruction cache misses, streaming buffer misses, victim cache misses and
+uncacheable fetches. An instruction fetch miss is counted only once and not
+once for every cycle it is outstanding.
+.It Li L1I.READS
+.Pq Event 80H , Umask 03H
+Counts all instruction fetches, including uncacheable fetches that bypass
+the L1I.
+.It Li L1I.CYCLES_STALLED
+.Pq Event 80H , Umask 04H
+Cycle counts for which an instruction fetch stalls due to a L1I cache miss,
+ITLB miss or ITLB fault.
+.It Li LARGE_ITLB.HIT
+.Pq Event 82H , Umask 01H
+Counts number of large ITLB hits.
+.It Li ITLB_MISSES.ANY
+.Pq Event 85H , Umask 01H
+Counts the number of misses in all levels of the ITLB which causes a page
+walk.
+.It Li ITLB_MISSES.WALK_COMPLETED
+.Pq Event 85H , Umask 02H
+Counts number of misses in all levels of the ITLB which resulted in a
+completed page walk.
+.It Li ILD_STALL.LCP
+.Pq Event 87H , Umask 01H
+Cycles Instruction Length Decoder stalls due to length changing prefixes:
+66, 67 or REX.W (for EM64T) instructions which change the length of the
+decoded instruction.
+.It Li ILD_STALL.MRU
+.Pq Event 87H , Umask 02H
+Instruction Length Decoder stall cycles due to Branch Prediction Unit (BPU)
+Most Recently Used (MRU) bypass.
+.It Li ILD_STALL.IQ_FULL
+.Pq Event 87H , Umask 04H
+Stall cycles due to a full instruction queue.
+.It Li ILD_STALL.REGEN
+.Pq Event 87H , Umask 08H
+Counts the number of regen stalls.
+.It Li ILD_STALL.ANY
+.Pq Event 87H , Umask 0FH
+Counts any cycles the Instruction Length Decoder is stalled.
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H , Umask 01H
+Counts the number of conditional near branch instructions executed, but not
+necessarily retired.
+.It Li BR_INST_EXEC.DIRECT
+.Pq Event 88H , Umask 02H
+Counts all unconditional near branch instructions excluding calls and
+indirect branches.
+.It Li BR_INST_EXEC.INDIRECT_NON_CALL
+.Pq Event 88H , Umask 04H
+Counts the number of executed indirect near branch instructions that are not
+calls.
+.It Li BR_INST_EXEC.NON_CALLS
+.Pq Event 88H , Umask 07H
+Counts all non call near branch instructions executed, but not necessarily
+retired.
+.It Li BR_INST_EXEC.RETURN_NEAR
+.Pq Event 88H , Umask 08H
+Counts indirect near branches that have a return mnemonic.
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
+.Pq Event 88H , Umask 10H
+Counts unconditional near call branch instructions, excluding non call
+branch, executed.
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 88H , Umask 20H
+Counts indirect near calls, including both register and memory indirect,
+executed.
+.It Li BR_INST_EXEC.NEAR_CALLS
+.Pq Event 88H , Umask 30H
+Counts all near call branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.TAKEN
+.Pq Event 88H , Umask 40H
+Counts taken near branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.ANY
+.Pq Event 88H , Umask 7FH
+Counts all near executed branches (not necessarily retired). This includes
+only instructions and not micro-op branches. Frequent branching is not
+necessarily a major performance issue. However frequent branch
+mispredictions may be a problem.
+.It Li BR_MISP_EXEC.COND
+.Pq Event 89H , Umask 01H
+Counts the number of mispredicted conditional near branch instructions
+executed, but not necessarily retired.
+.It Li BR_MISP_EXEC.DIRECT
+.Pq Event 89H , Umask 02H
+Counts mispredicted macro unconditional near branch instructions, excluding
+calls and indirect branches (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NON_CALL
+.Pq Event 89H , Umask 04H
+Counts the number of executed mispredicted indirect near branch instructions
+that are not calls.
+.It Li BR_MISP_EXEC.NON_CALLS
+.Pq Event 89H , Umask 07H
+Counts mispredicted non call near branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H , Umask 08H
+Counts mispredicted indirect branches that have a near return mnemonic.
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
+.Pq Event 89H , Umask 10H
+Counts mispredicted non-indirect near calls executed, (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 89H , Umask 20H
+Counts mispredicted indirect near calls executed, including both register
+and memory indirect.
+.It Li BR_MISP_EXEC.NEAR_CALLS
+.Pq Event 89H , Umask 30H
+Counts all mispredicted near call branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.TAKEN
+.Pq Event 89H , Umask 40H
+Counts executed mispredicted near branches that are taken, but not
+necessarily retired.
+.It Li BR_MISP_EXEC.ANY
+.Pq Event 89H , Umask 7FH
+Counts the number of mispredicted near branch instructions that were
+executed, but not necessarily retired.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event A2H , Umask 01H
+Counts the number of Allocator resource related stalls. Includes register
+renaming buffer entries, memory buffer entries. In addition to resource
+related stalls, this event counts some other events. Includes stalls arising
+during branch misprediction recovery, such as if retirement of the
+mispredicted branch is delayed and stalls arising while store buffer is
+draining from synchronizing operations.
+Does not include stalls due to SuperQ (off core) queue full, too many cache
+misses, etc.
+.It Li RESOURCE_STALLS.LOAD
+.Pq Event A2H , Umask 02H
+Counts the cycles of stall due to lack of load buffer for load operation.
+.It Li RESOURCE_STALLS.RS_FULL
+.Pq Event A2H , Umask 04H
+This event counts the number of cycles when the number of instructions in
+the pipeline waiting for execution reaches the limit the processor can
+handle. A high count of this event indicates that there are long latency
+operations in the pipe (possibly load and store operations that miss the L2
+cache, or instructions dependent upon instructions further down the pipeline
+that have yet to retire).
+When RS is full, new instructions can not enter the reservation station and
+start execution.
+.It Li RESOURCE_STALLS.STORE
+.Pq Event A2H , Umask 08H
+This event counts the number of cycles that a resource related stall will
+occur due to the number of store instructions reaching the limit of the
+pipeline, (i.e. all store buffers are used). The stall ends when a store
+instruction commits its data to the cache or memory.
+.It Li RESOURCE_STALLS.ROB_FULL
+.Pq Event A2H , Umask 10H
+Counts the cycles of stall due to re-order buffer full.
+.It Li RESOURCE_STALLS.FPCW
+.Pq Event A2H , Umask 20H
+Counts the number of cycles while execution was stalled due to writing the
+floating-point unit (FPU) control word.
+.It Li RESOURCE_STALLS.MXCSR
+.Pq Event A2H , Umask 40H
+Stalls due to the MXCSR register rename occurring too close to a previous
+MXCSR rename. The MXCSR provides control and status for the MMX registers.
+.It Li RESOURCE_STALLS.OTHER
+.Pq Event A2H , Umask 80H
+Counts the number of cycles while execution was stalled due to other
+resource issues.
+.It Li MACRO_INSTS.FUSIONS_DECODED
+.Pq Event A6H , Umask 01H
+Counts the number of instructions decoded that are macro-fused but not
+necessarily executed or retired.
+.It Li BACLEAR_FORCE_IQ
+.Pq Event A7H , Umask 01H
+Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ
+is also responsible for providing conditional branch prediction direction
+based on a static scheme and dynamic data provided by the L2 Branch
+Prediction Unit. If the conditional branch target is not found in the Target
+Array and the IQ predicts that the branch is taken, then the IQ will force
+the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by
+the BAC generates approximately an 8 cycle bubble in the instruction fetch
+pipeline.
+.It Li LSD.UOPS
+.Pq Event A8H , Umask 01H
+Counts the number of micro-ops delivered by the loop stream detector.
+Use cmask=1 and invert to count cycles.
+.It Li ITLB_FLUSH
+.Pq Event AEH , Umask 01H
+Counts the number of ITLB flushes
+.It Li OFFCORE_REQUESTS.L1D_WRITEBACK
+.Pq Event B0H , Umask 40H
+Counts number of L1D writebacks to the uncore.
+.It Li UOPS_EXECUTED.PORT0
+.Pq Event B1H , Umask 01H
+Counts number of Uops executed that were issued on port 0. Port 0 handles
+integer arithmetic, SIMD and FP add Uops.
+.It Li UOPS_EXECUTED.PORT1
+.Pq Event B1H , Umask 02H
+Counts number of Uops executed that were issued on port 1. Port 1 handles
+integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops.
+.It Li UOPS_EXECUTED.PORT2_CORE
+.Pq Event B1H , Umask 04H
+Counts number of Uops executed that were issued on port 2. Port 2 handles
+the load Uops. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT3_CORE
+.Pq Event B1H , Umask 08H
+Counts number of Uops executed that were issued on port 3. Port 3 handles
+store Uops. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT4_CORE
+.Pq Event B1H , Umask 10H
+Counts number of Uops executed that were issued on port 4. Port 4 handles
+the value to be stored for the store Uops issued on port 3. This is a core
+count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5
+.Pq Event B1H , Umask 1FH
+Counts cycles when the Uops executed were issued from any ports except port
+5. Use Cmask=1 for active cycles; Cmask=0 for weighted cycles; use Cmask=1,
+Invert=1 to count P0-4 stalled cycles; use Cmask=1, Edge=1, Invert=1 to count
+P0-4 stalls.
+.It Li UOPS_EXECUTED.PORT5
+.Pq Event B1H , Umask 20H
+Counts number of Uops executed that were issued on port 5.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES
+.Pq Event B1H , Umask 3FH
+Counts cycles when the Uops are executing. Use Cmask=1 for active cycles;
+Cmask=0 for weighted cycles; use Cmask=1, Invert=1 to count P0-4 stalled
+cycles; use Cmask=1, Edge=1, Invert=1 to count P0-4 stalls.
+.It Li UOPS_EXECUTED.PORT015
+.Pq Event B1H , Umask 40H
+Counts number of Uops executed that were issued on port 0, 1, or 5.
+Use cmask=1, invert=1 to count stall cycles.
+.It Li UOPS_EXECUTED.PORT234
+.Pq Event B1H , Umask 80H
+Counts number of Uops executed that were issued on port 2, 3, or 4.
+.It Li OFFCORE_REQUESTS_SQ_FULL
+.Pq Event B2H , Umask 01H
+Counts number of cycles the SQ is full to handle off-core requests.
+.It Li OFF_CORE_RESPONSE_0
+.Pq Event B7H , Umask 01H
+see Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core
+Requires programming MSR 01A6H
+.It Li SNOOP_RESPONSE.HIT
+.Pq Event B8H , Umask 01H
+Counts HIT snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITE
+.Pq Event B8H , Umask 02H
+Counts HIT E snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITM
+.Pq Event B8H , Umask 04H
+Counts HIT M snoop response sent by this thread in response to a snoop
+request.
+.It Li OFF_CORE_RESPONSE_1
+.Pq Event BBH , Umask 01H
+see Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core
+Requires programming MSR 01A7H
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 01H
+See Table A-1
+Notes: INST_RETIRED.ANY is counted by a designated fixed counter.
+INST_RETIRED.ANY_P is counted by a programmable counter and is an
+architectural performance event. Event is supported if CPUID.A.EBX[1] = 0.
+Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not
+count as retired instructions.
+.It Li INST_RETIRED.X87
+.Pq Event C0H , Umask 02H
+Counts the number of floating point computational operations retired:
+floating point computational operations executed by the assist handler and
+sub-operations of complex floating point instructions like transcendental
+instructions.
+.It Li INST_RETIRED.MMX
+.Pq Event C0H , Umask 04H
+Counts the number of MMX instructions retired.
+.It Li UOPS_RETIRED.ANY
+.Pq Event C2H , Umask 01H
+Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2,
+others=1; maximum count of 8 per cycle). Most instructions are composed of
+one or two micro-ops. Some instructions are decoded into longer sequences
+such as repeat instructions, floating point transcendental instructions, and
+assists.
+Use cmask=1 and invert to count active cycles or stalled cycles
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H , Umask 02H
+Counts the number of retirement slots used each cycle
+.It Li UOPS_RETIRED.MACRO_FUSED
+.Pq Event C2H , Umask 04H
+Counts number of macro-fused uops retired.
+.It Li MACHINE_CLEARS.CYCLES
+.Pq Event C3H , Umask 01H
+Counts the cycles machine clear is asserted.
+.It Li MACHINE_CLEARS.MEM_ORDER
+.Pq Event C3H , Umask 02H
+Counts the number of machine clears due to memory order conflicts.
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 04H
+Counts the number of times that a program writes to a code section.
+Self-modifying code causes a severe penalty in all Intel 64 and IA-32
+processors. The modified cache line is written back to the L2 and L3 caches.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 00H
+See Table A-1
+.It Li BR_INST_RETIRED.CONDITIONAL
+.Pq Event C4H , Umask 01H
+Counts the number of conditional branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_CALL
+.Pq Event C4H , Umask 02H
+Counts the number of direct & indirect near unconditional calls retired
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 04H
+Counts the number of branch instructions retired
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 00H
+See Table A-1
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H , Umask 02H
+Counts mispredicted direct & indirect near unconditional retired calls.
+.It Li SSEX_UOPS_RETIRED.PACKED_SINGLE
+.Pq Event C7H , Umask 01H
+Counts SIMD packed single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_SINGLE
+.Pq Event C7H , Umask 02H
+Counts SIMD scalar single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.PACKED_DOUBLE
+.Pq Event C7H , Umask 04H
+Counts SIMD packed double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_DOUBLE
+.Pq Event C7H , Umask 08H
+Counts SIMD scalar double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.VECTOR_INTEGER
+.Pq Event C7H , Umask 10H
+Counts 128-bit SIMD vector integer Uops retired.
+.It Li ITLB_MISS_RETIRED
+.Pq Event C8H , Umask 20H
+Counts the number of retired instructions that missed the ITLB when the
+instruction was fetched.
+.It Li MEM_LOAD_RETIRED.L1D_HIT
+.Pq Event CBH , Umask 01H
+Counts number of retired loads that hit the L1 data cache.
+.It Li MEM_LOAD_RETIRED.L2_HIT
+.Pq Event CBH , Umask 02H
+Counts number of retired loads that hit the L2 data cache.
+.It Li MEM_LOAD_RETIRED.L3_UNSHARED_HIT
+.Pq Event CBH , Umask 04H
+Counts number of retired loads that hit their own, unshared lines in the L3
+cache.
+.It Li MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM
+.Pq Event CBH , Umask 08H
+Counts number of retired loads that hit in a sibling core's L2 (on die
+core). Since the L3 is inclusive of all cores on the package, this is an L3
+hit. This counts both clean or modified hits.
+.It Li MEM_LOAD_RETIRED.L3_MISS
+.Pq Event CBH , Umask 10H
+Counts number of retired loads that miss the L3 cache. The load was
+satisfied by a remote socket, local memory or an IOH.
+.It Li MEM_LOAD_RETIRED.HIT_LFB
+.Pq Event CBH , Umask 40H
+Counts number of retired loads that miss the L1D and the address is located
+in an allocated line fill buffer and will soon be committed to cache. This
+is counting secondary L1D misses.
+.It Li MEM_LOAD_RETIRED.DTLB_MISS
+.Pq Event CBH , Umask 80H
+Counts the number of retired loads that missed the DTLB. The DTLB miss is
+not counted if the load operation causes a fault. This event counts loads
+from cacheable memory only. The event does not count loads by software
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li FP_MMX_TRANS.TO_FP
+.Pq Event CCH , Umask 01H
+Counts the first floating-point instruction following any MMX instruction.
+You can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.TO_MMX
+.Pq Event CCH , Umask 02H
+Counts the first MMX instruction following a floating-point instruction. You
+can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.ANY
+.Pq Event CCH , Umask 03H
+Counts all transitions from floating point to MMX instructions and from MMX
+instructions to floating point instructions. You can use this event to
+estimate the penalties for the transitions between floating-point and MMX
+technology states.
+.It Li MACRO_INSTS.DECODED
+.Pq Event D0H , Umask 01H
+Counts the number of instructions decoded, (but not necessarily executed or
+retired).
+.It Li UOPS_DECODED.MS
+.Pq Event D1H , Umask 02H
+Counts the number of Uops decoded by the Microcode Sequencer, MS. The MS
+delivers uops when the instruction is more than 4 uops long or a microcode
+assist is occurring.
+.It Li UOPS_DECODED.ESP_FOLDING
+.Pq Event D1H , Umask 04H
+Counts number of stack pointer (ESP) instructions decoded: push, pop, call,
+ret, etc. ESP instructions do not generate a Uop to increment or decrement
+ESP. Instead, they update an ESP_Offset register that keeps track of the
+delta to the current value of the ESP register.
+.It Li UOPS_DECODED.ESP_SYNC
+.Pq Event D1H , Umask 08H
+Counts number of stack pointer (ESP) sync operations where an ESP
+instruction is corrected by adding the ESP offset register to the current
+value of the ESP register.
+.It Li RAT_STALLS.FLAGS
+.Pq Event D2H , Umask 01H
+Counts the number of cycles during which execution stalled due to several
+reasons, one of which is a partial flag register stall. A partial register
+stall may occur when two conditions are met: 1) an instruction modifies
+some, but not all, of the flags in the flag register and 2) the next
+instruction, which depends on flags, depends on flags that were not modified
+by this instruction.
+.It Li RAT_STALLS.REGISTERS
+.Pq Event D2H , Umask 02H
+This event counts the number of cycles instruction execution latency became
+longer than the defined latency because the instruction used a register that
+was partially written by previous instruction.
+.It Li RAT_STALLS.ROB_READ_PORT
+.Pq Event D2H , Umask 04H
+Counts the number of cycles when ROB read port stalls occurred, which did
+not allow new micro-ops to enter the out-of-order pipeline. Note that, at
+this stage in the pipeline, additional stalls may occur at the same cycle
+and prevent the stalled micro-ops from entering the pipe. In such a case,
+micro-ops retry entering the execution pipe in the next cycle and the
+ROB-read port stall is counted again.
+.It Li RAT_STALLS.SCOREBOARD
+.Pq Event D2H , Umask 08H
+Counts the cycles where we stall due to microarchitecturally required
+serialization. Microcode scoreboarding stalls.
+.It Li RAT_STALLS.ANY
+.Pq Event D2H , Umask 0FH
+Counts all Register Allocation Table stall cycles due to: cycles when ROB
+read port stalls occurred, which did not allow new micro-ops to enter the
+execution pipe; cycles when partial register stalls occurred; cycles when
+flag stalls occurred; cycles when floating-point unit (FPU) status word stalls
+occurred. To count each of these conditions separately use the events:
+RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and
+RAT_STALLS.FPSW.
+.It Li SEG_RENAME_STALLS
+.Pq Event D4H , Umask 01H
+Counts the number of stall cycles due to the lack of renaming resources for
+the ES, DS, FS, and GS segment registers. If a segment is renamed but not
+retired and a second update to the same segment occurs, a stall occurs in
+the front-end of the pipeline until the renamed segment retires.
+.It Li ES_REG_RENAMES
+.Pq Event D5H , Umask 01H
+Counts the number of times the ES segment register is renamed.
+.It Li UOP_UNFUSION
+.Pq Event DBH , Umask 01H
+Counts unfusion events due to floating point exception to a fused uop.
+.It Li BR_INST_DECODED
+.Pq Event E0H , Umask 01H
+Counts the number of branch instructions decoded.
+.It Li BPU_MISSED_CALL_RET
+.Pq Event E5H , Umask 01H
+Counts number of times the Branch Prediction Unit missed predicting a call
+or return branch.
+.It Li BACLEAR.CLEAR
+.Pq Event E6H , Umask 01H
+Counts the number of times the front end is resteered, mainly when the
+Branch Prediction Unit cannot provide a correct prediction and this is
+corrected by the Branch Address Calculator at the front end. This can occur
+if the code has many branches such that they cannot be consumed by the BPU.
+Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble
+in the instruction fetch pipeline. The effect on total execution time
+depends on the surrounding code.
+.It Li BACLEAR.BAD_TARGET
+.Pq Event E6H , Umask 02H
+Counts number of Branch Address Calculator clears (BACLEAR) asserted due to
+conditional branch instructions in which there was a target hit but the
+direction was wrong. Each BACLEAR asserted by the BAC generates
+approximately an 8 cycle bubble in the instruction fetch pipeline.
+.It Li BPU_CLEARS.EARLY
+.Pq Event E8H , Umask 01H
+Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken
+branch after incorrectly assuming that it was not taken.
+The BPU clear leads to 2 cycle bubble in the Front End.
+.It Li BPU_CLEARS.LATE
+.Pq Event E8H , Umask 02H
+Counts late Branch Prediction Unit clears due to Most Recently Used
+conflicts. The BPU clear leads to a 3 cycle bubble in the Front End.
+.It Li BPU_CLEARS.ANY
+.Pq Event E8H , Umask 03H
+Counts all BPU clears.
+.It Li L2_TRANSACTIONS.LOAD
+.Pq Event F0H , Umask 01H
+Counts L2 load operations due to HW prefetch or demand loads.
+.It Li L2_TRANSACTIONS.RFO
+.Pq Event F0H , Umask 02H
+Counts L2 RFO operations due to HW prefetch or demand RFOs.
+.It Li L2_TRANSACTIONS.IFETCH
+.Pq Event F0H , Umask 04H
+Counts L2 instruction fetch operations due to HW prefetch or demand ifetch.
+.It Li L2_TRANSACTIONS.PREFETCH
+.Pq Event F0H , Umask 08H
+Counts L2 prefetch operations.
+.It Li L2_TRANSACTIONS.L1D_WB
+.Pq Event F0H , Umask 10H
+Counts L1D writeback operations to the L2.
+.It Li L2_TRANSACTIONS.FILL
+.Pq Event F0H , Umask 20H
+Counts L2 cache line fill operations due to load, RFO, L1D writeback or
+prefetch.
+.It Li L2_TRANSACTIONS.WB
+.Pq Event F0H , Umask 40H
+Counts L2 writeback operations to the L3.
+.It Li L2_TRANSACTIONS.ANY
+.Pq Event F0H , Umask 80H
+Counts all L2 cache operations.
+.It Li L2_LINES_IN.S_STATE
+.Pq Event F1H , Umask 02H
+Counts the number of cache lines allocated in the L2 cache in the S (shared)
+state.
+.It Li L2_LINES_IN.E_STATE
+.Pq Event F1H , Umask 04H
+Counts the number of cache lines allocated in the L2 cache in the E
+(exclusive) state.
+.It Li L2_LINES_IN.ANY
+.Pq Event F1H , Umask 07H
+Counts the number of cache lines allocated in the L2 cache.
+.It Li L2_LINES_OUT.DEMAND_CLEAN
+.Pq Event F2H , Umask 01H
+Counts L2 clean cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.DEMAND_DIRTY
+.Pq Event F2H , Umask 02H
+Counts L2 dirty (modified) cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.PREFETCH_CLEAN
+.Pq Event F2H , Umask 04H
+Counts L2 clean cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.PREFETCH_DIRTY
+.Pq Event F2H , Umask 08H
+Counts L2 modified cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.ANY
+.Pq Event F2H , Umask 0FH
+Counts all L2 cache lines evicted for any reason.
+.It Li SQ_MISC.SPLIT_LOCK
+.Pq Event F4H , Umask 10H
+Counts the number of SQ lock splits across a cache line.
+.It Li SQ_FULL_STALL_CYCLES
+.Pq Event F6H , Umask 01H
+Counts cycles the Super Queue is full. Neither of the threads on this core
+will be able to access the uncore.
+.It Li FP_ASSIST.ALL
+.Pq Event F7H , Umask 01H
+Counts the number of floating point operations executed that required
+micro-code assist intervention. Assists are required in the following cases:
+SSE instructions, (Denormal input when the DAZ flag is off or Underflow
+result when the FTZ flag is off): x87 instructions, (NaN or denormal are
+loaded to a register or used as input from memory, Division by 0 or
+Underflow output).
+.It Li FP_ASSIST.OUTPUT
+.Pq Event F7H , Umask 02H
+Counts number of floating point micro-code assist when the output value
+(destination register) is invalid.
+.It Li FP_ASSIST.INPUT
+.Pq Event F7H , Umask 04H
+Counts number of floating point micro-code assist when the input value (one
+of the source operands to an FP instruction) is invalid.
+.It Li SIMD_INT_64.PACKED_MPY
+.Pq Event FDH , Umask 01H
+Counts number of SIMD integer 64 bit packed multiply operations.
+.It Li SIMD_INT_64.PACKED_SHIFT
+.Pq Event FDH , Umask 02H
+Counts number of SIMD integer 64 bit packed shift operations.
+.It Li SIMD_INT_64.PACK
+.Pq Event FDH , Umask 04H
+Counts number of SIMD integer 64 bit pack operations.
+.It Li SIMD_INT_64.UNPACK
+.Pq Event FDH , Umask 08H
+Counts number of SIMD integer 64 bit unpack operations.
+.It Li SIMD_INT_64.PACKED_LOGICAL
+.Pq Event FDH , Umask 10H
+Counts number of SIMD integer 64 bit logical operations.
+.It Li SIMD_INT_64.PACKED_ARITH
+.Pq Event FDH , Umask 20H
+Counts number of SIMD integer 64 bit arithmetic operations.
+.It Li SIMD_INT_64.SHUFFLE_MOVE
+.Pq Event FDH , Umask 40H
+Counts number of SIMD integer 64 bit shift or move operations.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 programmable PMCs support the following events as of
+the June 2009 document (removed in December 2009):
+.Bl -tag -width indent
+.It Li SB_FORWARD.ANY
+.Pq Event 02H , Umask 01H
+Counts the number of store forwards.
+.It Li LOAD_BLOCK.STD
+.Pq Event 03H , Umask 01H
+Counts the number of loads blocked by a preceding store with unknown data.
+.It Li LOAD_BLOCK.ADDRESS_OFFSET
+.Pq Event 03H , Umask 04H
+Counts the number of loads blocked by a preceding store address.
+.It Li SB_DRAIN.CYCLES
+.Pq Event 04H , Umask 01H
+Counts the cycles of store buffer drains.
+.It Li MISALIGN_MEM_REF.LOAD
+.Pq Event 05H , Umask 01H
+Counts the number of misaligned load references
+.It Li MISALIGN_MEM_REF.STORE
+.Pq Event 05H , Umask 02H
+Counts the number of misaligned store references
+.It Li MISALIGN_MEM_REF.ANY
+.Pq Event 05H , Umask 03H
+Counts the number of misaligned memory references
+.It Li STORE_BLOCKS.NOT_STA
+.Pq Event 06H , Umask 01H
+This event counts the number of load operations delayed caused by preceding
+stores whose addresses are known but whose data is unknown, and preceding
+stores that conflict with the load but which incompletely overlap the load.
+.It Li STORE_BLOCKS.STA
+.Pq Event 06H , Umask 02H
+This event counts load operations delayed caused by preceding stores whose
+addresses are unknown (STA block).
+.It Li STORE_BLOCKS.ANY
+.Pq Event 06H , Umask 0FH
+All loads delayed due to store blocks
+.It Li MEMORY_DISAMBIGURATION.RESET
+.Pq Event 09H , Umask 01H
+Counts memory disambiguration reset cycles
+.It Li MEMORY_DISAMBIGURATION.SUCCESS
+.Pq Event 09H , Umask 02H
+Counts the number of loads that memory disambiguration succeeded
+.It Li MEMORY_DISAMBIGURATION.WATCHDOG
+.Pq Event 09H , Umask 04H
+Counts the number of times the memory disambiguration watchdog kicked in.
+.It Li MEMORY_DISAMBIGURATION.WATCH_CYCLES
+.Pq Event 09H , Umask 08H
+Counts the cycles that the memory disambiguration watchdog is active.
+set invert=1, cmask = 1
+.It Li HW_INT.RCV
+.Pq Event 1DH , Umask 01H
+Number of interrupts received
+.It Li HW_INT.CYCLES_MASKED
+.Pq Event 1DH , Umask 02H
+Number of cycles interrupts are masked
+.It Li HW_INT.CYCLES_PENDING_AND_MASKED
+.Pq Event 1DH , Umask 04H
+Number of cycles interrupts are pending and masked
+.It Li L2_WRITE.RFO.E_STATE
+.Pq Event 27H , Umask 04H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the E (exclusive) state. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li LONGEST_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 41H
+See Table A-1
+.It Li UOPS_DECODED.DEC0
+.Pq Event 3DH , Umask 01H
+Counts micro-ops decoded by decoder 0.
+.It Li L1D_CACHE_ST.I_STATE
+.Pq Event 41H , Umask 01H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the I state.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.MESI
+.Pq Event 41H , Umask 0FH
+Counts the number of L1 data cache store
+RFO requests.
+Counter 0, 1 only
+.It Li DTLB_MISSES.PDE_MISS
+.Pq Event 49H , Umask 20H
+Number of DTLB cache misses where the low part of the linear to physical
+address translation was missed.
+.It Li DTLB_MISSES.PDP_MISS
+.Pq Event 49H , Umask 40H
+Number of DTLB misses where the high part of the linear to physical address
+translation was missed.
+.It Li DTLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 49H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li SSE_MEM_EXEC.NTA
+.Pq Event 4BH , Umask 01H
+Counts number of SSE NTA prefetch/weakly-ordered instructions which missed
+the L1 data cache.
+.It Li SSE_MEM_EXEC.STREAMING_STORES
+.Pq Event 4BH , Umask 08H
+Counts number of SSE non temporal stores
+.It Li SFENCE_CYCLES
+.Pq Event 4DH , Umask 01H
+Counts store fence cycles
+.It Li EPT.EPDE_MISS
+.Pq Event 4FH , Umask 02H
+Counts Extended Page Directory Entry misses. The Extended Page Directory
+cache is used by Virtual Machine operating systems while the guest operating
+systems use the standard TLB caches.
+.It Li EPT.EPDPE_HIT
+.Pq Event 4FH , Umask 04H
+Counts Extended Page Directory Pointer Entry hits.
+.It Li EPT.EPDPE_MISS
+.Pq Event 4FH , Umask 08H
+Counts Extended Page Directory Pointer Entry misses.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA
+.Pq Event 60H , Umask 01H
+Counts weighted cycles of offcore demand data read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE
+.Pq Event 60H , Umask 02H
+Counts weighted cycles of offcore demand code read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO
+.Pq Event 60H , Umask 04H
+Counts weighted cycles of offcore demand RFO requests. Does not include L2
+prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.ANY.READ
+.Pq Event 60H , Umask 08H
+Counts weighted cycles of offcore read requests of any kind. Includes L2
+prefetch requests.
+counter 0
+.It Li IFU_IVC.FULL
+.Pq Event 81H , Umask 01H
+Instruction Fetch unit victim cache full.
+.It Li IFU_IVC.L1I_EVICTION
+.Pq Event 81H , Umask 02H
+L1 Instruction cache evictions.
+.It Li L1I_OPPORTUNISTIC_HITS
+.Pq Event 83H , Umask 01H
+Opportunistic hits in streaming.
+.It Li ITLB_MISSES.WALK_CYCLES
+.Pq Event 85H , Umask 04H
+Counts ITLB miss page walk cycles.
+.It Li ITLB_MISSES.PMH_BUSY_CYCLES
+.Pq Event 85H , Umask 04H
+Counts PMH busy cycles.
+.It Li ITLB_MISSES.STLB_HIT
+.Pq Event 85H , Umask 10H
+Counts the number of ITLB misses that hit in the second level TLB.
+.It Li ITLB_MISSES.PDE_MISS
+.Pq Event 85H , Umask 20H
+Number of ITLB misses where the low part of the linear to physical address
+translation was missed.
+.It Li ITLB_MISSES.PDP_MISS
+.Pq Event 85H , Umask 40H
+Number of ITLB misses where the high part of the linear to physical address
+translation was missed.
+.It Li ITLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 85H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_DATA
+.Pq Event B0H , Umask 01H
+Counts number of offcore demand data read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_CODE
+.Pq Event B0H , Umask 02H
+Counts number of offcore demand code read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.RFO
+.Pq Event B0H , Umask 04H
+Counts number of offcore demand RFO requests. Does not count L2 prefetch
+requests.
+.It Li OFFCORE_REQUESTS.ANY.READ
+.Pq Event B0H , Umask 08H
+Counts number of offcore read requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.ANY.RFO
+.Pq Event B0H , Umask 10H
+Counts number of offcore RFO requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.UNCACHED_MEM
+.Pq Event B0H , Umask 20H
+Counts number of offcore uncached memory requests.
+.It Li OFFCORE_REQUESTS.ANY
+.Pq Event B0H , Umask 80H
+Counts all offcore requests.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.DATA
+.Pq Event B3H , Umask 01H
+Counts weighted cycles of snoopq requests for data. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE
+.Pq Event B3H , Umask 02H
+Counts weighted cycles of snoopq invalidate requests. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.CODE
+.Pq Event B3H , Umask 04H
+Counts weighted cycles of snoopq requests for code. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li PIC_ACCESSES.TPR_READS
+.Pq Event BAH , Umask 01H
+Counts number of TPR reads
+.It Li PIC_ACCESSES.TPR_WRITES
+.Pq Event BAH , Umask 02H
+Counts the number of
+TPR writes.
+.It Li MACHINE_CLEARS.FUSION_ASSIST
+.Pq Event C3H , Umask 10H
+Counts the number of
+macro-fusion assists.
+.It Li BOGUS_BR
+.Pq Event E4H , Umask 01H
+Counts the number of bogus branches.
+.It Li L2_HW_PREFETCH.HIT
+.Pq Event F3H , Umask 01H
+Count L2 HW prefetcher detector hits
+.It Li L2_HW_PREFETCH.ALLOC
+.Pq Event F3H , Umask 02H
+Count L2 HW prefetcher allocations
+.It Li L2_HW_PREFETCH.DATA_TRIGGER
+.Pq Event F3H , Umask 04H
+Count L2 HW data prefetcher triggered
+.It Li L2_HW_PREFETCH.CODE_TRIGGER
+.Pq Event F3H , Umask 08H
+Count L2 HW code prefetcher triggered
+.It Li L2_HW_PREFETCH.DCA_TRIGGER
+.Pq Event F3H , Umask 10H
+Count L2 HW DCA prefetcher triggered
+.It Li L2_HW_PREFETCH.KICK_START
+.Pq Event F3H , Umask 20H
+Count L2 HW prefetcher kick started
+.It Li SQ_MISC.PROMOTION
+.Pq Event F4H , Umask 01H
+Counts the number of L2 secondary misses that hit the Super Queue.
+.It Li SQ_MISC.PROMOTION_POST_GO
+.Pq Event F4H , Umask 02H
+Counts the number of L2 secondary misses during the Super Queue filling L2.
+.It Li SQ_MISC.LRU_HINTS
+.Pq Event F4H , Umask 04H
+Counts number of Super Queue LRU hints sent to L3.
+.It Li SQ_MISC.FILL_DROPPED
+.Pq Event F4H , Umask 08H
+Counts the number of SQ L2 fills dropped due to L2 busy.
+.It Li SEGMENT_REG_LOADS
+.Pq Event F8H , Umask 01H
+Counts number of segment register loads.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.corei7uc.3 b/lib/libpmc/pmc.corei7uc.3
new file mode 100644
index 0000000..a69eab7
--- /dev/null
+++ b/lib/libpmc/pmc.corei7uc.3
@@ -0,0 +1,880 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Dt PMC.COREI7UC 3
+.Os
+.Sh NAME
+.Nm pmc.corei7uc
+.Nd uncore measurement events for
+.Tn Intel
+.Tn Core i7 and Xeon 5500
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Core i7"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs contain 2 classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_UCP"
+.It Li PMC_CLASS_UCF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_UCP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Core i7 and Xeon 5500 PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss COREI7 AND XEON 5500 UNCORE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.ucf 3 .
+.Ss COREI7 AND XEON 5500 UNCORE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 uncore programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li GQ_CYCLES_FULL.READ_TRACKER
+.Pq Event 00H , Umask 01H
+Uncore cycles Global Queue read tracker is full.
+.It Li GQ_CYCLES_FULL.WRITE_TRACKER
+.Pq Event 00H , Umask 02H
+Uncore cycles Global Queue write tracker is full.
+.It Li GQ_CYCLES_FULL.PEER_PROBE_TRACKER
+.Pq Event 00H , Umask 04H
+Uncore cycles Global Queue peer probe tracker is full. The peer probe
+tracker queue tracks snoops from the IOH and remote sockets.
+.It Li GQ_CYCLES_NOT_EMPTY.READ_TRACKER
+.Pq Event 01H , Umask 01H
+Uncore cycles where Global Queue read tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER
+.Pq Event 01H , Umask 02H
+Uncore cycles where Global Queue write tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER
+.Pq Event 01H , Umask 04H
+Uncore cycles where Global Queue peer probe tracker has at least one valid
+entry. The peer probe tracker queue tracks IOH and remote socket snoops.
+.It Li GQ_ALLOC.READ_TRACKER
+.Pq Event 03H , Umask 01H
+Counts the number of read tracker allocate to deallocate entries. The GQ
+read tracker allocate to deallocate occupancy count is divided by the count
+to obtain the average read tracker latency.
+.It Li GQ_ALLOC.RT_L3_MISS
+.Pq Event 03H , Umask 02H
+Counts the number of GQ read tracker entries for which a full cache line read
+has missed the L3. The GQ read tracker L3 miss to fill occupancy count is
+divided by this count to obtain the average cache line read L3 miss latency.
+The latency represents the time after which the L3 has determined that the
+cache line has missed. The time between a GQ read tracker allocation and the
+L3 determining that the cache line has missed is the average L3 hit latency.
+The total L3 cache line read miss latency is the hit latency + L3 miss
+latency.
+.It Li GQ_ALLOC.RT_TO_L3_RESP
+.Pq Event 03H , Umask 04H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy
+count is divided by this count to obtain the average L3 hit latency.
+.It Li GQ_ALLOC.RT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 08H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker, have missed in the L3 and have not acquired a Request Transaction
+ID. The GQ read tracker L3 miss to RTID acquired occupancy count is
+divided by this count to obtain the average latency for a read L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 10H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker, have missed in the L3 and have not acquired a Request
+Transaction ID. The GQ write tracker L3 miss to RTID occupancy count is
+divided by this count to obtain the average latency for a write L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WRITE_TRACKER
+.Pq Event 03H , Umask 20H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker queue that miss the L3. The GQ write tracker occupancy count
+is divided by this count to obtain the average L3 write miss latency.
+.It Li GQ_ALLOC.PEER_PROBE_TRACKER
+.Pq Event 03H , Umask 40H
+Counts the number of GQ peer probe tracker (snoop) entries that are
+allocated in the peer probe tracker queue that miss the L3. The GQ peer
+probe occupancy count is divided by this count to obtain the average L3 peer
+probe miss latency.
+.It Li GQ_DATA.FROM_QPI
+.Pq Event 04H , Umask 01H
+Cycles Global Queue Quickpath Interface input data port is busy importing
+data from the Quickpath Interface. Each cycle the input port can transfer 8
+or 16 bytes of data.
+.It Li GQ_DATA.FROM_QMC
+.Pq Event 04H , Umask 02H
+Cycles Global Queue Quickpath Memory Interface input data port is busy
+importing data from the Quickpath Memory Interface. Each cycle the input
+port can transfer 8 or 16 bytes of data.
+.It Li GQ_DATA.FROM_L3
+.Pq Event 04H , Umask 04H
+Cycles GQ L3 input data port is busy importing data from the Last Level
+Cache. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_02
+.Pq Event 04H , Umask 08H
+Cycles GQ Core 0 and 2 input data port is busy importing data from processor
+cores 0 and 2. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_13
+.Pq Event 04H , Umask 10H
+Cycles GQ Core 1 and 3 input data port is busy importing data from processor
+cores 1 and 3. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_QPI_QMC
+.Pq Event 05H , Umask 01H
+Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath
+Interface or Quickpath Memory Interface. Each cycle the output port can
+transfer 32 bytes of data.
+.It Li GQ_DATA.TO_L3
+.Pq Event 05H , Umask 02H
+Cycles GQ L3 output data port is busy sending data to the Last Level Cache.
+Each cycle the output port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_CORES
+.Pq Event 05H , Umask 04H
+Cycles GQ Core output data port is busy sending data to the Cores. Each
+cycle the output port can transfer 32 bytes of data.
+.It Li SNP_RESP_TO_LOCAL_HOME.I_STATE
+.Pq Event 06H , Umask 01H
+Number of snoop responses to the local home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_LOCAL_HOME.S_STATE
+.Pq Event 06H , Umask 02H
+Number of snoop responses to the local home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE
+.Pq Event 06H , Umask 04H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the local home in the S
+state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE
+.Pq Event 06H , Umask 08H
+Number of responses to read invalidate snoops to the local home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the local home in the M state.
+.It Li SNP_RESP_TO_LOCAL_HOME.CONFLICT
+.Pq Event 06H , Umask 10H
+Number of conflict snoop responses sent to the local home.
+.It Li SNP_RESP_TO_LOCAL_HOME.WB
+.Pq Event 06H , Umask 20H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.I_STATE
+.Pq Event 07H , Umask 01H
+Number of snoop responses to a remote home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_REMOTE_HOME.S_STATE
+.Pq Event 07H , Umask 02H
+Number of snoop responses to a remote home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE
+.Pq Event 07H , Umask 04H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the remote home in the S
+state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE
+.Pq Event 07H , Umask 08H
+Number of responses to read invalidate snoops to a remote home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the remote home in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.CONFLICT
+.Pq Event 07H , Umask 10H
+Number of conflict snoop responses sent to a remote home.
+.It Li SNP_RESP_TO_REMOTE_HOME.WB
+.Pq Event 07H , Umask 20H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.HITM
+.Pq Event 07H , Umask 24H
+Number of HITM snoop responses to a remote home.
+.It Li L3_HITS.READ
+.Pq Event 08H , Umask 01H
+Number of code read, data read and RFO requests that hit in the L3.
+.It Li L3_HITS.WRITE
+.Pq Event 08H , Umask 02H
+Number of writeback requests that hit in the L3. Writebacks from the cores
+will always result in L3 hits due to the inclusive property of the L3.
+.It Li L3_HITS.PROBE
+.Pq Event 08H , Umask 04H
+Number of snoops from IOH or remote sockets that hit in the L3.
+.It Li L3_HITS.ANY
+.Pq Event 08H , Umask 03H
+Number of reads and writes that hit the L3.
+.It Li L3_MISS.READ
+.Pq Event 09H , Umask 01H
+Number of code read, data read and RFO requests that miss the L3.
+.It Li L3_MISS.WRITE
+.Pq Event 09H , Umask 02H
+Number of writeback requests that miss the L3. Should always be zero as
+writebacks from the cores will always result in L3 hits due to the inclusive
+property of the L3.
+.It Li L3_MISS.PROBE
+.Pq Event 09H , Umask 04H
+Number of snoops from IOH or remote sockets that miss the L3.
+.It Li L3_MISS.ANY
+.Pq Event 09H , Umask 03H
+Number of reads and writes that miss the L3.
+.It Li L3_LINES_IN.M_STATE
+.Pq Event 0AH , Umask 01H
+Counts the number of L3 lines allocated in M state. The only time a cache
+line is allocated in the M state is when the line was forwarded in M state
+due to a Snoop Read Invalidate Own request.
+.It Li L3_LINES_IN.E_STATE
+.Pq Event 0AH , Umask 02H
+Counts the number of L3 lines allocated in E state.
+.It Li L3_LINES_IN.S_STATE
+.Pq Event 0AH , Umask 04H
+Counts the number of L3 lines allocated in S state.
+.It Li L3_LINES_IN.F_STATE
+.Pq Event 0AH , Umask 08H
+Counts the number of L3 lines allocated in F state.
+.It Li L3_LINES_IN.ANY
+.Pq Event 0AH , Umask 0FH
+Counts the number of L3 lines allocated in any state.
+.It Li L3_LINES_OUT.M_STATE
+.Pq Event 0BH , Umask 01H
+Counts the number of L3 lines victimized that were in the M state. When the
+victim cache line is in M state, the line is written to its home cache agent
+which can be either local or remote.
+.It Li L3_LINES_OUT.E_STATE
+.Pq Event 0BH , Umask 02H
+Counts the number of L3 lines victimized that were in the E state.
+.It Li L3_LINES_OUT.S_STATE
+.Pq Event 0BH , Umask 04H
+Counts the number of L3 lines victimized that were in the S state.
+.It Li L3_LINES_OUT.I_STATE
+.Pq Event 0BH , Umask 08H
+Counts the number of L3 lines victimized that were in the I state.
+.It Li L3_LINES_OUT.F_STATE
+.Pq Event 0BH , Umask 10H
+Counts the number of L3 lines victimized that were in the F state.
+.It Li L3_LINES_OUT.ANY
+.Pq Event 0BH , Umask 1FH
+Counts the number of L3 lines victimized in any state.
+.It Li QHL_REQUESTS.IOH_READS
+.Pq Event 20H , Umask 01H
+Counts number of Quickpath Home Logic read requests from the IOH.
+.It Li QHL_REQUESTS.IOH_WRITES
+.Pq Event 20H , Umask 02H
+Counts number of Quickpath Home Logic write requests from the IOH.
+.It Li QHL_REQUESTS.REMOTE_READS
+.Pq Event 20H , Umask 04H
+Counts number of Quickpath Home Logic read requests from a remote socket.
+.It Li QHL_REQUESTS.REMOTE_WRITES
+.Pq Event 20H , Umask 08H
+Counts number of Quickpath Home Logic write requests from a remote socket.
+.It Li QHL_REQUESTS.LOCAL_READS
+.Pq Event 20H , Umask 10H
+Counts number of Quickpath Home Logic read requests from the local socket.
+.It Li QHL_REQUESTS.LOCAL_WRITES
+.Pq Event 20H , Umask 20H
+Counts number of Quickpath Home Logic write requests from the local socket.
+.It Li QHL_CYCLES_FULL.IOH
+.Pq Event 21H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH are full.
+.It Li QHL_CYCLES_FULL.REMOTE
+.Pq Event 21H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker
+are full.
+.It Li QHL_CYCLES_FULL.LOCAL
+.Pq Event 21H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker are
+full.
+.It Li QHL_CYCLES_NOT_EMPTY.IOH
+.Pq Event 22H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH is busy.
+.It Li QHL_CYCLES_NOT_EMPTY.REMOTE
+.Pq Event 22H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker is
+busy.
+.It Li QHL_CYCLES_NOT_EMPTY.LOCAL
+.Pq Event 22H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker is
+busy.
+.It Li QHL_OCCUPANCY.IOH
+.Pq Event 23H , Umask 01H
+QHL IOH tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.REMOTE
+.Pq Event 23H , Umask 02H
+QHL remote tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.LOCAL
+.Pq Event 23H , Umask 04H
+QHL local tracker allocate to deallocate read occupancy.
+.It Li QHL_ADDRESS_CONFLICTS.2WAY
+.Pq Event 24H , Umask 02H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 2
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_ADDRESS_CONFLICTS.3WAY
+.Pq Event 24H , Umask 04H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 3
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_CONFLICT_CYCLES.IOH
+.Pq Event 25H , Umask 01H
+Counts cycles the Quickpath Home Logic IOH Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.REMOTE
+.Pq Event 25H , Umask 02H
+Counts cycles the Quickpath Home Logic Remote Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.LOCAL
+.Pq Event 25H , Umask 04H
+Counts cycles the Quickpath Home Logic Local Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_TO_QMC_BYPASS
+.Pq Event 26H , Umask 01H
+Counts number of requests to the Quickpath Memory Controller that bypass the
+Quickpath Home Logic. All local accesses can be bypassed. For remote
+requests, only read requests can be bypassed.
+.It Li QMC_NORMAL_FULL.READ.CH0
+.Pq Event 27H , Umask 01H
+Uncore cycles all the entries in the DRAM channel 0 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.READ.CH1
+.Pq Event 27H , Umask 02H
+Uncore cycles all the entries in the DRAM channel 1 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.READ.CH2
+.Pq Event 27H , Umask 04H
+Uncore cycles all the entries in the DRAM channel 2 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH0
+.Pq Event 27H , Umask 08H
+Uncore cycles all the entries in the DRAM channel 0 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH1
+.Pq Event 27H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH2
+.Pq Event 27H , Umask 20H
+Uncore cycles all the entries in the DRAM channel 2 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_ISOC_FULL.READ.CH0
+.Pq Event 28H , Umask 01H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH1
+.Pq Event 28H , Umask 02H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH2
+.Pq Event 28H , Umask 04H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.WRITE.CH0
+.Pq Event 28H , Umask 08H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH1
+.Pq Event 28H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH2
+.Pq Event 28H , Umask 20H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_BUSY.READ.CH0
+.Pq Event 29H , Umask 01H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 0.
+.It Li QMC_BUSY.READ.CH1
+.Pq Event 29H , Umask 02H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 1.
+.It Li QMC_BUSY.READ.CH2
+.Pq Event 29H , Umask 04H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 2.
+.It Li QMC_BUSY.WRITE.CH0
+.Pq Event 29H , Umask 08H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 0.
+.It Li QMC_BUSY.WRITE.CH1
+.Pq Event 29H , Umask 10H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 1.
+.It Li QMC_BUSY.WRITE.CH2
+.Pq Event 29H , Umask 20H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 2.
+.It Li QMC_OCCUPANCY.CH0
+.Pq Event 2AH , Umask 01H
+IMC channel 0 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH1
+.Pq Event 2AH , Umask 02H
+IMC channel 1 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH2
+.Pq Event 2AH , Umask 04H
+IMC channel 2 normal read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH0
+.Pq Event 2BH , Umask 01H
+IMC channel 0 isochronous read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH1
+.Pq Event 2BH , Umask 02H
+IMC channel 1 isochronous read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH2
+.Pq Event 2BH , Umask 04H
+IMC channel 2 isochronous read request occupancy.
+.It Li QMC_ISSOC_READS.ANY
+.Pq Event 2BH , Umask 07H
+IMC isochronous read request occupancy.
+.It Li QMC_NORMAL_READS.CH0
+.Pq Event 2CH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 medium and low
+priority read requests. The QMC channel 0 normal read occupancy divided by
+this count provides the average QMC channel 0 read latency.
+.It Li QMC_NORMAL_READS.CH1
+.Pq Event 2CH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 medium and low
+priority read requests. The QMC channel 1 normal read occupancy divided by
+this count provides the average QMC channel 1 read latency.
+.It Li QMC_NORMAL_READS.CH2
+.Pq Event 2CH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 medium and low
+priority read requests. The QMC channel 2 normal read occupancy divided by
+this count provides the average QMC channel 2 read latency.
+.It Li QMC_NORMAL_READS.ANY
+.Pq Event 2CH , Umask 07H
+Counts the number of Quickpath Memory Controller medium and low priority
+read requests. The QMC normal read occupancy divided by this count provides
+the average QMC read latency.
+.It Li QMC_HIGH_PRIORITY_READS.CH0
+.Pq Event 2DH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH1
+.Pq Event 2DH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH2
+.Pq Event 2DH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.ANY
+.Pq Event 2DH , Umask 07H
+Counts the number of Quickpath Memory Controller high priority isochronous
+read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH0
+.Pq Event 2EH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH1
+.Pq Event 2EH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH2
+.Pq Event 2EH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.ANY
+.Pq Event 2EH , Umask 07H
+Counts the number of Quickpath Memory Controller critical priority
+isochronous read requests.
+.It Li QMC_WRITES.FULL.CH0
+.Pq Event 2FH , Umask 01H
+Counts number of full cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.FULL.CH1
+.Pq Event 2FH , Umask 02H
+Counts number of full cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.FULL.CH2
+.Pq Event 2FH , Umask 04H
+Counts number of full cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.FULL.ANY
+.Pq Event 2FH , Umask 07H
+Counts number of full cache line writes to DRAM.
+.It Li QMC_WRITES.PARTIAL.CH0
+.Pq Event 2FH , Umask 08H
+Counts number of partial cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.PARTIAL.CH1
+.Pq Event 2FH , Umask 10H
+Counts number of partial cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.PARTIAL.CH2
+.Pq Event 2FH , Umask 20H
+Counts number of partial cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.PARTIAL.ANY
+.Pq Event 2FH , Umask 38H
+Counts number of partial cache line writes to DRAM.
+.It Li QMC_CANCEL.CH0
+.Pq Event 30H , Umask 01H
+Counts number of DRAM channel 0 cancel requests.
+.It Li QMC_CANCEL.CH1
+.Pq Event 30H , Umask 02H
+Counts number of DRAM channel 1 cancel requests.
+.It Li QMC_CANCEL.CH2
+.Pq Event 30H , Umask 04H
+Counts number of DRAM channel 2 cancel requests.
+.It Li QMC_CANCEL.ANY
+.Pq Event 30H , Umask 07H
+Counts number of DRAM cancel requests.
+.It Li QMC_PRIORITY_UPDATES.CH0
+.Pq Event 31H , Umask 01H
+Counts number of DRAM channel 0 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH1
+.Pq Event 31H , Umask 02H
+Counts number of DRAM channel 1 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH2
+.Pq Event 31H , Umask 04H
+Counts number of DRAM channel 2 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.ANY
+.Pq Event 31H , Umask 07H
+Counts number of DRAM priority updates. A priority update occurs when an
+ISOC high or critical request is received by the QHL and there is a matching
+request with normal priority that has already been issued to the QMC. In
+this instance, the QHL will send a priority update to QMC to expedite the
+request.
+.It Li QHL_FRC_ACK_CNFLTS.LOCAL
+.Pq Event 33H , Umask 04H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the local home.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0
+.Pq Event 40H , Umask 01H
+Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0
+.Pq Event 40H , Umask 02H
+Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0
+.Pq Event 40H , Umask 04H
+Counts cycles the Quickpath outbound link 0 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1
+.Pq Event 40H , Umask 08H
+Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1
+.Pq Event 40H , Umask 10H
+Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1
+.Pq Event 40H , Umask 20H
+Counts cycles the Quickpath outbound link 1 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_0
+.Pq Event 40H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_1
+.Pq Event 40H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0
+.Pq Event 41H , Umask 01H
+Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0
+.Pq Event 41H , Umask 02H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0
+.Pq Event 41H , Umask 04H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1
+.Pq Event 41H , Umask 08H
+Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1
+.Pq Event 41H , Umask 10H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1
+.Pq Event 41H , Umask 20H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_0
+.Pq Event 41H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_1
+.Pq Event 41H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_HEADER.BUSY.LINK_0
+.Pq Event 42H , Umask 02H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 0 is busy.
+.It Li QPI_TX_HEADER.BUSY.LINK_1
+.Pq Event 42H , Umask 08H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 1 is busy.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0
+.Pq Event 43H , Umask 01H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1
+.Pq Event 43H , Umask 02H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li DRAM_OPEN.CH0
+.Pq Event 60H , Umask 01H
+Counts number of DRAM Channel 0 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH1
+.Pq Event 60H , Umask 02H
+Counts number of DRAM Channel 1 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH2
+.Pq Event 60H , Umask 04H
+Counts number of DRAM Channel 2 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_PAGE_CLOSE.CH0
+.Pq Event 61H , Umask 01H
+DRAM channel 0 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH1
+.Pq Event 61H , Umask 02H
+DRAM channel 1 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH2
+.Pq Event 61H , Umask 04H
+DRAM channel 2 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH0
+.Pq Event 62H , Umask 01H
+Counts the number of precharges (PRE) that were issued to DRAM channel 0
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH1
+.Pq Event 62H , Umask 02H
+Counts the number of precharges (PRE) that were issued to DRAM channel 1
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH2
+.Pq Event 62H , Umask 04H
+Counts the number of precharges (PRE) that were issued to DRAM channel 2
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_READ_CAS.CH0
+.Pq Event 63H , Umask 01H
+Counts the number of times a read CAS command was issued on DRAM channel 0.
+.It Li DRAM_READ_CAS.AUTOPRE_CH0
+.Pq Event 63H , Umask 02H
+Counts the number of times a read CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH1
+.Pq Event 63H , Umask 04H
+Counts the number of times a read CAS command was issued on DRAM channel 1.
+.It Li DRAM_READ_CAS.AUTOPRE_CH1
+.Pq Event 63H , Umask 08H
+Counts the number of times a read CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH2
+.Pq Event 63H , Umask 10H
+Counts the number of times a read CAS command was issued on DRAM channel 2.
+.It Li DRAM_READ_CAS.AUTOPRE_CH2
+.Pq Event 63H , Umask 20H
+Counts the number of times a read CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH0
+.Pq Event 64H , Umask 01H
+Counts the number of times a write CAS command was issued on DRAM channel 0.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH0
+.Pq Event 64H , Umask 02H
+Counts the number of times a write CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH1
+.Pq Event 64H , Umask 04H
+Counts the number of times a write CAS command was issued on DRAM channel 1.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH1
+.Pq Event 64H , Umask 08H
+Counts the number of times a write CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH2
+.Pq Event 64H , Umask 10H
+Counts the number of times a write CAS command was issued on DRAM channel 2.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH2
+.Pq Event 64H , Umask 20H
+Counts the number of times a write CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_REFRESH.CH0
+.Pq Event 65H , Umask 01H
+Counts number of DRAM channel 0 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH1
+.Pq Event 65H , Umask 02H
+Counts number of DRAM channel 1 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH2
+.Pq Event 65H , Umask 04H
+Counts number of DRAM channel 2 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_PRE_ALL.CH0
+.Pq Event 66H , Umask 01H
+Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH1
+.Pq Event 66H , Umask 02H
+Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH2
+.Pq Event 66H , Umask 04H
+Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.h b/lib/libpmc/pmc.h
new file mode 100644
index 0000000..5b1ad07
--- /dev/null
+++ b/lib/libpmc/pmc.h
@@ -0,0 +1,112 @@
+/*-
+ * Copyright (c) 2003,2004 Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PMC_H_
+#define _PMC_H_
+
+#include <sys/cdefs.h>
+#include <sys/pmc.h>
+
+/*
+ * Driver statistics (counters); see pmc_get_driver_stats().
+ */
+struct pmc_driverstats {
+	int	pm_intr_ignored;	/* #interrupts ignored */
+	int	pm_intr_processed;	/* #interrupts processed */
+	int	pm_intr_bufferfull;	/* #interrupts with ENOSPC */
+	int	pm_syscalls;		/* #syscalls */
+	int	pm_syscall_errors;	/* #syscalls with errors */
+	int	pm_buffer_requests;	/* #buffer requests */
+	int	pm_buffer_requests_failed; /* #failed buffer requests */
+	int	pm_log_sweeps;		/* #sample buffer processing passes */
+};
+
+/*
+ * CPU information; see pmc_cpuinfo().
+ */
+struct pmc_cpuinfo {
+	enum pmc_cputype pm_cputype;	/* the kind of CPU */
+	uint32_t	pm_ncpu;	/* number of CPUs */
+	uint32_t	pm_npmc;	/* number of PMCs per CPU */
+	uint32_t	pm_nclass;	/* number of classes of PMCs */
+	struct pmc_classinfo pm_classes[PMC_CLASS_MAX]; /* PMC class info */
+};
+
+/*
+ * Current PMC state for one CPU; see pmc_pmcinfo().
+ */
+struct pmc_pmcinfo {
+	int32_t		pm_cpu;		/* CPU number */
+	struct pmc_info	pm_pmcs[];	/* flexible array: NPMC structs */
+};
+
+/*
+ * Prototypes of the functions declared by this header for libpmc users.
+ */
+
+__BEGIN_DECLS
+int	pmc_allocate(const char *_ctrspec, enum pmc_mode _mode, uint32_t _flags,
+    int _cpu, pmc_id_t *_pmcid);
+int	pmc_attach(pmc_id_t _pmcid, pid_t _pid);
+int	pmc_capabilities(pmc_id_t _pmc, uint32_t *_caps);
+int	pmc_configure_logfile(int _fd);
+int	pmc_flush_logfile(void);
+int	pmc_detach(pmc_id_t _pmcid, pid_t _pid);
+int	pmc_disable(int _cpu, int _pmc);
+int	pmc_enable(int _cpu, int _pmc);
+int	pmc_get_driver_stats(struct pmc_driverstats *_gms);
+int	pmc_get_msr(pmc_id_t _pmc, uint32_t *_msr);
+int	pmc_init(void);
+int	pmc_read(pmc_id_t _pmc, pmc_value_t *_value);
+int	pmc_release(pmc_id_t _pmc);
+int	pmc_rw(pmc_id_t _pmc, pmc_value_t _newvalue, pmc_value_t *_oldvalue);
+int	pmc_set(pmc_id_t _pmc, pmc_value_t _value);
+int	pmc_start(pmc_id_t _pmc);
+int	pmc_stop(pmc_id_t _pmc);
+int	pmc_width(pmc_id_t _pmc, uint32_t *_width);
+int	pmc_write(pmc_id_t _pmc, pmc_value_t _value);
+int	pmc_writelog(uint32_t _udata);
+
+int	pmc_ncpu(void);
+int	pmc_npmc(int _cpu);
+int	pmc_cpuinfo(const struct pmc_cpuinfo **_cpu_info);
+int	pmc_pmcinfo(int _cpu, struct pmc_pmcinfo **_pmc_info);
+
+const char	*pmc_name_of_capability(uint32_t _c);
+const char	*pmc_name_of_class(enum pmc_class _pc);
+const char	*pmc_name_of_cputype(enum pmc_cputype _cp);
+const char	*pmc_name_of_disposition(enum pmc_disp _pd);
+const char	*pmc_name_of_event(enum pmc_event _pe);
+const char	*pmc_name_of_mode(enum pmc_mode _pm);
+const char	*pmc_name_of_state(enum pmc_state _ps);
+
+int	pmc_event_names_of_class(enum pmc_class _cl, const char ***_eventnames,
+    int *_nevents);
+__END_DECLS
+
+#endif
diff --git a/lib/libpmc/pmc.iaf.3 b/lib/libpmc/pmc.iaf.3
new file mode 100644
index 0000000..ec9f21c
--- /dev/null
+++ b/lib/libpmc/pmc.iaf.3
@@ -0,0 +1,149 @@
+.\" Copyright (c) 2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 14, 2008
+.Dt PMC.IAF 3
+.Os
+.Sh NAME
+.Nm pmc.iaf
+.Nd measurement events for
+.Tn Intel
+fixed function performance counters
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+fixed-function PMCs are present in CPUs that conform to version 2 or
+later of the
+.Tn Intel
+Performance Measurement Architecture.
+Each fixed-function PMC measures a specific hardware event.
+The number of fixed-function PMCs implemented in a CPU can vary.
+The number of fixed-function PMCs present can be determined at runtime
+by using function
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel fixed-function PMCs are documented in
+.Rs
+.%B "IA-32 Intel(R) Architecture Software Developer's Manual"
+.%T "Volume 3: System Programming Guide"
+.%N "Order Number 253669-027US"
+.%D July 2008
+.%Q "Intel Corporation"
+.Re
+.Pp
+.Ss PMC Capabilities
+Fixed-function PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta \&No
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta \&No
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta \&No
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Class Name Prefix
+These PMCs are named using a class name prefix of
+.Dq Li iaf- .
+.Ss Event Qualifiers (Fixed Function PMCs)
+These PMCs support the following modifiers:
+.Bl -tag -width indent
+.It Li os
+Configure the PMC to count events occurring at ring level 0.
+.It Li usr
+Configure the PMC to count events occurring at ring levels 1, 2
+or 3.
+.It Li anythread
+.Pq Tn Atom CPUs
+Configure the PMC to count events on all logical processors sharing a
+processor core.
+The default is to count events on the current logical processor.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers is specified, the default is to enable both.
+.Ss Event Specifiers (Fixed Function PMCs)
+The fixed function PMCs are selectable using the following
+event names:
+.Bl -tag -width indent
+.It Li INSTR_RETIRED.ANY
+.Pq Fixed Function Counter 0
+The number of instructions retired.
+.It Li CPU_CLK_UNHALTED.CORE
+.Pq Fixed Function Counter 1
+The number of core cycles for which the core is not halted.
+.It Li CPU_CLK_UNHALTED.REF
+.Pq Fixed Function Counter 2
+The number of reference cycles for which the core is not halted.
+.El
+.Sh EXAMPLES
+To measure the number of core cycles for which the core was not halted
+use the event specifier
+.Qq iaf-cpu-clk-unhalted.core .
+.Pp
+To measure the number of user instructions retired use the event specifier
+.Qq iaf-instr-retired.any,usr .
+.Pp
+To measure the number of user instructions retired on all logical processors
+in an
+.Tn Atom
+CPU, use the event specifier
+.Qq iaf-instr-retired.any,usr,anythread .
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.k7.3 b/lib/libpmc/pmc.k7.3
new file mode 100644
index 0000000..2775d4f
--- /dev/null
+++ b/lib/libpmc/pmc.k7.3
@@ -0,0 +1,266 @@
+.\" Copyright (c) 2003-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd October 4, 2008
+.Dt PMC.K7 3
+.Os
+.Sh NAME
+.Nm pmc.k7
+.Nd measurement events for
+.Tn AMD
+.Tn Athlon
+(K7 family) CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+AMD K7 PMCs are present in the
+.Tn "AMD Athlon"
+series of CPUs and are documented in:
+.Rs
+.%B "AMD Athlon Processor x86 Code Optimization Guide"
+.%N "Publication No. 22007"
+.%D "February 2002"
+.%Q "Advanced Micro Devices, Inc."
+.Re
+.Ss PMC Features
+AMD K7 PMCs are 48 bits wide.
+Each K7 CPU contains 4 PMCs with the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+.Pp
+Event specifiers for AMD K7 PMCs can have the following optional
+qualifiers:
+.Bl -tag -width indent
+.It Li count= Ns Ar value
+Configure the counter to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the counter to only count negated-to-asserted transitions
+of the conditions expressed by the other qualifiers.
+In other words, the counter will increment only once whenever a given
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li count
+qualifier is present, making the counter increment when the
+number of events per cycle is less than the value specified by
+the
+.Dq Li count
+qualifier.
+.It Li os
+Configure the PMC to count events happening at privilege level 0.
+.It Li unitmask= Ns Ar mask
+This qualifier is used to further qualify a select few events,
+.Dq Li k7-dc-refills-from-l2 ,
+.Dq Li k7-dc-refills-from-system
+and
+.Dq Li k7-dc-writebacks .
+Here
+.Ar mask
+is a string of the following characters optionally separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li m
+Count operations for lines in the
+.Dq Modified
+state.
+.It Li o
+Count operations for lines in the
+.Dq Owner
+state.
+.It Li e
+Count operations for lines in the
+.Dq Exclusive
+state.
+.It Li s
+Count operations for lines in the
+.Dq Shared
+state.
+.It Li i
+Count operations for lines in the
+.Dq Invalid
+state.
+.El
+.Pp
+If no
+.Dq Li unitmask
+qualifier is specified, the default is to count events for cache
+lines in any of the above states.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers is specified, the default is to enable both.
+.Ss AMD K7 Event Specifiers
+The event specifiers supported on AMD K7 PMCs are:
+.Bl -tag -width indent
+.It Li k7-dc-accesses
+.Pq Event 40H
+Count data cache accesses.
+.It Li k7-dc-misses
+.Pq Event 41H
+Count data cache misses.
+.It Li k7-dc-refills-from-l2 Op Li ,unitmask= Ns Ar mask
+.Pq Event 42H
+Count data cache refills from L2 cache.
+This event may be further qualified using the
+.Dq Li unitmask
+qualifier.
+.It Li k7-dc-refills-from-system Op Li ,unitmask= Ns Ar mask
+.Pq Event 43H
+Count data cache refills from system memory.
+This event may be further qualified using the
+.Dq Li unitmask
+qualifier.
+.It Li k7-dc-writebacks Op Li ,unitmask= Ns Ar mask
+.Pq Event 44H
+Count data cache writebacks.
+This event may be further qualified using the
+.Dq Li unitmask
+qualifier.
+.It Li k7-hardware-interrupts
+.Pq Event CFH
+Count the number of taken hardware interrupts.
+.It Li k7-ic-fetches
+.Pq Event 80H
+Count instruction cache fetches.
+.It Li k7-ic-misses
+.Pq Event 81H
+Count instruction cache misses.
+.It Li k7-interrupts-masked-cycles
+.Pq Event CDH
+Count the number of cycles when the processor's
+.Va IF
+flag was zero.
+.It Li k7-interrupts-masked-while-pending-cycles
+.Pq Event CEH
+Count the number of cycles interrupts were masked while pending due
+to the processor's
+.Va IF
+flag being zero.
+.It Li k7-l1-and-l2-dtlb-misses
+.Pq Event 46H
+Count L1 and L2 DTLB misses.
+.It Li k7-l1-dtlb-miss-and-l2-dtlb-hits
+.Pq Event 45H
+Count L1 DTLB misses that are L2 DTLB hits.
+.It Li k7-l1-itlb-misses
+.Pq Event 84H
+Count L1 ITLB misses that are L2 ITLB hits.
+.It Li k7-l1-l2-itlb-misses
+.Pq Event 85H
+Count L1 (and L2) ITLB misses.
+.It Li k7-misaligned-references
+.Pq Event 47H
+Count misaligned data references.
+.It Li k7-retired-branches
+.Pq Event C2H
+Count all retired branches (conditional, unconditional, exceptions
+and interrupts).
+.It Li k7-retired-branches-mispredicted
+.Pq Event C3H
+Count all mispredicted retired branches.
+.It Li k7-retired-far-control-transfers
+.Pq Event C6H
+Count retired far control transfers.
+.It Li k7-retired-instructions
+.Pq Event C0H
+Count all retired instructions.
+.It Li k7-retired-ops
+.Pq Event C1H
+Count retired ops.
+.It Li k7-retired-resync-branches
+.Pq Event C7H
+Count retired resync branches (non control transfer branches).
+.It Li k7-retired-taken-branches
+.Pq Event C4H
+Count retired taken branches.
+.It Li k7-retired-taken-branches-mispredicted
+.Pq Event C5H
+Count mispredicted taken branches that were retired.
+.El
+.Ss Event Name Aliases
+The following table shows the mapping between the PMC-independent
+aliases supported by
+.Lb libpmc
+and the underlying hardware events used.
+.Bl -column "branch-mispredicts" "Description"
+.It Em Alias Ta Em Event
+.It Li branches Ta Li k7-retired-branches
+.It Li branch-mispredicts Ta Li k7-retired-branches-mispredicted
+.It Li dc-misses Ta Li k7-dc-misses
+.It Li ic-misses Ta Li k7-ic-misses
+.It Li instructions Ta Li k7-retired-instructions
+.It Li interrupts Ta Li k7-hardware-interrupts
+.It Li unhalted-cycles Ta (unsupported)
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.k8.3 b/lib/libpmc/pmc.k8.3
new file mode 100644
index 0000000..995bfac
--- /dev/null
+++ b/lib/libpmc/pmc.k8.3
@@ -0,0 +1,800 @@
+.\" Copyright (c) 2003-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd October 4, 2008
+.Dt PMC.K8 3
+.Os
+.Sh NAME
+.Nm pmc.k8
+.Nd measurement events for
+.Tn AMD
+.Tn Athlon 64
+(K8 family) CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+AMD K8 PMCs are present in the
+.Tn "AMD Athlon64"
+and
+.Tn "AMD Opteron"
+series of CPUs.
+They are documented in the
+.Rs
+.%B "BIOS and Kernel Developer's Guide for the AMD Athlon(tm) 64 and AMD Opteron Processors"
+.%N "Publication No. 26094"
+.%D "April 2004"
+.%Q "Advanced Micro Devices, Inc."
+.Re
+.Ss PMC Features
+AMD K8 PMCs are 48 bits wide.
+Each CPU contains 4 PMCs with the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+.Pp
+Event specifiers for AMD K8 PMCs can have the following optional
+qualifiers:
+.Bl -tag -width indent
+.It Li count= Ns Ar value
+Configure the counter to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the counter to only count negated-to-asserted transitions
+of the conditions expressed by the other fields.
+In other words, the counter will increment only once whenever a given
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li count
+qualifier is present, making the counter increment when the
+number of events per cycle is less than the value specified by
+the
+.Dq Li count
+qualifier.
+.It Li mask= Ns Ar qualifier
+Many event specifiers for AMD K8 PMCs need to be additionally
+qualified using a mask qualifier.
+These additional qualifiers are event-specific and are documented
+along with their associated event specifiers below.
+.It Li os
+Configure the PMC to count events happening at privilege level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers is specified, the default is to enable both.
+.Ss AMD K8 Event Specifiers
+The event specifiers supported on AMD K8 PMCs are:
+.Bl -tag -width indent
+.It Li k8-bu-cpu-clk-unhalted
+.Pq Event 76H
+Count the number of clock cycles when the CPU is not in the HLT or
+STPCLK states.
+.It Li k8-bu-fill-request-l2-miss Op Li ,mask= Ns Ar qualifier
+.Pq Event 7EH
+Count fill requests that missed in the L2 cache.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li dc-fill
+Count data cache fill requests.
+.It Li ic-fill
+Count instruction cache fill requests.
+.It Li tlb-reload
+Count TLB reloads.
+.El
+.Pp
+The default is to count all types of requests.
+.It Li k8-bu-fill-into-l2 Op Li ,mask= Ns Ar qualifier
+.Pq Event 7FH
+The number of lines written to and from the L2 cache.
+The event may be further qualified by using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li dirty-l2-victim
+Count lines written into L2 cache due to victim writebacks from the
+Icache or Dcache, TLB page table walks or hardware data prefetches.
+.It Li victim-from-l2
+Count writebacks of dirty lines from L2 to the system.
+.El
+.It Li k8-bu-internal-l2-request Op Li ,mask= Ns Ar qualifier
+.Pq Event 7DH
+Count internally generated requests to the L2 cache.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li cancelled
+Count cancelled requests.
+.It Li dc-fill
+Count data cache fill requests.
+.It Li ic-fill
+Count instruction cache fill requests.
+.It Li tag-snoop
+Count tag snoop requests.
+.It Li tlb-reload
+Count TLB reloads.
+.El
+.Pp
+The default is to count all types of requests.
+.It Li k8-dc-access
+.Pq Event 40H
+Count data cache accesses including microcode scratch pad accesses.
+.It Li k8-dc-copyback Op Li ,mask= Ns Ar qualifier
+.Pq Event 44H
+Count data cache copyback operations.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li exclusive
+Count operations for lines in the
+.Dq exclusive
+state.
+.It Li invalid
+Count operations for lines in the
+.Dq invalid
+state.
+.It Li modified
+Count operations for lines in the
+.Dq modified
+state.
+.It Li owner
+Count operations for lines in the
+.Dq owner
+state.
+.It Li shared
+Count operations for lines in the
+.Dq shared
+state.
+.El
+.Pp
+The default is to count operations for lines in all the
+above states.
+.It Li k8-dc-dcache-accesses-by-locks Op Li ,mask= Ns Ar qualifier
+.Pq Event 4CH
+Count data cache accesses by lock instructions.
+This event is only available on processors of revision C or later
+vintage.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li accesses
+Count data cache accesses by lock instructions.
+.It Li misses
+Count data cache misses by lock instructions.
+.El
+.Pp
+The default is to count all accesses.
+.It Li k8-dc-dispatched-prefetch-instructions Op Li ,mask= Ns Ar qualifier
+.Pq Event 4BH
+Count the number of dispatched prefetch instructions.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li load
+Count load operations.
+.It Li nta
+Count non-temporal operations.
+.It Li store
+Count store operations.
+.El
+.Pp
+The default is to count all operations.
+.It Li k8-dc-l1-dtlb-miss-and-l2-dtlb-hit
+.Pq Event 45H
+Count L1 DTLB misses that are L2 DTLB hits.
+.It Li k8-dc-l1-dtlb-miss-and-l2-dtlb-miss
+.Pq Event 46H
+Count L1 DTLB misses that are also misses in the L2 DTLB.
+.It Li k8-dc-microarchitectural-early-cancel-of-an-access
+.Pq Event 49H
+Count microarchitectural early cancels of data cache accesses.
+.It Li k8-dc-microarchitectural-late-cancel-of-an-access
+.Pq Event 48H
+Count microarchitectural late cancels of data cache accesses.
+.It Li k8-dc-misaligned-data-reference
+.Pq Event 47H
+Count misaligned data references.
+.It Li k8-dc-miss
+.Pq Event 41H
+Count data cache misses.
+.It Li k8-dc-one-bit-ecc-error Op Li ,mask= Ns Ar qualifier
+.Pq Event 4AH
+Count one bit ECC errors found by the scrubber.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li scrubber
+Count scrubber detected errors.
+.It Li piggyback
+Count piggyback scrubber errors.
+.El
+.Pp
+The default is to count both kinds of errors.
+.It Li k8-dc-refill-from-l2 Op Li ,mask= Ns Ar qualifier
+.Pq Event 42H
+Count data cache refills from L2 cache.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li exclusive
+Count operations for lines in the
+.Dq exclusive
+state.
+.It Li invalid
+Count operations for lines in the
+.Dq invalid
+state.
+.It Li modified
+Count operations for lines in the
+.Dq modified
+state.
+.It Li owner
+Count operations for lines in the
+.Dq owner
+state.
+.It Li shared
+Count operations for lines in the
+.Dq shared
+state.
+.El
+.Pp
+The default is to count operations for lines in all the
+above states.
+.It Li k8-dc-refill-from-system Op Li ,mask= Ns Ar qualifier
+.Pq Event 43H
+Count data cache refills from system memory.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li exclusive
+Count operations for lines in the
+.Dq exclusive
+state.
+.It Li invalid
+Count operations for lines in the
+.Dq invalid
+state.
+.It Li modified
+Count operations for lines in the
+.Dq modified
+state.
+.It Li owner
+Count operations for lines in the
+.Dq owner
+state.
+.It Li shared
+Count operations for lines in the
+.Dq shared
+state.
+.El
+.Pp
+The default is to count operations for lines in all the
+above states.
+.It Li k8-fp-cycles-with-no-fpu-ops-retired
+.Pq Event 01H
+Count cycles when no FPU ops were retired.
+This event is supported in revision B and later CPUs.
+.It Li k8-fp-dispatched-fpu-fast-flag-ops
+.Pq Event 02H
+Count dispatched FPU ops that use the fast flag interface.
+This event is supported in revision B and later CPUs.
+.It Li k8-fp-dispatched-fpu-ops Op Li ,mask= Ns Ar qualifier
+.Pq Event 00H
+Count the number of dispatched FPU ops.
+This event is supported in revision B and later CPUs.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li add-pipe-excluding-junk-ops
+Count add pipe ops excluding junk ops.
+.It Li add-pipe-junk-ops
+Count junk ops in the add pipe.
+.It Li multiply-pipe-excluding-junk-ops
+Count multiply pipe ops excluding junk ops.
+.It Li multiply-pipe-junk-ops
+Count junk ops in the multiply pipe.
+.It Li store-pipe-excluding-junk-ops
+Count store pipe ops excluding junk ops.
+.It Li store-pipe-junk-ops
+Count junk ops in the store pipe.
+.El
+.Pp
+The default is to count all types of ops.
+.It Li k8-fr-decoder-empty
+.Pq Event D0H
+Count cycles when there was nothing to dispatch (i.e., the decoder
+was empty).
+.It Li k8-fr-dispatch-stall-for-segment-load
+.Pq Event D4H
+Count dispatch stalls for segment loads.
+.It Li k8-fr-dispatch-stall-for-serialization
+.Pq Event D3H
+Count dispatch stalls for serialization.
+.It Li k8-fr-dispatch-stall-from-branch-abort-to-retire
+.Pq Event D2H
+Count dispatch stalls from branch abort to retiral.
+.It Li k8-fr-dispatch-stall-when-fpu-is-full
+.Pq Event D7H
+Count dispatch stalls when the FPU is full.
+.It Li k8-fr-dispatch-stall-when-ls-is-full
+.Pq Event D8H
+Count dispatch stalls when the load/store unit is full.
+.It Li k8-fr-dispatch-stall-when-reorder-buffer-is-full
+.Pq Event D5H
+Count dispatch stalls when the reorder buffer is full.
+.It Li k8-fr-dispatch-stall-when-reservation-stations-are-full
+.Pq Event D6H
+Count dispatch stalls when reservation stations are full.
+.It Li k8-fr-dispatch-stall-when-waiting-far-xfer-or-resync-branch-pending
+.Pq Event DAH
+Count dispatch stalls when a far control transfer or a resync branch
+is pending.
+.It Li k8-fr-dispatch-stall-when-waiting-for-all-to-be-quiet
+.Pq Event D9H
+Count dispatch stalls when waiting for all to be quiet.
+.\" XXX What does "waiting for all to be quiet" mean?
+.It Li k8-fr-dispatch-stalls
+.Pq Event D1H
+Count all dispatch stalls.
+.It Li k8-fr-fpu-exceptions Op Li ,mask= Ns Ar qualifier
+.Pq Event DBH
+Count FPU exceptions.
+This event is supported in revision B and later CPUs.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li sse-and-x87-microtraps
+Count SSE and x87 microtraps.
+.It Li sse-reclass-microfaults
+Count SSE reclass microfaults.
+.It Li sse-retype-microfaults
+Count SSE retype microfaults.
+.It Li x87-reclass-microfaults
+Count x87 reclass microfaults.
+.El
+.Pp
+The default is to count all types of exceptions.
+.It Li k8-fr-interrupts-masked-cycles
+.Pq Event CDH
+Count cycles when interrupts were masked (i.e., cycles when CPU RFLAGS
+field IF was zero).
+.It Li k8-fr-interrupts-masked-while-pending-cycles
+.Pq Event CEH
+Count cycles while interrupts were masked while pending (i.e., cycles
+when INTR was asserted while CPU RFLAGS field IF was zero).
+.It Li k8-fr-number-of-breakpoints-for-dr0
+.Pq Event DCH
+Count the number of breakpoints for DR0.
+.It Li k8-fr-number-of-breakpoints-for-dr1
+.Pq Event DDH
+Count the number of breakpoints for DR1.
+.It Li k8-fr-number-of-breakpoints-for-dr2
+.Pq Event DEH
+Count the number of breakpoints for DR2.
+.It Li k8-fr-number-of-breakpoints-for-dr3
+.Pq Event DFH
+Count the number of breakpoints for DR3.
+.It Li k8-fr-retired-branches
+.Pq Event C2H
+Count retired branches including exceptions and interrupts.
+.It Li k8-fr-retired-branches-mispredicted
+.Pq Event C3H
+Count mispredicted retired branches.
+.It Li k8-fr-retired-far-control-transfers
+.Pq Event C6H
+Count retired far control transfers (which are always mispredicted).
+.It Li k8-fr-retired-fastpath-double-op-instructions Op Li ,mask= Ns Ar qualifier
+.Pq Event CCH
+Count retired fastpath double op instructions.
+This event is supported in revision B and later CPUs.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li low-op-pos-0
+Count instructions with the low op in position 0.
+.It Li low-op-pos-1
+Count instructions with the low op in position 1.
+.It Li low-op-pos-2
+Count instructions with the low op in position 2.
+.El
+.Pp
+The default is to count all types of instructions.
+.It Li k8-fr-retired-fpu-instructions Op Li ,mask= Ns Ar qualifier
+.Pq Event CBH
+Count retired FPU instructions.
+This event is supported in revision B and later CPUs.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li mmx-3dnow
+Count MMX and 3DNow!\& instructions.
+.It Li packed-sse-sse2
+Count packed SSE and SSE2 instructions.
+.It Li scalar-sse-sse2
+Count scalar SSE and SSE2 instructions.
+.It Li x87
+Count x87 instructions.
+.El
+.Pp
+The default is to count all types of instructions.
+.It Li k8-fr-retired-near-returns
+.Pq Event C8H
+Count retired near returns.
+.It Li k8-fr-retired-near-returns-mispredicted
+.Pq Event C9H
+Count mispredicted near returns.
+.It Li k8-fr-retired-resyncs
+.Pq Event C7H
+Count retired resyncs (non-control transfer branches).
+.It Li k8-fr-retired-taken-branches
+.Pq Event C4H
+Count retired taken branches.
+.It Li k8-fr-retired-taken-branches-mispredicted
+.Pq Event C5H
+Count retired taken branches that were mispredicted.
+.It Li k8-fr-retired-taken-branches-mispredicted-by-addr-miscompare
+.Pq Event CAH
+Count retired taken branches that were mispredicted only due to an
+address miscompare.
+.It Li k8-fr-retired-taken-hardware-interrupts
+.Pq Event CFH
+Count retired taken hardware interrupts.
+.It Li k8-fr-retired-uops
+.Pq Event C1H
+Count retired uops.
+.It Li k8-fr-retired-x86-instructions
+.Pq Event C0H
+Count retired x86 instructions including exceptions and interrupts.
+.It Li k8-ic-fetch
+.Pq Event 80H
+Count instruction cache fetches.
+.It Li k8-ic-instruction-fetch-stall
+.Pq Event 87H
+Count cycles in stalls due to instruction fetch.
+.It Li k8-ic-l1-itlb-miss-and-l2-itlb-hit
+.Pq Event 84H
+Count L1 ITLB misses that are L2 ITLB hits.
+.It Li k8-ic-l1-itlb-miss-and-l2-itlb-miss
+.Pq Event 85H
+Count ITLB misses that miss in both L1 and L2 ITLBs.
+.It Li k8-ic-microarchitectural-resync-by-snoop
+.Pq Event 86H
+Count microarchitectural resyncs caused by snoops.
+.It Li k8-ic-miss
+.Pq Event 81H
+Count instruction cache misses.
+.It Li k8-ic-refill-from-l2
+.Pq Event 82H
+Count instruction cache refills from L2 cache.
+.It Li k8-ic-refill-from-system
+.Pq Event 83H
+Count instruction cache refills from system memory.
+.It Li k8-ic-return-stack-hits
+.Pq Event 88H
+Count hits to the return stack.
+.It Li k8-ic-return-stack-overflow
+.Pq Event 89H
+Count overflows of the return stack.
+.It Li k8-ls-buffer2-full
+.Pq Event 23H
+Count load/store buffer2 full events.
+.It Li k8-ls-locked-operation Op Li ,mask= Ns Ar qualifier
+.Pq Event 24H
+Count locked operations.
+For revision C and later CPUs, the following qualifiers are supported:
+.Pp
+.Bl -tag -width indent -compact
+.It Li cycles-in-request
+Count the number of cycles in the lock request/grant stage.
+.It Li cycles-to-complete
+Count the number of cycles a lock takes to complete once it is
+non-speculative and is the oldest load/store operation.
+.It Li locked-instructions
+Count the number of lock instructions executed.
+.El
+.Pp
+The default is to count the number of lock instructions executed.
+.It Li k8-ls-microarchitectural-late-cancel
+.Pq Event 25H
+Count microarchitectural late cancels of operations in the load/store
+unit.
+.It Li k8-ls-microarchitectural-resync-by-self-modifying-code
+.Pq Event 21H
+Count microarchitectural resyncs caused by self-modifying code.
+.It Li k8-ls-microarchitectural-resync-by-snoop
+.Pq Event 22H
+Count microarchitectural resyncs caused by snoops.
+.It Li k8-ls-retired-cflush-instructions
+.Pq Event 26H
+Count retired CLFLUSH instructions.
+.It Li k8-ls-retired-cpuid-instructions
+.Pq Event 27H
+Count retired CPUID instructions.
+.It Li k8-ls-segment-register-load Op Li ,mask= Ns Ar qualifier
+.Pq Event 20H
+Count segment register loads.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li cs
+Count CS register loads.
+.It Li ds
+Count DS register loads.
+.It Li es
+Count ES register loads.
+.It Li fs
+Count FS register loads.
+.It Li gs
+Count GS register loads.
+.\" .It Li hs
+.\" Count HS register loads.
+.\" XXX "HS" register?
+.It Li ss
+Count SS register loads.
+.El
+.Pp
+The default is to count all types of loads.
+.It Li k8-nb-ht-bus0-bandwidth Op Li ,mask= Ns Ar qualifier
+.It Li k8-nb-ht-bus1-bandwidth Op Li ,mask= Ns Ar qualifier
+.It Li k8-nb-ht-bus2-bandwidth Op Li ,mask= Ns Ar qualifier
+.Pq Events F6H, F7H and F8H respectively
+Count events on the HyperTransport(tm) buses.
+These events may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li buffer-release
+Count buffer release messages sent.
+.It Li command
+Count command messages sent.
+.It Li data
+Count data messages sent.
+.It Li nop
+Count nop messages sent.
+.El
+.Pp
+The default is to count all types of messages.
+.It Li k8-nb-memory-controller-bypass-saturation Op Li ,mask= Ns Ar qualifier
+.Pq Event E4H
+Count memory controller bypass counter saturation events.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li dram-controller-interface-bypass
+Count DRAM controller interface bypass.
+.It Li dram-controller-queue-bypass
+Count DRAM controller queue bypass.
+.It Li memory-controller-hi-pri-bypass
+Count memory controller high priority bypasses.
+.It Li memory-controller-lo-pri-bypass
+Count memory controller low priority bypasses.
+.El
+.It Li k8-nb-memory-controller-dram-slots-missed
+.Pq Event E2H
+Count memory controller DRAM command slots missed (in MemClks).
+.It Li k8-nb-memory-controller-page-access-event Op Li ,mask= Ns Ar qualifier
+.Pq Event E0H
+Count memory controller page access events.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li page-conflict
+Count page conflicts.
+.It Li page-hit
+Count page hits.
+.It Li page-miss
+Count page misses.
+.El
+.Pp
+The default is to count all types of events.
+.It Li k8-nb-memory-controller-page-table-overflow
+.Pq Event E1H
+Count memory controller page table overflow events.
+.It Li k8-nb-memory-controller-turnaround Op Li ,mask= Ns Ar qualifier
+.Pq Event E3H
+Count memory controller turnaround events.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.\" XXX doc is unclear whether these are cycle counts or event counts
+.It Li dimm-turnaround
+Count DIMM turnarounds.
+.It Li read-to-write-turnaround
+Count read to write turnarounds.
+.It Li write-to-read-turnaround
+Count write to read turnarounds.
+.El
+.Pp
+The default is to count all types of events.
+.It Li k8-nb-probe-result Op Li ,mask= Ns Ar qualifier
+.Pq Event ECH
+Count probe events.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li probe-hit
+Count all probe hits.
+.It Li probe-hit-dirty-no-memory-cancel
+Count probe hits without memory cancels.
+.It Li probe-hit-dirty-with-memory-cancel
+Count probe hits with memory cancels.
+.It Li probe-miss
+Count probe misses.
+.El
+.It Li k8-nb-sized-commands Op Li ,mask= Ns Ar qualifier
+.Pq Event EBH
+Count sized commands issued.
+This event may be further qualified using
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li nonpostwrszbyte
+.It Li nonpostwrszdword
+.It Li postwrszbyte
+.It Li postwrszdword
+.It Li rdszbyte
+.It Li rdszdword
+.It Li rdmodwr
+.El
+.Pp
+The default is to count all types of commands.
+.El
+.Ss Event Name Aliases
+The following table shows the mapping between the PMC-independent
+aliases supported by
+.Lb libpmc
+and the underlying hardware events used.
+.Bl -column "branch-mispredicts" "Description"
+.It Em Alias Ta Em Event
+.It Li branches Ta Li k8-fr-retired-taken-branches
+.It Li branch-mispredicts Ta Li k8-fr-retired-taken-branches-mispredicted
+.It Li dc-misses Ta Li k8-dc-miss
+.It Li ic-misses Ta Li k8-ic-miss
+.It Li instructions Ta Li k8-fr-retired-x86-instructions
+.It Li interrupts Ta Li k8-fr-taken-hardware-interrupts
+.It Li unhalted-cycles Ta Li k8-bu-cpu-clk-unhalted
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.mips.3 b/lib/libpmc/pmc.mips.3
new file mode 100644
index 0000000..194838e
--- /dev/null
+++ b/lib/libpmc/pmc.mips.3
@@ -0,0 +1,410 @@
+.\" Copyright (c) 2010 George Neville-Neil.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall George Neville-Neil be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd February 11, 2010
+.Os
+.Dt PMC.MIPS 3
+.Sh NAME
+.Nm pmc.mips
+.Nd measurement events for
+.Tn MIPS
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+MIPS PMCs are present in MIPS
+.Tn "24k"
+and other processors in the MIPS family.
+.Pp
+There are two counters supported by the hardware and each is 32 bits
+wide.
+.Pp
+MIPS PMCs are documented in
+.Rs
+.%B "MIPS32 24K Processor Core Family Software User's Manual"
+.%D December 2008
+.%Q "MIPS Technologies Inc."
+.Re
+.Ss Event Specifiers (Programmable PMCs)
+MIPS programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li CYCLE
+.Pq Event 0, Counter 0/1
+Total number of cycles.
+The performance counters are clocked by the
+top-level gated clock.
+If the core is built with that clock gater
+present, none of the counters will increment while the clock is
+stopped due to a WAIT instruction.
+.It Li INSTR_EXECUTED
+.Pq Event 1, Counter 0/1
+Total number of instructions completed.
+.It Li BRANCH_COMPLETED
+.Pq Event 2, Counter 0
+Total number of branch instructions completed.
+.It Li BRANCH_MISPRED
+.Pq Event 2, Counter 1
+Counts all branch instructions which completed, but were mispredicted.
+.It Li RETURN
+.Pq Event 3, Counter 0
+Counts all JR $31 instructions completed.
+.It Li RETURN_MISPRED
+.Pq Event 3, Counter 1
+Counts all JR $31 instructions which completed, used the RPS for a prediction, but were mispredicted.
+.It Li RETURN_NOT_31
+.Pq Event 4, Counter 0
+Counts all JR $xx (not $31) and JALR instructions (indirect jumps).
+.It Li RETURN_NOTPRED
+.Pq Event 4, Counter 1
+Counts JR $31 instructions that were not predicted.
+If RPS use is disabled, JR $31 will not be predicted.
+.It Li ITLB_ACCESS
+.Pq Event 5, Counter 0
+Counts ITLB accesses that are due to fetches showing up in the
+instruction fetch stage of the pipeline and which do not use a fixed
+mapping or are not in unmapped space.
+If an address is fetched twice from the pipe (as in the case of a
+cache miss), that instruction will count as 2 ITLB accesses.
+Since each fetch gets us 2 instructions, there is one access marked per
+double word.
+.It Li ITLB_MISS
+.Pq Event 5, Counter 1
+Counts all misses in the ITLB except ones that are on the back of another
+miss.
+We cannot process back to back misses and thus those are
+ignored.
+They are also ignored if there is some form of address error.
+.It Li DTLB_ACCESS
+.Pq Event 6, Counter 0
+Counts DTLB access including those in unmapped address spaces.
+.It Li DTLB_MISS
+.Pq Event 6, Counter 1
+Counts DTLB misses.
+Back to back misses that result in only one DTLB
+entry getting refilled are counted as a single miss.
+.It Li JTLB_IACCESS
+.Pq Event 7, Counter 0
+Instruction JTLB accesses are counted exactly the same as ITLB misses.
+.It Li JTLB_IMISS
+.Pq Event 7, Counter 1
+Counts instruction JTLB accesses that result in no match or a match on
+an invalid translation.
+.It Li JTLB_DACCESS
+.Pq Event 8, Counter 0
+Data JTLB accesses.
+.It Li JTLB_DMISS
+.Pq Event 8, Counter 1
+Counts data JTLB accesses that result in no match or a match on an invalid translation.
+.It Li IC_FETCH
+.Pq Event 9, Counter 0
+Counts every time the instruction cache is accessed.
+All replays, wasted fetches, etc. are counted.
+For example, following a branch, even though the prediction is taken,
+the fall through access is counted.
+.It Li IC_MISS
+.Pq Event 9, Counter 1
+Counts all instruction cache misses that result in a bus request.
+.It Li DC_LOADSTORE
+.Pq Event 10, Counter 0
+Counts cached loads and stores.
+.It Li DC_WRITEBACK
+.Pq Event 10, Counter 1
+Counts cache lines written back to memory due to replacement or cacheops.
+.It Li DC_MISS
+.Pq Event 11, Counter 0/1
+Counts loads and stores that miss in the cache.
+.It Li LOAD_MISS
+.Pq Event 13, Counter 0
+Counts number of cacheable loads that miss in the cache.
+.It Li STORE_MISS
+.Pq Event 13, Counter 1
+Counts number of cacheable stores that miss in the cache.
+.It Li INTEGER_COMPLETED
+.Pq Event 14, Counter 0
+Non-floating point, non-Coprocessor 2 instructions.
+.It Li FP_COMPLETED
+.Pq Event 14, Counter 1
+Floating point instructions completed.
+.It Li LOAD_COMPLETED
+.Pq Event 15, Counter 0
+Integer and co-processor loads completed.
+.It Li STORE_COMPLETED
+.Pq Event 15, Counter 1
+Integer and co-processor stores completed.
+.It Li BARRIER_COMPLETED
+.Pq Event 16, Counter 0
+Direct jump (and link) instructions completed.
+.It Li MIPS16_COMPLETED
+.Pq Event 16, Counter 1
+MIPS16e instructions completed.
+.It Li NOP_COMPLETED
+.Pq Event 17, Counter 0
+NOPs completed.
+This includes all instructions that normally write to a general
+purpose register, but where the destination register was set to r0.
+.It Li INTEGER_MULDIV_COMPLETED
+.Pq Event 17, Counter 1
+Integer multiply and divide instructions completed (MULxx, DIVx, MADDx, MSUBx).
+.It Li RF_STALL
+.Pq Event 18, Counter 0
+Counts the total number of cycles where no instructions are issued
+from the IFU to ALU (the RF stage does not advance) which includes
+both of the previous two events.
+The RF_STALL count is different from the sum of them, though, because
+cycles when both stalls are active will only be counted once.
+.It Li INSTR_REFETCH
+.Pq Event 18, Counter 1
+Replay traps (other than uTLB).
+.It Li STORE_COND_COMPLETED
+.Pq Event 19, Counter 0
+Conditional stores completed.  Counts all events, including failed stores.
+.It Li STORE_COND_FAILED
+.Pq Event 19, Counter 1
+Conditional store instruction that did not update memory.
+Note: While this event and the SC instruction count event can be configured to
+count in specific operating modes, the timing of the events is much
+different and the observed operating mode could change between them,
+causing some inaccuracy in the measured ratio.
+.It Li ICACHE_REQUESTS
+.Pq Event 20, Counter 0
+Note that this only counts PREFs that are actually attempted.
+PREFs to uncached addresses or ones with translation errors are not counted.
+.It Li ICACHE_HIT
+.Pq Event 20, Counter 1
+Counts PREF instructions that hit in the cache.
+.It Li L2_WRITEBACK
+.Pq Event 21, Counter 0
+Counts cache lines written back to memory due to replacement or cacheops.
+.It Li L2_ACCESS
+.Pq Event 21, Counter 1
+Number of accesses to L2 Cache.
+.It Li L2_MISS
+.Pq Event 22, Counter 0
+Number of accesses that missed in the L2 cache.
+.It Li L2_ERR_CORRECTED
+.Pq Event 22, Counter 1
+Single bit errors in L2 Cache that were detected and corrected.
+.It Li EXCEPTIONS
+.Pq Event 23, Counter 0
+Any type of exception taken.
+.It Li RF_CYCLES_STALLED
+.Pq Event 24, Counter 0
+Counts cycles where the LSU is in fixup and cannot accept a new
+instruction from the ALU.
+Fixups are replays within the LSU that occur when an instruction needs
+to re-access the cache or the DTLB.
+.It Li IFU_CYCLES_STALLED
+.Pq Event 25, Counter 0
+Counts the number of cycles where the fetch unit is not providing a
+valid instruction to the ALU.
+.It Li ALU_CYCLES_STALLED
+.Pq Event 25, Counter 1
+Counts the number of cycles where the ALU pipeline cannot advance.
+.It Li UNCACHED_LOAD
+.Pq Event 33, Counter 0
+Counts uncached and uncached accelerated loads.
+.It Li UNCACHED_STORE
+.Pq Event 33, Counter 1
+Counts uncached and uncached accelerated stores.
+.It Li CP2_REG_TO_REG_COMPLETED
+.Pq Event 35, Counter 0
+Co-processor 2 register to register instructions completed.
+.It Li MFTC_COMPLETED
+.Pq Event 35, Counter 1
+Co-processor 2 move to and from instructions as well as loads and stores.
+.It Li IC_BLOCKED_CYCLES
+.Pq Event 37, Counter 0
+Cycles when IFU stalls because an instruction miss caused the IFU not
+to have any runnable instructions.
+Ignores the stalls due to ITLB misses as well as the 4 cycles
+following a redirect.
+.It Li DC_BLOCKED_CYCLES
+.Pq Event 37, Counter 1
+Counts all cycles where integer pipeline waits on Load return data due
+to a D-cache miss.
+The LSU can signal a "long stall" on a D-cache miss, in which case
+the waiting TC might be rescheduled so other TCs can execute
+instructions till the data returns.
+.It Li L2_IMISS_STALL_CYCLES
+.Pq Event 38, Counter 0
+Cycles where the main pipeline is stalled waiting for a SYNC to complete.
+.It Li L2_DMISS_STALL_CYCLES
+.Pq Event 38, Counter 1
+Cycles where the main pipeline is stalled because of an index conflict
+in the Fill Store Buffer.
+.It Li DMISS_CYCLES
+.Pq Event 39, Counter 0
+Data miss is outstanding, but not necessarily stalling the pipeline.
+The difference between this and D$ miss stall cycles can show the gain
+from non-blocking cache misses.
+.It Li L2_MISS_CYCLES
+.Pq Event 39, Counter 1
+L2 miss is outstanding, but not necessarily stalling the pipeline.
+.It Li UNCACHED_BLOCK_CYCLES
+.Pq Event 40, Counter 0
+Cycles where the processor is stalled on an uncached fetch, load, or store.
+.It Li MDU_STALL_CYCLES
+.Pq Event 41, Counter 0
+Counts all cycles where integer pipeline waits on MDU return data.
+.It Li FPU_STALL_CYCLES
+.Pq Event 41, Counter 1
+Counts all cycles where integer pipeline waits on FPU return data.
+.It Li CP2_STALL_CYCLES
+.Pq Event 42, Counter 0
+Counts all cycles where integer pipeline waits on CP2 return data.
+.It Li COREXTEND_STALL_CYCLES
+.Pq Event 42, Counter 1
+Counts all cycles where integer pipeline waits on CorExtend return data.
+.It Li ISPRAM_STALL_CYCLES
+.Pq Event 43, Counter 0
+Count all pipeline bubbles that are a result of multicycle ISPRAM
+access.
+Pipeline bubbles are defined as all cycles that IFU doesn't present an
+instruction to ALU. The four cycles after a redirect are not counted.
+.It Li DSPRAM_STALL_CYCLES
+.Pq Event 43, Counter 1
+Counts stall cycles created by an instruction waiting for access to DSPRAM.
+.It Li CACHE_STALL_CYCLES
+.Pq Event 44, Counter 0
+Counts all cycles where the pipeline is stalled due to CACHE
+instructions.
+Includes cycles where CACHE instructions themselves are
+stalled in the ALU, and cycles where CACHE instructions cause
+subsequent instructions to be stalled.
+.It Li LOAD_TO_USE_STALLS
+.Pq Event 45, Counter 0
+Counts all cycles where integer pipeline waits on Load return data.
+.It Li BASE_MISPRED_STALLS
+.Pq Event 45, Counter 1
+Counts stall cycles due to skewed ALU where the bypass to the address
+generation takes an extra cycle.
+.It Li CPO_READ_STALLS
+.Pq Event 46, Counter 0
+Counts all cycles where integer pipeline waits on return data from
+MFC0, RDHWR instructions.
+.It Li BRANCH_MISPRED_CYCLES
+.Pq Event 46, Counter 1
+This counts the number of cycles from a mispredicted branch until the
+next non-delay slot instruction executes.
+.It Li IFETCH_BUFFER_FULL
+.Pq Event 48, Counter 0
+Counts the number of times an instruction cache miss was detected, but
+both fill buffers were already allocated.
+.It Li FETCH_BUFFER_ALLOCATED
+.Pq Event 48, Counter 1
+Number of cycles where at least one of the IFU fill buffers is
+allocated (miss pending).
+.It Li EJTAG_ITRIGGER
+.Pq Event 49, Counter 0
+Number of times an EJTAG Instruction Trigger Point condition matched.
+.It Li EJTAG_DTRIGGER
+.Pq Event 49, Counter 1
+Number of times an EJTAG Data Trigger Point condition matched.
+.It Li FSB_LT_QUARTER
+.Pq Event 50, Counter 0
+Fill store buffer less than one quarter full.
+.It Li FSB_QUARTER_TO_HALF
+.Pq Event 50, Counter 1
+Fill store buffer between one quarter and one half full.
+.It Li FSB_GT_HALF
+.Pq Event 51, Counter 0
+Fill store buffer more than half full.
+.It Li FSB_FULL_PIPELINE_STALLS
+.Pq Event 51, Counter 1
+Cycles where the pipeline is stalled because the Fill-Store Buffer in LSU is full.
+.It Li LDQ_LT_QUARTER
+.Pq Event 52, Counter 0
+Load data queue less than one quarter full.
+.It Li LDQ_QUARTER_TO_HALF
+.Pq Event 52, Counter 1
+Load data queue between one quarter and one half full.
+.It Li LDQ_GT_HALF
+.Pq Event 53, Counter 0
+Load data queue more than one half full.
+.It Li LDQ_FULL_PIPELINE_STALLS
+.Pq Event 53, Counter 1
+Cycles where the pipeline is stalled because the Load Data Queue in the LSU is full.
+.It Li WBB_LT_QUARTER
+.Pq Event 54, Counter 0
+Write back buffer less than one quarter full.
+.It Li WBB_QUARTER_TO_HALF
+.Pq Event 54, Counter 1
+Write back buffer between one quarter and one half full.
+.It Li WBB_GT_HALF
+.Pq Event 55, Counter 0
+Write back buffer more than one half full.
+.It Li WBB_FULL_PIPELINE_STALLS
+.Pq Event 55, Counter 1
+Cycles where the pipeline is stalled because the Write Back Buffer is full.
+.It Li REQUEST_LATENCY
+.Pq Event 61, Counter 0
+Measures latency from miss detection until critical dword of response
+is returned.
+Only counts for cacheable reads.
+.It Li REQUEST_COUNT
+.Pq Event 61, Counter 1
+Counts number of cacheable read requests used for previous latency counter.
+.El
+.Ss Event Name Aliases
+The following table shows the mapping between the PMC-independent
+aliases supported by
+.Lb libpmc
+and the underlying hardware events used.
+.Bl -column "branch-mispredicts" "cpu_clk_unhalted.core_p"
+.It Em Alias Ta Em Event
+.It Li instructions Ta Li INSTR_EXECUTED
+.It Li branches Ta Li BRANCH_COMPLETED
+.It Li branch-mispredicts Ta Li BRANCH_MISPRED
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
+MIPS support was added by
+.An "George Neville-Neil"
+.Aq gnn@FreeBSD.org .
+.Sh CAVEATS
+The MIPS code does not yet support sampling.
diff --git a/lib/libpmc/pmc.p4.3 b/lib/libpmc/pmc.p4.3
new file mode 100644
index 0000000..e13fa6e
--- /dev/null
+++ b/lib/libpmc/pmc.p4.3
@@ -0,0 +1,1226 @@
+.\" Copyright (c) 2003-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd October 4, 2008
+.Dt PMC.P4 3
+.Os
+.Sh NAME
+.Nm pmc.p4
+.Nd measurement events for
+.Tn "Intel Pentium 4"
+and other
+.Tn Netburst
+architecture CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+Intel P4 PMCs are present in Intel
+.Tn "Pentium 4"
+and
+.Tn Xeon
+processors that use the
+.Tn Netburst
+CPU architecture.
+.Pp
+These PMCs are documented in
+.Rs
+.%B "IA-32 Intel(R) Architecture Software Developer's Manual"
+.%T "Volume 3: System Programming Guide"
+.%N "Order Number 245472-012"
+.%D 2003
+.%Q "Intel Corporation"
+.Re
+Further information about using these PMCs may be found in
+.Rs
+.%B "IA-32 Intel(R) Architecture Optimization Guide"
+.%D 2003
+.%N "Order Number 248966-009"
+.%Q "Intel Corporation"
+.Re
+Some of these events are affected by processor errata described in
+.Rs
+.%B "Intel(R) Pentium(R) 4 Processor Specification Update"
+.%N "Document Number: 249199-059"
+.%D "April 2005"
+.%Q "Intel Corporation"
+.Re
+.Ss PMC Features
+Intel Pentium 4 PMCs are 40 bits wide.
+Each CPU contains 18 PMCs, divided into 4 groups with 4, 4, 4 and 6
+PMCs respectively.
+On processors with hyperthreading support, PMC resources are shared
+between logical processors.
+These PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta Yes
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta Unimplemented
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta Yes
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+.Pp
+Event specifiers for Intel P4 PMCs can have the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li active= Ns Ar choice
+(On P4 HTT CPUs) Filter event counting based on which logical
+processors are active.
+The allowed values of
+.Ar choice
+are:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count when either logical processor is active.
+.It Li both
+Count when both logical processors are active.
+.It Li none
+Count only when neither logical processor is active.
+.It Li single
+Count only when one logical processor is active.
+.El
+.Pp
+The default is
+.Dq Li both .
+.It Li cascade
+Configure the PMC to cascade onto its partner.
+See
+.Sx "Cascading P4 PMCs"
+below for more information.
+.It Li edge
+Configure the counter to count false to true transitions of the threshold
+comparison output.
+This qualifier only takes effect if a threshold qualifier has also been
+specified.
+.It Li complement
+Configure the counter to increment only when the event count seen is
+less than the threshold qualifier value specified.
+.It Li mask= Ns Ar qualifier
+Many event specifiers for Intel P4 PMCs need to be additionally
+qualified using a mask qualifier.
+The allowed syntax for these qualifiers is event specific and is
+described along with the events.
+.It Li os
+Configure the PMC to count when the CPL of the processor is 0.
+.It Li precise
+Select precise event based sampling.
+Precise sampling is supported by the hardware for a limited set of
+events.
+.It Li tag= Ns Ar value
+Configure the PMC to tag the internal uop selected by the other
+fields in this event specifier with value
+.Ar value .
+This feature is used when cascading PMCs.
+.It Li threshold= Ns Ar value
+Configure the PMC to increment only when the event counts seen are
+greater than the specified threshold value
+.Ar value .
+.It Li usr
+Configure the PMC to count when the CPL of the processor is 1, 2 or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers is specified, the default is to enable both.
+.Pp
+On Intel Pentium 4 processors with HTT, events are
+divided into two classes:
+.Pp
+.Bl -tag -width indent -compact
+.It "TS Events"
+are those where hardware can differentiate between events
+generated on one logical processor from those generated on the
+other.
+.It "TI Events"
+are those where hardware cannot differentiate between events
+generated by multiple logical processors in a package.
+.El
+.Pp
+Only TS events are allowed for use with process-mode PMCs on
+Pentium-4/HTT CPUs.
+.Pp
+The event specifiers supported by Intel P4 PMCs are:
+.Pp
+.Bl -tag -width indent
+.It Li p4-128bit-mmx-uop Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count integer SIMD SSE2 instructions that operate on 128 bit SIMD
+operands.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li all
+Count all uops operating on 128 bit SIMD integer operands in memory or
+XMM register.
+.El
+.Pp
+If an instruction contains more than one 128 bit MMX uop, then each
+uop will be counted.
+.It Li p4-64bit-mmx-uop Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count MMX instructions that operate on 64 bit SIMD operands.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li all
+Count all uops operating on 64 bit SIMD integer operands in memory or
+in MMX registers.
+.El
+.Pp
+If an instruction contains more than one 64 bit MMX uop, then each
+uop will be counted.
+.It Li p4-b2b-cycles
+.Pq "TI event"
+Count back-to-back bus cycles.
+Further documentation for this event is unavailable.
+.It Li p4-bnr
+.Pq "TI event"
+Count bus-not-ready conditions.
+Further documentation for this event is unavailable.
+.It Li p4-bpu-fetch-request Op Li ,mask= Ns Ar qualifier
+.Pq "TS event"
+Count instruction fetch requests qualified by additional
+flags specified in
+.Ar qualifier .
+At this point only one flag is supported:
+.Pp
+.Bl -tag -width indent -compact
+.It Li tcmiss
+Count trace cache lookup misses.
+.El
+.Pp
+The default qualifier is also
+.Dq Li mask=tcmiss .
+.It Li p4-branch-retired Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Counts retired branches.
+Qualifier
+.Ar flags
+is a list of the following
+.Ql +
+separated strings:
+.Pp
+.Bl -tag -width indent -compact
+.It Li mmnp
+Count branches not-taken and predicted.
+.It Li mmnm
+Count branches not-taken and mis-predicted.
+.It Li mmtp
+Count branches taken and predicted.
+.It Li mmtm
+Count branches taken and mis-predicted.
+.El
+.Pp
+The default qualifier counts all four kinds of branches.
+.It Li p4-bsq-active-entries Op Li ,mask= Ns Ar qualifier
+.Pq "TS event"
+Count the number of entries (clipped at 15) currently active in the
+BSQ.
+Qualifier
+.Ar qualifier
+is a
+.Ql +
+separated set of the following flags:
+.Pp
+.Bl -tag -width indent -compact
+.It Li req-type0 , Li req-type1
+Forms a 2-bit number used to select the request type encoding:
+.Pp
+.Bl -tag -width indent -compact
+.It Li 0
+reads excluding read invalidate
+.It Li 1
+read invalidates
+.It Li 2
+writes other than writebacks
+.It Li 3
+writebacks
+.El
+.Pp
+Bit
+.Dq Li req-type1
+is the MSB for this two bit number.
+.It Li req-len0 , Li req-len1
+Forms a two-bit number that specifies the request length encoding:
+.Pp
+.Bl -tag -width indent -compact
+.It Li 0
+0 chunks
+.It Li 1
+1 chunk
+.It Li 3
+8 chunks
+.El
+.Pp
+Bit
+.Dq Li req-len1
+is the MSB for this two bit number.
+.It Li req-io-type
+Count requests that are input or output requests.
+.It Li req-lock-type
+Count requests that lock the bus.
+.It Li req-lock-cache
+Count requests that lock the cache.
+.It Li req-split-type
+Count requests for a bus 8-byte chunk that is split across an
+8-byte boundary.
+.It Li req-dem-type
+Count requests that are demand (not prefetches) if set.
+Count requests that are prefetches if not set.
+.It Li req-ord-type
+Count requests that are ordered.
+.It Li mem-type0 , Li mem-type1 , Li mem-type2
+Forms a 3-bit number that specifies a memory type encoding:
+.Pp
+.Bl -tag -width indent -compact
+.It Li 0
+UC
+.It Li 1
+USWC
+.It Li 4
+WT
+.It Li 5
+WP
+.It Li 6
+WB
+.El
+.Pp
+Bit
+.Dq Li mem-type2
+is the MSB of this 3-bit number.
+.El
+.Pp
+The default qualifier has all the above bits set.
+.Pp
+Edge triggering using the
+.Dq Li edge
+qualifier should not be used with this event when counting cycles.
+.It Li p4-bsq-allocation Op Li ,mask= Ns Ar qualifier
+.Pq "TS event"
+Count allocations in the bus sequence unit according to the flags
+specified in
+.Ar qualifier ,
+which is a
+.Ql +
+separated set of the following flags:
+.Pp
+.Bl -tag -width indent -compact
+.It Li req-type0 , Li req-type1
+Forms a 2-bit number used to select the request type encoding:
+.Pp
+.Bl -tag -width indent -compact
+.It Li 0
+reads excluding read invalidate
+.It Li 1
+read invalidates
+.It Li 2
+writes other than writebacks
+.It Li 3
+writebacks
+.El
+.Pp
+Bit
+.Dq Li req-type1
+is the MSB for this two bit number.
+.It Li req-len0 , Li req-len1
+Forms a two-bit number that specifies the request length encoding:
+.Pp
+.Bl -tag -width indent -compact
+.It Li 0
+0 chunks
+.It Li 1
+1 chunk
+.It Li 3
+8 chunks
+.El
+.Pp
+Bit
+.Dq Li req-len1
+is the MSB for this two bit number.
+.It Li req-io-type
+Count requests that are input or output requests.
+.It Li req-lock-type
+Count requests that lock the bus.
+.It Li req-lock-cache
+Count requests that lock the cache.
+.It Li req-split-type
+Count requests for a bus 8-byte chunk that is split across an
+8-byte boundary.
+.It Li req-dem-type
+Count requests that are demand (not prefetches) if set.
+Count requests that are prefetches if not set.
+.It Li req-ord-type
+Count requests that are ordered.
+.It Li mem-type0 , Li mem-type1 , Li mem-type2
+Forms a 3-bit number that specifies a memory type encoding:
+.Pp
+.Bl -tag -width indent -compact
+.It Li 0
+UC
+.It Li 1
+USWC
+.It Li 4
+WT
+.It Li 5
+WP
+.It Li 6
+WB
+.El
+.Pp
+Bit
+.Dq Li mem-type2
+is the MSB of this 3-bit number.
+.El
+.Pp
+The default qualifier has all the above bits set.
+.Pp
+This event is usually used along with the
+.Dq Li edge
+qualifier to avoid multiple counting.
+.It Li p4-bsq-cache-reference Op Li ,mask= Ns Ar qualifier
+.Pq "TS event"
+Count cache references as seen by the bus unit (2nd or 3rd level
+cache references).
+Qualifier
+.Ar qualifier
+is a
+.Ql +
+separated list of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li rd-2ndl-hits
+Count 2nd level cache hits in the shared state.
+.It Li rd-2ndl-hite
+Count 2nd level cache hits in the exclusive state.
+.It Li rd-2ndl-hitm
+Count 2nd level cache hits in the modified state.
+.It Li rd-3rdl-hits
+Count 3rd level cache hits in the shared state.
+.It Li rd-3rdl-hite
+Count 3rd level cache hits in the exclusive state.
+.It Li rd-3rdl-hitm
+Count 3rd level cache hits in the modified state.
+.It Li rd-2ndl-miss
+Count 2nd level cache misses.
+.It Li rd-3rdl-miss
+Count 3rd level cache misses.
+.It Li wr-2ndl-miss
+Count write-back lookups from the data access cache that miss the 2nd
+level cache.
+.El
+.Pp
+The default is to count all the above events.
+.It Li p4-execution-event Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count the retirement of tagged uops selected through the execution
+tagging mechanism.
+Qualifier
+.Ar flags
+can contain the following strings separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li nbogus0 , Li nbogus1 , Li nbogus2 , Li nbogus3
+The marked uops are not bogus.
+.It Li bogus0 , Li bogus1 , Li bogus2 , Li bogus3
+The marked uops are bogus.
+.El
+.Pp
+This event requires additional (upstream) events to be allocated to
+perform the desired uop tagging.
+The default is to set all the above flags.
+This event can be used for precise event based sampling.
+.It Li p4-front-end-event Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count the retirement of tagged uops selected through the front-end
+tagging mechanism.
+Qualifier
+.Ar flags
+can contain the following strings separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li nbogus
+The marked uops are not bogus.
+.It Li bogus
+The marked uops are bogus.
+.El
+.Pp
+This event requires additional (upstream) events to be allocated to
+perform the desired uop tagging.
+The default is to select both kinds of events.
+This event can be used for precise event based sampling.
+.It Li p4-fsb-data-activity Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count each DBSY or DRDY event selected by qualifier
+.Ar flags .
+Qualifier
+.Ar flags
+is a
+.Ql +
+separated set of the following flags:
+.Pp
+.Bl -tag -width indent -compact
+.It Li drdy-drv
+Count when this processor is driving data onto the bus.
+.It Li drdy-own
+Count when this processor is reading data from the bus.
+.It Li drdy-other
+Count when data is on the bus but not being sampled by this processor.
+.It Li dbsy-drv
+Count when this processor reserves the bus for use in the next cycle
+in order to drive data.
+.It Li dbsy-own
+Count when some agent reserves the bus for use in the next bus cycle
+to drive data that this processor will sample.
+.It Li dbsy-other
+Count when some agent reserves the bus for use in the next bus cycle
+to drive data that this processor will not sample.
+.El
+.Pp
+Flags
+.Dq Li drdy-own
+and
+.Dq Li drdy-other
+are mutually exclusive.
+Flags
+.Dq Li dbsy-own
+and
+.Dq Li dbsy-other
+are mutually exclusive.
+The default value for
+.Ar flags
+is
+.Dq Li drdy-drv+drdy-own+dbsy-drv+dbsy-own .
+.It Li p4-global-power-events Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count cycles during which the processor is not stopped.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li running
+Count cycles when the processor is active.
+.El
+.Pp
+.It Li p4-instr-retired Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count instructions retired during a clock cycle.
+Qualifier
+.Ar flags
+comprises the following strings separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li nbogusntag
+Count non-bogus instructions that are not tagged.
+.It Li nbogustag
+Count non-bogus instructions that are tagged.
+.It Li bogusntag
+Count bogus instructions that are not tagged.
+.It Li bogustag
+Count bogus instructions that are tagged.
+.El
+.Pp
+The default qualifier counts all the above kinds of instructions.
+.It Li p4-ioq-active-entries Xo
+.Op Li ,mask= Ns Ar qualifier
+.Op Li ,busreqtype= Ns Ar req-type
+.Xc
+.Pq "TS event"
+Count the number of entries (clipped at 15) in the IOQ that are
+active.
+The event masks are specified by qualifier
+.Ar qualifier
+and
+.Ar req-type .
+.Pp
+Qualifier
+.Ar qualifier
+is a
+.Ql +
+separated set of the following flags:
+.Pp
+.Bl -tag -width indent -compact
+.It Li all-read
+Count read entries.
+.It Li all-write
+Count write entries.
+.It Li mem-uc
+Count entries accessing un-cacheable memory.
+.It Li mem-wc
+Count entries accessing write-combining memory.
+.It Li mem-wt
+Count entries accessing write-through memory.
+.It Li mem-wp
+Count entries accessing write-protected memory.
+.It Li mem-wb
+Count entries accessing write-back memory.
+.It Li own
+Count store requests driven by the processor (i.e., not by other
+processors or by DMA).
+.It Li other
+Count store requests driven by other processors or by DMA.
+.It Li prefetch
+Include hardware and software prefetch requests in the count.
+.El
+.Pp
+The default value for
+.Ar qualifier
+is to enable all the above flags.
+.Pp
+The
+.Ar req-type
+qualifier is a 5-bit number that can be additionally used to select a
+specific bus request type.
+The default is 0.
+.Pp
+The
+.Dq Li edge
+qualifier should not be used when counting cycles with this event.
+The exact behavior of this event depends on the processor revision.
+.It Li p4-ioq-allocation Xo
+.Op Li ,mask= Ns Ar qualifier
+.Op Li ,busreqtype= Ns Ar req-type
+.Xc
+.Pq "TS event"
+Count various types of transactions on the bus matching the flags set
+in
+.Ar qualifier
+and
+.Ar req-type .
+.Pp
+Qualifier
+.Ar qualifier
+is a
+.Ql +
+separated set of the following flags:
+.Pp
+.Bl -tag -width indent -compact
+.It Li all-read
+Count read entries.
+.It Li all-write
+Count write entries.
+.It Li mem-uc
+Count entries accessing un-cacheable memory.
+.It Li mem-wc
+Count entries accessing write-combining memory.
+.It Li mem-wt
+Count entries accessing write-through memory.
+.It Li mem-wp
+Count entries accessing write-protected memory.
+.It Li mem-wb
+Count entries accessing write-back memory.
+.It Li own
+Count store requests driven by the processor (i.e., not by other
+processors or by DMA).
+.It Li other
+Count store requests driven by other processors or by DMA.
+.It Li prefetch
+Include hardware and software prefetch requests in the count.
+.El
+.Pp
+The default value for
+.Ar qualifier
+is to enable all the above flags.
+.Pp
+The
+.Ar req-type
+qualifier is a 5-bit number that can be additionally used to select a
+specific bus request type.
+The default is 0.
+.Pp
+The
+.Dq Li edge
+qualifier is normally used with this event to prevent multiple
+counting.
+The exact behavior of this event depends on the processor revision.
+.It Li p4-itlb-reference Op Li ,mask= Ns Ar qualifier
+.Pq "TS event"
+Count translations using the instruction translation look-aside
+buffer.
+The
+.Ar qualifier
+argument is a list of the following strings separated by
+.Ql +
+characters.
+.Pp
+.Bl -tag -width indent -compact
+.It Li hit
+Count ITLB hits.
+.It Li miss
+Count ITLB misses.
+.It Li hit-uc
+Count un-cacheable ITLB hits.
+.El
+.Pp
+If no
+.Ar qualifier
+is specified the default is to count all the three kinds of ITLB
+translations.
+.It Li p4-load-port-replay Op Li ,mask= Ns Ar qualifier
+.Pq "TS event"
+Count replayed events at the load port.
+Qualifier
+.Ar qualifier
+can take on one value:
+.Pp
+.Bl -tag -width indent -compact
+.It Li split-ld
+Count split loads.
+.El
+.Pp
+The default value for
+.Ar qualifier
+is
+.Dq Li split-ld .
+.It Li p4-mispred-branch-retired Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count mispredicted IA-32 branch instructions.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li nbogus
+Count non-bogus retired branch instructions.
+.El
+.It Li p4-machine-clear Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count the number of pipeline clears seen by the processor.
+Qualifier
+.Ar flags
+is a list of the following strings separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li clear
+Count for a portion of the many cycles when the machine is being
+cleared for any reason.
+.It Li moclear
+Count machine clears due to memory ordering issues.
+.It Li smclear
+Count machine clears due to self-modifying code.
+.El
+.Pp
+Use qualifier
+.Dq Li edge
+to get a count of occurrences of machine clears.
+The default qualifier is
+.Dq Li clear .
+.It Li p4-memory-cancel Op Li ,mask= Ns Ar event-list
+.Pq "TS event"
+Count the canceling of various kinds of requests in the data cache
+address control unit of the CPU.
+The qualifier
+.Ar event-list
+is a list of the following strings separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li st-rb-full
+Requests cancelled because no store request buffer was available.
+.It Li 64k-conf
+Requests that conflict due to 64K aliasing.
+.El
+.Pp
+If
+.Ar event-list
+is not specified, then the default is to count both kinds of events.
+.It Li p4-memory-complete Op Li ,mask= Ns Ar event-list
+.Pq "TS event"
+Count the completion of load split, store split, un-cacheable split and
+un-cacheable load operations selected by qualifier
+.Ar event-list .
+The qualifier
+.Ar event-list
+is a
+.Ql +
+separated list of the following flags:
+.Pp
+.Bl -tag -width indent -compact
+.It Li lsc
+Count load splits completed, excluding loads from un-cacheable or
+write-combining areas.
+.It Li ssc
+Count any split stores completed.
+.El
+.Pp
+The default is to count both kinds of operations.
+.It Li p4-mob-load-replay Op Li ,mask= Ns Ar qualifier
+.Pq "TS event"
+Count load replays triggered by the memory order buffer.
+Qualifier
+.Ar qualifier
+can be a
+.Ql +
+separated list of the following flags:
+.Pp
+.Bl -tag -width indent -compact
+.It Li no-sta
+Count replays because of unknown store addresses.
+.It Li no-std
+Count replays because of unknown store data.
+.It Li partial-data
+Count replays because of partially overlapped data accesses between
+load and store operations.
+.It Li unalgn-addr
+Count replays because of mismatches in the lower 4 bits of load and
+store operations.
+.El
+.Pp
+The default qualifier is
+.Dq Li no-sta+no-std+partial-data+unalgn-addr .
+.It Li p4-packed-dp-uop Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count packed double-precision uops.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li all
+Count all uops operating on packed double-precision operands.
+.El
+.It Li p4-packed-sp-uop Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count packed single-precision uops.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li all
+Count all uops operating on packed single-precision operands.
+.El
+.It Li p4-page-walk-type Op Li ,mask= Ns Ar qualifier
+.Pq "TI event"
+Count page walks performed by the page miss handler.
+Qualifier
+.Ar qualifier
+can be a
+.Ql +
+separated list of the following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li dtmiss
+Count page walks for data TLB misses.
+.It Li itmiss
+Count page walks for instruction TLB misses.
+.El
+.Pp
+The default value for
+.Ar qualifier
+is
+.Dq Li dtmiss+itmiss .
+.It Li p4-replay-event Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count the retirement of tagged uops selected through the replay
+tagging mechanism.
+Qualifier
+.Ar flags
+contains a
+.Ql +
+separated set of the following strings:
+.Pp
+.Bl -tag -width indent -compact
+.It Li nbogus
+The marked uops are not bogus.
+.It Li bogus
+The marked uops are bogus.
+.El
+.Pp
+This event requires additional (upstream) events to be allocated to
+perform the desired uop tagging.
+The default qualifier counts both kinds of uops.
+This event can be used for precise event based sampling.
+.It Li p4-resource-stall Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count the occurrence or latency of stalls in the allocator.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li sbfull
+A stall due to the lack of store buffers.
+.El
+.It Li p4-response
+.Pq "TI event"
+Count different types of responses.
+Further documentation on this event is not available.
+.It Li p4-retired-branch-type Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count branches retired.
+Qualifier
+.Ar flags
+contains a
+.Ql +
+separated list of strings:
+.Pp
+.Bl -tag -width indent -compact
+.It Li conditional
+Count conditional jumps.
+.It Li call
+Count direct and indirect call branches.
+.It Li return
+Count return branches.
+.It Li indirect
+Count returns, indirect calls or indirect jumps.
+.El
+.Pp
+The default qualifier counts all the above branch types.
+.It Li p4-retired-mispred-branch-type Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count mispredicted branches retired.
+Qualifier
+.Ar flags
+contains a
+.Ql +
+separated list of strings:
+.Pp
+.Bl -tag -width indent -compact
+.It Li conditional
+Count conditional jumps.
+.It Li call
+Count indirect call branches.
+.It Li return
+Count return branches.
+.It Li indirect
+Count returns, indirect calls or indirect jumps.
+.El
+.Pp
+The default qualifier counts all the above branch types.
+.It Li p4-scalar-dp-uop Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count the number of scalar double-precision uops.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li all
+Count all uops operating on scalar double-precision operands.
+.El
+.It Li p4-scalar-sp-uop Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count the number of scalar single-precision uops.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li all
+Count all uops operating on scalar single-precision operands.
+.El
+.It Li p4-snoop
+.Pq "TI event"
+Count snoop traffic.
+Further documentation on this event is not available.
+.It Li p4-sse-input-assist Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count the number of times an assist is required to handle problems
+with the operands for SSE and SSE2 operations.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li all
+Count assists for all SSE and SSE2 uops.
+.El
+.It Li p4-store-port-replay Op Li ,mask= Ns Ar qualifier
+.Pq "TS event"
+Count events replayed at the store port.
+Qualifier
+.Ar qualifier
+can take on one value:
+.Pp
+.Bl -tag -width indent -compact
+.It Li split-st
+Count split stores.
+.El
+.Pp
+The default value for
+.Ar qualifier
+is
+.Dq Li split-st .
+.It Li p4-tc-deliver-mode Op Li ,mask= Ns Ar qualifier
+.Pq "TI event"
+Count the duration in cycles of operating modes of the trace cache and
+decode engine.
+The desired operating mode is selected by
+.Ar qualifier ,
+which is a list of the following strings separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li DD
+Both logical processors are in deliver mode.
+.It Li DB
+Logical processor 0 is in deliver mode while logical processor 1 is in
+build mode.
+.It Li DI
+Logical processor 0 is in deliver mode while logical processor 1 is
+halted, or in machine clear, or transitioning to a long microcode
+flow.
+.It Li BD
+Logical processor 0 is in build mode while logical processor 1 is in
+deliver mode.
+.It Li BB
+Both logical processors are in build mode.
+.It Li BI
+Logical processor 0 is in build mode while logical processor 1 is
+halted, or in machine clear or transitioning to a long microcode
+flow.
+.It Li ID
+Logical processor 0 is halted, or in machine clear or transitioning to
+a long microcode flow while logical processor 1 is in deliver mode.
+.It Li IB
+Logical processor 0 is halted, or in machine clear or transitioning to
+a long microcode flow while logical processor 1 is in build mode.
+.El
+.Pp
+If there is only one logical processor in the processor package then
+the qualifier for logical processor 1 is ignored.
+If no qualifier is specified, the default qualifier is
+.Dq Li DD+DB+DI+BD+BB+BI+ID+IB .
+.It Li p4-tc-ms-xfer Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count the number of times uop delivery changed from the trace cache to
+MS ROM.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li cisc
+Count TC to MS transfers.
+.El
+.It Li p4-uop-queue-writes Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count the number of valid uops written to the uop queue.
+Qualifier
+.Ar flags
+is a list of the following strings, separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li from-tc-build
+Count uops being written from the trace cache in build mode.
+.It Li from-tc-deliver
+Count uops being written from the trace cache in deliver mode.
+.It Li from-rom
+Count uops being written from microcode ROM.
+.El
+.Pp
+The default qualifier counts all the above kinds of uops.
+.It Li p4-uop-type Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+This event is used in conjunction with the front-end at-retirement
+mechanism to tag load and store uops.
+Qualifier
+.Ar flags
+comprises the following strings separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li tagloads
+Mark uops that are load operations.
+.It Li tagstores
+Mark uops that are store operations.
+.El
+.Pp
+The default qualifier counts both kinds of uops.
+.It Li p4-uops-retired Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count uops retired during a clock cycle.
+Qualifier
+.Ar flags
+comprises the following strings separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li nbogus
+Count marked uops that are not bogus.
+.It Li bogus
+Count marked uops that are bogus.
+.El
+.Pp
+The default qualifier counts both kinds of uops.
+.It Li p4-wc-buffer Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count write-combining buffer operations.
+Qualifier
+.Ar flags
+contains the following strings separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li wcb-evicts
+WC buffer evictions due to any cause.
+.It Li wcb-full-evict
+WC buffer evictions due to no WC buffer being available.
+.El
+.Pp
+The default qualifier counts both kinds of evictions.
+.It Li p4-x87-assist Op Li ,mask= Ns Ar flags
+.Pq "TS event"
+Count the retirement of x87 instructions that required special
+handling.
+Qualifier
+.Ar flags
+contains the following strings separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li fpsu
+Count instructions that saw an FP stack underflow.
+.It Li fpso
+Count instructions that saw an FP stack overflow.
+.It Li poao
+Count instructions that saw an x87 output overflow.
+.It Li poau
+Count instructions that saw an x87 output underflow.
+.It Li prea
+Count instructions that needed an x87 input assist.
+.El
+.Pp
+The default qualifier counts all the above types of instruction
+retirements.
+.It Li p4-x87-fp-uop Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count x87 floating-point uops.
+Qualifier
+.Ar flags
+can take the following value (which is also the default):
+.Pp
+.Bl -tag -width indent -compact
+.It Li all
+Count all x87 floating-point uops.
+.El
+.Pp
+If an instruction contains more than one x87 floating-point uop, then
+all x87 floating-point uops will be counted.
+This event does not count x87 floating-point data movement operations.
+.It Li p4-x87-simd-moves-uop Op Li ,mask= Ns Ar flags
+.Pq "TI event"
+Count each x87 FPU, MMX, SSE, or SSE2 uop that loads data, stores
+data or performs a register-to-register move.
+This event does not count integer move uops.
+Qualifier
+.Ar flags
+may contain the following keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li allp0
+Count all x87 and SIMD store and move uops.
+.It Li allp2
+Count all x87 and SIMD load uops.
+.El
+.Pp
+The default is to count all uops.
+.Pq Errata
+This event may be affected by processor errata N43.
+.El
+.Ss "Cascading P4 PMCs"
+PMC cascading support is currently poorly implemented.
+While individual event counters may be allocated with a
+.Dq Li cascade
+qualifier, the current API does not offer the ability
+to name and allocate all the resources needed for a
+cascaded event counter pair in a single operation.
+.Ss "Precise Event Based Sampling"
+Support for precise event based sampling is currently
+unimplemented.
+.Ss Event Name Aliases
+The following table shows the mapping between the PMC-independent
+aliases supported by
+.Lb libpmc
+and the underlying hardware events used.
+.Bl -column "branch-mispredicts" "Description"
+.It Em Alias Ta Em Event
+.It Li branches Ta Li p4-branch-retired,mask=mmtp+mmtm
+.It Li branch-mispredicts Ta Li p4-mispred-branch-retired
+.It Li dc-misses Ta (unsupported)
+.It Li ic-misses Ta (unsupported)
+.It Li instructions Ta Li p4-instr-retired,mask=nbogusntag+nbogustag
+.It Li interrupts Ta (unsupported)
+.It Li unhalted-cycles Ta Li p4-global-power-events
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.p5.3 b/lib/libpmc/pmc.p5.3
new file mode 100644
index 0000000..36ab917
--- /dev/null
+++ b/lib/libpmc/pmc.p5.3
@@ -0,0 +1,460 @@
+.\" Copyright (c) 2003-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd October 4, 2008
+.Dt PMC.P5 3
+.Os
+.Sh NAME
+.Nm pmc.p5
+.Nd measurement events for Intel Pentium family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+Intel Pentium PMCs are present in Intel
+.Tn Pentium
+and
+.Tn "Pentium MMX"
+processors.
+These PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number 253669-024US"
+.%D "August 2007"
+.%Q "Intel Corporation"
+.Re
+.Ss PMC Features
+These CPUs contain two PMCs, each 40 bits wide.
+These PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta \&No
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta \&No
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta \&No
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for Intel Pentium PMCs can have the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li duration
+Count duration (in clocks) of events.
+The default is to count events.
+.It Li os
+Measure events at privilege levels 0, 1 and 2.
+.It Li overflow
+Assert the external processor pin associated with a counter on counter
+overflow.
+.It Li usr
+Measure events at privilege level 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Pp
+Some events may only be used on specific counters and some events
+are defined only on processors supporting the MMX instruction set.
+Note that these PMCs do not have the ability to interrupt the CPU.
+.Ss Intel Pentium Event Specifiers
+The event specifiers supported by Intel Pentium PMCs are:
+.Bl -tag -width indent
+.It Li p5-any-segment-register-loaded
+.Pq Event 0FH
+The number of writes to any segment register, including the LDTR,
+GDTR, TR and IDTR.
+Far control transfers and task switches that involve privilege
+level changes will count this event twice.
+.It Li p5-bank-conflicts
+.Pq Event 0AH
+The number of actual bank conflicts.
+.It Li p5-branches
+.Pq Event 12H
+The number of taken and not taken branches including branches, jumps, calls,
+software interrupts and interrupt returns.
+.It Li p5-breakpoint-match-on-dr0-register
+.Pq Event 23H
+The number of matches on the DR0 breakpoint register.
+.It Li p5-breakpoint-match-on-dr1-register
+.Pq Event 24H
+The number of matches on the DR1 breakpoint register.
+.It Li p5-breakpoint-match-on-dr2-register
+.Pq Event 25H
+The number of matches on the DR2 breakpoint register.
+.It Li p5-breakpoint-match-on-dr3-register
+.Pq Event 26H
+The number of matches on the DR3 breakpoint register.
+.It Li p5-btb-false-entries
+.Pq Event 3AH , Tn Pentium MMX
+The number of false entries in the BTB.
+This event is only allocated on counter 0.
+.It Li p5-btb-hits
+.Pq Event 13H
+The number of branches executed that hit in the branch target buffer.
+.It Li p5-btb-miss-prediction-on-not-taken-branch
+.Pq Event 3AH , Tn Pentium MMX
+The number of times the BTB predicted a not-taken branch as taken.
+This event is only allocated on counter 1.
+.It Li p5-bus-cycle-duration
+.Pq Event 18H
+The number of cycles while a bus cycle was in progress.
+.It Li p5-bus-ownership-latency
+.Pq Event 2AH , Tn Pentium MMX
+The time from bus ownership being requested to ownership being granted.
+This event is only allocated on counter 0.
+.It Li p5-bus-ownership-transfers
+.Pq Event 2AH , Tn Pentium MMX
+The number of bus ownership transfers.
+This event is only allocated on counter 1.
+.It Li p5-bus-utilization-due-to-processor-activity
+.Pq Event 2EH , Tn Pentium MMX
+The number of clocks the bus is busy due to the processor's own
+activity.
+This event is only allocated on counter 0.
+.It Li p5-cache-line-sharing
+.Pq Event 2CH , Tn Pentium MMX
+The number of shared data lines in L1 cache.
+This event is only allocated on counter 1.
+.It Li p5-cache-m-state-line-sharing
+.Pq Event 2CH , Tn Pentium MMX
+The number of hits to an M-state line due to a memory access by
+another processor.
+This event is only allocated on counter 0.
+.It Li p5-code-cache-miss
+.Pq Event 0EH
+The number of instruction reads that miss the internal code cache.
+Both cacheable and un-cacheable misses are counted.
+.It Li p5-code-read
+.Pq Event 0CH
+The number of instruction reads to both cacheable and un-cacheable regions.
+.It Li p5-code-tlb-miss
+.Pq Event 0DH
+The number of instruction reads that miss the instruction TLB.
+Both cacheable and un-cacheable reads are counted.
+.It Li p5-d1-starvation-and-fifo-is-empty
+.Pq Event 33H , Tn Pentium MMX
+The number of times the D1 stage cannot issue any instructions because
+the FIFO was empty.
+This event is only allocated on counter 0.
+.It Li p5-d1-starvation-and-only-one-instruction-in-fifo
+.Pq Event 33H , Tn Pentium MMX
+The number of times the D1 stage could issue only one instruction
+because the FIFO had one instruction ready.
+This event is only allocated on counter 1.
+.It Li p5-data-cache-lines-written-back
+.Pq Event 06H
+The number of data cache lines that are written back, including
+those caused by internal and external snoops.
+.It Li p5-data-cache-tlb-miss-stall-duration
+.Pq Event 30H , Tn Pentium MMX
+The number of clocks the pipeline is stalled due to a data cache
+TLB miss.
+This event is only allocated on counter 1.
+.It Li p5-data-read
+.Pq Event 00H
+The number of memory data reads, counting internal data cache hits and
+misses.
+I/O and data memory accesses due to TLB miss processing are
+not included.
+Split cycle reads are counted individually.
+.It Li p5-data-read-miss
+.Pq Event 03H
+The number of memory read accesses that miss the data cache, counting
+both cacheable and un-cacheable accesses.
+Data accesses that are part of TLB miss processing are not included.
+I/O accesses are not included.
+.It Li p5-data-read-miss-or-write-miss
+.Pq Event 29H
+The number of data reads and writes that miss the internal data cache,
+counting un-cacheable accesses.
+Data accesses due to TLB miss processing are not counted.
+.It Li p5-data-read-or-write
+.Pq Event 28H
+The number of data reads and writes including internal data cache hits
+and misses.
+Data reads due to TLB miss processing are not counted.
+.It Li p5-data-tlb-miss
+.Pq Event 02H
+The number of misses to the data cache translation lookaside buffer.
+.It Li p5-data-write
+.Pq Event 01H
+The number of memory data writes, counting internal data cache hits
+and misses.
+I/O is not included and split cycle writes are counted individually.
+.It Li p5-data-write-miss
+.Pq Event 04H
+The number of memory write accesses that miss the data cache, counting
+both cacheable and un-cacheable accesses.
+I/O accesses are not counted.
+.It Li p5-emms-instructions-executed
+.Pq Event 2DH , Tn Pentium MMX
+The number of EMMS instructions executed.
+This event is only allocated on counter 0.
+.It Li p5-external-data-cache-snoop-hits
+.Pq Event 08H
+The number of external snoops to the data cache that hit a valid line,
+or the data line fill buffer, or one of the write back buffers.
+.It Li p5-external-snoops
+.Pq Event 07H
+The number of external snoop requests accepted, including snoops that
+hit in the code cache, the data cache and that hit in neither.
+.It Li p5-floating-point-stalls-duration
+.Pq Event 32H , Tn Pentium MMX
+The number of cycles the pipeline is stalled due to a floating point
+freeze.
+This event is only allocated on counter 0.
+.It Li p5-flops
+.Pq Event 22H
+The number of floating point adds, subtracts, multiplies, divides and
+square roots.
+Transcendental instructions trigger this event multiple times.
+Instructions generating divide-by-zero, negative square root, special
+operand and stack exceptions are not counted.
+Integer multiply instructions that use the x87 FPU are counted.
+.It Li p5-full-write-buffer-stall-duration-while-executing-mmx-instructions
+.Pq Event 3BH , Tn Pentium MMX
+The number of clocks the pipeline has stalled due to full write
+buffers when executing MMX instructions.
+This event is only allocated on counter 0.
+.It Li p5-hardware-interrupts
+.Pq Event 27H
+The number of taken INTR and NMI interrupts.
+.It Li p5-instructions-executed
+.Pq Event 16H
+The number of instructions executed.
+Repeat prefixed instructions are counted only once.
+The HLT instruction is counted only once, irrespective of the number
+of cycles spent in the halted state.
+All hardware and software exceptions are counted as instructions, and
+fault handler invocations are also counted as instructions.
+.It Li p5-instructions-executed-v-pipe
+.Pq Event 17H
+The number of instructions that executed in the V pipe.
+.It Li p5-io-read-or-write-cycle
+.Pq Event 1DH
+The number of bus cycles directed to I/O space.
+.It Li p5-locked-bus-cycle
+.Pq Event 1CH
+The number of locked bus cycles that occur on account of the lock
+prefixes, LOCK instructions, page table updates and descriptor table
+updates.
+.It Li p5-memory-accesses-in-both-pipes
+.Pq Event 09H
+The number of data memory reads or writes that are paired in both pipes.
+.It Li p5-misaligned-data-memory-or-io-references
+.Pq Event 0BH
+The number of memory or I/O reads or writes that are not aligned on
+natural boundaries.
+2- and 4-byte accesses are counted as misaligned if they cross a 4
+byte boundary.
+.It Li p5-misaligned-data-memory-reference-on-mmx-instructions
+.Pq Event 36H , Tn Pentium MMX
+The number of misaligned data memory references when executing MMX
+instructions.
+This event is only allocated on counter 0.
+.It Li p5-mispredicted-or-unpredicted-returns
+.Pq Event 37H , Tn Pentium MMX
+The number of returns predicted incorrectly or not at all, only
+counting RET instructions.
+This event is only allocated on counter 0.
+.It Li p5-mmx-instruction-data-read-misses
+.Pq Event 31H , Tn Pentium MMX
+The number of MMX instruction data read misses.
+This event is only allocated on counter 1.
+.It Li p5-mmx-instruction-data-reads
+.Pq Event 31H , Tn Pentium MMX
+The number of MMX instruction data reads.
+This event is only allocated on counter 0.
+.It Li p5-mmx-instruction-data-write-misses
+.Pq Event 34H , Tn Pentium MMX
+The number of data write misses caused by MMX instructions.
+This event is only allocated on counter 1.
+.It Li p5-mmx-instruction-data-writes
+.Pq Event 34H , Tn Pentium MMX
+The number of data writes caused by MMX instructions.
+This event is only allocated on counter 0.
+.It Li p5-mmx-instructions-executed-u-pipe
+.Pq Event 2BH , Tn Pentium MMX
+The number of MMX instructions executed in the U pipe.
+This event is only allocated on counter 0.
+.It Li p5-mmx-instructions-executed-v-pipe
+.Pq Event 2BH , Tn Pentium MMX
+The number of MMX instructions executed in the V pipe.
+This event is only allocated on counter 1.
+.It Li p5-mmx-multiply-unit-interlock
+.Pq Event 38H , Tn Pentium MMX
+The number of clocks the pipeline is stalled because the destination
+of a prior MMX multiply is not ready.
+This event is only allocated on counter 0.
+.It Li p5-movd-movq-store-stall-due-to-previous-mmx-operation
+.Pq Event 38H , Tn Pentium MMX
+The number of clocks a MOVD/MOVQ instruction stalled in the D2 stage
+of the pipeline due to a previous MMX instruction.
+This event is only allocated on counter 1.
+.It Li p5-noncacheable-memory-reads
+.Pq Event 1EH
+The number of bus cycles for non-cacheable instruction or data reads,
+including cycles caused by TLB misses.
+.It Li p5-number-of-cycles-not-in-halt-state
+.Pq Event 30H , Tn Pentium MMX
+The number of cycles the processor is not idle due to the HLT
+instruction.
+This event is only allocated on counter 0.
+.It Li p5-pipeline-agi-stalls
+.Pq Event 1FH
+The number of address generation interlock stalls.
+An AGI that occurs in both the U and V pipelines in the same clock
+signals the event twice.
+.It Li p5-pipeline-flushes
+.Pq Event 15H
+The number of pipeline flushes that occur.
+Pipeline flushes are caused by branch mispredicts, exceptions,
+interrupts, some segment register loads, and BTB misses.
+Prefetch queue flushes due to serializing instructions are not
+counted.
+.It Li p5-pipeline-flushes-due-to-wrong-branch-predictions
+.Pq Event 35H , Tn Pentium MMX
+The number of pipeline flushes due to wrong branch predictions
+resolved in either the E- or WB- stage of the pipeline.
+This event is only allocated on counter 0.
+.It Li p5-pipeline-flushes-due-to-wrong-branch-predictions-resolved-in-wb-stage
+.Pq Event 35H , Tn Pentium MMX
+The number of pipeline flushes due to wrong branch predictions
+resolved in the WB-stage of the pipeline.
+This event is only allocated on counter 1.
+.It Li p5-pipeline-stall-for-mmx-instruction-data-memory-reads
+.Pq Event 36H , Tn Pentium MMX
+The number of clocks during pipeline stalls caused by waiting for MMX data
+memory reads.
+This event is only allocated on counter 1.
+.It Li p5-predicted-returns
+.Pq Event 37H , Tn Pentium MMX
+The number of predicted returns, whether correct or incorrect.
+This counter only counts RET instructions.
+This event is only allocated on counter 1.
+.It Li p5-returns
+.Pq Event 39H , Tn Pentium MMX
+The number of RET instructions executed.
+This event is only allocated on counter 0.
+.It Li p5-saturating-mmx-instructions-executed
+.Pq Event 2FH , Tn Pentium MMX
+The number of saturating MMX instructions executed.
+This event is only allocated on counter 0.
+.It Li p5-saturations-performed
+.Pq Event 2FH , Tn Pentium MMX
+The number of saturating MMX instructions executed for which at least
+one result was actually saturated.
+This event is only allocated on counter 1.
+.It Li p5-stall-on-mmx-instruction-write-to-e-o-m-state-line
+.Pq Event 3BH , Tn Pentium MMX
+The number of clocks during stalls on MMX instructions writing to
+E- or M- state cache lines.
+This event is only allocated on counter 1.
+.It Li p5-stall-on-write-to-an-e-or-m-state-line
+.Pq Event 1BH
+The number of stalls on a write to an exclusive or modified data cache
+line.
+.It Li p5-taken-branch-or-btb-hit
+.Pq Event 14H
+The number of events that may cause a hit in the BTB, namely either
+taken branches or BTB hits.
+.It Li p5-taken-branches
+.Pq Event 32H , Tn Pentium MMX
+The number of taken branches.
+This event is only allocated on counter 1.
+.It Li p5-transitions-between-mmx-and-fp-instructions
+.Pq Event 2DH , Tn Pentium MMX
+The number of transitions between MMX and floating-point instructions
+and vice-versa.
+This event is only allocated on counter 1.
+.It Li p5-waiting-for-data-memory-read-stall-duration
+.Pq Event 1AH
+The number of clocks the pipeline was stalled waiting for data
+memory reads.
+Data TLB miss processing is included in this count.
+.It Li p5-write-buffer-full-stall-duration
+.Pq Event 19H
+The number of clocks while the pipeline was stalled due to write
+buffers being full.
+.It Li p5-write-hit-to-m-or-e-state-lines
+.Pq Event 05H
+The number of writes that hit exclusive or modified lines in the data
+cache.
+.It Li p5-writes-to-noncacheable-memory
+.Pq Event 2EH , Tn Pentium MMX
+The number of writes to non-cacheable memory, including write cycles
+caused by TLB misses and I/O writes.
+This event is only allocated on counter 1.
+.El
+.Ss Event Name Aliases
+The following table shows the mapping between the PMC-independent
+aliases supported by
+.Lb libpmc
+and the underlying hardware events used.
+.Bl -column "branch-mispredicts" "Description"
+.It Em Alias Ta Em Event
+.It Li branches Ta Li p5-taken-branches
+.It Li branch-mispredicts Ta Li (unsupported)
+.It Li dc-misses Ta Li p5-data-read-miss-or-write-miss
+.It Li ic-misses Ta Li p5-code-cache-miss
+.It Li instructions Ta Li p5-instructions-executed
+.It Li interrupts Ta Li p5-hardware-interrupts
+.It Li unhalted-cycles Ta Li p5-number-of-cycles-not-in-halt-state
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.p6.3 b/lib/libpmc/pmc.p6.3
new file mode 100644
index 0000000..d8cde64
--- /dev/null
+++ b/lib/libpmc/pmc.p6.3
@@ -0,0 +1,1026 @@
+.\" Copyright (c) 2003-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd October 4, 2008
+.Dt PMC.P6 3
+.Os
+.Sh NAME
+.Nm pmc.p6
+.Nd measurement events for
+.Tn Intel
+Pentium Pro, P-II, P-III family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+Intel P6 PMCs are present in Intel
+.Tn "Pentium Pro" ,
+.Tn "Pentium II" ,
+.Tn Celeron ,
+.Tn "Pentium III"
+and
+.Tn "Pentium M"
+processors.
+.Pp
+They are documented in
+.Rs
+.%B "IA-32 Intel(R) Architecture Software Developer's Manual"
+.%T "Volume 3: System Programming Guide"
+.%N "Order Number 245472-012"
+.%D 2003
+.%Q "Intel Corporation"
+.Re
+.Pp
+Some of these events are affected by processor errata described in
+.Rs
+.%B "Intel(R) Pentium(R) III Processor Specification Update"
+.%N "Document Number: 244453-054"
+.%D "April 2005"
+.%Q "Intel Corporation"
+.Re
+.Ss PMC Features
+These CPUs have two counters, each 40 bits wide.
+Some events may only be used on specific counters and some events are
+defined only on specific processor models.
+These PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for Intel P6 PMCs can have the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li umask= Ns Ar value
+This qualifier is used to further qualify the event selected (see
+below).
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Pp
+The event specifiers supported by Intel P6 PMCs are:
+.Bl -tag -width indent
+.It Li p6-baclears
+.Pq Event E6H
+Count the number of times a static branch prediction was made by the
+branch decoder because the BTB did not have a prediction.
+.It Li p6-br-bac-missp-exec
+.Pq Event 8AH , Tn "Pentium M"
+Count the number of branch instructions executed that were
+mispredicted at the Front End (BAC).
+.It Li p6-br-bogus
+.Pq Event E4H
+Count the number of bogus branches.
+.It Li p6-br-call-exec
+.Pq Event 92H , Tn "Pentium M"
+Count the number of call instructions executed.
+.It Li p6-br-call-missp-exec
+.Pq Event 93H , Tn "Pentium M"
+Count the number of call instructions executed that were mispredicted.
+.It Li p6-br-cnd-exec
+.Pq Event 8BH , Tn "Pentium M"
+Count the number of conditional branch instructions executed.
+.It Li p6-br-cnd-missp-exec
+.Pq Event 8CH , Tn "Pentium M"
+Count the number of conditional branch instructions executed that were
+mispredicted.
+.It Li p6-br-ind-call-exec
+.Pq Event 94H , Tn "Pentium M"
+Count the number of indirect call instructions executed.
+.It Li p6-br-ind-exec
+.Pq Event 8DH , Tn "Pentium M"
+Count the number of indirect branch instructions executed.
+.It Li p6-br-ind-missp-exec
+.Pq Event 8EH , Tn "Pentium M"
+Count the number of indirect branch instructions executed that were
+mispredicted.
+.It Li p6-br-inst-decoded
+.Pq Event E0H
+Count the number of branch instructions decoded.
+.It Li p6-br-inst-exec
+.Pq Event 88H , Tn "Pentium M"
+Count the number of branch instructions executed but not necessarily retired.
+.It Li p6-br-inst-retired
+.Pq Event C4H
+Count the number of branch instructions retired.
+.It Li p6-br-miss-pred-retired
+.Pq Event C5H
+Count the number of mispredicted branch instructions retired.
+.It Li p6-br-miss-pred-taken-ret
+.Pq Event CAH
+Count the number of taken mispredicted branches retired.
+.It Li p6-br-missp-exec
+.Pq Event 89H , Tn "Pentium M"
+Count the number of branch instructions executed that were
+mispredicted at execution.
+.It Li p6-br-ret-bac-missp-exec
+.Pq Event 91H , Tn "Pentium M"
+Count the number of return instructions executed that were
+mispredicted at the Front End (BAC).
+.It Li p6-br-ret-exec
+.Pq Event 8FH , Tn "Pentium M"
+Count the number of return instructions executed.
+.It Li p6-br-ret-missp-exec
+.Pq Event 90H , Tn "Pentium M"
+Count the number of return instructions executed that were
+mispredicted at execution.
+.It Li p6-br-taken-retired
+.Pq Event C9H
+Count the number of taken branches retired.
+.It Li p6-btb-misses
+.Pq Event E2H
+Count the number of branches for which the BTB did not produce a
+prediction.
+.It Li p6-bus-bnr-drv
+.Pq Event 61H
+Count the number of bus clock cycles during which this processor is
+driving the BNR# pin.
+.It Li p6-bus-data-rcv
+.Pq Event 64H
+Count the number of bus clock cycles during which this processor is
+receiving data.
+.It Li p6-bus-drdy-clocks Op Li ,umask= Ns Ar qualifier
+.Pq Event 62H
+Count the number of clocks during which DRDY# is asserted.
+An additional qualifier may be specified, and comprises one of the
+following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-hit-drv
+.Pq Event 7AH
+Count the number of bus clock cycles during which this processor is
+driving the HIT# pin.
+.It Li p6-bus-hitm-drv
+.Pq Event 7BH
+Count the number of bus clock cycles during which this processor is
+driving the HITM# pin.
+.It Li p6-bus-lock-clocks Op Li ,umask= Ns Ar qualifier
+.Pq Event 63H
+Count the number of clocks during which LOCK# is asserted on the
+external system bus.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-req-outstanding
+.Pq Event 60H
+Count the number of bus requests outstanding in any given cycle.
+.It Li p6-bus-snoop-stall
+.Pq Event 7EH
+Count the number of clock cycles during which the bus is snoop stalled.
+.It Li p6-bus-tran-any Op Li ,umask= Ns Ar qualifier
+.Pq Event 70H
+Count the number of completed bus transactions of any kind.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-tran-brd Op Li ,umask= Ns Ar qualifier
+.Pq Event 65H
+Count the number of burst read transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-tran-burst Op Li ,umask= Ns Ar qualifier
+.Pq Event 6EH
+Count the number of completed burst transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-tran-def Op Li ,umask= Ns Ar qualifier
+.Pq Event 6DH
+Count the number of completed deferred transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-tran-ifetch Op Li ,umask= Ns Ar qualifier
+.Pq Event 68H
+Count the number of completed instruction fetch transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-tran-inval Op Li ,umask= Ns Ar qualifier
+.Pq Event 69H
+Count the number of completed invalidate transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-tran-mem Op Li ,umask= Ns Ar qualifier
+.Pq Event 6FH
+Count the number of completed memory transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-tran-pwr Op Li ,umask= Ns Ar qualifier
+.Pq Event 6AH
+Count the number of completed partial write transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-tran-rfo Op Li ,umask= Ns Ar qualifier
+.Pq Event 66H
+Count the number of completed read-for-ownership transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-trans-io Op Li ,umask= Ns Ar qualifier
+.Pq Event 6CH
+Count the number of completed I/O transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-trans-p Op Li ,umask= Ns Ar qualifier
+.Pq Event 6BH
+Count the number of completed partial transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-bus-trans-wb Op Li ,umask= Ns Ar qualifier
+.Pq Event 67H
+Count the number of completed write-back transactions.
+An additional qualifier may be specified and comprises one of the following
+keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li any
+Count transactions generated by any agent on the bus.
+.It Li self
+Count transactions generated by this processor.
+.El
+.Pp
+The default is to count operations generated by this processor.
+.It Li p6-cpu-clk-unhalted
+.Pq Event 79H
+Count the number of cycles during which the processor was not halted.
+.Pp
+.Pq Tn "Pentium M"
+Count the number of cycles during which the processor was not halted
+and not in a thermal trip.
+.It Li p6-cycles-div-busy
+.Pq Event 14H
+Count the number of cycles during which the divider is busy and cannot
+accept new divides.
+This event is only allocated on counter 0.
+.It Li p6-cycles-int-pending-and-masked
+.Pq Event C7H
+Count the number of processor cycles for which interrupts were
+disabled and interrupts were pending.
+.It Li p6-cycles-int-masked
+.Pq Event C6H
+Count the number of processor cycles for which interrupts were
+disabled.
+.It Li p6-data-mem-refs
+.Pq Event 43H
+Count all loads and all stores using any memory type, including
+internal retries.
+Each part of a split store is counted separately.
+.It Li p6-dcu-lines-in
+.Pq Event 45H
+Count the total lines allocated in the data cache unit.
+.It Li p6-dcu-m-lines-in
+.Pq Event 46H
+Count the number of M state lines allocated in the data cache unit.
+.It Li p6-dcu-m-lines-out
+.Pq Event 47H
+Count the number of M state lines evicted from the data cache unit.
+.It Li p6-dcu-miss-outstanding
+.Pq Event 48H
+Count the weighted number of cycles while a data cache unit miss is
+outstanding, incremented by the number of outstanding cache misses at
+any time.
+.It Li p6-div
+.Pq Event 13H
+Count the number of integer and floating-point divides including
+speculative divides.
+This event is only allocated on counter 1.
+.It Li p6-emon-esp-uops
+.Pq Event D7H , Tn "Pentium M"
+Count the total number of micro-ops.
+.It Li p6-emon-est-trans Op Li ,umask= Ns Ar qualifier
+.Pq Event 58H , Tn "Pentium M"
+Count the number of
+.Tn "Enhanced Intel SpeedStep"
+transitions.
+An additional qualifier may be specified, and can be one of the
+following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li all
+Count all transitions.
+.It Li freq
+Count only frequency transitions.
+.El
+.Pp
+The default is to count all transitions.
+.It Li p6-emon-fused-uops-ret Op Li ,umask= Ns Ar qualifier
+.Pq Event DAH , Tn "Pentium M"
+Count the number of retired fused micro-ops.
+An additional qualifier may be specified, and may be one of the
+following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li all
+Count all fused micro-ops.
+.It Li loadop
+Count only load and op micro-ops.
+.It Li stdsta
+Count only STD/STA micro-ops.
+.El
+.Pp
+The default is to count all fused micro-ops.
+.It Li p6-emon-kni-comp-inst-ret Op Li ,umask= Ns Ar qualifier
+.Pq Event D9H , Tn "Pentium III"
+Count the number of SSE computational instructions retired.
+An additional qualifier may be specified, and comprises one of the
+following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li packed-and-scalar
+Count packed and scalar operations.
+.It Li scalar
+Count scalar operations only.
+.El
+.Pp
+The default is to count packed and scalar operations.
+.It Li p6-emon-kni-inst-retired Op Li ,umask= Ns Ar qualifier
+.Pq Event D8H , Tn "Pentium III"
+Count the number of SSE instructions retired.
+An additional qualifier may be specified, and comprises one of the
+following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li packed-and-scalar
+Count packed and scalar operations.
+.It Li scalar
+Count scalar operations only.
+.El
+.Pp
+The default is to count packed and scalar operations.
+.It Li p6-emon-kni-pref-dispatched Op Li ,umask= Ns Ar qualifier
+.Pq Event 07H , Tn "Pentium III"
+Count the number of SSE prefetch or weakly ordered instructions
+dispatched (including speculative prefetches).
+An additional qualifier may be specified, and comprises one of the
+following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li nta
+Count non-temporal prefetches.
+.It Li t1
+Count prefetches to L1.
+.It Li t2
+Count prefetches to L2.
+.It Li wos
+Count weakly ordered stores.
+.El
+.Pp
+The default is to count non-temporal prefetches.
+.It Li p6-emon-kni-pref-miss Op Li ,umask= Ns Ar qualifier
+.Pq Event 4BH , Tn "Pentium III"
+Count the number of prefetch or weakly ordered instructions that miss
+all caches.
+An additional qualifier may be specified, and comprises one of the
+following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li nta
+Count non-temporal prefetches.
+.It Li t1
+Count prefetches to L1.
+.It Li t2
+Count prefetches to L2.
+.It Li wos
+Count weakly ordered stores.
+.El
+.Pp
+The default is to count non-temporal prefetches.
+.It Li p6-emon-pref-rqsts-dn
+.Pq Event F8H , Tn "Pentium M"
+Count the number of downward prefetches issued.
+.It Li p6-emon-pref-rqsts-up
+.Pq Event F0H , Tn "Pentium M"
+Count the number of upward prefetches issued.
+.It Li p6-emon-simd-instr-retired
+.Pq Event CEH , Tn "Pentium M"
+Count the number of retired
+.Tn MMX
+instructions.
+.It Li p6-emon-sse-sse2-comp-inst-retired Op Li ,umask= Ns Ar qualifier
+.Pq Event D9H , Tn "Pentium M"
+Count the number of computational SSE instructions retired.
+An additional qualifier may be specified and can be one of the
+following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li sse-packed-single
+Count SSE packed-single instructions.
+.It Li sse-scalar-single
+Count SSE scalar-single instructions.
+.It Li sse2-packed-double
+Count SSE2 packed-double instructions.
+.It Li sse2-scalar-double
+Count SSE2 scalar-double instructions.
+.El
+.Pp
+The default is to count SSE packed-single instructions.
+.It Li p6-emon-sse-sse2-inst-retired Op Li ,umask= Ns Ar qualifier
+.Pq Event D8H , Tn "Pentium M"
+Count the number of SSE instructions retired.
+An additional qualifier can be specified, and can be one of the
+following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li sse-packed-single
+Count SSE packed-single instructions.
+.It Li sse-packed-single-scalar-single
+Count SSE packed-single and scalar-single instructions.
+.It Li sse2-packed-double
+Count SSE2 packed-double instructions.
+.It Li sse2-scalar-double
+Count SSE2 scalar-double instructions.
+.El
+.Pp
+The default is to count SSE packed-single instructions.
+.It Li p6-emon-synch-uops
+.Pq Event D3H , Tn "Pentium M"
+Count the number of sync micro-ops.
+.It Li p6-emon-thermal-trip
+.Pq Event 59H , Tn "Pentium M"
+Count the duration or occurrences of thermal trips.
+Use the
+.Dq Li edge
+qualifier to count occurrences of thermal trips.
+.It Li p6-emon-unfusion
+.Pq Event DBH , Tn "Pentium M"
+Count the number of unfusion events in the reorder buffer.
+.It Li p6-flops
+.Pq Event C1H
+Count the number of computational floating point operations retired.
+This event is only allocated on counter 0.
+.It Li p6-fp-assist
+.Pq Event 11H
+Count the number of floating point exceptions handled by microcode.
+This event is only allocated on counter 1.
+.It Li p6-fp-comps-ops-exe
+.Pq Event 10H
+Count the number of computational floating point operations executed.
+This event is only allocated on counter 0.
+.It Li p6-fp-mmx-trans Op Li ,umask= Ns Ar qualifier
+.Pq Event CCH , Tn "Pentium II" , Tn "Pentium III"
+Count the number of transitions between MMX and floating-point
+instructions.
+An additional qualifier may be specified, and comprises one of the
+following keywords:
+.Pp
+.Bl -tag -width indent -compact
+.It Li mmxtofp
+Count transitions from MMX instructions to floating-point instructions.
+.It Li fptommx
+Count transitions from floating-point instructions to MMX instructions.
+.El
+.Pp
+The default is to count MMX to floating-point transitions.
+.It Li p6-hw-int-rx
+.Pq Event C8H
+Count the number of hardware interrupts received.
+.It Li p6-ifu-ifetch
+.Pq Event 80H
+Count the number of instruction fetches, both cacheable and non-cacheable.
+.It Li p6-ifu-ifetch-miss
+.Pq Event 81H
+Count the number of instruction fetch misses (i.e., those that produce
+memory accesses).
+.It Li p6-ifu-mem-stall
+.Pq Event 86H
+Count the number of cycles instruction fetch is stalled for any reason.
+.It Li p6-ild-stall
+.Pq Event 87H
+Count the number of cycles the instruction length decoder is stalled.
+.It Li p6-inst-decoded
+.Pq Event D0H
+Count the number of instructions decoded.
+.It Li p6-inst-retired
+.Pq Event C0H
+Count the number of instructions retired.
+.It Li p6-itlb-miss
+.Pq Event 85H
+Count the number of instruction TLB misses.
+.It Li p6-l2-ads
+.Pq Event 21H
+Count the number of L2 address strobes.
+.It Li p6-l2-dbus-busy
+.Pq Event 22H
+Count the number of cycles during which the L2 cache data bus was busy.
+.It Li p6-l2-dbus-busy-rd
+.Pq Event 23H
+Count the number of cycles during which the L2 cache data bus was busy
+transferring read data from L2 to the processor.
+.It Li p6-l2-ifetch Op Li ,umask= Ns Ar qualifier
+.Pq Event 28H
+Count the number of L2 instruction fetches.
+An additional qualifier may be specified and comprises a list of the following
+keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li e
+Count operations affecting E (exclusive) state lines.
+.It Li i
+Count operations affecting I (invalid) state lines.
+.It Li m
+Count operations affecting M (modified) state lines.
+.It Li s
+Count operations affecting S (shared) state lines.
+.El
+.Pp
+The default is to count operations affecting all (MESI) state lines.
+.It Li p6-l2-ld Op Li ,umask= Ns Ar qualifier
+.Pq Event 29H
+Count the number of L2 data loads.
+An additional qualifier may be specified and comprises a list of the following
+keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li both
+.Pq Tn "Pentium M"
+Count both hardware-prefetched lines and non-hardware-prefetched lines.
+.It Li e
+Count operations affecting E (exclusive) state lines.
+.It Li hw
+.Pq Tn "Pentium M"
+Count hardware-prefetched lines only.
+.It Li i
+Count operations affecting I (invalid) state lines.
+.It Li m
+Count operations affecting M (modified) state lines.
+.It Li nonhw
+.Pq Tn "Pentium M"
+Exclude hardware-prefetched lines.
+.It Li s
+Count operations affecting S (shared) state lines.
+.El
+.Pp
+The default on processors other than
+.Tn "Pentium M"
+processors is to count operations affecting all (MESI) state lines.
+The default on
+.Tn "Pentium M"
+processors is to count both hardware-prefetched and
+non-hardware-prefetch operations on all (MESI) state lines.
+.Pq Errata
+This event is affected by processor errata E53.
+.It Li p6-l2-lines-in Op Li ,umask= Ns Ar qualifier
+.Pq Event 24H
+Count the number of L2 lines allocated.
+An additional qualifier may be specified and comprises a list of the following
+keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li both
+.Pq Tn "Pentium M"
+Count both hardware-prefetched lines and non-hardware-prefetched lines.
+.It Li e
+Count operations affecting E (exclusive) state lines.
+.It Li hw
+.Pq Tn "Pentium M"
+Count hardware-prefetched lines only.
+.It Li i
+Count operations affecting I (invalid) state lines.
+.It Li m
+Count operations affecting M (modified) state lines.
+.It Li nonhw
+.Pq Tn "Pentium M"
+Exclude hardware-prefetched lines.
+.It Li s
+Count operations affecting S (shared) state lines.
+.El
+.Pp
+The default on processors other than
+.Tn "Pentium M"
+processors is to count operations affecting all (MESI) state lines.
+The default on
+.Tn "Pentium M"
+processors is to count both hardware-prefetched and
+non-hardware-prefetch operations on all (MESI) state lines.
+.Pq Errata
+This event is affected by processor errata E45.
+.It Li p6-l2-lines-out Op Li ,umask= Ns Ar qualifier
+.Pq Event 26H
+Count the number of L2 lines evicted.
+An additional qualifier may be specified and comprises a list of the following
+keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li both
+.Pq Tn "Pentium M"
+Count both hardware-prefetched lines and non-hardware-prefetched lines.
+.It Li e
+Count operations affecting E (exclusive) state lines.
+.It Li hw
+.Pq Tn "Pentium M"
+Count hardware-prefetched lines only.
+.It Li i
+Count operations affecting I (invalid) state lines.
+.It Li m
+Count operations affecting M (modified) state lines.
+.It Li nonhw
+.Pq Tn "Pentium M" only
+Exclude hardware-prefetched lines.
+.It Li s
+Count operations affecting S (shared) state lines.
+.El
+.Pp
+The default on processors other than
+.Tn "Pentium M"
+processors is to count operations affecting all (MESI) state lines.
+The default on
+.Tn "Pentium M"
+processors is to count both hardware-prefetched and
+non-hardware-prefetch operations on all (MESI) state lines.
+.Pq Errata
+This event is affected by processor errata E45.
+.It Li p6-l2-m-lines-inm
+.Pq Event 25H
+Count the number of modified lines allocated in L2 cache.
+.It Li p6-l2-m-lines-outm Op Li ,umask= Ns Ar qualifier
+.Pq Event 27H
+Count the number of L2 M-state lines evicted.
+.Pp
+.Pq Tn "Pentium M"
+On these processors an additional qualifier may be specified and
+comprises a list of the following keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li both
+Count both hardware-prefetched lines and non-hardware-prefetched lines.
+.It Li hw
+Count hardware-prefetched lines only.
+.It Li nonhw
+Exclude hardware-prefetched lines.
+.El
+.Pp
+The default is to count both hardware-prefetched and
+non-hardware-prefetch operations.
+.Pq Errata
+This event is affected by processor errata E53.
+.It Li p6-l2-rqsts Op Li ,umask= Ns Ar qualifier
+.Pq Event 2EH
+Count the total number of L2 requests.
+An additional qualifier may be specified and comprises a list of the following
+keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li e
+Count operations affecting E (exclusive) state lines.
+.It Li i
+Count operations affecting I (invalid) state lines.
+.It Li m
+Count operations affecting M (modified) state lines.
+.It Li s
+Count operations affecting S (shared) state lines.
+.El
+.Pp
+The default is to count operations affecting all (MESI) state lines.
+.It Li p6-l2-st Op Li ,umask= Ns Ar qualifier
+.Pq Event 2AH
+Count the number of L2 data stores.
+An additional qualifier may be specified and comprises a list of the following
+keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li e
+Count operations affecting E (exclusive) state lines.
+.It Li i
+Count operations affecting I (invalid) state lines.
+.It Li m
+Count operations affecting M (modified) state lines.
+.It Li s
+Count operations affecting S (shared) state lines.
+.El
+.Pp
+The default is to count operations affecting all (MESI) state lines.
+.It Li p6-ld-blocks
+.Pq Event 03H
+Count the number of load operations delayed due to store buffer blocks.
+.It Li p6-misalign-mem-ref
+.Pq Event 05H
+Count the number of misaligned data memory references (crossing a
+64-bit boundary).
+.It Li p6-mmx-assist
+.Pq Event CDH , Tn "Pentium II" , Tn "Pentium III"
+Count the number of MMX assists executed.
+.It Li p6-mmx-instr-exec
+.Pq Event B0H
+.Pq Tn Celeron , Tn "Pentium II"
+Count the number of MMX instructions executed, except MOVQ and MOVD
+stores from register to memory.
+.It Li p6-mmx-instr-ret
+.Pq Event CEH , Tn "Pentium II"
+Count the number of MMX instructions retired.
+.It Li p6-mmx-instr-type-exec Op Li ,umask= Ns Ar qualifier
+.Pq Event B3H , Tn "Pentium II" , Tn "Pentium III"
+Count the number of MMX instructions executed.
+An additional qualifier may be specified and comprises a list of
+the following keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li pack
+Count MMX pack operation instructions.
+.It Li packed-arithmetic
+Count MMX packed arithmetic instructions.
+.It Li packed-logical
+Count MMX packed logical instructions.
+.It Li packed-multiply
+Count MMX packed multiply instructions.
+.It Li packed-shift
+Count MMX packed shift instructions.
+.It Li unpack
+Count MMX unpack operation instructions.
+.El
+.Pp
+The default is to count all operations.
+.It Li p6-mmx-sat-instr-exec
+.Pq Event B1H , Tn "Pentium II" , Tn "Pentium III"
+Count the number of MMX saturating instructions executed.
+.It Li p6-mmx-uops-exec
+.Pq Event B2H , Tn "Pentium II" , Tn "Pentium III"
+Count the number of MMX micro-ops executed.
+.It Li p6-mul
+.Pq Event 12H
+Count the number of integer and floating-point multiplies, including
+speculative multiplies.
+This event is only allocated on counter 1.
+.It Li p6-partial-rat-stalls
+.Pq Event D2H
+Count the number of cycles or events for partial stalls.
+.It Li p6-resource-stalls
+.Pq Event A2H
+Count the number of cycles there was a resource related stall of any kind.
+.It Li p6-ret-seg-renames
+.Pq Event D6H , Tn "Pentium II" , Tn "Pentium III"
+Count the number of segment register rename events retired.
+.It Li p6-sb-drains
+.Pq Event 04H
+Count the number of cycles the store buffer is draining.
+.It Li p6-seg-reg-renames Op Li ,umask= Ns Ar qualifier
+.Pq Event D5H , Tn "Pentium II" , Tn "Pentium III"
+Count the number of segment register renames.
+An additional qualifier may be specified, and comprises a list of the
+following keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li ds
+Count renames for segment register DS.
+.It Li es
+Count renames for segment register ES.
+.It Li fs
+Count renames for segment register FS.
+.It Li gs
+Count renames for segment register GS.
+.El
+.Pp
+The default is to count operations affecting all segment registers.
+.It Li p6-seg-rename-stalls Op Li ,umask= Ns Ar qualifier
+.Pq Event D4H , Tn "Pentium II" , Tn "Pentium III"
+Count the number of segment register renaming stalls.
+An additional qualifier may be specified, and comprises a list of the
+following keywords separated by
+.Ql +
+characters:
+.Pp
+.Bl -tag -width indent -compact
+.It Li ds
+Count stalls for segment register DS.
+.It Li es
+Count stalls for segment register ES.
+.It Li fs
+Count stalls for segment register FS.
+.It Li gs
+Count stalls for segment register GS.
+.El
+.Pp
+The default is to count operations affecting all the segment registers.
+.It Li p6-segment-reg-loads
+.Pq Event 06H
+Count the number of segment register loads.
+.It Li p6-uops-retired
+.Pq Event C2H
+Count the number of micro-ops retired.
+.El
+.Ss Event Name Aliases
+The following table shows the mapping between the PMC-independent
+aliases supported by
+.Lb libpmc
+and the underlying hardware events used.
+.Bl -column "branch-mispredicts" "Description"
+.It Em Alias Ta Em Event
+.It Li branches Ta Li p6-br-inst-retired
+.It Li branch-mispredicts Ta Li p6-br-miss-pred-retired
+.It Li dc-misses Ta Li p6-dcu-lines-in
+.It Li ic-misses Ta Li p6-ifu-ifetch-miss
+.It Li instructions Ta Li p6-inst-retired
+.It Li interrupts Ta Li p6-hw-int-rx
+.It Li unhalted-cycles Ta Li p6-cpu-clk-unhalted
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.tsc.3 b/lib/libpmc/pmc.tsc.3
new file mode 100644
index 0000000..144ff35
--- /dev/null
+++ b/lib/libpmc/pmc.tsc.3
@@ -0,0 +1,83 @@
+.\" Copyright (c) 2003-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd October 4, 2008
+.Dt PMC.TSC 3
+.Os
+.Sh NAME
+.Nm pmc.tsc
+.Nd measurements using the i386 timestamp counter
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+In the i386 architecture, the timestamp counter is a monotonically
+non-decreasing counter that counts processor cycles.
+.Pp
+This counter may be selected by specifying an event specifier
+.Dq Li tsc
+to
+.Xr pmc_allocate 3 .
+The TSC is a read-only counter that may only be allocated in
+system-wide counting mode.
+The
+.Dq Li tsc
+event does not support further event qualifiers.
+.Pp
+Multiple processes are allowed to allocate the TSC.
+Once allocated, the TSC may be read using the
+.Fn pmc_read
+function, or by using the
+.Li RDTSC
+instruction.
+.Ss Event Name Aliases
+The alias
+.Dq Li cycles
+maps to the TSC.
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.ucf.3 b/lib/libpmc/pmc.ucf.3
new file mode 100644
index 0000000..5155eb6
--- /dev/null
+++ b/lib/libpmc/pmc.ucf.3
@@ -0,0 +1,113 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 30, 2010
+.Dt PMC.UCF 3
+.Os
+.Sh NAME
+.Nm pmc.ucf
+.Nd measurement events for
+.Tn Intel
+uncore fixed function performance counters
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+Each fixed-function PMC measures a specific hardware event.
+The number of fixed-function PMCs implemented in a CPU can vary.
+The number of fixed-function PMCs present can be determined at runtime
+by using function
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel uncore fixed-function PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Pp
+.Ss PMC Capabilities
+Fixed-function PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta \&No
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta \&No
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta \&No
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Class Name Prefix
+These PMCs are named using a class name prefix of
+.Dq Li ucf- .
+.Ss Event Specifiers (Fixed Function PMCs)
+The fixed function PMCs are selectable using the following
+event names:
+.Bl -tag -width indent
+.It Li UCLOCK
+.Pq Fixed Function Counter 0
+The fixed-function uncore counter increments at the rate of the U-clock.
+The frequency of the uncore clock domain can be determined from the uncore
+clock ratio which is available in the PCI configuration space register at
+offset C0H under device number 0 and Function 0.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.westmere.3 b/lib/libpmc/pmc.westmere.3
new file mode 100644
index 0000000..bd0244e
--- /dev/null
+++ b/lib/libpmc/pmc.westmere.3
@@ -0,0 +1,1329 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Dt PMC.WESTMERE 3
+.Os
+.Sh NAME
+.Nm pmc.westmere
+.Nd measurement events for
+.Tn Intel
+.Tn Westmere
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Westmere"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to three classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Westmere PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss WESTMERE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+.Ss WESTMERE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full
+and partial cachelines as well as demand data page table entry
+cacheline reads. Does not count L2 data read prefetches or
+instruction fetches.
+.It Li DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership
+(RFO) requests generated by a write to data cacheline. Does not
+count L2 RFO.
+.It Li DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline
+reads. Does not count L2 code read prefetches.
+.It Li WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li PF_RFO
+Counts the number of RFO requests generated by L2 prefetchers.
+.It Li PF_IFETCH
+Counts the number of code reads generated by L2 prefetchers.
+.It Li OTHER
+Counts one of the following transaction types, including L3 invalidate,
+I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences,
+lock, unlock, split lock.
+.It Li UNCORE_HIT
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+with no coherency actions required (snooping).
+.It Li OTHER_CORE_HIT_SNP
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and were serviced by another core with a cross core snoop where no modified
+copies were found (clean).
+.It Li OTHER_CORE_HITM
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and were serviced by another core with a cross core snoop where modified
+copies were found (HITM).
+.It Li REMOTE_CACHE_FWD
+L3 Miss: local homed requests that missed the L3 cache and were serviced
+by forwarded data following a cross package snoop where no modified
+copies were found. (Remote home requests are not counted.)
+.It Li REMOTE_DRAM
+L3 Miss: remote home requests that missed the L3 cache and were serviced
+by remote DRAM.
+.It Li LOCAL_DRAM
+L3 Miss: local home requests that missed the L3 cache and were serviced
+by local DRAM.
+.It Li NON_DRAM
+Non-DRAM requests that were serviced by IOH.
+.El
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Ss Event Specifiers (Programmable PMCs)
+Westmere programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li LOAD_BLOCK.OVERLAP_STORE
+.Pq Event 03H , Umask 02H
+Loads that partially overlap an earlier store
+.It Li SB_DRAIN.ANY
+.Pq Event 04H , Umask 07H
+All Store buffer stall cycles
+.It Li MISALIGN_MEMORY.STORE
+.Pq Event 05H , Umask 02H
+All stores referenced with misaligned address
+.It Li STORE_BLOCKS.AT_RET
+.Pq Event 06H , Umask 04H
+Counts number of loads delayed with at-Retirement block code. The following
+loads need to be executed at retirement and wait for all senior stores on
+the same thread to be drained: load splitting across 4K boundary (page
+split), load accessing uncacheable (UC or USWC) memory, load lock, and load
+with page table in UC or USWC memory region.
+.It Li STORE_BLOCKS.L1D_BLOCK
+.Pq Event 06H , Umask 08H
+Cacheable loads delayed with L1D block code
+.It Li PARTIAL_ADDRESS_ALIAS
+.Pq Event 07H , Umask 01H
+Counts false dependency due to partial address aliasing
+.It Li DTLB_LOAD_MISSES.ANY
+.Pq Event 08H , Umask 01H
+Counts all load misses that cause a page walk
+.It Li DTLB_LOAD_MISSES.WALK_COMPLETED
+.Pq Event 08H , Umask 02H
+Counts number of completed page walks due to load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.WALK_CYCLES
+.Pq Event 08H , Umask 04H
+Cycles PMH is busy with a page walk due to a load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.STLB_HIT
+.Pq Event 08H , Umask 10H
+Number of cache load STLB hits
+.It Li DTLB_LOAD_MISSES.PDE_MISS
+.Pq Event 08H , Umask 20H
+Number of DTLB cache load misses where the low part of the linear to
+physical address translation was missed.
+.It Li MEM_INST_RETIRED.LOADS
+.Pq Event 0BH , Umask 01H
+Counts the number of instructions with an architecturally-visible load
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.STORES
+.Pq Event 0BH , Umask 02H
+Counts the number of instructions with an architecturally-visible store
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD
+.Pq Event 0BH , Umask 10H
+Counts the number of instructions exceeding the latency specified with
+ld_lat facility.
+In conjunction with ld_lat facility
+.It Li MEM_STORE_RETIRED.DTLB_MISS
+.Pq Event 0CH , Umask 01H
+The event counts the number of retired stores that missed the DTLB. The DTLB
+miss is not counted if the store operation causes a fault. Does not count
+prefetches. Counts both primary and secondary misses to the TLB
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH , Umask 01H
+Counts the number of Uops issued by the Register Allocation Table to the
+Reservation Station, i.e. the UOPs issued from the front end to the back
+end.
+.It Li UOPS_ISSUED.STALLED_CYCLES
+.Pq Event 0EH , Umask 01H
+Counts the number of cycles no Uops issued by the Register Allocation Table
+to the Reservation Station, i.e. the UOPs issued from the front end to the
+back end.
+Set invert=1, cmask=1.
+.It Li UOPS_ISSUED.FUSED
+.Pq Event 0EH , Umask 02H
+Counts the number of fused Uops that were issued from the Register
+Allocation Table to the Reservation Station.
+.It Li MEM_UNCORE_RETIRED.LOCAL_HITM
+.Pq Event 0FH , Umask 02H
+Load instructions retired that HIT modified data in sibling core (Precise
+Event)
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM_AND_REMOTE_CACHE_HIT
+.Pq Event 0FH , Umask 08H
+Load instructions retired local dram and remote cache HIT data sources
+(Precise Event)
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM
+.Pq Event 0FH , Umask 10H
+Load instructions retired with a data source of local DRAM or locally homed
+remote cache HITM (Precise Event)
+.It Li MEM_UNCORE_RETIRED.REMOTE_DRAM
+.Pq Event 0FH , Umask 20H
+Load instructions retired remote DRAM and remote home-remote cache HITM
+(Precise Event)
+.It Li MEM_UNCORE_RETIRED.UNCACHEABLE
+.Pq Event 0FH , Umask 80H
+Load instructions retired I/O (Precise Event)
+.It Li FP_COMP_OPS_EXE.X87
+.Pq Event 10H , Umask 01H
+Counts the number of FP Computational Uops Executed. The number of FADD,
+FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer
+DIVs, and IDIVs. This event does not distinguish an FADD used in the middle
+of a transcendental flow from a separate FADD instruction.
+.It Li FP_COMP_OPS_EXE.MMX
+.Pq Event 10H , Umask 02H
+Counts number of MMX Uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP
+.Pq Event 10H , Umask 04H
+Counts number of SSE and SSE2 FP uops executed.
+.It Li FP_COMP_OPS_EXE.SSE2_INTEGER
+.Pq Event 10H , Umask 08H
+Counts number of SSE2 integer uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED
+.Pq Event 10H , Umask 10H
+Counts number of SSE FP packed uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR
+.Pq Event 10H , Umask 20H
+Counts number of SSE FP scalar uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION
+.Pq Event 10H , Umask 40H
+Counts number of SSE* FP single precision uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION
+.Pq Event 10H , Umask 80H
+Counts number of SSE* FP double precision uops executed.
+.It Li SIMD_INT_128.PACKED_MPY
+.Pq Event 12H , Umask 01H
+Counts number of 128 bit SIMD integer multiply operations.
+.It Li SIMD_INT_128.PACKED_SHIFT
+.Pq Event 12H , Umask 02H
+Counts number of 128 bit SIMD integer shift operations.
+.It Li SIMD_INT_128.PACK
+.Pq Event 12H , Umask 04H
+Counts number of 128 bit SIMD integer pack operations.
+.It Li SIMD_INT_128.UNPACK
+.Pq Event 12H , Umask 08H
+Counts number of 128 bit SIMD integer unpack operations.
+.It Li SIMD_INT_128.PACKED_LOGICAL
+.Pq Event 12H , Umask 10H
+Counts number of 128 bit SIMD integer logical operations.
+.It Li SIMD_INT_128.PACKED_ARITH
+.Pq Event 12H , Umask 20H
+Counts number of 128 bit SIMD integer arithmetic operations.
+.It Li SIMD_INT_128.SHUFFLE_MOVE
+.Pq Event 12H , Umask 40H
+Counts number of 128 bit SIMD integer shuffle and move operations.
+.It Li LOAD_DISPATCH.RS
+.Pq Event 13H , Umask 01H
+Counts number of loads dispatched from the Reservation Station that bypass
+the Memory Order Buffer.
+.It Li LOAD_DISPATCH.RS_DELAYED
+.Pq Event 13H , Umask 02H
+Counts the number of delayed RS dispatches at the stage latch. If an RS
+dispatch can not bypass to LB, it has another chance to dispatch from the
+one-cycle delayed staging latch before it is written into the LB.
+.It Li LOAD_DISPATCH.MOB
+.Pq Event 13H , Umask 04H
+Counts the number of loads dispatched from the Reservation Station to the
+Memory Order Buffer.
+.It Li LOAD_DISPATCH.ANY
+.Pq Event 13H , Umask 07H
+Counts all loads dispatched from the Reservation Station.
+.It Li ARITH.CYCLES_DIV_BUSY
+.Pq Event 14H , Umask 01H
+Counts the number of cycles the divider is busy executing divide or square
+root operations. The divide can be integer, X87 or Streaming SIMD Extensions
+(SSE). The square root operation can be either X87 or SSE.
+Set 'edge =1, invert=1, cmask=1' to count the number of divides.
+Count may be incorrect when SMT is on.
+.It Li ARITH.MUL
+.Pq Event 14H , Umask 02H
+Counts the number of multiply operations executed. This includes integer as
+well as floating point multiply operations but excludes DPPS mul and MPSAD.
+Count may be incorrect when SMT is on.
+.It Li INST_QUEUE_WRITES
+.Pq Event 17H , Umask 01H
+Counts the number of instructions written into the instruction queue every
+cycle.
+.It Li INST_DECODED.DEC0
+.Pq Event 18H , Umask 01H
+Counts number of instructions that require decoder 0 to be decoded. Usually,
+this means that the instruction maps to more than 1 uop
+.It Li TWO_UOP_INSTS_DECODED
+.Pq Event 19H , Umask 01H
+An instruction that generates two uops was decoded
+.It Li INST_QUEUE_WRITE_CYCLES
+.Pq Event 1EH , Umask 01H
+This event counts the number of cycles during which instructions are written
+to the instruction queue. Dividing this counter by the number of
+instructions written to the instruction queue (INST_QUEUE_WRITES) yields the
+average number of instructions decoded each cycle. If this number is less
+than four and the pipe stalls, this indicates that the decoder is failing to
+decode enough instructions per cycle to sustain the 4-wide pipeline.
+If SSE* instructions that are 6 bytes or longer arrive one after another,
+then front end throughput may limit execution speed.
+.It Li LSD_OVERFLOW
+.Pq Event 20H , Umask 01H
+Number of loops that can not stream from the instruction queue.
+.It Li L2_RQSTS.LD_HIT
+.Pq Event 24H , Umask 01H
+Counts number of loads that hit the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches. L2 loads can be rejected for
+various reasons. Only non rejected loads are counted.
+.It Li L2_RQSTS.LD_MISS
+.Pq Event 24H , Umask 02H
+Counts the number of loads that miss the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches.
+.It Li L2_RQSTS.LOADS
+.Pq Event 24H , Umask 03H
+Counts all L2 load requests. L2 loads include both L1D demand misses as well
+as L1D prefetches.
+.It Li L2_RQSTS.RFO_HIT
+.Pq Event 24H , Umask 04H
+Counts the number of store RFO requests that hit the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+Count includes WC memory requests, where the data is not fetched but the
+permission to write the line is required.
+.It Li L2_RQSTS.RFO_MISS
+.Pq Event 24H , Umask 08H
+Counts the number of store RFO requests that miss the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.RFOS
+.Pq Event 24H , Umask 0CH
+Counts all L2 store RFO requests. L2 RFO requests include both L1D demand
+RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.IFETCH_HIT
+.Pq Event 24H , Umask 10H
+Counts number of instruction fetches that hit the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCH_MISS
+.Pq Event 24H , Umask 20H
+Counts number of instruction fetches that miss the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCHES
+.Pq Event 24H , Umask 30H
+Counts all instruction fetches. L2 instruction fetches include both L1I
+demand misses as well as L1I instruction prefetches.
+.It Li L2_RQSTS.PREFETCH_HIT
+.Pq Event 24H , Umask 40H
+Counts L2 prefetch hits for both code and data.
+.It Li L2_RQSTS.PREFETCH_MISS
+.Pq Event 24H , Umask 80H
+Counts L2 prefetch misses for both code and data.
+.It Li L2_RQSTS.PREFETCHES
+.Pq Event 24H , Umask C0H
+Counts all L2 prefetches for both code and data.
+.It Li L2_RQSTS.MISS
+.Pq Event 24H , Umask AAH
+Counts all L2 misses for both code and data.
+.It Li L2_RQSTS.REFERENCES
+.Pq Event 24H , Umask FFH
+Counts all L2 requests for both code and data.
+.It Li L2_DATA_RQSTS.DEMAND.I_STATE
+.Pq Event 26H , Umask 01H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D
+demand misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.S_STATE
+.Pq Event 26H , Umask 02H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the S (shared) state. L2 demand loads are both L1D demand misses and L1D
+prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.E_STATE
+.Pq Event 26H , Umask 04H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the E (exclusive) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.M_STATE
+.Pq Event 26H , Umask 08H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the M (modified) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.MESI
+.Pq Event 26H , Umask 0FH
+Counts all L2 data demand requests. L2 demand loads are both L1D demand
+misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.PREFETCH.I_STATE
+.Pq Event 26H , Umask 10H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss.
+.It Li L2_DATA_RQSTS.PREFETCH.S_STATE
+.Pq Event 26H , Umask 20H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the S (shared) state. A prefetch RFO will miss on an S state line, while
+a prefetch read will hit on an S state line.
+.It Li L2_DATA_RQSTS.PREFETCH.E_STATE
+.Pq Event 26H , Umask 40H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the E (exclusive) state.
+.It Li L2_DATA_RQSTS.PREFETCH.M_STATE
+.Pq Event 26H , Umask 80H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the M (modified) state.
+.It Li L2_DATA_RQSTS.PREFETCH.MESI
+.Pq Event 26H , Umask F0H
+Counts all L2 prefetch requests.
+.It Li L2_DATA_RQSTS.ANY
+.Pq Event 26H , Umask FFH
+Counts all L2 data requests.
+.It Li L2_WRITE.RFO.I_STATE
+.Pq Event 27H , Umask 01H
+Counts number of L2 demand store RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss. The L1D prefetcher
+does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.S_STATE
+.Pq Event 27H , Umask 02H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.M_STATE
+.Pq Event 27H , Umask 08H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.HIT
+.Pq Event 27H , Umask 0EH
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in either the S, E or M states. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.MESI
+.Pq Event 27H , Umask 0FH
+Counts all L2 store RFO requests. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.LOCK.I_STATE
+.Pq Event 27H , Umask 10H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss.
+.It Li L2_WRITE.LOCK.S_STATE
+.Pq Event 27H , Umask 20H
+Counts number of L2 lock RFO requests where the cache line to be loaded is
+in the S (shared) state.
+.It Li L2_WRITE.LOCK.E_STATE
+.Pq Event 27H , Umask 40H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the E (exclusive) state.
+.It Li L2_WRITE.LOCK.M_STATE
+.Pq Event 27H , Umask 80H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the M (modified) state.
+.It Li L2_WRITE.LOCK.HIT
+.Pq Event 27H , Umask E0H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in either the S, E, or M state.
+.It Li L2_WRITE.LOCK.MESI
+.Pq Event 27H , Umask F0H
+Counts all L2 demand lock RFO requests.
+.It Li L1D_WB_L2.I_STATE
+.Pq Event 28H , Umask 01H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the I (invalid) state, i.e. a cache miss.
+.It Li L1D_WB_L2.S_STATE
+.Pq Event 28H , Umask 02H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the S state.
+.It Li L1D_WB_L2.E_STATE
+.Pq Event 28H , Umask 04H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the E (exclusive) state.
+.It Li L1D_WB_L2.M_STATE
+.Pq Event 28H , Umask 08H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the M (modified) state.
+.It Li L1D_WB_L2.MESI
+.Pq Event 28H , Umask 0FH
+Counts all L1 writebacks to the L2.
+.It Li L3_LAT_CACHE.REFERENCE
+.Pq Event 2EH , Umask 02H
+Counts uncore Last Level Cache references. Because of cache hierarchy, cache
+sizes and other implementation-specific characteristics, value comparison to
+estimate performance differences is not recommended.
+see Table A-1
+.It Li L3_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 01H
+Counts uncore Last Level Cache misses. Because of cache hierarchy, cache sizes
+and other implementation-specific characteristics, value comparison to
+estimate performance differences is not recommended.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH , Umask 00H
+Counts the number of thread cycles while the thread is not in a halt state.
+The thread enters the halt state when it is running the HLT instruction. The
+core frequency may change from time to time due to power or thermal
+throttling.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.REF_P
+.Pq Event 3CH , Umask 01H
+Increments at the frequency of TSC when not halted.
+see Table A-1
+.It Li DTLB_MISSES.ANY
+.Pq Event 49H , Umask 01H
+Counts the number of misses in the STLB which cause a page walk.
+.It Li DTLB_MISSES.WALK_COMPLETED
+.Pq Event 49H , Umask 02H
+Counts number of misses in the STLB which resulted in a completed page walk.
+.It Li DTLB_MISSES.WALK_CYCLES
+.Pq Event 49H , Umask 04H
+Counts cycles of page walk due to misses in the STLB.
+.It Li DTLB_MISSES.STLB_HIT
+.Pq Event 49H , Umask 10H
+Counts the number of DTLB first level misses that hit in the second level
+TLB. This event is only relevant if the core contains multiple DTLB levels.
+.It Li DTLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 49H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li LOAD_HIT_PRE
+.Pq Event 4CH , Umask 01H
+Counts load operations sent to the L1 data cache while a previous SSE
+prefetch instruction to the same cache line has started prefetching but has
+not yet finished.
+.It Li L1D_PREFETCH.REQUESTS
+.Pq Event 4EH , Umask 01H
+Counts number of hardware prefetch requests dispatched out of the prefetch
+FIFO.
+.It Li L1D_PREFETCH.MISS
+.Pq Event 4EH , Umask 02H
+Counts number of hardware prefetch requests that miss the L1D. There are two
+prefetchers in the L1D. A streamer, which predicts lines sequentially after
+this one should be fetched, and the IP prefetcher that remembers access
+patterns for the current instruction. The streamer prefetcher stops on an
+L1D hit, while the IP prefetcher does not.
+.It Li L1D_PREFETCH.TRIGGERS
+.Pq Event 4EH , Umask 04H
+Counts number of prefetch requests triggered by the Finite State Machine and
+pushed into the prefetch FIFO. Some of the prefetch requests are dropped due
+to overwrites or competition between the IP index prefetcher and streamer
+prefetcher. The prefetch FIFO contains 4 entries.
+.It Li EPT.WALK_CYCLES
+.Pq Event 4FH , Umask 10H
+Counts Extended Page walk cycles.
+.It Li L1D.REPL
+.Pq Event 51H , Umask 01H
+Counts the number of lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_REPL
+.Pq Event 51H , Umask 02H
+Counts the number of modified lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_EVICT
+.Pq Event 51H , Umask 04H
+Counts the number of modified lines evicted from the L1 data cache due to
+replacement.
+Counter 0, 1 only
+.It Li L1D.M_SNOOP_EVICT
+.Pq Event 51H , Umask 08H
+Counts the number of modified lines evicted from the L1 data cache due to
+snoop HITM intervention.
+Counter 0, 1 only
+.It Li L1D_CACHE_PREFETCH_LOCK_FB_HIT
+.Pq Event 52H , Umask 01H
+Counts the number of cacheable load lock speculated instructions accepted
+into the fill buffer.
+.It Li L1D_CACHE_LOCK_FB_HIT
+.Pq Event 53H , Umask 01H
+Counts the number of cacheable load lock speculated or retired instructions
+accepted into the fill buffer.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA
+.Pq Event 60H , Umask 01H
+Counts weighted cycles of offcore demand data read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE
+.Pq Event 60H , Umask 02H
+Counts weighted cycles of offcore demand code read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO
+.Pq Event 60H , Umask 04H
+Counts weighted cycles of offcore demand RFO requests. Does not include L2
+prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.ANY.READ
+.Pq Event 60H , Umask 08H
+Counts weighted cycles of offcore read requests of any kind. Includes L2
+prefetch requests.
+counter 0
+.It Li CACHE_LOCK_CYCLES.L1D_L2
+.Pq Event 63H , Umask 01H
+Cycle count during which the L1D and L2 are locked. A lock is asserted when
+there is a locked memory access, due to uncacheable memory, a locked
+operation that spans two cache lines, or a page walk from an uncacheable
+page table.
+Counter 0, 1 only. L1D and L2 locks have a very high performance penalty and
+it is highly recommended to avoid such accesses.
+.It Li CACHE_LOCK_CYCLES.L1D
+.Pq Event 63H , Umask 02H
+Counts the number of cycles that cacheline in the L1 data cache unit is
+locked.
+Counter 0, 1 only.
+.It Li IO_TRANSACTIONS
+.Pq Event 6CH , Umask 01H
+Counts the number of completed I/O transactions.
+.It Li L1I.HITS
+.Pq Event 80H , Umask 01H
+Counts all instruction fetches that hit the L1 instruction cache.
+.It Li L1I.MISSES
+.Pq Event 80H , Umask 02H
+Counts all instruction fetches that miss the L1I cache. This includes
+instruction cache misses, streaming buffer misses, victim cache misses and
+uncacheable fetches. An instruction fetch miss is counted only once and not
+once for every cycle it is outstanding.
+.It Li L1I.READS
+.Pq Event 80H , Umask 03H
+Counts all instruction fetches, including uncacheable fetches that bypass
+the L1I.
+.It Li L1I.CYCLES_STALLED
+.Pq Event 80H , Umask 04H
+Cycle counts for which an instruction fetch stalls due to a L1I cache miss,
+ITLB miss or ITLB fault.
+.It Li LARGE_ITLB.HIT
+.Pq Event 82H , Umask 01H
+Counts number of large ITLB hits.
+.It Li ITLB_MISSES.ANY
+.Pq Event 85H , Umask 01H
+Counts the number of misses in all levels of the ITLB which cause a page
+walk.
+.It Li ITLB_MISSES.WALK_COMPLETED
+.Pq Event 85H , Umask 02H
+Counts number of misses in all levels of the ITLB which resulted in a
+completed page walk.
+.It Li ITLB_MISSES.WALK_CYCLES
+.Pq Event 85H , Umask 04H
+Counts ITLB miss page walk cycles.
+.It Li ITLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 85H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li ILD_STALL.LCP
+.Pq Event 87H , Umask 01H
+Cycles Instruction Length Decoder stalls due to length changing prefixes:
+66, 67 or REX.W (for EM64T) instructions which change the length of the
+decoded instruction.
+.It Li ILD_STALL.MRU
+.Pq Event 87H , Umask 02H
+Instruction Length Decoder stall cycles due to Branch Prediction Unit (BPU)
+Most Recently Used (MRU) bypass.
+.It Li ILD_STALL.IQ_FULL
+.Pq Event 87H , Umask 04H
+Stall cycles due to a full instruction queue.
+.It Li ILD_STALL.REGEN
+.Pq Event 87H , Umask 08H
+Counts the number of regen stalls.
+.It Li ILD_STALL.ANY
+.Pq Event 87H , Umask 0FH
+Counts any cycles the Instruction Length Decoder is stalled.
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H , Umask 01H
+Counts the number of conditional near branch instructions executed, but not
+necessarily retired.
+.It Li BR_INST_EXEC.DIRECT
+.Pq Event 88H , Umask 02H
+Counts all unconditional near branch instructions excluding calls and
+indirect branches.
+.It Li BR_INST_EXEC.INDIRECT_NON_CALL
+.Pq Event 88H , Umask 04H
+Counts the number of executed indirect near branch instructions that are not
+calls.
+.It Li BR_INST_EXEC.NON_CALLS
+.Pq Event 88H , Umask 07H
+Counts all non call near branch instructions executed, but not necessarily
+retired.
+.It Li BR_INST_EXEC.RETURN_NEAR
+.Pq Event 88H , Umask 08H
+Counts indirect near branches that have a return mnemonic.
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
+.Pq Event 88H , Umask 10H
+Counts unconditional near call branch instructions, excluding non call
+branch, executed.
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 88H , Umask 20H
+Counts indirect near calls, including both register and memory indirect,
+executed.
+.It Li BR_INST_EXEC.NEAR_CALLS
+.Pq Event 88H , Umask 30H
+Counts all near call branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.TAKEN
+.Pq Event 88H , Umask 40H
+Counts taken near branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.ANY
+.Pq Event 88H , Umask 7FH
+Counts all near executed branches (not necessarily retired). This includes
+only instructions and not micro-op branches. Frequent branching is not
+necessarily a major performance issue. However frequent branch
+mispredictions may be a problem.
+.It Li BR_MISP_EXEC.COND
+.Pq Event 89H , Umask 01H
+Counts the number of mispredicted conditional near branch instructions
+executed, but not necessarily retired.
+.It Li BR_MISP_EXEC.DIRECT
+.Pq Event 89H , Umask 02H
+Counts mispredicted macro unconditional near branch instructions, excluding
+calls and indirect branches (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NON_CALL
+.Pq Event 89H , Umask 04H
+Counts the number of executed mispredicted indirect near branch instructions
+that are not calls.
+.It Li BR_MISP_EXEC.NON_CALLS
+.Pq Event 89H , Umask 07H
+Counts mispredicted non call near branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H , Umask 08H
+Counts mispredicted indirect branches that have a near return mnemonic.
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
+.Pq Event 89H , Umask 10H
+Counts mispredicted non-indirect near calls executed, (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 89H , Umask 20H
+Counts mispredicted indirect near calls executed, including both register
+and memory indirect.
+.It Li BR_MISP_EXEC.NEAR_CALLS
+.Pq Event 89H , Umask 30H
+Counts all mispredicted near call branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.TAKEN
+.Pq Event 89H , Umask 40H
+Counts executed mispredicted near branches that are taken, but not
+necessarily retired.
+.It Li BR_MISP_EXEC.ANY
+.Pq Event 89H , Umask 7FH
+Counts the number of mispredicted near branch instructions that were
+executed, but not necessarily retired.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event A2H , Umask 01H
+Counts the number of Allocator resource related stalls. Includes register
+renaming buffer entries, memory buffer entries. In addition to resource
+related stalls, this event counts some other events. Includes stalls arising
+during branch misprediction recovery, such as if retirement of the
+mispredicted branch is delayed and stalls arising while store buffer is
+draining from synchronizing operations.
+Does not include stalls due to SuperQ (off core) queue full, too many cache
+misses, etc.
+.It Li RESOURCE_STALLS.LOAD
+.Pq Event A2H , Umask 02H
+Counts the cycles of stall due to lack of load buffer for load operation.
+.It Li RESOURCE_STALLS.RS_FULL
+.Pq Event A2H , Umask 04H
+This event counts the number of cycles when the number of instructions in
+the pipeline waiting for execution reaches the limit the processor can
+handle. A high count of this event indicates that there are long latency
+operations in the pipe (possibly load and store operations that miss the L2
+cache, or instructions dependent upon instructions further down the pipeline
+that have yet to retire).
+When RS is full, new instructions can not enter the reservation station and
+start execution.
+.It Li RESOURCE_STALLS.STORE
+.Pq Event A2H , Umask 08H
+This event counts the number of cycles that a resource related stall will
+occur due to the number of store instructions reaching the limit of the
+pipeline, (i.e. all store buffers are used). The stall ends when a store
+instruction commits its data to the cache or memory.
+.It Li RESOURCE_STALLS.ROB_FULL
+.Pq Event A2H , Umask 10H
+Counts the cycles of stall due to re-order buffer full.
+.It Li RESOURCE_STALLS.FPCW
+.Pq Event A2H , Umask 20H
+Counts the number of cycles while execution was stalled due to writing the
+floating-point unit (FPU) control word.
+.It Li RESOURCE_STALLS.MXCSR
+.Pq Event A2H , Umask 40H
+Stalls due to the MXCSR register rename occurring too close to a previous
+MXCSR rename. The MXCSR provides control and status for the MMX registers.
+.It Li RESOURCE_STALLS.OTHER
+.Pq Event A2H , Umask 80H
+Counts the number of cycles while execution was stalled due to other
+resource issues.
+.It Li MACRO_INSTS.FUSIONS_DECODED
+.Pq Event A6H , Umask 01H
+Counts the number of instructions decoded that are macro-fused but not
+necessarily executed or retired.
+.It Li BACLEAR_FORCE_IQ
+.Pq Event A7H , Umask 01H
+Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ
+is also responsible for providing conditional branch prediction direction
+based on a static scheme and dynamic data provided by the L2 Branch
+Prediction Unit. If the conditional branch target is not found in the Target
+Array and the IQ predicts that the branch is taken, then the IQ will force
+the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by
+the BAC generates approximately an 8 cycle bubble in the instruction fetch
+pipeline.
+.It Li LSD.UOPS
+.Pq Event A8H , Umask 01H
+Counts the number of micro-ops delivered by loop stream detector.
+Use cmask=1 and invert to count cycles.
+.It Li ITLB_FLUSH
+.Pq Event AEH , Umask 01H
+Counts the number of ITLB flushes.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_DATA
+.Pq Event B0H , Umask 01H
+Counts number of offcore demand data read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_CODE
+.Pq Event B0H , Umask 02H
+Counts number of offcore demand code read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.RFO
+.Pq Event B0H , Umask 04H
+Counts number of offcore demand RFO requests. Does not count L2 prefetch
+requests.
+.It Li OFFCORE_REQUESTS.ANY.READ
+.Pq Event B0H , Umask 08H
+Counts number of offcore read requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.ANY.RFO
+.Pq Event B0H , Umask 10H
+Counts number of offcore RFO requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.L1D_WRITEBACK
+.Pq Event B0H , Umask 40H
+Counts number of L1D writebacks to the uncore.
+.It Li OFFCORE_REQUESTS.ANY
+.Pq Event B0H , Umask 80H
+Counts all offcore requests.
+.It Li UOPS_EXECUTED.PORT0
+.Pq Event B1H , Umask 01H
+Counts number of Uops executed that were issued on port 0. Port 0 handles
+integer arithmetic, SIMD and FP add Uops.
+.It Li UOPS_EXECUTED.PORT1
+.Pq Event B1H , Umask 02H
+Counts number of Uops executed that were issued on port 1. Port 1 handles
+integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops.
+.It Li UOPS_EXECUTED.PORT2_CORE
+.Pq Event B1H , Umask 04H
+Counts number of Uops executed that were issued on port 2. Port 2 handles
+the load Uops. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT3_CORE
+.Pq Event B1H , Umask 08H
+Counts number of Uops executed that were issued on port 3. Port 3 handles
+store Uops. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT4_CORE
+.Pq Event B1H , Umask 10H
+Counts number of Uops executed that were issued on port 4. Port 4 handles
+the value to be stored for the store Uops issued on port 3. This is a core
+count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5
+.Pq Event B1H , Umask 1FH
+Counts number of cycles there are one or more uops being executed and were
+issued on ports 0-4. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT5
+.Pq Event B1H , Umask 20H
+Counts number of Uops executed that were issued on port 5.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES
+.Pq Event B1H , Umask 3FH
+Counts number of cycles there are one or more uops being executed on any
+ports. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT015
+.Pq Event B1H , Umask 40H
+Counts number of Uops executed that were issued on port 0, 1, or 5.
+Use cmask=1, invert=1 to count stall cycles.
+.It Li UOPS_EXECUTED.PORT234
+.Pq Event B1H , Umask 80H
+Counts number of Uops executed that were issued on port 2, 3, or 4.
+.It Li OFFCORE_REQUESTS_SQ_FULL
+.Pq Event B2H , Umask 01H
+Counts number of cycles the SQ is full to handle off-core requests.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.DATA
+.Pq Event B3H , Umask 01H
+Counts weighted cycles of snoopq requests for data. Counter 0 only.
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE
+.Pq Event B3H , Umask 02H
+Counts weighted cycles of snoopq invalidate requests. Counter 0 only.
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.CODE
+.Pq Event B3H , Umask 04H
+Counts weighted cycles of snoopq requests for code. Counter 0 only.
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS.CODE
+.Pq Event B4H , Umask 01H
+Counts the number of snoop code requests.
+.It Li SNOOPQ_REQUESTS.DATA
+.Pq Event B4H , Umask 02H
+Counts the number of snoop data requests.
+.It Li SNOOPQ_REQUESTS.INVALIDATE
+.Pq Event B4H , Umask 04H
+Counts the number of snoop invalidate requests.
+.It Li OFF_CORE_RESPONSE_0
+.Pq Event B7H , Umask 01H
+See Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core.
+Requires programming MSR 01A6H.
+.It Li SNOOP_RESPONSE.HIT
+.Pq Event B8H , Umask 01H
+Counts HIT snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITE
+.Pq Event B8H , Umask 02H
+Counts HIT E snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITM
+.Pq Event B8H , Umask 04H
+Counts HIT M snoop response sent by this thread in response to a snoop
+request.
+.It Li OFF_CORE_RESPONSE_1
+.Pq Event BBH , Umask 01H
+See Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core.
+Use MSR 01A7H.
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 01H
+See Table A-1
+Notes: INST_RETIRED.ANY is counted by a designated fixed counter.
+INST_RETIRED.ANY_P is counted by a programmable counter and is an
+architectural performance event. Event is supported if CPUID.A.EBX[1] = 0.
+Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not
+count as retired instructions.
+.It Li INST_RETIRED.X87
+.Pq Event C0H , Umask 02H
+Counts the number of floating point computational operations retired:
+floating point computational operations executed by the assist handler and
+sub-operations of complex floating point instructions like transcendental
+instructions.
+.It Li INST_RETIRED.MMX
+.Pq Event C0H , Umask 04H
+Counts the number of retired MMX instructions.
+.It Li UOPS_RETIRED.ANY
+.Pq Event C2H , Umask 01H
+Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2,
+others=1; maximum count of 8 per cycle). Most instructions are composed of
+one or two micro-ops. Some instructions are decoded into longer sequences
+such as repeat instructions, floating point transcendental instructions, and
+assists.
+Use cmask=1 and invert to count active cycles or stalled cycles.
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H , Umask 02H
+Counts the number of retirement slots used each cycle.
+.It Li UOPS_RETIRED.MACRO_FUSED
+.Pq Event C2H , Umask 04H
+Counts number of macro-fused uops retired.
+.It Li MACHINE_CLEARS.CYCLES
+.Pq Event C3H , Umask 01H
+Counts the cycles machine clear is asserted.
+.It Li MACHINE_CLEARS.MEM_ORDER
+.Pq Event C3H , Umask 02H
+Counts the number of machine clears due to memory order conflicts.
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 04H
+Counts the number of times that a program writes to a code section.
+Self-modifying code causes a severe penalty in all Intel 64 and IA-32
+processors. The modified cache line is written back to the L2 and L3 caches.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 00H
+See Table A-1
+.It Li BR_INST_RETIRED.CONDITIONAL
+.Pq Event C4H , Umask 01H
+Counts the number of conditional branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_CALL
+.Pq Event C4H , Umask 02H
+Counts the number of direct & indirect near unconditional calls retired.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 04H
+Counts the number of branch instructions retired.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 00H
+See Table A-1
+.It Li BR_MISP_RETIRED.CONDITIONAL
+.Pq Event C5H , Umask 01H
+Counts mispredicted conditional retired calls.
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H , Umask 02H
+Counts mispredicted direct & indirect near unconditional retired calls.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 04H
+Counts all mispredicted retired calls.
+.It Li SSEX_UOPS_RETIRED.PACKED_SINGLE
+.Pq Event C7H , Umask 01H
+Counts SIMD packed single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_SINGLE
+.Pq Event C7H , Umask 02H
+Counts SIMD scalar single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.PACKED_DOUBLE
+.Pq Event C7H , Umask 04H
+Counts SIMD packed double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_DOUBLE
+.Pq Event C7H , Umask 08H
+Counts SIMD scalar double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.VECTOR_INTEGER
+.Pq Event C7H , Umask 10H
+Counts 128-bit SIMD vector integer Uops retired.
+.It Li ITLB_MISS_RETIRED
+.Pq Event C8H , Umask 20H
+Counts the number of retired instructions that missed the ITLB when the
+instruction was fetched.
+.It Li MEM_LOAD_RETIRED.L1D_HIT
+.Pq Event CBH , Umask 01H
+Counts number of retired loads that hit the L1 data cache.
+.It Li MEM_LOAD_RETIRED.L2_HIT
+.Pq Event CBH , Umask 02H
+Counts number of retired loads that hit the L2 data cache.
+.It Li MEM_LOAD_RETIRED.L3_UNSHARED_HIT
+.Pq Event CBH , Umask 04H
+Counts number of retired loads that hit their own, unshared lines in the L3
+cache.
+.It Li MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM
+.Pq Event CBH , Umask 08H
+Counts number of retired loads that hit in a sibling core's L2 (on die
+core). Since the L3 is inclusive of all cores on the package, this is an L3
+hit. This counts both clean or modified hits.
+.It Li MEM_LOAD_RETIRED.L3_MISS
+.Pq Event CBH , Umask 10H
+Counts number of retired loads that miss the L3 cache. The load was
+satisfied by a remote socket, local memory or an IOH.
+.It Li MEM_LOAD_RETIRED.HIT_LFB
+.Pq Event CBH , Umask 40H
+Counts number of retired loads that miss the L1D and the address is located
+in an allocated line fill buffer and will soon be committed to cache. This
+is counting secondary L1D misses.
+.It Li MEM_LOAD_RETIRED.DTLB_MISS
+.Pq Event CBH , Umask 80H
+Counts the number of retired loads that missed the DTLB. The DTLB miss is
+not counted if the load operation causes a fault. This event counts loads
+from cacheable memory only. The event does not count loads by software
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li FP_MMX_TRANS.TO_FP
+.Pq Event CCH , Umask 01H
+Counts the first floating-point instruction following any MMX instruction.
+You can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.TO_MMX
+.Pq Event CCH , Umask 02H
+Counts the first MMX instruction following a floating-point instruction. You
+can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.ANY
+.Pq Event CCH , Umask 03H
+Counts all transitions from floating point to MMX instructions and from MMX
+instructions to floating point instructions. You can use this event to
+estimate the penalties for the transitions between floating-point and MMX
+technology states.
+.It Li MACRO_INSTS.DECODED
+.Pq Event D0H , Umask 01H
+Counts the number of instructions decoded, (but not necessarily executed or
+retired).
+.It Li UOPS_DECODED.STALL_CYCLES
+.Pq Event D1H , Umask 01H
+Counts the cycles of decoder stalls.
+.It Li UOPS_DECODED.MS
+.Pq Event D1H , Umask 02H
+Counts the number of Uops decoded by the Microcode Sequencer, MS. The MS
+delivers uops when the instruction is more than 4 uops long or a microcode
+assist is occurring.
+.It Li UOPS_DECODED.ESP_FOLDING
+.Pq Event D1H , Umask 04H
+Counts number of stack pointer (ESP) instructions decoded: push, pop, call,
+ret, etc. ESP instructions do not generate a Uop to increment or decrement
+ESP. Instead, they update an ESP_Offset register that keeps track of the
+delta to the current value of the ESP register.
+.It Li UOPS_DECODED.ESP_SYNC
+.Pq Event D1H , Umask 08H
+Counts number of stack pointer (ESP) sync operations where an ESP
+instruction is corrected by adding the ESP offset register to the current
+value of the ESP register.
+.It Li RAT_STALLS.FLAGS
+.Pq Event D2H , Umask 01H
+Counts the number of cycles during which execution stalled due to several
+reasons, one of which is a partial flag register stall. A partial register
+stall may occur when two conditions are met: 1) an instruction modifies
+some, but not all, of the flags in the flag register and 2) the next
+instruction, which depends on flags, depends on flags that were not modified
+by this instruction.
+.It Li RAT_STALLS.REGISTERS
+.Pq Event D2H , Umask 02H
+This event counts the number of cycles instruction execution latency became
+longer than the defined latency because the instruction used a register that
+was partially written by previous instruction.
+.It Li RAT_STALLS.ROB_READ_PORT
+.Pq Event D2H , Umask 04H
+Counts the number of cycles when ROB read port stalls occurred, which did
+not allow new micro-ops to enter the out-of-order pipeline. Note that, at
+this stage in the pipeline, additional stalls may occur at the same cycle
+and prevent the stalled micro-ops from entering the pipe. In such a case,
+micro-ops retry entering the execution pipe in the next cycle and the
+ROB-read port stall is counted again.
+.It Li RAT_STALLS.SCOREBOARD
+.Pq Event D2H , Umask 08H
+Counts the cycles where we stall due to microarchitecturally required
+serialization. Microcode scoreboarding stalls.
+.It Li RAT_STALLS.ANY
+.Pq Event D2H , Umask 0FH
+Counts all Register Allocation Table stall cycles due to: cycles when ROB
+read port stalls occurred, which did not allow new micro-ops to enter the
+execution pipe; cycles when partial register stalls occurred; cycles when
+flag stalls occurred; and cycles when floating-point unit (FPU) status word
+stalls occurred. To count each of these conditions separately use the events:
+RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and
+RAT_STALLS.FPSW.
+.It Li SEG_RENAME_STALLS
+.Pq Event D4H , Umask 01H
+Counts the number of stall cycles due to the lack of renaming resources for
+the ES, DS, FS, and GS segment registers. If a segment is renamed but not
+retired and a second update to the same segment occurs, a stall occurs in
+the front-end of the pipeline until the renamed segment retires.
+.It Li ES_REG_RENAMES
+.Pq Event D5H , Umask 01H
+Counts the number of times the ES segment register is renamed.
+.It Li UOP_UNFUSION
+.Pq Event DBH , Umask 01H
+Counts unfusion events due to floating point exception to a fused uop.
+.It Li BR_INST_DECODED
+.Pq Event E0H , Umask 01H
+Counts the number of branch instructions decoded.
+.It Li BPU_MISSED_CALL_RET
+.Pq Event E5H , Umask 01H
+Counts number of times the Branch Prediction Unit missed predicting a call
+or return branch.
+.It Li BACLEAR.CLEAR
+.Pq Event E6H , Umask 01H
+Counts the number of times the front end is resteered, mainly when the
+Branch Prediction Unit cannot provide a correct prediction and this is
+corrected by the Branch Address Calculator at the front end. This can occur
+if the code has many branches such that they cannot be consumed by the BPU.
+Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble
+in the instruction fetch pipeline. The effect on total execution time
+depends on the surrounding code.
+.It Li BACLEAR.BAD_TARGET
+.Pq Event E6H , Umask 02H
+Counts number of Branch Address Calculator clears (BACLEAR) asserted due to
+conditional branch instructions in which there was a target hit but the
+direction was wrong. Each BACLEAR asserted by the BAC generates
+approximately an 8 cycle bubble in the instruction fetch pipeline.
+.It Li BPU_CLEARS.EARLY
+.Pq Event E8H , Umask 01H
+Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken
+branch after incorrectly assuming that it was not taken.
+The BPU clear leads to 2 cycle bubble in the Front End.
+.It Li BPU_CLEARS.LATE
+.Pq Event E8H , Umask 02H
+Counts late Branch Prediction Unit clears due to Most Recently Used
+conflicts. The BPU clear leads to a 3 cycle bubble in the Front End.
+.It Li THREAD_ACTIVE
+.Pq Event ECH , Umask 01H
+Counts cycles threads are active.
+.It Li L2_TRANSACTIONS.LOAD
+.Pq Event F0H , Umask 01H
+Counts L2 load operations due to HW prefetch or demand loads.
+.It Li L2_TRANSACTIONS.RFO
+.Pq Event F0H , Umask 02H
+Counts L2 RFO operations due to HW prefetch or demand RFOs.
+.It Li L2_TRANSACTIONS.IFETCH
+.Pq Event F0H , Umask 04H
+Counts L2 instruction fetch operations due to HW prefetch or demand ifetch.
+.It Li L2_TRANSACTIONS.PREFETCH
+.Pq Event F0H , Umask 08H
+Counts L2 prefetch operations.
+.It Li L2_TRANSACTIONS.L1D_WB
+.Pq Event F0H , Umask 10H
+Counts L1D writeback operations to the L2.
+.It Li L2_TRANSACTIONS.FILL
+.Pq Event F0H , Umask 20H
+Counts L2 cache line fill operations due to load, RFO, L1D writeback or
+prefetch.
+.It Li L2_TRANSACTIONS.WB
+.Pq Event F0H , Umask 40H
+Counts L2 writeback operations to the L3.
+.It Li L2_TRANSACTIONS.ANY
+.Pq Event F0H , Umask 80H
+Counts all L2 cache operations.
+.It Li L2_LINES_IN.S_STATE
+.Pq Event F1H , Umask 02H
+Counts the number of cache lines allocated in the L2 cache in the S (shared)
+state.
+.It Li L2_LINES_IN.E_STATE
+.Pq Event F1H , Umask 04H
+Counts the number of cache lines allocated in the L2 cache in the E
+(exclusive) state.
+.It Li L2_LINES_IN.ANY
+.Pq Event F1H , Umask 07H
+Counts the number of cache lines allocated in the L2 cache.
+.It Li L2_LINES_OUT.DEMAND_CLEAN
+.Pq Event F2H , Umask 01H
+Counts L2 clean cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.DEMAND_DIRTY
+.Pq Event F2H , Umask 02H
+Counts L2 dirty (modified) cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.PREFETCH_CLEAN
+.Pq Event F2H , Umask 04H
+Counts L2 clean cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.PREFETCH_DIRTY
+.Pq Event F2H , Umask 08H
+Counts L2 modified cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.ANY
+.Pq Event F2H , Umask 0FH
+Counts all L2 cache lines evicted for any reason.
+.It Li SQ_MISC.LRU_HINTS
+.Pq Event F4H , Umask 04H
+Counts number of Super Queue LRU hints sent to L3.
+.It Li SQ_MISC.SPLIT_LOCK
+.Pq Event F4H , Umask 10H
+Counts the number of SQ lock splits across a cache line.
+.It Li SQ_FULL_STALL_CYCLES
+.Pq Event F6H , Umask 01H
+Counts cycles the Super Queue is full. Neither of the threads on this core
+will be able to access the uncore.
+.It Li FP_ASSIST.ALL
+.Pq Event F7H , Umask 01H
+Counts the number of floating point operations executed that required
+micro-code assist intervention. Assists are required in the following cases:
+SSE instructions, (Denormal input when the DAZ flag is off or Underflow
+result when the FTZ flag is off): x87 instructions, (NaN or denormal are
+loaded to a register or used as input from memory, Division by 0 or
+Underflow output).
+.It Li FP_ASSIST.OUTPUT
+.Pq Event F7H , Umask 02H
+Counts number of floating point micro-code assist when the output value
+(destination register) is invalid.
+.It Li FP_ASSIST.INPUT
+.Pq Event F7H , Umask 04H
+Counts number of floating point micro-code assist when the input value (one
+of the source operands to an FP instruction) is invalid.
+.It Li SIMD_INT_64.PACKED_MPY
+.Pq Event FDH , Umask 01H
+Counts number of SIMD integer 64 bit packed multiply operations.
+.It Li SIMD_INT_64.PACKED_SHIFT
+.Pq Event FDH , Umask 02H
+Counts number of SIMD integer 64 bit packed shift operations.
+.It Li SIMD_INT_64.PACK
+.Pq Event FDH , Umask 04H
+Counts number of SIMD integer 64 bit pack operations.
+.It Li SIMD_INT_64.UNPACK
+.Pq Event FDH , Umask 08H
+Counts number of SIMD integer 64 bit unpack operations.
+.It Li SIMD_INT_64.PACKED_LOGICAL
+.Pq Event FDH , Umask 10H
+Counts number of SIMD integer 64 bit logical operations.
+.It Li SIMD_INT_64.PACKED_ARITH
+.Pq Event FDH , Umask 20H
+Counts number of SIMD integer 64 bit arithmetic operations.
+.It Li SIMD_INT_64.SHUFFLE_MOVE
+.Pq Event FDH , Umask 40H
+Counts number of SIMD integer 64 bit shift or move operations.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.westmereuc.3 b/lib/libpmc/pmc.westmereuc.3
new file mode 100644
index 0000000..c768daa
--- /dev/null
+++ b/lib/libpmc/pmc.westmereuc.3
@@ -0,0 +1,1083 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Dt PMC.WESTMEREUC 3
+.Os
+.Sh NAME
+.Nm pmc.westmereuc
+.Nd uncore measurement events for
+.Tn Intel
+.Tn Westmere
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Westmere"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs contain two classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_UCP"
+.It Li PMC_CLASS_UCF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_UCP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Westmere PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss WESTMERE UNCORE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.ucf 3 .
+Not all CPUs in this family implement fixed-function counters.
+.Ss WESTMERE UNCORE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Westmere uncore programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li GQ_CYCLES_FULL.READ_TRACKER
+.Pq Event 00H , Umask 01H
+Uncore cycles Global Queue read tracker is full.
+.It Li GQ_CYCLES_FULL.WRITE_TRACKER
+.Pq Event 00H , Umask 02H
+Uncore cycles Global Queue write tracker is full.
+.It Li GQ_CYCLES_FULL.PEER_PROBE_TRACKER
+.Pq Event 00H , Umask 04H
+Uncore cycles Global Queue peer probe tracker is full. The peer probe
+tracker queue tracks snoops from the IOH and remote sockets.
+.It Li GQ_CYCLES_NOT_EMPTY.READ_TRACKER
+.Pq Event 01H , Umask 01H
+Uncore cycles where Global Queue read tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER
+.Pq Event 01H , Umask 02H
+Uncore cycles where Global Queue write tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER
+.Pq Event 01H , Umask 04H
+Uncore cycles where Global Queue peer probe tracker has at least one valid
+entry. The peer probe tracker queue tracks IOH and remote socket snoops.
+.It Li GQ_OCCUPANCY.READ_TRACKER
+.Pq Event 02H , Umask 01H
+Increments the number of queue entries (code read, data read, and RFOs) in
+the read tracker. The GQ read tracker allocate to deallocate occupancy
+count is divided by the count to obtain the average read tracker latency.
+.It Li GQ_ALLOC.READ_TRACKER
+.Pq Event 03H , Umask 01H
+Counts the number of read tracker allocate to deallocate entries. The GQ
+read tracker allocate to deallocate occupancy count is divided by the count
+to obtain the average read tracker latency.
+.It Li GQ_ALLOC.RT_L3_MISS
+.Pq Event 03H , Umask 02H
+Counts the number of GQ read tracker entries for which a full cache line read
+has missed the L3. The GQ read tracker L3 miss to fill occupancy count is
+divided by this count to obtain the average cache line read L3 miss latency.
+The latency represents the time after which the L3 has determined that the
+cache line has missed. The time between a GQ read tracker allocation and the
+L3 determining that the cache line has missed is the average L3 hit latency.
+The total L3 cache line read miss latency is the hit latency + L3 miss
+latency.
+.It Li GQ_ALLOC.RT_TO_L3_RESP
+.Pq Event 03H , Umask 04H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy
+count is divided by this count to obtain the average L3 hit latency.
+.It Li GQ_ALLOC.RT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 08H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker, have missed in the L3 and have not acquired a Request Transaction
+ID. The GQ read tracker L3 miss to RTID acquired occupancy count is
+divided by this count to obtain the average latency for a read L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 10H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker, have missed in the L3 and have not acquired a Request
+Transaction ID. The GQ write tracker L3 miss to RTID occupancy count is
+divided by this count to obtain the average latency for a write L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WRITE_TRACKER
+.Pq Event 03H , Umask 20H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker queue that miss the L3. The GQ write tracker occupancy count
+is divided by this count to obtain the average L3 write miss latency.
+.It Li GQ_ALLOC.PEER_PROBE_TRACKER
+.Pq Event 03H , Umask 40H
+Counts the number of GQ peer probe tracker (snoop) entries that are
+allocated in the peer probe tracker queue that miss the L3. The GQ peer
+probe occupancy count is divided by this count to obtain the average L3 peer
+probe miss latency.
+.It Li GQ_DATA.FROM_QPI
+.Pq Event 04H , Umask 01H
+Cycles Global Queue Quickpath Interface input data port is busy importing
+data from the Quickpath Interface. Each cycle the input port can transfer 8
+or 16 bytes of data.
+.It Li GQ_DATA.FROM_QMC
+.Pq Event 04H , Umask 02H
+Cycles Global Queue Quickpath Memory Interface input data port is busy
+importing data from the Quickpath Memory Interface. Each cycle the input
+port can transfer 8 or 16 bytes of data.
+.It Li GQ_DATA.FROM_L3
+.Pq Event 04H , Umask 04H
+Cycles GQ L3 input data port is busy importing data from the Last Level
+Cache. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_02
+.Pq Event 04H , Umask 08H
+Cycles GQ Core 0 and 2 input data port is busy importing data from processor
+cores 0 and 2. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_13
+.Pq Event 04H , Umask 10H
+Cycles GQ Core 1 and 3 input data port is busy importing data from processor
+cores 1 and 3. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_QPI_QMC
+.Pq Event 05H , Umask 01H
+Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath
+Interface or Quickpath Memory Interface. Each cycle the output port can
+transfer 32 bytes of data.
+.It Li GQ_DATA.TO_L3
+.Pq Event 05H , Umask 02H
+Cycles GQ L3 output data port is busy sending data to the Last Level Cache.
+Each cycle the output port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_CORES
+.Pq Event 05H , Umask 04H
+Cycles GQ Core output data port is busy sending data to the Cores. Each
+cycle the output port can transfer 32 bytes of data.
+.It Li SNP_RESP_TO_LOCAL_HOME.I_STATE
+.Pq Event 06H , Umask 01H
+Number of snoop responses to the local home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_LOCAL_HOME.S_STATE
+.Pq Event 06H , Umask 02H
+Number of snoop responses to the local home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE
+.Pq Event 06H , Umask 04H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the local home in the S
+state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE
+.Pq Event 06H , Umask 08H
+Number of responses to read invalidate snoops to the local home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the local home in the M state.
+.It Li SNP_RESP_TO_LOCAL_HOME.CONFLICT
+.Pq Event 06H , Umask 10H
+Number of conflict snoop responses sent to the local home.
+.It Li SNP_RESP_TO_LOCAL_HOME.WB
+.Pq Event 06H , Umask 20H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.I_STATE
+.Pq Event 07H , Umask 01H
+Number of snoop responses to a remote home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_REMOTE_HOME.S_STATE
+.Pq Event 07H , Umask 02H
+Number of snoop responses to a remote home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE
+.Pq Event 07H , Umask 04H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the remote home in the S
+state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE
+.Pq Event 07H , Umask 08H
+Number of responses to read invalidate snoops to a remote home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the remote home in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.CONFLICT
+.Pq Event 07H , Umask 10H
+Number of conflict snoop responses sent to a remote home.
+.It Li SNP_RESP_TO_REMOTE_HOME.WB
+.Pq Event 07H , Umask 20H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.HITM
+.Pq Event 07H , Umask 24H
+Number of HITM snoop responses to a remote home.
+.It Li L3_HITS.READ
+.Pq Event 08H , Umask 01H
+Number of code read, data read and RFO requests that hit in the L3.
+.It Li L3_HITS.WRITE
+.Pq Event 08H , Umask 02H
+Number of writeback requests that hit in the L3. Writebacks from the cores
+will always result in L3 hits due to the inclusive property of the L3.
+.It Li L3_HITS.PROBE
+.Pq Event 08H , Umask 04H
+Number of snoops from IOH or remote sockets that hit in the L3.
+.It Li L3_HITS.ANY
+.Pq Event 08H , Umask 03H
+Number of reads and writes that hit the L3.
+.It Li L3_MISS.READ
+.Pq Event 09H , Umask 01H
+Number of code read, data read and RFO requests that miss the L3.
+.It Li L3_MISS.WRITE
+.Pq Event 09H , Umask 02H
+Number of writeback requests that miss the L3. Should always be zero as
+writebacks from the cores will always result in L3 hits due to the inclusive
+property of the L3.
+.It Li L3_MISS.PROBE
+.Pq Event 09H , Umask 04H
+Number of snoops from IOH or remote sockets that miss the L3.
+.It Li L3_MISS.ANY
+.Pq Event 09H , Umask 03H
+Number of reads and writes that miss the L3.
+.It Li L3_LINES_IN.M_STATE
+.Pq Event 0AH , Umask 01H
+Counts the number of L3 lines allocated in M state. The only time a cache
+line is allocated in the M state is when the line was forwarded in M state
+due to a Snoop Read Invalidate Own request.
+.It Li L3_LINES_IN.E_STATE
+.Pq Event 0AH , Umask 02H
+Counts the number of L3 lines allocated in E state.
+.It Li L3_LINES_IN.S_STATE
+.Pq Event 0AH , Umask 04H
+Counts the number of L3 lines allocated in S state.
+.It Li L3_LINES_IN.F_STATE
+.Pq Event 0AH , Umask 08H
+Counts the number of L3 lines allocated in F state.
+.It Li L3_LINES_IN.ANY
+.Pq Event 0AH , Umask 0FH
+Counts the number of L3 lines allocated in any state.
+.It Li L3_LINES_OUT.M_STATE
+.Pq Event 0BH , Umask 01H
+Counts the number of L3 lines victimized that were in the M state. When the
+victim cache line is in M state, the line is written to its home cache agent
+which can be either local or remote.
+.It Li L3_LINES_OUT.E_STATE
+.Pq Event 0BH , Umask 02H
+Counts the number of L3 lines victimized that were in the E state.
+.It Li L3_LINES_OUT.S_STATE
+.Pq Event 0BH , Umask 04H
+Counts the number of L3 lines victimized that were in the S state.
+.It Li L3_LINES_OUT.I_STATE
+.Pq Event 0BH , Umask 08H
+Counts the number of L3 lines victimized that were in the I state.
+.It Li L3_LINES_OUT.F_STATE
+.Pq Event 0BH , Umask 10H
+Counts the number of L3 lines victimized that were in the F state.
+.It Li L3_LINES_OUT.ANY
+.Pq Event 0BH , Umask 1FH
+Counts the number of L3 lines victimized in any state.
+.It Li GQ_SNOOP.GOTO_S
+.Pq Event 0CH , Umask 01H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state.
+.It Li GQ_SNOOP.GOTO_I
+.Pq Event 0CH , Umask 02H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state.
+.It Li GQ_SNOOP.GOTO_S_HIT_E
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from E state.
+Requires writing MSR 301H with mask = 2H.
+.It Li GQ_SNOOP.GOTO_S_HIT_F
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from F (forward) state.
+Requires writing MSR 301H with mask = 8H.
+.It Li GQ_SNOOP.GOTO_S_HIT_M
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from M state.
+Requires writing MSR 301H with mask = 1H.
+.It Li GQ_SNOOP.GOTO_S_HIT_S
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from S state.
+Requires writing MSR 301H with mask = 4H.
+.It Li GQ_SNOOP.GOTO_I_HIT_E
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from E state.
+Requires writing MSR 301H with mask = 2H.
+.It Li GQ_SNOOP.GOTO_I_HIT_F
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from F (forward) state.
+Requires writing MSR 301H with mask = 8H.
+.It Li GQ_SNOOP.GOTO_I_HIT_M
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from M state.
+Requires writing MSR 301H with mask = 1H.
+.It Li GQ_SNOOP.GOTO_I_HIT_S
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from S state.
+Requires writing MSR 301H with mask = 4H.
+.It Li QHL_REQUESTS.IOH_READS
+.Pq Event 20H , Umask 01H
+Counts number of Quickpath Home Logic read requests from the IOH.
+.It Li QHL_REQUESTS.IOH_WRITES
+.Pq Event 20H , Umask 02H
+Counts number of Quickpath Home Logic write requests from the IOH.
+.It Li QHL_REQUESTS.REMOTE_READS
+.Pq Event 20H , Umask 04H
+Counts number of Quickpath Home Logic read requests from a remote socket.
+.It Li QHL_REQUESTS.REMOTE_WRITES
+.Pq Event 20H , Umask 08H
+Counts number of Quickpath Home Logic write requests from a remote socket.
+.It Li QHL_REQUESTS.LOCAL_READS
+.Pq Event 20H , Umask 10H
+Counts number of Quickpath Home Logic read requests from the local socket.
+.It Li QHL_REQUESTS.LOCAL_WRITES
+.Pq Event 20H , Umask 20H
+Counts number of Quickpath Home Logic write requests from the local socket.
+.It Li QHL_CYCLES_FULL.IOH
+.Pq Event 21H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH are full.
+.It Li QHL_CYCLES_FULL.REMOTE
+.Pq Event 21H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker
+are full.
+.It Li QHL_CYCLES_FULL.LOCAL
+.Pq Event 21H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker are
+full.
+.It Li QHL_CYCLES_NOT_EMPTY.IOH
+.Pq Event 22H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH are busy.
+.It Li QHL_CYCLES_NOT_EMPTY.REMOTE
+.Pq Event 22H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker are
+busy.
+.It Li QHL_CYCLES_NOT_EMPTY.LOCAL
+.Pq Event 22H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker are
+busy.
+.It Li QHL_OCCUPANCY.IOH
+.Pq Event 23H , Umask 01H
+QHL IOH tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.REMOTE
+.Pq Event 23H , Umask 02H
+QHL remote tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.LOCAL
+.Pq Event 23H , Umask 04H
+QHL local tracker allocate to deallocate read occupancy.
+.It Li QHL_ADDRESS_CONFLICTS.2WAY
+.Pq Event 24H , Umask 02H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 2
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_ADDRESS_CONFLICTS.3WAY
+.Pq Event 24H , Umask 04H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 3
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_CONFLICT_CYCLES.IOH
+.Pq Event 25H , Umask 01H
+Counts cycles the Quickpath Home Logic IOH Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.REMOTE
+.Pq Event 25H , Umask 02H
+Counts cycles the Quickpath Home Logic Remote Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.LOCAL
+.Pq Event 25H , Umask 04H
+Counts cycles the Quickpath Home Logic Local Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_TO_QMC_BYPASS
+.Pq Event 26H , Umask 01H
+Counts number of requests to the Quickpath Memory Controller that bypass the
+Quickpath Home Logic. All local accesses can be bypassed. For remote
+requests, only read requests can be bypassed.
+.It Li QMC_ISOC_FULL.READ.CH0
+.Pq Event 28H , Umask 01H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH1
+.Pq Event 28H , Umask 02H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH2
+.Pq Event 28H , Umask 04H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.WRITE.CH0
+.Pq Event 28H , Umask 08H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH1
+.Pq Event 28H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH2
+.Pq Event 28H , Umask 20H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_BUSY.READ.CH0
+.Pq Event 29H , Umask 01H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 0.
+.It Li QMC_BUSY.READ.CH1
+.Pq Event 29H , Umask 02H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 1.
+.It Li QMC_BUSY.READ.CH2
+.Pq Event 29H , Umask 04H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 2.
+.It Li QMC_BUSY.WRITE.CH0
+.Pq Event 29H , Umask 08H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 0.
+.It Li QMC_BUSY.WRITE.CH1
+.Pq Event 29H , Umask 10H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 1.
+.It Li QMC_BUSY.WRITE.CH2
+.Pq Event 29H , Umask 20H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 2.
+.It Li QMC_OCCUPANCY.CH0
+.Pq Event 2AH , Umask 01H
+IMC channel 0 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH1
+.Pq Event 2AH , Umask 02H
+IMC channel 1 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH2
+.Pq Event 2AH , Umask 04H
+IMC channel 2 normal read request occupancy.
+.It Li QMC_OCCUPANCY.ANY
+.Pq Event 2AH , Umask 07H
+Normal read request occupancy for any channel.
+.It Li QMC_ISSOC_OCCUPANCY.CH0
+.Pq Event 2BH , Umask 01H
+IMC channel 0 isochronous read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH1
+.Pq Event 2BH , Umask 02H
+IMC channel 1 isochronous read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH2
+.Pq Event 2BH , Umask 04H
+IMC channel 2 isochronous read request occupancy.
+.It Li QMC_ISSOC_READS.ANY
+.Pq Event 2BH , Umask 07H
+IMC isochronous read request occupancy.
+.It Li QMC_NORMAL_READS.CH0
+.Pq Event 2CH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 medium and low
+priority read requests. The QMC channel 0 normal read occupancy divided by
+this count provides the average QMC channel 0 read latency.
+.It Li QMC_NORMAL_READS.CH1
+.Pq Event 2CH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 medium and low
+priority read requests. The QMC channel 1 normal read occupancy divided by
+this count provides the average QMC channel 1 read latency.
+.It Li QMC_NORMAL_READS.CH2
+.Pq Event 2CH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 medium and low
+priority read requests. The QMC channel 2 normal read occupancy divided by
+this count provides the average QMC channel 2 read latency.
+.It Li QMC_NORMAL_READS.ANY
+.Pq Event 2CH , Umask 07H
+Counts the number of Quickpath Memory Controller medium and low priority
+read requests. The QMC normal read occupancy divided by this count provides
+the average QMC read latency.
+.It Li QMC_HIGH_PRIORITY_READS.CH0
+.Pq Event 2DH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH1
+.Pq Event 2DH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH2
+.Pq Event 2DH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.ANY
+.Pq Event 2DH , Umask 07H
+Counts the number of Quickpath Memory Controller high priority isochronous
+read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH0
+.Pq Event 2EH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH1
+.Pq Event 2EH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH2
+.Pq Event 2EH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.ANY
+.Pq Event 2EH , Umask 07H
+Counts the number of Quickpath Memory Controller critical priority
+isochronous read requests.
+.It Li QMC_WRITES.FULL.CH0
+.Pq Event 2FH , Umask 01H
+Counts number of full cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.FULL.CH1
+.Pq Event 2FH , Umask 02H
+Counts number of full cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.FULL.CH2
+.Pq Event 2FH , Umask 04H
+Counts number of full cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.FULL.ANY
+.Pq Event 2FH , Umask 07H
+Counts number of full cache line writes to DRAM.
+.It Li QMC_WRITES.PARTIAL.CH0
+.Pq Event 2FH , Umask 08H
+Counts number of partial cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.PARTIAL.CH1
+.Pq Event 2FH , Umask 10H
+Counts number of partial cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.PARTIAL.CH2
+.Pq Event 2FH , Umask 20H
+Counts number of partial cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.PARTIAL.ANY
+.Pq Event 2FH , Umask 38H
+Counts number of partial cache line writes to DRAM.
+.It Li QMC_CANCEL.CH0
+.Pq Event 30H , Umask 01H
+Counts number of DRAM channel 0 cancel requests.
+.It Li QMC_CANCEL.CH1
+.Pq Event 30H , Umask 02H
+Counts number of DRAM channel 1 cancel requests.
+.It Li QMC_CANCEL.CH2
+.Pq Event 30H , Umask 04H
+Counts number of DRAM channel 2 cancel requests.
+.It Li QMC_CANCEL.ANY
+.Pq Event 30H , Umask 07H
+Counts number of DRAM cancel requests.
+.It Li QMC_PRIORITY_UPDATES.CH0
+.Pq Event 31H , Umask 01H
+Counts number of DRAM channel 0 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH1
+.Pq Event 31H , Umask 02H
+Counts number of DRAM channel 1 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH2
+.Pq Event 31H , Umask 04H
+Counts number of DRAM channel 2 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.ANY
+.Pq Event 31H , Umask 07H
+Counts number of DRAM priority updates. A priority update occurs when an
+ISOC high or critical request is received by the QHL and there is a matching
+request with normal priority that has already been issued to the QMC. In
+this instance, the QHL will send a priority update to QMC to expedite the
+request.
+.It Li IMC_RETRY.CH0
+.Pq Event 32H , Umask 01H
+Counts number of IMC DRAM channel 0 retries. DRAM retry only occurs when
+configured in RAS mode.
+.It Li IMC_RETRY.CH1
+.Pq Event 32H , Umask 02H
+Counts number of IMC DRAM channel 1 retries. DRAM retry only occurs when
+configured in RAS mode.
+.It Li IMC_RETRY.CH2
+.Pq Event 32H , Umask 04H
+Counts number of IMC DRAM channel 2 retries. DRAM retry only occurs when
+configured in RAS mode.
+.It Li IMC_RETRY.ANY
+.Pq Event 32H , Umask 07H
+Counts number of IMC DRAM retries from any channel. DRAM retry only occurs
+when configured in RAS mode.
+.It Li QHL_FRC_ACK_CNFLTS.IOH
+.Pq Event 33H , Umask 01H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the IOH.
+.It Li QHL_FRC_ACK_CNFLTS.REMOTE
+.Pq Event 33H , Umask 02H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the remote home.
+.It Li QHL_FRC_ACK_CNFLTS.LOCAL
+.Pq Event 33H , Umask 04H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the local home.
+.It Li QHL_FRC_ACK_CNFLTS.ANY
+.Pq Event 33H , Umask 07H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic.
+.It Li QHL_SLEEPS.IOH_ORDER
+.Pq Event 34H , Umask 01H
+Counts number of occurrences a request was put to sleep due to IOH ordering
+(write after read) conflicts. While in the sleep state, the request is not
+eligible to be scheduled to the QMC.
+.It Li QHL_SLEEPS.REMOTE_ORDER
+.Pq Event 34H , Umask 02H
+Counts number of occurrences a request was put to sleep due to remote socket
+ordering (write after read) conflicts. While in the sleep state, the request
+is not eligible to be scheduled to the QMC.
+.It Li QHL_SLEEPS.LOCAL_ORDER
+.Pq Event 34H , Umask 04H
+Counts number of occurrences a request was put to sleep due to local socket
+ordering (write after read) conflicts. While in the sleep state, the request
+is not eligible to be scheduled to the QMC.
+.It Li QHL_SLEEPS.IOH_CONFLICT
+.Pq Event 34H , Umask 08H
+Counts number of occurrences a request was put to sleep due to IOH address
+conflicts. While in the sleep state, the request is not eligible to be
+scheduled to the QMC.
+.It Li QHL_SLEEPS.REMOTE_CONFLICT
+.Pq Event 34H , Umask 10H
+Counts number of occurrences a request was put to sleep due to remote socket
+address conflicts. While in the sleep state, the request is not eligible to
+be scheduled to the QMC.
+.It Li QHL_SLEEPS.LOCAL_CONFLICT
+.Pq Event 34H , Umask 20H
+Counts number of occurrences a request was put to sleep due to local socket
+address conflicts. While in the sleep state, the request is not eligible to
+be scheduled to the QMC.
+.It Li ADDR_OPCODE_MATCH.IOH
+.Pq Event 35H , Umask 01H
+Counts number of requests from the IOH, address/opcode of request is
+qualified by mask value written to MSR 396H. The following mask values are
+supported:
+0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS
+40001D00_00000000H:RSPIWB
+Match opcode/address by writing MSR 396H with a supported mask value.
+.It Li ADDR_OPCODE_MATCH.REMOTE
+.Pq Event 35H , Umask 02H
+Counts number of requests from the remote socket, address/opcode of request
+is qualified by mask value written to MSR 396H. The following mask values
+are supported:
+0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS
+40001D00_00000000H:RSPIWB
+Match opcode/address by writing MSR 396H with a supported mask value.
+.It Li ADDR_OPCODE_MATCH.LOCAL
+.Pq Event 35H , Umask 04H
+Counts number of requests from the local socket, address/opcode of request
+is qualified by mask value written to MSR 396H. The following mask values
+are supported:
+0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS
+40001D00_00000000H:RSPIWB
+Match opcode/address by writing MSR 396H with a supported mask value.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0
+.Pq Event 40H , Umask 01H
+Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0
+.Pq Event 40H , Umask 02H
+Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0
+.Pq Event 40H , Umask 04H
+Counts cycles the Quickpath outbound link 0 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1
+.Pq Event 40H , Umask 08H
+Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1
+.Pq Event 40H , Umask 10H
+Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1
+.Pq Event 40H , Umask 20H
+Counts cycles the Quickpath outbound link 1 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_0
+.Pq Event 40H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_1
+.Pq Event 40H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0
+.Pq Event 41H , Umask 01H
+Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0
+.Pq Event 41H , Umask 02H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0
+.Pq Event 41H , Umask 04H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1
+.Pq Event 41H , Umask 08H
+Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1
+.Pq Event 41H , Umask 10H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1
+.Pq Event 41H , Umask 20H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_0
+.Pq Event 41H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_1
+.Pq Event 41H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_HEADER.FULL.LINK_0
+.Pq Event 42H , Umask 01H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 0 is full.
+.It Li QPI_TX_HEADER.BUSY.LINK_0
+.Pq Event 42H , Umask 02H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 0 is busy.
+.It Li QPI_TX_HEADER.FULL.LINK_1
+.Pq Event 42H , Umask 04H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 1 is full.
+.It Li QPI_TX_HEADER.BUSY.LINK_1
+.Pq Event 42H , Umask 08H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 1 is busy.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0
+.Pq Event 43H , Umask 01H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1
+.Pq Event 43H , Umask 02H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li DRAM_OPEN.CH0
+.Pq Event 60H , Umask 01H
+Counts number of DRAM Channel 0 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH1
+.Pq Event 60H , Umask 02H
+Counts number of DRAM Channel 1 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH2
+.Pq Event 60H , Umask 04H
+Counts number of DRAM Channel 2 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_PAGE_CLOSE.CH0
+.Pq Event 61H , Umask 01H
+DRAM channel 0 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH1
+.Pq Event 61H , Umask 02H
+DRAM channel 1 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH2
+.Pq Event 61H , Umask 04H
+DRAM channel 2 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH0
+.Pq Event 62H , Umask 01H
+Counts the number of precharges (PRE) that were issued to DRAM channel 0
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH1
+.Pq Event 62H , Umask 02H
+Counts the number of precharges (PRE) that were issued to DRAM channel 1
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH2
+.Pq Event 62H , Umask 04H
+Counts the number of precharges (PRE) that were issued to DRAM channel 2
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_READ_CAS.CH0
+.Pq Event 63H , Umask 01H
+Counts the number of times a read CAS command was issued on DRAM channel 0.
+.It Li DRAM_READ_CAS.AUTOPRE_CH0
+.Pq Event 63H , Umask 02H
+Counts the number of times a read CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH1
+.Pq Event 63H , Umask 04H
+Counts the number of times a read CAS command was issued on DRAM channel 1.
+.It Li DRAM_READ_CAS.AUTOPRE_CH1
+.Pq Event 63H , Umask 08H
+Counts the number of times a read CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH2
+.Pq Event 63H , Umask 10H
+Counts the number of times a read CAS command was issued on DRAM channel 2.
+.It Li DRAM_READ_CAS.AUTOPRE_CH2
+.Pq Event 63H , Umask 20H
+Counts the number of times a read CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH0
+.Pq Event 64H , Umask 01H
+Counts the number of times a write CAS command was issued on DRAM channel 0.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH0
+.Pq Event 64H , Umask 02H
+Counts the number of times a write CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH1
+.Pq Event 64H , Umask 04H
+Counts the number of times a write CAS command was issued on DRAM channel 1.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH1
+.Pq Event 64H , Umask 08H
+Counts the number of times a write CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH2
+.Pq Event 64H , Umask 10H
+Counts the number of times a write CAS command was issued on DRAM channel 2.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH2
+.Pq Event 64H , Umask 20H
+Counts the number of times a write CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_REFRESH.CH0
+.Pq Event 65H , Umask 01H
+Counts number of DRAM channel 0 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH1
+.Pq Event 65H , Umask 02H
+Counts number of DRAM channel 1 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH2
+.Pq Event 65H , Umask 04H
+Counts number of DRAM channel 2 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_PRE_ALL.CH0
+.Pq Event 66H , Umask 01H
+Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH1
+.Pq Event 66H , Umask 02H
+Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH2
+.Pq Event 66H , Umask 04H
+Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_THERMAL_THROTTLED
+.Pq Event 67H , Umask 01H
+Uncore cycles DRAM was throttled due to its temperature being above the
+thermal throttling threshold.
+.It Li THERMAL_THROTTLING_TEMP.CORE_0
+.Pq Event 80H , Umask 01H
+Cycles that the PCU records that core 0 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLING_TEMP.CORE_1
+.Pq Event 80H , Umask 02H
+Cycles that the PCU records that core 1 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLING_TEMP.CORE_2
+.Pq Event 80H , Umask 04H
+Cycles that the PCU records that core 2 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLING_TEMP.CORE_3
+.Pq Event 80H , Umask 08H
+Cycles that the PCU records that core 3 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLED_TEMP.CORE_0
+.Pq Event 81H , Umask 01H
+Cycles that the PCU records that core 0 is in the power throttled state due
+to the core's temperature being above the thermal throttling threshold.
+.It Li THERMAL_THROTTLED_TEMP.CORE_1
+.Pq Event 81H , Umask 02H
+Cycles that the PCU records that core 1 is in the power throttled state due
+to the core's temperature being above the thermal throttling threshold.
+.It Li THERMAL_THROTTLED_TEMP.CORE_2
+.Pq Event 81H , Umask 04H
+Cycles that the PCU records that core 2 is in the power throttled state due
+to the core's temperature being above the thermal throttling threshold.
+.It Li THERMAL_THROTTLED_TEMP.CORE_3
+.Pq Event 81H , Umask 08H
+Cycles that the PCU records that core 3 is in the power throttled state due
+to the core's temperature being above the thermal throttling threshold.
+.It Li PROCHOT_ASSERTION
+.Pq Event 82H , Umask 01H
+Number of system assertions of PROCHOT indicating the entire processor has
+exceeded the thermal limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_0
+.Pq Event 83H , Umask 01H
+Cycles that the PCU records that core 0 is in a low power state due to the
+system asserting PROCHOT, indicating the entire processor has exceeded the
+thermal limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_1
+.Pq Event 83H , Umask 02H
+Cycles that the PCU records that core 1 is in a low power state due to the
+system asserting PROCHOT, indicating the entire processor has exceeded the
+thermal limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_2
+.Pq Event 83H , Umask 04H
+Cycles that the PCU records that core 2 is in a low power state due to the
+system asserting PROCHOT, indicating the entire processor has exceeded the
+thermal limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_3
+.Pq Event 83H , Umask 08H
+Cycles that the PCU records that core 3 is in a low power state due to the
+system asserting PROCHOT, indicating the entire processor has exceeded the
+thermal limit.
+.It Li TURBO_MODE.CORE_0
+.Pq Event 84H , Umask 01H
+Uncore cycles that core 0 is operating in turbo mode.
+.It Li TURBO_MODE.CORE_1
+.Pq Event 84H , Umask 02H
+Uncore cycles that core 1 is operating in turbo mode.
+.It Li TURBO_MODE.CORE_2
+.Pq Event 84H , Umask 04H
+Uncore cycles that core 2 is operating in turbo mode.
+.It Li TURBO_MODE.CORE_3
+.Pq Event 84H , Umask 08H
+Uncore cycles that core 3 is operating in turbo mode.
+.It Li CYCLES_UNHALTED_L3_FLL_ENABLE
+.Pq Event 85H , Umask 02H
+Uncore cycles that at least one core is unhalted and all L3 ways are
+enabled.
+.It Li CYCLES_UNHALTED_L3_FLL_DISABLE
+.Pq Event 86H , Umask 01H
+Uncore cycles that at least one core is unhalted and all L3 ways are
+disabled.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.xscale.3 b/lib/libpmc/pmc.xscale.3
new file mode 100644
index 0000000..ba4b6d1
--- /dev/null
+++ b/lib/libpmc/pmc.xscale.3
@@ -0,0 +1,156 @@
+.\" Copyright (c) 2009, 2010 Rui Paulo.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Rui Paulo ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd December 23, 2009
+.Dt PMC.XSCALE 3
+.Os
+.Sh NAME
+.Nm pmc.xscale
+.Nd measurement events for
+.Tn Intel
+.Tn XScale
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel XScale
+CPUs are ARM CPUs based on the ARMv5e core.
+.Pp
+Second generation cores have 2 counters, while third generation cores
+have 4 counters.
+Third generation cores also have an increased number of PMC events.
+.Pp
+.Tn Intel XScale
+PMCs are documented in
+.Rs
+.%B "3rd Generation Intel XScale Microarchitecture Developer's Manual"
+.%D May 2007
+.Re
+.Ss Event Specifiers (Programmable PMCs)
+.Tn Intel XScale
+programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li IC_FETCH
+External memory fetch due to L1 instruction cache miss.
+.It Li IC_MISS
+Instruction cache or TLB miss.
+.It Li DATA_DEPENDENCY_STALLED
+Stall caused by a data dependency.
+.It Li ITLB_MISS
+Instruction TLB miss.
+.It Li DTLB_MISS
+Data TLB miss.
+.It Li BRANCH_RETIRED
+Branch instruction retired (executed).
+.It Li BRANCH_MISPRED
+Branch mispredicted.
+.It Li INSTR_RETIRED
+Instructions retired (executed).
+.It Li DC_FULL_CYCLE
+L1 data cache buffer full stall.
+Event occurs on every cycle the
+condition is present.
+.It Li DC_FULL_CONTIG
+L1 data cache buffer full stall.
+Event occurs once for each contiguous sequence of this type of stall.
+.It Li DC_ACCESS
+L1 data cache access, not including cache operations.
+.It Li DC_MISS
+L1 data cache miss, not including cache operations.
+.It Li DC_WRITEBACK
+L1 data cache write-back.
+Occurs for each cache line that's written back from the cache.
+.It Li PC_CHANGE
+Software changed the program counter.
+.It Li BRANCH_RETIRED_ALL
+Branch instruction retired (executed).
+This event counts all branch instructions, indirect or direct.
+.It Li INSTR_CYCLE
+Count the number of microarchitecture cycles each instruction requires
+to issue.
+.It Li CP_STALL
+Coprocessor stalled the instruction pipeline.
+.It Li PC_CHANGE_ALL
+Software changed the program counter (includes exceptions).
+.It Li PIPELINE_FLUSH
+Pipeline flushes due to mispredictions or exceptions.
+.It Li BACKEND_STALL
+Backend stalled the instruction pipeline.
+.It Li MULTIPLIER_USE
+Multiplier used.
+.It Li MULTIPLIER_STALLED
+Multiplier stalled the instruction pipeline.
+.It Li DATA_CACHE_STALLED
+Data cache stalled the instruction pipeline.
+.It Li L2_CACHE_REQ
+L2 cache request, not including cache operations.
+.It Li L2_CACHE_MISS
+L2 cache miss, not including cache operations.
+.It Li ADDRESS_BUS_TRANS
+Address bus transaction.
+.It Li SELF_ADDRESS_BUS_TRANS
+Self initiated address bus transaction.
+.It Li DATA_BUS_TRANS
+Data bus transaction.
+.El
+.Ss Event Name Aliases
+The following table shows the mapping between the PMC-independent
+aliases supported by
+.Lb libpmc
+and the underlying hardware events used.
+.Bl -column "branch-mispredicts" "BRANCH_MISPRED"
+.It Em Alias Ta Em Event Ta
+.It Li branches Ta Li BRANCH_RETIRED Ta
+.It Li branch-mispredicts Ta Li BRANCH_MISPRED Ta
+.It Li dc-misses Ta Li DC_MISS Ta
+.It Li ic-misses Ta Li IC_MISS Ta
+.It Li instructions Ta Li INSTR_RETIRED Ta
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+Intel XScale support first appeared in
+.Fx 9.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
+.Pp
+Intel XScale support was added by
+.An "Rui Paulo"
+.Aq rpaulo@FreeBSD.org .
+.Sh CAVEATS
+The Intel XScale code does not yet support sampling.
diff --git a/lib/libpmc/pmc_allocate.3 b/lib/libpmc/pmc_allocate.3
new file mode 100644
index 0000000..6a2a6c0
--- /dev/null
+++ b/lib/libpmc/pmc_allocate.3
@@ -0,0 +1,184 @@
+.\" Copyright (c) 2007-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 22, 2008
+.Dt PMC_ALLOCATE 3
+.Os
+.Sh NAME
+.Nm pmc_allocate ,
+.Nm pmc_release
+.Nd allocate and free performance monitoring counters
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fo pmc_allocate
+.Fa "const char *eventspecifier"
+.Fa "enum pmc_mode mode"
+.Fa "uint32_t flags"
+.Fa "int cpu"
+.Fa "pmc_id_t *pmcid"
+.Fc
+.Ft int
+.Fn pmc_release "pmc_id_t pmcid"
+.Sh DESCRIPTION
+Function
+.Fn pmc_allocate
+allocates a performance monitoring counter that measures the events
+named by argument
+.Fa eventspecifier ,
+and writes the allocated handle to the location pointed to by argument
+.Fa pmcid .
+.Pp
+Argument
+.Fa eventspecifier
+comprises a PMC event name followed by an optional comma separated
+list of keywords and qualifiers.
+The allowed syntax for argument
+.Fa eventspecifier
+is processor specific and is listed in section
+.Sx "EVENT SPECIFIERS"
+in the
+.Xr pmc 3
+manual page.
+.Pp
+The desired PMC mode is specified by argument
+.Fa mode .
+Legal values for the
+.Fa mode
+argument are:
+.Bl -tag -width ".Dv PMC_MODE_SS" -compact
+.It Dv PMC_MODE_SC
+Allocate a system-scope counting PMC.
+.It Dv PMC_MODE_SS
+Allocate a system-scope sampling PMC.
+.It Dv PMC_MODE_TC
+Allocate a process-scope counting PMC.
+.It Dv PMC_MODE_TS
+Allocate a process-scope sampling PMC.
+.El
+.Pp
+Mode specific modifiers may be specified using argument
+.Fa flags .
+The flags supported at PMC allocation time are:
+.Bl -tag -width ".Dv PMC_F_LOG_PROCEXIT" -compact
+.It Dv PMC_F_DESCENDANTS
+For process-scope PMCs, automatically track descendants of attached
+processes.
+.It Dv PMC_F_LOG_PROCCSW
+For process-scope counting PMCs, generate a log event at every context
+switch containing the incremental number of hardware events seen
+by the process during the time it was executing on the CPU.
+.It Dv PMC_F_LOG_PROCEXIT
+For process-scope counting PMCs, accumulate hardware events seen
+when the process was executing on a CPU and generate a log event
+when an attached process exits.
+.El
+PMCs allocated with flags
+.Dv PMC_F_LOG_PROCCSW
+and
+.Dv PMC_F_LOG_PROCEXIT
+need a log file to be configured before they are started.
+.Pp
+For system scope PMCs, the argument
+.Fa cpu
+is a non-negative value that specifies the CPU number
+that the PMC is to be allocated on.
+Process scope PMC allocations should specify the constant
+.Dv PMC_CPU_ANY
+for this argument.
+.Pp
+Function
+.Fn pmc_release
+releases the PMC denoted by argument
+.Fa pmcid .
+.Sh RETURN VALUES
+If successful, function
+.Fn pmc_allocate
+sets the location specified by argument
+.Fa pmcid
+to the handle of the allocated PMC and returns 0.
+In case of an error, the function returns -1 and sets the global
+variable
+.Va errno
+to indicate the error.
+.Pp
+.Rv -std pmc_release
+.Sh ERRORS
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The argument
+.Fa mode
+to function
+.Fn pmc_allocate
+had an invalid value.
+.It Bq Er EINVAL
+Argument
+.Fa cpu
+to function
+.Fn pmc_allocate
+had an invalid CPU number.
+.It Bq Er EINVAL
+Argument
+.Fa flags
+contained flags that were unsupported or otherwise incompatible with
+the requested PMC mode.
+.It Bq Er EINVAL
+Argument
+.Fa eventspecifier
+to function
+.Fn pmc_allocate
+specified an event not supported by hardware or contained a syntax
+error.
+.It Bq Er ENXIO
+Function
+.Fn pmc_allocate
+requested the use of a hardware resource that was absent or
+administratively disabled.
+.It Bq Er EOPNOTSUPP
+The underlying hardware does not support the capabilities needed for
+a PMC being allocated by a call to
+.Fn pmc_allocate .
+.It Bq Er EPERM
+A system scope PMC allocation was attempted without adequate process
+privilege.
+.It Bq Er ESRCH
+Function
+.Fn pmc_release
+was called without first having allocated a PMC.
+.It Bq Er EINVAL
+Argument
+.Fa pmcid
+to function
+.Fn pmc_release
+did not specify a PMC previously allocated by this process.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc_attach 3 ,
+.Xr pmc_configure_logfile 3 ,
+.Xr pmc_start 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_attach.3 b/lib/libpmc/pmc_attach.3
new file mode 100644
index 0000000..ca72511
--- /dev/null
+++ b/lib/libpmc/pmc_attach.3
@@ -0,0 +1,149 @@
+.\" Copyright (c) 2007 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 25, 2007
+.Dt PMC_ATTACH 3
+.Os
+.Sh NAME
+.Nm pmc_attach ,
+.Nm pmc_detach
+.Nd attaching and detaching process scope PMCs to target processes
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fn pmc_attach "pmc_id_t pmcid" "pid_t pid"
+.Ft int
+.Fn pmc_detach "pmc_id_t pmcid" "pid_t pid"
+.Sh DESCRIPTION
+These functions control the set of target processes tracked by a
+process scope PMC.
+.Pp
+Function
+.Fn pmc_attach
+is used to attach a process scope PMC specified by argument
+.Fa pmcid
+to a target process specified by argument
+.Fa pid .
+Argument
+.Fa pid
+may be zero to denote the current process.
+If the PMC was allocated with modifier
+.Dv PMC_F_DESCENDANTS ,
+the PMC will additionally attach to current and future descendants of
+the specified target process.
+The PMC should be in a quiescent state (i.e., not running).
+.Pp
+Function
+.Fn pmc_detach
+is used to detach a process scope PMC specified by argument
+.Fa pmcid
+from a process specified by argument
+.Fa pid .
+Argument
+.Fa pid
+may be zero to denote the current process.
+.Sh RETURN VALUES
+.Rv -std
+.Sh ERRORS
+A call to function
+.Fn pmc_attach
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EBUSY
+Argument
+.Fa pmcid
+specified a PMC that was not in a quiescent state.
+.It Bq Er EBUSY
+The target process specified by function
+.Fn pmc_attach
+is being tracked by another process scope PMC that uses the same PMC
+hardware resources.
+.It Bq Er EEXIST
+The target process is already being tracked by the specified PMC.
+.It Bq Er EINVAL
+Argument
+.Fa pmcid
+specified a PMC with system scope.
+.It Bq Er EINVAL
+Argument
+.Fa pid
+specified an illegal process id.
+.It Bq Er EINVAL
+The current process does not own a PMC with the handle specified in
+argument
+.Fa pmcid .
+.It Bq Er EPERM
+The caller lacked the privilege needed to attach PMCs to
+the specified target process.
+.It Bq Er EPERM
+(i386 and amd64 architectures) The PMC specified by argument
+.Fa pmcid
+has been set up to allow the use of the RDPMC instruction for
+self measurement.
+.It Bq Er ESRCH
+The current process does not own any PMCs.
+.It Bq Er ESRCH
+The process specified by argument
+.Fa pid
+did not exist.
+.El
+.Pp
+A call to function
+.Fn pmc_detach
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Argument
+.Fa pmcid
+specified a PMC with system scope.
+.It Bq Er EINVAL
+Argument
+.Fa pid
+specified an illegal process id.
+.It Bq Er EINVAL
+The current process does not own a PMC with the handle specified in
+argument
+.Fa pmcid .
+.It Bq Er EINVAL
+The specified PMC was not attached to the target process.
+.It Bq Er ESRCH
+The current process does not own any PMCs.
+.It Bq Er ESRCH
+The process specified by argument
+.Fa pid
+is not being monitored by
+.Xr hwpmc 4 .
+.It Bq Er ESRCH
+The process specified by argument
+.Fa pid
+did not exist.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc_start 3 ,
+.Xr pmc_stop 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_capabilities.3 b/lib/libpmc/pmc_capabilities.3
new file mode 100644
index 0000000..6aee17f
--- /dev/null
+++ b/lib/libpmc/pmc_capabilities.3
@@ -0,0 +1,230 @@
+.\" Copyright (c) 2007-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 22, 2008
+.Dt PMC_CAPABILITIES 3
+.Os
+.Sh NAME
+.Nm pmc_capabilities ,
+.Nm pmc_cpuinfo ,
+.Nm pmc_ncpu ,
+.Nm pmc_npmc ,
+.Nm pmc_pmcinfo ,
+.Nm pmc_width
+.Nd retrieve information about performance monitoring counters
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fn pmc_capabilities "pmc_id_t pmc" "uint32_t *caps"
+.Ft int
+.Fn pmc_cpuinfo "const struct pmc_cpuinfo **cpu_info"
+.Ft int
+.Fn pmc_ncpu void
+.Ft int
+.Fn pmc_npmc "int cpu"
+.Ft int
+.Fn pmc_pmcinfo "int cpu" "struct pmc_pmcinfo **pmc_info"
+.Ft int
+.Fn pmc_width "pmc_id_t pmc" "uint32_t *width"
+.Sh DESCRIPTION
+These functions retrieve information about performance monitoring
+hardware.
+.Pp
+Function
+.Fn pmc_capabilities
+retrieves the hardware capabilities of a PMC.
+Argument
+.Fa pmc
+is a PMC handle obtained by a prior call to
+.Fn pmc_allocate .
+The function sets argument
+.Fa caps
+to a bit mask of capabilities supported by the PMC denoted by
+argument
+.Fa pmc .
+PMC capabilities are described in
+.Xr pmc 3 .
+.Pp
+Function
+.Fn pmc_cpuinfo
+retrieves information about the CPUs in the system.
+Argument
+.Fa cpu_info
+will be set to point to an internal structure with information about
+the system's CPUs.
+The caller should not free this pointer value.
+This structure has the following fields:
+.Bl -tag -width "pm_classes" -offset indent -compact
+.It pm_cputype
+Specifies the CPU type.
+.It pm_ncpu
+Specifies the number of CPUs in the system.
+.It pm_npmc
+Specifies the number of PMC rows per CPU.
+.It pm_nclass
+Specifies the number of distinct classes of PMCs in the system.
+.It pm_classes
+Contains an array of
+.Vt "struct pmc_classinfo"
+descriptors describing the properties of each class of PMCs
+in the system.
+.El
+.Pp
+Function
+.Fn pmc_ncpu
+is a convenience function that returns the maximum CPU number in
+the system.
+On systems that support sparsely numbered CPUs, not all CPUs may
+be physically present.
+Applications need to be prepared to deal with nonexistent CPUs.
+.Pp
+Function
+.Fn pmc_npmc
+is a convenience function that returns the number of PMCs available
+in the CPU specified by argument
+.Fa cpu .
+.Pp
+Function
+.Fn pmc_pmcinfo
+returns information about the current state of the PMC hardware
+in the CPU specified by argument
+.Fa cpu .
+The location specified by argument
+.Fa pmc_info
+is set to point to an array of
+.Vt "struct pmc_info"
+structures each describing the state of one PMC in the CPU.
+These structures contain the following fields:
+.Bl -tag -width pm_ownerpid -offset indent -compact
+.It pm_name
+A human readable name for the PMC.
+.It pm_class
+The PMC class for the PMC.
+.It pm_enabled
+Non-zero if the PMC is enabled.
+.It pm_rowdisp
+The disposition of the PMC row for this PMC.
+Row dispositions are documented in
+.Xr hwpmc 4 .
+.It pm_ownerpid
+If the hardware is in use, the process id of the owner of the PMC.
+.It pm_mode
+The PMC mode as described in
+.Xr pmc 3 .
+.It pm_event
+If the hardware is in use, the PMC event being measured.
+.It pm_flags
+If the hardware is in use, the flags associated with the PMC.
+.It pm_reloadcount
+For sampling PMCs, the reload count associated with the PMC.
+.El
+.Pp
+Function
+.Fn pmc_width
+is used to retrieve the width in bits of the hardware counters
+associated with a PMC.
+Argument
+.Fa pmc
+is a PMC handle obtained by a prior call to
+.Fn pmc_allocate .
+The function sets the location pointed to by argument
+.Fa width
+to the width of the physical counters associated with PMC
+.Fa pmc .
+.Sh RETURN VALUES
+Functions
+.Fn pmc_ncpu
+and
+.Fn pmc_npmc
+return a positive integer if successful; otherwise the value -1 is
+returned and the global variable
+.Va errno
+is set to indicate the error.
+.Pp
+Functions
+.Fn pmc_capabilities ,
+.Fn pmc_cpuinfo ,
+.Fn pmc_pmcinfo
+and
+.Fn pmc_width
+return 0 if successful; otherwise the value -1 is returned and the
+global variable
+.Va errno
+is set to indicate the error.
+.Sh ERRORS
+A call to function
+.Fn pmc_capabilities
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The argument to the function was invalid.
+.El
+.Pp
+Calls to functions
+.Fn pmc_cpuinfo ,
+.Fn pmc_ncpu
+and
+.Fn pmc_npmc
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er ENXIO
+A prior call to
+.Fn pmc_init
+to initialize the PMC library had failed.
+.El
+.Pp
+A call to function
+.Fn pmc_pmcinfo
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The argument
+.Fa cpu
+was invalid.
+.It Bq Er ENXIO
+The argument
+.Fa cpu
+specified a disabled or absent CPU.
+.El
+.Pp
+A call to function
+.Fn pmc_width
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The argument to the function was invalid.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc_allocate 3 ,
+.Xr pmc_get_driver_stats 3 ,
+.Xr pmc_name_of_capability 3 ,
+.Xr pmc_name_of_cputype 3 ,
+.Xr pmc_name_of_class 3 ,
+.Xr pmc_name_of_event 3 ,
+.Xr pmc_name_of_mode 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_configure_logfile.3 b/lib/libpmc/pmc_configure_logfile.3
new file mode 100644
index 0000000..a33688c
--- /dev/null
+++ b/lib/libpmc/pmc_configure_logfile.3
@@ -0,0 +1,124 @@
+.\" Copyright (c) 2007 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 24, 2007
+.Dt PMC_CONFIGURE_LOGFILE 3
+.Os
+.Sh NAME
+.Nm pmc_configure_logfile ,
+.Nm pmc_flush_logfile ,
+.Nm pmc_writelog
+.Nd log file management
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fn pmc_configure_logfile "int fd"
+.Ft int
+.Fn pmc_flush_logfile void
+.Ft int
+.Fn pmc_writelog "uint32_t userdata"
+.Sh DESCRIPTION
+These functions manage logging of
+.Xr hwpmc 4
+events.
+.Pp
+Function
+.Fn pmc_configure_logfile
+is used to turn on and turn off logging.
+If argument
+.Fa fd
+is a valid file handle returned by a prior call to
+.Xr open 2
+or
+.Xr socket 2
+then performance events will be logged to the file corresponding
+to the specified handle.
+If the value of argument
+.Fa fd
+is -1 then logging will be stopped after any pending data is flushed.
+.Pp
+Function
+.Fn pmc_flush_logfile
+will force all log data queued inside the
+.Xr hwpmc 4
+driver to be written out.
+.Pp
+Function
+.Fn pmc_writelog
+will append a log entry containing the value of argument
+.Fa userdata
+to the log file.
+.Sh RETURN VALUES
+.Rv -std
+.Sh ERRORS
+A call to
+.Fn pmc_configure_logfile
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EAGAIN
+The
+.Xr hwpmc 4
+driver was not able to create a helper process due to system limits
+being reached.
+.It Bq Er EBUSY
+Function
+.Fn pmc_configure_logfile
+was called with a log file already configured.
+.It Bq Er EINVAL
+Function
+.Fn pmc_configure_logfile
+was called with an argument of -1 without a log file being previously
+configured.
+.It Bq Er ENOMEM
+The system encountered a memory shortage when servicing this request.
+.El
+.Pp
+A call to
+.Fn pmc_flush_logfile
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Function
+.Fn pmc_flush_logfile
+was called without a log file being previously configured.
+.El
+.Pp
+A call to
+.Fn pmc_writelog
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Function
+.Fn pmc_writelog
+was called without a log file being previously configured.
+.It Bq Er ENOMEM
+The system encountered a memory shortage when servicing this
+request.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_disable.3 b/lib/libpmc/pmc_disable.3
new file mode 100644
index 0000000..a6902ff
--- /dev/null
+++ b/lib/libpmc/pmc_disable.3
@@ -0,0 +1,99 @@
+.\" Copyright (c) 2007-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 22, 2008
+.Dt PMC_DISABLE 3
+.Os
+.Sh NAME
+.Nm pmc_disable ,
+.Nm pmc_enable
+.Nd administrative control of hardware performance counters
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fn pmc_disable "int cpu" "int pmc"
+.Ft int
+.Fn pmc_enable "int cpu" "int pmc"
+.Sh DESCRIPTION
+These functions allow specific hardware performance monitoring
+counters in a system to be disabled and enabled administratively.
+The hardware performance counters available on each CPU are numbered
+using small non-negative integers, in a system dependent manner.
+Disabled counters will not be available to applications for use.
+.Pp
+The invoking process needs to have the
+.Dv PRIV_PMC_MANAGE
+privilege to perform these operations.
+.Pp
+Function
+.Fn pmc_disable
+disables the hardware counter numbered by argument
+.Fa pmc
+on CPU number
+.Fa cpu .
+.Pp
+Function
+.Fn pmc_enable
+enables the hardware counter numbered by argument
+.Fa pmc
+on CPU number
+.Fa cpu .
+.Sh IMPLEMENTATION NOTES
+Hardware PMCs that are currently in use by applications cannot be
+disabled.
+Allocation of a process scope software PMC marks all
+hardware PMCs in the system with the same pmc number as being in-use.
+.Sh RETURN VALUES
+.Rv -std
+.Sh ERRORS
+A call to these functions may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EBUSY
+Function
+.Fn pmc_disable
+specified a hardware PMC that is currently in use.
+.It Bq Er EINVAL
+Arguments
+.Fa cpu
+or
+.Fa pmc
+were invalid.
+.It Bq Er ENXIO
+Argument
+.Fa cpu
+specified a disabled or absent CPU.
+.It Bq Er EPERM
+The current process lacks sufficient privilege to perform this
+operation.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmc_pmcinfo 3 ,
+.Xr hwpmc 4 ,
+.Xr pmccontrol 8 ,
+.Xr priv_check 9
diff --git a/lib/libpmc/pmc_event_names_of_class.3 b/lib/libpmc/pmc_event_names_of_class.3
new file mode 100644
index 0000000..183f03f
--- /dev/null
+++ b/lib/libpmc/pmc_event_names_of_class.3
@@ -0,0 +1,75 @@
+.\" Copyright (c) 2007 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 23, 2007
+.Dt PMC_EVENT_NAMES_OF_CLASS 3
+.Os
+.Sh NAME
+.Nm pmc_event_names_of_class
+.Nd return a list of event names supported by a PMC class
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fo pmc_event_names_of_class
+.Fa "enum pmc_class cl"
+.Fa "const char ***eventnames"
+.Fa "int *nevents"
+.Fc
+.Sh DESCRIPTION
+Function
+.Fn pmc_event_names_of_class
+retrieves the hardware event names supported by the class of PMC hardware
+specified by argument
+.Fa cl .
+.Pp
+It returns an array of
+.Vt "const char *"
+pointers to names of events supported by the specified class of PMC
+hardware.
+The location pointed to by argument
+.Fa nevents
+is set to the number of event names returned.
+.Pp
+The returned array is allocated using
+.Xr malloc 3 .
+.Sh RETURN VALUES
+.Rv -std pmc_event_names_of_class
+.Sh ERRORS
+A call to
+.Fn pmc_event_names_of_class
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+Argument
+.Fa cl
+was invalid.
+.It Bq Er ENOMEM
+Allocation of a memory area to hold the result failed.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_get_driver_stats.3 b/lib/libpmc/pmc_get_driver_stats.3
new file mode 100644
index 0000000..fa214b3
--- /dev/null
+++ b/lib/libpmc/pmc_get_driver_stats.3
@@ -0,0 +1,73 @@
+.\" Copyright (c) 2007 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 25, 2007
+.Dt PMC_GET_DRIVER_STATS 3
+.Os
+.Sh NAME
+.Nm pmc_get_driver_stats
+.Nd retrieve driver statistics
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fn pmc_get_driver_stats "struct pmc_driverstats *gms"
+.Sh DESCRIPTION
+The function
+.Fn pmc_get_driver_stats
+retrieves a snapshot of the usage statistics maintained by
+.Xr hwpmc 4
+into the memory area pointed to by argument
+.Fa gms .
+.Pp
+The returned structure includes the following fields:
+.Bl -tag -width pmc_intr_bufferfull -offset indent -compact
+.It pm_intr_ignored
+The number of sampling interrupts ignored.
+.It pm_intr_processed
+The number of sampling interrupts processed.
+.It pm_intr_bufferfull
+The number of sampling interrupts dropped due to lack of space
+in the sample buffer.
+.It pm_syscalls
+The number of system calls into
+.Xr hwpmc 4 .
+.It pm_syscalls_errors
+The number of system calls into
+.Xr hwpmc 4
+that failed.
+.It pm_buffer_requests
+The number of log buffer requests so far.
+.It pm_buffer_requests_failed
+The number of log buffer requests that failed due to lack of buffers.
+.It pm_log_sweeps
+The number of sample buffer processing sweeps.
+.El
+.Sh RETURN VALUES
+.Rv -std
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_get_msr.3 b/lib/libpmc/pmc_get_msr.3
new file mode 100644
index 0000000..6361d3a
--- /dev/null
+++ b/lib/libpmc/pmc_get_msr.3
@@ -0,0 +1,76 @@
+.\" Copyright (c) 2007 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 25, 2007
+.Dt PMC_GET_MSR 3
+.Os
+.Sh NAME
+.Nm pmc_get_msr
+.Nd x86 architecture-specific PMC operations
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fn pmc_get_msr "pmc_id_t pmc" "uint32_t *msr"
+.Sh DESCRIPTION
+The function
+.Fn pmc_get_msr
+returns the processor model specific register number associated with
+a PMC for subsequent use with RDPMC instructions.
+Argument
+.Fa pmc
+specifies a process scope counting PMC.
+The function will write the model specific register number associated
+with the PMC to the location pointed to by argument
+.Fa msr .
+.Pp
+After successful completion of this function, applications
+can directly read the contents of PMC hardware using
+RDPMC instructions.
+.Sh RETURN VALUES
+.Rv -std pmc_get_msr
+.Sh ERRORS
+A call to
+.Fn pmc_get_msr
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The PMC handle specified was invalid.
+.It Bq Er EINVAL
+The PMC specified did not have process scope or counting mode.
+.It Bq Er EINVAL
+The PMC specified was allocated with the
+.Dv PMC_F_DESCENDANTS
+flag.
+.It Bq Er EINVAL
+The specified PMC is already attached to target processes other
+than the owner.
+.It Bq Er ENOSYS
+The underlying hardware does not support an RDPMC instruction.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_init.3 b/lib/libpmc/pmc_init.3
new file mode 100644
index 0000000..655bfb6
--- /dev/null
+++ b/lib/libpmc/pmc_init.3
@@ -0,0 +1,63 @@
+.\" Copyright (c) 2007 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 24, 2007
+.Dt PMC_INIT 3
+.Os
+.Sh NAME
+.Nm pmc_init
+.Nd initialize library
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fn pmc_init void
+.Sh DESCRIPTION
+Function
+.Fn pmc_init
+initializes the PMC library.
+This function must be called before any of the other functions in the
+library.
+.Sh RETURN VALUES
+.Rv -std pmc_init
+.Sh ERRORS
+A call to
+.Fn pmc_init
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er ENOENT
+The
+.Xr hwpmc 4
+module was not found in the kernel.
+.It Bq Er EPROGMISMATCH
+The library's version number did not match that expected by
+.Xr hwpmc 4 .
+.It Bq Er ENXIO
+PMC hardware on this system is unsupported.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_name_of_capability.3 b/lib/libpmc/pmc_name_of_capability.3
new file mode 100644
index 0000000..78efeaf
--- /dev/null
+++ b/lib/libpmc/pmc_name_of_capability.3
@@ -0,0 +1,140 @@
+.\" Copyright (c) 2007 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 24, 2007
+.Dt PMC_NAME_OF_CAPABILITY 3
+.Os
+.Sh NAME
+.Nm pmc_name_of_capability ,
+.Nm pmc_name_of_class ,
+.Nm pmc_name_of_cputype ,
+.Nm pmc_name_of_disposition ,
+.Nm pmc_name_of_event ,
+.Nm pmc_name_of_mode ,
+.Nm pmc_name_of_state
+.Nd human readable names for numeric constants used by
+.Xr pmc 3
+and
+.Xr hwpmc 4
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft "const char *"
+.Fn pmc_name_of_capability "enum pmc_caps pc"
+.Ft "const char *"
+.Fn pmc_name_of_class "enum pmc_class pc"
+.Ft "const char *"
+.Fn pmc_name_of_cputype "enum pmc_cputype ct"
+.Ft "const char *"
+.Fn pmc_name_of_disposition "enum pmc_disp pd"
+.Ft "const char *"
+.Fn pmc_name_of_event "enum pmc_event pe"
+.Ft "const char *"
+.Fn pmc_name_of_mode "enum pmc_mode pm"
+.Ft "const char *"
+.Fn pmc_name_of_state "enum pmc_state ps"
+.Sh DESCRIPTION
+These convenience functions translate numeric constants used by the
+.Lb libpmc
+to
+.Vt "const char *"
+pointers to human readable representations of their arguments.
+.Pp
+Function
+.Fn pmc_name_of_capability
+translates a PMC capability flag given in argument
+.Fa pc
+to a human readable string.
+PMC capabilities are described in
+.Xr pmc 3 .
+.Pp
+Function
+.Fn pmc_name_of_class
+translates the PMC class value specified in argument
+.Fa pc
+to a human readable name.
+PMC classes are described in
+.Xr pmc 3 .
+.Pp
+Function
+.Fn pmc_name_of_cputype
+translates the CPU type value specified in argument
+.Fa ct
+to a human readable name.
+CPU types known to the library are described in
+.Xr pmc 3 .
+.Pp
+Function
+.Fn pmc_name_of_disposition
+translates the PMC row disposition specified in argument
+.Fa pd
+to a human readable name.
+PMC row dispositions are described in
+.Xr hwpmc 4 .
+.Pp
+Function
+.Fn pmc_name_of_event
+translates the PMC event number specified by argument
+.Fa pe
+to a string.
+PMC event names are documented in section
+.Sx EVENT SPECIFIERS
+of
+.Xr pmc 3 .
+.Pp
+Function
+.Fn pmc_name_of_mode
+translates the PMC mode specified by argument
+.Fa pm
+to a human readable string.
+PMC modes are described in
+.Xr pmc 3 .
+.Pp
+Function
+.Fn pmc_name_of_state
+translates the value of argument
+.Fa ps
+to a human readable name.
+.Sh IMPLEMENTATION NOTES
+The returned pointers point to static storage inside the PMC
+library and should not be freed by the caller.
+.Sh RETURN VALUES
+These functions return a non-NULL pointer on successful completion.
+In case of an error, a NULL pointer is returned and the global
+variable
+.Va errno
+is set to indicate the error.
+.Sh ERRORS
+A call to these functions may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The function argument specified an invalid value.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmc_pmcinfo 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_read.3 b/lib/libpmc/pmc_read.3
new file mode 100644
index 0000000..d091716
--- /dev/null
+++ b/lib/libpmc/pmc_read.3
@@ -0,0 +1,84 @@
+.\" Copyright (c) 2007 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 25, 2007
+.Dt PMC_READ 3
+.Os
+.Sh NAME
+.Nm pmc_read ,
+.Nm pmc_rw ,
+.Nm pmc_write
+.Nd read and write hardware performance counters
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fn pmc_read "pmc_id_t pmc" "pmc_value_t *value"
+.Ft int
+.Fn pmc_rw "pmc_id_t pmc" "pmc_value_t newvalue" "pmc_value_t *oldvaluep"
+.Ft int
+.Fn pmc_write "pmc_id_t pmc" "pmc_value_t value"
+.Sh DESCRIPTION
+These functions read and write the current value of a PMC.
+.Pp
+Function
+.Fn pmc_read
+will read the current value of the PMC specified by argument
+.Fa pmc
+and write it to the location specified by argument
+.Fa value .
+.Pp
+Function
+.Fn pmc_write
+will set the current value of the PMC specified by argument
+.Fa pmc
+to the value specified by argument
+.Fa value .
+.Pp
+Function
+.Fn pmc_rw
+combines a read and a write into a single atomic operation.
+.Pp
+For write operations the PMC should be in a quiescent state.
+.Sh RETURN VALUES
+.Rv -std
+.Sh ERRORS
+A call to these functions may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EBUSY
+A write operation specified a currently running PMC.
+.It Bq Er EINVAL
+Argument
+.Fa pmc
+specified a PMC not in a readable state.
+.It Bq Er EINVAL
+The PMC specified by argument
+.Fa pmc
+was not owned by the current process.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_set.3 b/lib/libpmc/pmc_set.3
new file mode 100644
index 0000000..e8d6597
--- /dev/null
+++ b/lib/libpmc/pmc_set.3
@@ -0,0 +1,73 @@
+.\" Copyright (c) 2007 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd November 25, 2007
+.Dt PMC_SET 3
+.Os
+.Sh NAME
+.Nm pmc_set
+.Nd set the reload count of a sampling PMC
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fn pmc_set "pmc_id_t pmc" "pmc_value_t value"
+.Sh DESCRIPTION
+Function
+.Fn pmc_set
+is used to set the reload value of sampling PMCs.
+Argument
+.Fa pmc
+specifies the handle of a previously allocated sampling mode PMC.
+Argument
+.Fa value
+specifies the reload count.
+.Pp
+Sampling PMCs will interrupt the CPU after the number of
+hardware events specified by the reload count are seen.
+After the sampling interrupt is processed the underlying hardware will
+be reloaded with the specified count and the hardware
+automatically restarted by
+.Xr hwpmc 4 .
+.Pp
+Function
+.Fn pmc_set
+should be called on a PMC in a quiescent state.
+.Sh RETURN VALUES
+.Rv -std pmc_set
+.Sh ERRORS
+A call to
+.Fn pmc_set
+may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EINVAL
+The current process did not own a PMC with the specified handle.
+.It Bq Er EBUSY
+The specified PMC was already running.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmc_start.3 b/lib/libpmc/pmc_start.3
new file mode 100644
index 0000000..2272122
--- /dev/null
+++ b/lib/libpmc/pmc_start.3
@@ -0,0 +1,77 @@
+.\" Copyright (c) 2007-2008 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd September 22, 2008
+.Dt PMC_START 3
+.Os
+.Sh NAME
+.Nm pmc_start ,
+.Nm pmc_stop
+.Nd start and stop a PMC
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Ft int
+.Fn pmc_start "pmc_id_t pmc"
+.Ft int
+.Fn pmc_stop "pmc_id_t pmc"
+.Sh DESCRIPTION
+These functions are used to start and stop a PMC.
+.Pp
+Function
+.Fn pmc_start
+starts the PMC specified by argument
+.Fa pmc .
+If the specified PMC has process scope and has not been attached
+to any targets, it will be attached to the current process.
+.Pp
+Function
+.Fn pmc_stop
+stops the PMC specified by argument
+.Fa pmc .
+.Sh RETURN VALUES
+.Rv -std
+.Sh ERRORS
+A call to these functions may fail with the following errors:
+.Bl -tag -width Er
+.It Bq Er EDOOFUS
+Function
+.Fn pmc_start
+specified a PMC that requires a log file and no log file was
+configured.
+.It Bq Er EINVAL
+The specified PMC is in the process of being deleted.
+.It Bq Er EINVAL
+Function
+.Fn pmc_stop
+specified a PMC that was never started.
+.It Bq Er ENXIO
+The specified PMC had system scope and its associated CPU was disabled or
+absent.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmclog.3 b/lib/libpmc/pmclog.3
new file mode 100644
index 0000000..4438f10
--- /dev/null
+++ b/lib/libpmc/pmclog.3
@@ -0,0 +1,320 @@
+.\" Copyright (c) 2005-2006 Joseph Koshy.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 26, 2006
+.Dt PMCLOG 3
+.Os
+.Sh NAME
+.Nm pmclog_open ,
+.Nm pmclog_close ,
+.Nm pmclog_read ,
+.Nm pmclog_feed
+.Nd parse event log data generated by
+.Xr hwpmc 4
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmclog.h
+.Ft "void *"
+.Fn pmclog_open "int fd"
+.Ft void
+.Fn pmclog_close "void *cookie"
+.Ft int
+.Fn pmclog_read "void *cookie" "struct pmclog_ev *ev"
+.Ft int
+.Fn pmclog_feed "void *cookie" "char *data" "int len"
+.Sh DESCRIPTION
+These functions provide a way for application programs to extract
+events from an event stream generated by
+.Xr hwpmc 4 .
+.Pp
+A new event log parser is allocated using
+.Fn pmclog_open .
+Argument
+.Fa fd
+may be a file descriptor opened for reading if the event stream is
+present in a file, or the constant
+.Dv PMCLOG_FD_NONE
+for an event stream present in memory.
+This function returns a cookie that is passed into the other functions
+in this API set.
+.Pp
+Function
+.Fn pmclog_read
+returns the next available event in the event stream associated with
+argument
+.Fa cookie .
+Argument
+.Fa ev
+points to an event descriptor that will contain the result of a
+successfully parsed event.
+.Pp
+An event descriptor returned by
+.Fn pmclog_read
+has the following structure:
+.Bd -literal
+struct pmclog_ev {
+       enum pmclog_state pl_state;	/* parser state after 'get_event()' */
+       off_t             pl_offset;	/* byte offset in stream */
+       size_t            pl_count;	/* count of records so far */
+       struct timespec   pl_ts;		/* log entry timestamp */
+       enum pmclog_type  pl_type;	/* log entry kind */
+       union {				/* log entry data */
+		struct pmclog_ev_closelog    pl_cl;
+		struct pmclog_ev_dropnotify  pl_dn;
+		struct pmclog_ev_initialize  pl_i;
+		struct pmclog_ev_map_in      pl_mi;
+		struct pmclog_ev_map_out     pl_mo;
+		struct pmclog_ev_pcsample    pl_s;
+		struct pmclog_ev_pmcallocate pl_a;
+		struct pmclog_ev_pmcattach   pl_t;
+		struct pmclog_ev_pmcdetach   pl_d;
+		struct pmclog_ev_proccsw     pl_c;
+		struct pmclog_ev_procexec    pl_x;
+		struct pmclog_ev_procexit    pl_e;
+		struct pmclog_ev_procfork    pl_f;
+		struct pmclog_ev_sysexit     pl_se;
+		struct pmclog_ev_userdata    pl_u;
+       } pl_u;
+};
+.Ed
+.Pp
+The current state of the parser is recorded in
+.Va pl_state .
+This field can take on the following values:
+.Bl -tag -width ".Dv PMCLOG_REQUIRE_DATA"
+.It Dv PMCLOG_EOF
+(For file based parsers only)
+An end-of-file condition was encountered on the configured file
+descriptor.
+.It Dv PMCLOG_ERROR
+An error occurred during parsing.
+.It Dv PMCLOG_OK
+A complete event record was read into
+.Fa *ev .
+.It Dv PMCLOG_REQUIRE_DATA
+There was insufficient data in the event stream to assemble a complete
+event record.
+For memory based parsers, more data can be fed to the
+parser using function
+.Fn pmclog_feed .
+For file based parsers, function
+.Fn pmclog_read
+may be retried when data is available on the configured file
+descriptor.
+.El
+.Pp
+The rest of the event structure is valid only if field
+.Va pl_state
+contains
+.Dv PMCLOG_OK .
+Field
+.Va pl_offset
+contains the offset of the current record in the byte stream.
+Field
+.Va pl_count
+contains the serial number of this event.
+Field
+.Va pl_ts
+contains a timestamp with the system time when the event occurred.
+Field
+.Va pl_type
+denotes the kind of the event returned in argument
+.Fa *ev
+and is one of the following:
+.Bl -tag -width ".Dv PMCLOG_TYPE_PMCALLOCATE"
+.It Dv PMCLOG_TYPE_CLOSELOG
+A marker indicating a successful close of a log file.
+This record will be the last record of a log file.
+.It Dv PMCLOG_TYPE_DROPNOTIFY
+A marker indicating that
+.Xr hwpmc 4
+had to drop data due to a resource constraint.
+.It Dv PMCLOG_TYPE_INITIALIZE
+An initialization record.
+This is the first record in a log file.
+.It Dv PMCLOG_TYPE_MAP_IN
+A record describing the introduction of a mapping to an executable
+object by a
+.Xr kldload 2
+or
+.Xr mmap 2
+system call.
+.It Dv PMCLOG_TYPE_MAP_OUT
+A record describing the removal of a mapping to an executable
+object by a
+.Xr kldunload 2
+or
+.Xr munmap 2
+system call.
+.It Dv PMCLOG_TYPE_PCSAMPLE
+A record containing an instruction pointer sample.
+.It Dv PMCLOG_TYPE_PMCALLOCATE
+A record describing a PMC allocation operation.
+.It Dv PMCLOG_TYPE_PMCATTACH
+A record describing a PMC attach operation.
+.It Dv PMCLOG_TYPE_PMCDETACH
+A record describing a PMC detach operation.
+.It Dv PMCLOG_TYPE_PROCCSW
+A record describing a PMC reading at the time of a process context switch.
+.It Dv PMCLOG_TYPE_PROCEXEC
+A record describing an
+.Xr execve 2
+by a target process.
+.It Dv PMCLOG_TYPE_PROCEXIT
+A record describing the accumulated PMC reading for a process at the
+time of
+.Xr _exit 2 .
+.It Dv PMCLOG_TYPE_PROCFORK
+A record describing a
+.Xr fork 2
+by a target process.
+.It Dv PMCLOG_TYPE_SYSEXIT
+A record describing a process exit, sent to processes
+owning system-wide sampling PMCs.
+.It Dv PMCLOG_TYPE_USERDATA
+A record containing user data.
+.El
+.Pp
+Function
+.Fn pmclog_feed
+is used with parsers configured to parse memory based event streams.
+It is intended to be called when function
+.Fn pmclog_read
+indicates the need for more data by returning
+.Dv PMCLOG_REQUIRE_DATA
+in field
+.Va pl_state
+of its event structure argument.
+Argument
+.Fa data
+points to the start of a memory buffer containing fresh event data.
+Argument
+.Fa len
+indicates the number of data bytes available.
+The memory range
+.Bq Fa data , Fa data No + Fa len
+must remain valid till the next time
+.Fn pmclog_read
+returns an error.
+It is an error to use
+.Fn pmclog_feed
+on a parser configured to parse file data.
+.Pp
+Function
+.Fn pmclog_close
+releases the internal state allocated by a prior call
+to
+.Fn pmclog_open .
+.Sh RETURN VALUES
+Function
+.Fn pmclog_open
+will return a
+.No non- Ns Dv NULL
+value if successful or
+.Dv NULL
+otherwise.
+.Pp
+Function
+.Fn pmclog_read
+will return 0 in case a complete event record was successfully read,
+or will return \-1 and will set the
+.Va pl_state
+field of the event record to the appropriate code in case of an error.
+.Pp
+Function
+.Fn pmclog_feed
+will return 0 on success or \-1 in case of failure.
+.Sh EXAMPLES
+A template for using the log file parsing API is shown below in pseudocode:
+.Bd -literal
+void *parser;			/* cookie */
+struct pmclog_ev ev;		/* parsed event */
+int fd;				/* file descriptor */
+
+fd = open(filename, O_RDONLY);	/* open log file */
+parser = pmclog_open(fd);	/* initialize parser */
+if (parser == NULL)
+	--handle an out of memory error--;
+
+/* read and parse data */
+while (pmclog_read(parser, &ev) == 0) {
+	assert(ev.pl_state == PMCLOG_OK);
+	/* process the event */
+	switch (ev.pl_type) {
+	case PMCLOG_TYPE_PMCALLOCATE:
+		--process a pmc allocation record--
+		break;
+	case PMCLOG_TYPE_PROCCSW:
+		--process a thread context switch record--
+		break;
+	case PMCLOG_TYPE_PCSAMPLE:
+		--process a PC sample--
+		break;
+	--and so on--
+	}
+}
+
+/* examine parser state */
+switch (ev.pl_state) {
+case PMCLOG_EOF:
+	--normal termination--
+	break;
+case PMCLOG_ERROR:
+	--look at errno here--
+	break;
+case PMCLOG_REQUIRE_DATA:
+	--arrange for more data to be available for parsing--
+	break;
+default:
+	assert(0);
+	/*NOTREACHED*/
+}
+
+pmclog_close(parser);		/* cleanup */
+.Ed
+.Sh ERRORS
+A call to
+.Fn pmclog_open
+may fail with any of the errors returned by
+.Xr malloc 3 .
+.Pp
+A call to
+.Fn pmclog_read
+for a file based parser may fail with any of the errors returned by
+.Xr read 2 .
+.Sh SEE ALSO
+.Xr read 2 ,
+.Xr malloc 3 ,
+.Xr pmc 3 ,
+.Xr hwpmc 4 ,
+.Xr pmcstat 8
+.Sh HISTORY
+The
+.Nm pmclog
+API
+.Ud
+It first appeared in
+.Fx 6.0 .
diff --git a/lib/libpmc/pmclog.c b/lib/libpmc/pmclog.c
new file mode 100644
index 0000000..d9ebc67
--- /dev/null
+++ b/lib/libpmc/pmclog.c
@@ -0,0 +1,577 @@
+/*-
+ * Copyright (c) 2005-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmc.h>
+#include <sys/pmclog.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <pmc.h>
+#include <pmclog.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <machine/pmc_mdep.h>
+
+#include "libpmcinternal.h"
+
+#define	PMCLOG_BUFFER_SIZE			4096
+
+/*
+ * API NOTES
+ *
+ * The pmclog(3) API is oriented towards parsing an event stream in
+ * "realtime", i.e., from a data source that may or may not preserve
+ * record boundaries -- for example when the data source is elsewhere
+ * on a network.  The API allows data to be fed into the parser zero
+ * or more bytes at a time.
+ *
+ * The state for a log file parser is maintained in a 'struct
+ * pmclog_parse_state'.  Parser invocations are done by calling
+ * 'pmclog_read()'; this function will inform the caller when a
+ * complete event is parsed.
+ *
+ * The parser first assembles a complete log file event in an internal
+ * work area (see "ps_saved" below).  Once a complete log file event
+ * is read, the parser then parses it and converts it to an event
+ * descriptor usable by the client.  We could possibly avoid this two
+ * step process by directly parsing the input log to set fields in the
+ * event record.  However the parser's state machine would get
+ * insanely complicated, and this code is unlikely to be used in
+ * performance critical paths.
+ */
+
+enum pmclog_parser_state {
+	PL_STATE_NEW_RECORD,		/* at a record boundary */
+	PL_STATE_EXPECTING_HEADER,	/* header only partly read */
+	PL_STATE_PARTIAL_RECORD,	/* header read, body incomplete */
+	PL_STATE_ERROR			/* bad input; parser unusable */
+};
+
+struct pmclog_parse_state {
+	enum pmclog_parser_state ps_state;	/* record assembly state */
+	enum pmc_cputype	ps_arch;	/* CPU type named by the log */
+	uint32_t		ps_version;	/* hwpmc version of the log */
+	int			ps_initialized;	/* INITIALIZE record seen */
+	int			ps_count;	/* #records parsed so far */
+	off_t			ps_offset;	/* #stream bytes parsed */
+	union pmclog_entry	ps_saved;	/* record being assembled */
+	int			ps_svcount;	/* #bytes held in ps_saved */
+	int			ps_fd;		/* fd, or PMCLOG_FD_NONE */
+	char			*ps_buffer;	/* read buffer; NULL if no fd */
+	char			*ps_data;	/* next unparsed byte */
+	size_t			ps_len;		/* #unparsed bytes at ps_data */
+};
+
+#define	PMCLOG_HEADER_FROM_SAVED_STATE(PS)				\
+	(* ((uint32_t *) &(PS)->ps_saved))
+/* Readers advance the 32-bit cursor 'LE'; READ64 takes the low word first. */
+#define	PMCLOG_INITIALIZE_READER(LE,A)	LE = (uint32_t *) &(A)
+#define	PMCLOG_READ32(LE,V) 		do {				\
+		(V)  = *(LE)++;						\
+	} while (0)
+#define	PMCLOG_READ64(LE,V)		do {				\
+		uint64_t _v;						\
+		_v  = (uint64_t) *(LE)++;				\
+		_v |= ((uint64_t) *(LE)++) << 32;			\
+		(V) = _v;						\
+	} while (0)
+/* Copy a NUL-terminated string into DST (size LEN); 'LE' is not advanced. */
+#define	PMCLOG_READSTRING(LE,DST,LEN)	strlcpy((DST), (char *) (LE), (LEN))
+
+/*
+ * Assemble a log record in 'ps->ps_saved' from '*len' octets starting at
+ * address '*data', then advance '*data' and '*len' past the octets used.
+ * '*data' may be unaligned and '*len' octets may not be enough to complete
+ * an event record.  Returns the new parser state; a record length below
+ * the header size or above the size of 'ps_saved' gives PL_STATE_ERROR.
+ */
+
+static enum pmclog_parser_state
+pmclog_get_record(struct pmclog_parse_state *ps, char **data, ssize_t *len)
+{
+	int avail, copylen, recordsize, used;
+	uint32_t h;
+	const int HEADERSIZE = sizeof(uint32_t);
+	const int SAVEDSIZE = sizeof(ps->ps_saved);
+	char *src, *dst;
+
+	if ((avail = *len) <= 0)
+		return (ps->ps_state = PL_STATE_ERROR);
+
+	src = *data;
+	h = used = 0;
+
+	if (ps->ps_state == PL_STATE_NEW_RECORD)
+		ps->ps_svcount = 0;
+
+	dst = (char *) &ps->ps_saved + ps->ps_svcount;
+
+	switch (ps->ps_state) {
+	case PL_STATE_NEW_RECORD:
+
+		/*
+		 * Transitions:
+		 *
+		 * Case A: avail < headersize
+		 *	-> 'expecting header'
+		 *
+		 * Case B: avail >= headersize
+		 *    B.1: avail < recordsize
+		 *	   -> 'partial record'
+		 *    B.2: avail >= recordsize
+		 *         -> 'new record'
+		 */
+
+		copylen = avail < HEADERSIZE ? avail : HEADERSIZE;
+		bcopy(src, dst, copylen);
+		ps->ps_svcount = used = copylen;
+
+		if (copylen < HEADERSIZE) {
+			ps->ps_state = PL_STATE_EXPECTING_HEADER;
+			goto done;
+		}
+
+		src += copylen;
+		dst += copylen;
+
+		h = PMCLOG_HEADER_FROM_SAVED_STATE(ps);
+		recordsize = PMCLOG_HEADER_TO_LENGTH(h);
+		/* A bogus length would underflow or overrun 'ps_saved'. */
+		if (recordsize < HEADERSIZE || recordsize > SAVEDSIZE)
+			goto error;
+
+		if (recordsize <= avail) { /* full record available */
+			bcopy(src, dst, recordsize - copylen);
+			ps->ps_svcount = used = recordsize;
+			goto done;
+		}
+
+		/* header + a partial record is available */
+		bcopy(src, dst, avail - copylen);
+		ps->ps_svcount = used = avail;
+		ps->ps_state = PL_STATE_PARTIAL_RECORD;
+		break;
+
+	case PL_STATE_EXPECTING_HEADER:
+
+		/*
+		 * Transitions:
+		 *
+		 * Case C: avail+saved < headersize
+		 * 	-> 'expecting header'
+		 *
+		 * Case D: avail+saved >= headersize
+		 *    D.1: avail+saved < recordsize
+		 *    	-> 'partial record'
+		 *    D.2: avail+saved >= recordsize
+		 *    	-> 'new record'
+		 *    (see PARTIAL_RECORD handling below)
+		 */
+
+		if (avail + ps->ps_svcount < HEADERSIZE) {
+			bcopy(src, dst, avail);
+			ps->ps_svcount += avail;
+			used = avail;
+			break;
+		}
+
+		used = copylen = HEADERSIZE - ps->ps_svcount;
+		bcopy(src, dst, copylen);
+		src += copylen;
+		dst += copylen;
+		avail -= copylen;
+		ps->ps_svcount += copylen;
+
+		/*FALLTHROUGH*/
+
+	case PL_STATE_PARTIAL_RECORD:
+
+		/*
+		 * Transitions:
+		 *
+		 * Case E: avail+saved < recordsize
+		 * 	-> 'partial record'
+		 *
+		 * Case F: avail+saved >= recordsize
+		 * 	-> 'new record'
+		 */
+
+		h = PMCLOG_HEADER_FROM_SAVED_STATE(ps);
+		recordsize = PMCLOG_HEADER_TO_LENGTH(h);
+		/* A bogus length would underflow or overrun 'ps_saved'. */
+		if (recordsize < HEADERSIZE || recordsize > SAVEDSIZE)
+			goto error;
+
+		if (avail + ps->ps_svcount < recordsize) {
+			copylen = avail;
+			ps->ps_state = PL_STATE_PARTIAL_RECORD;
+		} else {
+			copylen = recordsize - ps->ps_svcount;
+			ps->ps_state = PL_STATE_NEW_RECORD;
+		}
+
+		bcopy(src, dst, copylen);
+		ps->ps_svcount += copylen;
+		used += copylen;
+		break;
+
+	default:
+		goto error;
+	}
+
+ done:
+	*data += used;
+	*len  -= used;
+	return ps->ps_state;
+
+ error:
+	ps->ps_state = PL_STATE_ERROR;
+	return ps->ps_state;
+}
+
+/*
+ * Get an event from the stream pointed to by '*data'; '*len' is the
+ * number of bytes available to parse.  Both are updated to reflect the
+ * bytes consumed.  Returns 0 with 'ev->pl_state' set to PMCLOG_OK, or
+ * -1 with it set to PMCLOG_REQUIRE_DATA or PMCLOG_ERROR.
+ */
+
+static int
+pmclog_get_event(void *cookie, char **data, ssize_t *len,
+    struct pmclog_ev *ev)
+{
+	int evlen, pathlen;
+	uint32_t h, *le, npc;
+	enum pmclog_parser_state e;
+	struct pmclog_parse_state *ps;
+
+	ps = (struct pmclog_parse_state *) cookie;
+	assert(ps->ps_state != PL_STATE_ERROR);
+
+	if ((e = pmclog_get_record(ps,data,len)) == PL_STATE_ERROR) {
+		ev->pl_state = PMCLOG_ERROR;
+		return -1;
+	}
+
+	if (e != PL_STATE_NEW_RECORD) {
+		ev->pl_state = PMCLOG_REQUIRE_DATA;
+		return -1;
+	}
+
+	PMCLOG_INITIALIZE_READER(le, ps->ps_saved);
+	PMCLOG_READ32(le,h);
+
+	if (!PMCLOG_HEADER_CHECK_MAGIC(h)) {
+		ps->ps_state = PL_STATE_ERROR;
+		ev->pl_state = PMCLOG_ERROR;
+		return -1;
+	}
+
+	/* copy out the time stamp */
+	PMCLOG_READ32(le,ev->pl_ts.tv_sec);
+	PMCLOG_READ32(le,ev->pl_ts.tv_nsec);
+
+	evlen = PMCLOG_HEADER_TO_LENGTH(h);
+
+#define	PMCLOG_GET_PATHLEN(P,E,TYPE) do {				\
+		(P) = (E) - offsetof(struct TYPE, pl_pathname);		\
+		if ((P) > PATH_MAX || (P) < 0)				\
+			goto error;					\
+	} while (0)
+/* Reject frame counts that would overrun the 'pl_pc' array. */
+#define	PMCLOG_GET_CALLCHAIN_SIZE(SZ,E) do {				\
+		(SZ) = ((E) - offsetof(struct pmclog_callchain, pl_pc))	\
+			/ sizeof(uintfptr_t);				\
+		if ((SZ) > PMC_CALLCHAIN_DEPTH_MAX)			\
+			goto error;					\
+	} while (0)
+
+	switch (ev->pl_type = PMCLOG_HEADER_TO_TYPE(h)) {
+	case PMCLOG_TYPE_CALLCHAIN:
+		PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pid);
+		PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_pmcid);
+		PMCLOG_READ32(le,ev->pl_u.pl_cc.pl_cpuflags);
+		PMCLOG_GET_CALLCHAIN_SIZE(ev->pl_u.pl_cc.pl_npc,evlen);
+		for (npc = 0; npc < ev->pl_u.pl_cc.pl_npc; npc++)
+			PMCLOG_READADDR(le,ev->pl_u.pl_cc.pl_pc[npc]);
+		for (;npc < PMC_CALLCHAIN_DEPTH_MAX; npc++)
+			ev->pl_u.pl_cc.pl_pc[npc] = (uintfptr_t) 0;
+		break;
+	case PMCLOG_TYPE_CLOSELOG:
+	case PMCLOG_TYPE_DROPNOTIFY:
+		/* nothing to do */
+		break;
+	case PMCLOG_TYPE_INITIALIZE:
+		PMCLOG_READ32(le,ev->pl_u.pl_i.pl_version);
+		PMCLOG_READ32(le,ev->pl_u.pl_i.pl_arch);
+		ps->ps_version = ev->pl_u.pl_i.pl_version;
+		ps->ps_arch = ev->pl_u.pl_i.pl_arch;
+		ps->ps_initialized = 1;
+		break;
+	case PMCLOG_TYPE_MAP_IN:
+		PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_map_in);
+		PMCLOG_READ32(le,ev->pl_u.pl_mi.pl_pid);
+		PMCLOG_READADDR(le,ev->pl_u.pl_mi.pl_start);
+		PMCLOG_READSTRING(le, ev->pl_u.pl_mi.pl_pathname, pathlen);
+		break;
+	case PMCLOG_TYPE_MAP_OUT:
+		PMCLOG_READ32(le,ev->pl_u.pl_mo.pl_pid);
+		PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_start);
+		PMCLOG_READADDR(le,ev->pl_u.pl_mo.pl_end);
+		break;
+	case PMCLOG_TYPE_PCSAMPLE:
+		PMCLOG_READ32(le,ev->pl_u.pl_s.pl_pid);
+		PMCLOG_READADDR(le,ev->pl_u.pl_s.pl_pc);
+		PMCLOG_READ32(le,ev->pl_u.pl_s.pl_pmcid);
+		PMCLOG_READ32(le,ev->pl_u.pl_s.pl_usermode);
+		break;
+	case PMCLOG_TYPE_PMCALLOCATE:
+		PMCLOG_READ32(le,ev->pl_u.pl_a.pl_pmcid);
+		PMCLOG_READ32(le,ev->pl_u.pl_a.pl_event);
+		PMCLOG_READ32(le,ev->pl_u.pl_a.pl_flags);
+		if ((ev->pl_u.pl_a.pl_evname =
+		    _pmc_name_of_event(ev->pl_u.pl_a.pl_event, ps->ps_arch))
+		    == NULL)
+			goto error;
+		break;
+	case PMCLOG_TYPE_PMCATTACH:
+		PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_pmcattach);
+		PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pmcid);
+		PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pid);
+		PMCLOG_READSTRING(le,ev->pl_u.pl_t.pl_pathname,pathlen);
+		break;
+	case PMCLOG_TYPE_PMCDETACH:
+		PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pmcid);
+		PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pid);
+		break;
+	case PMCLOG_TYPE_PROCCSW:
+		PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pmcid);
+		PMCLOG_READ64(le,ev->pl_u.pl_c.pl_value);
+		PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pid);
+		break;
+	case PMCLOG_TYPE_PROCEXEC:
+		PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_procexec);
+		PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pid);
+		PMCLOG_READADDR(le,ev->pl_u.pl_x.pl_entryaddr);
+		PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pmcid);
+		PMCLOG_READSTRING(le,ev->pl_u.pl_x.pl_pathname,pathlen);
+		break;
+	case PMCLOG_TYPE_PROCEXIT:
+		PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pmcid);
+		PMCLOG_READ64(le,ev->pl_u.pl_e.pl_value);
+		PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pid);
+		break;
+	case PMCLOG_TYPE_PROCFORK:
+		PMCLOG_READ32(le,ev->pl_u.pl_f.pl_oldpid);
+		PMCLOG_READ32(le,ev->pl_u.pl_f.pl_newpid);
+		break;
+	case PMCLOG_TYPE_SYSEXIT:
+		PMCLOG_READ32(le,ev->pl_u.pl_se.pl_pid);
+		break;
+	case PMCLOG_TYPE_USERDATA:
+		PMCLOG_READ32(le,ev->pl_u.pl_u.pl_userdata);
+		break;
+	default:	/* unknown record type */
+		ps->ps_state = PL_STATE_ERROR;
+		ev->pl_state = PMCLOG_ERROR;
+		return (-1);
+	}
+
+	ev->pl_offset = (ps->ps_offset += evlen);
+	ev->pl_count  = (ps->ps_count += 1);
+	ev->pl_state = PMCLOG_OK;
+	return 0;
+
+ error:
+	ev->pl_state = PMCLOG_ERROR;
+	ps->ps_state = PL_STATE_ERROR;
+	return -1;
+}
+
+/*
+ * Extract and return the next event from the byte stream.
+ *
+ * Returns 0 and sets the event's state to PMCLOG_OK in case an event
+ * was successfully parsed.  Otherwise this function returns -1 and
+ * sets the event's state to PMCLOG_REQUIRE_DATA (more data is needed),
+ * PMCLOG_EOF (an EOF was seen) or PMCLOG_ERROR (a parse error was
+ * encountered, or read(2) failed with an error other than EAGAIN).
+ */
+
+int
+pmclog_read(void *cookie, struct pmclog_ev *ev)
+{
+	int retval;
+	ssize_t nread, remaining;
+	struct pmclog_parse_state *ps;
+
+	ps = (struct pmclog_parse_state *) cookie;
+
+	if (ps->ps_state == PL_STATE_ERROR) {
+		ev->pl_state = PMCLOG_ERROR;
+		return -1;
+	}
+
+	/*
+	 * If no buffered data is left for a new event, try to get more.
+	 */
+	if (ps->ps_len == 0) {
+		ev->pl_state = PMCLOG_REQUIRE_DATA;
+
+		/*
+		 * If we have a valid file descriptor to read from, attempt
+		 * to read from that.  This read may return with an error,
+		 * (which may be EAGAIN or other recoverable error), or
+		 * can return EOF.
+		 */
+		if (ps->ps_fd != PMCLOG_FD_NONE) {
+		refill:
+			nread = read(ps->ps_fd, ps->ps_buffer,
+			    PMCLOG_BUFFER_SIZE);
+
+			if (nread <= 0) {
+				if (nread == 0)
+					ev->pl_state = PMCLOG_EOF;
+				else if (errno != EAGAIN) /* not restartable */
+					ev->pl_state = PMCLOG_ERROR;
+				return -1;
+			}
+
+			ps->ps_len = nread;
+			ps->ps_data = ps->ps_buffer;
+		} else
+			return -1;
+	}
+
+	assert(ps->ps_len > 0);
+
+	/* Retrieve one event; the parser takes its byte count as ssize_t. */
+	remaining = (ssize_t) ps->ps_len;
+	retval = pmclog_get_event(ps, &ps->ps_data, &remaining, ev);
+	ps->ps_len = (size_t) remaining;
+
+	/*
+	 * If we need more data and we have a configured fd, try read
+	 * from it.
+	 */
+	if (retval < 0 && ev->pl_state == PMCLOG_REQUIRE_DATA &&
+	    ps->ps_fd != PMCLOG_FD_NONE) {
+		assert(ps->ps_len == 0);
+		goto refill;
+	}
+
+	return retval;
+}
+
+/*
+ * Hand a block of in-memory event data to a parser opened without a
+ * file descriptor.  'data' must stay valid until pmclog_read() next
+ * returns -1.  Returns 0 on success, or -1 if 'len' is negative, the
+ * parser is file based, or previously fed data is still unparsed.
+ */
+
+int
+pmclog_feed(void *cookie, char *data, int len)
+{
+	struct pmclog_parse_state *parser = cookie;
+
+	if (len < 0)			/* invalid length */
+		return (-1);
+	if (parser->ps_buffer != NULL)	/* called for a file parser */
+		return (-1);
+	if (parser->ps_len != 0)	/* unnecessary call */
+		return (-1);
+
+	parser->ps_len = len;
+	parser->ps_data = data;
+
+	return (0);
+}
+
+/*
+ * Allocate and initialize parser state for the event stream on 'fd',
+ * or for an in-memory stream if 'fd' is PMCLOG_FD_NONE.  Returns an
+ * opaque cookie, or NULL if memory could not be allocated.
+ */
+
+void *
+pmclog_open(int fd)
+{
+	struct pmclog_parse_state *ps;
+
+	/*
+	 * calloc() zeroes the counters, 'ps_saved' and 'ps_version', so no
+	 * field is left indeterminate; the other defaults are set below.
+	 */
+	if ((ps = calloc(1, sizeof(*ps))) == NULL)
+		return NULL;
+
+	ps->ps_state = PL_STATE_NEW_RECORD;
+	ps->ps_arch = -1;
+	ps->ps_fd = fd;
+	ps->ps_data = NULL;
+	ps->ps_buffer = NULL;
+
+	/* file based parsers read(2) into a private scratch buffer */
+	if (ps->ps_fd != PMCLOG_FD_NONE) {
+		if ((ps->ps_buffer = malloc(PMCLOG_BUFFER_SIZE)) == NULL) {
+			free(ps);
+			return NULL;
+		}
+	}
+
+	return ps;
+}
+
+
+/*
+ * Release the parser state, and any scratch buffer, behind 'cookie'.
+ */
+
+void
+pmclog_close(void *cookie)
+{
+	struct pmclog_parse_state *parser = cookie;
+
+	/*
+	 * Memory based parsers never allocate a scratch buffer; free(3)
+	 * of their NULL pointer is a no-op.
+	 */
+	free(parser->ps_buffer);
+	free(parser);
+}
diff --git a/lib/libpmc/pmclog.h b/lib/libpmc/pmclog.h
new file mode 100644
index 0000000..b7c9c84
--- /dev/null
+++ b/lib/libpmc/pmclog.h
@@ -0,0 +1,170 @@
+/*-
+ * Copyright (c) 2005-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef	_PMCLOG_H_
+#define	_PMCLOG_H_
+
+#include <sys/cdefs.h>
+#include <sys/pmclog.h>
+
+enum pmclog_state {	/* stored in pmclog_ev.pl_state */
+	PMCLOG_OK,
+	PMCLOG_EOF,
+	PMCLOG_REQUIRE_DATA,	/* parser needs more input bytes */
+	PMCLOG_ERROR
+};
+
+struct pmclog_ev_callchain {	/* payload of pmclog_ev.pl_u.pl_cc */
+	uint32_t	pl_pid;	/* NOTE(review): uint32_t, not pid_t */
+	uint32_t	pl_pmcid;	/* NOTE(review): not pmc_id_t */
+	uint32_t	pl_cpuflags;
+	uint32_t	pl_npc;	/* presumably # of pl_pc[] entries used */
+	uintfptr_t	pl_pc[PMC_CALLCHAIN_DEPTH_MAX];
+};
+
+struct pmclog_ev_dropnotify {	/* pl_u.pl_dn: event has no payload */
+};
+
+struct pmclog_ev_closelog {	/* pl_u.pl_cl: event has no payload */
+};
+
+struct pmclog_ev_initialize {	/* payload of pmclog_ev.pl_u.pl_i */
+	uint32_t	pl_version;
+	uint32_t	pl_arch;
+};
+
+struct pmclog_ev_map_in {	/* payload of pmclog_ev.pl_u.pl_mi */
+	pid_t		pl_pid;
+	uintfptr_t	pl_start;
+	char		pl_pathname[PATH_MAX];	/* presumably NUL-terminated; confirm */
+};
+
+struct pmclog_ev_map_out {	/* payload of pmclog_ev.pl_u.pl_mo */
+	pid_t		pl_pid;
+	uintfptr_t	pl_start;
+	uintfptr_t	pl_end;	/* TODO confirm: inclusive or exclusive bound */
+};
+
+struct pmclog_ev_pcsample {	/* payload of pmclog_ev.pl_u.pl_s */
+	uintfptr_t	pl_pc;
+	pid_t		pl_pid;
+	pmc_id_t	pl_pmcid;
+	uint32_t	pl_usermode;	/* presumably a boolean flag; confirm */
+};
+
+struct pmclog_ev_pmcallocate {	/* payload of pmclog_ev.pl_u.pl_a */
+	uint32_t	pl_event;
+	const char *	pl_evname;	/* pointer only; string owner not shown here */
+	uint32_t	pl_flags;
+	pmc_id_t	pl_pmcid;
+};
+
+struct pmclog_ev_pmcattach {	/* payload of pmclog_ev.pl_u.pl_t */
+	pmc_id_t	pl_pmcid;
+	pid_t		pl_pid;
+	char		pl_pathname[PATH_MAX];	/* presumably NUL-terminated; confirm */
+};
+
+struct pmclog_ev_pmcdetach {	/* payload of pmclog_ev.pl_u.pl_d */
+	pmc_id_t	pl_pmcid;
+	pid_t		pl_pid;
+};
+
+struct pmclog_ev_proccsw {	/* payload of pmclog_ev.pl_u.pl_c */
+	pid_t		pl_pid;
+	pmc_id_t	pl_pmcid;
+	pmc_value_t	pl_value;
+};
+
+struct pmclog_ev_procexec {	/* payload of pmclog_ev.pl_u.pl_x */
+	pid_t		pl_pid;
+	pmc_id_t	pl_pmcid;
+	uintfptr_t	pl_entryaddr;
+	char		pl_pathname[PATH_MAX];	/* presumably NUL-terminated; confirm */
+};
+
+struct pmclog_ev_procexit {	/* payload of pmclog_ev.pl_u.pl_e */
+	uint32_t	pl_pid;	/* NOTE(review): uint32_t, not pid_t */
+	pmc_id_t	pl_pmcid;
+	pmc_value_t	pl_value;
+};
+
+struct pmclog_ev_procfork {	/* payload of pmclog_ev.pl_u.pl_f */
+	pid_t		pl_oldpid;
+	pid_t		pl_newpid;
+};
+
+struct pmclog_ev_sysexit {	/* payload of pmclog_ev.pl_u.pl_se */
+	pid_t		pl_pid;
+};
+
+struct pmclog_ev_userdata {	/* payload of pmclog_ev.pl_u.pl_u */
+	uint32_t	pl_userdata;
+};
+
+struct pmclog_ev {	/* one event, filled in via pmclog_read()'s _ev argument */
+	enum pmclog_state pl_state;	/* state after 'get_event()' */
+	off_t		  pl_offset;	/* byte offset in stream */
+	size_t		  pl_count;	/* count of records so far */
+	struct timespec   pl_ts;	/* log entry timestamp */
+	enum pmclog_type  pl_type;	/* type of log entry */
+	union { 			/* log entry data */
+		struct pmclog_ev_callchain	pl_cc;
+		struct pmclog_ev_closelog	pl_cl;
+		struct pmclog_ev_dropnotify	pl_dn;
+		struct pmclog_ev_initialize	pl_i;
+		struct pmclog_ev_map_in		pl_mi;
+		struct pmclog_ev_map_out	pl_mo;
+		struct pmclog_ev_pcsample	pl_s;
+		struct pmclog_ev_pmcallocate	pl_a;
+		struct pmclog_ev_pmcattach	pl_t;
+		struct pmclog_ev_pmcdetach	pl_d;
+		struct pmclog_ev_proccsw	pl_c;
+		struct pmclog_ev_procexec	pl_x;
+		struct pmclog_ev_procexit	pl_e;
+		struct pmclog_ev_procfork	pl_f;
+		struct pmclog_ev_sysexit	pl_se;
+		struct pmclog_ev_userdata	pl_u;	/* reached as pl_u.pl_u */
+	} pl_u;	/* presumably pl_type selects the valid member; confirm */
+};
+
+#define	PMCLOG_FD_NONE				(-1) /* pmclog_open(): no fd */
+
+__BEGIN_DECLS
+void	*pmclog_open(int _fd);	/* NULL if allocation fails */
+int	pmclog_feed(void *_cookie, char *_data, int _len); /* 0 or -1 */
+int	pmclog_read(void *_cookie, struct pmclog_ev *_ev);
+void	pmclog_close(void *_cookie);	/* frees the parser state */
+__END_DECLS
+
+#endif
+