- Support for uncore counting events: one fixed PMC with the uncore

domain clock, 8 programmable PMCs. - Westmere based CPU (Xeon 5600, Corei7 980X) support. - New man pages with events list for core and uncore. - Updated Corei7 events with Intel 253669-033US December 2009 doc. Some events have been removed from the documentation; they have been kept in the code but documented in the man page as obsolete. - Offcore response events can be set up with the rsp token. Sponsored by: NETASQ
author: fabient <fabient@FreeBSD.org> 2010-04-02 13:23:49 +0000
committer: fabient <fabient@FreeBSD.org> 2010-04-02 13:23:49 +0000
commit: 85d5b2855f1b8db1aa9a2dd7945b711399a111b7 (patch)
tree: 74703b81e372faa288cd54560b3715fd2bc1e0ce /lib/libpmc
parent: 0e3cec01fc03abe666c04e3bfb10c74c603bc896 (diff)
download: FreeBSD-src-85d5b2855f1b8db1aa9a2dd7945b711399a111b7.zip
FreeBSD-src-85d5b2855f1b8db1aa9a2dd7945b711399a111b7.tar.gz
7 files changed, 5167 insertions, 7 deletions
diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile
index 09a23e3..fd35fd5 100644
--- a/lib/libpmc/Makefile
+++ b/lib/libpmc/Makefile
@@ -27,11 +27,16 @@ MAN+=	pmc.atom.3
 MAN+=	pmc.core.3
 MAN+=	pmc.core2.3
 MAN+=	pmc.iaf.3
+MAN+=	pmc.ucf.3
 MAN+=	pmc.k7.3
 MAN+=	pmc.k8.3
 MAN+=	pmc.p4.3
 MAN+=	pmc.p5.3
 MAN+=	pmc.p6.3
+MAN+=	pmc.corei7.3
+MAN+=	pmc.corei7uc.3
+MAN+=	pmc.westmere.3
+MAN+=	pmc.westmereuc.3
 MAN+=	pmc.tsc.3
 .elif ${MACHINE_ARCH} == "arm" && ${CPUTYPE} == "xscale"
 MAN+=	pmc.xscale.3
diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c
index bb79d7b..c440aa8 100644
--- a/lib/libpmc/libpmc.c
+++ b/lib/libpmc/libpmc.c
@@ -54,6 +54,10 @@ static int iaf_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 static int iap_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
+static int ucf_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
+static int ucp_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+    struct pmc_op_pmcallocate *_pmc_config);
 static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
     struct pmc_op_pmcallocate *_pmc_config);
 static int p4_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
@@ -144,6 +148,7 @@ PMC_CLASSDEP_TABLE(p5, P5);
 PMC_CLASSDEP_TABLE(p6, P6);
 PMC_CLASSDEP_TABLE(xscale, XSCALE);
 PMC_CLASSDEP_TABLE(mips24k, MIPS24K);
+PMC_CLASSDEP_TABLE(ucf, UCF);
 
 #undef	__PMC_EV_ALIAS
 #define	__PMC_EV_ALIAS(N,CODE) 	{ N, PMC_EV_##CODE },
@@ -169,6 +174,21 @@ static const struct pmc_event_descr corei7_event_table[] =
 	__PMC_EV_ALIAS_COREI7()
 };
 
+static const struct pmc_event_descr westmere_event_table[] =	/* Westmere core event names */
+{
+	__PMC_EV_ALIAS_WESTMERE()	/* each __PMC_EV_ALIAS(N,CODE) expands to { N, PMC_EV_##CODE }, */
+};
+
+static const struct pmc_event_descr corei7uc_event_table[] =	/* Core i7 uncore event names */
+{
+	__PMC_EV_ALIAS_COREI7UC()	/* each __PMC_EV_ALIAS(N,CODE) expands to { N, PMC_EV_##CODE }, */
+};
+
+static const struct pmc_event_descr westmereuc_event_table[] =	/* Westmere uncore event names */
+{
+	__PMC_EV_ALIAS_WESTMEREUC()	/* each __PMC_EV_ALIAS(N,CODE) expands to { N, PMC_EV_##CODE }, */
+};
+
 /*
  * PMC_MDEP_TABLE(NAME, PRIMARYCLASS, ADDITIONAL_CLASSES...)
  *
@@ -182,7 +202,8 @@ static const struct pmc_event_descr corei7_event_table[] =
 PMC_MDEP_TABLE(atom, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(core, IAP, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(core2, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
-PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC);
+PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
+PMC_MDEP_TABLE(westmere, IAP, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP);
 PMC_MDEP_TABLE(k7, K7, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(k8, K8, PMC_CLASS_TSC);
 PMC_MDEP_TABLE(p4, P4, PMC_CLASS_TSC);
@@ -215,6 +236,10 @@ PMC_CLASS_TABLE_DESC(atom, IAP, atom, iap);
 PMC_CLASS_TABLE_DESC(core, IAP, core, iap);
 PMC_CLASS_TABLE_DESC(core2, IAP, core2, iap);
 PMC_CLASS_TABLE_DESC(corei7, IAP, corei7, iap);
+PMC_CLASS_TABLE_DESC(westmere, IAP, westmere, iap);
+PMC_CLASS_TABLE_DESC(ucf, UCF, ucf, ucf);
+PMC_CLASS_TABLE_DESC(corei7uc, UCP, corei7uc, ucp);
+PMC_CLASS_TABLE_DESC(westmereuc, UCP, westmereuc, ucp);
 #endif
 #if	defined(__i386__)
 PMC_CLASS_TABLE_DESC(k7, K7, k7, k7);
@@ -302,7 +327,7 @@ struct pmc_masks {
 	const uint32_t	pm_value;
 };
 #define	PMCMASK(N,V)	{ .pm_name = #N, .pm_value = (V) }
-#define	NULLMASK	PMCMASK(NULL,0)
+#define	NULLMASK	{ .pm_name = NULL }
 
 #if defined(__amd64__) || defined(__i386__)
 static int
@@ -495,6 +520,8 @@ static struct pmc_event_alias core2_aliases_without_iaf[] = {
 #define	atom_aliases_without_iaf	core2_aliases_without_iaf
 #define corei7_aliases			core2_aliases
 #define corei7_aliases_without_iaf	core2_aliases_without_iaf
+#define westmere_aliases		core2_aliases
+#define westmere_aliases_without_iaf	core2_aliases_without_iaf
 
 #define	IAF_KW_OS		"os"
 #define	IAF_KW_USR		"usr"
@@ -545,6 +572,7 @@ iaf_allocate_pmc(enum pmc_event pe, char *ctrspec,
 #define	IAP_KW_SNOOPTYPE	"snooptype"
 #define	IAP_KW_TRANSITION	"trans"
 #define	IAP_KW_USR		"usr"
+#define	IAP_KW_RSP		"rsp"
 
 static struct pmc_masks iap_core_mask[] = {
 	PMCMASK(all,	(0x3 << 14)),
@@ -592,19 +620,38 @@ static struct pmc_masks iap_transition_mask[] = {
 	NULLMASK
 };
 
+static struct pmc_masks iap_rsp_mask[] = {	/* names accepted by the "rsp=" qualifier, one bit per name */
+	PMCMASK(DMND_DATA_RD,		(1 <<  0)),
+	PMCMASK(DMND_RFO,		(1 <<  1)),
+	PMCMASK(DMND_IFETCH,		(1 <<  2)),
+	PMCMASK(WB,			(1 <<  3)),
+	PMCMASK(PF_DATA_RD,		(1 <<  4)),
+	PMCMASK(PF_RFO,			(1 <<  5)),
+	PMCMASK(PF_IFETCH,		(1 <<  6)),
+	PMCMASK(OTHER,			(1 <<  7)),
+	PMCMASK(UNCORE_HIT,		(1 <<  8)),
+	PMCMASK(OTHER_CORE_HIT_SNP,	(1 <<  9)),
+	PMCMASK(OTHER_CORE_HITM,	(1 << 10)),
+	PMCMASK(REMOTE_CACHE_FWD,	(1 << 12)),	/* NOTE(review): bit 11 has no entry in this table */
+	PMCMASK(REMOTE_DRAM,		(1 << 13)),
+	PMCMASK(LOCAL_DRAM,		(1 << 14)),
+	PMCMASK(NON_DRAM,		(1 << 15)),
+	NULLMASK	/* terminator: entry with pm_name == NULL */
+};
+
 static int
 iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
     struct pmc_op_pmcallocate *pmc_config)
 {
 	char *e, *p, *q;
-	uint32_t cachestate, evmask;
+	uint32_t cachestate, evmask, rsp;
 	int count, n;
 
 	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE |
 	    PMC_CAP_QUALIFIER);
 	pmc_config->pm_md.pm_iap.pm_iap_config = 0;
 
-	cachestate = evmask = 0;
+	cachestate = evmask = rsp = 0;
 
 	/* Parse additional modifiers if present */
 	while ((p = strsep(&ctrspec, ",")) != NULL) {
@@ -651,8 +698,7 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
 				return (-1);
 		} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_ATOM ||
 		    cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2 ||
-		    cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2EXTREME ||
-		    cpu_info.pm_cputype == PMC_CPU_INTEL_COREI7) {
+		    cpu_info.pm_cputype == PMC_CPU_INTEL_CORE2EXTREME) {
 			if (KWPREFIXMATCH(p, IAP_KW_SNOOPRESPONSE "=")) {
 				n = pmc_parse_mask(iap_snoopresponse_mask, p,
 				    &evmask);
@@ -661,6 +707,12 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
 				    &evmask);
 			} else
 				return (-1);
+		} else if (cpu_info.pm_cputype == PMC_CPU_INTEL_COREI7 ||
+		    cpu_info.pm_cputype == PMC_CPU_INTEL_WESTMERE) {
+			if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) {
+				n = pmc_parse_mask(iap_rsp_mask, p, &rsp);
+			} else
+				return (-1);
 		} else
 			return (-1);
 
@@ -693,6 +745,69 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec,
 	}
 
 	pmc_config->pm_md.pm_iap.pm_iap_config |= cachestate;
+	pmc_config->pm_md.pm_iap.pm_iap_rsp = rsp;
+
+	return (0);
+}
+
+/*
+ * Intel Uncore.
+ */
+
+static int	/* UCF class allocation hook; always returns 0 */
+ucf_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	(void) pe;		/* unused: the event code is not inspected here */
+	(void) ctrspec;		/* unused: no qualifiers are parsed for this class */
+
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE);	/* only read/write caps are requested */
+	pmc_config->pm_md.pm_ucf.pm_ucf_flags = 0;	/* clear the UCF machine-dependent flags */
+
+	return (0);
+}
+
+#define	UCP_KW_CMASK		"cmask"
+#define	UCP_KW_EDGE		"edge"
+#define	UCP_KW_INV		"inv"
+
+static int	/* UCP class allocation hook: 0 on success, -1 on an unknown or malformed qualifier */
+ucp_allocate_pmc(enum pmc_event pe, char *ctrspec,
+    struct pmc_op_pmcallocate *pmc_config)
+{
+	char *e, *p, *q;
+	int count, n;
+
+	(void) pe;	/* unused: the event code is not inspected here */
+
+	pmc_config->pm_caps |= (PMC_CAP_READ | PMC_CAP_WRITE |
+	    PMC_CAP_QUALIFIER);
+	pmc_config->pm_md.pm_ucp.pm_ucp_config = 0;	/* start from an empty config word */
+
+	/* Parse additional modifiers if present */
+	while ((p = strsep(&ctrspec, ",")) != NULL) {	/* strsep() splits ctrspec in place on ',' */
+
+		n = 0;	/* NOTE(review): never reassigned in this loop, so the n < 0 check below cannot fire */
+		if (KWPREFIXMATCH(p, UCP_KW_CMASK "=")) {
+			q = strchr(p, '=');
+			if (*++q == '\0') /* skip '=' */
+				return (-1);
+			count = strtol(q, &e, 0);	/* base 0; NOTE(review): long result narrowed to int, no range check */
+			if (e == q || *e != '\0')	/* no digits consumed, or trailing characters */
+				return (-1);
+			pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
+			pmc_config->pm_md.pm_ucp.pm_ucp_config |=
+			    UCP_CMASK(count);
+		} else if (KWMATCH(p, UCP_KW_EDGE)) {
+			pmc_config->pm_caps |= PMC_CAP_EDGE;	/* only a capability bit is set; config word untouched */
+		} else if (KWMATCH(p, UCP_KW_INV)) {
+			pmc_config->pm_caps |= PMC_CAP_INVERT;	/* only a capability bit is set; config word untouched */
+		} else
+			return (-1);	/* unrecognised qualifier */
+
+		if (n < 0)	/* Parsing failed. */
+			return (-1);
+	}
 
 	return (0);
 }
@@ -2392,6 +2507,31 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames,
 			ev = corei7_event_table;
 			count = PMC_EVENT_TABLE_SIZE(corei7);
 			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			ev = westmere_event_table;
+			count = PMC_EVENT_TABLE_SIZE(westmere);
+			break;
+		}
+		break;
+	case PMC_CLASS_UCF:
+		ev = ucf_event_table;
+		count = PMC_EVENT_TABLE_SIZE(ucf);
+		break;
+	case PMC_CLASS_UCP:
+		/*
+		 * Return the most appropriate set of event name
+		 * spellings for the current CPU.
+		 */
+		switch (cpu_info.pm_cputype) {
+		default:
+		case PMC_CPU_INTEL_COREI7:
+			ev = corei7uc_event_table;
+			count = PMC_EVENT_TABLE_SIZE(corei7uc);
+			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			ev = westmereuc_event_table;
+			count = PMC_EVENT_TABLE_SIZE(westmereuc);
+			break;
 		}
 		break;
 	case PMC_CLASS_TSC:
@@ -2605,8 +2745,15 @@ pmc_init(void)
 		PMC_MDEP_INIT_INTEL_V2(core2);
 		break;
 	case PMC_CPU_INTEL_COREI7:
+		pmc_class_table[n++] = &ucf_class_table_descr;
+		pmc_class_table[n++] = &corei7uc_class_table_descr;
 		PMC_MDEP_INIT_INTEL_V2(corei7);
 		break;
+	case PMC_CPU_INTEL_WESTMERE:
+		pmc_class_table[n++] = &ucf_class_table_descr;
+		pmc_class_table[n++] = &westmereuc_class_table_descr;
+		PMC_MDEP_INIT_INTEL_V2(westmere);
+		break;
 	case PMC_CPU_INTEL_PIV:
 		PMC_MDEP_INIT(p4);
 		pmc_class_table[n] = &p4_class_table_descr;
@@ -2719,10 +2866,30 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu)
 			ev = corei7_event_table;
 			evfence = corei7_event_table + PMC_EVENT_TABLE_SIZE(corei7);
 			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			ev = westmere_event_table;
+			evfence = westmere_event_table + PMC_EVENT_TABLE_SIZE(westmere);
+			break;
+		default:	/* Unknown CPU type. */
+			break;
+		}
+	} else if (pe >= PMC_EV_UCF_FIRST && pe <= PMC_EV_UCF_LAST) {
+		ev = ucf_event_table;
+		evfence = ucf_event_table + PMC_EVENT_TABLE_SIZE(ucf);
+	} else if (pe >= PMC_EV_UCP_FIRST && pe <= PMC_EV_UCP_LAST) {
+		switch (cpu) {
+		case PMC_CPU_INTEL_COREI7:
+			ev = corei7uc_event_table;
+			evfence = corei7uc_event_table + PMC_EVENT_TABLE_SIZE(corei7uc);
+			break;
+		case PMC_CPU_INTEL_WESTMERE:
+			ev = westmereuc_event_table;
+			evfence = westmereuc_event_table + PMC_EVENT_TABLE_SIZE(westmereuc);
+			break;
 		default:	/* Unknown CPU type. */
 			break;
 		}
-	} if (pe >= PMC_EV_K7_FIRST && pe <= PMC_EV_K7_LAST) {
+	} else if (pe >= PMC_EV_K7_FIRST && pe <= PMC_EV_K7_LAST) {
 		ev = k7_event_table;
 		evfence = k7_event_table + PMC_EVENT_TABLE_SIZE(k7);
 	} else if (pe >= PMC_EV_K8_FIRST && pe <= PMC_EV_K8_LAST) {
diff --git a/lib/libpmc/pmc.corei7.3 b/lib/libpmc/pmc.corei7.3
new file mode 100644
index 0000000..1a8b9d9
--- /dev/null
+++ b/lib/libpmc/pmc.corei7.3
@@ -0,0 +1,1581 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Os
+.Dt PMC.COREI7 3
+.Sh NAME
+.Nm pmc.corei7
+.Nd measurement events for
+.Tn Intel
+.Tn Core i7 and Xeon 5500
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Core i7"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to three classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Core i7 and Xeon 5500 PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss COREI7 AND XEON 5500 FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+Not all CPUs in this family implement fixed-function counters.
+.Ss COREI7 AND XEON 5500 PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full
+and partial cachelines as well as demand data page table entry
+cacheline reads. Does not count L2 data read prefetches or
+instruction fetches.
+.It Li DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership
+(RFO) requests generated by a write to data cacheline. Does not
+count L2 RFO.
+.It Li DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline
+reads. Does not count L2 code read prefetches.
+.It Li WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li PF_RFO
+Counts the number of RFO requests generated by L2 prefetchers.
+.It Li PF_IFETCH
+Counts the number of code reads generated by L2 prefetchers.
+.It Li OTHER
+Counts one of the following transaction types, including L3 invalidate,
+I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences,
+lock, unlock, split lock.
+.It Li UNCORE_HIT
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+with no coherency actions required (snooping).
+.It Li OTHER_CORE_HIT_SNP
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and were serviced by another core with a cross core snoop where no modified
+copies were found (clean).
+.It Li OTHER_CORE_HITM
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and were serviced by another core with a cross core snoop where modified
+copies were found (HITM).
+.It Li REMOTE_CACHE_FWD
+L3 Miss: local homed requests that missed the L3 cache and were serviced
+by forwarded data following a cross package snoop where no modified
+copies were found. (Remote home requests are not counted.)
+.It Li REMOTE_DRAM
+L3 Miss: remote home requests that missed the L3 cache and were serviced
+by remote DRAM.
+.It Li LOCAL_DRAM
+L3 Miss: local home requests that missed the L3 cache and were serviced
+by local DRAM.
+.It Li NON_DRAM
+Non-DRAM requests that were serviced by IOH.
+.El
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li SB_DRAIN.ANY
+.Pq Event 04H , Umask 07H
+Counts the number of store buffer drains.
+.It Li STORE_BLOCKS.AT_RET
+.Pq Event 06H , Umask 04H
+Counts number of loads delayed with at-Retirement block code. The following
+loads need to be executed at retirement and wait for all senior stores on
+the same thread to be drained: load splitting across 4K boundary (page
+split), load accessing uncacheable (UC or USWC) memory, load lock, and load
+with page table in UC or USWC memory region.
+.It Li STORE_BLOCKS.L1D_BLOCK
+.Pq Event 06H , Umask 08H
+Cacheable loads delayed with L1D block code
+.It Li PARTIAL_ADDRESS_ALIAS
+.Pq Event 07H , Umask 01H
+Counts false dependency due to partial address aliasing
+.It Li DTLB_LOAD_MISSES.ANY
+.Pq Event 08H , Umask 01H
+Counts all load misses that cause a page walk
+.It Li DTLB_LOAD_MISSES.WALK_COMPLETED
+.Pq Event 08H , Umask 02H
+Counts number of completed page walks due to load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.STLB_HIT
+.Pq Event 08H , Umask 10H
+Number of cache load STLB hits
+.It Li DTLB_LOAD_MISSES.PDE_MISS
+.Pq Event 08H , Umask 20H
+Number of DTLB cache load misses where the low part of the linear to
+physical address translation was missed.
+.It Li DTLB_LOAD_MISSES.PDP_MISS
+.Pq Event 08H , Umask 40H
+Number of DTLB cache load misses where the high part of the linear to
+physical address translation was missed.
+.It Li DTLB_LOAD_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 08H , Umask 80H
+Counts number of completed large page walks due to load miss in the STLB.
+.It Li MEM_INST_RETIRED.LOADS
+.Pq Event 0BH , Umask 01H
+Counts the number of instructions with an architecturally-visible load
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.STORES
+.Pq Event 0BH , Umask 02H
+Counts the number of instructions with an architecturally-visible store
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD
+.Pq Event 0BH , Umask 10H
+Counts the number of instructions exceeding the latency specified with
+ld_lat facility.
+In conjunction with ld_lat facility
+.It Li MEM_STORE_RETIRED.DTLB_MISS
+.Pq Event 0CH , Umask 01H
+The event counts the number of retired stores that missed the DTLB. The DTLB
+miss is not counted if the store operation causes a fault. Does not count
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH , Umask 01H
+Counts the number of Uops issued by the Register Allocation Table to the
+Reservation Station, i.e. the UOPs issued from the front end to the back
+end.
+.It Li UOPS_ISSUED.STALLED_CYCLES
+.Pq Event 0EH , Umask 01H
+Counts the number of cycles no Uops issued by the Register Allocation Table
+to the Reservation Station, i.e. the UOPs issued from the front end to the
+back end.
+set invert=1, cmask = 1
+.It Li UOPS_ISSUED.FUSED
+.Pq Event 0EH , Umask 02H
+Counts the number of fused Uops that were issued from the Register
+Allocation Table to the Reservation Station.
+.It Li MEM_UNCORE_RETIRED.L3_DATA_MISS_UNKNOWN
+.Pq Event 0FH , Umask 01H
+Counts number of memory load instructions retired where the memory reference
+missed L3 and data source is unknown.
+Available only for CPUID signature 06_2EH
+.It Li MEM_UNCORE_RETIRED.OTHER_CORE_L2_HITM
+.Pq Event 0FH , Umask 02H
+Counts number of memory load instructions retired where the memory reference
+hit modified data in a sibling core residing on the same socket.
+.It Li MEM_UNCORE_RETIRED.REMOTE_CACHE_LOCAL_HOME_HIT
+.Pq Event 0FH , Umask 08H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and HIT in a remote socket's cache. Only
+counts locally homed lines.
+.It Li MEM_UNCORE_RETIRED.REMOTE_DRAM
+.Pq Event 0FH , Umask 10H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and was remotely homed. This includes both
+DRAM access and HITM in a remote socket's cache for remotely homed lines.
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM
+.Pq Event 0FH , Umask 20H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and required a local socket memory
+reference. This includes locally homed cachelines that were in a modified
+state in another socket.
+.It Li MEM_UNCORE_RETIRED.UNCACHEABLE
+.Pq Event 0FH , Umask 80H
+Counts number of memory load instructions retired where the memory reference
+missed the L1, L2 and L3 caches and had to perform I/O.
+Available only for CPUID signature 06_2EH
+.It Li FP_COMP_OPS_EXE.X87
+.Pq Event 10H , Umask 01H
+Counts the number of FP Computational Uops Executed. The number of FADD,
+FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer
+DIVs, and IDIVs. This event does not distinguish an FADD used in the middle
+of a transcendental flow from a separate FADD instruction.
+.It Li FP_COMP_OPS_EXE.MMX
+.Pq Event 10H , Umask 02H
+Counts number of MMX Uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP
+.Pq Event 10H , Umask 04H
+Counts number of SSE and SSE2 FP uops executed.
+.It Li FP_COMP_OPS_EXE.SSE2_INTEGER
+.Pq Event 10H , Umask 08H
+Counts number of SSE2 integer uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED
+.Pq Event 10H , Umask 10H
+Counts number of SSE FP packed uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR
+.Pq Event 10H , Umask 20H
+Counts number of SSE FP scalar uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION
+.Pq Event 10H , Umask 40H
+Counts number of SSE* FP single precision uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION
+.Pq Event 10H , Umask 80H
+Counts number of SSE* FP double precision uops executed.
+.It Li SIMD_INT_128.PACKED_MPY
+.Pq Event 12H , Umask 01H
+Counts number of 128 bit SIMD integer multiply operations.
+.It Li SIMD_INT_128.PACKED_SHIFT
+.Pq Event 12H , Umask 02H
+Counts number of 128 bit SIMD integer shift operations.
+.It Li SIMD_INT_128.PACK
+.Pq Event 12H , Umask 04H
+Counts number of 128 bit SIMD integer pack operations.
+.It Li SIMD_INT_128.UNPACK
+.Pq Event 12H , Umask 08H
+Counts number of 128 bit SIMD integer unpack operations.
+.It Li SIMD_INT_128.PACKED_LOGICAL
+.Pq Event 12H , Umask 10H
+Counts number of 128 bit SIMD integer logical operations.
+.It Li SIMD_INT_128.PACKED_ARITH
+.Pq Event 12H , Umask 20H
+Counts number of 128 bit SIMD integer arithmetic operations.
+.It Li SIMD_INT_128.SHUFFLE_MOVE
+.Pq Event 12H , Umask 40H
+Counts number of 128 bit SIMD integer shuffle and move operations.
+.It Li LOAD_DISPATCH.RS
+.Pq Event 13H , Umask 01H
+Counts number of loads dispatched from the Reservation Station that bypass
+the Memory Order Buffer.
+.It Li LOAD_DISPATCH.RS_DELAYED
+.Pq Event 13H , Umask 02H
+Counts the number of delayed RS dispatches at the stage latch. If an RS
+dispatch can not bypass to LB, it has another chance to dispatch from the
+one-cycle delayed staging latch before it is written into the LB.
+.It Li LOAD_DISPATCH.MOB
+.Pq Event 13H , Umask 04H
+Counts the number of loads dispatched from the Reservation Station to the
+Memory Order Buffer.
+.It Li LOAD_DISPATCH.ANY
+.Pq Event 13H , Umask 07H
+Counts all loads dispatched from the Reservation Station.
+.It Li ARITH.CYCLES_DIV_BUSY
+.Pq Event 14H , Umask 01H
+Counts the number of cycles the divider is busy executing divide or square
+root operations. The divide can be integer, X87 or Streaming SIMD Extensions
+(SSE). The square root operation can be either X87 or SSE.
+Set 'edge =1, invert=1, cmask=1' to count the number of divides.
+Count may be incorrect when SMT is on.
+.It Li ARITH.MUL
+.Pq Event 14H , Umask 02H
+Counts the number of multiply operations executed. This includes integer as
+well as floating point multiply operations but excludes DPPS mul and MPSAD.
+Count may be incorrect when SMT is on.
+.It Li INST_QUEUE_WRITES
+.Pq Event 17H , Umask 01H
+Counts the number of instructions written into the instruction queue every
+cycle.
+.It Li INST_DECODED.DEC0
+.Pq Event 18H , Umask 01H
+Counts number of instructions that require decoder 0 to be decoded. Usually,
+this means that the instruction maps to more than 1 uop
+.It Li TWO_UOP_INSTS_DECODED
+.Pq Event 19H , Umask 01H
+An instruction that generates two uops was decoded
+.It Li INST_QUEUE_WRITE_CYCLES
+.Pq Event 1EH , Umask 01H
+This event counts the number of cycles during which instructions are written
+to the instruction queue. Dividing this counter by the number of
+instructions written to the instruction queue (INST_QUEUE_WRITES) yields the
+average number of instructions decoded each cycle. If this number is less
+than four and the pipe stalls, this indicates that the decoder is failing to
+decode enough instructions per cycle to sustain the 4-wide pipeline.
+If SSE* instructions that are 6 bytes or longer arrive one after another,
+then front end throughput may limit execution speed.
+.It Li LSD_OVERFLOW
+.Pq Event 20H , Umask 01H
+Counts number of loops that cannot stream from the instruction queue.
+.It Li L2_RQSTS.LD_HIT
+.Pq Event 24H , Umask 01H
+Counts number of loads that hit the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches. L2 loads can be rejected for
+various reasons. Only non rejected loads are counted.
+.It Li L2_RQSTS.LD_MISS
+.Pq Event 24H , Umask 02H
+Counts the number of loads that miss the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches.
+.It Li L2_RQSTS.LOADS
+.Pq Event 24H , Umask 03H
+Counts all L2 load requests. L2 loads include both L1D demand misses as well
+as L1D prefetches.
+.It Li L2_RQSTS.RFO_HIT
+.Pq Event 24H , Umask 04H
+Counts the number of store RFO requests that hit the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+Count includes WC memory requests, where the data is not fetched but the
+permission to write the line is required.
+.It Li L2_RQSTS.RFO_MISS
+.Pq Event 24H , Umask 08H
+Counts the number of store RFO requests that miss the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.RFOS
+.Pq Event 24H , Umask 0CH
+Counts all L2 store RFO requests. L2 RFO requests include both L1D demand
+RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.IFETCH_HIT
+.Pq Event 24H , Umask 10H
+Counts number of instruction fetches that hit the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCH_MISS
+.Pq Event 24H , Umask 20H
+Counts number of instruction fetches that miss the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCHES
+.Pq Event 24H , Umask 30H
+Counts all instruction fetches. L2 instruction fetches include both L1I
+demand misses as well as L1I instruction prefetches.
+.It Li L2_RQSTS.PREFETCH_HIT
+.Pq Event 24H , Umask 40H
+Counts L2 prefetch hits for both code and data.
+.It Li L2_RQSTS.PREFETCH_MISS
+.Pq Event 24H , Umask 80H
+Counts L2 prefetch misses for both code and data.
+.It Li L2_RQSTS.PREFETCHES
+.Pq Event 24H , Umask C0H
+Counts all L2 prefetches for both code and data.
+.It Li L2_RQSTS.MISS
+.Pq Event 24H , Umask AAH
+Counts all L2 misses for both code and data.
+.It Li L2_RQSTS.REFERENCES
+.Pq Event 24H , Umask FFH
+Counts all L2 requests for both code and data.
+.It Li L2_DATA_RQSTS.DEMAND.I_STATE
+.Pq Event 26H , Umask 01H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D
+demand misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.S_STATE
+.Pq Event 26H , Umask 02H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the S (shared) state. L2 demand loads are both L1D demand misses and L1D
+prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.E_STATE
+.Pq Event 26H , Umask 04H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the E (exclusive) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.M_STATE
+.Pq Event 26H , Umask 08H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the M (modified) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.MESI
+.Pq Event 26H , Umask 0FH
+Counts all L2 data demand requests. L2 demand loads are both L1D demand
+misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.PREFETCH.I_STATE
+.Pq Event 26H , Umask 10H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss.
+.It Li L2_DATA_RQSTS.PREFETCH.S_STATE
+.Pq Event 26H , Umask 20H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the S (shared) state. A prefetch RFO will miss on an S state line, while
+a prefetch read will hit on an S state line.
+.It Li L2_DATA_RQSTS.PREFETCH.E_STATE
+.Pq Event 26H , Umask 40H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the E (exclusive) state.
+.It Li L2_DATA_RQSTS.PREFETCH.M_STATE
+.Pq Event 26H , Umask 80H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the M (modified) state.
+.It Li L2_DATA_RQSTS.PREFETCH.MESI
+.Pq Event 26H , Umask F0H
+Counts all L2 prefetch requests.
+.It Li L2_DATA_RQSTS.ANY
+.Pq Event 26H , Umask FFH
+Counts all L2 data requests.
+.It Li L2_WRITE.RFO.I_STATE
+.Pq Event 27H , Umask 01H
+Counts number of L2 demand store RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss. The L1D prefetcher
+does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.S_STATE
+.Pq Event 27H , Umask 02H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.M_STATE
+.Pq Event 27H , Umask 08H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.HIT
+.Pq Event 27H , Umask 0EH
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in either the S, E or M states. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.MESI
+.Pq Event 27H , Umask 0FH
+Counts all L2 store RFO requests. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.LOCK.I_STATE
+.Pq Event 27H , Umask 10H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss.
+.It Li L2_WRITE.LOCK.S_STATE
+.Pq Event 27H , Umask 20H
+Counts number of L2 lock RFO requests where the cache line to be loaded is
+in the S (shared) state.
+.It Li L2_WRITE.LOCK.E_STATE
+.Pq Event 27H , Umask 40H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the E (exclusive) state.
+.It Li L2_WRITE.LOCK.M_STATE
+.Pq Event 27H , Umask 80H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the M (modified) state.
+.It Li L2_WRITE.LOCK.HIT
+.Pq Event 27H , Umask E0H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in either the S, E, or M state.
+.It Li L2_WRITE.LOCK.MESI
+.Pq Event 27H , Umask F0H
+Counts all L2 demand lock RFO requests.
+.It Li L1D_WB_L2.I_STATE
+.Pq Event 28H , Umask 01H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the I (invalid) state, i.e. a cache miss.
+.It Li L1D_WB_L2.S_STATE
+.Pq Event 28H , Umask 02H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the S state.
+.It Li L1D_WB_L2.E_STATE
+.Pq Event 28H , Umask 04H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the E (exclusive) state.
+.It Li L1D_WB_L2.M_STATE
+.Pq Event 28H , Umask 08H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the M (modified) state.
+.It Li L1D_WB_L2.MESI
+.Pq Event 28H , Umask 0FH
+Counts all L1 writebacks to the L2.
+.It Li L3_LAT_CACHE.REFERENCE
+.Pq Event 2EH , Umask 4FH
+This event counts requests originating from the core that reference a cache
+line in the last level cache. The event count includes speculative traffic
+but excludes cache line fills due to a L2 hardware-prefetch. Because of cache
+hierarchy, cache sizes and other implementation-specific characteristics,
+value comparison to estimate performance differences is not recommended.
+see Table A-1
+.It Li L3_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 41H
+This event counts each cache miss condition for references to the last level
+cache. The event count may include speculative traffic but excludes cache
+line fills due to L2 hardware-prefetches. Because of cache hierarchy, cache
+sizes and other implementation-specific characteristics, value comparison to
+estimate performance differences is not recommended.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH , Umask 00H
+Counts the number of thread cycles while the thread is not in a halt state.
+The thread enters the halt state when it is running the HLT instruction. The
+core frequency may change from time to time due to power or thermal
+throttling.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.REF_P
+.Pq Event 3CH , Umask 01H
+Increments at the frequency of TSC when not halted.
+see Table A-1
+.It Li L1D_CACHE_LD.I_STATE
+.Pq Event 40H , Umask 01H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the I (invalid) state, i.e. the read request missed the cache.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.S_STATE
+.Pq Event 40H , Umask 02H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the S (shared) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.E_STATE
+.Pq Event 40H , Umask 04H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the E (exclusive) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.M_STATE
+.Pq Event 40H , Umask 08H
+Counts L1 data cache read requests where the cache line to be loaded is in
+the M (modified) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LD.MESI
+.Pq Event 40H , Umask 0FH
+Counts L1 data cache read requests.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.S_STATE
+.Pq Event 41H , Umask 02H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the S (shared) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.E_STATE
+.Pq Event 41H , Umask 04H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the E (exclusive) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.M_STATE
+.Pq Event 41H , Umask 08H
+Counts L1 data cache store RFO requests where cache line to be loaded is in
+the M (modified) state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.HIT
+.Pq Event 42H , Umask 01H
+Counts retired load locks that hit in the L1 data cache or hit in an already
+allocated fill buffer. The lock portion of the load lock transaction must
+hit in the L1D.
+The initial load will pull the lock into the L1 data cache. Counter 0, 1
+only
+.It Li L1D_CACHE_LOCK.S_STATE
+.Pq Event 42H , Umask 02H
+Counts L1 data cache retired load locks that hit the target cache line in
+the shared state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.E_STATE
+.Pq Event 42H , Umask 04H
+Counts L1 data cache retired load locks that hit the target cache line in
+the exclusive state.
+Counter 0, 1 only
+.It Li L1D_CACHE_LOCK.M_STATE
+.Pq Event 42H , Umask 08H
+Counts L1 data cache retired load locks that hit the target cache line in
+the modified state.
+Counter 0, 1 only
+.It Li L1D_ALL_REF.ANY
+.Pq Event 43H , Umask 01H
+Counts all references (uncached, speculated and retired) to the L1 data
+cache, including all loads and stores with any memory types. The event
+counts memory accesses only when they are actually performed. For example, a
+load blocked by unknown store address and later performed is only counted
+once.
+The event does not include non-memory accesses, such as I/O accesses.
+Counter 0, 1 only
+.It Li L1D_ALL_REF.CACHEABLE
+.Pq Event 43H , Umask 02H
+Counts all data reads and writes (speculated and retired) from cacheable
+memory, including locked operations.
+Counter 0, 1 only
+.It Li L1D_PEND_MISS.LOAD_BUFFERS_FULL
+.Pq Event 48H , Umask 02H
+Counts cycles of L1 data cache load fill buffers full.
+Counter 0, 1 only
+.It Li DTLB_MISSES.ANY
+.Pq Event 49H , Umask 01H
+Counts the number of misses in the STLB which cause a page walk.
+.It Li DTLB_MISSES.WALK_COMPLETED
+.Pq Event 49H , Umask 02H
+Counts number of misses in the STLB which resulted in a completed page walk.
+.It Li DTLB_MISSES.STLB_HIT
+.Pq Event 49H , Umask 10H
+Counts the number of DTLB first level misses that hit in the second level
+TLB. This event is only relevant if the core contains multiple DTLB levels.
+.It Li LOAD_HIT_PRE
+.Pq Event 4CH , Umask 01H
+Counts load operations sent to the L1 data cache while a previous SSE
+prefetch instruction to the same cache line has started prefetching but has
+not yet finished.
+.It Li L1D_PREFETCH.REQUESTS
+.Pq Event 4EH , Umask 01H
+Counts number of hardware prefetch requests dispatched out of the prefetch
+FIFO.
+.It Li L1D_PREFETCH.MISS
+.Pq Event 4EH , Umask 02H
+Counts number of hardware prefetch requests that miss the L1D. There are two
+prefetchers in the L1D. A streamer, which predicts lines sequentially after
+this one should be fetched, and the IP prefetcher that remembers access
+patterns for the current instruction. The streamer prefetcher stops on an
+L1D hit, while the IP prefetcher does not.
+.It Li L1D_PREFETCH.TRIGGERS
+.Pq Event 4EH , Umask 04H
+Counts number of prefetch requests triggered by the Finite State Machine and
+pushed into the prefetch FIFO. Some of the prefetch requests are dropped due
+to overwrites or competition between the IP index prefetcher and streamer
+prefetcher. The prefetch FIFO contains 4 entries.
+.It Li L1D.REPL
+.Pq Event 51H , Umask 01H
+Counts the number of lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_REPL
+.Pq Event 51H , Umask 02H
+Counts the number of modified lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_EVICT
+.Pq Event 51H , Umask 04H
+Counts the number of modified lines evicted from the L1 data cache due to
+replacement.
+Counter 0, 1 only
+.It Li L1D.M_SNOOP_EVICT
+.Pq Event 51H , Umask 08H
+Counts the number of modified lines evicted from the L1 data cache due to
+snoop HITM intervention.
+Counter 0, 1 only
+.It Li L1D_CACHE_PREFETCH_LOCK_FB_HIT
+.Pq Event 52H , Umask 01H
+Counts the number of cacheable load lock speculated instructions accepted
+into the fill buffer.
+.It Li L1D_CACHE_LOCK_FB_HIT
+.Pq Event 53H , Umask 01H
+Counts the number of cacheable load lock speculated or retired instructions
+accepted into the fill buffer.
+.It Li CACHE_LOCK_CYCLES.L1D_L2
+.Pq Event 63H , Umask 01H
+Cycle count during which the L1D and L2 are locked. A lock is asserted when
+there is a locked memory access, due to uncacheable memory, a locked
+operation that spans two cache lines, or a page walk from an uncacheable
+page table.
+Counter 0, 1 only. L1D and L2 locks have a very high performance penalty and
+it is highly recommended to avoid such accesses.
+.It Li CACHE_LOCK_CYCLES.L1D
+.Pq Event 63H , Umask 02H
+Counts the number of cycles that cacheline in the L1 data cache unit is
+locked.
+Counter 0, 1 only.
+.It Li IO_TRANSACTIONS
+.Pq Event 6CH , Umask 01H
+Counts the number of completed I/O transactions.
+.It Li L1I.HITS
+.Pq Event 80H , Umask 01H
+Counts all instruction fetches that hit the L1 instruction cache.
+.It Li L1I.MISSES
+.Pq Event 80H , Umask 02H
+Counts all instruction fetches that miss the L1I cache. This includes
+instruction cache misses, streaming buffer misses, victim cache misses and
+uncacheable fetches. An instruction fetch miss is counted only once and not
+once for every cycle it is outstanding.
+.It Li L1I.READS
+.Pq Event 80H , Umask 03H
+Counts all instruction fetches, including uncacheable fetches that bypass
+the L1I.
+.It Li L1I.CYCLES_STALLED
+.Pq Event 80H , Umask 04H
+Cycle counts for which an instruction fetch stalls due to a L1I cache miss,
+ITLB miss or ITLB fault.
+.It Li LARGE_ITLB.HIT
+.Pq Event 82H , Umask 01H
+Counts number of large ITLB hits.
+.It Li ITLB_MISSES.ANY
+.Pq Event 85H , Umask 01H
+Counts the number of misses in all levels of the ITLB which cause a page
+walk.
+.It Li ITLB_MISSES.WALK_COMPLETED
+.Pq Event 85H , Umask 02H
+Counts number of misses in all levels of the ITLB which resulted in a
+completed page walk.
+.It Li ILD_STALL.LCP
+.Pq Event 87H , Umask 01H
+Cycles Instruction Length Decoder stalls due to length changing prefixes:
+66, 67 or REX.W (for EM64T) instructions which change the length of the
+decoded instruction.
+.It Li ILD_STALL.MRU
+.Pq Event 87H , Umask 02H
+Instruction Length Decoder stall cycles due to Branch Prediction Unit (BPU)
+Most Recently Used (MRU) bypass.
+.It Li ILD_STALL.IQ_FULL
+.Pq Event 87H , Umask 04H
+Stall cycles due to a full instruction queue.
+.It Li ILD_STALL.REGEN
+.Pq Event 87H , Umask 08H
+Counts the number of regen stalls.
+.It Li ILD_STALL.ANY
+.Pq Event 87H , Umask 0FH
+Counts any cycles the Instruction Length Decoder is stalled.
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H , Umask 01H
+Counts the number of conditional near branch instructions executed, but not
+necessarily retired.
+.It Li BR_INST_EXEC.DIRECT
+.Pq Event 88H , Umask 02H
+Counts all unconditional near branch instructions excluding calls and
+indirect branches.
+.It Li BR_INST_EXEC.INDIRECT_NON_CALL
+.Pq Event 88H , Umask 04H
+Counts the number of executed indirect near branch instructions that are not
+calls.
+.It Li BR_INST_EXEC.NON_CALLS
+.Pq Event 88H , Umask 07H
+Counts all non call near branch instructions executed, but not necessarily
+retired.
+.It Li BR_INST_EXEC.RETURN_NEAR
+.Pq Event 88H , Umask 08H
+Counts indirect near branches that have a return mnemonic.
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
+.Pq Event 88H , Umask 10H
+Counts unconditional near call branch instructions, excluding non call
+branch, executed.
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 88H , Umask 20H
+Counts indirect near calls, including both register and memory indirect,
+executed.
+.It Li BR_INST_EXEC.NEAR_CALLS
+.Pq Event 88H , Umask 30H
+Counts all near call branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.TAKEN
+.Pq Event 88H , Umask 40H
+Counts taken near branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.ANY
+.Pq Event 88H , Umask 7FH
+Counts all near executed branches (not necessarily retired). This includes
+only instructions and not micro-op branches. Frequent branching is not
+necessarily a major performance issue. However frequent branch
+mispredictions may be a problem.
+.It Li BR_MISP_EXEC.COND
+.Pq Event 89H , Umask 01H
+Counts the number of mispredicted conditional near branch instructions
+executed, but not necessarily retired.
+.It Li BR_MISP_EXEC.DIRECT
+.Pq Event 89H , Umask 02H
+Counts mispredicted macro unconditional near branch instructions, excluding
+calls and indirect branches (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NON_CALL
+.Pq Event 89H , Umask 04H
+Counts the number of executed mispredicted indirect near branch instructions
+that are not calls.
+.It Li BR_MISP_EXEC.NON_CALLS
+.Pq Event 89H , Umask 07H
+Counts mispredicted non call near branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H , Umask 08H
+Counts mispredicted indirect branches that have a near return mnemonic.
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
+.Pq Event 89H , Umask 10H
+Counts mispredicted non-indirect near calls executed, (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 89H , Umask 20H
+Counts mispredicted indirect near calls executed, including both register
+and memory indirect.
+.It Li BR_MISP_EXEC.NEAR_CALLS
+.Pq Event 89H , Umask 30H
+Counts all mispredicted near call branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.TAKEN
+.Pq Event 89H , Umask 40H
+Counts executed mispredicted near branches that are taken, but not
+necessarily retired.
+.It Li BR_MISP_EXEC.ANY
+.Pq Event 89H , Umask 7FH
+Counts the number of mispredicted near branch instructions that were
+executed, but not necessarily retired.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event A2H , Umask 01H
+Counts the number of Allocator resource related stalls. Includes register
+renaming buffer entries, memory buffer entries. In addition to resource
+related stalls, this event counts some other events. Includes stalls arising
+during branch misprediction recovery, such as if retirement of the
+mispredicted branch is delayed and stalls arising while store buffer is
+draining from synchronizing operations.
+Does not include stalls due to SuperQ (off core) queue full, too many cache
+misses, etc.
+.It Li RESOURCE_STALLS.LOAD
+.Pq Event A2H , Umask 02H
+Counts the cycles of stall due to lack of load buffer for load operation.
+.It Li RESOURCE_STALLS.RS_FULL
+.Pq Event A2H , Umask 04H
+This event counts the number of cycles when the number of instructions in
+the pipeline waiting for execution reaches the limit the processor can
+handle. A high count of this event indicates that there are long latency
+operations in the pipe (possibly load and store operations that miss the L2
+cache, or instructions dependent upon instructions further down the pipeline
+that have yet to retire).
+When RS is full, new instructions can not enter the reservation station and
+start execution.
+.It Li RESOURCE_STALLS.STORE
+.Pq Event A2H , Umask 08H
+This event counts the number of cycles that a resource related stall will
+occur due to the number of store instructions reaching the limit of the
+pipeline, (i.e. all store buffers are used). The stall ends when a store
+instruction commits its data to the cache or memory.
+.It Li RESOURCE_STALLS.ROB_FULL
+.Pq Event A2H , Umask 10H
+Counts the cycles of stall due to re-order buffer full.
+.It Li RESOURCE_STALLS.FPCW
+.Pq Event A2H , Umask 20H
+Counts the number of cycles while execution was stalled due to writing the
+floating-point unit (FPU) control word.
+.It Li RESOURCE_STALLS.MXCSR
+.Pq Event A2H , Umask 40H
+Stalls due to the MXCSR register rename occurring too close to a previous
+MXCSR rename. The MXCSR provides control and status for the MMX registers.
+.It Li RESOURCE_STALLS.OTHER
+.Pq Event A2H , Umask 80H
+Counts the number of cycles while execution was stalled due to other
+resource issues.
+.It Li MACRO_INSTS.FUSIONS_DECODED
+.Pq Event A6H , Umask 01H
+Counts the number of instructions decoded that are macro-fused but not
+necessarily executed or retired.
+.It Li BACLEAR_FORCE_IQ
+.Pq Event A7H , Umask 01H
+Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ
+is also responsible for providing conditional branch prediction direction
+based on a static scheme and dynamic data provided by the L2 Branch
+Prediction Unit. If the conditional branch target is not found in the Target
+Array and the IQ predicts that the branch is taken, then the IQ will force
+the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by
+the BAC generates approximately an 8 cycle bubble in the instruction fetch
+pipeline.
+.It Li LSD.UOPS
+.Pq Event A8H , Umask 01H
+Counts the number of micro-ops delivered by loop stream detector
+Use cmask=1 and invert to count cycles
+.It Li ITLB_FLUSH
+.Pq Event AEH , Umask 01H
+Counts the number of ITLB flushes
+.It Li OFFCORE_REQUESTS.L1D_WRITEBACK
+.Pq Event B0H , Umask 40H
+Counts number of L1D writebacks to the uncore.
+.It Li UOPS_EXECUTED.PORT0
+.Pq Event B1H , Umask 01H
+Counts number of Uops executed that were issued on port 0. Port 0 handles
+integer arithmetic, SIMD and FP add Uops.
+.It Li UOPS_EXECUTED.PORT1
+.Pq Event B1H , Umask 02H
+Counts number of Uops executed that were issued on port 1. Port 1 handles
+integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops.
+.It Li UOPS_EXECUTED.PORT2_CORE
+.Pq Event B1H , Umask 04H
+Counts number of Uops executed that were issued on port 2. Port 2 handles
+the load Uops. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT3_CORE
+.Pq Event B1H , Umask 08H
+Counts number of Uops executed that were issued on port 3. Port 3 handles
+store Uops. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT4_CORE
+.Pq Event B1H , Umask 10H
+Counts number of Uops executed that were issued on port 4. Port 4 handles
+the value to be stored for the store Uops issued on port 3. This is a core
+count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5
+.Pq Event B1H , Umask 1FH
+Counts cycles when the Uops executed were issued from any ports except port
+5. Use Cmask=1 for active cycles; Cmask=0 for weighted cycles; Use CMask=1,
+Invert=1 to count P0-4 stalled cycles; Use Cmask=1, Edge=1, Invert=1 to count
+P0-4 stalls.
+.It Li UOPS_EXECUTED.PORT5
+.Pq Event B1H , Umask 20H
+Counts number of Uops executed that were issued on port 5.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES
+.Pq Event B1H , Umask 3FH
+Counts cycles when the Uops are executing. Use Cmask=1 for active cycles;
+Cmask=0 for weighted cycles; Use CMask=1, Invert=1 to count P0-4 stalled
+cycles; Use Cmask=1, Edge=1, Invert=1 to count P0-4 stalls.
+.It Li UOPS_EXECUTED.PORT015
+.Pq Event B1H , Umask 40H
+Counts number of Uops executed that were issued on port 0, 1, or 5.
+use cmask=1, invert=1 to count stall cycles
+.It Li UOPS_EXECUTED.PORT234
+.Pq Event B1H , Umask 80H
+Counts number of Uops executed that were issued on port 2, 3, or 4.
+.It Li OFFCORE_REQUESTS_SQ_FULL
+.Pq Event B2H , Umask 01H
+Counts number of cycles the SQ is full to handle off-core requests.
+.It Li OFF_CORE_RESPONSE_0
+.Pq Event B7H , Umask 01H
+see Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core
+Requires programming MSR 01A6H
+.It Li SNOOP_RESPONSE.HIT
+.Pq Event B8H , Umask 01H
+Counts HIT snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITE
+.Pq Event B8H , Umask 02H
+Counts HIT E snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITM
+.Pq Event B8H , Umask 04H
+Counts HIT M snoop response sent by this thread in response to a snoop
+request.
+.It Li OFF_CORE_RESPONSE_1
+.Pq Event BBH , Umask 01H
+see Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core
+Requires programming MSR 01A7H
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 01H
+See Table A-1
+Notes: INST_RETIRED.ANY is counted by a designated fixed counter.
+INST_RETIRED.ANY_P is counted by a programmable counter and is an
+architectural performance event. Event is supported if CPUID.A.EBX[1] = 0.
+Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not
+count as retired instructions.
+.It Li INST_RETIRED.X87
+.Pq Event C0H , Umask 02H
+Counts the number of floating point computational operations retired:
+floating point computational operations executed by the assist handler and
+sub-operations of complex floating point instructions like transcendental
+instructions.
+.It Li INST_RETIRED.MMX
+.Pq Event C0H , Umask 04H
+Counts the number of MMX instructions retired.
+.It Li UOPS_RETIRED.ANY
+.Pq Event C2H , Umask 01H
+Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2,
+others=1; maximum count of 8 per cycle). Most instructions are composed of
+one or two micro-ops. Some instructions are decoded into longer sequences
+such as repeat instructions, floating point transcendental instructions, and
+assists.
+Use cmask=1 and invert to count active cycles or stalled cycles
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H , Umask 02H
+Counts the number of retirement slots used each cycle
+.It Li UOPS_RETIRED.MACRO_FUSED
+.Pq Event C2H , Umask 04H
+Counts number of macro-fused uops retired.
+.It Li MACHINE_CLEARS.CYCLES
+.Pq Event C3H , Umask 01H
+Counts the cycles machine clear is asserted.
+.It Li MACHINE_CLEARS.MEM_ORDER
+.Pq Event C3H , Umask 02H
+Counts the number of machine clears due to memory order conflicts.
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 04H
+Counts the number of times that a program writes to a code section.
+Self-modifying code causes a severe penalty in all Intel 64 and IA-32
+processors. The modified cache line is written back to the L2 and L3 caches.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 00H
+See Table A-1
+.It Li BR_INST_RETIRED.CONDITIONAL
+.Pq Event C4H , Umask 01H
+Counts the number of conditional branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_CALL
+.Pq Event C4H , Umask 02H
+Counts the number of direct & indirect near unconditional calls retired
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 04H
+Counts the number of branch instructions retired
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 00H
+See Table A-1
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H , Umask 02H
+Counts mispredicted direct & indirect near unconditional retired calls.
+.It Li SSEX_UOPS_RETIRED.PACKED_SINGLE
+.Pq Event C7H , Umask 01H
+Counts SIMD packed single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_SINGLE
+.Pq Event C7H , Umask 02H
+Counts SIMD scalar single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.PACKED_DOUBLE
+.Pq Event C7H , Umask 04H
+Counts SIMD packed double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_DOUBLE
+.Pq Event C7H , Umask 08H
+Counts SIMD scalar double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.VECTOR_INTEGER
+.Pq Event C7H , Umask 10H
+Counts 128-bit SIMD vector integer Uops retired.
+.It Li ITLB_MISS_RETIRED
+.Pq Event C8H , Umask 20H
+Counts the number of retired instructions that missed the ITLB when the
+instruction was fetched.
+.It Li MEM_LOAD_RETIRED.L1D_HIT
+.Pq Event CBH , Umask 01H
+Counts number of retired loads that hit the L1 data cache.
+.It Li MEM_LOAD_RETIRED.L2_HIT
+.Pq Event CBH , Umask 02H
+Counts number of retired loads that hit the L2 data cache.
+.It Li MEM_LOAD_RETIRED.L3_UNSHARED_HIT
+.Pq Event CBH , Umask 04H
+Counts number of retired loads that hit their own, unshared lines in the L3
+cache.
+.It Li MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM
+.Pq Event CBH , Umask 08H
+Counts number of retired loads that hit in a sibling core's L2 (on die
+core). Since the L3 is inclusive of all cores on the package, this is an L3
+hit. This counts both clean or modified hits.
+.It Li MEM_LOAD_RETIRED.L3_MISS
+.Pq Event CBH , Umask 10H
+Counts number of retired loads that miss the L3 cache. The load was
+satisfied by a remote socket, local memory or an IOH.
+.It Li MEM_LOAD_RETIRED.HIT_LFB
+.Pq Event CBH , Umask 40H
+Counts number of retired loads that miss the L1D and the address is located
+in an allocated line fill buffer and will soon be committed to cache. This
+is counting secondary L1D misses.
+.It Li MEM_LOAD_RETIRED.DTLB_MISS
+.Pq Event CBH , Umask 80H
+Counts the number of retired loads that missed the DTLB. The DTLB miss is
+not counted if the load operation causes a fault. This event counts loads
+from cacheable memory only. The event does not count loads by software
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li FP_MMX_TRANS.TO_FP
+.Pq Event CCH , Umask 01H
+Counts the first floating-point instruction following any MMX instruction.
+You can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.TO_MMX
+.Pq Event CCH , Umask 02H
+Counts the first MMX instruction following a floating-point instruction. You
+can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.ANY
+.Pq Event CCH , Umask 03H
+Counts all transitions from floating point to MMX instructions and from MMX
+instructions to floating point instructions. You can use this event to
+estimate the penalties for the transitions between floating-point and MMX
+technology states.
+.It Li MACRO_INSTS.DECODED
+.Pq Event D0H , Umask 01H
+Counts the number of instructions decoded, (but not necessarily executed or
+retired).
+.It Li UOPS_DECODED.MS
+.Pq Event D1H , Umask 02H
+Counts the number of Uops decoded by the Microcode Sequencer, MS. The MS
+delivers uops when the instruction is more than 4 uops long or a microcode
+assist is occurring.
+.It Li UOPS_DECODED.ESP_FOLDING
+.Pq Event D1H , Umask 04H
+Counts number of stack pointer (ESP) instructions decoded: push, pop, call,
+ret, etc. ESP instructions do not generate a Uop to increment or decrement
+ESP. Instead, they update an ESP_Offset register that keeps track of the
+delta to the current value of the ESP register.
+.It Li UOPS_DECODED.ESP_SYNC
+.Pq Event D1H , Umask 08H
+Counts number of stack pointer (ESP) sync operations where an ESP
+instruction is corrected by adding the ESP offset register to the current
+value of the ESP register.
+.It Li RAT_STALLS.FLAGS
+.Pq Event D2H , Umask 01H
+Counts the number of cycles during which execution stalled due to several
+reasons, one of which is a partial flag register stall. A partial register
+stall may occur when two conditions are met: 1) an instruction modifies
+some, but not all, of the flags in the flag register and 2) the next
+instruction, which depends on flags, depends on flags that were not modified
+by this instruction.
+.It Li RAT_STALLS.REGISTERS
+.Pq Event D2H , Umask 02H
+This event counts the number of cycles instruction execution latency became
+longer than the defined latency because the instruction used a register that
+was partially written by previous instruction.
+.It Li RAT_STALLS.ROB_READ_PORT
+.Pq Event D2H , Umask 04H
+Counts the number of cycles when ROB read port stalls occurred, which did
+not allow new micro-ops to enter the out-of-order pipeline. Note that, at
+this stage in the pipeline, additional stalls may occur at the same cycle
+and prevent the stalled micro-ops from entering the pipe. In such a case,
+micro-ops retry entering the execution pipe in the next cycle and the
+ROB-read port stall is counted again.
+.It Li RAT_STALLS.SCOREBOARD
+.Pq Event D2H , Umask 08H
+Counts the cycles where we stall due to microarchitecturally required
+serialization. Microcode scoreboarding stalls.
+.It Li RAT_STALLS.ANY
+.Pq Event D2H , Umask 0FH
+Counts all Register Allocation Table stall cycles due to: cycles when ROB
+read port stalls occurred, which did not allow new micro-ops to enter the
+execution pipe; cycles when partial register stalls occurred; cycles when
+flag stalls occurred; cycles when floating-point unit (FPU) status word stalls
+occurred. To count each of these conditions separately use the events:
+RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and
+RAT_STALLS.FPSW.
+.It Li SEG_RENAME_STALLS
+.Pq Event D4H , Umask 01H
+Counts the number of stall cycles due to the lack of renaming resources for
+the ES, DS, FS, and GS segment registers. If a segment is renamed but not
+retired and a second update to the same segment occurs, a stall occurs in
+the front-end of the pipeline until the renamed segment retires.
+.It Li ES_REG_RENAMES
+.Pq Event D5H , Umask 01H
+Counts the number of times the ES segment register is renamed.
+.It Li UOP_UNFUSION
+.Pq Event DBH , Umask 01H
+Counts unfusion events due to floating point exception to a fused uop.
+.It Li BR_INST_DECODED
+.Pq Event E0H , Umask 01H
+Counts the number of branch instructions decoded.
+.It Li BPU_MISSED_CALL_RET
+.Pq Event E5H , Umask 01H
+Counts number of times the Branch Prediction Unit missed predicting a call
+or return branch.
+.It Li BACLEAR.CLEAR
+.Pq Event E6H , Umask 01H
+Counts the number of times the front end is resteered, mainly when the
+Branch Prediction Unit cannot provide a correct prediction and this is
+corrected by the Branch Address Calculator at the front end. This can occur
+if the code has many branches such that they cannot be consumed by the BPU.
+Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble
+in the instruction fetch pipeline. The effect on total execution time
+depends on the surrounding code.
+.It Li BACLEAR.BAD_TARGET
+.Pq Event E6H , Umask 02H
+Counts number of Branch Address Calculator clears (BACLEAR) asserted due to
+conditional branch instructions in which there was a target hit but the
+direction was wrong. Each BACLEAR asserted by the BAC generates
+approximately an 8 cycle bubble in the instruction fetch pipeline.
+.It Li BPU_CLEARS.EARLY
+.Pq Event E8H , Umask 01H
+Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken
+branch after incorrectly assuming that it was not taken.
+The BPU clear leads to 2 cycle bubble in the Front End.
+.It Li BPU_CLEARS.LATE
+.Pq Event E8H , Umask 02H
+Counts late Branch Prediction Unit clears due to Most Recently Used
+conflicts. The BPU clear leads to a 3 cycle bubble in the Front End.
+.It Li BPU_CLEARS.ANY
+.Pq Event E8H , Umask 03H
+Counts all BPU clears.
+.It Li L2_TRANSACTIONS.LOAD
+.Pq Event F0H , Umask 01H
+Counts L2 load operations due to HW prefetch or demand loads.
+.It Li L2_TRANSACTIONS.RFO
+.Pq Event F0H , Umask 02H
+Counts L2 RFO operations due to HW prefetch or demand RFOs.
+.It Li L2_TRANSACTIONS.IFETCH
+.Pq Event F0H , Umask 04H
+Counts L2 instruction fetch operations due to HW prefetch or demand ifetch.
+.It Li L2_TRANSACTIONS.PREFETCH
+.Pq Event F0H , Umask 08H
+Counts L2 prefetch operations.
+.It Li L2_TRANSACTIONS.L1D_WB
+.Pq Event F0H , Umask 10H
+Counts L1D writeback operations to the L2.
+.It Li L2_TRANSACTIONS.FILL
+.Pq Event F0H , Umask 20H
+Counts L2 cache line fill operations due to load, RFO, L1D writeback or
+prefetch.
+.It Li L2_TRANSACTIONS.WB
+.Pq Event F0H , Umask 40H
+Counts L2 writeback operations to the L3.
+.It Li L2_TRANSACTIONS.ANY
+.Pq Event F0H , Umask 80H
+Counts all L2 cache operations.
+.It Li L2_LINES_IN.S_STATE
+.Pq Event F1H , Umask 02H
+Counts the number of cache lines allocated in the L2 cache in the S (shared)
+state.
+.It Li L2_LINES_IN.E_STATE
+.Pq Event F1H , Umask 04H
+Counts the number of cache lines allocated in the L2 cache in the E
+(exclusive) state.
+.It Li L2_LINES_IN.ANY
+.Pq Event F1H , Umask 07H
+Counts the number of cache lines allocated in the L2 cache.
+.It Li L2_LINES_OUT.DEMAND_CLEAN
+.Pq Event F2H , Umask 01H
+Counts L2 clean cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.DEMAND_DIRTY
+.Pq Event F2H , Umask 02H
+Counts L2 dirty (modified) cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.PREFETCH_CLEAN
+.Pq Event F2H , Umask 04H
+Counts L2 clean cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.PREFETCH_DIRTY
+.Pq Event F2H , Umask 08H
+Counts L2 modified cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.ANY
+.Pq Event F2H , Umask 0FH
+Counts all L2 cache lines evicted for any reason.
+.It Li SQ_MISC.SPLIT_LOCK
+.Pq Event F4H , Umask 10H
+Counts the number of SQ lock splits across a cache line.
+.It Li SQ_FULL_STALL_CYCLES
+.Pq Event F6H , Umask 01H
+Counts cycles the Super Queue is full. Neither of the threads on this core
+will be able to access the uncore.
+.It Li FP_ASSIST.ALL
+.Pq Event F7H , Umask 01H
+Counts the number of floating point operations executed that required
+micro-code assist intervention. Assists are required in the following cases:
+SSE instructions, (Denormal input when the DAZ flag is off or Underflow
+result when the FTZ flag is off): x87 instructions, (NaN or denormal are
+loaded to a register or used as input from memory, Division by 0 or
+Underflow output).
+.It Li FP_ASSIST.OUTPUT
+.Pq Event F7H , Umask 02H
+Counts number of floating point micro-code assist when the output value
+(destination register) is invalid.
+.It Li FP_ASSIST.INPUT
+.Pq Event F7H , Umask 04H
+Counts number of floating point micro-code assist when the input value (one
+of the source operands to an FP instruction) is invalid.
+.It Li SIMD_INT_64.PACKED_MPY
+.Pq Event FDH , Umask 01H
+Counts number of SIMD integer 64 bit packed multiply operations.
+.It Li SIMD_INT_64.PACKED_SHIFT
+.Pq Event FDH , Umask 02H
+Counts number of SIMD integer 64 bit packed shift operations.
+.It Li SIMD_INT_64.PACK
+.Pq Event FDH , Umask 04H
+Counts number of SIMD integer 64 bit pack operations.
+.It Li SIMD_INT_64.UNPACK
+.Pq Event FDH , Umask 08H
+Counts number of SIMD integer 64 bit unpack operations.
+.It Li SIMD_INT_64.PACKED_LOGICAL
+.Pq Event FDH , Umask 10H
+Counts number of SIMD integer 64 bit logical operations.
+.It Li SIMD_INT_64.PACKED_ARITH
+.Pq Event FDH , Umask 20H
+Counts number of SIMD integer 64 bit arithmetic operations.
+.It Li SIMD_INT_64.SHUFFLE_MOVE
+.Pq Event FDH , Umask 40H
+Counts number of SIMD integer 64 bit shuffle or move operations.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 programmable PMCs also support the following events,
+which were documented in the June 2009 document but removed in December 2009:
+.Bl -tag -width indent
+.It Li SB_FORWARD.ANY
+.Pq Event 02H , Umask 01H
+Counts the number of store forwards.
+.It Li LOAD_BLOCK.STD
+.Pq Event 03H , Umask 01H
+Counts the number of loads blocked by a preceding store with unknown data.
+.It Li LOAD_BLOCK.ADDRESS_OFFSET
+.Pq Event 03H , Umask 04H
+Counts the number of loads blocked by a preceding store address.
+.It Li SB_DRAIN.CYCLES
+.Pq Event 04H , Umask 01H
+Counts the cycles of store buffer drains.
+.It Li MISALIGN_MEM_REF.LOAD
+.Pq Event 05H , Umask 01H
+Counts the number of misaligned load references
+.It Li MISALIGN_MEM_REF.STORE
+.Pq Event 05H , Umask 02H
+Counts the number of misaligned store references
+.It Li MISALIGN_MEM_REF.ANY
+.Pq Event 05H , Umask 03H
+Counts the number of misaligned memory references
+.It Li STORE_BLOCKS.NOT_STA
+.Pq Event 06H , Umask 01H
+This event counts the number of load operations delayed caused by preceding
+stores whose addresses are known but whose data is unknown, and preceding
+stores that conflict with the load but which incompletely overlap the load.
+.It Li STORE_BLOCKS.STA
+.Pq Event 06H , Umask 02H
+This event counts load operations delayed caused by preceding stores whose
+addresses are unknown (STA block).
+.It Li STORE_BLOCKS.ANY
+.Pq Event 06H , Umask 0FH
+All loads delayed due to store blocks
+.It Li MEMORY_DISAMBIGURATION.RESET
+.Pq Event 09H , Umask 01H
+Counts memory disambiguation reset cycles.
+.It Li MEMORY_DISAMBIGURATION.SUCCESS
+.Pq Event 09H , Umask 02H
+Counts the number of loads for which memory disambiguation succeeded.
+.It Li MEMORY_DISAMBIGURATION.WATCHDOG
+.Pq Event 09H , Umask 04H
+Counts the number of times the memory disambiguation watchdog kicked in.
+.It Li MEMORY_DISAMBIGURATION.WATCH_CYCLES
+.Pq Event 09H , Umask 08H
+Counts the cycles that the memory disambiguation watchdog is active.
+set invert=1, cmask = 1
+.It Li HW_INT.RCV
+.Pq Event 1DH , Umask 01H
+Number of interrupts received.
+.It Li HW_INT.CYCLES_MASKED
+.Pq Event 1DH , Umask 02H
+Number of cycles interrupts are masked.
+.It Li HW_INT.CYCLES_PENDING_AND_MASKED
+.Pq Event 1DH , Umask 04H
+Number of cycles interrupts are pending and masked
+.It Li L2_WRITE.RFO.E_STATE
+.Pq Event 27H , Umask 04H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the E (exclusive) state. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li HW_INT.CYCLES_PENDING_AND_MASKED
+.Pq Event 27H , Umask 04H
+LONGEST_LAT_CACH E.MISS
+.It Li UOPS_DECODED.DEC0
+.Pq Event 3DH , Umask 01H
+Counts micro-ops decoded by decoder 0.
+.It Li L1D_CACHE_ST.I_STATE
+.Pq Event 41H , Umask 01H
+Counts L1 data cache store RFO requests where the cache line to be loaded is
+in the I state.
+Counter 0, 1 only
+.It Li L1D_CACHE_ST.MESI
+.Pq Event 41H , Umask 0FH
+Counts L1 data cache store RFO requests.
+Counter 0, 1 only
+.It Li DTLB_MISSES.PDE_MISS
+.Pq Event 49H , Umask 20H
+Number of DTLB cache misses where the low part of the linear to physical
+address translation was missed.
+.It Li DTLB_MISSES.PDP_MISS
+.Pq Event 49H , Umask 40H
+Number of DTLB misses where the high part of the linear to physical address
+translation was missed.
+.It Li DTLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 49H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li SSE_MEM_EXEC.NTA
+.Pq Event 4BH , Umask 01H
+Counts number of SSE NTA prefetch/weakly-ordered instructions which missed
+the L1 data cache.
+.It Li SSE_MEM_EXEC.STREAMING_STORES
+.Pq Event 4BH , Umask 08H
+Counts number of SSE non temporal stores
+.It Li SFENCE_CYCLES
+.Pq Event 4DH , Umask 01H
+Counts store fence cycles
+.It Li EPT.EPDE_MISS
+.Pq Event 4FH , Umask 02H
+Counts Extended Page Directory Entry misses. The Extended Page Directory
+cache is used by Virtual Machine operating systems while the guest operating
+systems use the standard TLB caches.
+.It Li EPT.EPDPE_HIT
+.Pq Event 4FH , Umask 04H
+Counts Extended Page Directory Pointer Entry hits.
+.It Li EPT.EPDPE_MISS
+.Pq Event 4FH , Umask 08H
+Counts Extended Page Directory Pointer Entry misses.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA
+.Pq Event 60H , Umask 01H
+Counts weighted cycles of offcore demand data read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE
+.Pq Event 60H , Umask 02H
+Counts weighted cycles of offcore demand code read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO
+.Pq Event 60H , Umask 04H
+Counts weighted cycles of offcore demand RFO requests. Does not include L2
+prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.ANY.READ
+.Pq Event 60H , Umask 08H
+Counts weighted cycles of offcore read requests of any kind. Include L2
+prefetch requests.
+counter 0
+.It Li IFU_IVC.FULL
+.Pq Event 81H , Umask 01H
+Instruction Fetch Unit victim cache full.
+.It Li IFU_IVC.L1I_EVICTION
+.Pq Event 81H , Umask 02H
+L1 Instruction cache evictions.
+.It Li L1I_OPPORTUNISTIC_HITS
+.Pq Event 83H , Umask 01H
+Opportunistic hits in streaming.
+.It Li ITLB_MISSES.WALK_CYCLES
+.Pq Event 85H , Umask 04H
+Counts ITLB miss page walk cycles.
+.It Li ITLB_MISSES.PMH_BUSY_CYCLES
+.Pq Event 85H , Umask 08H
+Counts PMH busy cycles.
+.It Li ITLB_MISSES.STLB_HIT
+.Pq Event 85H , Umask 10H
+Counts the number of ITLB misses that hit in the second level TLB.
+.It Li ITLB_MISSES.PDE_MISS
+.Pq Event 85H , Umask 20H
+Number of ITLB misses where the low part of the linear to physical address
+translation was missed.
+.It Li ITLB_MISSES.PDP_MISS
+.Pq Event 85H , Umask 40H
+Number of ITLB misses where the high part of the linear to physical address
+translation was missed.
+.It Li ITLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 85H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_DATA
+.Pq Event B0H , Umask 01H
+Counts number of offcore demand data read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_CODE
+.Pq Event B0H , Umask 02H
+Counts number of offcore demand code read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.RFO
+.Pq Event B0H , Umask 04H
+Counts number of offcore demand RFO requests. Does not count L2 prefetch
+requests.
+.It Li OFFCORE_REQUESTS.ANY.READ
+.Pq Event B0H , Umask 08H
+Counts number of offcore read requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.ANY.RFO
+.Pq Event B0H , Umask 10H
+Counts number of offcore RFO requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.UNCACHED_MEM
+.Pq Event B0H , Umask 20H
+Counts number of offcore uncached memory requests.
+.It Li OFFCORE_REQUESTS.ANY
+.Pq Event B0H , Umask 80H
+Counts all offcore requests.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.DATA
+.Pq Event B3H , Umask 01H
+Counts weighted cycles of snoopq requests for data. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE
+.Pq Event B3H , Umask 02H
+Counts weighted cycles of snoopq invalidate requests. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.CODE
+.Pq Event B3H , Umask 04H
+Counts weighted cycles of snoopq requests for code. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li PIC_ACCESSES.TPR_READS
+.Pq Event BAH , Umask 01H
+Counts number of TPR reads
+.It Li PIC_ACCESSES.TPR_WRITES
+.Pq Event BAH , Umask 02H
+Counts number of TPR writes
+one or two micro-ops. Some instructions are decoded into longer sequences
+.It Li MACHINE_CLEARS.FUSION_ASSIST
+.Pq Event C3H , Umask 10H
+Counts the number of macro-fusion assists
+Counts SIMD packed single- precision floating point Uops retired.
+.It Li BOGUS_BR
+.Pq Event E4H , Umask 01H
+Counts the number of bogus branches.
+.It Li L2_HW_PREFETCH.HIT
+.Pq Event F3H , Umask 01H
+Count L2 HW prefetcher detector hits
+.It Li L2_HW_PREFETCH.ALLOC
+.Pq Event F3H , Umask 02H
+Count L2 HW prefetcher allocations
+.It Li L2_HW_PREFETCH.DATA_TRIGGER
+.Pq Event F3H , Umask 04H
+Count L2 HW data prefetcher triggered
+.It Li L2_HW_PREFETCH.CODE_TRIGGER
+.Pq Event F3H , Umask 08H
+Count L2 HW code prefetcher triggered
+.It Li L2_HW_PREFETCH.DCA_TRIGGER
+.Pq Event F3H , Umask 10H
+Count L2 HW DCA prefetcher triggered
+.It Li L2_HW_PREFETCH.KICK_START
+.Pq Event F3H , Umask 20H
+Count L2 HW prefetcher kick started
+.It Li SQ_MISC.PROMOTION
+.Pq Event F4H , Umask 01H
+Counts the number of L2 secondary misses that hit the Super Queue.
+.It Li SQ_MISC.PROMOTION_POST_GO
+.Pq Event F4H , Umask 02H
+Counts the number of L2 secondary misses during the Super Queue filling L2.
+.It Li SQ_MISC.LRU_HINTS
+.Pq Event F4H , Umask 04H
+Counts number of Super Queue LRU hints sent to L3.
+.It Li SQ_MISC.FILL_DROPPED
+.Pq Event F4H , Umask 08H
+Counts the number of SQ L2 fills dropped due to L2 busy.
+.It Li SEGMENT_REG_LOADS
+.Pq Event F8H , Umask 01H
+Counts number of segment register loads.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.corei7uc.3 b/lib/libpmc/pmc.corei7uc.3
new file mode 100644
index 0000000..2c1b3dd
--- /dev/null
+++ b/lib/libpmc/pmc.corei7uc.3
@@ -0,0 +1,880 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Os
+.Dt PMC.COREI7UC 3
+.Sh NAME
+.Nm pmc.corei7uc
+.Nd uncore measurement events for
+.Tn Intel
+.Tn Core i7 and Xeon 5500
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Core i7"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs contain 2 classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_UCP"
+.It Li PMC_CLASS_UCF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_UCP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Core i7 and Xeon 5500 PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss COREI7 AND XEON 5500 UNCORE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.ucf 3 .
+.Ss COREI7 AND XEON 5500 UNCORE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Core i7 and Xeon 5500 uncore programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li GQ_CYCLES_FULL.READ_TRACKER
+.Pq Event 00H , Umask 01H
+Uncore cycles Global Queue read tracker is full.
+.It Li GQ_CYCLES_FULL.WRITE_TRACKER
+.Pq Event 00H , Umask 02H
+Uncore cycles Global Queue write tracker is full.
+.It Li GQ_CYCLES_FULL.PEER_PROBE_TRACKER
+.Pq Event 00H , Umask 04H
+Uncore cycles Global Queue peer probe tracker is full. The peer probe
+tracker queue tracks snoops from the IOH and remote sockets.
+.It Li GQ_CYCLES_NOT_EMPTY.READ_TRACKER
+.Pq Event 01H , Umask 01H
+Uncore cycles where Global Queue read tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER
+.Pq Event 01H , Umask 02H
+Uncore cycles where Global Queue write tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER
+.Pq Event 01H , Umask 04H
+Uncore cycles where Global Queue peer probe tracker has at least one valid
+entry. The peer probe tracker queue tracks IOH and remote socket snoops.
+.It Li GQ_ALLOC.READ_TRACKER
+.Pq Event 03H , Umask 01H
+Counts the number of read tracker allocate to deallocate entries. The GQ
+read tracker allocate to deallocate occupancy count is divided by the count
+to obtain the average read tracker latency.
+.It Li GQ_ALLOC.RT_L3_MISS
+.Pq Event 03H , Umask 02H
+Counts the number of GQ read tracker entries for which a full cache line read
+has missed the L3. The GQ read tracker L3 miss to fill occupancy count is
+divided by this count to obtain the average cache line read L3 miss latency.
+The latency represents the time after which the L3 has determined that the
+cache line has missed. The time between a GQ read tracker allocation and the
+L3 determining that the cache line has missed is the average L3 hit latency.
+The total L3 cache line read miss latency is the hit latency + L3 miss
+latency.
+.It Li GQ_ALLOC.RT_TO_L3_RESP
+.Pq Event 03H , Umask 04H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy
+count is divided by this count to obtain the average L3 hit latency.
+.It Li GQ_ALLOC.RT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 08H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker, have missed in the L3 and have not acquired a Request Transaction
+ID. The GQ read tracker L3 miss to RTID acquired occupancy count is
+divided by this count to obtain the average latency for a read L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 10H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker, have missed in the L3 and have not acquired a Request
+Transaction ID. The GQ write tracker L3 miss to RTID occupancy count is
+divided by this count to obtain the average latency for a write L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WRITE_TRACKER
+.Pq Event 03H , Umask 20H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker queue that miss the L3. The GQ write tracker occupancy count
+is divided by this count to obtain the average L3 write miss latency.
+.It Li GQ_ALLOC.PEER_PROBE_TRACKER
+.Pq Event 03H , Umask 40H
+Counts the number of GQ peer probe tracker (snoop) entries that are
+allocated in the peer probe tracker queue that miss the L3. The GQ peer
+probe occupancy count is divided by this count to obtain the average L3 peer
+probe miss latency.
+.It Li GQ_DATA.FROM_QPI
+.Pq Event 04H , Umask 01H
+Cycles Global Queue Quickpath Interface input data port is busy importing
+data from the Quickpath Interface. Each cycle the input port can transfer 8
+or 16 bytes of data.
+.It Li GQ_DATA.FROM_QMC
+.Pq Event 04H , Umask 02H
+Cycles Global Queue Quickpath Memory Interface input data port is busy
+importing data from the Quickpath Memory Interface. Each cycle the input
+port can transfer 8 or 16 bytes of data.
+.It Li GQ_DATA.FROM_L3
+.Pq Event 04H , Umask 04H
+Cycles GQ L3 input data port is busy importing data from the Last Level
+Cache. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_02
+.Pq Event 04H , Umask 08H
+Cycles GQ Core 0 and 2 input data port is busy importing data from processor
+cores 0 and 2. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_13
+.Pq Event 04H , Umask 10H
+Cycles GQ Core 1 and 3 input data port is busy importing data from processor
+cores 1 and 3. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_QPI_QMC
+.Pq Event 05H , Umask 01H
+Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath
+Interface or Quickpath Memory Interface. Each cycle the output port can
+transfer 32 bytes of data.
+.It Li GQ_DATA.TO_L3
+.Pq Event 05H , Umask 02H
+Cycles GQ L3 output data port is busy sending data to the Last Level Cache.
+Each cycle the output port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_CORES
+.Pq Event 05H , Umask 04H
+Cycles GQ Core output data port is busy sending data to the Cores. Each
+cycle the output port can transfer 32 bytes of data.
+.It Li SNP_RESP_TO_LOCAL_HOME.I_STATE
+.Pq Event 06H , Umask 01H
+Number of snoop responses to the local home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_LOCAL_HOME.S_STATE
+.Pq Event 06H , Umask 02H
+Number of snoop responses to the local home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE
+.Pq Event 06H , Umask 04H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the local home in the S
+state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE
+.Pq Event 06H , Umask 08H
+Number of responses to read invalidate snoops to the local home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the local home in the M state.
+.It Li SNP_RESP_TO_LOCAL_HOME.CONFLICT
+.Pq Event 06H , Umask 10H
+Number of conflict snoop responses sent to the local home.
+.It Li SNP_RESP_TO_LOCAL_HOME.WB
+.Pq Event 06H , Umask 20H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.I_STATE
+.Pq Event 07H , Umask 01H
+Number of snoop responses to a remote home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_REMOTE_HOME.S_STATE
+.Pq Event 07H , Umask 02H
+Number of snoop responses to a remote home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE
+.Pq Event 07H , Umask 04H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the remote home in the S
+state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE
+.Pq Event 07H , Umask 08H
+Number of responses to read invalidate snoops to a remote home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the remote home in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.CONFLICT
+.Pq Event 07H , Umask 10H
+Number of conflict snoop responses sent to a remote home.
+.It Li SNP_RESP_TO_REMOTE_HOME.WB
+.Pq Event 07H , Umask 20H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.HITM
+.Pq Event 07H , Umask 24H
+Number of HITM snoop responses to a remote home
+.It Li L3_HITS.READ
+.Pq Event 08H , Umask 01H
+Number of code read, data read and RFO requests that hit in the L3
+.It Li L3_HITS.WRITE
+.Pq Event 08H , Umask 02H
+Number of writeback requests that hit in the L3. Writebacks from the cores
+will always result in L3 hits due to the inclusive property of the L3.
+.It Li L3_HITS.PROBE
+.Pq Event 08H , Umask 04H
+Number of snoops from IOH or remote sockets that hit in the L3.
+.It Li L3_HITS.ANY
+.Pq Event 08H , Umask 03H
+Number of reads and writes that hit the L3.
+.It Li L3_MISS.READ
+.Pq Event 09H , Umask 01H
+Number of code read, data read and RFO requests that miss the L3.
+.It Li L3_MISS.WRITE
+.Pq Event 09H , Umask 02H
+Number of writeback requests that miss the L3. Should always be zero as
+writebacks from the cores will always result in L3 hits due to the inclusive
+property of the L3.
+.It Li L3_MISS.PROBE
+.Pq Event 09H , Umask 04H
+Number of snoops from IOH or remote sockets that miss the L3.
+.It Li L3_MISS.ANY
+.Pq Event 09H , Umask 03H
+Number of reads and writes that miss the L3.
+.It Li L3_LINES_IN.M_STATE
+.Pq Event 0AH , Umask 01H
+Counts the number of L3 lines allocated in M state. The only time a cache
+line is allocated in the M state is when a line in the M state is forwarded
+due to a Snoop Read Invalidate Own request.
+.It Li L3_LINES_IN.E_STATE
+.Pq Event 0AH , Umask 02H
+Counts the number of L3 lines allocated in E state.
+.It Li L3_LINES_IN.S_STATE
+.Pq Event 0AH , Umask 04H
+Counts the number of L3 lines allocated in S state.
+.It Li L3_LINES_IN.F_STATE
+.Pq Event 0AH , Umask 08H
+Counts the number of L3 lines allocated in F state.
+.It Li L3_LINES_IN.ANY
+.Pq Event 0AH , Umask 0FH
+Counts the number of L3 lines allocated in any state.
+.It Li L3_LINES_OUT.M_STATE
+.Pq Event 0BH , Umask 01H
+Counts the number of L3 lines victimized that were in the M state. When the
+victim cache line is in M state, the line is written to its home cache agent
+which can be either local or remote.
+.It Li L3_LINES_OUT.E_STATE
+.Pq Event 0BH , Umask 02H
+Counts the number of L3 lines victimized that were in the E state.
+.It Li L3_LINES_OUT.S_STATE
+.Pq Event 0BH , Umask 04H
+Counts the number of L3 lines victimized that were in the S state.
+.It Li L3_LINES_OUT.I_STATE
+.Pq Event 0BH , Umask 08H
+Counts the number of L3 lines victimized that were in the I state.
+.It Li L3_LINES_OUT.F_STATE
+.Pq Event 0BH , Umask 10H
+Counts the number of L3 lines victimized that were in the F state.
+.It Li L3_LINES_OUT.ANY
+.Pq Event 0BH , Umask 1FH
+Counts the number of L3 lines victimized in any state.
+.It Li QHL_REQUESTS.IOH_READS
+.Pq Event 20H , Umask 01H
+Counts number of Quickpath Home Logic read requests from the IOH.
+.It Li QHL_REQUESTS.IOH_WRITES
+.Pq Event 20H , Umask 02H
+Counts number of Quickpath Home Logic write requests from the IOH.
+.It Li QHL_REQUESTS.REMOTE_READS
+.Pq Event 20H , Umask 04H
+Counts number of Quickpath Home Logic read requests from a remote socket.
+.It Li QHL_REQUESTS.REMOTE_WRITES
+.Pq Event 20H , Umask 08H
+Counts number of Quickpath Home Logic write requests from a remote socket.
+.It Li QHL_REQUESTS.LOCAL_READS
+.Pq Event 20H , Umask 10H
+Counts number of Quickpath Home Logic read requests from the local socket.
+.It Li QHL_REQUESTS.LOCAL_WRITES
+.Pq Event 20H , Umask 20H
+Counts number of Quickpath Home Logic write requests from the local socket.
+.It Li QHL_CYCLES_FULL.IOH
+.Pq Event 21H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH are full.
+.It Li QHL_CYCLES_FULL.REMOTE
+.Pq Event 21H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker
+are full.
+.It Li QHL_CYCLES_FULL.LOCAL
+.Pq Event 21H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker are
+full.
+.It Li QHL_CYCLES_NOT_EMPTY.IOH
+.Pq Event 22H , Umask 01H
+Counts uclk cycles the Quickpath Home Logic IOH tracker is busy.
+.It Li QHL_CYCLES_NOT_EMPTY.REMOTE
+.Pq Event 22H , Umask 02H
+Counts uclk cycles the Quickpath Home Logic remote tracker is
+busy.
+.It Li QHL_CYCLES_NOT_EMPTY.LOCAL
+.Pq Event 22H , Umask 04H
+Counts uclk cycles the Quickpath Home Logic local tracker is
+busy.
+.It Li QHL_OCCUPANCY.IOH
+.Pq Event 23H , Umask 01H
+QHL IOH tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.REMOTE
+.Pq Event 23H , Umask 02H
+QHL remote tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.LOCAL
+.Pq Event 23H , Umask 04H
+QHL local tracker allocate to deallocate read occupancy.
+.It Li QHL_ADDRESS_CONFLICTS.2WAY
+.Pq Event 24H , Umask 02H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 2
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_ADDRESS_CONFLICTS.3WAY
+.Pq Event 24H , Umask 04H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 3
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_CONFLICT_CYCLES.IOH
+.Pq Event 25H , Umask 01H
+Counts cycles the Quickpath Home Logic IOH Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.REMOTE
+.Pq Event 25H , Umask 02H
+Counts cycles the Quickpath Home Logic Remote Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.LOCAL
+.Pq Event 25H , Umask 04H
+Counts cycles the Quickpath Home Logic Local Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_TO_QMC_BYPASS
+.Pq Event 26H , Umask 01H
+Counts number of requests to the Quickpath Memory Controller that bypass the
+Quickpath Home Logic. All local accesses can be bypassed. For remote
+requests, only read requests can be bypassed.
+.It Li QMC_NORMAL_FULL.READ.CH0
+.Pq Event 27H , Umask 01H
+Uncore cycles all the entries in the DRAM channel 0 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.READ.CH1
+.Pq Event 27H , Umask 02H
+Uncore cycles all the entries in the DRAM channel 1 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.READ.CH2
+.Pq Event 27H , Umask 04H
+Uncore cycles all the entries in the DRAM channel 2 medium or low priority
+queue are occupied with read requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH0
+.Pq Event 27H , Umask 08H
+Uncore cycles all the entries in the DRAM channel 0 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH1
+.Pq Event 27H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_NORMAL_FULL.WRITE.CH2
+.Pq Event 27H , Umask 20H
+Uncore cycles all the entries in the DRAM channel 2 medium or low priority
+queue are occupied with write requests.
+.It Li QMC_ISOC_FULL.READ.CH0
+.Pq Event 28H , Umask 01H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH1
+.Pq Event 28H , Umask 02H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH2
+.Pq Event 28H , Umask 04H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.WRITE.CH0
+.Pq Event 28H , Umask 08H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH1
+.Pq Event 28H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH2
+.Pq Event 28H , Umask 20H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_BUSY.READ.CH0
+.Pq Event 29H , Umask 01H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 0.
+.It Li QMC_BUSY.READ.CH1
+.Pq Event 29H , Umask 02H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 1.
+.It Li QMC_BUSY.READ.CH2
+.Pq Event 29H , Umask 04H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 2.
+.It Li QMC_BUSY.WRITE.CH0
+.Pq Event 29H , Umask 08H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 0.
+.It Li QMC_BUSY.WRITE.CH1
+.Pq Event 29H , Umask 10H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 1.
+.It Li QMC_BUSY.WRITE.CH2
+.Pq Event 29H , Umask 20H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 2.
+.It Li QMC_OCCUPANCY.CH0
+.Pq Event 2AH , Umask 01H
+IMC channel 0 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH1
+.Pq Event 2AH , Umask 02H
+IMC channel 1 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH2
+.Pq Event 2AH , Umask 04H
+IMC channel 2 normal read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH0
+.Pq Event 2BH , Umask 01H
+IMC channel 0 issoc read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH1
+.Pq Event 2BH , Umask 02H
+IMC channel 1 issoc read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH2
+.Pq Event 2BH , Umask 04H
+IMC channel 2 issoc read request occupancy.
+.It Li QMC_ISSOC_READS.ANY
+.Pq Event 2BH , Umask 07H
+IMC issoc read request occupancy.
+.It Li QMC_NORMAL_READS.CH0
+.Pq Event 2CH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 medium and low
+priority read requests. The QMC channel 0 normal read occupancy divided by
+this count provides the average QMC channel 0 read latency.
+.It Li QMC_NORMAL_READS.CH1
+.Pq Event 2CH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 medium and low
+priority read requests. The QMC channel 1 normal read occupancy divided by
+this count provides the average QMC channel 1 read latency.
+.It Li QMC_NORMAL_READS.CH2
+.Pq Event 2CH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 medium and low
+priority read requests. The QMC channel 2 normal read occupancy divided by
+this count provides the average QMC channel 2 read latency.
+.It Li QMC_NORMAL_READS.ANY
+.Pq Event 2CH , Umask 07H
+Counts the number of Quickpath Memory Controller medium and low priority
+read requests. The QMC normal read occupancy divided by this count provides
+the average QMC read latency.
+.It Li QMC_HIGH_PRIORITY_READS.CH0
+.Pq Event 2DH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH1
+.Pq Event 2DH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH2
+.Pq Event 2DH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.ANY
+.Pq Event 2DH , Umask 07H
+Counts the number of Quickpath Memory Controller high priority isochronous
+read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH0
+.Pq Event 2EH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH1
+.Pq Event 2EH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH2
+.Pq Event 2EH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.ANY
+.Pq Event 2EH , Umask 07H
+Counts the number of Quickpath Memory Controller critical priority
+isochronous read requests.
+.It Li QMC_WRITES.FULL.CH0
+.Pq Event 2FH , Umask 01H
+Counts number of full cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.FULL.CH1
+.Pq Event 2FH , Umask 02H
+Counts number of full cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.FULL.CH2
+.Pq Event 2FH , Umask 04H
+Counts number of full cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.FULL.ANY
+.Pq Event 2FH , Umask 07H
+Counts number of full cache line writes to DRAM.
+.It Li QMC_WRITES.PARTIAL.CH0
+.Pq Event 2FH , Umask 08H
+Counts number of partial cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.PARTIAL.CH1
+.Pq Event 2FH , Umask 10H
+Counts number of partial cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.PARTIAL.CH2
+.Pq Event 2FH , Umask 20H
+Counts number of partial cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.PARTIAL.ANY
+.Pq Event 2FH , Umask 38H
+Counts number of partial cache line writes to DRAM.
+.It Li QMC_CANCEL.CH0
+.Pq Event 30H , Umask 01H
+Counts number of DRAM channel 0 cancel requests.
+.It Li QMC_CANCEL.CH1
+.Pq Event 30H , Umask 02H
+Counts number of DRAM channel 1 cancel requests.
+.It Li QMC_CANCEL.CH2
+.Pq Event 30H , Umask 04H
+Counts number of DRAM channel 2 cancel requests.
+.It Li QMC_CANCEL.ANY
+.Pq Event 30H , Umask 07H
+Counts number of DRAM cancel requests.
+.It Li QMC_PRIORITY_UPDATES.CH0
+.Pq Event 31H , Umask 01H
+Counts number of DRAM channel 0 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH1
+.Pq Event 31H , Umask 02H
+Counts number of DRAM channel 1 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH2
+.Pq Event 31H , Umask 04H
+Counts number of DRAM channel 2 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.ANY
+.Pq Event 31H , Umask 07H
+Counts number of DRAM priority updates. A priority update occurs when an
+ISOC high or critical request is received by the QHL and there is a matching
+request with normal priority that has already been issued to the QMC. In
+this instance, the QHL will send a priority update to QMC to expedite the
+request.
+.It Li QHL_FRC_ACK_CNFLTS.LOCAL
+.Pq Event 33H , Umask 04H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the local home.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0
+.Pq Event 40H , Umask 01H
+Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0
+.Pq Event 40H , Umask 02H
+Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0
+.Pq Event 40H , Umask 04H
+Counts cycles the Quickpath outbound link 0 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1
+.Pq Event 40H , Umask 08H
+Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1
+.Pq Event 40H , Umask 10H
+Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1
+.Pq Event 40H , Umask 20H
+Counts cycles the Quickpath outbound link 1 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_0
+.Pq Event 40H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_1
+.Pq Event 40H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0
+.Pq Event 41H , Umask 01H
+Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0
+.Pq Event 41H , Umask 02H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0
+.Pq Event 41H , Umask 04H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1
+.Pq Event 41H , Umask 08H
+Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1
+.Pq Event 41H , Umask 10H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1
+.Pq Event 41H , Umask 20H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_0
+.Pq Event 41H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_1
+.Pq Event 41H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_HEADER.BUSY.LINK_0
+.Pq Event 42H , Umask 02H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 0 is busy.
+.It Li QPI_TX_HEADER.BUSY.LINK_1
+.Pq Event 42H , Umask 08H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 1 is busy.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0
+.Pq Event 43H , Umask 01H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1
+.Pq Event 43H , Umask 02H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li DRAM_OPEN.CH0
+.Pq Event 60H , Umask 01H
+Counts number of DRAM Channel 0 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH1
+.Pq Event 60H , Umask 02H
+Counts number of DRAM Channel 1 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH2
+.Pq Event 60H , Umask 04H
+Counts number of DRAM Channel 2 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_PAGE_CLOSE.CH0
+.Pq Event 61H , Umask 01H
+DRAM channel 0 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH1
+.Pq Event 61H , Umask 02H
+DRAM channel 1 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH2
+.Pq Event 61H , Umask 04H
+DRAM channel 2 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH0
+.Pq Event 62H , Umask 01H
+Counts the number of precharges (PRE) that were issued to DRAM channel 0
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH1
+.Pq Event 62H , Umask 02H
+Counts the number of precharges (PRE) that were issued to DRAM channel 1
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH2
+.Pq Event 62H , Umask 04H
+Counts the number of precharges (PRE) that were issued to DRAM channel 2
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_READ_CAS.CH0
+.Pq Event 63H , Umask 01H
+Counts the number of times a read CAS command was issued on DRAM channel 0.
+.It Li DRAM_READ_CAS.AUTOPRE_CH0
+.Pq Event 63H , Umask 02H
+Counts the number of times a read CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH1
+.Pq Event 63H , Umask 04H
+Counts the number of times a read CAS command was issued on DRAM channel 1.
+.It Li DRAM_READ_CAS.AUTOPRE_CH1
+.Pq Event 63H , Umask 08H
+Counts the number of times a read CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH2
+.Pq Event 63H , Umask 10H
+Counts the number of times a read CAS command was issued on DRAM channel 2.
+.It Li DRAM_READ_CAS.AUTOPRE_CH2
+.Pq Event 63H , Umask 20H
+Counts the number of times a read CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH0
+.Pq Event 64H , Umask 01H
+Counts the number of times a write CAS command was issued on DRAM channel 0.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH0
+.Pq Event 64H , Umask 02H
+Counts the number of times a write CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH1
+.Pq Event 64H , Umask 04H
+Counts the number of times a write CAS command was issued on DRAM channel 1.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH1
+.Pq Event 64H , Umask 08H
+Counts the number of times a write CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH2
+.Pq Event 64H , Umask 10H
+Counts the number of times a write CAS command was issued on DRAM channel 2.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH2
+.Pq Event 64H , Umask 20H
+Counts the number of times a write CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_REFRESH.CH0
+.Pq Event 65H , Umask 01H
+Counts number of DRAM channel 0 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH1
+.Pq Event 65H , Umask 02H
+Counts number of DRAM channel 1 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH2
+.Pq Event 65H , Umask 04H
+Counts number of DRAM channel 2 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_PRE_ALL.CH0
+.Pq Event 66H , Umask 01H
+Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH1
+.Pq Event 66H , Umask 02H
+Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH2
+.Pq Event 66H , Umask 04H
+Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.ucf.3 b/lib/libpmc/pmc.ucf.3
new file mode 100644
index 0000000..c8f2468
--- /dev/null
+++ b/lib/libpmc/pmc.ucf.3
@@ -0,0 +1,115 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 30, 2010
+.Os
+.Dt PMC.UCF 3
+.Sh NAME
+.Nm pmc.ucf
+.Nd measurement events for
+.Tn Intel
+uncore fixed function performance counters
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+Each fixed-function PMC measures a specific hardware event.
+The number of fixed-function PMCs implemented in a CPU can vary.
+The number of fixed-function PMCs present can be determined at runtime
+by using function
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel uncore fixed-function PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Pp
+.Ss PMC Capabilities
+Fixed-function PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta \&No
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta \&No
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta \&No
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Class Name Prefix
+These PMCs are named using a class name prefix of
+.Dq Li ucf- .
+.Ss Event Specifiers (Fixed Function PMCs)
+The fixed function PMCs are selectable using the following
+event names:
+.Bl -tag -width indent
+.It Li UCLOCK
+.Pq Fixed Function Counter 0
+The fixed-function uncore counter increments at the rate of the U-clock.
+The frequency of the uncore clock domain can be determined from the uncore
+clock ratio which is available in the PCI configuration space register at
+offset C0H under device number 0 and Function 0.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.core2 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
+
+
diff --git a/lib/libpmc/pmc.westmere.3 b/lib/libpmc/pmc.westmere.3
new file mode 100644
index 0000000..0b87e50
--- /dev/null
+++ b/lib/libpmc/pmc.westmere.3
@@ -0,0 +1,1329 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Os
+.Dt PMC.WESTMERE 3
+.Sh NAME
+.Nm pmc.westmere
+.Nd measurement events for
+.Tn Intel
+.Tn Westmere
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Westmere"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs may contain up to three classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_IAP"
+.It Li PMC_CLASS_IAF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_IAP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Westmere PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss WESTMERE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.iaf 3 .
+.Ss WESTMERE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta Yes
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta Yes
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta Yes
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li rsp= Ns Ar value
+Configure the Off-core Response bits.
+.Bl -tag -width indent
+.It Li DMND_DATA_RD
+Counts the number of demand and DCU prefetch data reads of full
+and partial cachelines as well as demand data page table entry
+cacheline reads. Does not count L2 data read prefetches or
+instruction fetches.
+.It Li DMND_RFO
+Counts the number of demand and DCU prefetch reads for ownership
+(RFO) requests generated by a write to data cacheline. Does not
+count L2 RFO.
+.It Li DMND_IFETCH
+Counts the number of demand and DCU prefetch instruction cacheline
+reads. Does not count L2 code read prefetches.
+.It Li WB
+Counts the number of writeback (modified to exclusive) transactions.
+.It Li PF_DATA_RD
+Counts the number of data cacheline reads generated by L2 prefetchers.
+.It Li PF_RFO
+Counts the number of RFO requests generated by L2 prefetchers.
+.It Li PF_IFETCH
+Counts the number of code reads generated by L2 prefetchers.
+.It Li OTHER
+Counts one of the following transaction types, including L3 invalidate,
+I/O, full or partial writes, WC or non-temporal stores, CLFLUSH, Fences,
+lock, unlock, split lock.
+.It Li UNCORE_HIT
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+with no coherency actions required (snooping).
+.It Li OTHER_CORE_HIT_SNP
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and were serviced by another core with a cross core snoop where no modified
+copies were found (clean).
+.It Li OTHER_CORE_HITM
+L3 Hit: local or remote home requests that hit L3 cache in the uncore
+and were serviced by another core with a cross core snoop where modified
+copies were found (HITM).
+.It Li REMOTE_CACHE_FWD
+L3 Miss: local homed requests that missed the L3 cache and were serviced
+by forwarded data following a cross package snoop where no modified
+copies were found. (Remote home requests are not counted.)
+.It Li REMOTE_DRAM
+L3 Miss: remote home requests that missed the L3 cache and were serviced
+by remote DRAM.
+.It Li LOCAL_DRAM
+L3 Miss: local home requests that missed the L3 cache and were serviced
+by local DRAM.
+.It Li NON_DRAM
+Non-DRAM requests that were serviced by IOH.
+.El
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.It Li os
+Configure the PMC to count events happening at processor privilege
+level 0.
+.It Li usr
+Configure the PMC to count events occurring at privilege levels 1, 2
+or 3.
+.El
+.Pp
+If neither of the
+.Dq Li os
+or
+.Dq Li usr
+qualifiers are specified, the default is to enable both.
+.Ss Event Specifiers (Programmable PMCs)
+Westmere programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li LOAD_BLOCK.OVERLAP_STORE
+.Pq Event 03H , Umask 02H
+Loads that partially overlap an earlier store
+.It Li SB_DRAIN.ANY
+.Pq Event 04H , Umask 07H
+All Store buffer stall cycles
+.It Li MISALIGN_MEMORY.STORE
+.Pq Event 05H , Umask 02H
+All stores referenced with misaligned address
+.It Li STORE_BLOCKS.AT_RET
+.Pq Event 06H , Umask 04H
+Counts number of loads delayed with at-Retirement block code. The following
+loads need to be executed at retirement and wait for all senior stores on
+the same thread to be drained: load splitting across 4K boundary (page
+split), load accessing uncacheable (UC or USWC) memory, load lock, and load
+with page table in UC or USWC memory region.
+.It Li STORE_BLOCKS.L1D_BLOCK
+.Pq Event 06H , Umask 08H
+Cacheable loads delayed with L1D block code
+.It Li PARTIAL_ADDRESS_ALIAS
+.Pq Event 07H , Umask 01H
+Counts false dependency due to partial address aliasing
+.It Li DTLB_LOAD_MISSES.ANY
+.Pq Event 08H , Umask 01H
+Counts all load misses that cause a page walk
+.It Li DTLB_LOAD_MISSES.WALK_COMPLETED
+.Pq Event 08H , Umask 02H
+Counts number of completed page walks due to load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.WALK_CYCLES
+.Pq Event 08H , Umask 04H
+Cycles PMH is busy with a page walk due to a load miss in the STLB.
+.It Li DTLB_LOAD_MISSES.STLB_HIT
+.Pq Event 08H , Umask 10H
+Number of cache load STLB hits
+.It Li DTLB_LOAD_MISSES.PDE_MISS
+.Pq Event 08H , Umask 20H
+Number of DTLB cache load misses where the low part of the linear to
+physical address translation was missed.
+.It Li MEM_INST_RETIRED.LOADS
+.Pq Event 0BH , Umask 01H
+Counts the number of instructions with an architecturally-visible load
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.STORES
+.Pq Event 0BH , Umask 02H
+Counts the number of instructions with an architecturally-visible store
+retired on the architected path.
+In conjunction with ld_lat facility
+.It Li MEM_INST_RETIRED.LATENCY_ABOVE_THRESHOLD
+.Pq Event 0BH , Umask 10H
+Counts the number of instructions exceeding the latency specified with
+ld_lat facility.
+In conjunction with ld_lat facility
+.It Li MEM_STORE_RETIRED.DTLB_MISS
+.Pq Event 0CH , Umask 01H
+The event counts the number of retired stores that missed the DTLB. The DTLB
+miss is not counted if the store operation causes a fault. Does not count
+prefetches. Counts both primary and secondary misses to the TLB
+.It Li UOPS_ISSUED.ANY
+.Pq Event 0EH , Umask 01H
+Counts the number of Uops issued by the Register Allocation Table to the
+Reservation Station, i.e. the UOPs issued from the front end to the back
+end.
+.It Li UOPS_ISSUED.STALLED_CYCLES
+.Pq Event 0EH , Umask 01H
+Counts the number of cycles no Uops issued by the Register Allocation Table
+to the Reservation Station, i.e. the UOPs issued from the front end to the
+back end.
+set invert=1, cmask = 1
+.It Li UOPS_ISSUED.FUSED
+.Pq Event 0EH , Umask 02H
+Counts the number of fused Uops that were issued from the Register
+Allocation Table to the Reservation Station.
+.It Li MEM_UNCORE_RETIRED.LOCAL_HITM
+.Pq Event 0FH , Umask 02H
+Load instructions retired that HIT modified data in sibling core (Precise
+Event)
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM_AND_REMOTE_CACHE_HIT
+.Pq Event 0FH , Umask 08H
+Load instructions retired local dram and remote cache HIT data sources
+(Precise Event)
+.It Li MEM_UNCORE_RETIRED.LOCAL_DRAM
+.Pq Event 0FH , Umask 10H
+Load instructions retired with a data source of local DRAM or locally homed
+remote cache HITM (Precise Event)
+.It Li MEM_UNCORE_RETIRED.REMOTE_DRAM
+.Pq Event 0FH , Umask 20H
+Load instructions retired remote DRAM and remote home-remote cache HITM
+(Precise Event)
+.It Li MEM_UNCORE_RETIRED.UNCACHEABLE
+.Pq Event 0FH , Umask 80H
+Load instructions retired I/O (Precise Event)
+.It Li FP_COMP_OPS_EXE.X87
+.Pq Event 10H , Umask 01H
+Counts the number of FP Computational Uops Executed. The number of FADD,
+FSUB, FCOM, FMULs, integer MULs and IMULs, FDIVs, FPREMs, FSQRTS, integer
+DIVs, and IDIVs. This event does not distinguish an FADD used in the middle
+of a transcendental flow from a separate FADD instruction.
+.It Li FP_COMP_OPS_EXE.MMX
+.Pq Event 10H , Umask 02H
+Counts number of MMX Uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP
+.Pq Event 10H , Umask 04H
+Counts number of SSE and SSE2 FP uops executed.
+.It Li FP_COMP_OPS_EXE.SSE2_INTEGER
+.Pq Event 10H , Umask 08H
+Counts number of SSE2 integer uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_PACKED
+.Pq Event 10H , Umask 10H
+Counts number of SSE FP packed uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_FP_SCALAR
+.Pq Event 10H , Umask 20H
+Counts number of SSE FP scalar uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_SINGLE_PRECISION
+.Pq Event 10H , Umask 40H
+Counts number of SSE* FP single precision uops executed.
+.It Li FP_COMP_OPS_EXE.SSE_DOUBLE_PRECISION
+.Pq Event 10H , Umask 80H
+Counts number of SSE* FP double precision uops executed.
+.It Li SIMD_INT_128.PACKED_MPY
+.Pq Event 12H , Umask 01H
+Counts number of 128 bit SIMD integer multiply operations.
+.It Li SIMD_INT_128.PACKED_SHIFT
+.Pq Event 12H , Umask 02H
+Counts number of 128 bit SIMD integer shift operations.
+.It Li SIMD_INT_128.PACK
+.Pq Event 12H , Umask 04H
+Counts number of 128 bit SIMD integer pack operations.
+.It Li SIMD_INT_128.UNPACK
+.Pq Event 12H , Umask 08H
+Counts number of 128 bit SIMD integer unpack operations.
+.It Li SIMD_INT_128.PACKED_LOGICAL
+.Pq Event 12H , Umask 10H
+Counts number of 128 bit SIMD integer logical operations.
+.It Li SIMD_INT_128.PACKED_ARITH
+.Pq Event 12H , Umask 20H
+Counts number of 128 bit SIMD integer arithmetic operations.
+.It Li SIMD_INT_128.SHUFFLE_MOVE
+.Pq Event 12H , Umask 40H
+Counts number of 128 bit SIMD integer shuffle and move operations.
+.It Li LOAD_DISPATCH.RS
+.Pq Event 13H , Umask 01H
+Counts number of loads dispatched from the Reservation Station that bypass
+the Memory Order Buffer.
+.It Li LOAD_DISPATCH.RS_DELAYED
+.Pq Event 13H , Umask 02H
+Counts the number of delayed RS dispatches at the stage latch. If an RS
+dispatch can not bypass to LB, it has another chance to dispatch from the
+one-cycle delayed staging latch before it is written into the LB.
+.It Li LOAD_DISPATCH.MOB
+.Pq Event 13H , Umask 04H
+Counts the number of loads dispatched from the Reservation Station to the
+Memory Order Buffer.
+.It Li LOAD_DISPATCH.ANY
+.Pq Event 13H , Umask 07H
+Counts all loads dispatched from the Reservation Station.
+.It Li ARITH.CYCLES_DIV_BUSY
+.Pq Event 14H , Umask 01H
+Counts the number of cycles the divider is busy executing divide or square
+root operations. The divide can be integer, X87 or Streaming SIMD Extensions
+(SSE). The square root operation can be either X87 or SSE.
+Set 'edge =1, invert=1, cmask=1' to count the number of divides.
+Count may be incorrect when SMT is on
+.It Li ARITH.MUL
+.Pq Event 14H , Umask 02H
+Counts the number of multiply operations executed. This includes integer as
+well as floating point multiply operations but excludes DPPS mul and MPSAD.
+Count may be incorrect when SMT is on
+.It Li INST_QUEUE_WRITES
+.Pq Event 17H , Umask 01H
+Counts the number of instructions written into the instruction queue every
+cycle.
+.It Li INST_DECODED.DEC0
+.Pq Event 18H , Umask 01H
+Counts number of instructions that require decoder 0 to be decoded. Usually,
+this means that the instruction maps to more than 1 uop
+.It Li TWO_UOP_INSTS_DECODED
+.Pq Event 19H , Umask 01H
+An instruction that generates two uops was decoded
+.It Li INST_QUEUE_WRITE_CYCLES
+.Pq Event 1EH , Umask 01H
+This event counts the number of cycles during which instructions are written
+to the instruction queue. Dividing this counter by the number of
+instructions written to the instruction queue (INST_QUEUE_WRITES) yields the
+average number of instructions decoded each cycle. If this number is less
+than four and the pipe stalls, this indicates that the decoder is failing to
+decode enough instructions per cycle to sustain the 4-wide pipeline.
+If SSE* instructions that are 6 bytes or longer arrive one after another,
+then front end throughput may limit execution speed.
+.It Li LSD_OVERFLOW
+.Pq Event 20H , Umask 01H
+Number of loops that can not stream from the instruction queue.
+.It Li L2_RQSTS.LD_HIT
+.Pq Event 24H , Umask 01H
+Counts number of loads that hit the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches. L2 loads can be rejected for
+various reasons. Only non rejected loads are counted.
+.It Li L2_RQSTS.LD_MISS
+.Pq Event 24H , Umask 02H
+Counts the number of loads that miss the L2 cache. L2 loads include both L1D
+demand misses as well as L1D prefetches.
+.It Li L2_RQSTS.LOADS
+.Pq Event 24H , Umask 03H
+Counts all L2 load requests. L2 loads include both L1D demand misses as well
+as L1D prefetches.
+.It Li L2_RQSTS.RFO_HIT
+.Pq Event 24H , Umask 04H
+Counts the number of store RFO requests that hit the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+Count includes WC memory requests, where the data is not fetched but the
+permission to write the line is required.
+.It Li L2_RQSTS.RFO_MISS
+.Pq Event 24H , Umask 08H
+Counts the number of store RFO requests that miss the L2 cache. L2 RFO
+requests include both L1D demand RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.RFOS
+.Pq Event 24H , Umask 0CH
+Counts all L2 store RFO requests. L2 RFO requests include both L1D demand
+RFO misses as well as L1D RFO prefetches.
+.It Li L2_RQSTS.IFETCH_HIT
+.Pq Event 24H , Umask 10H
+Counts number of instruction fetches that hit the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCH_MISS
+.Pq Event 24H , Umask 20H
+Counts number of instruction fetches that miss the L2 cache. L2 instruction
+fetches include both L1I demand misses as well as L1I instruction
+prefetches.
+.It Li L2_RQSTS.IFETCHES
+.Pq Event 24H , Umask 30H
+Counts all instruction fetches. L2 instruction fetches include both L1I
+demand misses as well as L1I instruction prefetches.
+.It Li L2_RQSTS.PREFETCH_HIT
+.Pq Event 24H , Umask 40H
+Counts L2 prefetch hits for both code and data.
+.It Li L2_RQSTS.PREFETCH_MISS
+.Pq Event 24H , Umask 80H
+Counts L2 prefetch misses for both code and data.
+.It Li L2_RQSTS.PREFETCHES
+.Pq Event 24H , Umask C0H
+Counts all L2 prefetches for both code and data.
+.It Li L2_RQSTS.MISS
+.Pq Event 24H , Umask AAH
+Counts all L2 misses for both code and data.
+.It Li L2_RQSTS.REFERENCES
+.Pq Event 24H , Umask FFH
+Counts all L2 requests for both code and data.
+.It Li L2_DATA_RQSTS.DEMAND.I_STATE
+.Pq Event 26H , Umask 01H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss. L2 demand loads are both L1D
+demand misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.S_STATE
+.Pq Event 26H , Umask 02H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the S (shared) state. L2 demand loads are both L1D demand misses and L1D
+prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.E_STATE
+.Pq Event 26H , Umask 04H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the E (exclusive) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.M_STATE
+.Pq Event 26H , Umask 08H
+Counts number of L2 data demand loads where the cache line to be loaded is
+in the M (modified) state. L2 demand loads are both L1D demand misses and
+L1D prefetches.
+.It Li L2_DATA_RQSTS.DEMAND.MESI
+.Pq Event 26H , Umask 0FH
+Counts all L2 data demand requests. L2 demand loads are both L1D demand
+misses and L1D prefetches.
+.It Li L2_DATA_RQSTS.PREFETCH.I_STATE
+.Pq Event 26H , Umask 10H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the I (invalid) state, i.e. a cache miss.
+.It Li L2_DATA_RQSTS.PREFETCH.S_STATE
+.Pq Event 26H , Umask 20H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the S (shared) state. A prefetch RFO will miss on an S state line, while
+a prefetch read will hit on an S state line.
+.It Li L2_DATA_RQSTS.PREFETCH.E_STATE
+.Pq Event 26H , Umask 40H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the E (exclusive) state.
+.It Li L2_DATA_RQSTS.PREFETCH.M_STATE
+.Pq Event 26H , Umask 80H
+Counts number of L2 prefetch data loads where the cache line to be loaded is
+in the M (modified) state.
+.It Li L2_DATA_RQSTS.PREFETCH.MESI
+.Pq Event 26H , Umask F0H
+Counts all L2 prefetch requests.
+.It Li L2_DATA_RQSTS.ANY
+.Pq Event 26H , Umask FFH
+Counts all L2 data requests.
+.It Li L2_WRITE.RFO.I_STATE
+.Pq Event 27H , Umask 01H
+Counts number of L2 demand store RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss. The L1D prefetcher
+does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.S_STATE
+.Pq Event 27H , Umask 02H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the S (shared) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.M_STATE
+.Pq Event 27H , Umask 08H
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in the M (modified) state. The L1D prefetcher does not issue a RFO prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.HIT
+.Pq Event 27H , Umask 0EH
+Counts number of L2 store RFO requests where the cache line to be loaded is
+in either the S, E or M states. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.RFO.MESI
+.Pq Event 27H , Umask 0FH
+Counts all L2 store RFO requests. The L1D prefetcher does not issue a RFO
+prefetch.
+This is a demand RFO request
+.It Li L2_WRITE.LOCK.I_STATE
+.Pq Event 27H , Umask 10H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the I (invalid) state, i.e. a cache miss.
+.It Li L2_WRITE.LOCK.S_STATE
+.Pq Event 27H , Umask 20H
+Counts number of L2 lock RFO requests where the cache line to be loaded is
+in the S (shared) state.
+.It Li L2_WRITE.LOCK.E_STATE
+.Pq Event 27H , Umask 40H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the E (exclusive) state.
+.It Li L2_WRITE.LOCK.M_STATE
+.Pq Event 27H , Umask 80H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in the M (modified) state.
+.It Li L2_WRITE.LOCK.HIT
+.Pq Event 27H , Umask E0H
+Counts number of L2 demand lock RFO requests where the cache line to be
+loaded is in either the S, E, or M state.
+.It Li L2_WRITE.LOCK.MESI
+.Pq Event 27H , Umask F0H
+Counts all L2 demand lock RFO requests.
+.It Li L1D_WB_L2.I_STATE
+.Pq Event 28H , Umask 01H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the I (invalid) state, i.e. a cache miss.
+.It Li L1D_WB_L2.S_STATE
+.Pq Event 28H , Umask 02H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the S state.
+.It Li L1D_WB_L2.E_STATE
+.Pq Event 28H , Umask 04H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the E (exclusive) state.
+.It Li L1D_WB_L2.M_STATE
+.Pq Event 28H , Umask 08H
+Counts number of L1 writebacks to the L2 where the cache line to be written
+is in the M (modified) state.
+.It Li L1D_WB_L2.MESI
+.Pq Event 28H , Umask 0FH
+Counts all L1 writebacks to the L2.
+.It Li L3_LAT_CACHE.REFERENCE
+.Pq Event 2EH , Umask 02H
+Counts uncore Last Level Cache references. Because of cache hierarchy, cache
+sizes and other implementation-specific characteristics, value comparison to
+estimate performance differences is not recommended.
+see Table A-1
+.It Li L3_LAT_CACHE.MISS
+.Pq Event 2EH , Umask 01H
+Counts uncore Last Level Cache misses. Because of cache hierarchy, cache sizes
+and other implementation-specific characteristics, value comparison to
+estimate performance differences is not recommended.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.THREAD_P
+.Pq Event 3CH , Umask 00H
+Counts the number of thread cycles while the thread is not in a halt state.
+The thread enters the halt state when it is running the HLT instruction. The
+core frequency may change from time to time due to power or thermal
+throttling.
+see Table A-1
+.It Li CPU_CLK_UNHALTED.REF_P
+.Pq Event 3CH , Umask 01H
+Increments at the frequency of TSC when not halted.
+see Table A-1
+.It Li DTLB_MISSES.ANY
+.Pq Event 49H , Umask 01H
+Counts the number of misses in the STLB which causes a page walk.
+.It Li DTLB_MISSES.WALK_COMPLETED
+.Pq Event 49H , Umask 02H
+Counts number of misses in the STLB which resulted in a completed page walk.
+.It Li DTLB_MISSES.WALK_CYCLES
+.Pq Event 49H , Umask 04H
+Counts cycles of page walk due to misses in the STLB.
+.It Li DTLB_MISSES.STLB_HIT
+.Pq Event 49H , Umask 10H
+Counts the number of DTLB first level misses that hit in the second level
+TLB. This event is only relevant if the core contains multiple DTLB levels.
+.It Li DTLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 49H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li LOAD_HIT_PRE
+.Pq Event 4CH , Umask 01H
+Counts load operations sent to the L1 data cache while a previous SSE
+prefetch instruction to the same cache line has started prefetching but has
+not yet finished.
+.It Li L1D_PREFETCH.REQUESTS
+.Pq Event 4EH , Umask 01H
+Counts number of hardware prefetch requests dispatched out of the prefetch
+FIFO.
+.It Li L1D_PREFETCH.MISS
+.Pq Event 4EH , Umask 02H
+Counts number of hardware prefetch requests that miss the L1D. There are two
+prefetchers in the L1D. A streamer, which predicts lines sequentially after
+this one should be fetched, and the IP prefetcher that remembers access
+patterns for the current instruction. The streamer prefetcher stops on an
+L1D hit, while the IP prefetcher does not.
+.It Li L1D_PREFETCH.TRIGGERS
+.Pq Event 4EH , Umask 04H
+Counts number of prefetch requests triggered by the Finite State Machine and
+pushed into the prefetch FIFO. Some of the prefetch requests are dropped due
+to overwrites or competition between the IP index prefetcher and streamer
+prefetcher. The prefetch FIFO contains 4 entries.
+.It Li EPT.WALK_CYCLES
+.Pq Event 4FH , Umask 10H
+Counts Extended Page walk cycles.
+.It Li L1D.REPL
+.Pq Event 51H , Umask 01H
+Counts the number of lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_REPL
+.Pq Event 51H , Umask 02H
+Counts the number of modified lines brought into the L1 data cache.
+Counter 0, 1 only
+.It Li L1D.M_EVICT
+.Pq Event 51H , Umask 04H
+Counts the number of modified lines evicted from the L1 data cache due to
+replacement.
+Counter 0, 1 only
+.It Li L1D.M_SNOOP_EVICT
+.Pq Event 51H , Umask 08H
+Counts the number of modified lines evicted from the L1 data cache due to
+snoop HITM intervention.
+Counter 0, 1 only
+.It Li L1D_CACHE_PREFETCH_LOCK_FB_HIT
+.Pq Event 52H , Umask 01H
+Counts the number of cacheable load lock speculated instructions accepted
+into the fill buffer.
+.It Li L1D_CACHE_LOCK_FB_HIT
+.Pq Event 53H , Umask 01H
+Counts the number of cacheable load lock speculated or retired instructions
+accepted into the fill buffer.
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_DATA
+.Pq Event 60H , Umask 01H
+Counts weighted cycles of offcore demand data read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.READ_CODE
+.Pq Event 60H , Umask 02H
+Counts weighted cycles of offcore demand code read requests. Does not
+include L2 prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND.RFO
+.Pq Event 60H , Umask 04H
+Counts weighted cycles of offcore demand RFO requests. Does not include L2
+prefetch requests.
+counter 0
+.It Li OFFCORE_REQUESTS_OUTSTANDING.ANY.READ
+.Pq Event 60H , Umask 08H
+Counts weighted cycles of offcore read requests of any kind. Includes L2
+prefetch requests.
+counter 0
+.It Li CACHE_LOCK_CYCLES.L1D_L2
+.Pq Event 63H , Umask 01H
+Cycle count during which the L1D and L2 are locked. A lock is asserted when
+there is a locked memory access, due to uncacheable memory, a locked
+operation that spans two cache lines, or a page walk from an uncacheable
+page table.
+Counter 0, 1 only. L1D and L2 locks have a very high performance penalty and
+it is highly recommended to avoid such accesses.
+.It Li CACHE_LOCK_CYCLES.L1D
+.Pq Event 63H , Umask 02H
+Counts the number of cycles that cacheline in the L1 data cache unit is
+locked.
+Counter 0, 1 only.
+.It Li IO_TRANSACTIONS
+.Pq Event 6CH , Umask 01H
+Counts the number of completed I/O transactions.
+.It Li L1I.HITS
+.Pq Event 80H , Umask 01H
+Counts all instruction fetches that hit the L1 instruction cache.
+.It Li L1I.MISSES
+.Pq Event 80H , Umask 02H
+Counts all instruction fetches that miss the L1I cache. This includes
+instruction cache misses, streaming buffer misses, victim cache misses and
+uncacheable fetches. An instruction fetch miss is counted only once and not
+once for every cycle it is outstanding.
+.It Li L1I.READS
+.Pq Event 80H , Umask 03H
+Counts all instruction fetches, including uncacheable fetches that bypass
+the L1I.
+.It Li L1I.CYCLES_STALLED
+.Pq Event 80H , Umask 04H
+Cycle counts for which an instruction fetch stalls due to a L1I cache miss,
+ITLB miss or ITLB fault.
+.It Li LARGE_ITLB.HIT
+.Pq Event 82H , Umask 01H
+Counts number of large ITLB hits.
+.It Li ITLB_MISSES.ANY
+.Pq Event 85H , Umask 01H
+Counts the number of misses in all levels of the ITLB which causes a page
+walk.
+.It Li ITLB_MISSES.WALK_COMPLETED
+.Pq Event 85H , Umask 02H
+Counts number of misses in all levels of the ITLB which resulted in a
+completed page walk.
+.It Li ITLB_MISSES.WALK_CYCLES
+.Pq Event 85H , Umask 04H
+Counts ITLB miss page walk cycles.
+.It Li ITLB_MISSES.LARGE_WALK_COMPLETED
+.Pq Event 85H , Umask 80H
+Counts number of completed large page walks due to misses in the STLB.
+.It Li ILD_STALL.LCP
+.Pq Event 87H , Umask 01H
+Cycles Instruction Length Decoder stalls due to length changing prefixes:
+66, 67 or REX.W (for EM64T) instructions which change the length of the
+decoded instruction.
+.It Li ILD_STALL.MRU
+.Pq Event 87H , Umask 02H
+Instruction Length Decoder stall cycles due to Branch Prediction Unit (BPU)
+Most Recently Used (MRU) bypass.
+.It Li ILD_STALL.IQ_FULL
+.Pq Event 87H , Umask 04H
+Stall cycles due to a full instruction queue.
+.It Li ILD_STALL.REGEN
+.Pq Event 87H , Umask 08H
+Counts the number of regen stalls.
+.It Li ILD_STALL.ANY
+.Pq Event 87H , Umask 0FH
+Counts any cycles the Instruction Length Decoder is stalled.
+.It Li BR_INST_EXEC.COND
+.Pq Event 88H , Umask 01H
+Counts the number of conditional near branch instructions executed, but not
+necessarily retired.
+.It Li BR_INST_EXEC.DIRECT
+.Pq Event 88H , Umask 02H
+Counts all unconditional near branch instructions excluding calls and
+indirect branches.
+.It Li BR_INST_EXEC.INDIRECT_NON_CALL
+.Pq Event 88H , Umask 04H
+Counts the number of executed indirect near branch instructions that are not
+calls.
+.It Li BR_INST_EXEC.NON_CALLS
+.Pq Event 88H , Umask 07H
+Counts all non call near branch instructions executed, but not necessarily
+retired.
+.It Li BR_INST_EXEC.RETURN_NEAR
+.Pq Event 88H , Umask 08H
+Counts indirect near branches that have a return mnemonic.
+.It Li BR_INST_EXEC.DIRECT_NEAR_CALL
+.Pq Event 88H , Umask 10H
+Counts unconditional near call branch instructions, excluding non call
+branch, executed.
+.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 88H , Umask 20H
+Counts indirect near calls, including both register and memory indirect,
+executed.
+.It Li BR_INST_EXEC.NEAR_CALLS
+.Pq Event 88H , Umask 30H
+Counts all near call branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.TAKEN
+.Pq Event 88H , Umask 40H
+Counts taken near branches executed, but not necessarily retired.
+.It Li BR_INST_EXEC.ANY
+.Pq Event 88H , Umask 7FH
+Counts all near executed branches (not necessarily retired). This includes
+only instructions and not micro-op branches. Frequent branching is not
+necessarily a major performance issue. However frequent branch
+mispredictions may be a problem.
+.It Li BR_MISP_EXEC.COND
+.Pq Event 89H , Umask 01H
+Counts the number of mispredicted conditional near branch instructions
+executed, but not necessarily retired.
+.It Li BR_MISP_EXEC.DIRECT
+.Pq Event 89H , Umask 02H
+Counts mispredicted macro unconditional near branch instructions, excluding
+calls and indirect branches (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NON_CALL
+.Pq Event 89H , Umask 04H
+Counts the number of executed mispredicted indirect near branch instructions
+that are not calls.
+.It Li BR_MISP_EXEC.NON_CALLS
+.Pq Event 89H , Umask 07H
+Counts mispredicted non call near branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.RETURN_NEAR
+.Pq Event 89H , Umask 08H
+Counts mispredicted indirect branches that have a near return mnemonic.
+.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL
+.Pq Event 89H , Umask 10H
+Counts mispredicted non-indirect near calls executed, (should always be 0).
+.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL
+.Pq Event 89H , Umask 20H
+Counts mispredicted indirect near calls executed, including both register
+and memory indirect.
+.It Li BR_MISP_EXEC.NEAR_CALLS
+.Pq Event 89H , Umask 30H
+Counts all mispredicted near call branches executed, but not necessarily
+retired.
+.It Li BR_MISP_EXEC.TAKEN
+.Pq Event 89H , Umask 40H
+Counts executed mispredicted near branches that are taken, but not
+necessarily retired.
+.It Li BR_MISP_EXEC.ANY
+.Pq Event 89H , Umask 7FH
+Counts the number of mispredicted near branch instructions that were
+executed, but not necessarily retired.
+.It Li RESOURCE_STALLS.ANY
+.Pq Event A2H , Umask 01H
+Counts the number of Allocator resource related stalls. Includes register
+renaming buffer entries, memory buffer entries. In addition to resource
+related stalls, this event counts some other events. Includes stalls arising
+during branch misprediction recovery, such as if retirement of the
+mispredicted branch is delayed and stalls arising while store buffer is
+draining from synchronizing operations.
+Does not include stalls due to SuperQ (off core) queue full, too many cache
+misses, etc.
+.It Li RESOURCE_STALLS.LOAD
+.Pq Event A2H , Umask 02H
+Counts the cycles of stall due to lack of load buffer for load operation.
+.It Li RESOURCE_STALLS.RS_FULL
+.Pq Event A2H , Umask 04H
+This event counts the number of cycles when the number of instructions in
+the pipeline waiting for execution reaches the limit the processor can
+handle. A high count of this event indicates that there are long latency
+operations in the pipe (possibly load and store operations that miss the L2
+cache, or instructions dependent upon instructions further down the pipeline
+that have yet to retire).
+When RS is full, new instructions can not enter the reservation station and
+start execution.
+.It Li RESOURCE_STALLS.STORE
+.Pq Event A2H , Umask 08H
+This event counts the number of cycles that a resource related stall will
+occur due to the number of store instructions reaching the limit of the
+pipeline, (i.e. all store buffers are used). The stall ends when a store
+instruction commits its data to the cache or memory.
+.It Li RESOURCE_STALLS.ROB_FULL
+.Pq Event A2H , Umask 10H
+Counts the cycles of stall due to re-order buffer full.
+.It Li RESOURCE_STALLS.FPCW
+.Pq Event A2H , Umask 20H
+Counts the number of cycles while execution was stalled due to writing the
+floating-point unit (FPU) control word.
+.It Li RESOURCE_STALLS.MXCSR
+.Pq Event A2H , Umask 40H
+Stalls due to the MXCSR register rename occurring too close to a previous
+MXCSR rename. The MXCSR provides control and status for the MMX registers.
+.It Li RESOURCE_STALLS.OTHER
+.Pq Event A2H , Umask 80H
+Counts the number of cycles while execution was stalled due to other
+resource issues.
+.It Li MACRO_INSTS.FUSIONS_DECODED
+.Pq Event A6H , Umask 01H
+Counts the number of instructions decoded that are macro-fused but not
+necessarily executed or retired.
+.It Li BACLEAR_FORCE_IQ
+.Pq Event A7H , Umask 01H
+Counts number of times a BACLEAR was forced by the Instruction Queue. The IQ
+is also responsible for providing conditional branch prediction direction
+based on a static scheme and dynamic data provided by the L2 Branch
+Prediction Unit. If the conditional branch target is not found in the Target
+Array and the IQ predicts that the branch is taken, then the IQ will force
+the Branch Address Calculator to issue a BACLEAR. Each BACLEAR asserted by
+the BAC generates approximately an 8 cycle bubble in the instruction fetch
+pipeline.
+.It Li LSD.UOPS
+.Pq Event A8H , Umask 01H
+Counts the number of micro-ops delivered by loop stream detector
+Use cmask=1 and invert to count cycles
+.It Li ITLB_FLUSH
+.Pq Event AEH , Umask 01H
+Counts the number of ITLB flushes
+.It Li OFFCORE_REQUESTS.DEMAND.READ_DATA
+.Pq Event B0H , Umask 01H
+Counts number of offcore demand data read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.READ_CODE
+.Pq Event B0H , Umask 02H
+Counts number of offcore demand code read requests. Does not count L2
+prefetch requests.
+.It Li OFFCORE_REQUESTS.DEMAND.RFO
+.Pq Event B0H , Umask 04H
+Counts number of offcore demand RFO requests. Does not count L2 prefetch
+requests.
+.It Li OFFCORE_REQUESTS.ANY.READ
+.Pq Event B0H , Umask 08H
+Counts number of offcore read requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.ANY.RFO
+.Pq Event B0H , Umask 10H
+Counts number of offcore RFO requests. Includes L2 prefetch requests.
+.It Li OFFCORE_REQUESTS.L1D_WRITEBACK
+.Pq Event B0H , Umask 40H
+Counts number of L1D writebacks to the uncore.
+.It Li OFFCORE_REQUESTS.ANY
+.Pq Event B0H , Umask 80H
+Counts all offcore requests.
+.It Li UOPS_EXECUTED.PORT0
+.Pq Event B1H , Umask 01H
+Counts number of Uops executed that were issued on port 0. Port 0 handles
+integer arithmetic, SIMD and FP add Uops.
+.It Li UOPS_EXECUTED.PORT1
+.Pq Event B1H , Umask 02H
+Counts number of Uops executed that were issued on port 1. Port 1 handles
+integer arithmetic, SIMD, integer shift, FP multiply and FP divide Uops.
+.It Li UOPS_EXECUTED.PORT2_CORE
+.Pq Event B1H , Umask 04H
+Counts number of Uops executed that were issued on port 2. Port 2 handles
+the load Uops. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT3_CORE
+.Pq Event B1H , Umask 08H
+Counts number of Uops executed that were issued on port 3. Port 3 handles
+store Uops. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT4_CORE
+.Pq Event B1H , Umask 10H
+Counts number of Uops executed that were issued on port 4. Port 4 handles
+the value to be stored for the store Uops issued on port 3. This is a core
+count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES_NO_PORT5
+.Pq Event B1H , Umask 1FH
+Counts number of cycles there are one or more uops being executed and were
+issued on ports 0-4. This is a core count only and can not be collected per
+thread.
+.It Li UOPS_EXECUTED.PORT5
+.Pq Event B1H , Umask 20H
+Counts number of Uops executed that were issued on port 5.
+.It Li UOPS_EXECUTED.CORE_ACTIVE_CYCLES
+.Pq Event B1H , Umask 3FH
+Counts number of cycles there are one or more uops being executed on any
+ports. This is a core count only and can not be collected per thread.
+.It Li UOPS_EXECUTED.PORT015
+.Pq Event B1H , Umask 40H
+Counts number of Uops executed that were issued on port 0, 1, or 5.
+use cmask=1, invert=1 to count stall cycles
+.It Li UOPS_EXECUTED.PORT234
+.Pq Event B1H , Umask 80H
+Counts number of Uops executed that were issued on port 2, 3, or 4.
+.It Li OFFCORE_REQUESTS_SQ_FULL
+.Pq Event B2H , Umask 01H
+Counts number of cycles the SQ is full to handle off-core requests.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.DATA
+.Pq Event B3H , Umask 01H
+Counts weighted cycles of snoopq requests for data. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.INVALIDATE
+.Pq Event B3H , Umask 02H
+Counts weighted cycles of snoopq invalidate requests. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS_OUTSTANDING.CODE
+.Pq Event B3H , Umask 04H
+Counts weighted cycles of snoopq requests for code. Counter 0 only
+Use cmask=1 to count cycles not empty.
+.It Li SNOOPQ_REQUESTS.CODE
+.Pq Event B4H , Umask 01H
+Counts the number of snoop code requests
+.It Li SNOOPQ_REQUESTS.DATA
+.Pq Event B4H , Umask 02H
+Counts the number of snoop data requests
+.It Li SNOOPQ_REQUESTS.INVALIDATE
+.Pq Event B4H , Umask 04H
+Counts the number of snoop invalidate requests
+.It Li OFF_CORE_RESPONSE_0
+.Pq Event B7H , Umask 01H
+See Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core.
+Requires programming MSR 01A6H.
+.It Li SNOOP_RESPONSE.HIT
+.Pq Event B8H , Umask 01H
+Counts HIT snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITE
+.Pq Event B8H , Umask 02H
+Counts HIT E snoop response sent by this thread in response to a snoop
+request.
+.It Li SNOOP_RESPONSE.HITM
+.Pq Event B8H , Umask 04H
+Counts HIT M snoop response sent by this thread in response to a snoop
+request.
+.It Li OFF_CORE_RESPONSE_1
+.Pq Event BBH , Umask 01H
+See Section 30.6.1.3, Off-core Response Performance Monitoring in the
+Processor Core.
+Requires programming MSR 01A7H.
+.It Li INST_RETIRED.ANY_P
+.Pq Event C0H , Umask 01H
+See Table A-1
+Notes: INST_RETIRED.ANY is counted by a designated fixed counter.
+INST_RETIRED.ANY_P is counted by a programmable counter and is an
+architectural performance event. Event is supported if CPUID.A.EBX[1] = 0.
+Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not
+count as retired instructions.
+.It Li INST_RETIRED.X87
+.Pq Event C0H , Umask 02H
+Counts the number of floating point computational operations retired:
+floating point computational operations executed by the assist handler and
+sub-operations of complex floating point instructions like transcendental
+instructions.
+.It Li INST_RETIRED.MMX
+.Pq Event C0H , Umask 04H
+Counts the number of retired: MMX instructions.
+.It Li UOPS_RETIRED.ANY
+.Pq Event C2H , Umask 01H
+Counts the number of micro-ops retired, (macro-fused=1, micro-fused=2,
+others=1; maximum count of 8 per cycle). Most instructions are composed of
+one or two micro-ops. Some instructions are decoded into longer sequences
+such as repeat instructions, floating point transcendental instructions, and
+assists.
+Use cmask=1 and invert to count active cycles or stalled cycles
+.It Li UOPS_RETIRED.RETIRE_SLOTS
+.Pq Event C2H , Umask 02H
+Counts the number of retirement slots used each cycle
+.It Li UOPS_RETIRED.MACRO_FUSED
+.Pq Event C2H , Umask 04H
+Counts number of macro-fused uops retired.
+.It Li MACHINE_CLEARS.CYCLES
+.Pq Event C3H , Umask 01H
+Counts the cycles machine clear is asserted.
+.It Li MACHINE_CLEARS.MEM_ORDER
+.Pq Event C3H , Umask 02H
+Counts the number of machine clears due to memory order conflicts.
+.It Li MACHINE_CLEARS.SMC
+.Pq Event C3H , Umask 04H
+Counts the number of times that a program writes to a code section.
+Self-modifying code causes a severe penalty in all Intel 64 and IA-32
+processors. The modified cache line is written back to the L2 and L3 caches.
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 00H
+See Table A-1
+.It Li BR_INST_RETIRED.CONDITIONAL
+.Pq Event C4H , Umask 01H
+Counts the number of conditional branch instructions retired.
+.It Li BR_INST_RETIRED.NEAR_CALL
+.Pq Event C4H , Umask 02H
+Counts the number of direct & indirect near unconditional calls retired
+.It Li BR_INST_RETIRED.ALL_BRANCHES
+.Pq Event C4H , Umask 04H
+Counts the number of branch instructions retired
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 00H
+See Table A-1
+.It Li BR_MISP_RETIRED.CONDITIONAL
+.Pq Event C5H , Umask 01H
+Counts mispredicted conditional retired calls.
+.It Li BR_MISP_RETIRED.NEAR_CALL
+.Pq Event C5H , Umask 02H
+Counts mispredicted direct & indirect near unconditional retired calls.
+.It Li BR_MISP_RETIRED.ALL_BRANCHES
+.Pq Event C5H , Umask 04H
+Counts all mispredicted retired calls.
+.It Li SSEX_UOPS_RETIRED.PACKED_SINGLE
+.Pq Event C7H , Umask 01H
+Counts SIMD packed single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_SINGLE
+.Pq Event C7H , Umask 02H
+Counts SIMD scalar single-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.PACKED_DOUBLE
+.Pq Event C7H , Umask 04H
+Counts SIMD packed double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.SCALAR_DOUBLE
+.Pq Event C7H , Umask 08H
+Counts SIMD scalar double-precision floating point Uops retired.
+.It Li SSEX_UOPS_RETIRED.VECTOR_INTEGER
+.Pq Event C7H , Umask 10H
+Counts 128-bit SIMD vector integer Uops retired.
+.It Li ITLB_MISS_RETIRED
+.Pq Event C8H , Umask 20H
+Counts the number of retired instructions that missed the ITLB when the
+instruction was fetched.
+.It Li MEM_LOAD_RETIRED.L1D_HIT
+.Pq Event CBH , Umask 01H
+Counts number of retired loads that hit the L1 data cache.
+.It Li MEM_LOAD_RETIRED.L2_HIT
+.Pq Event CBH , Umask 02H
+Counts number of retired loads that hit the L2 data cache.
+.It Li MEM_LOAD_RETIRED.L3_UNSHARED_HIT
+.Pq Event CBH , Umask 04H
+Counts number of retired loads that hit their own, unshared lines in the L3
+cache.
+.It Li MEM_LOAD_RETIRED.OTHER_CORE_L2_HIT_HITM
+.Pq Event CBH , Umask 08H
+Counts number of retired loads that hit in a sibling core's L2 (on die
+core). Since the L3 is inclusive of all cores on the package, this is an L3
+hit. This counts both clean or modified hits.
+.It Li MEM_LOAD_RETIRED.L3_MISS
+.Pq Event CBH , Umask 10H
+Counts number of retired loads that miss the L3 cache. The load was
+satisfied by a remote socket, local memory or an IOH.
+.It Li MEM_LOAD_RETIRED.HIT_LFB
+.Pq Event CBH , Umask 40H
+Counts number of retired loads that miss the L1D and the address is located
+in an allocated line fill buffer and will soon be committed to cache. This
+is counting secondary L1D misses.
+.It Li MEM_LOAD_RETIRED.DTLB_MISS
+.Pq Event CBH , Umask 80H
+Counts the number of retired loads that missed the DTLB. The DTLB miss is
+not counted if the load operation causes a fault. This event counts loads
+from cacheable memory only. The event does not count loads by software
+prefetches. Counts both primary and secondary misses to the TLB.
+.It Li FP_MMX_TRANS.TO_FP
+.Pq Event CCH , Umask 01H
+Counts the first floating-point instruction following any MMX instruction.
+You can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.TO_MMX
+.Pq Event CCH , Umask 02H
+Counts the first MMX instruction following a floating-point instruction. You
+can use this event to estimate the penalties for the transitions between
+floating-point and MMX technology states.
+.It Li FP_MMX_TRANS.ANY
+.Pq Event CCH , Umask 03H
+Counts all transitions from floating point to MMX instructions and from MMX
+instructions to floating point instructions. You can use this event to
+estimate the penalties for the transitions between floating-point and MMX
+technology states.
+.It Li MACRO_INSTS.DECODED
+.Pq Event D0H , Umask 01H
+Counts the number of instructions decoded, (but not necessarily executed or
+retired).
+.It Li UOPS_DECODED.STALL_CYCLES
+.Pq Event D1H , Umask 01H
+Counts the cycles of decoder stalls.
+.It Li UOPS_DECODED.MS
+.Pq Event D1H , Umask 02H
+Counts the number of Uops decoded by the Microcode Sequencer, MS. The MS
+delivers uops when the instruction is more than 4 uops long or a microcode
+assist is occurring.
+.It Li UOPS_DECODED.ESP_FOLDING
+.Pq Event D1H , Umask 04H
+Counts number of stack pointer (ESP) instructions decoded: push , pop , call
+, ret, etc. ESP instructions do not generate a Uop to increment or decrement
+ESP. Instead, they update an ESP_Offset register that keeps track of the
+delta to the current value of the ESP register.
+.It Li UOPS_DECODED.ESP_SYNC
+.Pq Event D1H , Umask 08H
+Counts number of stack pointer (ESP) sync operations where an ESP
+instruction is corrected by adding the ESP offset register to the current
+value of the ESP register.
+.It Li RAT_STALLS.FLAGS
+.Pq Event D2H , Umask 01H
+Counts the number of cycles during which execution stalled due to several
+reasons, one of which is a partial flag register stall. A partial register
+stall may occur when two conditions are met: 1) an instruction modifies
+some, but not all, of the flags in the flag register and 2) the next
+instruction, which depends on flags, depends on flags that were not modified
+by this instruction.
+.It Li RAT_STALLS.REGISTERS
+.Pq Event D2H , Umask 02H
+This event counts the number of cycles instruction execution latency became
+longer than the defined latency because the instruction used a register that
+was partially written by previous instruction.
+.It Li RAT_STALLS.ROB_READ_PORT
+.Pq Event D2H , Umask 04H
+Counts the number of cycles when ROB read port stalls occurred, which did
+not allow new micro-ops to enter the out-of-order pipeline. Note that, at
+this stage in the pipeline, additional stalls may occur at the same cycle
+and prevent the stalled micro-ops from entering the pipe. In such a case,
+micro-ops retry entering the execution pipe in the next cycle and the
+ROB-read port stall is counted again.
+.It Li RAT_STALLS.SCOREBOARD
+.Pq Event D2H , Umask 08H
+Counts the cycles where we stall due to microarchitecturally required
+serialization. Microcode scoreboarding stalls.
+.It Li RAT_STALLS.ANY
+.Pq Event D2H , Umask 0FH
+Counts all Register Allocation Table stall cycles due to: cycles when ROB
+read port stalls occurred, which did not allow new micro-ops to enter the
+execution pipe; cycles when partial register stalls occurred; cycles when
+flag stalls occurred; cycles when floating-point unit (FPU) status word stalls
+occurred. To count each of these conditions separately use the events:
+RAT_STALLS.ROB_READ_PORT, RAT_STALLS.PARTIAL, RAT_STALLS.FLAGS, and
+RAT_STALLS.FPSW.
+.It Li SEG_RENAME_STALLS
+.Pq Event D4H , Umask 01H
+Counts the number of stall cycles due to the lack of renaming resources for
+the ES, DS, FS, and GS segment registers. If a segment is renamed but not
+retired and a second update to the same segment occurs, a stall occurs in
+the front-end of the pipeline until the renamed segment retires.
+.It Li ES_REG_RENAMES
+.Pq Event D5H , Umask 01H
+Counts the number of times the ES segment register is renamed.
+.It Li UOP_UNFUSION
+.Pq Event DBH , Umask 01H
+Counts unfusion events due to floating point exception to a fused uop.
+.It Li BR_INST_DECODED
+.Pq Event E0H , Umask 01H
+Counts the number of branch instructions decoded.
+.It Li BPU_MISSED_CALL_RET
+.Pq Event E5H , Umask 01H
+Counts number of times the Branch Prediction Unit missed predicting a call
+or return branch.
+.It Li BACLEAR.CLEAR
+.Pq Event E6H , Umask 01H
+Counts the number of times the front end is resteered, mainly when the
+Branch Prediction Unit cannot provide a correct prediction and this is
+corrected by the Branch Address Calculator at the front end. This can occur
+if the code has many branches such that they cannot be consumed by the BPU.
+Each BACLEAR asserted by the BAC generates approximately an 8 cycle bubble
+in the instruction fetch pipeline. The effect on total execution time
+depends on the surrounding code.
+.It Li BACLEAR.BAD_TARGET
+.Pq Event E6H , Umask 02H
+Counts number of Branch Address Calculator clears (BACLEAR) asserted due to
+conditional branch instructions in which there was a target hit but the
+direction was wrong. Each BACLEAR asserted by the BAC generates
+approximately an 8 cycle bubble in the instruction fetch pipeline.
+.It Li BPU_CLEARS.EARLY
+.Pq Event E8H , Umask 01H
+Counts early (normal) Branch Prediction Unit clears: BPU predicted a taken
+branch after incorrectly assuming that it was not taken.
+The BPU clear leads to 2 cycle bubble in the Front End.
+.It Li BPU_CLEARS.LATE
+.Pq Event E8H , Umask 02H
+Counts late Branch Prediction Unit clears due to Most Recently Used
+conflicts. The BPU clear leads to a 3 cycle bubble in the Front End.
+.It Li THREAD_ACTIVE
+.Pq Event ECH , Umask 01H
+Counts cycles threads are active.
+.It Li L2_TRANSACTIONS.LOAD
+.Pq Event F0H , Umask 01H
+Counts L2 load operations due to HW prefetch or demand loads.
+.It Li L2_TRANSACTIONS.RFO
+.Pq Event F0H , Umask 02H
+Counts L2 RFO operations due to HW prefetch or demand RFOs.
+.It Li L2_TRANSACTIONS.IFETCH
+.Pq Event F0H , Umask 04H
+Counts L2 instruction fetch operations due to HW prefetch or demand ifetch.
+.It Li L2_TRANSACTIONS.PREFETCH
+.Pq Event F0H , Umask 08H
+Counts L2 prefetch operations.
+.It Li L2_TRANSACTIONS.L1D_WB
+.Pq Event F0H , Umask 10H
+Counts L1D writeback operations to the L2.
+.It Li L2_TRANSACTIONS.FILL
+.Pq Event F0H , Umask 20H
+Counts L2 cache line fill operations due to load, RFO, L1D writeback or
+prefetch.
+.It Li L2_TRANSACTIONS.WB
+.Pq Event F0H , Umask 40H
+Counts L2 writeback operations to the L3.
+.It Li L2_TRANSACTIONS.ANY
+.Pq Event F0H , Umask 80H
+Counts all L2 cache operations.
+.It Li L2_LINES_IN.S_STATE
+.Pq Event F1H , Umask 02H
+Counts the number of cache lines allocated in the L2 cache in the S (shared)
+state.
+.It Li L2_LINES_IN.E_STATE
+.Pq Event F1H , Umask 04H
+Counts the number of cache lines allocated in the L2 cache in the E
+(exclusive) state.
+.It Li L2_LINES_IN.ANY
+.Pq Event F1H , Umask 07H
+Counts the number of cache lines allocated in the L2 cache.
+.It Li L2_LINES_OUT.DEMAND_CLEAN
+.Pq Event F2H , Umask 01H
+Counts L2 clean cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.DEMAND_DIRTY
+.Pq Event F2H , Umask 02H
+Counts L2 dirty (modified) cache lines evicted by a demand request.
+.It Li L2_LINES_OUT.PREFETCH_CLEAN
+.Pq Event F2H , Umask 04H
+Counts L2 clean cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.PREFETCH_DIRTY
+.Pq Event F2H , Umask 08H
+Counts L2 modified cache line evicted by a prefetch request.
+.It Li L2_LINES_OUT.ANY
+.Pq Event F2H , Umask 0FH
+Counts all L2 cache lines evicted for any reason.
+.It Li SQ_MISC.LRU_HINTS
+.Pq Event F4H , Umask 04H
+Counts number of Super Queue LRU hints sent to L3.
+.It Li SQ_MISC.SPLIT_LOCK
+.Pq Event F4H , Umask 10H
+Counts the number of SQ lock splits across a cache line.
+.It Li SQ_FULL_STALL_CYCLES
+.Pq Event F6H , Umask 01H
+Counts cycles the Super Queue is full. Neither of the threads on this core
+will be able to access the uncore.
+.It Li FP_ASSIST.ALL
+.Pq Event F7H , Umask 01H
+Counts the number of floating point operations executed that required
+micro-code assist intervention. Assists are required in the following cases:
+SSE instructions, (Denormal input when the DAZ flag is off or Underflow
+result when the FTZ flag is off): x87 instructions, (NaN or denormal are
+loaded to a register or used as input from memory, Division by 0 or
+Underflow output).
+.It Li FP_ASSIST.OUTPUT
+.Pq Event F7H , Umask 02H
+Counts number of floating point micro-code assist when the output value
+(destination register) is invalid.
+.It Li FP_ASSIST.INPUT
+.Pq Event F7H , Umask 04H
+Counts number of floating point micro-code assist when the input value (one
+of the source operands to an FP instruction) is invalid.
+.It Li SIMD_INT_64.PACKED_MPY
+.Pq Event FDH , Umask 01H
+Counts number of SIMD integer 64 bit packed multiply operations.
+.It Li SIMD_INT_64.PACKED_SHIFT
+.Pq Event FDH , Umask 02H
+Counts number of SIMD integer 64 bit packed shift operations.
+.It Li SIMD_INT_64.PACK
+.Pq Event FDH , Umask 04H
+Counts number of SIMD integer 64 bit pack operations.
+.It Li SIMD_INT_64.UNPACK
+.Pq Event FDH , Umask 08H
+Counts number of SIMD integer 64 bit unpack operations.
+.It Li SIMD_INT_64.PACKED_LOGICAL
+.Pq Event FDH , Umask 10H
+Counts number of SIMD integer 64 bit logical operations.
+.It Li SIMD_INT_64.PACKED_ARITH
+.Pq Event FDH , Umask 20H
+Counts number of SIMD integer 64 bit arithmetic operations.
+.It Li SIMD_INT_64.SHUFFLE_MOVE
+.Pq Event FDH , Umask 40H
+Counts number of SIMD integer 64 bit shift or move operations.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmereuc 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
diff --git a/lib/libpmc/pmc.westmereuc.3 b/lib/libpmc/pmc.westmereuc.3
new file mode 100644
index 0000000..c5fd12d
--- /dev/null
+++ b/lib/libpmc/pmc.westmereuc.3
@@ -0,0 +1,1083 @@
+.\" Copyright (c) 2010 Fabien Thomas.  All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed.  in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd March 24, 2010
+.Os
+.Dt PMC.WESTMEREUC 3
+.Sh NAME
+.Nm pmc.westmereuc
+.Nd uncore measurement events for
+.Tn Intel
+.Tn Westmere
+family CPUs
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmc.h
+.Sh DESCRIPTION
+.Tn Intel
+.Tn "Westmere"
+CPUs contain PMCs conforming to version 2 of the
+.Tn Intel
+performance measurement architecture.
+These CPUs contain two classes of PMCs:
+.Bl -tag -width "Li PMC_CLASS_UCP"
+.It Li PMC_CLASS_UCF
+Fixed-function counters that count only one hardware event per counter.
+.It Li PMC_CLASS_UCP
+Programmable counters that may be configured to count one of a defined
+set of hardware events.
+.El
+.Pp
+The number of PMCs available in each class and their widths need to be
+determined at run time by calling
+.Xr pmc_cpuinfo 3 .
+.Pp
+Intel Westmere PMCs are documented in
+.Rs
+.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual"
+.%T "Volume 3B: System Programming Guide, Part 2"
+.%N "Order Number: 253669-033US"
+.%D December 2009
+.%Q "Intel Corporation"
+.Re
+.Ss WESTMERE UNCORE FIXED FUNCTION PMCS
+These PMCs and their supported events are documented in
+.Xr pmc.ucf 3 .
+Not all CPUs in this family implement fixed-function counters.
+.Ss WESTMERE UNCORE PROGRAMMABLE PMCS
+The programmable PMCs support the following capabilities:
+.Bl -column "PMC_CAP_INTERRUPT" "Support"
+.It Em Capability Ta Em Support
+.It PMC_CAP_CASCADE Ta \&No
+.It PMC_CAP_EDGE Ta Yes
+.It PMC_CAP_INTERRUPT Ta \&No
+.It PMC_CAP_INVERT Ta Yes
+.It PMC_CAP_READ Ta Yes
+.It PMC_CAP_PRECISE Ta \&No
+.It PMC_CAP_SYSTEM Ta \&No
+.It PMC_CAP_TAGGING Ta \&No
+.It PMC_CAP_THRESHOLD Ta Yes
+.It PMC_CAP_USER Ta \&No
+.It PMC_CAP_WRITE Ta Yes
+.El
+.Ss Event Qualifiers
+Event specifiers for these PMCs support the following common
+qualifiers:
+.Bl -tag -width indent
+.It Li cmask= Ns Ar value
+Configure the PMC to increment only if the number of configured
+events measured in a cycle is greater than or equal to
+.Ar value .
+.It Li edge
+Configure the PMC to count the number of de-asserted to asserted
+transitions of the conditions expressed by the other qualifiers.
+If specified, the counter will increment only once whenever a
+condition becomes true, irrespective of the number of clocks during
+which the condition remains true.
+.It Li inv
+Invert the sense of comparison when the
+.Dq Li cmask
+qualifier is present, making the counter increment when the number of
+events per cycle is less than the value specified by the
+.Dq Li cmask
+qualifier.
+.El
+.Ss Event Specifiers (Programmable PMCs)
+Westmere uncore programmable PMCs support the following events:
+.Bl -tag -width indent
+.It Li GQ_CYCLES_FULL.READ_TRACKER
+.Pq Event 00H , Umask 01H
+Uncore cycles Global Queue read tracker is full.
+.It Li GQ_CYCLES_FULL.WRITE_TRACKER
+.Pq Event 00H , Umask 02H
+Uncore cycles Global Queue write tracker is full.
+.It Li GQ_CYCLES_FULL.PEER_PROBE_TRACKER
+.Pq Event 00H , Umask 04H
+Uncore cycles Global Queue peer probe tracker is full. The peer probe
+tracker queue tracks snoops from the IOH and remote sockets.
+.It Li GQ_CYCLES_NOT_EMPTY.READ_TRACKER
+.Pq Event 01H , Umask 01H
+Uncore cycles where Global Queue read tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.WRITE_TRACKER
+.Pq Event 01H , Umask 02H
+Uncore cycles where Global Queue write tracker has at least one valid entry.
+.It Li GQ_CYCLES_NOT_EMPTY.PEER_PROBE_TRACKER
+.Pq Event 01H , Umask 04H
+Uncore cycles where Global Queue peer probe tracker has at least one valid
+entry. The peer probe tracker queue tracks IOH and remote socket snoops.
+.It Li GQ_OCCUPANCY.READ_TRACKER
+.Pq Event 02H , Umask 01H
+Increments the number of queue entries (code read, data read, and RFOs) in
+the read tracker. The GQ read tracker allocate to deallocate occupancy
+count is divided by the count to obtain the average read tracker latency.
+.It Li GQ_ALLOC.READ_TRACKER
+.Pq Event 03H , Umask 01H
+Counts the number of read tracker allocate to deallocate entries. The GQ
+read tracker allocate to deallocate occupancy count is divided by the count
+to obtain the average read tracker latency.
+.It Li GQ_ALLOC.RT_L3_MISS
+.Pq Event 03H , Umask 02H
+Counts the number GQ read tracker entries for which a full cache line read
+has missed the L3. The GQ read tracker L3 miss to fill occupancy count is
+divided by this count to obtain the average cache line read L3 miss latency.
+The latency represents the time after which the L3 has determined that the
+cache line has missed. The time between a GQ read tracker allocation and the
+L3 determining that the cache line has missed is the average L3 hit latency.
+The total L3 cache line read miss latency is the hit latency + L3 miss
+latency.
+.It Li GQ_ALLOC.RT_TO_L3_RESP
+.Pq Event 03H , Umask 04H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker queue that hit or miss the L3. The GQ read tracker L3 hit occupancy
+count is divided by this count to obtain the average L3 hit latency.
+.It Li GQ_ALLOC.RT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 08H
+Counts the number of GQ read tracker entries that are allocated in the read
+tracker, have missed in the L3 and have not acquired a Request Transaction
+ID. The GQ read tracker L3 miss to RTID acquired occupancy count is
+divided by this count to obtain the average latency for a read L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WT_TO_RTID_ACQUIRED
+.Pq Event 03H , Umask 10H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker, have missed in the L3 and have not acquired a Request
+Transaction ID. The GQ write tracker L3 miss to RTID occupancy count is
+divided by this count to obtain the average latency for a write L3 miss to
+acquire an RTID.
+.It Li GQ_ALLOC.WRITE_TRACKER
+.Pq Event 03H , Umask 20H
+Counts the number of GQ write tracker entries that are allocated in the
+write tracker queue that miss the L3. The GQ write tracker occupancy count
+is divided by this count to obtain the average L3 write miss latency.
+.It Li GQ_ALLOC.PEER_PROBE_TRACKER
+.Pq Event 03H , Umask 40H
+Counts the number of GQ peer probe tracker (snoop) entries that are
+allocated in the peer probe tracker queue that miss the L3. The GQ peer
+probe occupancy count is divided by this count to obtain the average L3 peer
+probe miss latency.
+.It Li GQ_DATA.FROM_QPI
+.Pq Event 04H , Umask 01H
+Cycles Global Queue Quickpath Interface input data port is busy importing
+data from the Quickpath Interface. Each cycle the input port can transfer 8
+or 16 bytes of data.
+.It Li GQ_DATA.FROM_QMC
+.Pq Event 04H , Umask 02H
+Cycles Global Queue Quickpath Memory Interface input data port is busy
+importing data from the Quickpath Memory Interface. Each cycle the input
+port can transfer 8 or 16 bytes of data.
+.It Li GQ_DATA.FROM_L3
+.Pq Event 04H , Umask 04H
+Cycles GQ L3 input data port is busy importing data from the Last Level
+Cache. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_02
+.Pq Event 04H , Umask 08H
+Cycles GQ Core 0 and 2 input data port is busy importing data from processor
+cores 0 and 2. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.FROM_CORES_13
+.Pq Event 04H , Umask 10H
+Cycles GQ Core 1 and 3 input data port is busy importing data from processor
+cores 1 and 3. Each cycle the input port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_QPI_QMC
+.Pq Event 05H , Umask 01H
+Cycles GQ QPI and QMC output data port is busy sending data to the Quickpath
+Interface or Quickpath Memory Interface. Each cycle the output port can
+transfer 32 bytes of data.
+.It Li GQ_DATA.TO_L3
+.Pq Event 05H , Umask 02H
+Cycles GQ L3 output data port is busy sending data to the Last Level Cache.
+Each cycle the output port can transfer 32 bytes of data.
+.It Li GQ_DATA.TO_CORES
+.Pq Event 05H , Umask 04H
+Cycles GQ Core output data port is busy sending data to the Cores. Each
+cycle the output port can transfer 32 bytes of data.
+.It Li SNP_RESP_TO_LOCAL_HOME.I_STATE
+.Pq Event 06H , Umask 01H
+Number of snoop responses to the local home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_LOCAL_HOME.S_STATE
+.Pq Event 06H , Umask 02H
+Number of snoop responses to the local home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_S_STATE
+.Pq Event 06H , Umask 04H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the local home in the S
+state.
+.It Li SNP_RESP_TO_LOCAL_HOME.FWD_I_STATE
+.Pq Event 06H , Umask 08H
+Number of responses to read invalidate snoops to the local home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the local home in the M state.
+.It Li SNP_RESP_TO_LOCAL_HOME.CONFLICT
+.Pq Event 06H , Umask 10H
+Number of conflict snoop responses sent to the local home.
+.It Li SNP_RESP_TO_LOCAL_HOME.WB
+.Pq Event 06H , Umask 20H
+Number of responses to code or data read snoops to the local home that the
+L3 has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.I_STATE
+.Pq Event 07H , Umask 01H
+Number of snoop responses to a remote home that L3 does not have the
+referenced cache line.
+.It Li SNP_RESP_TO_REMOTE_HOME.S_STATE
+.Pq Event 07H , Umask 02H
+Number of snoop responses to a remote home that L3 has the referenced line
+cached in the S state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_S_STATE
+.Pq Event 07H , Umask 04H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced cache line in the E state. The L3 cache line state is
+changed to the S state and the line is forwarded to the remote home in the S
+state.
+.It Li SNP_RESP_TO_REMOTE_HOME.FWD_I_STATE
+.Pq Event 07H , Umask 08H
+Number of responses to read invalidate snoops to a remote home that the L3
+has the referenced cache line in the M state. The L3 cache line state is
+invalidated and the line is forwarded to the remote home in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.CONFLICT
+.Pq Event 07H , Umask 10H
+Number of conflict snoop responses sent to a remote home.
+.It Li SNP_RESP_TO_REMOTE_HOME.WB
+.Pq Event 07H , Umask 20H
+Number of responses to code or data read snoops to a remote home that the L3
+has the referenced line cached in the M state.
+.It Li SNP_RESP_TO_REMOTE_HOME.HITM
+.Pq Event 07H , Umask 24H
+Number of HITM snoop responses to a remote home
+.It Li L3_HITS.READ
+.Pq Event 08H , Umask 01H
+Number of code read, data read and RFO requests that hit in the L3
+.It Li L3_HITS.WRITE
+.Pq Event 08H , Umask 02H
+Number of writeback requests that hit in the L3. Writebacks from the cores
+will always result in L3 hits due to the inclusive property of the L3.
+.It Li L3_HITS.PROBE
+.Pq Event 08H , Umask 04H
+Number of snoops from IOH or remote sockets that hit in the L3.
+.It Li L3_HITS.ANY
+.Pq Event 08H , Umask 03H
+Number of reads and writes that hit the L3.
+.It Li L3_MISS.READ
+.Pq Event 09H , Umask 01H
+Number of code read, data read and RFO requests that miss the L3.
+.It Li L3_MISS.WRITE
+.Pq Event 09H , Umask 02H
+Number of writeback requests that miss the L3. Should always be zero as
+writebacks from the cores will always result in L3 hits due to the inclusive
+property of the L3.
+.It Li L3_MISS.PROBE
+.Pq Event 09H , Umask 04H
+Number of snoops from IOH or remote sockets that miss the L3.
+.It Li L3_MISS.ANY
+.Pq Event 09H , Umask 03H
+Number of reads and writes that miss the L3.
+.It Li L3_LINES_IN.M_STATE
+.Pq Event 0AH , Umask 01H
+Counts the number of L3 lines allocated in M state. The only time a cache
+line is allocated in the M state is when a line in M state is forwarded
+due to a Snoop Read Invalidate Own request.
+.It Li L3_LINES_IN.E_STATE
+.Pq Event 0AH , Umask 02H
+Counts the number of L3 lines allocated in E state.
+.It Li L3_LINES_IN.S_STATE
+.Pq Event 0AH , Umask 04H
+Counts the number of L3 lines allocated in S state.
+.It Li L3_LINES_IN.F_STATE
+.Pq Event 0AH , Umask 08H
+Counts the number of L3 lines allocated in F state.
+.It Li L3_LINES_IN.ANY
+.Pq Event 0AH , Umask 0FH
+Counts the number of L3 lines allocated in any state.
+.It Li L3_LINES_OUT.M_STATE
+.Pq Event 0BH , Umask 01H
+Counts the number of L3 lines victimized that were in the M state. When the
+victim cache line is in M state, the line is written to its home cache agent
+which can be either local or remote.
+.It Li L3_LINES_OUT.E_STATE
+.Pq Event 0BH , Umask 02H
+Counts the number of L3 lines victimized that were in the E state.
+.It Li L3_LINES_OUT.S_STATE
+.Pq Event 0BH , Umask 04H
+Counts the number of L3 lines victimized that were in the S state.
+.It Li L3_LINES_OUT.I_STATE
+.Pq Event 0BH , Umask 08H
+Counts the number of L3 lines victimized that were in the I state.
+.It Li L3_LINES_OUT.F_STATE
+.Pq Event 0BH , Umask 10H
+Counts the number of L3 lines victimized that were in the F state.
+.It Li L3_LINES_OUT.ANY
+.Pq Event 0BH , Umask 1FH
+Counts the number of L3 lines victimized in any state.
+.It Li GQ_SNOOP.GOTO_S
+.Pq Event 0CH , Umask 01H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state.
+.It Li GQ_SNOOP.GOTO_I
+.Pq Event 0CH , Umask 02H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state.
+.It Li GQ_SNOOP.GOTO_S_HIT_E
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from E state.
+Requires writing MSR 301H with mask = 2H
+.It Li GQ_SNOOP.GOTO_S_HIT_F
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from F (forward) state.
+Requires writing MSR 301H with mask = 8H
+.It Li GQ_SNOOP.GOTO_S_HIT_M
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from M state.
+Requires writing MSR 301H with mask = 1H
+.It Li GQ_SNOOP.GOTO_S_HIT_S
+.Pq Event 0CH , Umask 04H
+Counts the number of remote snoops that have requested a cache line be set
+to the S state from S state.
+Requires writing MSR 301H with mask = 4H
+.It Li GQ_SNOOP.GOTO_I_HIT_E
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from E state.
+Requires writing MSR 301H with mask = 2H
+.It Li GQ_SNOOP.GOTO_I_HIT_F
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from F (forward) state.
+Requires writing MSR 301H with mask = 8H
+.It Li GQ_SNOOP.GOTO_I_HIT_M
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from M state.
+Requires writing MSR 301H with mask = 1H
+.It Li GQ_SNOOP.GOTO_I_HIT_S
+.Pq Event 0CH , Umask 08H
+Counts the number of remote snoops that have requested a cache line be set
+to the I state from S state.
+Requires writing MSR 301H with mask = 4H
+.It Li QHL_REQUESTS.IOH_READS
+.Pq Event 20H , Umask 01H
+Counts number of Quickpath Home Logic read requests from the IOH.
+.It Li QHL_REQUESTS.IOH_WRITES
+.Pq Event 20H , Umask 02H
+Counts number of Quickpath Home Logic write requests from the IOH.
+.It Li QHL_REQUESTS.REMOTE_READS
+.Pq Event 20H , Umask 04H
+Counts number of Quickpath Home Logic read requests from a remote socket.
+.It Li QHL_REQUESTS.REMOTE_WRITES
+.Pq Event 20H , Umask 08H
+Counts number of Quickpath Home Logic write requests from a remote socket.
+.It Li QHL_REQUESTS.LOCAL_READS
+.Pq Event 20H , Umask 10H
+Counts number of Quickpath Home Logic read requests from the local socket.
+.It Li QHL_REQUESTS.LOCAL_WRITES
+.Pq Event 20H , Umask 20H
+Counts number of Quickpath Home Logic write requests from the local socket.
+.It Li QHL_CYCLES_FULL.IOH
+.Pq Event 21H , Umask 01H
+Counts uclk cycles all entries in the Quickpath Home Logic IOH are full.
+.It Li QHL_CYCLES_FULL.REMOTE
+.Pq Event 21H , Umask 02H
+Counts uclk cycles all entries in the Quickpath Home Logic remote tracker
+are full.
+.It Li QHL_CYCLES_FULL.LOCAL
+.Pq Event 21H , Umask 04H
+Counts uclk cycles all entries in the Quickpath Home Logic local tracker are
+full.
+.It Li QHL_CYCLES_NOT_EMPTY.IOH
+.Pq Event 22H , Umask 01H
+Counts uclk cycles the Quickpath Home Logic IOH tracker is busy (not empty).
+.It Li QHL_CYCLES_NOT_EMPTY.REMOTE
+.Pq Event 22H , Umask 02H
+Counts uclk cycles the Quickpath Home Logic remote tracker is busy (not
+empty).
+.It Li QHL_CYCLES_NOT_EMPTY.LOCAL
+.Pq Event 22H , Umask 04H
+Counts uclk cycles the Quickpath Home Logic local tracker is busy (not
+empty).
+.It Li QHL_OCCUPANCY.IOH
+.Pq Event 23H , Umask 01H
+QHL IOH tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.REMOTE
+.Pq Event 23H , Umask 02H
+QHL remote tracker allocate to deallocate read occupancy.
+.It Li QHL_OCCUPANCY.LOCAL
+.Pq Event 23H , Umask 04H
+QHL local tracker allocate to deallocate read occupancy.
+.It Li QHL_ADDRESS_CONFLICTS.2WAY
+.Pq Event 24H , Umask 02H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 2
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_ADDRESS_CONFLICTS.3WAY
+.Pq Event 24H , Umask 04H
+Counts number of QHL Active Address Table (AAT) entries that saw a max of 3
+conflicts. The AAT is a structure that tracks requests that are in conflict.
+The requests themselves are in the home tracker entries. The count is
+reported when an AAT entry deallocates.
+.It Li QHL_CONFLICT_CYCLES.IOH
+.Pq Event 25H , Umask 01H
+Counts cycles the Quickpath Home Logic IOH Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.REMOTE
+.Pq Event 25H , Umask 02H
+Counts cycles the Quickpath Home Logic Remote Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_CONFLICT_CYCLES.LOCAL
+.Pq Event 25H , Umask 04H
+Counts cycles the Quickpath Home Logic Local Tracker contains two or more
+requests with an address conflict. A max of 3 requests can be in conflict.
+.It Li QHL_TO_QMC_BYPASS
+.Pq Event 26H , Umask 01H
+Counts number of requests to the Quickpath Memory Controller that bypass the
+Quickpath Home Logic. All local accesses can be bypassed. For remote
+requests, only read requests can be bypassed.
+.It Li QMC_ISOC_FULL.READ.CH0
+.Pq Event 28H , Umask 01H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH1
+.Pq Event 28H , Umask 02H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.READ.CH2
+.Pq Event 28H , Umask 04H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous read requests.
+.It Li QMC_ISOC_FULL.WRITE.CH0
+.Pq Event 28H , Umask 08H
+Counts cycles all the entries in the DRAM channel 0 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH1
+.Pq Event 28H , Umask 10H
+Counts cycles all the entries in the DRAM channel 1 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_ISOC_FULL.WRITE.CH2
+.Pq Event 28H , Umask 20H
+Counts cycles all the entries in the DRAM channel 2 high priority queue are
+occupied with isochronous write requests.
+.It Li QMC_BUSY.READ.CH0
+.Pq Event 29H , Umask 01H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 0.
+.It Li QMC_BUSY.READ.CH1
+.Pq Event 29H , Umask 02H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 1.
+.It Li QMC_BUSY.READ.CH2
+.Pq Event 29H , Umask 04H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+read request to DRAM channel 2.
+.It Li QMC_BUSY.WRITE.CH0
+.Pq Event 29H , Umask 08H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 0.
+.It Li QMC_BUSY.WRITE.CH1
+.Pq Event 29H , Umask 10H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 1.
+.It Li QMC_BUSY.WRITE.CH2
+.Pq Event 29H , Umask 20H
+Counts cycles where Quickpath Memory Controller has at least 1 outstanding
+write request to DRAM channel 2.
+.It Li QMC_OCCUPANCY.CH0
+.Pq Event 2AH , Umask 01H
+IMC channel 0 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH1
+.Pq Event 2AH , Umask 02H
+IMC channel 1 normal read request occupancy.
+.It Li QMC_OCCUPANCY.CH2
+.Pq Event 2AH , Umask 04H
+IMC channel 2 normal read request occupancy.
+.It Li QMC_OCCUPANCY.ANY
+.Pq Event 2AH , Umask 07H
+Normal read request occupancy for any channel.
+.It Li QMC_ISSOC_OCCUPANCY.CH0
+.Pq Event 2BH , Umask 01H
+IMC channel 0 isochronous read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH1
+.Pq Event 2BH , Umask 02H
+IMC channel 1 isochronous read request occupancy.
+.It Li QMC_ISSOC_OCCUPANCY.CH2
+.Pq Event 2BH , Umask 04H
+IMC channel 2 isochronous read request occupancy.
+.It Li QMC_ISSOC_READS.ANY
+.Pq Event 2BH , Umask 07H
+IMC isochronous read request occupancy.
+.It Li QMC_NORMAL_READS.CH0
+.Pq Event 2CH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 medium and low
+priority read requests. The QMC channel 0 normal read occupancy divided by
+this count provides the average QMC channel 0 read latency.
+.It Li QMC_NORMAL_READS.CH1
+.Pq Event 2CH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 medium and low
+priority read requests. The QMC channel 1 normal read occupancy divided by
+this count provides the average QMC channel 1 read latency.
+.It Li QMC_NORMAL_READS.CH2
+.Pq Event 2CH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 medium and low
+priority read requests. The QMC channel 2 normal read occupancy divided by
+this count provides the average QMC channel 2 read latency.
+.It Li QMC_NORMAL_READS.ANY
+.Pq Event 2CH , Umask 07H
+Counts the number of Quickpath Memory Controller medium and low priority
+read requests. The QMC normal read occupancy divided by this count provides
+the average QMC read latency.
+.It Li QMC_HIGH_PRIORITY_READS.CH0
+.Pq Event 2DH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH1
+.Pq Event 2DH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.CH2
+.Pq Event 2DH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 high priority
+isochronous read requests.
+.It Li QMC_HIGH_PRIORITY_READS.ANY
+.Pq Event 2DH , Umask 07H
+Counts the number of Quickpath Memory Controller high priority isochronous
+read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH0
+.Pq Event 2EH , Umask 01H
+Counts the number of Quickpath Memory Controller channel 0 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH1
+.Pq Event 2EH , Umask 02H
+Counts the number of Quickpath Memory Controller channel 1 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.CH2
+.Pq Event 2EH , Umask 04H
+Counts the number of Quickpath Memory Controller channel 2 critical priority
+isochronous read requests.
+.It Li QMC_CRITICAL_PRIORITY_READS.ANY
+.Pq Event 2EH , Umask 07H
+Counts the number of Quickpath Memory Controller critical priority
+isochronous read requests.
+.It Li QMC_WRITES.FULL.CH0
+.Pq Event 2FH , Umask 01H
+Counts number of full cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.FULL.CH1
+.Pq Event 2FH , Umask 02H
+Counts number of full cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.FULL.CH2
+.Pq Event 2FH , Umask 04H
+Counts number of full cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.FULL.ANY
+.Pq Event 2FH , Umask 07H
+Counts number of full cache line writes to DRAM.
+.It Li QMC_WRITES.PARTIAL.CH0
+.Pq Event 2FH , Umask 08H
+Counts number of partial cache line writes to DRAM channel 0.
+.It Li QMC_WRITES.PARTIAL.CH1
+.Pq Event 2FH , Umask 10H
+Counts number of partial cache line writes to DRAM channel 1.
+.It Li QMC_WRITES.PARTIAL.CH2
+.Pq Event 2FH , Umask 20H
+Counts number of partial cache line writes to DRAM channel 2.
+.It Li QMC_WRITES.PARTIAL.ANY
+.Pq Event 2FH , Umask 38H
+Counts number of partial cache line writes to DRAM.
+.It Li QMC_CANCEL.CH0
+.Pq Event 30H , Umask 01H
+Counts number of DRAM channel 0 cancel requests.
+.It Li QMC_CANCEL.CH1
+.Pq Event 30H , Umask 02H
+Counts number of DRAM channel 1 cancel requests.
+.It Li QMC_CANCEL.CH2
+.Pq Event 30H , Umask 04H
+Counts number of DRAM channel 2 cancel requests.
+.It Li QMC_CANCEL.ANY
+.Pq Event 30H , Umask 07H
+Counts number of DRAM cancel requests.
+.It Li QMC_PRIORITY_UPDATES.CH0
+.Pq Event 31H , Umask 01H
+Counts number of DRAM channel 0 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH1
+.Pq Event 31H , Umask 02H
+Counts number of DRAM channel 1 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.CH2
+.Pq Event 31H , Umask 04H
+Counts number of DRAM channel 2 priority updates. A priority update occurs
+when an ISOC high or critical request is received by the QHL and there is a
+matching request with normal priority that has already been issued to the
+QMC. In this instance, the QHL will send a priority update to QMC to
+expedite the request.
+.It Li QMC_PRIORITY_UPDATES.ANY
+.Pq Event 31H , Umask 07H
+Counts number of DRAM priority updates. A priority update occurs when an
+ISOC high or critical request is received by the QHL and there is a matching
+request with normal priority that has already been issued to the QMC. In
+this instance, the QHL will send a priority update to QMC to expedite the
+request.
+.It Li IMC_RETRY.CH0
+.Pq Event 32H , Umask 01H
+Counts number of IMC DRAM channel 0 retries. DRAM retry only occurs when
+configured in RAS mode.
+.It Li IMC_RETRY.CH1
+.Pq Event 32H , Umask 02H
+Counts number of IMC DRAM channel 1 retries. DRAM retry only occurs when
+configured in RAS mode.
+.It Li IMC_RETRY.CH2
+.Pq Event 32H , Umask 04H
+Counts number of IMC DRAM channel 2 retries. DRAM retry only occurs when
+configured in RAS mode.
+.It Li IMC_RETRY.ANY
+.Pq Event 32H , Umask 07H
+Counts number of IMC DRAM retries from any channel. DRAM retry only occurs
+when configured in RAS mode.
+.It Li QHL_FRC_ACK_CNFLTS.IOH
+.Pq Event 33H , Umask 01H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the IOH.
+.It Li QHL_FRC_ACK_CNFLTS.REMOTE
+.Pq Event 33H , Umask 02H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the remote home.
+.It Li QHL_FRC_ACK_CNFLTS.LOCAL
+.Pq Event 33H , Umask 04H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic to the local home.
+.It Li QHL_FRC_ACK_CNFLTS.ANY
+.Pq Event 33H , Umask 07H
+Counts number of Force Acknowledge Conflict messages sent by the Quickpath
+Home Logic.
+.It Li QHL_SLEEPS.IOH_ORDER
+.Pq Event 34H , Umask 01H
+Counts number of occurrences a request was put to sleep due to IOH ordering
+(write after read) conflicts. While in the sleep state, the request is not
+eligible to be scheduled to the QMC.
+.It Li QHL_SLEEPS.REMOTE_ORDER
+.Pq Event 34H , Umask 02H
+Counts number of occurrences a request was put to sleep due to remote socket
+ordering (write after read) conflicts. While in the sleep state, the request
+is not eligible to be scheduled to the QMC.
+.It Li QHL_SLEEPS.LOCAL_ORDER
+.Pq Event 34H , Umask 04H
+Counts number of occurrences a request was put to sleep due to local socket
+ordering (write after read) conflicts. While in the sleep state, the request
+is not eligible to be scheduled to the QMC.
+.It Li QHL_SLEEPS.IOH_CONFLICT
+.Pq Event 34H , Umask 08H
+Counts number of occurrences a request was put to sleep due to IOH address
+conflicts. While in the sleep state, the request is not eligible to be
+scheduled to the QMC.
+.It Li QHL_SLEEPS.REMOTE_CONFLICT
+.Pq Event 34H , Umask 10H
+Counts number of occurrences a request was put to sleep due to remote socket
+address conflicts. While in the sleep state, the request is not eligible to
+be scheduled to the QMC.
+.It Li QHL_SLEEPS.LOCAL_CONFLICT
+.Pq Event 34H , Umask 20H
+Counts number of occurrences a request was put to sleep due to local socket
+address conflicts. While in the sleep state, the request is not eligible to
+be scheduled to the QMC.
+.It Li ADDR_OPCODE_MATCH.IOH
+.Pq Event 35H , Umask 01H
+Counts number of requests from the IOH, address/opcode of request is
+qualified by mask value written to MSR 396H. The following mask values are
+supported:
+0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS
+40001D00_00000000H:RSPIWB
+Match opcode/address by writing MSR 396H with a supported mask value.
+.It Li ADDR_OPCODE_MATCH.REMOTE
+.Pq Event 35H , Umask 02H
+Counts number of requests from the remote socket, address/opcode of request
+is qualified by mask value written to MSR 396H. The following mask values
+are supported:
+0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS
+40001D00_00000000H:RSPIWB
+Match opcode/address by writing MSR 396H with a supported mask value.
+.It Li ADDR_OPCODE_MATCH.LOCAL
+.Pq Event 35H , Umask 04H
+Counts number of requests from the local socket, address/opcode of request
+is qualified by mask value written to MSR 396H. The following mask values
+are supported:
+0: NONE 40000000_00000000H:RSPFWDI 40001A00_00000000H:RSPFWDS
+40001D00_00000000H:RSPIWB
+Match opcode/address by writing MSR 396H with a supported mask value.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_0
+.Pq Event 40H , Umask 01H
+Counts cycles the Quickpath outbound link 0 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_0
+.Pq Event 40H , Umask 02H
+Counts cycles the Quickpath outbound link 0 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_0
+.Pq Event 40H , Umask 04H
+Counts cycles the Quickpath outbound link 0 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.HOME.LINK_1
+.Pq Event 40H , Umask 08H
+Counts cycles the Quickpath outbound link 1 HOME virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.SNOOP.LINK_1
+.Pq Event 40H , Umask 10H
+Counts cycles the Quickpath outbound link 1 SNOOP virtual channel is stalled
+due to lack of a VNA and VN0 credit. Note that this event does not filter
+out when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.NDR.LINK_1
+.Pq Event 40H , Umask 20H
+Counts cycles the Quickpath outbound link 1 non-data response virtual
+channel is stalled due to lack of a VNA and VN0 credit. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_0
+.Pq Event 40H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_SINGLE_FLIT.LINK_1
+.Pq Event 40H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of a VNA and VN0 credit. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_0
+.Pq Event 41H , Umask 01H
+Counts cycles the Quickpath outbound link 0 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_0
+.Pq Event 41H , Umask 02H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_0
+.Pq Event 41H , Umask 04H
+Counts cycles the Quickpath outbound link 0 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.DRS.LINK_1
+.Pq Event 41H , Umask 08H
+Counts cycles the Quickpath outbound link 1 Data ResponSe virtual channel is
+stalled due to lack of VNA and VN0 credits. Note that this event does not
+filter out when a flit would not have been selected for arbitration because
+another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCB.LINK_1
+.Pq Event 41H , Umask 10H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Bypass virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.NCS.LINK_1
+.Pq Event 41H , Umask 20H
+Counts cycles the Quickpath outbound link 1 Non-Coherent Standard virtual
+channel is stalled due to lack of VNA and VN0 credits. Note that this event
+does not filter out when a flit would not have been selected for arbitration
+because another virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_0
+.Pq Event 41H , Umask 07H
+Counts cycles the Quickpath outbound link 0 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_STALLED_MULTI_FLIT.LINK_1
+.Pq Event 41H , Umask 38H
+Counts cycles the Quickpath outbound link 1 virtual channels are stalled due
+to lack of VNA and VN0 credits. Note that this event does not filter out
+when a flit would not have been selected for arbitration because another
+virtual channel is getting arbitrated.
+.It Li QPI_TX_HEADER.FULL.LINK_0
+.Pq Event 42H , Umask 01H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 0 is full.
+.It Li QPI_TX_HEADER.BUSY.LINK_0
+.Pq Event 42H , Umask 02H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 0 is busy.
+.It Li QPI_TX_HEADER.FULL.LINK_1
+.Pq Event 42H , Umask 04H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 1 is full.
+.It Li QPI_TX_HEADER.BUSY.LINK_1
+.Pq Event 42H , Umask 08H
+Number of cycles that the header buffer in the Quickpath Interface outbound
+link 1 is busy.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_0
+.Pq Event 43H , Umask 01H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+0 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li QPI_RX_NO_PPT_CREDIT.STALLS.LINK_1
+.Pq Event 43H , Umask 02H
+Number of cycles that snoop packets incoming to the Quickpath Interface link
+1 are stalled and not sent to the GQ because the GQ Peer Probe Tracker (PPT)
+does not have any available entries.
+.It Li DRAM_OPEN.CH0
+.Pq Event 60H , Umask 01H
+Counts number of DRAM Channel 0 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH1
+.Pq Event 60H , Umask 02H
+Counts number of DRAM Channel 1 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_OPEN.CH2
+.Pq Event 60H , Umask 04H
+Counts number of DRAM Channel 2 open commands issued either for read or
+write. To read or write data, the referenced DRAM page must first be opened.
+.It Li DRAM_PAGE_CLOSE.CH0
+.Pq Event 61H , Umask 01H
+DRAM channel 0 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH1
+.Pq Event 61H , Umask 02H
+DRAM channel 1 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_CLOSE.CH2
+.Pq Event 61H , Umask 04H
+DRAM channel 2 command issued to CLOSE a page due to page idle timer
+expiration. Closing a page is done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH0
+.Pq Event 62H , Umask 01H
+Counts the number of precharges (PRE) that were issued to DRAM channel 0
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH1
+.Pq Event 62H , Umask 02H
+Counts the number of precharges (PRE) that were issued to DRAM channel 1
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_PAGE_MISS.CH2
+.Pq Event 62H , Umask 04H
+Counts the number of precharges (PRE) that were issued to DRAM channel 2
+because there was a page miss. A page miss refers to a situation in which a
+page is currently open and another page from the same bank needs to be
+opened. The new page experiences a page miss. Closing of the old page is
+done by issuing a precharge.
+.It Li DRAM_READ_CAS.CH0
+.Pq Event 63H , Umask 01H
+Counts the number of times a read CAS command was issued on DRAM channel 0.
+.It Li DRAM_READ_CAS.AUTOPRE_CH0
+.Pq Event 63H , Umask 02H
+Counts the number of times a read CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH1
+.Pq Event 63H , Umask 04H
+Counts the number of times a read CAS command was issued on DRAM channel 1.
+.It Li DRAM_READ_CAS.AUTOPRE_CH1
+.Pq Event 63H , Umask 08H
+Counts the number of times a read CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_READ_CAS.CH2
+.Pq Event 63H , Umask 10H
+Counts the number of times a read CAS command was issued on DRAM channel 2.
+.It Li DRAM_READ_CAS.AUTOPRE_CH2
+.Pq Event 63H , Umask 20H
+Counts the number of times a read CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH0
+.Pq Event 64H , Umask 01H
+Counts the number of times a write CAS command was issued on DRAM channel 0.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH0
+.Pq Event 64H , Umask 02H
+Counts the number of times a write CAS command was issued on DRAM channel 0
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH1
+.Pq Event 64H , Umask 04H
+Counts the number of times a write CAS command was issued on DRAM channel 1.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH1
+.Pq Event 64H , Umask 08H
+Counts the number of times a write CAS command was issued on DRAM channel 1
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_WRITE_CAS.CH2
+.Pq Event 64H , Umask 10H
+Counts the number of times a write CAS command was issued on DRAM channel 2.
+.It Li DRAM_WRITE_CAS.AUTOPRE_CH2
+.Pq Event 64H , Umask 20H
+Counts the number of times a write CAS command was issued on DRAM channel 2
+where the command issued used the auto-precharge (auto page close) mode.
+.It Li DRAM_REFRESH.CH0
+.Pq Event 65H , Umask 01H
+Counts number of DRAM channel 0 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH1
+.Pq Event 65H , Umask 02H
+Counts number of DRAM channel 1 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_REFRESH.CH2
+.Pq Event 65H , Umask 04H
+Counts number of DRAM channel 2 refresh commands. DRAM loses data content
+over time. In order to keep correct data content, the data values have to be
+refreshed periodically.
+.It Li DRAM_PRE_ALL.CH0
+.Pq Event 66H , Umask 01H
+Counts number of DRAM Channel 0 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH1
+.Pq Event 66H , Umask 02H
+Counts number of DRAM Channel 1 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_PRE_ALL.CH2
+.Pq Event 66H , Umask 04H
+Counts number of DRAM Channel 2 precharge-all (PREALL) commands that close
+all open pages in a rank. PREALL is issued when the DRAM needs to be
+refreshed or needs to go into a power down mode.
+.It Li DRAM_THERMAL_THROTTLED
+.Pq Event 67H , Umask 01H
+Uncore cycles DRAM was throttled due to its temperature being above the
+thermal throttling threshold.
+.It Li THERMAL_THROTTLING_TEMP.CORE_0
+.Pq Event 80H , Umask 01H
+Cycles that the PCU records that core 0 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLING_TEMP.CORE_1
+.Pq Event 80H , Umask 02H
+Cycles that the PCU records that core 1 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLING_TEMP.CORE_2
+.Pq Event 80H , Umask 04H
+Cycles that the PCU records that core 2 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLING_TEMP.CORE_3
+.Pq Event 80H , Umask 08H
+Cycles that the PCU records that core 3 is above the thermal throttling
+threshold temperature.
+.It Li THERMAL_THROTTLED_TEMP.CORE_0
+.Pq Event 81H , Umask 01H
+Cycles that the PCU records that core 0 is in the power throttled state due
+to the core's temperature being above the thermal throttling threshold.
+.It Li THERMAL_THROTTLED_TEMP.CORE_1
+.Pq Event 81H , Umask 02H
+Cycles that the PCU records that core 1 is in the power throttled state due
+to the core's temperature being above the thermal throttling threshold.
+.It Li THERMAL_THROTTLED_TEMP.CORE_2
+.Pq Event 81H , Umask 04H
+Cycles that the PCU records that core 2 is in the power throttled state due
+to the core's temperature being above the thermal throttling threshold.
+.It Li THERMAL_THROTTLED_TEMP.CORE_3
+.Pq Event 81H , Umask 08H
+Cycles that the PCU records that core 3 is in the power throttled state due
+to the core's temperature being above the thermal throttling threshold.
+.It Li PROCHOT_ASSERTION
+.Pq Event 82H , Umask 01H
+Number of system assertions of PROCHOT indicating the entire processor has
+exceeded the thermal limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_0
+.Pq Event 83H , Umask 01H
+Cycles that the PCU records that core 0 is in a low power state due to the
+system asserting PROCHOT, indicating the entire processor has exceeded the
+thermal limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_1
+.Pq Event 83H , Umask 02H
+Cycles that the PCU records that core 1 is in a low power state due to the
+system asserting PROCHOT, indicating the entire processor has exceeded the
+thermal limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_2
+.Pq Event 83H , Umask 04H
+Cycles that the PCU records that core 2 is in a low power state due to the
+system asserting PROCHOT, indicating the entire processor has exceeded the
+thermal limit.
+.It Li THERMAL_THROTTLING_PROCHOT.CORE_3
+.Pq Event 83H , Umask 08H
+Cycles that the PCU records that core 3 is in a low power state due to the
+system asserting PROCHOT, indicating the entire processor has exceeded the
+thermal limit.
+.It Li TURBO_MODE.CORE_0
+.Pq Event 84H , Umask 01H
+Uncore cycles that core 0 is operating in turbo mode.
+.It Li TURBO_MODE.CORE_1
+.Pq Event 84H , Umask 02H
+Uncore cycles that core 1 is operating in turbo mode.
+.It Li TURBO_MODE.CORE_2
+.Pq Event 84H , Umask 04H
+Uncore cycles that core 2 is operating in turbo mode.
+.It Li TURBO_MODE.CORE_3
+.Pq Event 84H , Umask 08H
+Uncore cycles that core 3 is operating in turbo mode.
+.It Li CYCLES_UNHALTED_L3_FLL_ENABLE
+.Pq Event 85H , Umask 02H
+Uncore cycles that at least one core is unhalted and all L3 ways are
+enabled.
+.It Li CYCLES_UNHALTED_L3_FLL_DISABLE
+.Pq Event 86H , Umask 01H
+Uncore cycles that at least one core is unhalted and all L3 ways are
+disabled.
+.El
+.Sh SEE ALSO
+.Xr pmc 3 ,
+.Xr pmc.atom 3 ,
+.Xr pmc.core 3 ,
+.Xr pmc.iaf 3 ,
+.Xr pmc.ucf 3 ,
+.Xr pmc.k7 3 ,
+.Xr pmc.k8 3 ,
+.Xr pmc.p4 3 ,
+.Xr pmc.p5 3 ,
+.Xr pmc.p6 3 ,
+.Xr pmc.corei7 3 ,
+.Xr pmc.corei7uc 3 ,
+.Xr pmc.westmere 3 ,
+.Xr pmc.tsc 3 ,
+.Xr pmc_cpuinfo 3 ,
+.Xr pmclog 3 ,
+.Xr hwpmc 4
+.Sh HISTORY
+The
+.Nm pmc
+library first appeared in
+.Fx 6.0 .
+.Sh AUTHORS
+The
+.Lb libpmc
+library was written by
+.An "Joseph Koshy"
+.Aq jkoshy@FreeBSD.org .
author	fabient <fabient@FreeBSD.org>	2010-04-02 13:23:49 +0000
committer	fabient <fabient@FreeBSD.org>	2010-04-02 13:23:49 +0000
commit	85d5b2855f1b8db1aa9a2dd7945b711399a111b7 (patch)
tree	74703b81e372faa288cd54560b3715fd2bc1e0ce /lib/libpmc
parent	0e3cec01fc03abe666c04e3bfb10c74c603bc896 (diff)
download	FreeBSD-src-85d5b2855f1b8db1aa9a2dd7945b711399a111b7.zip FreeBSD-src-85d5b2855f1b8db1aa9a2dd7945b711399a111b7.tar.gz