diff options
Diffstat (limited to 'lib/libpmc')
-rw-r--r-- | lib/libpmc/libpmc.c | 206 | ||||
-rw-r--r-- | lib/libpmc/pmc.3 | 1 | ||||
-rw-r--r-- | lib/libpmc/pmc.corei7.3 | 8 | ||||
-rw-r--r-- | lib/libpmc/pmc.corei7uc.3 | 8 | ||||
-rw-r--r-- | lib/libpmc/pmc.haswell.3 | 153 | ||||
-rw-r--r-- | lib/libpmc/pmc.haswellxeon.3 | 956 | ||||
-rw-r--r-- | lib/libpmc/pmc.ivybridge.3 | 141 | ||||
-rw-r--r-- | lib/libpmc/pmc.ivybridgexeon.3 | 141 | ||||
-rw-r--r-- | lib/libpmc/pmc.sandybridge.3 | 100 | ||||
-rw-r--r-- | lib/libpmc/pmc.sandybridgexeon.3 | 112 | ||||
-rw-r--r-- | lib/libpmc/pmc.soft.3 | 8 | ||||
-rw-r--r-- | lib/libpmc/pmc.ucf.3 | 8 | ||||
-rw-r--r-- | lib/libpmc/pmc.westmere.3 | 8 | ||||
-rw-r--r-- | lib/libpmc/pmc.westmereuc.3 | 8 | ||||
-rw-r--r-- | lib/libpmc/pmc.xscale.3 | 2 | ||||
-rw-r--r-- | lib/libpmc/pmc_capabilities.3 | 2 |
16 files changed, 1452 insertions, 410 deletions
diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index 5282bb2..d9d7902 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -74,10 +74,18 @@ static int p6_allocate_pmc(enum pmc_event _pe, char *_ctrspec, static int tsc_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif +#if defined(__arm__) #if defined(__XSCALE__) static int xscale_allocate_pmc(enum pmc_event _pe, char *_ctrspec, struct pmc_op_pmcallocate *_pmc_config); #endif +static int armv7_allocate_pmc(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); +#endif +#if defined(__aarch64__) +static int arm64_allocate_pmc(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); +#endif #if defined(__mips__) static int mips_allocate_pmc(enum pmc_event _pe, char* ctrspec, struct pmc_op_pmcallocate *_pmc_config); @@ -153,11 +161,15 @@ PMC_CLASSDEP_TABLE(p4, P4); PMC_CLASSDEP_TABLE(p5, P5); PMC_CLASSDEP_TABLE(p6, P6); PMC_CLASSDEP_TABLE(xscale, XSCALE); +PMC_CLASSDEP_TABLE(armv7, ARMV7); +PMC_CLASSDEP_TABLE(armv8, ARMV8); PMC_CLASSDEP_TABLE(mips24k, MIPS24K); +PMC_CLASSDEP_TABLE(mips74k, MIPS74K); PMC_CLASSDEP_TABLE(octeon, OCTEON); PMC_CLASSDEP_TABLE(ucf, UCF); PMC_CLASSDEP_TABLE(ppc7450, PPC7450); PMC_CLASSDEP_TABLE(ppc970, PPC970); +PMC_CLASSDEP_TABLE(e500, E500); static struct pmc_event_descr soft_event_table[PMC_EV_DYN_COUNT]; @@ -200,6 +212,12 @@ static const struct pmc_event_descr haswell_event_table[] = __PMC_EV_ALIAS_HASWELL() }; +static const struct pmc_event_descr haswell_xeon_event_table[] = +{ + __PMC_EV_ALIAS_HASWELL_XEON() +}; + + static const struct pmc_event_descr ivybridge_event_table[] = { __PMC_EV_ALIAS_IVYBRIDGE() @@ -250,6 +268,16 @@ static const struct pmc_event_descr westmereuc_event_table[] = __PMC_EV_ALIAS_WESTMEREUC() }; +static const struct pmc_event_descr cortex_a53_event_table[] = +{ + __PMC_EV_ALIAS_ARMV8_CORTEX_A53() +}; + +static const struct pmc_event_descr cortex_a57_event_table[] = +{ + __PMC_EV_ALIAS_ARMV8_CORTEX_A57() +}; + /* * PMC_MDEP_TABLE(NAME, PRIMARYCLASS, ADDITIONAL_CLASSES...) * @@ -267,6 +295,7 @@ PMC_MDEP_TABLE(core2, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC); PMC_MDEP_TABLE(corei7, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP); PMC_MDEP_TABLE(nehalem_ex, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC); PMC_MDEP_TABLE(haswell, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP); +PMC_MDEP_TABLE(haswell_xeon, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP); PMC_MDEP_TABLE(ivybridge, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC); PMC_MDEP_TABLE(ivybridge_xeon, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC); PMC_MDEP_TABLE(sandybridge, IAP, PMC_CLASS_SOFT, PMC_CLASS_IAF, PMC_CLASS_TSC, PMC_CLASS_UCF, PMC_CLASS_UCP); @@ -279,10 +308,15 @@ PMC_MDEP_TABLE(p4, P4, PMC_CLASS_SOFT, PMC_CLASS_TSC); PMC_MDEP_TABLE(p5, P5, PMC_CLASS_SOFT, PMC_CLASS_TSC); PMC_MDEP_TABLE(p6, P6, PMC_CLASS_SOFT, PMC_CLASS_TSC); PMC_MDEP_TABLE(xscale, XSCALE, PMC_CLASS_SOFT, PMC_CLASS_XSCALE); +PMC_MDEP_TABLE(armv7, ARMV7, PMC_CLASS_SOFT, PMC_CLASS_ARMV7); +PMC_MDEP_TABLE(cortex_a53, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8); +PMC_MDEP_TABLE(cortex_a57, ARMV8, PMC_CLASS_SOFT, PMC_CLASS_ARMV8); PMC_MDEP_TABLE(mips24k, MIPS24K, PMC_CLASS_SOFT, PMC_CLASS_MIPS24K); +PMC_MDEP_TABLE(mips74k, MIPS74K, PMC_CLASS_SOFT, PMC_CLASS_MIPS74K); PMC_MDEP_TABLE(octeon, OCTEON, PMC_CLASS_SOFT, PMC_CLASS_OCTEON); -PMC_MDEP_TABLE(ppc7450, PPC7450, PMC_CLASS_SOFT, PMC_CLASS_PPC7450); -PMC_MDEP_TABLE(ppc970, PPC970, PMC_CLASS_SOFT, PMC_CLASS_PPC970); +PMC_MDEP_TABLE(ppc7450, PPC7450, PMC_CLASS_SOFT, PMC_CLASS_PPC7450, PMC_CLASS_TSC); +PMC_MDEP_TABLE(ppc970, PPC970, PMC_CLASS_SOFT, PMC_CLASS_PPC970, PMC_CLASS_TSC); +PMC_MDEP_TABLE(e500, E500, PMC_CLASS_SOFT, PMC_CLASS_E500, PMC_CLASS_TSC); PMC_MDEP_TABLE(generic, SOFT, PMC_CLASS_SOFT); static const struct pmc_event_descr tsc_event_table[] = @@ -312,6 +346,7 @@ PMC_CLASS_TABLE_DESC(core2, IAP, core2, iap); PMC_CLASS_TABLE_DESC(corei7, IAP, corei7, iap); PMC_CLASS_TABLE_DESC(nehalem_ex, IAP, nehalem_ex, iap); PMC_CLASS_TABLE_DESC(haswell, IAP, haswell, iap); +PMC_CLASS_TABLE_DESC(haswell_xeon, IAP, haswell_xeon, iap); PMC_CLASS_TABLE_DESC(ivybridge, IAP, ivybridge, iap); PMC_CLASS_TABLE_DESC(ivybridge_xeon, IAP, ivybridge_xeon, iap); PMC_CLASS_TABLE_DESC(sandybridge, IAP, sandybridge, iap); @@ -338,16 +373,25 @@ PMC_CLASS_TABLE_DESC(p6, P6, p6, p6); #if defined(__i386__) || defined(__amd64__) PMC_CLASS_TABLE_DESC(tsc, TSC, tsc, tsc); #endif +#if defined(__arm__) #if defined(__XSCALE__) PMC_CLASS_TABLE_DESC(xscale, XSCALE, xscale, xscale); #endif +PMC_CLASS_TABLE_DESC(armv7, ARMV7, armv7, armv7); +#endif +#if defined(__aarch64__) +PMC_CLASS_TABLE_DESC(cortex_a53, ARMV8, cortex_a53, arm64); +PMC_CLASS_TABLE_DESC(cortex_a57, ARMV8, cortex_a57, arm64); +#endif #if defined(__mips__) PMC_CLASS_TABLE_DESC(mips24k, MIPS24K, mips24k, mips); +PMC_CLASS_TABLE_DESC(mips74k, MIPS74K, mips74k, mips); PMC_CLASS_TABLE_DESC(octeon, OCTEON, octeon, mips); #endif /* __mips__ */ #if defined(__powerpc__) PMC_CLASS_TABLE_DESC(ppc7450, PPC7450, ppc7450, powerpc); PMC_CLASS_TABLE_DESC(ppc970, PPC970, ppc970, powerpc); +PMC_CLASS_TABLE_DESC(e500, E500, e500, powerpc); #endif static struct pmc_class_descr soft_class_table_descr = @@ -379,9 +423,14 @@ static const char * pmc_capability_names[] = { __PMC_CAPS() }; -static const char * pmc_class_names[] = { +struct pmc_class_map { + enum pmc_class pm_class; + const char *pm_name; +}; + +static const struct pmc_class_map pmc_class_names[] = { #undef __PMC_CLASS -#define __PMC_CLASS(C) #C , +#define __PMC_CLASS(S,V,D) { .pm_class = PMC_CLASS_##S, .pm_name = #S } , __PMC_CLASSES() }; @@ -626,6 +675,8 @@ static struct pmc_event_alias core2_aliases_without_iaf[] = { #define nehalem_ex_aliases_without_iaf core2_aliases_without_iaf #define haswell_aliases core2_aliases #define haswell_aliases_without_iaf core2_aliases_without_iaf +#define haswell_xeon_aliases core2_aliases +#define haswell_xeon_aliases_without_iaf core2_aliases_without_iaf #define ivybridge_aliases core2_aliases #define ivybridge_aliases_without_iaf core2_aliases_without_iaf #define ivybridge_xeon_aliases core2_aliases @@ -896,7 +947,8 @@ iap_allocate_pmc(enum pmc_event pe, char *ctrspec, n = pmc_parse_mask(iap_rsp_mask_sb_sbx_ib, p, &rsp); } else return (-1); - } else if (cpu_info.pm_cputype == PMC_CPU_INTEL_HASWELL) { + } else if (cpu_info.pm_cputype == PMC_CPU_INTEL_HASWELL || + cpu_info.pm_cputype == PMC_CPU_INTEL_HASWELL_XEON) { if (KWPREFIXMATCH(p, IAP_KW_RSP "=")) { n = pmc_parse_mask(iap_rsp_mask_haswell, p, &rsp); } else @@ -2360,6 +2412,7 @@ soft_allocate_pmc(enum pmc_event pe, char *ctrspec, return (0); } +#if defined(__arm__) #if defined(__XSCALE__) static struct pmc_event_alias xscale_aliases[] = { @@ -2383,6 +2436,45 @@ xscale_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, } #endif +static struct pmc_event_alias armv7_aliases[] = { + EV_ALIAS("dc-misses", "L1_DCACHE_REFILL"), + EV_ALIAS("ic-misses", "L1_ICACHE_REFILL"), + EV_ALIAS("instructions", "INSTR_EXECUTED"), + EV_ALIAS(NULL, NULL) +}; +static int +armv7_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, + struct pmc_op_pmcallocate *pmc_config __unused) +{ + switch (pe) { + default: + break; + } + + return (0); +} +#endif + +#if defined(__aarch64__) +static struct pmc_event_alias cortex_a53_aliases[] = { + EV_ALIAS(NULL, NULL) +}; +static struct pmc_event_alias cortex_a57_aliases[] = { + EV_ALIAS(NULL, NULL) +}; +static int +arm64_allocate_pmc(enum pmc_event pe, char *ctrspec __unused, + struct pmc_op_pmcallocate *pmc_config __unused) +{ + switch (pe) { + default: + break; + } + + return (0); +} +#endif + #if defined(__mips__) static struct pmc_event_alias mips24k_aliases[] = { @@ -2392,6 +2484,13 @@ static struct pmc_event_alias mips24k_aliases[] = { EV_ALIAS(NULL, NULL) }; +static struct pmc_event_alias mips74k_aliases[] = { + EV_ALIAS("instructions", "INSTR_EXECUTED"), + EV_ALIAS("branches", "BRANCH_INSNS"), + EV_ALIAS("branch-mispredicts", "MISPREDICTED_BRANCH_INSNS"), + EV_ALIAS(NULL, NULL) +}; + static struct pmc_event_alias octeon_aliases[] = { EV_ALIAS("instructions", "RET"), EV_ALIAS("branches", "BR"), @@ -2444,6 +2543,12 @@ static struct pmc_event_alias ppc970_aliases[] = { EV_ALIAS(NULL, NULL) }; +static struct pmc_event_alias e500_aliases[] = { + EV_ALIAS("instructions", "INSTR_COMPLETED"), + EV_ALIAS("cycles", "CYCLES"), + EV_ALIAS(NULL, NULL) +}; + #define POWERPC_KW_OS "os" #define POWERPC_KW_USR "usr" #define POWERPC_KW_ANYTHREAD "anythread" @@ -2788,6 +2893,10 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, ev = haswell_event_table; count = PMC_EVENT_TABLE_SIZE(haswell); break; + case PMC_CPU_INTEL_HASWELL_XEON: + ev = haswell_xeon_event_table; + count = PMC_EVENT_TABLE_SIZE(haswell_xeon); + break; case PMC_CPU_INTEL_IVYBRIDGE: ev = ivybridge_event_table; count = PMC_EVENT_TABLE_SIZE(ivybridge); @@ -2871,10 +2980,31 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, ev = xscale_event_table; count = PMC_EVENT_TABLE_SIZE(xscale); break; + case PMC_CLASS_ARMV7: + ev = armv7_event_table; + count = PMC_EVENT_TABLE_SIZE(armv7); + break; + case PMC_CLASS_ARMV8: + switch (cpu_info.pm_cputype) { + default: + case PMC_CPU_ARMV8_CORTEX_A53: + ev = cortex_a53_event_table; + count = PMC_EVENT_TABLE_SIZE(cortex_a53); + break; + case PMC_CPU_ARMV8_CORTEX_A57: + ev = cortex_a57_event_table; + count = PMC_EVENT_TABLE_SIZE(cortex_a57); + break; + } + break; case PMC_CLASS_MIPS24K: ev = mips24k_event_table; count = PMC_EVENT_TABLE_SIZE(mips24k); break; + case PMC_CLASS_MIPS74K: + ev = mips74k_event_table; + count = PMC_EVENT_TABLE_SIZE(mips74k); + break; case PMC_CLASS_OCTEON: ev = octeon_event_table; count = PMC_EVENT_TABLE_SIZE(octeon); @@ -2887,6 +3017,10 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, ev = ppc970_event_table; count = PMC_EVENT_TABLE_SIZE(ppc970); break; + case PMC_CLASS_E500: + ev = e500_event_table; + count = PMC_EVENT_TABLE_SIZE(e500); + break; case PMC_CLASS_SOFT: ev = soft_event_table; count = soft_event_info.pm_nevent; @@ -3115,6 +3249,9 @@ pmc_init(void) pmc_class_table[n++] = &haswelluc_class_table_descr; PMC_MDEP_INIT_INTEL_V2(haswell); break; + case PMC_CPU_INTEL_HASWELL_XEON: + PMC_MDEP_INIT_INTEL_V2(haswell_xeon); + break; case PMC_CPU_INTEL_IVYBRIDGE: PMC_MDEP_INIT_INTEL_V2(ivybridge); break; @@ -3145,17 +3282,37 @@ pmc_init(void) case PMC_CPU_GENERIC: PMC_MDEP_INIT(generic); break; +#if defined(__arm__) #if defined(__XSCALE__) case PMC_CPU_INTEL_XSCALE: PMC_MDEP_INIT(xscale); pmc_class_table[n] = &xscale_class_table_descr; break; #endif + case PMC_CPU_ARMV7: + PMC_MDEP_INIT(armv7); + pmc_class_table[n] = &armv7_class_table_descr; + break; +#endif +#if defined(__aarch64__) + case PMC_CPU_ARMV8_CORTEX_A53: + PMC_MDEP_INIT(cortex_a53); + pmc_class_table[n] = &cortex_a53_class_table_descr; + break; + case PMC_CPU_ARMV8_CORTEX_A57: + PMC_MDEP_INIT(cortex_a57); + pmc_class_table[n] = &cortex_a57_class_table_descr; + break; +#endif #if defined(__mips__) case PMC_CPU_MIPS_24K: PMC_MDEP_INIT(mips24k); pmc_class_table[n] = &mips24k_class_table_descr; break; + case PMC_CPU_MIPS_74K: + PMC_MDEP_INIT(mips74k); + pmc_class_table[n] = &mips74k_class_table_descr; + break; case PMC_CPU_MIPS_OCTEON: PMC_MDEP_INIT(octeon); pmc_class_table[n] = &octeon_class_table_descr; @@ -3170,6 +3327,10 @@ pmc_init(void) PMC_MDEP_INIT(ppc970); pmc_class_table[n] = &ppc970_class_table_descr; break; + case PMC_CPU_PPC_E500: + PMC_MDEP_INIT(e500); + pmc_class_table[n] = &e500_class_table_descr; + break; #endif default: /* @@ -3206,9 +3367,11 @@ pmc_name_of_capability(enum pmc_caps cap) const char * pmc_name_of_class(enum pmc_class pc) { - if ((int) pc >= PMC_CLASS_FIRST && - pc <= PMC_CLASS_LAST) - return (pmc_class_names[pc]); + size_t n; + + for (n = 0; n < PMC_TABLE_SIZE(pmc_class_names); n++) + if (pc == pmc_class_names[n].pm_class) + return (pmc_class_names[n].pm_name); errno = EINVAL; return (NULL); @@ -3280,6 +3443,11 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) ev = haswell_event_table; evfence = haswell_event_table + PMC_EVENT_TABLE_SIZE(haswell); break; + case PMC_CPU_INTEL_HASWELL_XEON: + ev = haswell_xeon_event_table; + evfence = haswell_xeon_event_table + PMC_EVENT_TABLE_SIZE(haswell_xeon); + break; + case PMC_CPU_INTEL_IVYBRIDGE: ev = ivybridge_event_table; evfence = ivybridge_event_table + PMC_EVENT_TABLE_SIZE(ivybridge); @@ -3346,9 +3514,28 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) } else if (pe >= PMC_EV_XSCALE_FIRST && pe <= PMC_EV_XSCALE_LAST) { ev = xscale_event_table; evfence = xscale_event_table + PMC_EVENT_TABLE_SIZE(xscale); + } else if (pe >= PMC_EV_ARMV7_FIRST && pe <= PMC_EV_ARMV7_LAST) { + ev = armv7_event_table; + evfence = armv7_event_table + PMC_EVENT_TABLE_SIZE(armv7); + } else if (pe >= PMC_EV_ARMV8_FIRST && pe <= PMC_EV_ARMV8_LAST) { + switch (cpu) { + case PMC_CPU_ARMV8_CORTEX_A53: + ev = cortex_a53_event_table; + evfence = cortex_a53_event_table + PMC_EVENT_TABLE_SIZE(cortex_a53); + break; + case PMC_CPU_ARMV8_CORTEX_A57: + ev = cortex_a57_event_table; + evfence = cortex_a57_event_table + PMC_EVENT_TABLE_SIZE(cortex_a57); + break; + default: /* Unknown CPU type. */ + break; + } } else if (pe >= PMC_EV_MIPS24K_FIRST && pe <= PMC_EV_MIPS24K_LAST) { ev = mips24k_event_table; evfence = mips24k_event_table + PMC_EVENT_TABLE_SIZE(mips24k); + } else if (pe >= PMC_EV_MIPS74K_FIRST && pe <= PMC_EV_MIPS74K_LAST) { + ev = mips74k_event_table; + evfence = mips74k_event_table + PMC_EVENT_TABLE_SIZE(mips74k); } else if (pe >= PMC_EV_OCTEON_FIRST && pe <= PMC_EV_OCTEON_LAST) { ev = octeon_event_table; evfence = octeon_event_table + PMC_EVENT_TABLE_SIZE(octeon); @@ -3358,6 +3545,9 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) } else if (pe >= PMC_EV_PPC970_FIRST && pe <= PMC_EV_PPC970_LAST) { ev = ppc970_event_table; evfence = ppc970_event_table + PMC_EVENT_TABLE_SIZE(ppc970); + } else if (pe >= PMC_EV_E500_FIRST && pe <= PMC_EV_E500_LAST) { + ev = e500_event_table; + evfence = e500_event_table + PMC_EVENT_TABLE_SIZE(e500); } else if (pe == PMC_EV_TSC_TSC) { ev = tsc_event_table; evfence = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc); diff --git a/lib/libpmc/pmc.3 b/lib/libpmc/pmc.3 index 8e6db48..f51285e 100644 --- a/lib/libpmc/pmc.3 +++ b/lib/libpmc/pmc.3 @@ -527,6 +527,7 @@ API is .Xr pmc.core2 3 , .Xr pmc.haswell 3 , .Xr pmc.haswelluc 3 , +.Xr pmc.haswellxeon 3 , .Xr pmc.iaf 3 , .Xr pmc.ivybridge 3 , .Xr pmc.ivybridgexeon 3 , diff --git a/lib/libpmc/pmc.corei7.3 b/lib/libpmc/pmc.corei7.3 index 90d19df..ec31054 100644 --- a/lib/libpmc/pmc.corei7.3 +++ b/lib/libpmc/pmc.corei7.3 @@ -1549,18 +1549,18 @@ Counts number of segment register loads. .Xr pmc 3 , .Xr pmc.atom 3 , .Xr pmc.core 3 , +.Xr pmc.corei7uc 3 , .Xr pmc.iaf 3 , -.Xr pmc.ucf 3 , .Xr pmc.k7 3 , .Xr pmc.k8 3 , .Xr pmc.p4 3 , .Xr pmc.p5 3 , .Xr pmc.p6 3 , -.Xr pmc.corei7uc 3 , -.Xr pmc.westmere 3 , -.Xr pmc.westmereuc 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , +.Xr pmc.ucf 3 , +.Xr pmc.westmere 3 , +.Xr pmc.westmereuc 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.corei7uc.3 b/lib/libpmc/pmc.corei7uc.3 index 5b99113..3bcda1c 100644 --- a/lib/libpmc/pmc.corei7uc.3 +++ b/lib/libpmc/pmc.corei7uc.3 @@ -853,18 +853,18 @@ refreshed or needs to go into a power down mode. .Xr pmc 3 , .Xr pmc.atom 3 , .Xr pmc.core 3 , +.Xr pmc.corei7 3 , .Xr pmc.iaf 3 , -.Xr pmc.ucf 3 , .Xr pmc.k7 3 , .Xr pmc.k8 3 , .Xr pmc.p4 3 , .Xr pmc.p5 3 , .Xr pmc.p6 3 , -.Xr pmc.corei7 3 , -.Xr pmc.westmere 3 , -.Xr pmc.westmereuc 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , +.Xr pmc.ucf 3 , +.Xr pmc.westmere 3 , +.Xr pmc.westmereuc 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.haswell.3 b/lib/libpmc/pmc.haswell.3 index ace9c2a..a85f760 100644 --- a/lib/libpmc/pmc.haswell.3 +++ b/lib/libpmc/pmc.haswell.3 @@ -529,73 +529,60 @@ instruction. .It Li ILD_STALL.IQ_FULL .Pq Event 87H , Umask 04H Stall cycles due to IQ is full. -.It Li BR_INST_EXEC.COND -.Pq Event 88H , Umask 01H -Qualify conditional near branch instructions -executed, but not necessarily retired. +.It Li BR_INST_EXEC.NONTAKEN_COND +.Pq Event 88H , Umask 41H +Count conditional near branch instructions that were executed (but not +necessarily retired) and not taken. +.It Li BR_INST_EXEC.TAKEN_COND +.Pq Event 88H , Umask 81H +Count conditional near branch instructions that were executed (but not +necessarily retired) and taken. .It Li BR_INST_EXEC.DIRECT_JMP -.Pq Event 88H , Umask 02H -Qualify all unconditional near branch instructions -excluding calls and indirect branches. +.Pq Event 88H , Umask 82H +Count all unconditional near branch instructions excluding calls and +indirect branches. .It Li BR_INST_EXEC.INDIRECT_JMP_NON_CALL_RET -.Pq Event 88H , Umask 04H -Qualify executed indirect near branch instructions -that are not calls nor returns. +.Pq Event 88H , Umask 84H +Count executed indirect near branch instructions that are not calls nor +returns. .It Li BR_INST_EXEC.RETURN_NEAR -.Pq Event 88H , Umask 08H -Qualify indirect near branches that have a return -mnemonic. +.Pq Event 88H , Umask 88H +Count indirect near branches that have a return mnemonic. .It Li BR_INST_EXEC.DIRECT_NEAR_CALL -.Pq Event 88H , Umask 10H -Qualify unconditional near call branch instructions, -excluding non call branch, executed. +.Pq Event 88H , Umask 90H +Count unconditional near call branch instructions, excluding non call +branch, executed. .It Li BR_INST_EXEC.INDIRECT_NEAR_CALL -.Pq Event 88H , Umask 20H -Qualify indirect near calls, including both register and -memory indirect, executed. -.It Li BR_INST_EXEC.NONTAKEN -.Pq Event 88H , Umask 40H -Qualify non-taken near branches executed. -.It Li BR_INST_EXEC.TAKEN -.Pq Event 88H , Umask 80H -Qualify taken near branches executed. Must combine -with 01H,02H, 04H, 08H, 10H, 20H. +.Pq Event 88H , Umask A0H +Count indirect near calls, including both register and memory indirect, +executed. .It Li BR_INST_EXEC.ALL_BRANCHES .Pq Event 88H , Umask FFH -Counts all near executed branches (not necessarily -retired). -.It Li BR_MISP_EXEC.COND -.Pq Event 89H , Umask 01H -Qualify conditional near branch instructions -mispredicted. +Counts all near executed branches (not necessarily retired). +.It Li BR_MISP_EXEC.NONTAKEN_COND +.Pq Event 89H , Umask 41H +Count conditional near branch instructions mispredicted as nontaken. +.It Li BR_MISP_EXEC.TAKEN_COND +.Pq Event 89H , Umask 81H +Count conditional near branch instructions mispredicted as taken. .It Li BR_MISP_EXEC.INDIRECT_JMP_NON_CALL_RET -.Pq Event 89H , Umask 04H -Qualify mispredicted indirect near branch -instructions that are not calls nor returns. +.Pq Event 89H , Umask 84H +Count mispredicted indirect near branch instructions that are not calls +nor returns. .It Li BR_MISP_EXEC.RETURN_NEAR -.Pq Event 89H , Umask 08H -Qualify mispredicted indirect near branches that -have a return mnemonic. +.Pq Event 89H , Umask 88H +Count mispredicted indirect near branches that have a return mnemonic. .It Li BR_MISP_EXEC.DIRECT_NEAR_CALL -.Pq Event 89H , Umask 10H -Qualify mispredicted unconditional near call branch -instructions, excluding non call branch, executed. +.Pq Event 89H , Umask 90H +Count mispredicted unconditional near call branch instructions, excluding +non call branch, executed. .It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL -.Pq Event 89H , Umask 20H -Qualify mispredicted indirect near calls, including -both register and memory indirect, executed. -.It Li BR_MISP_EXEC.NONTAKEN -.Pq Event 89H , Umask 40H -Qualify mispredicted non-taken near branches -executed. -.It Li BR_MISP_EXEC.TAKEN -.Pq Event 89H , Umask 80H -Qualify mispredicted taken near branches executed. -Must combine with 01H,02H, 04H, 08H, 10H, 20H. +.Pq Event 89H , Umask A0H +Count mispredicted indirect near calls, including both register and memory +indirect, executed. .It Li BR_MISP_EXEC.ALL_BRANCHES .Pq Event 89H , Umask FFH -Counts all near executed branches (not necessarily -retired). +Counts all mispredicted near executed branches (not necessarily retired). .It Li IDQ_UOPS_NOT_DELIVERED.CORE .Pq Event 9CH , Umask 01H Count number of non-delivered uops to RAT per @@ -821,30 +808,24 @@ Count cases of saving new LBR records by hardware. Randomly sampled loads whose latency is above a user defined threshold. A small fraction of the overall loads are sampled due to randomization. -.It Li MEM_UOP_RETIRED.LOADS -.Pq Event D0H , Umask 01H -Qualify retired memory uops that are loads. Combine Supports PEBS and -with umask 10H, 20H, 40H, 80H. -.It Li MEM_UOP_RETIRED.STORES -.Pq Event D0H , Umask 02H -Qualify retired memory uops that are stores. -Combine with umask 10H, 20H, 40H, 80H. -.It Li MEM_UOP_RETIRED.STLB_MISS -.Pq Event D0H , Umask 10H -Qualify retired memory uops with STLB miss. Must -combine with umask 01H, 02H, to produce counts. -.It Li MEM_UOP_RETIRED.LOCK -.Pq Event D0H , Umask 20H -Qualify retired memory uops with lock. Must combine Supports PEBS and -with umask 01H, 02H, to produce counts. -.It Li MEM_UOP_RETIRED.SPLIT -.Pq Event D0H , Umask 40H -Qualify retired memory uops with line split. Must -combine with umask 01H, 02H, to produce counts. -.It Li MEM_UOP_RETIRED.ALL -.Pq Event D0H , Umask 80H -Qualify any retired memory uops. Must combine with Supports PEBS and -umask 01H, 02H, to produce counts. +.It Li MEM_UOPS_RETIRED.STLB_MISS_LOADS +.Pq Event D0H , Umask 11H +Count retired load uops that missed the STLB. +.It Li MEM_UOPS_RETIRED.STLB_MISS_STORES +.Pq Event D0H , Umask 12H +Count retired store uops that missed the STLB. +.It Li MEM_UOPS_RETIRED.SPLIT_LOADS +.Pq Event D0H , Umask 41H +Count retired load uops that were split across a cache line. +.It Li MEM_UOPS_RETIRED.SPLIT_STORES +.Pq Event D0H , Umask 42H +Count retired store uops that were split across a cache line. +.It Li MEM_UOPS_RETIRED.ALL_LOADS +.Pq Event D0H , Umask 81H +Count all retired load uops. +.It Li MEM_UOPS_RETIRED.ALL_STORES +.Pq Event D0H , Umask 82H +Count all retired store uops. .It Li MEM_LOAD_UOPS_RETIRED.L1_HIT .Pq Event D1H , Umask 01H Retired load uops with L1 cache hits as data sources. @@ -936,25 +917,25 @@ Dirty L2 cache lines evicted by demand. .Xr pmc 3 , .Xr pmc.atom 3 , .Xr pmc.core 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , +.Xr pmc.haswelluc 3 , .Xr pmc.iaf 3 , -.Xr pmc.ucf 3 , +.Xr pmc.ivybridge 3 , +.Xr pmc.ivybridgexeon 3 , .Xr pmc.k7 3 , .Xr pmc.k8 3 , .Xr pmc.p4 3 , .Xr pmc.p5 3 , .Xr pmc.p6 3 , -.Xr pmc.corei7 3 , -.Xr pmc.corei7uc 3 , -.Xr pmc.haswelluc 3 , -.Xr pmc.ivybridge 3 , -.Xr pmc.ivybridgexeon 3 , .Xr pmc.sandybridge 3 , .Xr pmc.sandybridgeuc 3 , .Xr pmc.sandybridgexeon 3 , -.Xr pmc.westmere 3 , -.Xr pmc.westmereuc 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , +.Xr pmc.ucf 3 , +.Xr pmc.westmere 3 , +.Xr pmc.westmereuc 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.haswellxeon.3 b/lib/libpmc/pmc.haswellxeon.3 new file mode 100644 index 0000000..7519030 --- /dev/null +++ b/lib/libpmc/pmc.haswellxeon.3 @@ -0,0 +1,956 @@ +.\" +.\" Copyright (c) 2013 Hiren Panchasara <hiren.panchasara@gmail.com> +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd November 21, 2014 +.Dt PMC.HASWELLXEON 3 +.Os +.Sh NAME +.Nm pmc.haswellxeon +.Nd measurement events for +.Tn Intel +.Tn Haswell Xeon +family CPUs +.Sh LIBRARY +.Lb libpmc +.Sh SYNOPSIS +.In pmc.h +.Sh DESCRIPTION +.Tn Intel +.Tn "Haswell" +CPUs contain PMCs conforming to version 2 of the +.Tn Intel +performance measurement architecture. +These CPUs may contain up to two classes of PMCs: +.Bl -tag -width "Li PMC_CLASS_IAP" +.It Li PMC_CLASS_IAF +Fixed-function counters that count only one hardware event per counter. +.It Li PMC_CLASS_IAP +Programmable counters that may be configured to count one of a defined +set of hardware events. +.El +.Pp +The number of PMCs available in each class and their widths need to be +determined at run time by calling +.Xr pmc_cpuinfo 3 . +.Pp +Intel Haswell Xeon PMCs are documented in +.Rs +.%B "Intel(R) 64 and IA-32 Architectures Software Developer's Manual" +.%T "Combined Volumes: 1, 2A, 2B, 2C, 3A, 3B and 3C" +.%N "Order Number: 325462-052US" +.%D September 2014 +.%Q "Intel Corporation" +.Re +.Ss HASWELL FIXED FUNCTION PMCS +These PMCs and their supported events are documented in +.Xr pmc.iaf 3 . +.Ss HASWELL PROGRAMMABLE PMCS +The programmable PMCs support the following capabilities: +.Bl -column "PMC_CAP_INTERRUPT" "Support" +.It Em Capability Ta Em Support +.It PMC_CAP_CASCADE Ta \&No +.It PMC_CAP_EDGE Ta Yes +.It PMC_CAP_INTERRUPT Ta Yes +.It PMC_CAP_INVERT Ta Yes +.It PMC_CAP_READ Ta Yes +.It PMC_CAP_PRECISE Ta \&No +.It PMC_CAP_SYSTEM Ta Yes +.It PMC_CAP_TAGGING Ta \&No +.It PMC_CAP_THRESHOLD Ta Yes +.It PMC_CAP_USER Ta Yes +.It PMC_CAP_WRITE Ta Yes +.El +.Ss Event Qualifiers +Event specifiers for these PMCs support the following common +qualifiers: +.Bl -tag -width indent +.It Li rsp= Ns Ar value +Configure the Off-core Response bits. +.Bl -tag -width indent +.It Li DMND_DATA_RD +Counts the number of demand and DCU prefetch data reads of full +and partial cachelines as well as demand data page table entry +cacheline reads. Does not count L2 data read prefetches or +instruction fetches. +.It Li REQ_DMND_RFO +Counts the number of demand and DCU prefetch reads for ownership (RFO) +requests generated by a write to data cacheline. Does not count L2 RFO +prefetches. +.It Li REQ_DMND_IFETCH +Counts the number of demand and DCU prefetch instruction cacheline reads. +Does not count L2 code read prefetches. +.It Li REQ_WB +Counts the number of writeback (modified to exclusive) transactions. +.It Li REQ_PF_DATA_RD +Counts the number of data cacheline reads generated by L2 prefetchers. +.It Li REQ_PF_RFO +Counts the number of RFO requests generated by L2 prefetchers. +.It Li REQ_PF_IFETCH +Counts the number of code reads generated by L2 prefetchers. +.It Li REQ_PF_LLC_DATA_RD +L2 prefetcher to L3 for loads. +.It Li REQ_PF_LLC_RFO +RFO requests generated by L2 prefetcher +.It Li REQ_PF_LLC_IFETCH +L2 prefetcher to L3 for instruction fetches. +.It Li REQ_BUS_LOCKS +Bus lock and split lock requests. +.It Li REQ_STRM_ST +Streaming store requests. +.It Li REQ_OTHER +Any other request that crosses IDI, including I/O. +.It Li RES_ANY +Catch all value for any response types. +.It Li RES_SUPPLIER_NO_SUPP +No Supplier Information available. +.It Li RES_SUPPLIER_LLC_HITM +M-state initial lookup stat in L3. +.It Li RES_SUPPLIER_LLC_HITE +E-state. +.It Li RES_SUPPLIER_LLC_HITS +S-state. +.It Li RES_SUPPLIER_LLC_HITF +F-state. +.It Li RES_SUPPLIER_LOCAL +Local DRAM Controller. +.It Li RES_SNOOP_SNP_NONE +No details on snoop-related information. +.It Li RES_SNOOP_SNP_NO_NEEDED +No snoop was needed to satisfy the request. +.It Li RES_SNOOP_SNP_MISS +A snoop was needed and it missed all snooped caches: +-For LLC Hit, ReslHitl was returned by all cores +-For LLC Miss, Rspl was returned by all sockets and data was returned from +DRAM. +.It Li RES_SNOOP_HIT_NO_FWD +A snoop was needed and it hits in at least one snooped cache. Hit denotes a +cache-line was valid before snoop effect. This includes: +-Snoop Hit w/ Invalidation (LLC Hit, RFO) +-Snoop Hit, Left Shared (LLC Hit/Miss, IFetch/Data_RD) +-Snoop Hit w/ Invalidation and No Forward (LLC Miss, RFO Hit S) +In the LLC Miss case, data is returned from DRAM. +.It Li RES_SNOOP_HIT_FWD +A snoop was needed and data was forwarded from a remote socket. +This includes: +-Snoop Forward Clean, Left Shared (LLC Hit/Miss, IFetch/Data_RD/RFT). +.It Li RES_SNOOP_HITM +A snoop was needed and it HitM-ed in local or remote cache. HitM denotes a +cache-line was in modified state before effect as a results of snoop. This +includes: +-Snoop HitM w/ WB (LLC miss, IFetch/Data_RD) +-Snoop Forward Modified w/ Invalidation (LLC Hit/Miss, RFO) +-Snoop MtoS (LLC Hit, IFetch/Data_RD). +.It Li RES_NON_DRAM +Target was non-DRAM system address. This includes MMIO transactions. +.El +.It Li cmask= Ns Ar value +Configure the PMC to increment only if the number of configured +events measured in a cycle is greater than or equal to +.Ar value . +.It Li edge +Configure the PMC to count the number of de-asserted to asserted +transitions of the conditions expressed by the other qualifiers. +If specified, the counter will increment only once whenever a +condition becomes true, irrespective of the number of clocks during +which the condition remains true. +.It Li inv +Invert the sense of comparison when the +.Dq Li cmask +qualifier is present, making the counter increment when the number of +events per cycle is less than the value specified by the +.Dq Li cmask +qualifier. +.It Li os +Configure the PMC to count events happening at processor privilege +level 0. +.It Li usr +Configure the PMC to count events occurring at privilege levels 1, 2 +or 3. +.El +.Pp +If neither of the +.Dq Li os +or +.Dq Li usr +qualifiers are specified, the default is to enable both. +.Ss Event Specifiers (Programmable PMCs) +Haswell programmable PMCs support the following events: +.Bl -tag -width indent +.It Li LD_BLOCKS.STORE_FORWARD +.Pq Event 03H , Umask 02H +Loads blocked by overlapping with store buffer that +cannot be forwarded. +.It Li MISALIGN_MEM_REF.LOADS +.Pq Event 05H , Umask 01H +Speculative cache-line split load uops dispatched to +L1D. +.It Li MISALIGN_MEM_REF.STORES +.Pq Event 05H , Umask 02H +Speculative cache-line split Store-address uops +dispatched to L1D. +.It Li LD_BLOCKS_PARTIAL.ADDRESS_ALIAS +.Pq Event 07H , Umask 01H +False dependencies in MOB due to partial compare +on address. +.It Li DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK +.Pq Event 08H , Umask 01H +Misses in all TLB levels that cause a page walk of any +page size. +.It Li DTLB_LOAD_MISSES.WALK_COMPLETED_4K +.Pq Event 08H , Umask 02H +Completed page walks due to demand load misses +that caused 4K page walks in any TLB levels. +.It Li DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4K +.Pq Event 08H , Umask 02H +Completed page walks due to demand load misses +that caused 2M/4M page walks in any TLB levels. +.It Li DTLB_LOAD_MISSES.WALK_COMPLETED +.Pq Event 08H , Umask 0EH +Completed page walks in any TLB of any page size +due to demand load misses +.It Li DTLB_LOAD_MISSES.WALK_DURATION +.Pq Event 08H , Umask 10H +Cycle PMH is busy with a walk. +.It Li DTLB_LOAD_MISSES.STLB_HIT_4K +.Pq Event 08H , Umask 20H +Load misses that missed DTLB but hit STLB (4K). +.It Li DTLB_LOAD_MISSES.STLB_HIT_2M +.Pq Event 08H , Umask 40H +Load misses that missed DTLB but hit STLB (2M). +.It Li DTLB_LOAD_MISSES.STLB_HIT +.Pq Event 08H , Umask 60H +Number of cache load STLB hits. No page walk. +.It Li DTLB_LOAD_MISSES.PDE_CACHE_MISS +.Pq Event 08H , Umask 80H +DTLB demand load misses with low part of linear-to- +physical address translation missed +.It Li INT_MISC.RECOVERY_CYCLES +.Pq Event 0DH , Umask 03H +Cycles waiting to recover after Machine Clears +except JEClear. Set Cmask= 1. +.It Li UOPS_ISSUED.ANY +.Pq Event 0EH , Umask 01H +ncrements each cycle the # of Uops issued by the +RAT to RS. +Set Cmask = 1, Inv = 1, Any= 1to count stalled cycles +of this core. +.It Li UOPS_ISSUED.FLAGS_MERGE +.Pq Event 0EH , Umask 10H +Number of flags-merge uops allocated. Such uops +adds delay. +.It Li UOPS_ISSUED.SLOW_LEA +.Pq Event 0EH , Umask 20H +Number of slow LEA or similar uops allocated. Such +uop has 3 sources (e.g. 2 sources + immediate) +regardless if as a result of LEA instruction or not. +.It Li UOPS_ISSUED.SiNGLE_MUL +.Pq Event 0EH , Umask 40H +Number of multiply packed/scalar single precision +uops allocated. +.It Li L2_RQSTS.DEMAND_DATA_RD_MISS +.Pq Event 24H , Umask 21H +Demand Data Read requests that missed L2, no +rejects. +.It Li L2_RQSTS.DEMAND_DATA_RD_HIT +.Pq Event 24H , Umask 41H +Demand Data Read requests that hit L2 cache. +.It Li L2_RQSTS.ALL_DEMAND_DATA_RD +.Pq Event 24H , Umask E1H +Counts any demand and L1 HW prefetch data load +requests to L2. +.It Li L2_RQSTS.RFO_HIT +.Pq Event 24H , Umask 42H +Counts the number of store RFO requests that hit +the L2 cache. +.It Li L2_RQSTS.RFO_MISS +.Pq Event 24H , Umask 22H +Counts the number of store RFO requests that miss +the L2 cache. +.It Li L2_RQSTS.ALL_RFO +.Pq Event 24H , Umask E2H +Counts all L2 store RFO requests. +.It Li L2_RQSTS.CODE_RD_HIT +.Pq Event 24H , Umask 44H +Number of instruction fetches that hit the L2 cache. +.It Li L2_RQSTS.CODE_RD_MISS +.Pq Event 24H , Umask 24H +Number of instruction fetches that missed the L2 +cache. +.It Li L2_RQSTS.ALL_DEMAND_MISS +.Pq Event 24H , Umask 27H +Demand requests that miss L2 cache. +.It Li L2_RQSTS.ALL_DEMAND_REFERENCES +.Pq Event 24H , Umask E7H +Demand requests to L2 cache. +.It Li L2_RQSTS.ALL_CODE_RD +.Pq Event 24H , Umask E4H +Counts all L2 code requests. +.It Li L2_RQSTS.L2_PF_HIT +.Pq Event 24H , Umask 50H +Counts all L2 HW prefetcher requests that hit L2. +.It Li L2_RQSTS.L2_PF_MISS +.Pq Event 24H , Umask 30H +Counts all L2 HW prefetcher requests that missed +L2. +.It Li L2_RQSTS.ALL_PF +.Pq Event 24H , Umask F8H +Counts all L2 HW prefetcher requests. +.It Li L2_RQSTS.MISS +.Pq Event 24H , Umask 3FH +All requests that missed L2. +.It Li L2_RQSTS.REFERENCES +.Pq Event 24H , Umask FFH +All requests to L2 cache. +.It Li L2_DEMAND_RQSTS.WB_HIT +.Pq Event 27H , Umask 50H +Not rejected writebacks that hit L2 cache +.It Li LONGEST_LAT_CACHE.REFERENCE +.Pq Event 2EH , Umask 4FH +This event counts requests originating from the core +that reference a cache line in the last level cache. +.It Li LONGEST_LAT_CACHE.MISS +.Pq Event 2EH , Umask 41H +This event counts each cache miss condition for +references to the last level cache. +.It Li CPU_CLK_UNHALTED.THREAD_P +.Pq Event 3CH , Umask 00H +Counts the number of thread cycles while the thread +is not in a halt state. The thread enters the halt state +when it is running the HLT instruction. The core +frequency may change from time to time due to +power or thermal throttling. +.It Li CPU_CLK_THREAD_UNHALTED.REF_XCLK +.Pq Event 3CH , Umask 01H +Increments at the frequency of XCLK (100 MHz) +when not halted. +.It Li L1D_PEND_MISS.PENDING +.Pq Event 48H , Umask 01H +Increments the number of outstanding L1D misses +every cycle. Set Cmaks = 1 and Edge =1 to count +occurrences. +.It Li DTLB_STORE_MISSES.MISS_CAUSES_A_WALK +.Pq Event 49H , Umask 01H +Miss in all TLB levels causes an page walk of any +page size (4K/2M/4M/1G). +.It Li DTLB_STORE_MISSES.WALK_COMPLETED_4K +.Pq Event 49H , Umask 02H +Completed page walks due to store misses in one or +more TLB levels of 4K page structure. +.It Li DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M +.Pq Event 49H , Umask 04H +Completed page walks due to store misses in one or +more TLB levels of 2M/4M page structure. +.It Li DTLB_STORE_MISSES.WALK_COMPLETED +.Pq Event 49H , Umask 0EH +Completed page walks due to store miss in any TLB +levels of any page size (4K/2M/4M/1G). +.It Li DTLB_STORE_MISSES.WALK_DURATION +.Pq Event 49H , Umask 10H +Cycles PMH is busy with this walk. +.It Li DTLB_STORE_MISSES.STLB_HIT_4K +.Pq Event 49H , Umask 20H +Store misses that missed DTLB but hit STLB (4K). +.It Li DTLB_STORE_MISSES.STLB_HIT_2M +.Pq Event 49H , Umask 40H +Store misses that missed DTLB but hit STLB (2M). +.It Li DTLB_STORE_MISSES.STLB_HIT +.Pq Event 49H , Umask 60H +Store operations that miss the first TLB level but hit +the second and do not cause page walks. +.It Li DTLB_STORE_MISSES.PDE_CACHE_MISS +.Pq Event 49H , Umask 80H +DTLB store misses with low part of linear-to-physical +address translation missed. +.It Li LOAD_HIT_PRE.SW_PF +.Pq Event 4CH , Umask 01H +Non-SW-prefetch load dispatches that hit fill buffer +allocated for S/W prefetch. +.It Li LOAD_HIT_PRE.HW_PF +.Pq Event 4CH , Umask 02H +Non-SW-prefetch load dispatches that hit fill buffer +allocated for H/W prefetch. +.It Li L1D.REPLACEMENT +.Pq Event 51H , Umask 01H +Counts the number of lines brought into the L1 data +cache. +.It Li MOVE_ELIMINATION.INT_NOT_ELIMINATED +.Pq Event 58H , Umask 04H +Number of integer Move Elimination candidate uops +that were not eliminated. +.It Li MOVE_ELIMINATION.SMID_NOT_ELIMINATED +.Pq Event 58H , Umask 08H +Number of SIMD Move Elimination candidate uops +that were not eliminated. +.It Li MOVE_ELIMINATION.INT_ELIMINATED +.Pq Event 58H , Umask 01H +Unhalted core cycles when the thread is in ring 0. +.It Li MOVE_ELIMINATION.SMID_ELIMINATED +.Pq Event 58H , Umask 02H +Number of SIMD Move Elimination candidate uops +that were eliminated. +.It Li CPL_CYCLES.RING0 +.Pq Event 5CH , Umask 02H +Unhalted core cycles when the thread is in ring 0. +.It Li CPL_CYCLES.RING123 +.Pq Event 5CH , Umask 01H +Unhalted core cycles when the thread is not in ring 0. +.It Li RS_EVENTS.EMPTY_CYCLES +.Pq Event 5EH , Umask 01H +Cycles the RS is empty for the thread. +.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD +.Pq Event 60H , Umask 01H +Offcore outstanding Demand Data Read transactions +in SQ to uncore. Set Cmask=1 to count cycles. +.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CORE_RD +.Pq Event 60H , Umask 02H +Offcore outstanding Demand code Read transactions +in SQ to uncore. Set Cmask=1 to count cycles. +.It Li OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO +.Pq Event 60H , Umask 04H +Offcore outstanding RFO store transactions in SQ to +uncore. Set Cmask=1 to count cycles. +.It Li OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD +.Pq Event 60H , Umask 08H +Offcore outstanding cacheable data read +transactions in SQ to uncore. Set Cmask=1 to count +cycles. +.It Li LOCK_CYCLES.SPLIT_LOCK_UC_LOCK_DURATION +.Pq Event 63H , Umask 01H +Cycles in which the L1D and L2 are locked, due to a +UC lock or split lock. +.It Li LOCK_CYCLES.CACHE_LOCK_DURATION +.Pq Event 63H , Umask 02H +Cycles in which the L1D is locked. +.It Li IDQ.EMPTY +.Pq Event 79H , Umask 02H +Counts cycles the IDQ is empty. +.It Li IDQ.MITE_UOPS +.Pq Event 79H , Umask 04H +Increment each cycle # of uops delivered to IDQ from +MITE path. +Set Cmask = 1 to count cycles. +.It Li IDQ.DSB_UOPS +.Pq Event 79H , Umask 08H +Increment each cycle. # of uops delivered to IDQ +from DSB path. +Set Cmask = 1 to count cycles. +.It Li IDQ.MS_DSB_UOPS +.Pq Event 79H , Umask 10H +Increment each cycle # of uops delivered to IDQ +when MS_busy by DSB. Set Cmask = 1 to count +cycles. Add Edge=1 to count # of delivery. +.It Li IDQ.MS_MITE_UOPS +.Pq Event 79H , Umask 20H +ncrement each cycle # of uops delivered to IDQ +when MS_busy by MITE. Set Cmask = 1 to count +cycles. +.It Li IDQ.MS_UOPS +.Pq Event 79H , Umask 30H +Increment each cycle # of uops delivered to IDQ from +MS by either DSB or MITE. Set Cmask = 1 to count +cycles. +.It Li IDQ.ALL_DSB_CYCLES_ANY_UOPS +.Pq Event 79H , Umask 18H +Counts cycles DSB is delivered at least one uops. Set +Cmask = 1. +.It Li IDQ.ALL_DSB_CYCLES_4_UOPS +.Pq Event 79H , Umask 18H +Counts cycles DSB is delivered four uops. Set Cmask +=4. +.It Li IDQ.ALL_MITE_CYCLES_ANY_UOPS +.Pq Event 79H , Umask 24H +Counts cycles MITE is delivered at least one uops. Set +Cmask = 1. +.It Li IDQ.ALL_MITE_CYCLES_4_UOPS +.Pq Event 79H , Umask 24H +Counts cycles MITE is delivered four uops. Set Cmask +=4. +.It Li IDQ.MITE_ALL_UOPS +.Pq Event 79H , Umask 3CH +# of uops delivered to IDQ from any path. +.It Li ICACHE.MISSES +.Pq Event 80H , Umask 02H +Number of Instruction Cache, Streaming Buffer and +Victim Cache Misses. Includes UC accesses. +.It Li ITLB_MISSES.MISS_CAUSES_A_WALK +.Pq Event 85H , Umask 01H +Misses in ITLB that causes a page walk of any page +size. +.It Li ITLB_MISSES.WALK_COMPLETED_4K +.Pq Event 85H , Umask 02H +Completed page walks due to misses in ITLB 4K page +entries. +.It Li TLB_MISSES.WALK_COMPLETED_2M_4M +.Pq Event 85H , Umask 04H +Completed page walks due to misses in ITLB 2M/4M +page entries. +.It Li ITLB_MISSES.WALK_COMPLETED +.Pq Event 85H , Umask 0EH +Completed page walks in ITLB of any page size. +.It Li ITLB_MISSES.WALK_DURATION +.Pq Event 85H , Umask 10H +Cycle PMH is busy with a walk. +.It Li ITLB_MISSES.STLB_HIT_4K +.Pq Event 85H , Umask 20H +ITLB misses that hit STLB (4K). +.It Li ITLB_MISSES.STLB_HIT_2M +.Pq Event 85H , Umask 40H +ITLB misses that hit STLB (2K). +.It Li ITLB_MISSES.STLB_HIT +.Pq Event 85H , Umask 60H +TLB misses that hit STLB. No page walk. +.It Li ILD_STALL.LCP +.Pq Event 87H , Umask 01H +Stalls caused by changing prefix length of the +instruction. +.It Li ILD_STALL.IQ_FULL +.Pq Event 87H , Umask 04H +Stall cycles due to IQ is full. +.It Li BR_INST_EXEC.NONTAKEN_COND +.Pq Event 88H , Umask 41H +Count conditional near branch instructions that were executed (but not +necessarily retired) and not taken. +.It Li BR_INST_EXEC.TAKEN_COND +.Pq Event 88H , Umask 81H +Count conditional near branch instructions that were executed (but not +necessarily retired) and taken. +.It Li BR_INST_EXEC.DIRECT_JMP +.Pq Event 88H , Umask 82H +Count all unconditional near branch instructions excluding calls and +indirect branches. +.It Li BR_INST_EXEC.INDIRECT_JMP_NON_CALL_RET +.Pq Event 88H , Umask 84H +Count executed indirect near branch instructions that are not calls nor +returns. +.It Li BR_INST_EXEC.RETURN_NEAR +.Pq Event 88H , Umask 88H +Count indirect near branches that have a return mnemonic. +.It Li BR_INST_EXEC.DIRECT_NEAR_CALL +.Pq Event 88H , Umask 90H +Count unconditional near call branch instructions, excluding non call +branch, executed. +.It Li BR_INST_EXEC.INDIRECT_NEAR_CALL +.Pq Event 88H , Umask A0H +Count indirect near calls, including both register and memory indirect, +executed. +.It Li BR_INST_EXEC.ALL_BRANCHES +.Pq Event 88H , Umask FFH +Counts all near executed branches (not necessarily retired). +.It Li BR_MISP_EXEC.NONTAKEN_COND +.Pq Event 89H , Umask 41H +Count conditional near branch instructions mispredicted as nontaken. +.It Li BR_MISP_EXEC.TAKEN_COND +.Pq Event 89H , Umask 81H +Count conditional near branch instructions mispredicted as taken. +.It Li BR_MISP_EXEC.INDIRECT_JMP_NON_CALL_RET +.Pq Event 89H , Umask 84H +Count mispredicted indirect near branch instructions that are not calls +nor returns. +.It Li BR_MISP_EXEC.RETURN_NEAR +.Pq Event 89H , Umask 88H +Count mispredicted indirect near branches that have a return mnemonic. +.It Li BR_MISP_EXEC.DIRECT_NEAR_CALL +.Pq Event 89H , Umask 90H +Count mispredicted unconditional near call branch instructions, excluding +non call branch, executed. +.It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL +.Pq Event 89H , Umask A0H +Count mispredicted indirect near calls, including both register and memory +indirect, executed. +.It Li BR_MISP_EXEC.ALL_BRANCHES +.Pq Event 89H , Umask FFH +Counts all mispredicted near executed branches (not necessarily retired). +.It Li IDQ_UOPS_NOT_DELIVERED.CORE +.Pq Event 9CH , Umask 01H +Count number of non-delivered uops to RAT per +thread. +.It Li UOPS_EXECUTED_PORT.PORT_0 +.Pq Event A1H , Umask 01H +Cycles which a Uop is dispatched on port 0 in this +thread. +.It Li UOPS_EXECUTED_PORT.PORT_1 +.Pq Event A1H , Umask 02H +Cycles which a Uop is dispatched on port 1 in this +thread. +.It Li UOPS_EXECUTED_PORT.PORT_2 +.Pq Event A1H , Umask 04H +Cycles which a Uop is dispatched on port 2 in this +thread. +.It Li UOPS_EXECUTED_PORT.PORT_3 +.Pq Event A1H , Umask 08H +Cycles which a Uop is dispatched on port 3 in this +thread. +.It Li UOPS_EXECUTED_PORT.PORT_4 +.Pq Event A1H , Umask 10H +Cycles which a Uop is dispatched on port 4 in this +thread. +.It Li UOPS_EXECUTED_PORT.PORT_5 +.Pq Event A1H , Umask 20H +Cycles which a Uop is dispatched on port 5 in this +thread. +.It Li UOPS_EXECUTED_PORT.PORT_6 +.Pq Event A1H , Umask 40H +Cycles which a Uop is dispatched on port 6 in this +thread. +.It Li UOPS_EXECUTED_PORT.PORT_7 +.Pq Event A1H , Umask 80H +Cycles which a Uop is dispatched on port 7 in this +thread. +.It Li RESOURCE_STALLS.ANY +.Pq Event A2H , Umask 01H +Cycles Allocation is stalled due to Resource Related +reason. +.It Li RESOURCE_STALLS.RS +.Pq Event A2H , Umask 04H +Cycles stalled due to no eligible RS entry available. +.It Li RESOURCE_STALLS.SB +.Pq Event A2H , Umask 08H +Cycles stalled due to no store buffers available (not +including draining form sync). +.It Li RESOURCE_STALLS.ROB +.Pq Event A2H , Umask 10H +Cycles stalled due to re-order buffer full. +.It Li CYCLE_ACTIVITY.CYCLES_L2_PENDING +.Pq Event A3H , Umask 01H +Cycles with pending L2 miss loads. Set Cmask=2 to +count cycle. +.It Li CYCLE_ACTIVITY.CYCLES_LDM_PENDING +.Pq Event A3H , Umask 02H +Cycles with pending memory loads. Set Cmask=2 to +count cycle. +.It Li CYCLE_ACTIVITY.STALLS_L2_PENDING +.Pq Event A3H , Umask 05H +Number of loads missed L2. +.It Li CYCLE_ACTIVITY.CYCLES_L1D_PENDING +.Pq Event A3H , Umask 08H +Cycles with pending L1 cache miss loads. Set +Cmask=8 to count cycle. +.It Li ITLB.ITLB_FLUSH +.Pq Event AEH , Umask 01H +Counts the number of ITLB flushes, includes +4k/2M/4M pages. +.It Li OFFCORE_REQUESTS.DEMAND_DATA_RD +.Pq Event B0H , Umask 01H +Demand data read requests sent to uncore. +.It Li OFFCORE_REQUESTS.DEMAND_CODE_RD +.Pq Event B0H , Umask 02H +Demand code read requests sent to uncore. +.It Li OFFCORE_REQUESTS.DEMAND_RFO +.Pq Event B0H , Umask 04H +Demand RFO read requests sent to uncore, including +regular RFOs, locks, ItoM. +.It Li OFFCORE_REQUESTS.ALL_DATA_RD +.Pq Event B0H , Umask 08H +Data read requests sent to uncore (demand and +prefetch). +.It Li UOPS_EXECUTED.CORE +.Pq Event B1H , Umask 02H +Counts total number of uops to be executed per-core +each cycle. +.It Li OFF_CORE_RESPONSE_0 +.Pq Event B7H , Umask 01H +Requires MSR 01A6H +.It Li OFF_CORE_RESPONSE_1 +.Pq Event BBH , Umask 01H +Requires MSR 01A7H +.It Li PAGE_WALKER_LOADS.DTLB_L1 +.Pq Event BCH , Umask 11H +Number of DTLB page walker loads that hit in the +L1+FB. +.It Li PAGE_WALKER_LOADS.ITLB_L1 +.Pq Event BCH , Umask 21H +Number of ITLB page walker loads that hit in the +L1+FB. +.It Li PAGE_WALKER_LOADS.DTLB_L2 +.Pq Event BCH , Umask 12H +Number of DTLB page walker loads that hit in the L2. +.It Li PAGE_WALKER_LOADS.ITLB_L2 +.Pq Event BCH , Umask 22H +Number of ITLB page walker loads that hit in the L2. +.It Li PAGE_WALKER_LOADS.DTLB_L3 +.Pq Event BCH , Umask 14H +Number of DTLB page walker loads that hit in the L3. +.It Li PAGE_WALKER_LOADS.ITLB_L3 +.Pq Event BCH , Umask 24H +Number of ITLB page walker loads that hit in the L3. +.It Li PAGE_WALKER_LOADS.DTLB_MEMORY +.Pq Event BCH , Umask 18H +Number of DTLB page walker loads from memory. +.It Li PAGE_WALKER_LOADS.ITLB_MEMORY +.Pq Event BCH , Umask 28H +Number of ITLB page walker loads from memory. +.It Li TLB_FLUSH.DTLB_THREAD +.Pq Event BDH , Umask 01H +DTLB flush attempts of the thread-specific entries. +.It Li TLB_FLUSH.STLB_ANY +.Pq Event BDH , Umask 20H +Count number of STLB flush attempts. +.It Li INST_RETIRED.ANY_P +.Pq Event C0H , Umask 00H +Number of instructions at retirement. +.It Li INST_RETIRED.ALL +.Pq Event C0H , Umask 01H +Precise instruction retired event with HW to reduce +effect of PEBS shadow in IP distribution. +.It Li OTHER_ASSISTS.AVX_TO_SSE +.Pq Event C1H , Umask 08H +Number of transitions from AVX-256 to legacy SSE +when penalty applicable. +.It Li OTHER_ASSISTS.SSE_TO_AVX +.Pq Event C1H , Umask 10H +Number of transitions from SSE to AVX-256 when +penalty applicable. +.It Li OTHER_ASSISTS.ANY_WB_ASSIST +.Pq Event C1H , Umask 40H +Number of microcode assists invoked by HW upon +uop writeback. +.It Li UOPS_RETIRED.ALL +.Pq Event C2H , Umask 01H +Counts the number of micro-ops retired, Use +cmask=1 and invert to count active cycles or stalled +cycles. +.It Li UOPS_RETIRED.RETIRE_SLOTS +.Pq Event C2H , Umask 02H +Counts the number of retirement slots used each +cycle. +.It Li MACHINE_CLEARS.MEMORY_ORDERING +.Pq Event C3H , Umask 02H +Counts the number of machine clears due to memory +order conflicts. +.It Li MACHINE_CLEARS.SMC +.Pq Event C3H , Umask 04H +Number of self-modifying-code machine clears +detected. +.It Li MACHINE_CLEARS.MASKMOV +.Pq Event C3H , Umask 20H +Counts the number of executed AVX masked load +operations that refer to an illegal address range with +the mask bits set to 0. +.It Li BR_INST_RETIRED.ALL_BRANCHES +.Pq Event C4H , Umask 00H +Branch instructions at retirement. +.It Li BR_INST_RETIRED.CONDITIONAL +.Pq Event C4H , Umask 01H +Counts the number of conditional branch instructions Supports PEBS +retired. +.It Li BR_INST_RETIRED.NEAR_CALL +.Pq Event C4H , Umask 02H +Direct and indirect near call instructions retired. +.It Li BR_INST_RETIRED.ALL_BRANCHES +.Pq Event C4H , Umask 04H +Counts the number of branch instructions retired. +.It Li BR_INST_RETIRED.NEAR_RETURN +.Pq Event C4H , Umask 08H +Counts the number of near return instructions +retired. +.It Li BR_INST_RETIRED.NOT_TAKEN +.Pq Event C4H , Umask 10H +Counts the number of not taken branch instructions +retired. + It Li BR_INST_RETIRED.NEAR_TAKEN +.Pq Event C4H , Umask 20H +Number of near taken branches retired. +.It Li BR_INST_RETIRED.FAR_BRANCH +.Pq Event C4H , Umask 40H +Number of far branches retired. +.It Li BR_MISP_RETIRED.ALL_BRANCHES +.Pq Event C5H , Umask 00H +Mispredicted branch instructions at retirement +.It Li BR_MISP_RETIRED.CONDITIONAL +.Pq Event C5H , Umask 01H +Mispredicted conditional branch instructions retired. +.It Li BR_MISP_RETIRED.CONDITIONAL +.Pq Event C5H , Umask 04H +Mispredicted macro branch instructions retired. +.It Li FP_ASSIST.X87_OUTPUT +.Pq Event CAH , Umask 02H +Number of X87 FP assists due to Output values. +.It Li FP_ASSIST.X87_INPUT +.Pq Event CAH , Umask 04H +Number of X87 FP assists due to input values. +.It Li FP_ASSIST.SIMD_OUTPUT +.Pq Event CAH , Umask 08H +Number of SIMD FP assists due to Output values. +.It Li FP_ASSIST.SIMD_INPUT +.Pq Event CAH , Umask 10H +Number of SIMD FP assists due to input values. +.It Li FP_ASSIST.ANY +.Pq Event CAH , Umask 1EH +Cycles with any input/output SSE* or FP assists. +.It Li ROB_MISC_EVENTS.LBR_INSERTS +.Pq Event CCH , Umask 20H +Count cases of saving new LBR records by hardware. +.It Li MEM_TRANS_RETIRED.LOAD_LATENCY +.Pq Event CDH , Umask 01H +Randomly sampled loads whose latency is above a +user defined threshold. A small fraction of the overall +loads are sampled due to randomization. +.It Li MEM_UOPS_RETIRED.STLB_MISS_LOADS +.Pq Event D0H , Umask 11H +Count retired load uops that missed the STLB. +.It Li MEM_UOPS_RETIRED.STLB_MISS_STORES +.Pq Event D0H , Umask 12H +Count retired store uops that missed the STLB. +.It Li MEM_UOPS_RETIRED.SPLIT_LOADS +.Pq Event D0H , Umask 41H +Count retired load uops that were split across a cache line. +.It Li MEM_UOPS_RETIRED.SPLIT_STORES +.Pq Event D0H , Umask 42H +Count retired store uops that were split across a cache line. +.It Li MEM_UOPS_RETIRED.ALL_LOADS +.Pq Event D0H , Umask 81H +Count all retired load uops. +.It Li MEM_UOPS_RETIRED.ALL_STORES +.Pq Event D0H , Umask 82H +Count all retired store uops. +.It Li MEM_LOAD_UOPS_RETIRED.L1_HIT +.Pq Event D1H , Umask 01H +Retired load uops with L1 cache hits as data sources. +.It Li MEM_LOAD_UOPS_RETIRED.L2_HIT +.Pq Event D1H , Umask 02H +Retired load uops with L2 cache hits as data sources. +.It Li MEM_LOAD_UOPS_RETIRED.LLC_HIT +.Pq Event D1H , Umask 04H +Retired load uops with LLC cache hits as data +sources. +.It Li MEM_LOAD_UOPS_RETIRED.L2_MISS +.Pq Event D1H , Umask 10H +Retired load uops missed L2. Unknown data source +excluded. +.It Li MEM_LOAD_UOPS_RETIRED.HIT_LFB +.Pq Event D1H , Umask 40H +Retired load uops which data sources were load uops +missed L1 but hit FB due to preceding miss to the +same cache line with data not ready. +.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS +.Pq Event D2H , Umask 01H +Retired load uops which data sources were LLC hit +and cross-core snoop missed in on-pkg core cache. +.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT +.Pq Event D2H , Umask 02H +Retired load uops which data sources were LLC and +cross-core snoop hits in on-pkg core cache. +.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM +.Pq Event D2H , Umask 04H +Retired load uops which data sources were HitM +responses from shared LLC. +.It Li MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE +.Pq Event D2H , Umask 08H +Retired load uops which data sources were hits in +LLC without snoops required. +.It Li MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM +.Pq Event D3H , Umask 01H +Retired load uops which data sources missed LLC but +serviced from local dram. +.It Li BACLEARS.ANY +.Pq Event E6H , Umask 1FH +Number of front end re-steers due to BPU +misprediction. +.It Li L2_TRANS.DEMAND_DATA_RD +.Pq Event F0H , Umask 01H +Demand Data Read requests that access L2 cache. +.It Li L2_TRANS.RFO +.Pq Event F0H , Umask 02H +RFO requests that access L2 cache. +.It Li L2_TRANS.CODE_RD +.Pq Event F0H , Umask 04H +L2 cache accesses when fetching instructions. +.It Li L2_TRANS.ALL_PF +.Pq Event F0H , Umask 08H +Any MLC or LLC HW prefetch accessing L2, including +rejects. +.It Li L2_TRANS.L1D_WB +.Pq Event F0H , Umask 10H +L1D writebacks that access L2 cache. +.It Li L2_TRANS.L2_FILL +.Pq Event F0H , Umask 20H +L2 fill requests that access L2 cache. +.It Li L2_TRANS.L2_WB +.Pq Event F0H , Umask 40H +L2 writebacks that access L2 cache. +.It Li L2_TRANS.ALL_REQUESTS +.Pq Event F0H , Umask 80H +Transactions accessing L2 pipe. +.It Li L2_LINES_IN.I +.Pq Event F1H , Umask 01H +L2 cache lines in I state filling L2. +.It Li L2_LINES_IN.S +.Pq Event F1H , Umask 02H +L2 cache lines in S state filling L2. +.It Li L2_LINES_IN.E +.Pq Event F1H , Umask 04H +L2 cache lines in E state filling L2. +.It Li L2_LINES_IN.ALL +.Pq Event F1H , Umask 07H +L2 cache lines filling L2. +.It Li L2_LINES_OUT.DEMAND_CLEAN +.Pq Event F2H , Umask 05H +Clean L2 cache lines evicted by demand. +.It Li L2_LINES_OUT.DEMAND_DIRTY +.Pq Event F2H , Umask 06H +Dirty L2 cache lines evicted by demand. +.El +.Sh SEE ALSO +.Xr pmc 3 , +.Xr pmc.atom 3 , +.Xr pmc.core 3 , +.Xr pmc.iaf 3 , +.Xr pmc.ucf 3 , +.Xr pmc.k7 3 , +.Xr pmc.k8 3 , +.Xr pmc.p4 3 , +.Xr pmc.p5 3 , +.Xr pmc.p6 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , +.Xr pmc.haswell 3 , +.Xr pmc.haswelluc 3 , +.Xr pmc.ivybridge 3 , +.Xr pmc.ivybridgexeon 3 , +.Xr pmc.sandybridge 3 , +.Xr pmc.sandybridgeuc 3 , +.Xr pmc.sandybridgexeon 3 , +.Xr pmc.westmere 3 , +.Xr pmc.westmereuc 3 , +.Xr pmc.soft 3 , +.Xr pmc.tsc 3 , +.Xr pmc_cpuinfo 3 , +.Xr pmclog 3 , +.Xr hwpmc 4 +.Sh HISTORY +Support for the Haswell Xeon microarchitecture first appeared in +.Fx 10.2 . +.Sh AUTHORS +The +.Lb libpmc +library was written by +.An "Joseph Koshy" +.Aq jkoshy@FreeBSD.org . +The support for the Haswell Xeon +microarchitecture was written by +.An "Randall Stewart" +.Aq rrs@FreeBSD.org . diff --git a/lib/libpmc/pmc.ivybridge.3 b/lib/libpmc/pmc.ivybridge.3 index b8ad8e0..b693b30 100644 --- a/lib/libpmc/pmc.ivybridge.3 +++ b/lib/libpmc/pmc.ivybridge.3 @@ -449,80 +449,60 @@ Stalls caused by changing prefix length of the instruction. .It Li ILD_STALL.IQ_FULL .Pq Event 87H , Umask 04H Stall cycles due to IQ is full. -.It Li BR_INST_EXEC.COND -.Pq Event 88H , Umask 01H -Qualify conditional near branch instructions executed, but not necessarily -retired. -Must combine with umask 40H, 80H. +.It Li BR_INST_EXEC.NONTAKEN_COND +.Pq Event 88H , Umask 41H +Count conditional near branch instructions that were executed (but not +necessarily retired) and not taken. +.It Li BR_INST_EXEC.TAKEN_COND +.Pq Event 88H , Umask 81H +Count conditional near branch instructions that were executed (but not +necessarily retired) and taken. .It Li BR_INST_EXEC.DIRECT_JMP -.Pq Event 88H , Umask 02H -Qualify all unconditional near branch instructions excluding calls and +.Pq Event 88H , Umask 82H +Count all unconditional near branch instructions excluding calls and indirect branches. -Must combine with umask 80H. .It Li BR_INST_EXEC.INDIRECT_JMP_NON_CALL_RET -.Pq Event 88H , Umask 04H -Qualify executed indirect near branch instructions that are not calls nor +.Pq Event 88H , Umask 84H +Count executed indirect near branch instructions that are not calls nor returns. -Must combine with umask 80H. .It Li BR_INST_EXEC.RETURN_NEAR -.Pq Event 88H , Umask 08H -Qualify indirect near branches that have a return mnemonic. -Must combine with umask 80H. +.Pq Event 88H , Umask 88H +Count indirect near branches that have a return mnemonic. .It Li BR_INST_EXEC.DIRECT_NEAR_CALL -.Pq Event 88H , Umask 10H -Qualify unconditional near call branch instructions, excluding non call +.Pq Event 88H , Umask 90H +Count unconditional near call branch instructions, excluding non call branch, executed. -Must combine with umask 80H. .It Li BR_INST_EXEC.INDIRECT_NEAR_CALL -.Pq Event 88H , Umask 20H -Qualify indirect near calls, including both register and memory indirect, +.Pq Event 88H , Umask A0H +Count indirect near calls, including both register and memory indirect, executed. -Must combine with umask 80H. -.It Li BR_INST_EXEC.NONTAKEN -.Pq Event 88H , Umask 40H -Qualify non-taken near branches executed. -Applicable to umask 01H only. -.It Li BR_INST_EXEC.TAKEN -.Pq Event 88H , Umask 80H -Qualify taken near branches executed. Must combine with 01H,02H, 04H, 08H, -10H, 20H. .It Li BR_INST_EXEC.ALL_BRANCHES .Pq Event 88H , Umask FFH Counts all near executed branches (not necessarily retired). -.It Li BR_MISP_EXEC.COND -.Pq Event 89H , Umask 01H -Qualify conditional near branch instructions mispredicted. -Must combine with umask 40H, 80H. +.It Li BR_MISP_EXEC.NONTAKEN_COND +.Pq Event 89H , Umask 41H +Count conditional near branch instructions mispredicted as nontaken. +.It Li BR_MISP_EXEC.TAKEN_COND +.Pq Event 89H , Umask 81H +Count conditional near branch instructions mispredicted as taken. .It Li BR_MISP_EXEC.INDIRECT_JMP_NON_CALL_RET -.Pq Event 89H , Umask 04H -Qualify mispredicted indirect near branch instructions that are not calls +.Pq Event 89H , Umask 84H +Count mispredicted indirect near branch instructions that are not calls nor returns. -Must combine with umask 80H. .It Li BR_MISP_EXEC.RETURN_NEAR -.Pq Event 89H , Umask 08H -Qualify mispredicted indirect near branches that have a return mnemonic. -Must combine with umask 80H. +.Pq Event 89H , Umask 88H +Count mispredicted indirect near branches that have a return mnemonic. .It Li BR_MISP_EXEC.DIRECT_NEAR_CALL -.Pq Event 89H , Umask 10H -Qualify mispredicted unconditional near call branch instructions, excluding +.Pq Event 89H , Umask 90H +Count mispredicted unconditional near call branch instructions, excluding non call branch, executed. -Must combine with umask 80H. .It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL -.Pq Event 89H , Umask 20H -Qualify mispredicted indirect near calls, including both register and memory +.Pq Event 89H , Umask A0H +Count mispredicted indirect near calls, including both register and memory indirect, executed. -Must combine with umask 80H. -.It Li BR_MISP_EXEC.NONTAKEN -.Pq Event 89H , Umask 40H -Qualify mispredicted non-taken near branches executed. -Applicable to umask 01H only. -.It Li BR_MISP_EXEC.TAKEN -.Pq Event 89H , Umask 80H -Qualify mispredicted taken near branches executed. Must combine with -01H,02H, 04H, 08H, 10H, 20H. .It Li BR_MISP_EXEC.ALL_BRANCHES .Pq Event 89H , Umask FFH -Counts all near executed branches (not necessarily retired). +Counts all mispredicted near executed branches (not necessarily retired). .It Li IDQ_UOPS_NOT_DELIVERED.CORE .Pq Event 9CH , Umask 01H Count number of non-delivered uops to RAT per thread. @@ -726,31 +706,24 @@ Specify threshold in MSR 0x3F6. .Pq Event CDH , Umask 02H Sample stores and collect precise store operation via PEBS record. PMC3 only. -.It Li MEM_UOP_RETIRED.LOADS -.Pq Event D0H , Umask 01H -Qualify retired memory uops that are loads. Combine with umask 10H, 20H, -40H, 80H. -Supports PEBS. -.It Li MEM_UOP_RETIRED.STORES -.Pq Event D0H , Umask 02H -Qualify retired memory uops that are stores. Combine with umask 10H, 20H, -40H, 80H. -.It Li MEM_UOP_RETIRED.STLB_MISS -.Pq Event D0H , Umask 10H -Qualify retired memory uops with STLB miss. Must combine with umask 01H, -02H, to produce counts. -.It Li MEM_UOP_RETIRED.LOCK -.Pq Event D0H , Umask 20H -Qualify retired memory uops with lock. Must combine with umask 01H, 02H, to -produce counts. -.It Li MEM_UOP_RETIRED.SPLIT -.Pq Event D0H , Umask 40H -Qualify retired memory uops with line split. Must combine with umask 01H, -02H, to produce counts. -.It Li MEM_UOP_RETIRED.ALL -.Pq Event D0H , Umask 80H -Qualify any retired memory uops. Must combine with umask 01H, 02H, to -produce counts. +.It Li MEM_UOPS_RETIRED.STLB_MISS_LOADS +.Pq Event D0H , Umask 11H +Count retired load uops that missed the STLB. +.It Li MEM_UOPS_RETIRED.STLB_MISS_STORES +.Pq Event D0H , Umask 12H +Count retired store uops that missed the STLB. +.It Li MEM_UOPS_RETIRED.SPLIT_LOADS +.Pq Event D0H , Umask 41H +Count retired load uops that were split across a cache line. +.It Li MEM_UOPS_RETIRED.SPLIT_STORES +.Pq Event D0H , Umask 42H +Count retired store uops that were split across a cache line. +.It Li MEM_UOPS_RETIRED.ALL_LOADS +.Pq Event D0H , Umask 81H +Count all retired load uops. +.It Li MEM_UOPS_RETIRED.ALL_STORES +.Pq Event D0H , Umask 82H +Count all retired store uops. .It Li MEM_LOAD_UOPS_RETIRED.L1_HIT .Pq Event D1H , Umask 01H Retired load uops with L1 cache hits as data sources. @@ -844,23 +817,23 @@ Dirty L2 cache lines evicted by the MLC prefetcher. .Xr pmc 3 , .Xr pmc.atom 3 , .Xr pmc.core 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , .Xr pmc.iaf 3 , -.Xr pmc.ucf 3 , +.Xr pmc.ivybridgexeon 3 , .Xr pmc.k7 3 , .Xr pmc.k8 3 , .Xr pmc.p4 3 , .Xr pmc.p5 3 , .Xr pmc.p6 3 , -.Xr pmc.corei7 3 , -.Xr pmc.corei7uc 3 , -.Xr pmc.ivybridgexeon 3 , .Xr pmc.sandybridge 3 , .Xr pmc.sandybridgeuc 3 , .Xr pmc.sandybridgexeon 3 , -.Xr pmc.westmere 3 , -.Xr pmc.westmereuc 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , +.Xr pmc.ucf 3 , +.Xr pmc.westmere 3 , +.Xr pmc.westmereuc 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.ivybridgexeon.3 b/lib/libpmc/pmc.ivybridgexeon.3 index 3a5b709..2ee5b78 100644 --- a/lib/libpmc/pmc.ivybridgexeon.3 +++ b/lib/libpmc/pmc.ivybridgexeon.3 @@ -449,80 +449,60 @@ Stalls caused by changing prefix length of the instruction. .It Li ILD_STALL.IQ_FULL .Pq Event 87H , Umask 04H Stall cycles due to IQ is full. -.It Li BR_INST_EXEC.COND -.Pq Event 88H , Umask 01H -Qualify conditional near branch instructions executed, but not necessarily -retired. -Must combine with umask 40H, 80H. +.It Li BR_INST_EXEC.NONTAKEN_COND +.Pq Event 88H , Umask 41H +Count conditional near branch instructions that were executed (but not +necessarily retired) and not taken. +.It Li BR_INST_EXEC.TAKEN_COND +.Pq Event 88H , Umask 81H +Count conditional near branch instructions that were executed (but not +necessarily retired) and taken. .It Li BR_INST_EXEC.DIRECT_JMP -.Pq Event 88H , Umask 02H -Qualify all unconditional near branch instructions excluding calls and +.Pq Event 88H , Umask 82H +Count all unconditional near branch instructions excluding calls and indirect branches. -Must combine with umask 80H. .It Li BR_INST_EXEC.INDIRECT_JMP_NON_CALL_RET -.Pq Event 88H , Umask 04H -Qualify executed indirect near branch instructions that are not calls nor +.Pq Event 88H , Umask 84H +Count executed indirect near branch instructions that are not calls nor returns. -Must combine with umask 80H. .It Li BR_INST_EXEC.RETURN_NEAR -.Pq Event 88H , Umask 08H -Qualify indirect near branches that have a return mnemonic. -Must combine with umask 80H. +.Pq Event 88H , Umask 88H +Count indirect near branches that have a return mnemonic. .It Li BR_INST_EXEC.DIRECT_NEAR_CALL -.Pq Event 88H , Umask 10H -Qualify unconditional near call branch instructions, excluding non call +.Pq Event 88H , Umask 90H +Count unconditional near call branch instructions, excluding non call branch, executed. -Must combine with umask 80H. .It Li BR_INST_EXEC.INDIRECT_NEAR_CALL -.Pq Event 88H , Umask 20H -Qualify indirect near calls, including both register and memory indirect, +.Pq Event 88H , Umask A0H +Count indirect near calls, including both register and memory indirect, executed. -Must combine with umask 80H. -.It Li BR_INST_EXEC.NONTAKEN -.Pq Event 88H , Umask 40H -Qualify non-taken near branches executed. -Applicable to umask 01H only. -.It Li BR_INST_EXEC.TAKEN -.Pq Event 88H , Umask 80H -Qualify taken near branches executed. Must combine with 01H,02H, 04H, 08H, -10H, 20H. .It Li BR_INST_EXEC.ALL_BRANCHES .Pq Event 88H , Umask FFH Counts all near executed branches (not necessarily retired). -.It Li BR_MISP_EXEC.COND -.Pq Event 89H , Umask 01H -Qualify conditional near branch instructions mispredicted. -Must combine with umask 40H, 80H. +.It Li BR_MISP_EXEC.NONTAKEN_COND +.Pq Event 89H , Umask 41H +Count conditional near branch instructions mispredicted as nontaken. +.It Li BR_MISP_EXEC.TAKEN_COND +.Pq Event 89H , Umask 81H +Count conditional near branch instructions mispredicted as taken. .It Li BR_MISP_EXEC.INDIRECT_JMP_NON_CALL_RET -.Pq Event 89H , Umask 04H -Qualify mispredicted indirect near branch instructions that are not calls +.Pq Event 89H , Umask 84H +Count mispredicted indirect near branch instructions that are not calls nor returns. -Must combine with umask 80H. .It Li BR_MISP_EXEC.RETURN_NEAR -.Pq Event 89H , Umask 08H -Qualify mispredicted indirect near branches that have a return mnemonic. -Must combine with umask 80H. +.Pq Event 89H , Umask 88H +Count mispredicted indirect near branches that have a return mnemonic. .It Li BR_MISP_EXEC.DIRECT_NEAR_CALL -.Pq Event 89H , Umask 10H -Qualify mispredicted unconditional near call branch instructions, excluding +.Pq Event 89H , Umask 90H +Count mispredicted unconditional near call branch instructions, excluding non call branch, executed. -Must combine with umask 80H. .It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL -.Pq Event 89H , Umask 20H -Qualify mispredicted indirect near calls, including both register and memory +.Pq Event 89H , Umask A0H +Count mispredicted indirect near calls, including both register and memory indirect, executed. -Must combine with umask 80H. -.It Li BR_MISP_EXEC.NONTAKEN -.Pq Event 89H , Umask 40H -Qualify mispredicted non-taken near branches executed. -Applicable to umask 01H only. -.It Li BR_MISP_EXEC.TAKEN -.Pq Event 89H , Umask 80H -Qualify mispredicted taken near branches executed. Must combine with -01H,02H, 04H, 08H, 10H, 20H. .It Li BR_MISP_EXEC.ALL_BRANCHES .Pq Event 89H , Umask FFH -Counts all near executed branches (not necessarily retired). +Counts all mispredicted near executed branches (not necessarily retired). .It Li IDQ_UOPS_NOT_DELIVERED.CORE .Pq Event 9CH , Umask 01H Count number of non-delivered uops to RAT per thread. @@ -738,31 +718,24 @@ Specify threshold in MSR 0x3F6. .Pq Event CDH , Umask 02H Sample stores and collect precise store operation via PEBS record. PMC3 only. -.It Li MEM_UOP_RETIRED.LOADS -.Pq Event D0H , Umask 01H -Qualify retired memory uops that are loads. Combine with umask 10H, 20H, -40H, 80H. -Supports PEBS. -.It Li MEM_UOP_RETIRED.STORES -.Pq Event D0H , Umask 02H -Qualify retired memory uops that are stores. Combine with umask 10H, 20H, -40H, 80H. -.It Li MEM_UOP_RETIRED.STLB_MISS -.Pq Event D0H , Umask 10H -Qualify retired memory uops with STLB miss. Must combine with umask 01H, -02H, to produce counts. -.It Li MEM_UOP_RETIRED.LOCK -.Pq Event D0H , Umask 20H -Qualify retired memory uops with lock. Must combine with umask 01H, 02H, to -produce counts. -.It Li MEM_UOP_RETIRED.SPLIT -.Pq Event D0H , Umask 40H -Qualify retired memory uops with line split. Must combine with umask 01H, -02H, to produce counts. -.It Li MEM_UOP_RETIRED.ALL -.Pq Event D0H , Umask 80H -Qualify any retired memory uops. Must combine with umask 01H, 02H, to -produce counts. +.It Li MEM_UOPS_RETIRED.STLB_MISS_LOADS +.Pq Event D0H , Umask 11H +Count retired load uops that missed the STLB. +.It Li MEM_UOPS_RETIRED.STLB_MISS_STORES +.Pq Event D0H , Umask 12H +Count retired store uops that missed the STLB. +.It Li MEM_UOPS_RETIRED.SPLIT_LOADS +.Pq Event D0H , Umask 41H +Count retired load uops that were split across a cache line. +.It Li MEM_UOPS_RETIRED.SPLIT_STORES +.Pq Event D0H , Umask 42H +Count retired store uops that were split across a cache line. +.It Li MEM_UOPS_RETIRED.ALL_LOADS +.Pq Event D0H , Umask 81H +Count all retired load uops. +.It Li MEM_UOPS_RETIRED.ALL_STORES +.Pq Event D0H , Umask 82H +Count all retired store uops. .It Li MEM_LOAD_UOPS_RETIRED.L1_HIT .Pq Event D1H , Umask 01H Retired load uops with L1 cache hits as data sources. @@ -874,23 +847,23 @@ Dirty L2 cache lines filling the L2. .Xr pmc 3 , .Xr pmc.atom 3 , .Xr pmc.core 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , .Xr pmc.iaf 3 , -.Xr pmc.ucf 3 , +.Xr pmc.ivybridge 3 , .Xr pmc.k7 3 , .Xr pmc.k8 3 , .Xr pmc.p4 3 , .Xr pmc.p5 3 , .Xr pmc.p6 3 , -.Xr pmc.corei7 3 , -.Xr pmc.corei7uc 3 , -.Xr pmc.ivybridge 3 , .Xr pmc.sandybridge 3 , .Xr pmc.sandybridgeuc 3 , .Xr pmc.sandybridgexeon 3 , -.Xr pmc.westmere 3 , -.Xr pmc.westmereuc 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , +.Xr pmc.ucf 3 , +.Xr pmc.westmere 3 , +.Xr pmc.westmereuc 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.sandybridge.3 b/lib/libpmc/pmc.sandybridge.3 index 0b8f6b2..0e219ae 100644 --- a/lib/libpmc/pmc.sandybridge.3 +++ b/lib/libpmc/pmc.sandybridge.3 @@ -497,80 +497,60 @@ Stalls caused by changing prefix length of the instruction. .It Li ILD_STALL.IQ_FULL .Pq Event 87H, Umask 04H Stall cycles due to IQ is full. -.It Li BR_INST_EXEC.COND -.Pq Event 88H, Umask 01H -Qualify conditional near branch instructions executed, but not necessarily -retired. -Must combine with umask 40H, 80H +.It Li BR_INST_EXEC.NONTAKEN_COND +.Pq Event 88H , Umask 41H +Count conditional near branch instructions that were executed (but not +necessarily retired) and not taken. +.It Li BR_INST_EXEC.TAKEN_COND +.Pq Event 88H , Umask 81H +Count conditional near branch instructions that were executed (but not +necessarily retired) and taken. .It Li BR_INST_EXEC.DIRECT_JMP -.Pq Event 88H, Umask 02H -Qualify all unconditional near branch instructions excluding calls and indirect -branches. -Must combine with umask 80H +.Pq Event 88H , Umask 82H +Count all unconditional near branch instructions excluding calls and +indirect branches. .It Li BR_INST_EXEC.INDIRECT_JMP_NON_CALL_RET -.Pq Event 88H, Umask 04H -Qualify executed indirect near branch instructions that are not calls nor +.Pq Event 88H , Umask 84H +Count executed indirect near branch instructions that are not calls nor returns. -Must combine with umask 80H .It Li BR_INST_EXEC.RETURN_NEAR -.Pq Event 88H, Umask 08H -Qualify indirect near branches that have a return mnemonic. -Must combine with umask 80H +.Pq Event 88H , Umask 88H +Count indirect near branches that have a return mnemonic. .It Li BR_INST_EXEC.DIRECT_NEAR_CALL -.Pq Event 88H, Umask 10H -Qualify unconditional near call branch instructions, excluding non call branch, -executed. -Must combine with umask 80H +.Pq Event 88H , Umask 90H +Count unconditional near call branch instructions, excluding non call +branch, executed. .It Li BR_INST_EXEC.INDIRECT_NEAR_CALL -.Pq Event 88H, Umask 20H -Qualify indirect near calls, including both register and memory indirect, +.Pq Event 88H , Umask A0H +Count indirect near calls, including both register and memory indirect, executed. -Must combine with umask 80H -.It Li BR_INST_EXEC.NONTAKEN -.Pq Event 88H, Umask 40H -Qualify non-taken near branches executed. -Applicable to umask 01H only -.It Li BR_INST_EXEC.TAKEN -.Pq Event 88H, Umask 80H -Qualify taken near branches executed. -Must combine with 01H,02H, 04H, 08H, 10H, 20H -.It Li BR_INST_EXE.ALL_BRANCHES -.Pq Event 88H, Umask FFH +.It Li BR_INST_EXEC.ALL_BRANCHES +.Pq Event 88H , Umask FFH Counts all near executed branches (not necessarily retired). -.It Li BR_MISP_EXEC.COND -.Pq Event 89H, Umask 01H -Qualify conditional near branch instructions mispredicted. -Must combine with umask 40H, 80H +.It Li BR_MISP_EXEC.NONTAKEN_COND +.Pq Event 89H , Umask 41H +Count conditional near branch instructions mispredicted as nontaken. +.It Li BR_MISP_EXEC.TAKEN_COND +.Pq Event 89H , Umask 81H +Count conditional near branch instructions mispredicted as taken. .It Li BR_MISP_EXEC.INDIRECT_JMP_NON_CALL_RET -.Pq Event 89H, Umask 04H -Qualify mispredicted indirect near branch instructions that are not calls nor -returns. -Must combine with umask 80H +.Pq Event 89H , Umask 84H +Count mispredicted indirect near branch instructions that are not calls +nor returns. .It Li BR_MISP_EXEC.RETURN_NEAR -.Pq Event 89H, Umask 08H -Qualify mispredicted indirect near branches that have a return mnemonic. -Must combine with umask 80H +.Pq Event 89H , Umask 88H +Count mispredicted indirect near branches that have a return mnemonic. .It Li BR_MISP_EXEC.DIRECT_NEAR_CALL -.Pq Event 89H, Umask 10H -Qualify mispredicted unconditional near call branch instructions, excluding non -call branch, executed. -Must combine with umask 80H +.Pq Event 89H , Umask 90H +Count mispredicted unconditional near call branch instructions, excluding +non call branch, executed. .It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL -.Pq Event 89H, Umask 20H -Qualify mispredicted indirect near calls, including both register and memory +.Pq Event 89H , Umask A0H +Count mispredicted indirect near calls, including both register and memory indirect, executed. -Must combine with umask 80H -.It Li BR_MISP_EXEC.NONTAKEN -.Pq Event 89H, Umask 40H -Qualify mispredicted non-taken near branches executed. -Applicable to umask 01H only -.It Li BR_MISP_EXEC.TAKEN -.Pq Event 89H, Umask 80H -Qualify mispredicted taken near branches executed. -Must combine with 01H,02H, 04H, 08H, 10H, 20H .It Li BR_MISP_EXEC.ALL_BRANCHES -.Pq Event 89H, Umask FFH -Counts all near executed branches (not necessarily retired). +.Pq Event 89H , Umask FFH +Counts all mispredicted near executed branches (not necessarily retired). .It Li IDQ_UOPS_NOT_DELIVERED.CORE .Pq Event 9CH, Umask 01H Count number of non-delivered uops to RAT per thread. diff --git a/lib/libpmc/pmc.sandybridgexeon.3 b/lib/libpmc/pmc.sandybridgexeon.3 index 9a80f78..b334c16 100644 --- a/lib/libpmc/pmc.sandybridgexeon.3 +++ b/lib/libpmc/pmc.sandybridgexeon.3 @@ -543,73 +543,60 @@ instruction. .It Li ILD_STALL.IQ_FULL .Pq Event 87H , Umask 04H Stall cycles due to IQ is full. -.It Li BR_INST_EXEC.COND -.Pq Event 88H , Umask 01H -Qualify conditional near branch instructions -executed, but not necessarily retired. +.It Li BR_INST_EXEC.NONTAKEN_COND +.Pq Event 88H , Umask 41H +Count conditional near branch instructions that were executed (but not +necessarily retired) and not taken. +.It Li BR_INST_EXEC.TAKEN_COND +.Pq Event 88H , Umask 81H +Count conditional near branch instructions that were executed (but not +necessarily retired) and taken. .It Li BR_INST_EXEC.DIRECT_JMP -.Pq Event 88H , Umask 02H -Qualify all unconditional near branch instructions -excluding calls and indirect branches. +.Pq Event 88H , Umask 82H +Count all unconditional near branch instructions excluding calls and +indirect branches. .It Li BR_INST_EXEC.INDIRECT_JMP_NON_CALL_RET -.Pq Event 88H , Umask 04H -Qualify executed indirect near branch instructions -that are not calls nor returns. +.Pq Event 88H , Umask 84H +Count executed indirect near branch instructions that are not calls nor +returns. .It Li BR_INST_EXEC.RETURN_NEAR -.Pq Event 88H , Umask 08H -Qualify indirect near branches that have a return -mnemonic. +.Pq Event 88H , Umask 88H +Count indirect near branches that have a return mnemonic. .It Li BR_INST_EXEC.DIRECT_NEAR_CALL -.Pq Event 88H , Umask 10H -Qualify unconditional near call branch instructions, -excluding non call branch, executed. +.Pq Event 88H , Umask 90H +Count unconditional near call branch instructions, excluding non call +branch, executed. .It Li BR_INST_EXEC.INDIRECT_NEAR_CALL -.Pq Event 88H , Umask 20H -Qualify indirect near calls, including both register -and memory indirect, executed. -.It Li BR_INST_EXEC.NONTAKEN -.Pq Event 88H , Umask 40H -Qualify non-taken near branches executed. -.It Li BR_INST_EXEC.TAKEN -.Pq Event 88H , Umask 80H -Qualify taken near branches executed. Must -combine with 01H,02H, 04H, 08H, 10H, 20H. -.It Li BR_INST_EXE.ALL_BRANCHES +.Pq Event 88H , Umask A0H +Count indirect near calls, including both register and memory indirect, +executed. +.It Li BR_INST_EXEC.ALL_BRANCHES .Pq Event 88H , Umask FFH -Counts all near executed branches (not necessarily -retired). -.It Li BR_MISP_EXEC.COND -.Pq Event 89H , Umask 01H -Qualify conditional near branch instructions -mispredicted. +Counts all near executed branches (not necessarily retired). +.It Li BR_MISP_EXEC.NONTAKEN_COND +.Pq Event 89H , Umask 41H +Count conditional near branch instructions mispredicted as nontaken. +.It Li BR_MISP_EXEC.TAKEN_COND +.Pq Event 89H , Umask 81H +Count conditional near branch instructions mispredicted as taken. .It Li BR_MISP_EXEC.INDIRECT_JMP_NON_CALL_RET -.Pq Event 89H , Umask 04H -Qualify mispredicted indirect near branch -instructions that are not calls nor returns. +.Pq Event 89H , Umask 84H +Count mispredicted indirect near branch instructions that are not calls +nor returns. .It Li BR_MISP_EXEC.RETURN_NEAR -.Pq Event 89H , Umask 08H -Qualify mispredicted indirect near branches that -have a return mnemonic. +.Pq Event 89H , Umask 88H +Count mispredicted indirect near branches that have a return mnemonic. .It Li BR_MISP_EXEC.DIRECT_NEAR_CALL -.Pq Event 89H , Umask 10H -Qualify mispredicted unconditional near call branch -instructions, excluding non call branch, executed. +.Pq Event 89H , Umask 90H +Count mispredicted unconditional near call branch instructions, excluding +non call branch, executed. .It Li BR_MISP_EXEC.INDIRECT_NEAR_CALL -.Pq Event 89H , Umask 20H -Qualify mispredicted indirect near calls, including -both register and memory indirect, executed. -.It Li BR_MISP_EXEC.NONTAKEN -.Pq Event 89H , Umask 40H -Qualify mispredicted non-taken near branches -executed,. -.It Li BR_MISP_EXEC.TAKEN -.Pq Event 89H , Umask 80H -Qualify mispredicted taken near branches executed. -Must combine with 01H,02H, 04H, 08H, 10H, 20H +.Pq Event 89H , Umask A0H +Count mispredicted indirect near calls, including both register and memory +indirect, executed. .It Li BR_MISP_EXEC.ALL_BRANCHES .Pq Event 89H , Umask FFH -Counts all near executed branches (not necessarily -retired). +Counts all mispredicted near executed branches (not necessarily retired). .It Li IDQ_UOPS_NOT_DELIVERED.CORE .Pq Event 9CH , Umask 01H Count number of non-delivered uops to RAT per @@ -987,23 +974,24 @@ Split locks in SQ. .Xr pmc 3 , .Xr pmc.atom 3 , .Xr pmc.core 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , +.Xr pmc.haswelluc 3 , .Xr pmc.iaf 3 , -.Xr pmc.ucf 3 , +.Xr pmc.ivybridge 3 , +.Xr pmc.ivybridgexeon 3 , .Xr pmc.k7 3 , .Xr pmc.k8 3 , .Xr pmc.p4 3 , .Xr pmc.p5 3 , .Xr pmc.p6 3 , -.Xr pmc.corei7 3 , -.Xr pmc.corei7uc 3 , -.Xr pmc.ivybridge 3 , -.Xr pmc.ivybridgexeon 3 , .Xr pmc.sandybridge 3 , .Xr pmc.sandybridgeuc 3 , -.Xr pmc.westmere 3 , -.Xr pmc.westmereuc 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , +.Xr pmc.ucf 3 , +.Xr pmc.westmere 3 , +.Xr pmc.westmereuc 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.soft.3 b/lib/libpmc/pmc.soft.3 index 5d331e2..ec53206 100644 --- a/lib/libpmc/pmc.soft.3 +++ b/lib/libpmc/pmc.soft.3 @@ -76,17 +76,17 @@ Write page fault. .Xr pmc 3 , .Xr pmc.atom 3 , .Xr pmc.core 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , .Xr pmc.iaf 3 , -.Xr pmc.ucf 3 , .Xr pmc.k7 3 , .Xr pmc.k8 3 , .Xr pmc.p4 3 , .Xr pmc.p5 3 , .Xr pmc.p6 3 , -.Xr pmc.corei7 3 , -.Xr pmc.corei7uc 3 , -.Xr pmc.westmereuc 3 , .Xr pmc.tsc 3 , +.Xr pmc.ucf 3 , +.Xr pmc.westmereuc 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.ucf.3 b/lib/libpmc/pmc.ucf.3 index 6b02786..bdbb200 100644 --- a/lib/libpmc/pmc.ucf.3 +++ b/lib/libpmc/pmc.ucf.3 @@ -86,18 +86,18 @@ offset C0H under device number 0 and Function 0. .Xr pmc.atom 3 , .Xr pmc.core 3 , .Xr pmc.core2 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , .Xr pmc.iaf 3 , .Xr pmc.k7 3 , .Xr pmc.k8 3 , .Xr pmc.p4 3 , .Xr pmc.p5 3 , .Xr pmc.p6 3 , -.Xr pmc.corei7 3 , -.Xr pmc.corei7uc 3 , -.Xr pmc.westmere 3 , -.Xr pmc.westmereuc 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , +.Xr pmc.westmere 3 , +.Xr pmc.westmereuc 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.westmere.3 b/lib/libpmc/pmc.westmere.3 index 6bfdf39..3668460 100644 --- a/lib/libpmc/pmc.westmere.3 +++ b/lib/libpmc/pmc.westmere.3 @@ -1371,18 +1371,18 @@ Counts number of SID integer 64 bit shift or move operations. .Xr pmc 3 , .Xr pmc.atom 3 , .Xr pmc.core 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , .Xr pmc.iaf 3 , -.Xr pmc.ucf 3 , .Xr pmc.k7 3 , .Xr pmc.k8 3 , .Xr pmc.p4 3 , .Xr pmc.p5 3 , .Xr pmc.p6 3 , -.Xr pmc.corei7 3 , -.Xr pmc.corei7uc 3 , -.Xr pmc.westmereuc 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , +.Xr pmc.ucf 3 , +.Xr pmc.westmereuc 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.westmereuc.3 b/lib/libpmc/pmc.westmereuc.3 index 2f260c2..23319cf 100644 --- a/lib/libpmc/pmc.westmereuc.3 +++ b/lib/libpmc/pmc.westmereuc.3 @@ -1056,18 +1056,18 @@ disabled. .Xr pmc 3 , .Xr pmc.atom 3 , .Xr pmc.core 3 , +.Xr pmc.corei7 3 , +.Xr pmc.corei7uc 3 , .Xr pmc.iaf 3 , -.Xr pmc.ucf 3 , .Xr pmc.k7 3 , .Xr pmc.k8 3 , .Xr pmc.p4 3 , .Xr pmc.p5 3 , .Xr pmc.p6 3 , -.Xr pmc.corei7 3 , -.Xr pmc.corei7uc 3 , -.Xr pmc.westmere 3 , .Xr pmc.soft 3 , .Xr pmc.tsc 3 , +.Xr pmc.ucf 3 , +.Xr pmc.westmere 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , .Xr hwpmc 4 diff --git a/lib/libpmc/pmc.xscale.3 b/lib/libpmc/pmc.xscale.3 index 5a496b9..d5766e3 100644 --- a/lib/libpmc/pmc.xscale.3 +++ b/lib/libpmc/pmc.xscale.3 @@ -132,9 +132,9 @@ and the underlying hardware events used. .El .Sh SEE ALSO .Xr pmc 3 , +.Xr pmc.soft 3 , .Xr pmc_cpuinfo 3 , .Xr pmclog 3 , -.Xr pmc.soft 3 , .Xr hwpmc 4 .Sh HISTORY The diff --git a/lib/libpmc/pmc_capabilities.3 b/lib/libpmc/pmc_capabilities.3 index 8c5916e..c0df754 100644 --- a/lib/libpmc/pmc_capabilities.3 +++ b/lib/libpmc/pmc_capabilities.3 @@ -223,8 +223,8 @@ The argument to the function was invalid. .Xr pmc_allocate 3 , .Xr pmc_get_driver_stats 3 , .Xr pmc_name_of_capability 3 , -.Xr pmc_name_of_cputype 3 , .Xr pmc_name_of_class 3 , +.Xr pmc_name_of_cputype 3 , .Xr pmc_name_of_event 3 , .Xr pmc_name_of_mode 3 , .Xr hwpmc 4 |