diff options
author | jkoshy <jkoshy@FreeBSD.org> | 2005-04-19 04:01:25 +0000 |
---|---|---|
committer | jkoshy <jkoshy@FreeBSD.org> | 2005-04-19 04:01:25 +0000 |
commit | dc3444cd91762fa913e417f7f7a7a0484872f54e (patch) | |
tree | 3175e06cfbec643ca7426d756f2362160f9309d4 /lib | |
parent | 8c509864f2dd0cdcc6116de38bf9137583c4ab2f (diff) | |
download | FreeBSD-src-dc3444cd91762fa913e417f7f7a7a0484872f54e.zip FreeBSD-src-dc3444cd91762fa913e417f7f7a7a0484872f54e.tar.gz |
Bring a working snapshot of hwpmc(4), its associated libraries, userland utilities
and documentation into -CURRENT.
Bump FreeBSD_version.
Reviewed by: alc, jhb (kernel changes)
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Makefile | 4 | ||||
-rw-r--r-- | lib/libpmc/Makefile | 43 | ||||
-rw-r--r-- | lib/libpmc/libpmc.c | 2136 | ||||
-rw-r--r-- | lib/libpmc/pmc.3 | 3090 | ||||
-rw-r--r-- | lib/libpmc/pmc.h | 79 |
5 files changed, 5351 insertions, 1 deletions
diff --git a/lib/Makefile b/lib/Makefile index 76f4f70..2720004 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -31,7 +31,7 @@ SUBDIR= ${_csu} libcom_err libcrypt libkvm msun libmd libncurses \ ${_libio} libipsec \ libipx libkiconv libmagic libmenu ${_libmilter} ${_libmp} \ ${_libncp} ${_libngatm} libopie libpam libpanel libpcap \ - ${_libpthread} ${_libsdp} ${_libsm} ${_libsmb} ${_libsmdb} \ + ${_libpmc} ${_libpthread} ${_libsdp} ${_libsm} ${_libsmb} ${_libsmdb} \ ${_libsmutil} libstand libtelnet ${_libthr} ${_libthread_db} libufs \ libugidfw ${_libusbhid} ${_libvgl} libwrap liby libz ${_bind} @@ -59,6 +59,7 @@ _libsdp= libsdp .if ${MACHINE_ARCH} == "i386" _libncp= libncp +_libpmc= libpmc _libsmb= libsmb _libvgl= libvgl .endif @@ -89,6 +90,7 @@ _libmp= libmp .if ${MACHINE_ARCH} == "amd64" _libncp= libncp +_libpmc= libpmc _libsmb= libsmb .endif diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile new file mode 100644 index 0000000..c6857da --- /dev/null +++ b/lib/libpmc/Makefile @@ -0,0 +1,43 @@ +# $FreeBSD$ + +LIB= pmc + +SRCS= libpmc.c +INCS= pmc.h + +CFLAGS+= -I${.CURDIR} -I${.CURDIR}/../../sys + +WARNS?= 6 + +MAN= pmc.3 + +MLINKS+= \ + pmc.3 pmc_allocate.3 \ + pmc.3 pmc_attach.3 \ + pmc.3 pmc_configure_logfile.3 \ + pmc.3 pmc_cpuinfo.3 \ + pmc.3 pmc_detach.3 \ + pmc.3 pmc_disable.3 \ + pmc.3 pmc_enable.3 \ + pmc.3 pmc_event_names_of_class.3 \ + pmc.3 pmc_get_driver_stats.3 \ + pmc.3 pmc_init.3 \ + pmc.3 pmc_name_of_capability.3 \ + pmc.3 pmc_name_of_class.3 \ + pmc.3 pmc_name_of_cputype.3 \ + pmc.3 pmc_name_of_event.3 \ + pmc.3 pmc_name_of_mode.3 \ + pmc.3 pmc_name_of_state.3 \ + pmc.3 pmc_ncpu.3 \ + pmc.3 pmc_npmc.3 \ + pmc.3 pmc_pmcinfo.3 \ + pmc.3 pmc_read.3 \ + pmc.3 pmc_release.3 \ + pmc.3 pmc_rw.3 \ + pmc.3 pmc_set.3 \ + pmc.3 pmc_start.3 \ + pmc.3 pmc_stop.3 \ + pmc.3 pmc_write.3 \ + pmc.3 pmc_x86_get_msr.3 + +.include <bsd.lib.mk> diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c new file mode 100644 index 0000000..925e3f9 --- /dev/null +++ 
b/lib/libpmc/libpmc.c @@ -0,0 +1,2136 @@ +/*- + * Copyright (c) 2003,2004 Joseph Koshy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/module.h> +#include <sys/pmc.h> +#include <sys/syscall.h> + +#include <machine/pmc_mdep.h> + +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <pmc.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <strings.h> +#include <unistd.h> + +/* Function prototypes */ +#if __i386__ +static int k7_allocate_pmc(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); +static int p6_allocate_pmc(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); +static int p4_allocate_pmc(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); +static int p5_allocate_pmc(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); +#elif __amd64__ +static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); +#endif + +#define PMC_CALL(cmd, params) \ + syscall(pmc_syscall, PMC_OP_##cmd, (params)) + +/* + * Event aliases provide a way for the user to ask for generic events + * like "cache-misses", or "instructions-retired". These aliases are + * mapped to the appropriate canonical event descriptions using a + * lookup table. + */ + +struct pmc_event_alias { + const char *pm_alias; + const char *pm_spec; +}; + +static const struct pmc_event_alias *pmc_mdep_event_aliases; + +/* + * The pmc_event_descr table maps symbolic names known to the user + * to integer codes used by the PMC KLD. + */ + +struct pmc_event_descr { + const char *pm_ev_name; + enum pmc_event pm_ev_code; + enum pmc_class pm_ev_class; +}; + +static const struct pmc_event_descr +pmc_event_table[] = +{ +#undef __PMC_EV +#define __PMC_EV(C,N,EV) { #EV, PMC_EV_ ## C ## _ ## N, PMC_CLASS_ ## C }, + __PMC_EVENTS() +}; + +/* + * Mapping tables, mapping enumeration values to human readable + * strings. 
+ */ + +static const char * pmc_capability_names[] = { +#undef __PMC_CAP +#define __PMC_CAP(N,V,D) #N , + __PMC_CAPS() +}; + +static const char * pmc_class_names[] = { +#undef __PMC_CLASS +#define __PMC_CLASS(C) #C , + __PMC_CLASSES() +}; + +static const char * pmc_cputype_names[] = { +#undef __PMC_CPU +#define __PMC_CPU(S, D) #S , + __PMC_CPUS() +}; + +static const char * pmc_disposition_names[] = { +#undef __PMC_DISP +#define __PMC_DISP(D) #D , + __PMC_DISPOSITIONS() +}; + +static const char * pmc_mode_names[] = { +#undef __PMC_MODE +#define __PMC_MODE(M,N) #M , + __PMC_MODES() +}; + +static const char * pmc_state_names[] = { +#undef __PMC_STATE +#define __PMC_STATE(S) #S , + __PMC_STATES() +}; + +static int pmc_syscall = -1; /* filled in by pmc_init() */ + +struct pmc_op_getcpuinfo cpu_info; /* filled in by pmc_init() */ + +/* Architecture dependent event parsing */ +static int (*pmc_mdep_allocate_pmc)(enum pmc_event _pe, char *_ctrspec, + struct pmc_op_pmcallocate *_pmc_config); + +/* Event masks for events */ +struct pmc_masks { + const char *pm_name; + const uint32_t pm_value; +}; +#define PMCMASK(N,V) { .pm_name = #N, .pm_value = (V) } +#define NULLMASK PMCMASK(NULL,0) + +static int +pmc_parse_mask(const struct pmc_masks *pmask, char *p, uint32_t *evmask) +{ + const struct pmc_masks *pm; + char *q, *r; + int c; + + if (pmask == NULL) /* no mask keywords */ + return -1; + q = strchr(p, '='); /* skip '=' */ + if (*++q == '\0') /* no more data */ + return -1; + c = 0; /* count of mask keywords seen */ + while ((r = strsep(&q, "+")) != NULL) { + for (pm = pmask; pm->pm_name && strcmp(r, pm->pm_name); pm++) + ; + if (pm->pm_name == NULL) /* not found */ + return -1; + *evmask |= pm->pm_value; + c++; + } + return c; +} + +#define KWMATCH(p,kw) (strcasecmp((p), (kw)) == 0) +#define KWPREFIXMATCH(p,kw) (strncasecmp((p), (kw), sizeof((kw)) - 1) == 0) +#define EV_ALIAS(N,S) { .pm_alias = N, .pm_spec = S } + +#if __i386__ + +/* + * AMD K7 (Athlon) CPUs. 
+ */ + +static struct pmc_event_alias k7_aliases[] = { +EV_ALIAS("branches", "k7-retired-branches"), +EV_ALIAS("branch-mispredicts", "k7-retired-branches-mispredicted"), +EV_ALIAS("cycles", "tsc"), +EV_ALIAS("dc-misses", "k7-dc-misses,mask=moesi"), +EV_ALIAS("ic-misses", "k7-ic-misses"), +EV_ALIAS("instructions", "k7-retired-instructions"), +EV_ALIAS("interrupts", "k7-hardware-interrupts"), +EV_ALIAS(NULL, NULL) +}; + +#define K7_KW_COUNT "count" +#define K7_KW_EDGE "edge" +#define K7_KW_INV "inv" +#define K7_KW_OS "os" +#define K7_KW_UNITMASK "unitmask" +#define K7_KW_USR "usr" + +static int +k7_allocate_pmc(enum pmc_event pe, char *ctrspec, + struct pmc_op_pmcallocate *pmc_config) +{ + char *e, *p, *q; + int c, has_unitmask; + uint32_t count, unitmask; + + pmc_config->pm_amd_config = 0; + pmc_config->pm_caps |= PMC_CAP_READ; + + if (pe == PMC_EV_TSC_TSC) { + /* TSC events must be unqualified. */ + if (ctrspec && *ctrspec != '\0') + return -1; + return 0; + } + + if (pe == PMC_EV_K7_DC_REFILLS_FROM_L2 || + pe == PMC_EV_K7_DC_REFILLS_FROM_SYSTEM || + pe == PMC_EV_K7_DC_WRITEBACKS) { + has_unitmask = 1; + unitmask = K7_PMC_UNITMASK_MOESI; + } else + unitmask = has_unitmask = 0; + + pmc_config->pm_caps |= PMC_CAP_WRITE; + + while ((p = strsep(&ctrspec, ",")) != NULL) { + if (KWPREFIXMATCH(p, K7_KW_COUNT "=")) { + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return -1; + + count = strtol(q, &e, 0); + if (e == q || *e != '\0') + return -1; + + pmc_config->pm_caps |= PMC_CAP_THRESHOLD; + pmc_config->pm_amd_config |= K7_PMC_TO_COUNTER(count); + + } else if (KWMATCH(p, K7_KW_EDGE)) { + pmc_config->pm_caps |= PMC_CAP_EDGE; + } else if (KWMATCH(p, K7_KW_INV)) { + pmc_config->pm_caps |= PMC_CAP_INVERT; + } else if (KWMATCH(p, K7_KW_OS)) { + pmc_config->pm_caps |= PMC_CAP_SYSTEM; + } else if (KWPREFIXMATCH(p, K7_KW_UNITMASK "=")) { + if (has_unitmask == 0) + return -1; + unitmask = 0; + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return -1; + + while 
((c = tolower(*q++)) != 0) + if (c == 'm') + unitmask |= K7_PMC_UNITMASK_M; + else if (c == 'o') + unitmask |= K7_PMC_UNITMASK_O; + else if (c == 'e') + unitmask |= K7_PMC_UNITMASK_E; + else if (c == 's') + unitmask |= K7_PMC_UNITMASK_S; + else if (c == 'i') + unitmask |= K7_PMC_UNITMASK_I; + else if (c == '+') + continue; + else + return -1; + + if (unitmask == 0) + return -1; + + } else if (KWMATCH(p, K7_KW_USR)) { + pmc_config->pm_caps |= PMC_CAP_USER; + } else + return -1; + } + + if (has_unitmask) { + pmc_config->pm_caps |= PMC_CAP_QUALIFIER; + pmc_config->pm_amd_config |= + K7_PMC_TO_UNITMASK(unitmask); + } + + return 0; + +} + +/* + * Intel P4 PMCs + */ + +static struct pmc_event_alias p4_aliases[] = { + EV_ALIAS("cycles", "tsc"), + EV_ALIAS(NULL, NULL) +}; + +#define P4_KW_ACTIVE "active" +#define P4_KW_ACTIVE_ANY "any" +#define P4_KW_ACTIVE_BOTH "both" +#define P4_KW_ACTIVE_NONE "none" +#define P4_KW_ACTIVE_SINGLE "single" +#define P4_KW_BUSREQTYPE "busreqtype" +#define P4_KW_CASCADE "cascade" +#define P4_KW_EDGE "edge" +#define P4_KW_INV "complement" +#define P4_KW_OS "os" +#define P4_KW_MASK "mask" +#define P4_KW_PRECISE "precise" +#define P4_KW_TAG "tag" +#define P4_KW_THRESHOLD "threshold" +#define P4_KW_USR "usr" + +#define __P4MASK(N,V) PMCMASK(N, (1 << (V))) + +static const struct pmc_masks p4_mask_tcdm[] = { /* tc deliver mode */ + __P4MASK(dd, 0), + __P4MASK(db, 1), + __P4MASK(di, 2), + __P4MASK(bd, 3), + __P4MASK(bb, 4), + __P4MASK(bi, 5), + __P4MASK(id, 6), + __P4MASK(ib, 7), + NULLMASK +}; + +static const struct pmc_masks p4_mask_bfr[] = { /* bpu fetch request */ + __P4MASK(tcmiss, 0), + NULLMASK, +}; + +static const struct pmc_masks p4_mask_ir[] = { /* itlb reference */ + __P4MASK(hit, 0), + __P4MASK(miss, 1), + __P4MASK(hit-uc, 2), + NULLMASK +}; + +static const struct pmc_masks p4_mask_memcan[] = { /* memory cancel */ + __P4MASK(st-rb-full, 2), + __P4MASK(64k-conf, 3), + NULLMASK +}; + +static const struct pmc_masks p4_mask_memcomp[] = { /* 
memory complete */ + __P4MASK(lsc, 0), + __P4MASK(ssc, 1), + NULLMASK +}; + +static const struct pmc_masks p4_mask_lpr[] = { /* load port replay */ + __P4MASK(split-ld, 1), + NULLMASK +}; + +static const struct pmc_masks p4_mask_spr[] = { /* store port replay */ + __P4MASK(split-st, 1), + NULLMASK +}; + +static const struct pmc_masks p4_mask_mlr[] = { /* mob load replay */ + __P4MASK(no-sta, 1), + __P4MASK(no-std, 3), + __P4MASK(partial-data, 4), + __P4MASK(unalgn-addr, 5), + NULLMASK +}; + +static const struct pmc_masks p4_mask_pwt[] = { /* page walk type */ + __P4MASK(dtmiss, 0), + __P4MASK(itmiss, 1), + NULLMASK +}; + +static const struct pmc_masks p4_mask_bcr[] = { /* bsq cache reference */ + __P4MASK(rd-2ndl-hits, 0), + __P4MASK(rd-2ndl-hite, 1), + __P4MASK(rd-2ndl-hitm, 2), + __P4MASK(rd-3rdl-hits, 3), + __P4MASK(rd-3rdl-hite, 4), + __P4MASK(rd-3rdl-hitm, 5), + __P4MASK(rd-2ndl-miss, 8), + __P4MASK(rd-3rdl-miss, 9), + __P4MASK(wr-2ndl-miss, 10), + NULLMASK +}; + +static const struct pmc_masks p4_mask_ia[] = { /* ioq allocation */ + __P4MASK(all-read, 5), + __P4MASK(all-write, 6), + __P4MASK(mem-uc, 7), + __P4MASK(mem-wc, 8), + __P4MASK(mem-wt, 9), + __P4MASK(mem-wp, 10), + __P4MASK(mem-wb, 11), + __P4MASK(own, 13), + __P4MASK(other, 14), + __P4MASK(prefetch, 15), + NULLMASK +}; + +static const struct pmc_masks p4_mask_iae[] = { /* ioq active entries */ + __P4MASK(all-read, 5), + __P4MASK(all-write, 6), + __P4MASK(mem-uc, 7), + __P4MASK(mem-wc, 8), + __P4MASK(mem-wt, 9), + __P4MASK(mem-wp, 10), + __P4MASK(mem-wb, 11), + __P4MASK(own, 13), + __P4MASK(other, 14), + __P4MASK(prefetch, 15), + NULLMASK +}; + +static const struct pmc_masks p4_mask_fda[] = { /* fsb data activity */ + __P4MASK(drdy-drv, 0), + __P4MASK(drdy-own, 1), + __P4MASK(drdy-other, 2), + __P4MASK(dbsy-drv, 3), + __P4MASK(dbsy-own, 4), + __P4MASK(dbsy-other, 5), + NULLMASK +}; + +static const struct pmc_masks p4_mask_ba[] = { /* bsq allocation */ + __P4MASK(req-type0, 0), + __P4MASK(req-type1, 
1), + __P4MASK(req-len0, 2), + __P4MASK(req-len1, 3), + __P4MASK(req-io-type, 5), + __P4MASK(req-lock-type, 6), + __P4MASK(req-cache-type, 7), + __P4MASK(req-split-type, 8), + __P4MASK(req-dem-type, 9), + __P4MASK(req-ord-type, 10), + __P4MASK(mem-type0, 11), + __P4MASK(mem-type1, 12), + __P4MASK(mem-type2, 13), + NULLMASK +}; + +static const struct pmc_masks p4_mask_sia[] = { /* sse input assist */ + __P4MASK(all, 15), + NULLMASK +}; + +static const struct pmc_masks p4_mask_psu[] = { /* packed sp uop */ + __P4MASK(all, 15), + NULLMASK +}; + +static const struct pmc_masks p4_mask_pdu[] = { /* packed dp uop */ + __P4MASK(all, 15), + NULLMASK +}; + +static const struct pmc_masks p4_mask_ssu[] = { /* scalar sp uop */ + __P4MASK(all, 15), + NULLMASK +}; + +static const struct pmc_masks p4_mask_sdu[] = { /* scalar dp uop */ + __P4MASK(all, 15), + NULLMASK +}; + +static const struct pmc_masks p4_mask_64bmu[] = { /* 64 bit mmx uop */ + __P4MASK(all, 15), + NULLMASK +}; + +static const struct pmc_masks p4_mask_128bmu[] = { /* 128 bit mmx uop */ + __P4MASK(all, 15), + NULLMASK +}; + +static const struct pmc_masks p4_mask_xfu[] = { /* X87 fp uop */ + __P4MASK(all, 15), + NULLMASK +}; + +static const struct pmc_masks p4_mask_xsmu[] = { /* x87 simd moves uop */ + __P4MASK(allp0, 3), + __P4MASK(allp2, 4), + NULLMASK +}; + +static const struct pmc_masks p4_mask_gpe[] = { /* global power events */ + __P4MASK(running, 0), + NULLMASK +}; + +static const struct pmc_masks p4_mask_tmx[] = { /* TC ms xfer */ + __P4MASK(cisc, 0), + NULLMASK +}; + +static const struct pmc_masks p4_mask_uqw[] = { /* uop queue writes */ + __P4MASK(from-tc-build, 0), + __P4MASK(from-tc-deliver, 1), + __P4MASK(from-rom, 2), + NULLMASK +}; + +static const struct pmc_masks p4_mask_rmbt[] = { /* retired mispred branch type */ + __P4MASK(conditional, 1), + __P4MASK(call, 2), + __P4MASK(return, 3), + __P4MASK(indirect, 4), + NULLMASK +}; + +static const struct pmc_masks p4_mask_rbt[] = { /* retired branch type */ 
+ __P4MASK(conditional, 1), + __P4MASK(call, 2), + __P4MASK(retired, 3), + __P4MASK(indirect, 4), + NULLMASK +}; + +static const struct pmc_masks p4_mask_rs[] = { /* resource stall */ + __P4MASK(sbfull, 5), + NULLMASK +}; + +static const struct pmc_masks p4_mask_wb[] = { /* WC buffer */ + __P4MASK(wcb-evicts, 0), + __P4MASK(wcb-full-evict, 1), + NULLMASK +}; + +static const struct pmc_masks p4_mask_fee[] = { /* front end event */ + __P4MASK(nbogus, 0), + __P4MASK(bogus, 1), + NULLMASK +}; + +static const struct pmc_masks p4_mask_ee[] = { /* execution event */ + __P4MASK(nbogus0, 0), + __P4MASK(nbogus1, 1), + __P4MASK(nbogus2, 2), + __P4MASK(nbogus3, 3), + __P4MASK(bogus0, 4), + __P4MASK(bogus1, 5), + __P4MASK(bogus2, 6), + __P4MASK(bogus3, 7), + NULLMASK +}; + +static const struct pmc_masks p4_mask_re[] = { /* replay event */ + __P4MASK(nbogus, 0), + __P4MASK(bogus, 1), + NULLMASK +}; + +static const struct pmc_masks p4_mask_insret[] = { /* instr retired */ + __P4MASK(nbogusntag, 0), + __P4MASK(nbogustag, 1), + __P4MASK(bogusntag, 2), + __P4MASK(bogustag, 3), + NULLMASK +}; + +static const struct pmc_masks p4_mask_ur[] = { /* uops retired */ + __P4MASK(nbogus, 0), + __P4MASK(bogus, 1), + NULLMASK +}; + +static const struct pmc_masks p4_mask_ut[] = { /* uop type */ + __P4MASK(tagloads, 1), + __P4MASK(tagstores, 2), + NULLMASK +}; + +static const struct pmc_masks p4_mask_br[] = { /* branch retired */ + __P4MASK(mmnp, 0), + __P4MASK(mmnm, 1), + __P4MASK(mmtp, 2), + __P4MASK(mmtm, 3), + NULLMASK +}; + +static const struct pmc_masks p4_mask_mbr[] = { /* mispred branch retired */ + __P4MASK(nbogus, 0), + NULLMASK +}; + +static const struct pmc_masks p4_mask_xa[] = { /* x87 assist */ + __P4MASK(fpsu, 0), + __P4MASK(fpso, 1), + __P4MASK(poao, 2), + __P4MASK(poau, 3), + __P4MASK(prea, 4), + NULLMASK +}; + +static const struct pmc_masks p4_mask_machclr[] = { /* machine clear */ + __P4MASK(clear, 0), + __P4MASK(moclear, 2), + __P4MASK(smclear, 3), + NULLMASK +}; + +/* P4 
event parser */ +static int +p4_allocate_pmc(enum pmc_event pe, char *ctrspec, + struct pmc_op_pmcallocate *pmc_config) +{ + + char *e, *p, *q; + int count, has_tag, has_busreqtype, n; + uint32_t evmask, cccractivemask; + const struct pmc_masks *pm, *pmask; + + pmc_config->pm_caps |= PMC_CAP_READ; + pmc_config->pm_p4_cccrconfig = pmc_config->pm_p4_escrconfig = 0; + + if (pe == PMC_EV_TSC_TSC) { + /* TSC must not be further qualified */ + if (ctrspec && *ctrspec != '\0') + return -1; + return 0; + } + + pmask = NULL; + evmask = 0; + cccractivemask = 0x3; + has_tag = has_busreqtype = 0; + pmc_config->pm_caps |= PMC_CAP_WRITE; + +#define __P4SETMASK(M) do { \ + pmask = p4_mask_##M; \ +} while (0) + + switch (pe) { + case PMC_EV_P4_TC_DELIVER_MODE: + __P4SETMASK(tcdm); + break; + case PMC_EV_P4_BPU_FETCH_REQUEST: + __P4SETMASK(bfr); + break; + case PMC_EV_P4_ITLB_REFERENCE: + __P4SETMASK(ir); + break; + case PMC_EV_P4_MEMORY_CANCEL: + __P4SETMASK(memcan); + break; + case PMC_EV_P4_MEMORY_COMPLETE: + __P4SETMASK(memcomp); + break; + case PMC_EV_P4_LOAD_PORT_REPLAY: + __P4SETMASK(lpr); + break; + case PMC_EV_P4_STORE_PORT_REPLAY: + __P4SETMASK(spr); + break; + case PMC_EV_P4_MOB_LOAD_REPLAY: + __P4SETMASK(mlr); + break; + case PMC_EV_P4_PAGE_WALK_TYPE: + __P4SETMASK(pwt); + break; + case PMC_EV_P4_BSQ_CACHE_REFERENCE: + __P4SETMASK(bcr); + break; + case PMC_EV_P4_IOQ_ALLOCATION: + __P4SETMASK(ia); + has_busreqtype = 1; + break; + case PMC_EV_P4_IOQ_ACTIVE_ENTRIES: + __P4SETMASK(iae); + has_busreqtype = 1; + break; + case PMC_EV_P4_FSB_DATA_ACTIVITY: + __P4SETMASK(fda); + break; + case PMC_EV_P4_BSQ_ALLOCATION: + __P4SETMASK(ba); + break; + case PMC_EV_P4_SSE_INPUT_ASSIST: + __P4SETMASK(sia); + break; + case PMC_EV_P4_PACKED_SP_UOP: + __P4SETMASK(psu); + break; + case PMC_EV_P4_PACKED_DP_UOP: + __P4SETMASK(pdu); + break; + case PMC_EV_P4_SCALAR_SP_UOP: + __P4SETMASK(ssu); + break; + case PMC_EV_P4_SCALAR_DP_UOP: + __P4SETMASK(sdu); + break; + case PMC_EV_P4_64BIT_MMX_UOP: 
+ __P4SETMASK(64bmu); + break; + case PMC_EV_P4_128BIT_MMX_UOP: + __P4SETMASK(128bmu); + break; + case PMC_EV_P4_X87_FP_UOP: + __P4SETMASK(xfu); + break; + case PMC_EV_P4_X87_SIMD_MOVES_UOP: + __P4SETMASK(xsmu); + break; + case PMC_EV_P4_GLOBAL_POWER_EVENTS: + __P4SETMASK(gpe); + break; + case PMC_EV_P4_TC_MS_XFER: + __P4SETMASK(tmx); + break; + case PMC_EV_P4_UOP_QUEUE_WRITES: + __P4SETMASK(uqw); + break; + case PMC_EV_P4_RETIRED_MISPRED_BRANCH_TYPE: + __P4SETMASK(rmbt); + break; + case PMC_EV_P4_RETIRED_BRANCH_TYPE: + __P4SETMASK(rbt); + break; + case PMC_EV_P4_RESOURCE_STALL: + __P4SETMASK(rs); + break; + case PMC_EV_P4_WC_BUFFER: + __P4SETMASK(wb); + break; + case PMC_EV_P4_BSQ_ACTIVE_ENTRIES: + case PMC_EV_P4_B2B_CYCLES: + case PMC_EV_P4_BNR: + case PMC_EV_P4_SNOOP: + case PMC_EV_P4_RESPONSE: + break; + case PMC_EV_P4_FRONT_END_EVENT: + __P4SETMASK(fee); + break; + case PMC_EV_P4_EXECUTION_EVENT: + __P4SETMASK(ee); + break; + case PMC_EV_P4_REPLAY_EVENT: + __P4SETMASK(re); + break; + case PMC_EV_P4_INSTR_RETIRED: + __P4SETMASK(insret); + break; + case PMC_EV_P4_UOPS_RETIRED: + __P4SETMASK(ur); + break; + case PMC_EV_P4_UOP_TYPE: + __P4SETMASK(ut); + break; + case PMC_EV_P4_BRANCH_RETIRED: + __P4SETMASK(br); + break; + case PMC_EV_P4_MISPRED_BRANCH_RETIRED: + __P4SETMASK(mbr); + break; + case PMC_EV_P4_X87_ASSIST: + __P4SETMASK(xa); + break; + case PMC_EV_P4_MACHINE_CLEAR: + __P4SETMASK(machclr); + break; + default: + return -1; + } + + /* process additional flags */ + while ((p = strsep(&ctrspec, ",")) != NULL) { + if (KWPREFIXMATCH(p, P4_KW_ACTIVE)) { + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return -1; + + if (strcmp(q, P4_KW_ACTIVE_NONE) == 0) + cccractivemask = 0x0; + else if (strcmp(q, P4_KW_ACTIVE_SINGLE) == 0) + cccractivemask = 0x1; + else if (strcmp(q, P4_KW_ACTIVE_BOTH) == 0) + cccractivemask = 0x2; + else if (strcmp(q, P4_KW_ACTIVE_ANY) == 0) + cccractivemask = 0x3; + else + return -1; + + } else if (KWPREFIXMATCH(p, 
P4_KW_BUSREQTYPE)) { + if (has_busreqtype == 0) + return -1; + + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return -1; + + count = strtol(q, &e, 0); + if (e == q || *e != '\0') + return -1; + evmask = (evmask & ~0x1F) | (count & 0x1F); + } else if (KWMATCH(p, P4_KW_CASCADE)) + pmc_config->pm_caps |= PMC_CAP_CASCADE; + else if (KWMATCH(p, P4_KW_EDGE)) + pmc_config->pm_caps |= PMC_CAP_EDGE; + else if (KWMATCH(p, P4_KW_INV)) + pmc_config->pm_caps |= PMC_CAP_INVERT; + else if (KWPREFIXMATCH(p, P4_KW_MASK "=")) { + if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0) + return -1; + pmc_config->pm_caps |= PMC_CAP_QUALIFIER; + } else if (KWMATCH(p, P4_KW_OS)) + pmc_config->pm_caps |= PMC_CAP_SYSTEM; + else if (KWMATCH(p, P4_KW_PRECISE)) + pmc_config->pm_caps |= PMC_CAP_PRECISE; + else if (KWPREFIXMATCH(p, P4_KW_TAG "=")) { + if (has_tag == 0) + return -1; + + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return -1; + + count = strtol(q, &e, 0); + if (e == q || *e != '\0') + return -1; + + pmc_config->pm_caps |= PMC_CAP_TAGGING; + pmc_config->pm_p4_escrconfig |= + P4_ESCR_TO_TAG_VALUE(count); + } else if (KWPREFIXMATCH(p, P4_KW_THRESHOLD "=")) { + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return -1; + + count = strtol(q, &e, 0); + if (e == q || *e != '\0') + return -1; + + pmc_config->pm_caps |= PMC_CAP_THRESHOLD; + pmc_config->pm_p4_cccrconfig &= ~P4_CCCR_THRESHOLD_MASK; + pmc_config->pm_p4_cccrconfig |= P4_CCCR_TO_THRESHOLD(count); + } else if (KWMATCH(p, P4_KW_USR)) + pmc_config->pm_caps |= PMC_CAP_USER; + else + return -1; + } + + /* other post processing */ + if (pe == PMC_EV_P4_IOQ_ALLOCATION || + pe == PMC_EV_P4_FSB_DATA_ACTIVITY || + pe == PMC_EV_P4_BSQ_ALLOCATION) + pmc_config->pm_caps |= PMC_CAP_EDGE; + + /* fill in thread activity mask */ + pmc_config->pm_p4_cccrconfig |= + P4_CCCR_TO_ACTIVE_THREAD(cccractivemask); + + if (evmask) + pmc_config->pm_caps |= PMC_CAP_QUALIFIER; + + switch (pe) { + case PMC_EV_P4_FSB_DATA_ACTIVITY: 
+ if ((evmask & 0x06) == 0x06 || + (evmask & 0x18) == 0x18) + return -1; /* can't have own+other bits together */ + if (evmask == 0) /* default:drdy-{drv,own}+dbsy{drv,own} */ + evmask = 0x1D; + break; + case PMC_EV_P4_MACHINE_CLEAR: + /* only one bit is allowed to be set */ + if ((evmask & (evmask - 1)) != 0) + return -1; + if (evmask == 0) { + evmask = 0x1; /* 'CLEAR' */ + pmc_config->pm_caps |= PMC_CAP_QUALIFIER; + } + break; + default: + if (evmask == 0 && pmask) { + for (pm = pmask; pm->pm_name; pm++) + evmask |= pm->pm_value; + pmc_config->pm_caps |= PMC_CAP_QUALIFIER; + } + } + + pmc_config->pm_p4_escrconfig = P4_ESCR_TO_EVENT_MASK(evmask); + + return 0; +} + +/* + * Pentium Pro style PMCs. These PMCs are found in Pentium II, Pentium III, + * and Pentium M CPUs. + */ + +static struct pmc_event_alias p6_aliases[] = { +EV_ALIAS("branches", "p6-br-inst-retired"), +EV_ALIAS("branch-mispredicts", "p6-br-miss-pred-retired"), +EV_ALIAS("cycles", "tsc"), +EV_ALIAS("instructions", "p6-inst-retired"), +EV_ALIAS("interrupts", "p6-hw-int-rx"), +EV_ALIAS(NULL, NULL) +}; + +#define P6_KW_CMASK "cmask" +#define P6_KW_EDGE "edge" +#define P6_KW_INV "inv" +#define P6_KW_OS "os" +#define P6_KW_UMASK "umask" +#define P6_KW_USR "usr" + +static struct pmc_masks p6_mask_mesi[] = { + PMCMASK(m, 0x01), + PMCMASK(e, 0x02), + PMCMASK(s, 0x04), + PMCMASK(i, 0x08), + NULLMASK +}; + +static struct pmc_masks p6_mask_mesihw[] = { + PMCMASK(m, 0x01), + PMCMASK(e, 0x02), + PMCMASK(s, 0x04), + PMCMASK(i, 0x08), + PMCMASK(nonhw, 0x00), + PMCMASK(hw, 0x10), + PMCMASK(both, 0x30), + NULLMASK +}; + +static struct pmc_masks p6_mask_hw[] = { + PMCMASK(nonhw, 0x00), + PMCMASK(hw, 0x10), + PMCMASK(both, 0x30), + NULLMASK +}; + +static struct pmc_masks p6_mask_any[] = { + PMCMASK(self, 0x00), + PMCMASK(any, 0x20), + NULLMASK +}; + +static struct pmc_masks p6_mask_ekp[] = { + PMCMASK(nta, 0x00), + PMCMASK(t1, 0x01), + PMCMASK(t2, 0x02), + PMCMASK(wos, 0x03), + NULLMASK +}; + +static struct pmc_masks 
p6_mask_pps[] = { + PMCMASK(packed-and-scalar, 0x00), + PMCMASK(scalar, 0x01), + NULLMASK +}; + +static struct pmc_masks p6_mask_mite[] = { + PMCMASK(packed-multiply, 0x01), + PMCMASK(packed-shift, 0x02), + PMCMASK(pack, 0x04), + PMCMASK(unpack, 0x08), + PMCMASK(packed-logical, 0x10), + PMCMASK(packed-arithmetic, 0x20), + NULLMASK +}; + +static struct pmc_masks p6_mask_fmt[] = { + PMCMASK(mmxtofp, 0x00), + PMCMASK(fptommx, 0x01), + NULLMASK +}; + +static struct pmc_masks p6_mask_sr[] = { + PMCMASK(es, 0x01), + PMCMASK(ds, 0x02), + PMCMASK(fs, 0x04), + PMCMASK(gs, 0x08), + NULLMASK +}; + +static struct pmc_masks p6_mask_eet[] = { + PMCMASK(all, 0x00), + PMCMASK(freq, 0x02), + NULLMASK +}; + +static struct pmc_masks p6_mask_efur[] = { + PMCMASK(all, 0x00), + PMCMASK(loadop, 0x01), + PMCMASK(stdsta, 0x02), + NULLMASK +}; + +static struct pmc_masks p6_mask_essir[] = { + PMCMASK(sse-packed-single, 0x00), + PMCMASK(sse-packed-single-scalar-single, 0x01), + PMCMASK(sse2-packed-double, 0x02), + PMCMASK(sse2-scalar-double, 0x03), + NULLMASK +}; + +static struct pmc_masks p6_mask_esscir[] = { + PMCMASK(sse-packed-single, 0x00), + PMCMASK(sse-scalar-single, 0x01), + PMCMASK(sse2-packed-double, 0x02), + PMCMASK(sse2-scalar-double, 0x03), + NULLMASK +}; + +/* P6 event parser */ +static int +p6_allocate_pmc(enum pmc_event pe, char *ctrspec, + struct pmc_op_pmcallocate *pmc_config) +{ + char *e, *p, *q; + uint32_t evmask; + int count, n; + const struct pmc_masks *pm, *pmask; + + pmc_config->pm_caps |= PMC_CAP_READ; + pmc_config->pm_p6_config = 0; + + if (pe == PMC_EV_TSC_TSC) { + if (ctrspec && *ctrspec != '\0') + return -1; + return 0; + } + + pmc_config->pm_caps |= PMC_CAP_WRITE; + evmask = 0; + +#define P6MASKSET(M) pmask = p6_mask_ ## M + + switch(pe) { + case PMC_EV_P6_L2_IFETCH: P6MASKSET(mesi); break; + case PMC_EV_P6_L2_LD: P6MASKSET(mesi); break; + case PMC_EV_P6_L2_ST: P6MASKSET(mesi); break; + case PMC_EV_P6_L2_RQSTS: P6MASKSET(mesi); break; + case 
PMC_EV_P6_BUS_DRDY_CLOCKS: + case PMC_EV_P6_BUS_LOCK_CLOCKS: + case PMC_EV_P6_BUS_TRAN_BRD: + case PMC_EV_P6_BUS_TRAN_RFO: + case PMC_EV_P6_BUS_TRANS_WB: + case PMC_EV_P6_BUS_TRAN_IFETCH: + case PMC_EV_P6_BUS_TRAN_INVAL: + case PMC_EV_P6_BUS_TRAN_PWR: + case PMC_EV_P6_BUS_TRANS_P: + case PMC_EV_P6_BUS_TRANS_IO: + case PMC_EV_P6_BUS_TRAN_DEF: + case PMC_EV_P6_BUS_TRAN_BURST: + case PMC_EV_P6_BUS_TRAN_ANY: + case PMC_EV_P6_BUS_TRAN_MEM: + P6MASKSET(any); break; + case PMC_EV_P6_EMON_KNI_PREF_DISPATCHED: + case PMC_EV_P6_EMON_KNI_PREF_MISS: + P6MASKSET(ekp); break; + case PMC_EV_P6_EMON_KNI_INST_RETIRED: + case PMC_EV_P6_EMON_KNI_COMP_INST_RET: + P6MASKSET(pps); break; + case PMC_EV_P6_MMX_INSTR_TYPE_EXEC: + P6MASKSET(mite); break; + case PMC_EV_P6_FP_MMX_TRANS: + P6MASKSET(fmt); break; + case PMC_EV_P6_SEG_RENAME_STALLS: + case PMC_EV_P6_SEG_REG_RENAMES: + P6MASKSET(sr); break; + case PMC_EV_P6_EMON_EST_TRANS: + P6MASKSET(eet); break; + case PMC_EV_P6_EMON_FUSED_UOPS_RET: + P6MASKSET(efur); break; + case PMC_EV_P6_EMON_SSE_SSE2_INST_RETIRED: + P6MASKSET(essir); break; + case PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED: + P6MASKSET(esscir); break; + default: + pmask = NULL; + break; + } + + /* Pentium M PMCs have a few events with different semantics */ + if (cpu_info.pm_cputype == PMC_CPU_INTEL_PM) { + if (pe == PMC_EV_P6_L2_LD || + pe == PMC_EV_P6_L2_LINES_IN || + pe == PMC_EV_P6_L2_LINES_OUT) + P6MASKSET(mesihw); + else if (pe == PMC_EV_P6_L2_M_LINES_OUTM) + P6MASKSET(hw); + } + + /* Parse additional modifiers if present */ + while ((p = strsep(&ctrspec, ",")) != NULL) { + if (KWPREFIXMATCH(p, P6_KW_CMASK "=")) { + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return -1; + count = strtol(q, &e, 0); + if (e == q || *e != '\0') + return -1; + pmc_config->pm_caps |= PMC_CAP_THRESHOLD; + pmc_config->pm_p6_config |= P6_EVSEL_TO_CMASK(count); + } else if (KWMATCH(p, P6_KW_EDGE)) { + pmc_config->pm_caps |= PMC_CAP_EDGE; + } else if (KWMATCH(p, P6_KW_INV)) { + 
pmc_config->pm_caps |= PMC_CAP_INVERT; + } else if (KWMATCH(p, P6_KW_OS)) { + pmc_config->pm_caps |= PMC_CAP_SYSTEM; + } else if (KWPREFIXMATCH(p, P6_KW_UMASK "=")) { + evmask = 0; + if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0) + return -1; + if ((pe == PMC_EV_P6_BUS_DRDY_CLOCKS || + pe == PMC_EV_P6_BUS_LOCK_CLOCKS || + pe == PMC_EV_P6_BUS_TRAN_BRD || + pe == PMC_EV_P6_BUS_TRAN_RFO || + pe == PMC_EV_P6_BUS_TRAN_IFETCH || + pe == PMC_EV_P6_BUS_TRAN_INVAL || + pe == PMC_EV_P6_BUS_TRAN_PWR || + pe == PMC_EV_P6_BUS_TRAN_DEF || + pe == PMC_EV_P6_BUS_TRAN_BURST || + pe == PMC_EV_P6_BUS_TRAN_ANY || + pe == PMC_EV_P6_BUS_TRAN_MEM || + pe == PMC_EV_P6_BUS_TRANS_IO || + pe == PMC_EV_P6_BUS_TRANS_P || + pe == PMC_EV_P6_BUS_TRANS_WB || + pe == PMC_EV_P6_EMON_EST_TRANS || + pe == PMC_EV_P6_EMON_FUSED_UOPS_RET || + pe == PMC_EV_P6_EMON_KNI_COMP_INST_RET || + pe == PMC_EV_P6_EMON_KNI_INST_RETIRED || + pe == PMC_EV_P6_EMON_KNI_PREF_DISPATCHED || + pe == PMC_EV_P6_EMON_KNI_PREF_MISS || + pe == PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED || + pe == PMC_EV_P6_EMON_SSE_SSE2_INST_RETIRED || + pe == PMC_EV_P6_FP_MMX_TRANS) + && (n > 1)) + return -1; /* only one mask keyword allowed */ + pmc_config->pm_caps |= PMC_CAP_QUALIFIER; + } else if (KWMATCH(p, P6_KW_USR)) { + pmc_config->pm_caps |= PMC_CAP_USER; + } else + return -1; + } + + /* post processing */ + switch (pe) { + + /* + * The following events default to an evmask of 0 + */ + + /* default => 'self' */ + case PMC_EV_P6_BUS_DRDY_CLOCKS: + case PMC_EV_P6_BUS_LOCK_CLOCKS: + case PMC_EV_P6_BUS_TRAN_BRD: + case PMC_EV_P6_BUS_TRAN_RFO: + case PMC_EV_P6_BUS_TRANS_WB: + case PMC_EV_P6_BUS_TRAN_IFETCH: + case PMC_EV_P6_BUS_TRAN_INVAL: + case PMC_EV_P6_BUS_TRAN_PWR: + case PMC_EV_P6_BUS_TRANS_P: + case PMC_EV_P6_BUS_TRANS_IO: + case PMC_EV_P6_BUS_TRAN_DEF: + case PMC_EV_P6_BUS_TRAN_BURST: + case PMC_EV_P6_BUS_TRAN_ANY: + case PMC_EV_P6_BUS_TRAN_MEM: + + /* default => 'nta' */ + case PMC_EV_P6_EMON_KNI_PREF_DISPATCHED: + case 
PMC_EV_P6_EMON_KNI_PREF_MISS: + + /* default => 'packed and scalar' */ + case PMC_EV_P6_EMON_KNI_INST_RETIRED: + case PMC_EV_P6_EMON_KNI_COMP_INST_RET: + + /* default => 'mmx to fp transitions' */ + case PMC_EV_P6_FP_MMX_TRANS: + + /* default => 'SSE Packed Single' */ + case PMC_EV_P6_EMON_SSE_SSE2_INST_RETIRED: + case PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED: + + /* default => 'all fused micro-ops' */ + case PMC_EV_P6_EMON_FUSED_UOPS_RET: + + /* default => 'all transitions' */ + case PMC_EV_P6_EMON_EST_TRANS: + break; + + case PMC_EV_P6_MMX_UOPS_EXEC: + evmask = 0x0F; /* only value allowed */ + break; + + default: + + /* + * For all other events, set the default event mask + * to a logical OR of all the allowed event mask bits. + */ + + if (evmask == 0 && pmask) { + for (pm = pmask; pm->pm_name; pm++) + evmask |= pm->pm_value; + pmc_config->pm_caps |= PMC_CAP_QUALIFIER; + } + + break; + } + + if (pmc_config->pm_caps & PMC_CAP_QUALIFIER) + pmc_config->pm_p6_config |= P6_EVSEL_TO_UMASK(evmask); + + return 0; +} + +/* + * Pentium style PMCs + */ + +static struct pmc_event_alias p5_aliases[] = { + EV_ALIAS("cycles", "tsc"), + EV_ALIAS(NULL, NULL) +}; + +static int +p5_allocate_pmc(enum pmc_event pe, char *ctrspec, + struct pmc_op_pmcallocate *pmc_config) +{ + return -1 || pe || ctrspec || pmc_config; /* shut up gcc */ +} + +#elif __amd64__ + +/* + * AMD K8 PMCs. + * + * These are very similar to AMD K7 PMCs, but support more kinds of + * events. 
+ */ + +static struct pmc_event_alias k8_aliases[] = { + EV_ALIAS("cycles", "tsc"), + EV_ALIAS(NULL, NULL) +}; + +#define __K8MASK(N,V) PMCMASK(N,(1 << (V))) + +/* + * Parsing tables + */ + +/* fp dispatched fpu ops */ +static const struct pmc_masks k8_mask_fdfo[] = { + __K8MASK(add-pipe-excluding-junk-ops, 0), + __K8MASK(multiply-pipe-excluding-junk-ops, 1), + __K8MASK(store-pipe-excluding-junk-ops, 2), + __K8MASK(add-pipe-junk-ops, 3), + __K8MASK(multiply-pipe-junk-ops, 4), + __K8MASK(store-pipe-junk-ops, 5), + NULLMASK +}; + +/* ls segment register loads */ +static const struct pmc_masks k8_mask_lsrl[] = { + __K8MASK(es, 0), + __K8MASK(cs, 1), + __K8MASK(ss, 2), + __K8MASK(ds, 3), + __K8MASK(fs, 4), + __K8MASK(gs, 5), + __K8MASK(hs, 6), + NULLMASK +}; + +/* ls locked operation */ +static const struct pmc_masks k8_mask_llo[] = { + __K8MASK(locked-instructions, 0), + __K8MASK(cycles-in-request, 1), + __K8MASK(cycles-to-complete, 2), + NULLMASK +}; + +/* dc refill from {l2,system} and dc copyback */ +static const struct pmc_masks k8_mask_dc[] = { + __K8MASK(invalid, 0), + __K8MASK(shared, 1), + __K8MASK(exclusive, 2), + __K8MASK(owner, 3), + __K8MASK(modified, 4), + NULLMASK +}; + +/* dc one bit ecc error */ +static const struct pmc_masks k8_mask_dobee[] = { + __K8MASK(scrubber, 0), + __K8MASK(piggyback, 1), + NULLMASK +}; + +/* dc dispatched prefetch instructions */ +static const struct pmc_masks k8_mask_ddpi[] = { + __K8MASK(load, 0), + __K8MASK(store, 1), + __K8MASK(nta, 2), + NULLMASK +}; + +/* dc dcache accesses by locks */ +static const struct pmc_masks k8_mask_dabl[] = { + __K8MASK(accesses, 0), + __K8MASK(misses, 1), + NULLMASK +}; + +/* bu internal l2 request */ +static const struct pmc_masks k8_mask_bilr[] = { + __K8MASK(ic-fill, 0), + __K8MASK(dc-fill, 1), + __K8MASK(tlb-reload, 2), + __K8MASK(tag-snoop, 3), + __K8MASK(cancelled, 4), + NULLMASK +}; + +/* bu fill request l2 miss */ +static const struct pmc_masks k8_mask_bfrlm[] = { + __K8MASK(ic-fill, 0), 
+ __K8MASK(dc-fill, 1), + __K8MASK(tlb-reload, 2), + NULLMASK +}; + +/* bu fill into l2 */ +static const struct pmc_masks k8_mask_bfil[] = { + __K8MASK(dirty-l2-victim, 0), + __K8MASK(victim-from-l2, 1), + NULLMASK +}; + +/* fr retired fpu instructions */ +static const struct pmc_masks k8_mask_frfi[] = { + __K8MASK(x87, 0), + __K8MASK(mmx-3dnow, 1), + __K8MASK(packed-sse-sse2, 2), + __K8MASK(scalar-sse-sse2, 3), + NULLMASK +}; + +/* fr retired fastpath double op instructions */ +static const struct pmc_masks k8_mask_frfdoi[] = { + __K8MASK(low-op-pos-0, 0), + __K8MASK(low-op-pos-1, 1), + __K8MASK(low-op-pos-2, 2), + NULLMASK +}; + +/* fr fpu exceptions */ +static const struct pmc_masks k8_mask_ffe[] = { + __K8MASK(x87-reclass-microfaults, 0), + __K8MASK(sse-retype-microfaults, 1), + __K8MASK(sse-reclass-microfaults, 2), + __K8MASK(sse-and-x87-microtraps, 3), + NULLMASK +}; + +/* nb memory controller page access event */ +static const struct pmc_masks k8_mask_nmcpae[] = { + __K8MASK(page-hit, 0), + __K8MASK(page-miss, 1), + __K8MASK(page-conflict, 2), + NULLMASK +}; + +/* nb memory controller turnaround */ +static const struct pmc_masks k8_mask_nmct[] = { + __K8MASK(dimm-turnaround, 0), + __K8MASK(read-to-write-turnaround, 1), + __K8MASK(write-to-read-turnaround, 2), + NULLMASK +}; + +/* nb memory controller bypass saturation */ +static const struct pmc_masks k8_mask_nmcbs[] = { + __K8MASK(memory-controller-hi-pri-bypass, 0), + __K8MASK(memory-controller-lo-pri-bypass, 1), + __K8MASK(dram-controller-interface-bypass, 2), + __K8MASK(dram-controller-queue-bypass, 3), + NULLMASK +}; + +/* nb sized commands */ +static const struct pmc_masks k8_mask_nsc[] = { + __K8MASK(nonpostwrszbyte, 0), + __K8MASK(nonpostwrszdword, 1), + __K8MASK(postwrszbyte, 2), + __K8MASK(postwrszdword, 3), + __K8MASK(rdszbyte, 4), + __K8MASK(rdszdword, 5), + __K8MASK(rdmodwr, 6), + NULLMASK +}; + +/* nb probe result */ +static const struct pmc_masks k8_mask_npr[] = { + __K8MASK(probe-miss, 0), + 
__K8MASK(probe-hit, 1), + __K8MASK(probe-hit-dirty-no-memory-cancel, 2), + __K8MASK(probe-hit-dirty-with-memory-cancel, 3), + NULLMASK +}; + +/* nb hypertransport bus bandwidth */ +static const struct pmc_masks k8_mask_nhbb[] = { /* HT bus bandwidth */ + __K8MASK(command, 0), + __K8MASK(data, 1), + __K8MASK(buffer-release, 2), + __K8MASK(nop, 3), + NULLMASK +}; + +#undef __K8MASK + +#define K8_KW_COUNT "count" +#define K8_KW_EDGE "edge" +#define K8_KW_INV "inv" +#define K8_KW_MASK "mask" +#define K8_KW_OS "os" +#define K8_KW_USR "usr" + +static int +k8_allocate_pmc(enum pmc_event pe, char *ctrspec, + struct pmc_op_pmcallocate *pmc_config) +{ + char *e, *p, *q; + int n; + uint32_t count, evmask; + const struct pmc_masks *pm, *pmask; + + pmc_config->pm_caps |= PMC_CAP_READ; + pmc_config->pm_amd_config = 0; + + if (pe == PMC_EV_TSC_TSC) { + /* TSC events must be unqualified. */ + if (ctrspec && *ctrspec != '\0') + return -1; + return 0; + } + + pmask = NULL; + evmask = 0; + +#define __K8SETMASK(M) pmask = k8_mask_##M + + /* setup parsing tables */ + switch (pe) { + case PMC_EV_K8_FP_DISPATCHED_FPU_OPS: + __K8SETMASK(fdfo); + break; + case PMC_EV_K8_LS_SEGMENT_REGISTER_LOAD: + __K8SETMASK(lsrl); + break; + case PMC_EV_K8_LS_LOCKED_OPERATION: + __K8SETMASK(llo); + break; + case PMC_EV_K8_DC_REFILL_FROM_L2: + case PMC_EV_K8_DC_REFILL_FROM_SYSTEM: + case PMC_EV_K8_DC_COPYBACK: + __K8SETMASK(dc); + break; + case PMC_EV_K8_DC_ONE_BIT_ECC_ERROR: + __K8SETMASK(dobee); + break; + case PMC_EV_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS: + __K8SETMASK(ddpi); + break; + case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS: + __K8SETMASK(dabl); + break; + case PMC_EV_K8_BU_INTERNAL_L2_REQUEST: + __K8SETMASK(bilr); + break; + case PMC_EV_K8_BU_FILL_REQUEST_L2_MISS: + __K8SETMASK(bfrlm); + break; + case PMC_EV_K8_BU_FILL_INTO_L2: + __K8SETMASK(bfil); + break; + case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS: + __K8SETMASK(frfi); + break; + case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS: + 
__K8SETMASK(frfdoi); + break; + case PMC_EV_K8_FR_FPU_EXCEPTIONS: + __K8SETMASK(ffe); + break; + case PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT: + __K8SETMASK(nmcpae); + break; + case PMC_EV_K8_NB_MEMORY_CONTROLLER_TURNAROUND: + __K8SETMASK(nmct); + break; + case PMC_EV_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION: + __K8SETMASK(nmcbs); + break; + case PMC_EV_K8_NB_SIZED_COMMANDS: + __K8SETMASK(nsc); + break; + case PMC_EV_K8_NB_PROBE_RESULT: + __K8SETMASK(npr); + break; + case PMC_EV_K8_NB_HT_BUS0_BANDWIDTH: + case PMC_EV_K8_NB_HT_BUS1_BANDWIDTH: + case PMC_EV_K8_NB_HT_BUS2_BANDWIDTH: + __K8SETMASK(nhbb); + break; + + default: + break; /* no options defined */ + } + + pmc_config->pm_caps |= PMC_CAP_WRITE; + + while ((p = strsep(&ctrspec, ",")) != NULL) { + if (KWPREFIXMATCH(p, K8_KW_COUNT "=")) { + q = strchr(p, '='); + if (*++q == '\0') /* skip '=' */ + return -1; + + count = strtol(q, &e, 0); + if (e == q || *e != '\0') + return -1; + + pmc_config->pm_caps |= PMC_CAP_THRESHOLD; + pmc_config->pm_amd_config |= K8_PMC_TO_COUNTER(count); + + } else if (KWMATCH(p, K8_KW_EDGE)) { + pmc_config->pm_caps |= PMC_CAP_EDGE; + } else if (KWMATCH(p, K8_KW_INV)) { + pmc_config->pm_caps |= PMC_CAP_INVERT; + } else if (KWPREFIXMATCH(p, K8_KW_MASK "=")) { + if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0) + return -1; + pmc_config->pm_caps |= PMC_CAP_QUALIFIER; + } else if (KWMATCH(p, K8_KW_OS)) { + pmc_config->pm_caps |= PMC_CAP_SYSTEM; + } else if (KWMATCH(p, K8_KW_USR)) { + pmc_config->pm_caps |= PMC_CAP_USER; + } else + return -1; + } + + /* other post processing */ + + switch (pe) { + case PMC_EV_K8_FP_DISPATCHED_FPU_OPS: + case PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED: + case PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS: + case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS: + case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS: + case PMC_EV_K8_FR_FPU_EXCEPTIONS: + /* XXX only available in rev B and later */ + break; + case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS: + /* XXX 
only available in rev C and later */ + break; + case PMC_EV_K8_LS_LOCKED_OPERATION: + /* XXX CPU Rev A,B evmask is to be zero */ + if (evmask & (evmask - 1)) /* > 1 bit set */ + return -1; + if (evmask == 0) { + evmask = 0x01; /* Rev C and later: #instrs */ + pmc_config->pm_caps |= PMC_CAP_QUALIFIER; + } + break; + default: + if (evmask == 0 && pmask != NULL) { + for (pm = pmask; pm->pm_name; pm++) + evmask |= pm->pm_value; + pmc_config->pm_caps |= PMC_CAP_QUALIFIER; + } + } + + if (pmc_config->pm_caps & PMC_CAP_QUALIFIER) + pmc_config->pm_amd_config = K8_PMC_TO_UNITMASK(evmask); + + return 0; +} +#endif + +/* + * API entry points + */ + +int +pmc_init(void) +{ + int error, pmc_mod_id; + uint32_t abi_version; + struct module_stat pmc_modstat; + + if (pmc_syscall != -1) /* already inited */ + return 0; + + /* retrieve the system call number from the KLD */ + if ((pmc_mod_id = modfind(PMC_MODULE_NAME)) < 0) + return -1; + + pmc_modstat.version = sizeof(struct module_stat); + if ((error = modstat(pmc_mod_id, &pmc_modstat)) < 0) + return -1; + + pmc_syscall = pmc_modstat.data.intval; + + /* check ABI version against compiled-in version */ + if (PMC_CALL(GETMODULEVERSION, &abi_version) < 0) + return (pmc_syscall = -1); + + /* ignore patch numbers for the comparision */ + if ((abi_version & 0xFFFF0000) != (PMC_VERSION & 0xFFFF0000)) { + errno = EPROGMISMATCH; + return (pmc_syscall = -1); + } + + if (PMC_CALL(GETCPUINFO, &cpu_info) < 0) + return (pmc_syscall = -1); + + /* set parser pointer */ + switch (cpu_info.pm_cputype) { +#if __i386__ + case PMC_CPU_AMD_K7: + pmc_mdep_event_aliases = k7_aliases; + pmc_mdep_allocate_pmc = k7_allocate_pmc; + break; + case PMC_CPU_INTEL_P5: + pmc_mdep_event_aliases = p5_aliases; + pmc_mdep_allocate_pmc = p5_allocate_pmc; + break; + case PMC_CPU_INTEL_P6: /* P6 ... Pentium M CPUs have */ + case PMC_CPU_INTEL_PII: /* similar PMCs. 
*/ + case PMC_CPU_INTEL_PIII: + case PMC_CPU_INTEL_PM: + pmc_mdep_event_aliases = p6_aliases; + pmc_mdep_allocate_pmc = p6_allocate_pmc; + break; + case PMC_CPU_INTEL_PIV: + pmc_mdep_event_aliases = p4_aliases; + pmc_mdep_allocate_pmc = p4_allocate_pmc; + break; +#elif __amd64__ + case PMC_CPU_AMD_K8: + pmc_mdep_event_aliases = k8_aliases; + pmc_mdep_allocate_pmc = k8_allocate_pmc; + break; +#endif + + default: + /* + * Some kind of CPU this version of the library knows nothing + * about. This shouldn't happen since the abi version check + * should have caught this. + */ + errno = ENXIO; + return (pmc_syscall = -1); + } + + return 0; +} + +int +pmc_allocate(const char *ctrspec, enum pmc_mode mode, + uint32_t flags, int cpu, pmc_id_t *pmcid) +{ + int retval; + enum pmc_event pe; + char *r, *spec_copy; + const char *ctrname; + const struct pmc_event_alias *p; + struct pmc_op_pmcallocate pmc_config; + + spec_copy = NULL; + retval = -1; + + if (mode != PMC_MODE_SS && mode != PMC_MODE_TS && + mode != PMC_MODE_SC && mode != PMC_MODE_TC) { + errno = EINVAL; + goto out; + } + + /* replace an event alias with the canonical event specifier */ + if (pmc_mdep_event_aliases) + for (p = pmc_mdep_event_aliases; p->pm_alias; p++) + if (!strcmp(ctrspec, p->pm_alias)) { + spec_copy = strdup(p->pm_spec); + break; + } + + if (spec_copy == NULL) + spec_copy = strdup(ctrspec); + + r = spec_copy; + ctrname = strsep(&r, ","); + + /* look for the given counter name */ + + for (pe = PMC_EVENT_FIRST; pe < (PMC_EVENT_LAST+1); pe++) + if (!strcmp(ctrname, pmc_event_table[pe].pm_ev_name)) + break; + + if (pe > PMC_EVENT_LAST) { + errno = EINVAL; + goto out; + } + + bzero(&pmc_config, sizeof(pmc_config)); + pmc_config.pm_ev = pmc_event_table[pe].pm_ev_code; + pmc_config.pm_class = pmc_event_table[pe].pm_ev_class; + pmc_config.pm_cpu = cpu; + pmc_config.pm_mode = mode; + pmc_config.pm_flags = flags; + + if (PMC_IS_SAMPLING_MODE(mode)) + pmc_config.pm_caps |= PMC_CAP_INTERRUPT; + + if 
(pmc_mdep_allocate_pmc(pe, r, &pmc_config) < 0) { + errno = EINVAL; + goto out; + } + + if (PMC_CALL(PMCALLOCATE, &pmc_config) < 0) + goto out; + + *pmcid = pmc_config.pm_pmcid; + + retval = 0; + + out: + if (spec_copy) + free(spec_copy); + + return retval; +} + +int +pmc_attach(pmc_id_t pmc, pid_t pid) +{ + struct pmc_op_pmcattach pmc_attach_args; + + pmc_attach_args.pm_pmc = pmc; + pmc_attach_args.pm_pid = pid; + + return PMC_CALL(PMCATTACH, &pmc_attach_args); +} + +int +pmc_detach(pmc_id_t pmc, pid_t pid) +{ + struct pmc_op_pmcattach pmc_detach_args; + + pmc_detach_args.pm_pmc = pmc; + pmc_detach_args.pm_pid = pid; + + return PMC_CALL(PMCDETACH, &pmc_detach_args); +} + +int +pmc_release(pmc_id_t pmc) +{ + struct pmc_op_simple pmc_release_args; + + pmc_release_args.pm_pmcid = pmc; + + return PMC_CALL(PMCRELEASE, &pmc_release_args); +} + +int +pmc_start(pmc_id_t pmc) +{ + struct pmc_op_simple pmc_start_args; + + pmc_start_args.pm_pmcid = pmc; + return PMC_CALL(PMCSTART, &pmc_start_args); +} + +int +pmc_stop(pmc_id_t pmc) +{ + struct pmc_op_simple pmc_stop_args; + + pmc_stop_args.pm_pmcid = pmc; + return PMC_CALL(PMCSTOP, &pmc_stop_args); +} + +int +pmc_read(pmc_id_t pmc, pmc_value_t *value) +{ + struct pmc_op_pmcrw pmc_read_op; + + pmc_read_op.pm_pmcid = pmc; + pmc_read_op.pm_flags = PMC_F_OLDVALUE; + pmc_read_op.pm_value = -1; + + if (PMC_CALL(PMCRW, &pmc_read_op) < 0) + return -1; + + *value = pmc_read_op.pm_value; + + return 0; +} + +int +pmc_write(pmc_id_t pmc, pmc_value_t value) +{ + struct pmc_op_pmcrw pmc_write_op; + + pmc_write_op.pm_pmcid = pmc; + pmc_write_op.pm_flags = PMC_F_NEWVALUE; + pmc_write_op.pm_value = value; + + return PMC_CALL(PMCRW, &pmc_write_op); +} + +int +pmc_rw(pmc_id_t pmc, pmc_value_t newvalue, pmc_value_t *oldvaluep) +{ + struct pmc_op_pmcrw pmc_rw_op; + + pmc_rw_op.pm_pmcid = pmc; + pmc_rw_op.pm_flags = PMC_F_NEWVALUE | PMC_F_OLDVALUE; + pmc_rw_op.pm_value = newvalue; + + if (PMC_CALL(PMCRW, &pmc_rw_op) < 0) + return -1; + + 
*oldvaluep = pmc_rw_op.pm_value; + + return 0; +} + +int +pmc_set(pmc_id_t pmc, pmc_value_t value) +{ + struct pmc_op_pmcsetcount sc; + + sc.pm_pmcid = pmc; + sc.pm_count = value; + + if (PMC_CALL(PMCSETCOUNT, &sc) < 0) + return -1; + + return 0; + +} + +int +pmc_configure_logfile(int fd) +{ + struct pmc_op_configurelog cla; + + cla.pm_logfd = fd; + if (PMC_CALL(CONFIGURELOG, &cla) < 0) + return -1; + + return 0; +} + +int +pmc_get_driver_stats(struct pmc_op_getdriverstats *gms) +{ + return PMC_CALL(GETDRIVERSTATS, gms); +} + +int +pmc_ncpu(void) +{ + if (pmc_syscall == -1) { + errno = ENXIO; + return -1; + } + + return cpu_info.pm_ncpu; +} + +int +pmc_npmc(int cpu) +{ + if (pmc_syscall == -1) { + errno = ENXIO; + return -1; + } + + if (cpu < 0 || cpu >= (int) cpu_info.pm_ncpu) { + errno = EINVAL; + return -1; + } + + return cpu_info.pm_npmc; +} + +int +pmc_enable(int cpu, int pmc) +{ + struct pmc_op_pmcadmin ssa; + + ssa.pm_cpu = cpu; + ssa.pm_pmc = pmc; + ssa.pm_state = PMC_STATE_FREE; + return PMC_CALL(PMCADMIN, &ssa); +} + +int +pmc_disable(int cpu, int pmc) +{ + struct pmc_op_pmcadmin ssa; + + ssa.pm_cpu = cpu; + ssa.pm_pmc = pmc; + ssa.pm_state = PMC_STATE_DISABLED; + return PMC_CALL(PMCADMIN, &ssa); +} + + +int +pmc_pmcinfo(int cpu, struct pmc_op_getpmcinfo **ppmci) +{ + int nbytes, npmc, saved_errno; + struct pmc_op_getpmcinfo *pmci; + + if ((npmc = pmc_npmc(cpu)) < 0) + return -1; + + nbytes = sizeof(struct pmc_op_getpmcinfo) + + npmc * sizeof(struct pmc_info); + + if ((pmci = calloc(1, nbytes)) == NULL) + return -1; + + pmci->pm_cpu = cpu; + + if (PMC_CALL(GETPMCINFO, pmci) < 0) { + saved_errno = errno; + free(pmci); + errno = saved_errno; + return -1; + } + + *ppmci = pmci; + return 0; +} + +int +pmc_cpuinfo(const struct pmc_op_getcpuinfo **pci) +{ + if (pmc_syscall == -1) { + errno = ENXIO; + return -1; + } + + *pci = &cpu_info; + return 0; +} + +const char * +pmc_name_of_cputype(enum pmc_cputype cp) +{ + if ((int) cp >= PMC_CPU_FIRST && + cp <= 
PMC_CPU_LAST) + return pmc_cputype_names[cp]; + errno = EINVAL; + return NULL; +} + +const char * +pmc_name_of_class(enum pmc_class pc) +{ + if ((int) pc >= PMC_CLASS_FIRST && + pc <= PMC_CLASS_LAST) + return pmc_class_names[pc]; + + errno = EINVAL; + return NULL; +} + +const char * +pmc_name_of_mode(enum pmc_mode pm) +{ + if ((int) pm >= PMC_MODE_FIRST && + pm <= PMC_MODE_LAST) + return pmc_mode_names[pm]; + + errno = EINVAL; + return NULL; +} + +const char * +pmc_name_of_event(enum pmc_event pe) +{ + if ((int) pe >= PMC_EVENT_FIRST && + pe <= PMC_EVENT_LAST) + return pmc_event_table[pe].pm_ev_name; + + errno = EINVAL; + return NULL; +} + +const char * +pmc_name_of_state(enum pmc_state ps) +{ + if ((int) ps >= PMC_STATE_FIRST && + ps <= PMC_STATE_LAST) + return pmc_state_names[ps]; + + errno = EINVAL; + return NULL; +} + +const char * +pmc_name_of_disposition(enum pmc_disp pd) +{ + if ((int) pd >= PMC_DISP_FIRST && + pd <= PMC_DISP_LAST) + return pmc_disposition_names[pd]; + + errno = EINVAL; + return NULL; +} + +const char * +pmc_name_of_capability(enum pmc_caps cap) +{ + int i; + + /* + * 'cap' should have a single bit set and should be in + * range. + */ + + if ((cap & (cap - 1)) || cap < PMC_CAP_FIRST || + cap > PMC_CAP_LAST) { + errno = EINVAL; + return NULL; + } + + i = ffs(cap); + + return pmc_capability_names[i - 1]; +} + +/* + * Return a list of events known to a given PMC class. 'cl' is the + * PMC class identifier, 'eventnames' is the returned list of 'const + * char *' pointers pointing to the names of the events. 'nevents' is + * the number of event name pointers returned. + * + * The space for 'eventnames' is allocated using malloc(3). The caller + * is responsible for freeing this space when done. 
+ */ + +int +pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, + int *nevents) +{ + int count; + const char **names; + const struct pmc_event_descr *ev; + + switch (cl) + { + case PMC_CLASS_TSC: + ev = &pmc_event_table[PMC_EV_TSC_TSC]; + count = 1; + break; + case PMC_CLASS_K7: + ev = &pmc_event_table[PMC_EV_K7_FIRST]; + count = PMC_EV_K7_LAST - PMC_EV_K7_FIRST + 1; + break; + case PMC_CLASS_K8: + ev = &pmc_event_table[PMC_EV_K8_FIRST]; + count = PMC_EV_K8_LAST - PMC_EV_K8_FIRST + 1; + break; + case PMC_CLASS_P5: + ev = &pmc_event_table[PMC_EV_P5_FIRST]; + count = PMC_EV_P5_LAST - PMC_EV_P5_FIRST + 1; + break; + case PMC_CLASS_P6: + ev = &pmc_event_table[PMC_EV_P6_FIRST]; + count = PMC_EV_P6_LAST - PMC_EV_P6_FIRST + 1; + break; + case PMC_CLASS_P4: + ev = &pmc_event_table[PMC_EV_P4_FIRST]; + count = PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1; + break; + default: + errno = EINVAL; + return -1; + } + + if ((names = malloc(count * sizeof(const char *))) == NULL) + return -1; + + *eventnames = names; + *nevents = count; + + for (;count--; ev++, names++) + *names = ev->pm_ev_name; + return 0; +} + +/* + * Architecture specific APIs + */ + +#if __i386__ || __amd64__ + +int +pmc_x86_get_msr(pmc_id_t pmc, uint32_t *msr) +{ + struct pmc_op_x86_getmsr gm; + + gm.pm_pmcid = pmc; + if (PMC_CALL(PMCX86GETMSR, &gm) < 0) + return -1; + *msr = gm.pm_msr; + return 0; +} + +#endif diff --git a/lib/libpmc/pmc.3 b/lib/libpmc/pmc.3 new file mode 100644 index 0000000..2fce168 --- /dev/null +++ b/lib/libpmc/pmc.3 @@ -0,0 +1,3090 @@ +.\" Copyright (c) 2003 Joseph Koshy. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. 
Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" This software is provided by Joseph Koshy ``as is'' and +.\" any express or implied warranties, including, but not limited to, the +.\" implied warranties of merchantability and fitness for a particular purpose +.\" are disclaimed. in no event shall Joseph Koshy be liable +.\" for any direct, indirect, incidental, special, exemplary, or consequential +.\" damages (including, but not limited to, procurement of substitute goods +.\" or services; loss of use, data, or profits; or business interruption) +.\" however caused and on any theory of liability, whether in contract, strict +.\" liability, or tort (including negligence or otherwise) arising in any way +.\" out of the use of this software, even if advised of the possibility of +.\" such damage. +.\" +.\" $FreeBSD$ +.\" +.Dd Apr 15, 2005 +.Os +.Dt PMC 3 +.Sh NAME +.Nm pmc_allocate , +.Nm pmc_attach , +.Nm pmc_configure_logfile , +.Nm pmc_cpuinfo , +.Nm pmc_detach , +.Nm pmc_disable , +.Nm pmc_enable , +.Nm pmc_event_names_of_class , +.Nm pmc_get_driver_stats , +.Nm pmc_init , +.Nm pmc_name_of_capability , +.Nm pmc_name_of_class , +.Nm pmc_name_of_cputype , +.Nm pmc_name_of_disposition , +.Nm pmc_name_of_event , +.Nm pmc_name_of_mode , +.Nm pmc_name_of_state , +.Nm pmc_ncpu , +.Nm pmc_npmc , +.Nm pmc_pmcinfo , +.Nm pmc_read , +.Nm pmc_release , +.Nm pmc_rw , +.Nm pmc_set , +.Nm pmc_start , +.Nm pmc_stop , +.Nm pmc_write , +.Nm pmc_x86_get_msr +.Nd programming API for using hardware performance monitoring counters +.Sh LIBRARY +.Lb libpmc +.Sh SYNOPSIS +.In pmc.h +.Ft int +.Fo pmc_allocate +.Fa "const char *eventspecifier" +.Fa "enum pmc_mode mode" +.Fa "uint32_t flags" +.Fa "int cpu" +.Fa "pmc_id_t *pmcid" +.Fc +.Ft int +.Fo pmc_attach +.Fa "pmc_id_t pmcid" +.Fa "pid_t pid" +.Fc +.Ft int +.Fn pmc_configure_logfile "int
fd" +.Ft int +.Fn pmc_cpuinfo "const struct pmc_op_getcpuinfo **cpu_info" +.Ft int +.Fo pmc_detach +.Fa "pmc_id_t pmcid" +.Fa "pid_t pid" +.Fc +.Ft int +.Fn pmc_disable "int cpu" "int pmc" +.Ft int +.Fn pmc_enable "int cpu" "int pmc" +.Ft int +.Fo pmc_event_names_of_class +.Fa "enum pmc_class cl" +.Fa "const char ***eventnames" +.Fa "int *nevents" +.Fc +.Ft int +.Fn pmc_get_driver_stats "struct pmc_op_getdriverstats *gms" +.Ft int +.Fn pmc_init "void" +.Ft "const char *" +.Fn pmc_name_of_capability "enum pmc_caps pc" +.Ft "const char *" +.Fn pmc_name_of_class "enum pmc_class pc" +.Ft "const char *" +.Fn pmc_name_of_cputype "enum pmc_cputype ct" +.Ft "const char *" +.Fn pmc_name_of_disposition "enum pmc_disp pd" +.Ft "const char *" +.Fn pmc_name_of_event "enum pmc_event pe" +.Ft "const char *" +.Fn pmc_name_of_mode "enum pmc_mode pm" +.Ft "const char *" +.Fn pmc_name_of_state "enum pmc_state ps" +.Ft int +.Fn pmc_ncpu "void" +.Ft int +.Fn pmc_npmc "int cpu" +.Ft int +.Fn pmc_pmcinfo "int cpu" "struct pmc_op_getpmcinfo **pmc_info" +.Ft int +.Fn pmc_read "pmc_id_t pmc" "pmc_value_t *value" +.Ft int +.Fn pmc_release "pmc_id_t pmc" +.Ft int +.Fn pmc_rw "pmc_id_t pmc" "pmc_value_t newvalue" "pmc_value_t *oldvaluep" +.Ft int +.Fn pmc_set "pmc_id_t pmc" "pmc_value_t value" +.Ft int +.Fn pmc_start "pmc_id_t pmc" +.Ft int +.Fn pmc_stop "pmc_id_t pmc" +.Ft int +.Fn pmc_write "pmc_id_t pmc" "pmc_value_t value" +.Ft int +.Fn pmc_x86_get_msr "pmc_id_t pmc" "uint32_t *msr" +.Sh DESCRIPTION +These functions implement a high-level library for using the +system's hardware performance counters. +.Pp +PMCs are allocated using +.Fn pmc_allocate , +released using +.Fn pmc_release +and read using +.Fn pmc_read . +Allocated PMCs may be started or stopped at any time using +.Fn pmc_start +and +.Fn pmc_stop +respectively.
+An allocated PMC may be of +.Qq global +scope, meaning that the PMC measures system-wide events, or +.Qq process-private +scope, meaning that the PMC only counts hardware events when +the allocating process (or, optionally, its children) +are active. +.Pp +PMCs may further be in +.Qq "counting mode" , +or in +.Qq "sampling mode" . +Sampling mode PMCs deliver an interrupt to the CPU after +a configured number of hardware events have been seen. +A process-private sampling mode PMC will cause its owner +process to get periodic +.Sy SIGPROF +interrupts, while a global sampling mode PMC is used to +do system-wide statistical sampling (see +.Xr hwpmc 4 ) . +The sampling rate desired of a sampling-mode PMC is set using +.Fn pmc_set . +Counting mode PMCs do not interrupt the CPU; their values +can be read using +.Fn pmc_read . +.Pp +System-wide statistical sampling is configured by allocating +at least one sampling mode PMC with +global scope, and by configuring a log file using +.Fn pmc_configure_logfile . +The +.Xr hwpmc 4 +driver manages system-wide statistical sampling; for more +information please see +.Xr hwpmc 4 . +.Ss APPLICATION PROGRAMMING INTERFACE +.Fn pmc_init +initializes the +.Xr pmc 3 +library. +This function must be called first, before any of the other +functions in the library. +.Pp +.Fn pmc_allocate +allocates a counter that counts the events named by +.Fa eventspecifier , +and writes the allocated counter id to +.Fa *pmcid . +Argument +.Fa eventspecifier +comprises a PMC event name followed by an optional comma-separated +list of keywords and qualifiers. +The allowed syntax for +.Fa eventspecifier +is processor architecture specific and is listed in section +.Sx "EVENT SPECIFIERS" +below. +The desired PMC mode is specified by +.Fa mode , +and any mode specific modifiers are specified using +.Fa flags . +The +.Fa cpu +argument is the value +.Li PMC_CPU_ANY , +or names the cpu the allocation is to be on.
+Requesting a specific CPU only makes sense for global PMCs; +process-private PMC allocations should always specify +.Li PMC_CPU_ANY . +.Pp +By default a PMC configured in process-virtual counting mode is set up +to profile its owner process. +The function +.Fn pmc_attach +may be used to attach the PMC to a different process. +.Fn pmc_attach +needs to be called before the counter is first started +with +.Fn pmc_start . +The function +.Fn pmc_detach +may be used to detach a PMC from a process it was attached to +using a prior call to +.Fn pmc_attach . +.Pp +.Fn pmc_release +releases a PMC previously allocated with +.Fn pmc_allocate . +This function call implicitly detaches the PMC from all its target +processes. +.Pp +An allocated PMC may be started and stopped using +.Fn pmc_start +and +.Fn pmc_stop +respectively. +.Pp +The current value of a PMC may be read with +.Fn pmc_read +and written using +.Fn pmc_write , +provided the underlying hardware supports these operations on +the allocated PMC. +The read and write operations may be combined using +.Fn pmc_rw . +.Pp +The +.Fn pmc_configure_logfile +function causes the +.Xr hwpmc 4 +driver to log system-wide performance data to the file corresponding +to the process' file handle +.Fa fd . +.Pp +.Fn pmc_set +configures a sampling PMC +.Fa pmc +to interrupt every +.Fa value +events. +For counting PMCs, +.Fn pmc_set +sets the initial value of the PMC to +.Fa value . +.Pp +.Fn pmc_get_driver_stats +copies a snapshot of the usage statistics maintained by +.Xr hwpmc 4 +into the memory area pointed to by argument +.Fa gms . +.Ss SIGNAL HANDLING REQUIREMENTS +Applications using PMCs are required to handle the following signals: +.Bl -tag -width indent +.It SIGBUS +When the +.Xr hwpmc 4 +module is unloaded using +.Xr kldunload 8 , +processes that have PMCs allocated to them will be sent a +SIGBUS signal.
+.It SIGIO +Attempting to read a PMC that is not currently attached to a running +process will cause a SIGIO signal to be sent to the reader. +.El +.Ss CONVENIENCE FUNCTIONS +.Fn pmc_ncpu +returns the number of CPUs present in the system. +.Pp +.Fn pmc_npmc +returns the number of PMCs supported on CPU +.Fa cpu . +.Fn pmc_cpuinfo +sets argument +.Fa cpu_info +to point to a structure with information about the system's CPUs. +.Fn pmc_pmcinfo +returns information about the current state of CPU +.Fa cpu Ap s +PMCs. +.Pp +The functions +.Fn pmc_name_of_capability , +.Fn pmc_name_of_class , +.Fn pmc_name_of_cputype , +.Fn pmc_name_of_disposition , +.Fn pmc_name_of_event , +.Fn pmc_name_of_mode +and +.Fn pmc_name_of_state +are useful for code wanting to print error messages. +They return +.Ft "const char *" +pointers to human-readable representations of their arguments. +These return values should not be freed using +.Xr free 3 . +.Pp +.Fn pmc_event_names_of_class +returns a list of event names supported by a given PMC class +.Fa cl . +On successful return, an array of +.Ft "const char *" +pointers to the names of valid events supported by class +.Fa cl +is allocated by the library using +.Xr malloc 3 , +and a pointer to this array is returned in the location pointed to by +.Fa eventnames . +The number of pointers allocated is returned in the location pointed +to by +.Fa nevents . +.Ss ADMINISTRATION +Individual PMCs may be enabled or disabled on a given CPU using +.Fn pmc_enable +and +.Fn pmc_disable +respectively. +For these functions, +.Fa cpu +is the CPU number, and +.Fa pmc +is the index of the PMC to be operated on. +Only the super-user is allowed to enable and disable PMCs. +.Ss X86 ARCHITECTURE SPECIFIC API +The +.Fn pmc_x86_get_msr +function returns the processor model specific register number +associated with +.Fa pmc . +Applications may use the x86 +.Sy RDPMC +instruction to directly read the contents of the PMC. 
+.Sh EVENT SPECIFIERS +Event specifiers are strings comprising an event name, followed by +optional parameters modifying the semantics of the hardware event +being probed. +Event names are PMC architecture dependent, but the +.Xr pmc 3 +library defines machine independent aliases for commonly used +events. +.Ss Event Name Aliases +Event name aliases are CPU architecture independent names for commonly +used events. +The following aliases are known to this version of the +.Xr pmc 3 +library: +.Bl -tag -width indent +.It Li branches +Measure the number of branches retired. +.It Li branch-mispredicts +Measure the number of retired branches that were mispredicted. +.It Li cycles +Measure processor cycles. +This event is implemented using the processor's Time Stamp Counter +register. +.It Li dc-misses +Measure the number of data cache misses. +.It Li ic-misses +Measure the number of instruction cache misses. +.It Li instructions +Measure the number of instructions retired. +.It Li interrupts +Measure the number of interrupts seen. +.El +.Ss Time Stamp Counter (TSC) +The timestamp counter is a monotonically non-decreasing counter that +counts processor cycles. +.Pp +In the i386 architecture this counter may +be selected by requesting an event with eventspecifier +.Ic tsc . +The +.Ic tsc +event does not support any further qualifiers. +It can only be allocated in system-wide counting mode, +and is a read-only counter. +Multiple processes are allowed to allocate the TSC. +Once allocated, it may be read using the +.Fn pmc_read +function, or by using the RDTSC instruction. +.Ss AMD (K7) PMCs +These PMCs are present in the +.Tn "AMD Athlon" +series of CPUs and are documented in: +.Rs +.%B "AMD Athlon Processor x86 Code Optimization Guide" +.%N "Publication No. 22007" +.%D "February 2002" +.%Q "Advanced Micro Devices, Inc."
+.Re +.Pp +Event specifiers for AMD K7 PMCs can have the following optional +qualifiers: +.Bl -tag -width indent +.It Li count= Ns Ar value +Configure the counter to increment only if the number of configured +events measured in a cycle is greater than or equal to +.Ar value . +.It Li edge +Configure the counter to only count negated-to-asserted transitions +of the conditions expressed by the other qualifiers. +In other words, the counter will increment only once whenever a given +condition becomes true, irrespective of the number of clocks during +which the condition remains true. +.It Li inv +Invert the sense of comparison when the +.Li count +qualifier is present, making the counter increment when the +number of events per cycle is less than the value specified by +the +.Li count +qualifier. +.It Li os +Configure the PMC to count events happening at privilege level 0. +.It Li unitmask= Ns Ar mask +This qualifier is used to further qualify a select few events, +.Li k7-dc-refills-from-l2 , +.Li k7-dc-refills-from-system +and +.Li k7-dc-writebacks . +Here +.Ar mask +is a string of the following characters optionally separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li m +Count operations for lines in the +.Dq Modified +state. +.It Li o +Count operations for lines in the +.Dq Owner +state. +.It Li e +Count operations for lines in the +.Dq Exclusive +state. +.It Li s +Count operations for lines in the +.Dq Shared +state. +.It Li i +Count operations for lines in the +.Dq Invalid +state. +.El +If no +.Ar unitmask +qualifier is specified, the default is to count events for cache +lines in any of the above states. +.It Li usr +Configure the PMC to count events occurring at privilege levels 1, 2 +or 3. +.El +If neither of the +.Li os +or +.Li usr +qualifiers are specified, the default is to enable both. +.Pp +The event specifiers supported on AMD K7 PMCs are: +.Bl -tag -width indent +.It Li k7-dc-accesses +Count data cache accesses.
+.It Li k7-dc-misses +Count data cache misses. +.It Li k7-dc-refills-from-l2 Op Li ,unitmask= Ns Ar mask +Count data cache refills from L2 cache. +This event may be further qualified using the +.Li unitmask +qualifier. +.It Li k7-dc-refills-from-system Op Li ,unitmask= Ns Ar mask +Count data cache refills from system memory. +This event may be further qualified using the +.Li unitmask +qualifier. +.It Li k7-dc-writebacks Op Li ,unitmask= Ns Ar mask +Count data cache writebacks. +This event may be further qualified using the +.Li unitmask +qualifier. +.It Li k7-l1-dtlb-miss-and-l2-dtlb-hits +Count L1 DTLB misses and L2 DTLB hits. +.It Li k7-l1-and-l2-dtlb-misses +Count L1 and L2 DTLB misses. +.It Li k7-misaligned-references +Count misaligned data references. +.It Li k7-ic-fetches +Count instruction cache fetches. +.It Li k7-ic-misses +Count instruction cache misses. +.It Li k7-l1-itlb-misses +Count L1 ITLB misses that are L2 ITLB hits. +.It Li k7-l1-l2-itlb-misses +Count L1 (and L2) ITLB misses. +.It Li k7-retired-instructions +Count all retired instructions. +.It Li k7-retired-ops +Count retired ops. +.It Li k7-retired-branches +Count all retired branches (conditional, unconditional, exceptions +and interrupts). +.It Li k7-retired-branches-mispredicted +Count all mispredicted retired branches. +.It Li k7-retired-taken-branches +Count retired taken branches. +.It Li k7-retired-taken-branches-mispredicted +Count mispredicted taken branches that were retired. +.It Li k7-retired-far-control-transfers +Count retired far control transfers. +.It Li k7-retired-resync-branches +Count retired resync branches (non control transfer branches). +.It Li k7-interrupts-masked-cycles +Count the number of cycles when the processor's +.Li IF +flag was zero. +.It Li k7-interrupts-masked-while-pending-cycles +Count the number of cycles interrupts were masked while pending due +to the processor's +.Li IF +flag being zero.
+.It Li k7-hardware-interrupts +Count the number of taken hardware interrupts. +.El +.Ss AMD (K8) PMCs +These PMCs are present in the +.Tn "AMD Athlon64" +and +.Tn "AMD Opteron" +series of CPUs. +They are documented in: +.Rs +.%B "BIOS and Kernel Developer's Guide for the AMD Athlon(tm) 64 and AMD Opteron Processors" +.%N "Publication No. 26094" +.%D "April 2004" +.%Q "Advanced Micro Devices, Inc." +.Re +.Pp +Event specifiers for AMD K8 PMCs can have the following optional +qualifiers: +.Bl -tag -width indent +.It Li count= Ns Ar value +Configure the counter to increment only if the number of configured +events measured in a cycle is greater than or equal to +.Ar value . +.It Li edge +Configure the counter to only count negated-to-asserted transitions +of the conditions expressed by the other fields. +In other words, the counter will increment only once whenever a given +condition becomes true, irrespective of the number of clocks during +which the condition remains true. +.It Li inv +Invert the sense of comparison when the +.Li count +qualifier is present, making the counter increment when the +number of events per cycle is less than the value specified by +the +.Li count +qualifier. +.It Li mask= Ns Ar qualifier +Many event specifiers for AMD K8 PMCs need to be additionally +qualified using a mask qualifier. +These additional qualifiers are event-specific and are documented +along with their associated event specifiers below. +.It Li os +Configure the PMC to count events happening at privilege level 0. +.It Li usr +Configure the PMC to count events occurring at privilege levels 1, 2 +or 3. +.El +If neither of the +.Li os +or +.Li usr +qualifiers are specified, the default is to enable both. +.Pp +The event specifiers supported on AMD K8 PMCs are: +.Bl -tag -width indent +.It Li k8-bu-cpu-clk-unhalted +Count the number of clock cycles when the CPU is not in the HLT or +STPCLK states.
+.It Li k8-bu-fill-request-l2-miss Op Li ,mask= Ns Ar qualifier +Count fill requests that missed in the L2 cache. +This event may be further qualified using +.Ar qualifier , +which is a +.Li + Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XXXXXXXXXX" -compact +.It Li dc-fill +Count data cache fill requests. +.It Li ic-fill +Count instruction cache fill requests. +.It Li tlb-reload +Count TLB reloads. +.El +The default is to count all types of requests. +.It Li k8-bu-internal-l2-request Op Li ,mask= Ns Ar qualifier +Count internally generated requests to the L2 cache. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XXXXXXXXXX" -compact +.It Li cancelled +Count cancelled requests. +.It Li dc-fill +Count data cache fill requests. +.It Li ic-fill +Count instruction cache fill requests. +.It Li tag-snoop +Count tag snoop requests. +.It Li tlb-reload +Count TLB reloads. +.El +The default is to count all types of requests. +.It Li k8-dc-access +Count data cache accesses including microcode scratchpad accesses. +.It Li k8-dc-copyback Op Li ,mask= Ns Ar qualifier +Count data cache copyback operations. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "exclusive" -compact +.It Li exclusive +Count operations for lines in the +.Dq exclusive +state. +.It Li invalid +Count operations for lines in the +.Dq invalid +state. +.It Li modified +Count operations for lines in the +.Dq modified +state. +.It Li owner +Count operations for lines in the +.Dq owner +state. +.It Li shared +Count operations for lines in the +.Dq shared +state. +.El +The default is to count operations for lines in all the +above states. +.It Li k8-dc-dcache-accesses-by-locks Op Li ,mask= Ns Ar qualifier +Count data cache accesses by lock instructions. 
+This event is only available on processors of revision C or later +vintage. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "exclusive" -compact +.It Li accesses +Count data cache accesses by lock instructions. +.It Li misses +Count data cache misses by lock instructions. +.El +The default is to count all accesses. +.It Li k8-dc-dispatched-prefetch-instructions Op Li ,mask= Ns Ar qualifier +Count the number of dispatched prefetch instructions. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "exclusive" -compact +.It Li load +Count load operations. +.It Li nta +Count non-temporal operations. +.It Li store +Count store operations. +.El +The default is to count all operations. +.It Li k8-dc-l1-dtlb-miss-and-l2-dtlb-hit +Count L1 DTLB misses that are L2 DTLB hits. +.It Li k8-dc-l1-dtlb-miss-and-l2-dtlb-miss +Count L1 DTLB misses that are also misses in the L2 DTLB. +.It Li k8-dc-microarchitectural-early-cancel-of-an-access +Count microarchitectural early cancels of data cache accesses. +.It Li k8-dc-microarchitectural-late-cancel-of-an-access +Count microarchitectural late cancels of data cache accesses. +.It Li k8-dc-misaligned-data-reference +Count misaligned data references. +.It Li k8-dc-miss +Count data cache misses. +.It Li k8-dc-one-bit-ecc-error Op Li ,mask= Ns Ar qualifier +Count one bit ECC errors found by the scrubber. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "piggyback" -compact +.It Li scrubber +Count scrubber detected errors. +.It Li piggyback +Count piggyback scrubber errors. +.El +The default is to count both kinds of errors. +.It Li k8-dc-refill-from-l2 Op Li ,mask= Ns Ar qualifier +Count data cache refills from L2 cache. 
+This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "exclusive" -compact +.It Li exclusive +Count operations for lines in the +.Dq exclusive +state. +.It Li invalid +Count operations for lines in the +.Dq invalid +state. +.It Li modified +Count operations for lines in the +.Dq modified +state. +.It Li owner +Count operations for lines in the +.Dq owner +state. +.It Li shared +Count operations for lines in the +.Dq shared +state. +.El +The default is to count operations for lines in all the +above states. +.It Li k8-dc-refill-from-system Op Li ,mask= Ns Ar qualifier +Count data cache refills from system memory. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "exclusive" -compact +.It Li exclusive +Count operations for lines in the +.Dq exclusive +state. +.It Li invalid +Count operations for lines in the +.Dq invalid +state. +.It Li modified +Count operations for lines in the +.Dq modified +state. +.It Li owner +Count operations for lines in the +.Dq owner +state. +.It Li shared +Count operations for lines in the +.Dq shared +state. +.El +The default is to count operations for lines in all the +above states. +.It Li k8-fp-dispatched-fpu-ops Op Li ,mask= Ns Ar qualifier +Count the number of dispatched FPU ops. +This event is supported in revision B and later CPUs. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XXXXXXXXXX" -compact +.It Li add-pipe-excluding-junk-ops +Count add pipe ops excluding junk ops. +.It Li add-pipe-junk-ops +Count junk ops in the add pipe. +.It Li multiply-pipe-excluding-junk-ops +Count multiply pipe ops excluding junk ops. +.It Li multiply-pipe-junk-ops +Count junk ops in the multiply pipe. 
+.It Li store-pipe-excluding-junk-ops +Count store pipe ops excluding junk ops. +.It Li store-pipe-junk-ops +Count junk ops in the store pipe. +.El +The default is to count all types of ops. +.It Li k8-fp-cycles-with-no-fpu-ops-retired +Count cycles when no FPU ops were retired. +This event is supported in revision B and later CPUs. +.It Li k8-fp-dispatched-fpu-fast-flag-ops +Count dispatched FPU ops that use the fast flag interface. +This event is supported in revision B and later CPUs. +.It Li k8-fr-decoder-empty +Count cycles when there was nothing to dispatch (i.e., the decoder +was empty). +.It Li k8-fr-dispatch-stalls +Count all dispatch stalls. +.It Li k8-fr-dispatch-stall-for-segment-load +Count dispatch stalls for segment loads. +.It Li k8-fr-dispatch-stall-for-serialization +Count dispatch stalls for serialization. +.It Li k8-fr-dispatch-stall-from-branch-abort-to-retire +Count dispatch stalls from branch abort to retirement. +.It Li k8-fr-dispatch-stall-when-fpu-is-full +Count dispatch stalls when the FPU is full. +.It Li k8-fr-dispatch-stall-when-ls-is-full +Count dispatch stalls when the load/store unit is full. +.It Li k8-fr-dispatch-stall-when-reorder-buffer-is-full +Count dispatch stalls when the reorder buffer is full. +.It Li k8-fr-dispatch-stall-when-reservation-stations-are-full +Count dispatch stalls when reservation stations are full. +.It Li k8-fr-dispatch-stall-when-waiting-for-all-to-be-quiet +Count dispatch stalls when waiting for all to be quiet. +.\" XXX What does "waiting for all to be quiet" mean? +.It Li k8-fr-dispatch-stall-when-waiting-far-xfer-or-resync-branch-pending +Count dispatch stalls when a far control transfer or a resync branch +is pending. +.It Li k8-fr-fpu-exceptions Op Li ,mask= Ns Ar qualifier +Count FPU exceptions. +This event is supported in revision B and later CPUs.
+This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XXXXXXXXXX" -compact +.It Li sse-and-x87-microtraps +Count SSE and x87 microtraps. +.It Li sse-reclass-microfaults +Count SSE reclass microfaults. +.It Li sse-retype-microfaults +Count SSE retype microfaults. +.It Li x87-reclass-microfaults +Count x87 reclass microfaults. +.El +The default is to count all types of exceptions. +.It Li k8-fr-interrupts-masked-cycles +Count cycles when interrupts were masked (i.e., cycles when CPU RFLAGS field IF was zero). +.It Li k8-fr-interrupts-masked-while-pending-cycles +Count cycles while interrupts were masked while pending (i.e., cycles +when INTR was asserted while CPU RFLAGS field IF was zero). +.It Li k8-fr-number-of-breakpoints-for-dr0 +Count the number of breakpoints for DR0. +.It Li k8-fr-number-of-breakpoints-for-dr1 +Count the number of breakpoints for DR1. +.It Li k8-fr-number-of-breakpoints-for-dr2 +Count the number of breakpoints for DR2. +.It Li k8-fr-number-of-breakpoints-for-dr3 +Count the number of breakpoints for DR3. +.It Li k8-fr-retired-branches +Count retired branches including exceptions and interrupts. +.It Li k8-fr-retired-branches-mispredicted +Count mispredicted retired branches. +.It Li k8-fr-retired-far-control-transfers +Count retired far control transfers (which are always mispredicted). +.It Li k8-fr-retired-fastpath-double-op-instructions Op Li ,mask= Ns Ar qualifier +Count retired fastpath double op instructions. +This event is supported in revision B and later CPUs. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XXXXXXXXXXXX" -compact +.It Li low-op-pos-0 +Count instructions with the low op in position 0. +.It Li low-op-pos-1 +Count instructions with the low op in position 1. +.It Li low-op-pos-2 +Count instructions with the low op in position 2.
+.El +The default is to count all types of instructions. +.It Li k8-fr-retired-fpu-instructions Op Li ,mask= Ns Ar qualifier +Count retired FPU instructions. +This event is supported in revision B and later CPUs. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XXXXXXXXXX" -compact +.It Li mmx-3dnow +Count MMX and 3DNow! instructions. +.It Li packed-sse-sse2 +Count packed SSE and SSE2 instructions. +.It Li scalar-sse-sse2 +Count scalar SSE and SSE2 instructions. +.It Li x87 +Count x87 instructions. +.El +The default is to count all types of instructions. +.It Li k8-fr-retired-near-returns +Count retired near returns. +.It Li k8-fr-retired-near-returns-mispredicted +Count mispredicted near returns. +.It Li k8-fr-retired-resyncs +Count retired resyncs (non-control transfer branches). +.It Li k8-fr-retired-taken-hardware-interrupts +Count retired taken hardware interrupts. +.It Li k8-fr-retired-taken-branches +Count retired taken branches. +.It Li k8-fr-retired-taken-branches-mispredicted +Count retired taken branches that were mispredicted. +.It Li k8-fr-retired-taken-branches-mispredicted-by-addr-miscompare +Count retired taken branches that were mispredicted only due to an +address miscompare. +.It Li k8-fr-retired-uops +Count retired uops. +.It Li k8-fr-retired-x86-instructions +Count retired x86 instructions including exceptions and interrupts. +.It Li k8-ic-fetch +Count instruction cache fetches. +.It Li k8-ic-instruction-fetch-stall +Count cycles in stalls due to instruction fetch. +.It Li k8-ic-l1-itlb-miss-and-l2-itlb-hit +Count L1 ITLB misses that are L2 ITLB hits. +.It Li k8-ic-l1-itlb-miss-and-l2-itlb-miss +Count ITLB misses that miss in both L1 and L2 ITLBs. +.It Li k8-ic-microarchitectural-resync-by-snoop +Count microarchitectural resyncs caused by snoops. +.It Li k8-ic-miss +Count instruction cache misses.
+.It Li k8-ic-refill-from-l2 +Count instruction cache refills from L2 cache. +.It Li k8-ic-refill-from-system +Count instruction cache refills from system memory. +.It Li k8-ic-return-stack-hits +Count hits to the return stack. +.It Li k8-ic-return-stack-overflow +Count overflows of the return stack. +.It Li k8-ls-buffer2-full +Count load/store buffer2 full events. +.It Li k8-ls-locked-operation Op Li ,mask= Ns Ar qualifier +Count locked operations. +For revision C and later CPUs, the following qualifiers are supported: +.Bl -tag -width "XXXXXXXXXXXXX" -compact +.It Li cycles-in-request +Count the number of cycles in the lock request/grant stage. +.It Li cycles-to-complete +Count the number of cycles a lock takes to complete once it is +non-speculative and is the older load/store operation. +.It Li locked-instructions +Count the number of lock instructions executed. +.El +The default is to count the number of lock instructions executed. +.It Li k8-ls-microarchitectural-late-cancel +Count microarchitectural late cancels of operations in the load/store +unit. +.It Li k8-ls-microarchitectural-resync-by-self-modifying-code +Count microarchitectural resyncs caused by self-modifying code. +.It Li k8-ls-microarchitectural-resync-by-snoop +Count microarchitectural resyncs caused by snoops. +.It Li k8-ls-retired-cflush-instructions +Count retired CLFLUSH instructions. +.It Li k8-ls-retired-cpuid-instructions +Count retired CPUID instructions. +.It Li k8-ls-segment-register-load Op Li ,mask= Ns Ar qualifier +Count segment register loads. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XX" -compact +.It Li cs +Count CS register loads. +.It Li ds +Count DS register loads. +.It Li es +Count ES register loads. +.It Li fs +Count FS register loads. +.It Li gs +Count GS register loads. +.\" .It Ic hs +.\" Count HS register loads. +.\" XXX "HS" register?
+.It Li ss +Count SS register loads. +.El +The default is to count all types of loads. +.It Li k8-nb-memory-controller-bypass-saturation Op Li ,mask= Ns Ar qualifier +Count memory controller bypass counter saturation events. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XXXXXXXXXX" -compact +.It Li dram-controller-interface-bypass +Count DRAM controller interface bypass. +.It Li dram-controller-queue-bypass +Count DRAM controller queue bypass. +.It Li memory-controller-hi-pri-bypass +Count memory controller high priority bypasses. +.It Li memory-controller-lo-pri-bypass +Count memory controller low priority bypasses. +.El +.It Li k8-nb-memory-controller-dram-slots-missed +Count memory controller DRAM command slots missed (in MemClks). +.It Li k8-nb-memory-controller-page-access-event Op Li ,mask= Ns Ar qualifier +Count memory controller page access events. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XXXXXXXXXX" -compact +.It Li page-conflict +Count page conflicts. +.It Li page-hit +Count page hits. +.It Li page-miss +Count page misses. +.El +The default is to count all types of events. +.It Li k8-nb-memory-controller-page-table-overflow +Count memory controller page table overflow events. +.It Li k8-nb-probe-result Op Li ,mask= Ns Ar qualifier +Count probe events. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "exclusive" -compact +.It Li probe-hit +Count all probe hits. +.It Li probe-hit-dirty-no-memory-cancel +Count probe hits without memory cancels. +.It Li probe-hit-dirty-with-memory-cancel +Count probe hits with memory cancels. +.It Li probe-miss +Count probe misses. +.El +.It Li k8-nb-sized-commands Op Li ,mask= Ns Ar qualifier +Count sized commands issued.
+This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "exclusive" -compact +.It Li nonpostwrszbyte +.It Li nonpostwrszdword +.It Li postwrszbyte +.It Li postwrszdword +.It Li rdszbyte +.It Li rdszdword +.It Li rdmodwr +.El +The default is to count all types of commands. +.It Li k8-nb-memory-controller-turnaround Op Li ,mask= Ns Ar qualifier +Count memory controller turnaround events. +This event may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XXXXXXXXXX" -compact +.\" XXX doc is unclear whether these are cycle counts or event counts +.It Li dimm-turnaround +Count DIMM turnarounds. +.It Li read-to-write-turnaround +Count read to write turnarounds. +.It Li write-to-read-turnaround +Count write to read turnarounds. +.El +The default is to count all types of events. +.It Li k8-nb-ht-bus0-bandwidth Op Li ,mask= Ns Ar qualifier +.It Li k8-nb-ht-bus1-bandwidth Op Li ,mask= Ns Ar qualifier +.It Li k8-nb-ht-bus2-bandwidth Op Li ,mask= Ns Ar qualifier +Count events on the HyperTransport(tm) buses. +These events may be further qualified using +.Ar qualifier , +which is a +.Li "+" Ns - Ns +separated set of the following keywords: +.Bl -tag -width "XXXXXXXXXX" -compact +.It Li buffer-release +Count buffer release messages sent. +.It Li command +Count command messages sent. +.It Li data +Count data messages sent. +.It Li nop +Count nop messages sent. +.El +The default is to count all types of messages. +.El +.Ss Intel P6 PMCs +Intel P6 PMCs are present in Intel +.Tn "Pentium Pro" , +.Tn "Pentium II" , +.Tn "Celeron" , +.Tn "Pentium III" +and +.Tn "Pentium M" +processors. +.Pp +These CPUs have two counters. +Some events may only be used on specific counters and some events are +defined only on specific processor models.
+.Pp +These PMCs are documented in +.Rs +.%B "IA-32 Intel(R) Architecture Software Developer's Manual" +.%T "Volume 3: System Programming Guide" +.%N "Order Number 245472-012" +.%D 2003 +.%Q "Intel Corporation" +.Re +.Pp +Event specifiers for Intel P6 PMCs can have the following common +qualifiers: +.Bl -tag -width indent +.It Li cmask= Ns Ar value +Configure the PMC to increment only if the number of configured +events measured in a cycle is greater than or equal to +.Ar value . +.It Li edge +Configure the PMC to count the number of deasserted to asserted +transitions of the conditions expressed by the other qualifiers. +If specified, the counter will increment only once whenever a +condition becomes true, irrespective of the number of clocks during +which the condition remains true. +.It Li inv +Invert the sense of comparison when the +.Ar cmask +qualifier is present, making the counter increment when the number of +events per cycle is less than the value specified by the +.Ar cmask +qualifier. +.It Li os +Configure the PMC to count events happening at processor privilege +level 0. +.It Li umask= Ns Ar value +This qualifier is used to further qualify the event selected (see +below). +.It Li usr +Configure the PMC to count events occurring at privilege levels 1, 2 +or 3. +.El +If neither of the +.Li os +or +.Li usr +qualifiers are specified, the default is to enable both. +.Pp +The event specifiers supported by Intel P6 PMCs are: +.Bl -tag -width indent +.It Li p6-baclears +Count the number of times a static branch prediction was made by the +branch decoder because the BTB did not have a prediction. +.It Li p6-br-bac-missp-exec +.Pq Tn "Pentium M" +Count the number of branch instructions executed that were +mispredicted at the Front End (BAC). +.It Li p6-br-bogus +Count the number of bogus branches. +.It Li p6-br-call-exec +.Pq Tn "Pentium M" +Count the number of call instructions executed.
+.It Li p6-br-call-missp-exec +.Pq Tn "Pentium M" +Count the number of call instructions executed that were mispredicted. +.It Li p6-br-cnd-exec +.Pq Tn "Pentium M" +Count the number of conditional branch instructions executed. +.It Li p6-br-cnd-missp-exec +.Pq Tn "Pentium M" +Count the number of conditional branch instructions executed that were +mispredicted. +.It Li p6-br-ind-call-exec +.Pq Tn "Pentium M" +Count the number of indirect call instructions executed. +.It Li p6-br-ind-exec +.Pq Tn "Pentium M" +Count the number of indirect branch instructions executed. +.It Li p6-br-ind-missp-exec +.Pq Tn "Pentium M" +Count the number of indirect branch instructions executed that were +mispredicted. +.It Li p6-br-inst-decoded +Count the number of branch instructions decoded. +.It Li p6-br-inst-exec +.Pq Tn "Pentium M" +Count the number of branch instructions executed but not necessarily retired. +.It Li p6-br-inst-retired +Count the number of branch instructions retired. +.It Li p6-br-miss-pred-retired +Count the number of mispredicted branch instructions retired. +.It Li p6-br-miss-pred-taken-ret +Count the number of taken mispredicted branches retired. +.It Li p6-br-missp-exec +.Pq Tn "Pentium M" +Count the number of branch instructions executed that were +mispredicted at execution. +.It Li p6-br-ret-bac-missp-exec +.Pq Tn "Pentium M" +Count the number of return instructions executed that were +mispredicted at the Front End (BAC). +.It Li p6-br-ret-exec +.Pq Tn "Pentium M" +Count the number of return instructions executed. +.It Li p6-br-ret-missp-exec +.Pq Tn "Pentium M" +Count the number of return instructions executed that were +mispredicted at execution. +.It Li p6-br-taken-retired +Count the number of taken branches retired. +.It Li p6-btb-misses +Count the number of branches for which the BTB did not produce a +prediction. +.It Li p6-bus-bnr-drv +Count the number of bus clock cycles during which this processor is +driving the BNR# pin.
+.It Li p6-bus-data-rcv +Count the number of bus clock cycles during which this processor is +receiving data. +.It Li p6-bus-drdy-clocks Op Li ,umask= Ns Ar qualifier +Count the number of clocks during which DRDY# is asserted. +An additional qualifier may be specified, and comprises one of the +following keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-hit-drv +Count the number of bus clock cycles during which this processor is +driving the HIT# pin. +.It Li p6-bus-hitm-drv +Count the number of bus clock cycles during which this processor is +driving the HITM# pin. +.It Li p6-bus-lock-clocks Op Li ,umask= Ns Ar qualifier +Count the number of clocks during which LOCK# is asserted on the +external system bus. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-req-outstanding +Count the number of bus requests outstanding in any given cycle. +.It Li p6-bus-snoop-stall +Count the number of clock cycles during which the bus is snoop stalled. +.It Li p6-bus-tran-any Op Li ,umask= Ns Ar qualifier +Count the number of completed bus transactions of any kind. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-tran-brd Op Li ,umask= Ns Ar qualifier +Count the number of burst read transactions.
+An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-tran-burst Op Li ,umask= Ns Ar qualifier +Count the number of completed burst transactions. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-tran-def Op Li ,umask= Ns Ar qualifier +Count the number of completed deferred transactions. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-tran-ifetch Op Li ,umask= Ns Ar qualifier +Count the number of completed instruction fetch transactions. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-tran-inval Op Li ,umask= Ns Ar qualifier +Count the number of completed invalidate transactions. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. 
+.El +The default is to count operations generated by this processor. +.It Li p6-bus-tran-mem Op Li ,umask= Ns Ar qualifier +Count the number of completed memory transactions. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-tran-pwr Op Li ,umask= Ns Ar qualifier +Count the number of completed partial write transactions. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-tran-rfo Op Li ,umask= Ns Ar qualifier +Count the number of completed read-for-ownership transactions. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-trans-io Op Li ,umask= Ns Ar qualifier +Count the number of completed I/O transactions. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-trans-p Op Li ,umask= Ns Ar qualifier +Count the number of completed partial transactions. 
+An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-bus-trans-wb Op Li ,umask= Ns Ar qualifier +Count the number of completed write-back transactions. +An additional qualifier may be specified and comprises one of the following +keywords: +.Bl -tag -width indent -compact +.It Li any +Count transactions generated by any agent on the bus. +.It Li self +Count transactions generated by this processor. +.El +The default is to count operations generated by this processor. +.It Li p6-cpu-clk-unhalted +Count the number of cycles during which the processor was not halted. +.Pp +.Pq Tn "Pentium M" +Count the number of cycles during which the processor was not halted +and not in a thermal trip. +.It Li p6-cycles-div-busy +Count the number of cycles during which the divider is busy and cannot +accept new divides. +This event is only allocated on counter 0. +.It Li p6-cycles-in-pending-and-masked +Count the number of processor cycles for which interrupts were +disabled and interrupts were pending. +.It Li p6-cycles-int-masked +Count the number of processor cycles for which interrupts were +disabled. +.It Li p6-data-mem-refs +Count all loads and all stores using any memory type, including +internal retries. +Each part of a split store is counted separately. +.It Li p6-dcu-lines-in +Count the total lines allocated in the data cache unit. +.It Li p6-dcu-m-lines-in +Count the number of M state lines allocated in the data cache unit. +.It Li p6-dcu-m-lines-out +Count the number of M state lines evicted from the data cache unit. +.It Li p6-dcu-miss-outstanding +Count the weighted number of cycles while a data cache unit miss is +outstanding, incremented by the number of outstanding cache misses at +any time.
+.It Li p6-div +Count the number of floating point divides. +This event is only allocated on counter 1. +.It Li p6-emon-esp-uops +.Pq Tn "Pentium M" +Count the total number of micro-ops. +.It Li p6-emon-est-trans Op Li ,umask= Ns Ar qualifier +.Pq Tn "Pentium M" +Count the number of +.Tn "Enhanced Intel SpeedStep" +transitions. +An additional qualifier may be specified, and can be one of the +following keywords: +.Bl -tag -width indent -compact +.It Li all +Count all transitions. +.It Li freq +Count only frequency transitions. +.El +The default is to count all transitions. +.It Li p6-emon-fused-uops-ret Op Li ,umask= Ns Ar qualifier +.Pq Tn "Pentium M" +Count the number of retired fused micro-ops. +An additional qualifier may be specified, and may be one of the +following keywords: +.Bl -tag -width indent -compact +.It Li all +Count all fused micro-ops. +.It Li loadop +Count only load and op micro-ops. +.It Li stdsta +Count only STD/STA micro-ops. +.El +The default is to count all fused micro-ops. +.It Li p6-emon-kni-comp-inst-ret +.Pq Tn "Pentium III" +Count the number of SSE computational instructions retired. +An additional qualifier may be specified, and comprises one of the +following keywords: +.Bl -tag -width indent -compact +.It Li packed-and-scalar +Count packed and scalar operations. +.It Li scalar +Count scalar operations only. +.El +The default is to count packed and scalar operations. +.It Li p6-emon-kni-inst-retired Op Li ,umask= Ns Ar qualifier +.Pq Tn "Pentium III" +Count the number of SSE instructions retired. +An additional qualifier may be specified, and comprises one of the +following keywords: +.Bl -tag -width indent -compact +.It Li packed-and-scalar +Count packed and scalar operations. +.It Li scalar +Count scalar operations only. +.El +The default is to count packed and scalar operations.
+.It Li p6-emon-kni-pref-dispatched Op Li ,umask= Ns Ar qualifier +.Pq Tn "Pentium III" +Count the number of SSE prefetch or weakly ordered instructions +dispatched (including speculative prefetches). +An additional qualifier may be specified, and comprises one of the +following keywords: +.Bl -tag -width indent -compact +.It Li nta +Count non-temporal prefetches. +.It Li t1 +Count prefetches to L1. +.It Li t2 +Count prefetches to L2. +.It Li wos +Count weakly ordered stores. +.El +The default is to count non-temporal prefetches. +.It Li p6-emon-kni-pref-miss Op Li ,umask= Ns Ar qualifier +.Pq Tn "Pentium III" +Count the number of prefetch or weakly ordered instructions that miss +all caches. +An additional qualifier may be specified, and comprises one of the +following keywords: +.Bl -tag -width indent -compact +.It Li nta +Count non-temporal prefetches. +.It Li t1 +Count prefetches to L1. +.It Li t2 +Count prefetches to L2. +.It Li wos +Count weakly ordered stores. +.El +The default is to count non-temporal prefetches. +.It Li p6-emon-pref-rqsts-dn +.Pq Tn "Pentium M" +Count the number of downward prefetches issued. +.It Li p6-emon-pref-rqsts-up +.Pq Tn "Pentium M" +Count the number of upward prefetches issued. +.It Li p6-emon-simd-instr-retired +.Pq Tn "Pentium M" +Count the number of retired +.Tn MMX +instructions. +.It Li p6-emon-sse-sse2-comp-inst-retired Op Li ,umask= Ns Ar qualifier +.Pq Tn "Pentium M" +Count the number of computational SSE instructions retired. +An additional qualifier may be specified and can be one of the +following keywords: +.Bl -tag -width indent -compact +.It Li sse-packed-single +Count SSE packed-single instructions. +.It Li sse-scalar-single +Count SSE scalar-single instructions. +.It Li sse2-packed-double +Count SSE2 packed-double instructions. +.It Li sse2-scalar-double +Count SSE2 scalar-double instructions. +.El +The default is to count SSE packed-single instructions. 
+.It Li p6-emon-sse-sse2-inst-retired Op Li ,umask= Ns Ar qualifier +.Pp +.Pq Tn "Pentium M" +Count the number of SSE instructions retired. +An additional qualifier can be specified, and can be one of the +following keywords: +.Bl -tag -width indent -compact +.It Li sse-packed-single +Count SSE packed-single instructions. +.It Li sse-packed-single-scalar-single +Count SSE packed-single and scalar-single instructions. +.It Li sse2-packed-double +Count SSE2 packed-double instructions. +.It Li sse2-scalar-double +Count SSE2 scalar-double instructions. +.El +The default is to count SSE packed-single instructions. +.It Li p6-emon-synch-uops +.Pq Tn "Pentium M" +Count the number of sync micro-ops. +.It Li p6-emon-thermal-trip +.Pq Tn "Pentium M" +Count the duration or occurrences of thermal trips. +Use the +.Ar edge +qualifier to count occurrences of thermal trips. +.It Li p6-emon-unfusion +.Pq Tn "Pentium M" +Count the number of unfusion events in the reorder buffer. +.It Li p6-flops +Count the number of computational floating point operations retired. +This event is only allocated on counter 0. +.It Li p6-fp-assist +Count the number of floating point exceptions handled by microcode. +This event is only allocated on counter 1. +.It Li p6-fp-comps-ops-exe +Count the number of computation floating point operations executed. +This event is only allocated on counter 0. +.It Li p6-fp-mmx-trans Op Li ,umask= Ns Ar qualifier +.Pq Tn "Pentium II" , Tn "Pentium III" +Count the number of transitions between MMX and floating-point +instructions. +An additional qualifier may be specified, and comprises one of the +following keywords: +.Bl -tag -width indent -compact +.It Li mmxtofp +Count transitions from MMX instructions to floating-point instructions. +.It Li fptommx +Count transitions from floating-point instructions to MMX instructions. +.El +The default is to count MMX to floating-point transitions. +.It Li p6-hw-int-rx +Count the number of hardware interrupts received.
+.It Li p6-ifu-fetch +Count the number of instruction fetches, both cacheable and non-cacheable. +.It Li p6-ifu-fetch-miss +Count the number of instruction fetch misses (i.e., those that produce +memory accesses). +.It Li p6-ifu-mem-stall +Count the number of cycles instruction fetch is stalled for any reason. +.It Li p6-ild-stall +Count the number of cycles the instruction length decoder is stalled. +.It Li p6-inst-decoded +Count the number of instructions decoded. +.It Li p6-inst-retired +Count the number of instructions retired. +.It Li p6-itlb-miss +Count the number of instruction TLB misses. +.It Li p6-l2-ads +Count the number of L2 address strobes. +.It Li p6-l2-dbus-busy +Count the number of cycles during which the L2 cache data bus was busy. +.It Li p6-l2-dbus-busy-rd +Count the number of cycles during which the L2 cache data bus was busy +transferring read data from L2 to the processor. +.It Li p6-l2-ifetch Op Li ,umask= Ns Ar qualifier +Count the number of L2 instruction fetches. +An additional qualifier may be specified and comprises a list of the following +keywords separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li e +Count operations affecting E (exclusive) state lines. +.It Li i +Count operations affecting I (invalid) state lines. +.It Li m +Count operations affecting M (modified) state lines. +.It Li s +Count operations affecting S (shared) state lines. +.El +The default is to count operations affecting all (MESI) state lines. +.It Li p6-l2-ld Op Li ,umask= Ns Ar qualifier +Count the number of L2 data loads. +An additional qualifier may be specified and comprises a list of the following +keywords separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li both +.Pq Tn "Pentium M" +Count both hardware-prefetched lines and non-hardware-prefetched lines. +.It Li e +Count operations affecting E (exclusive) state lines. +.It Li hw +.Pq Tn "Pentium M" +Count hardware-prefetched lines only. 
+.It Li i +Count operations affecting I (invalid) state lines. +.It Li m +Count operations affecting M (modified) state lines. +.It Li nonhw +.Pq Tn "Pentium M" +Exclude hardware-prefetched lines. +.It Li s +Count operations affecting S (shared) state lines. +.El +The default on processors other than +.Tn "Pentium M" +processors is to count operations affecting all (MESI) state lines. +The default on +.Tn "Pentium M" +processors is to count both hardware-prefetched and +non-hardware-prefetch operations on all (MESI) state lines. +.It Li p6-l2-lines-in Op Li ,umask= Ns Ar qualifier +Count the number of L2 lines allocated. +An additional qualifier may be specified and comprises a list of the following +keywords separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li both +.Pq Tn "Pentium M" +Count both hardware-prefetched lines and non-hardware-prefetched lines. +.It Li e +Count operations affecting E (exclusive) state lines. +.It Li hw +.Pq Tn "Pentium M" +Count hardware-prefetched lines only. +.It Li i +Count operations affecting I (invalid) state lines. +.It Li m +Count operations affecting M (modified) state lines. +.It Li nonhw +.Pq Tn "Pentium M" +Exclude hardware-prefetched lines. +.It Li s +Count operations affecting S (shared) state lines. +.El +The default on processors other than +.Tn "Pentium M" +processors is to count operations affecting all (MESI) state lines. +The default on +.Tn "Pentium M" +processors is to count both hardware-prefetched and +non-hardware-prefetch operations on all (MESI) state lines. +.It Li p6-l2-lines-out Op Li ,umask= Ns Ar qualifier +Count the number of L2 lines evicted. +An additional qualifier may be specified and comprises a list of the following +keywords separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li both +.Pq Tn "Pentium M" +Count both hardware-prefetched lines and non-hardware-prefetched lines. +.It Li e +Count operations affecting E (exclusive) state lines. 
+.It Li hw +.Pq Tn "Pentium M" +Count hardware-prefetched lines only. +.It Li i +Count operations affecting I (invalid) state lines. +.It Li m +Count operations affecting M (modified) state lines. +.It Li nonhw +.Pq Tn "Pentium M" only +Exclude hardware-prefetched lines. +.It Li s +Count operations affecting S (shared) state lines. +.El +The default on processors other than +.Tn "Pentium M" +processors is to count operations affecting all (MESI) state lines. +The default on +.Tn "Pentium M" +processors is to count both hardware-prefetched and +non-hardware-prefetch operations on all (MESI) state lines. +.It Li p6-l2-m-lines-inm +Count the number of modified lines allocated in L2 cache. +.It Li p6-l2-m-lines-outm Op Li ,umask= Ns Ar qualifier +Count the number of L2 M-state lines evicted. +.Pp +.Pq Tn "Pentium M" +On these processors an additional qualifier may be specified and +comprises a list of the following keywords separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li both +Count both hardware-prefetched lines and non-hardware-prefetched lines. +.It Li hw +Count hardware-prefetched lines only. +.It Li nonhw +Exclude hardware-prefetched lines. +.El +The default is to count both hardware-prefetched and +non-hardware-prefetch operations. +.It Li p6-l2-rqsts Op Li ,umask= Ns Ar qualifier +Count the total number of L2 requests. +An additional qualifier may be specified and comprises a list of the following +keywords separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li e +Count operations affecting E (exclusive) state lines. +.It Li i +Count operations affecting I (invalid) state lines. +.It Li m +Count operations affecting M (modified) state lines. +.It Li s +Count operations affecting S (shared) state lines. +.El +The default is to count operations affecting all (MESI) state lines. +.It Li p6-l2-st +Count the number of L2 data stores. 
+An additional qualifier may be specified and comprises a list of the following +keywords separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li e +Count operations affecting E (exclusive) state lines. +.It Li i +Count operations affecting I (invalid) state lines. +.It Li m +Count operations affecting M (modified) state lines. +.It Li s +Count operations affecting S (shared) state lines. +.El +The default is to count operations affecting all (MESI) state lines. +.It Li p6-ld-blocks +Count the number of load operations delayed due to store buffer blocks. +.It Li p6-misalign-mem-ref +Count the number of misaligned data memory references (crossing a 64 +bit boundary). +.It Li p6-mmx-assist +.Pq Tn "Pentium II" , Tn "Pentium III" +Count the number of MMX assists executed. +.It Li p6-mmx-instr-exec +.Pq Tn "Celeron" , Tn "Pentium II" +Count the number of MMX instructions executed, except MOVQ and MOVD +stores from register to memory. +.It Li p6-mmx-instr-ret +.Pq Tn "Pentium II" +Count the number of MMX instructions retired. +.It Li p6-mmx-instr-type-exec Op Li ,umask= Ns Ar qualifier +.Pq Tn "Pentium II" , Tn "Pentium III" +Count the number of MMX instructions executed. +An additional qualifier may be specified and comprises a list of +the following keywords separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li pack +Count MMX pack operation instructions. +.It Li packed-arithmetic +Count MMX packed arithmetic instructions. +.It Li packed-logical +Count MMX packed logical instructions. +.It Li packed-multiply +Count MMX packed multiply instructions. +.It Li packed-shift +Count MMX packed shift instructions. +.It Li unpack +Count MMX unpack operation instructions. +.El +The default is to count all operations. +.It Li p6-mmx-sat-instr-exec +.Pq Tn "Pentium II" , Tn "Pentium III" +Count the number of MMX saturating instructions executed. 
+.It Li p6-mmx-uops-exec +.Pq Tn "Pentium II" , Tn "Pentium III" +Count the number of MMX micro-ops executed. +.It Li p6-mul +Count the number of floating point multiplies. +This event is only allocated on counter 1. +.It Li p6-partial-rat-stalls +Count the number of cycles or events for partial stalls. +.It Li p6-resource-stalls +Count the number of cycles there was a resource related stall of any kind. +.It Li p6-ret-seg-renames +.Pq Tn "Pentium II" , Tn "Pentium III" +Count the number of segment register rename events retired. +.It Li p6-sb-drains +Count the number of cycles the store buffer is draining. +.It Li p6-seg-reg-renames Op Li ,umask= Ns Ar qualifier +.Pq Tn "Pentium II" , Tn "Pentium III" +Count the number of segment register renames. +An additional qualifier may be specified, and comprises a list of the +following keywords separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li ds +Count renames for segment register DS. +.It Li es +Count renames for segment register ES. +.It Li fs +Count renames for segment register FS. +.It Li gs +Count renames for segment register GS. +.El +The default is to count operations affecting all segment registers. +.It Li p6-seg-rename-stalls +.Pq Tn "Pentium II" , Tn "Pentium III" +Count the number of segment register renaming stalls. +An additional qualifier may be specified, and comprises a list of the +following keywords separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li ds +Count stalls for segment register DS. +.It Li es +Count stalls for segment register ES. +.It Li fs +Count stalls for segment register FS. +.It Li gs +Count stalls for segment register GS. +.El +The default is to count operations affecting all the segment registers. +.It Li p6-segment-reg-loads +Count the number of segment register loads. +.It Li p6-uops-retired +Count the number of micro-ops retired. +.El +.Ss Intel P4 PMCS +Intel P4 PMCs are present in Intel +.Tn "Pentium 4" +and +.Tn Xeon +processors. 
+These PMCs are documented in +.Rs +.%B "IA-32 Intel(R) Architecture Software Developer's Manual" +.%T "Volume 3: System Programming Guide" +.%N "Order Number 245472-012" +.%D 2003 +.%Q "Intel Corporation" +.Re +Further information about using these PMCs may be found in +.Rs +.%B "IA-32 Intel(R) Architecture Optimization Guide" +.%D 2003 +.%N "Order Number 248966-009" +.%Q "Intel Corporation" +.Re +.Pp +Event specifiers for Intel P4 PMCs can have the following common +qualifiers: +.Bl -tag -width indent +.It Li active= Ns Ar choice +(On P4 HTT CPUs) Filter event counting based on which logical +processors are active. +The allowed values of +.Ar choice +are: +.Bl -tag -width indent -compact +.It Li any +Count when either logical processor is active. +.It Li both +Count when both logical processors are active. +.It Li none +Count only when neither logical processor is active. +.It Li single +Count only when one logical processor is active. +.El +The default is +.Li both . +.It Li cascade +Configure the PMC to cascade onto its partner. +The PMC for the partner must already have been allocated by the +current process. +See +.Sx "Cascading P4 PMCs" +below for more information. +.It Li edge +Configure the counter to count false to true transitions of the threshold +comparison output. +This qualifier only takes effect if a threshold qualifier has also been +specified. +.It Li complement +Configure the counter to increment only when the event count seen is +less than the threshold qualifier value specified. +.It Li mask= Ns Ar qualifier +Many event specifiers for Intel P4 PMCs need to be additionally +qualified using a mask qualifier. +The allowed syntax for these qualifiers is event specific and is +described along with the events. +.It Li os +Configure the PMC to count when the CPL of the processor is 0. +.It Li precise +Select precise event based sampling. +Precise sampling is supported by the hardware for a limited set of +events.
+.It Li tag= Ns Ar value +Configure the PMC to tag the internal uop selected by the other +fields in this event specifier with value +.Ar value . +This feature is used when cascading PMCs. +.It Li threshold= Ns Ar value +Configure the PMC to increment only when the event counts seen are +greater than the specified threshold value +.Ar value . +.It Li usr +Configure the PMC to count when the CPL of the processor is 1, 2 or 3. +.El +If neither of the +.Li os +or +.Li usr +qualifiers are specified, the default is to enable both. +.Pp +On Intel Pentium 4 processors with HTT, events are +divided into two classes: +.Bl -tag -width "XXXXXXXXXX" -compact +.It "TS Events" +are those where hardware can differentiate between events +generated on one logical processor from those generated on the +other. +.It "TI Events" +are those where hardware cannot differentiate between events +generated by multiple logical processors in a package. +.El +Only TS events are allowed for use with process-mode PMCs on +Pentium-4/HTT CPUs. +.Pp +The event specifiers supported by Intel P4 PMCs are: +.Bl -tag -width indent +.It Li p4-128bit-mmx-uop Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count integer SIMD SSE2 instructions that operate on 128 bit SIMD +operands. +Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li all +Count all uops operating on 128 bit SIMD integer operands in memory or +XMM register. +.El +If an instruction contains more than one 128 bit MMX uop, then each +uop will be counted. +.It Li p4-64bit-mmx-uop Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count MMX instructions that operate on 64 bit SIMD operands. +Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li all +Count all uops operating on 64 bit SIMD integer operands in memory or +in MMX registers. 
+.El +If an instruction contains more than one 64 bit MMX uop, then each +uop will be counted. +.It Li p4-b2b-cycles +.Pq "TI event" +Count back-to-back bus cycles. +Further documentation for this event is unavailable. +.It Li p4-bnr +.Pq "TI event" +Count bus-not-ready conditions. +Further documentation for this event is unavailable. +.It Li p4-bpu-fetch-request Op Li ,mask= Ns Ar qualifier +.Pq "TS event" +Count instruction fetch requests qualified by additional +flags specified in +.Ar qualifier . +At this point only one flag is supported: +.Bl -tag -width indent -compact +.It Li tcmiss +Count trace cache lookup misses. +.El +The default qualifier is also +.Ar mask=tcmiss . +.It Li p4-branch-retired Op Li ,mask= Ns Ar flags +.Pq "TS event" +Counts retired branches. +Qualifier +.Ar flags +is a list of the following +.Li + +separated strings: +.Bl -tag -width indent -compact +.It Li mmnp +Count branches not-taken and predicted. +.It Li mmnm +Count branches not-taken and mis-predicted. +.It Li mmtp +Count branches taken and predicted. +.It Li mmtm +Count branches taken and mis-predicted. +.El +The default qualifier counts all four kinds of branches. +.It Li p4-bsq-active-entries Op Li ,mask= Ns Ar qualifier +.Pq "TS event" +Count the number of entries (clipped at 15) currently active in the +BSQ. +Qualifier +.Ar qualifier +is a +.Li + +separated set of the following flags: +.Bl -tag -width indent -compact +.It Li req-type0 , Li req-type1 +Forms a 2-bit number used to select the request type encoding: +.Bl -tag -width indent -compact +.It Li 0 +reads excluding read invalidate +.It Li 1 +read invalidates +.It Li 2 +writes other than writebacks +.It Li 3 +writebacks +.El +Bit +.Li req-type1 +is the MSB for this two bit number.
+.It Li req-len0 , Li req-len1 +Forms a two-bit number that specifies the request length encoding: +.Bl -tag -width indent -compact +.It Li 0 +0 chunks +.It Li 1 +1 chunk +.It Li 3 +8 chunks +.El +Bit +.Li req-len1 +is the MSB for this two bit number. +.It Li req-io-type +Count requests that are input or output requests. +.It Li req-lock-type +Count requests that lock the bus. +.It Li req-lock-cache +Count requests that lock the cache. +.It Li req-split-type +Count requests that are a bus 8-byte chunk split across an +8-byte boundary. +.It Li req-dem-type +Count requests that are demand (not prefetches) if set. +Count requests that are prefetches if not set. +.It Li req-ord-type +Count requests that are ordered. +.It Li mem-type0 , Li mem-type1 , Li mem-type2 +Forms a 3-bit number that specifies a memory type encoding: +.Bl -tag -width indent -compact +.It Li 0 +UC +.It Li 1 +USWC +.It Li 4 +WT +.It Li 5 +WP +.It Li 6 +WB +.El +Bit +.Li mem-type2 +is the MSB of this 3-bit number. +.El +The default qualifier has all the above bits set. +.Pp +Edge triggering using the +.Li edge +qualifier should not be used with this event when counting cycles. +.It Li p4-bsq-allocation Op Li ,mask= Ns Ar qualifier +.Pq "TS event" +Count allocations in the bus sequence unit according to the flags +specified in +.Ar qualifier , +which is a +.Li + +separated set of the following flags: +.Bl -tag -width indent -compact +.It Li req-type0 , Li req-type1 +Forms a 2-bit number used to select the request type encoding: +.Bl -tag -width indent -compact +.It Li 0 +reads excluding read invalidate +.It Li 1 +read invalidates +.It Li 2 +writes other than writebacks +.It Li 3 +writebacks +.El +Bit +.Li req-type1 +is the MSB for this two bit number.
+.It Li req-len0 , Li req-len1 +Forms a two-bit number that specifies the request length encoding: +.Bl -tag -width indent -compact +.It Li 0 +0 chunks +.It Li 1 +1 chunk +.It Li 3 +8 chunks +.El +Bit +.Li req-len1 +is the MSB for this two bit number. +.It Li req-io-type +Count requests that are input or output requests. +.It Li req-lock-type +Count requests that lock the bus. +.It Li req-lock-cache +Count requests that lock the cache. +.It Li req-split-type +Count requests that are a bus 8-byte chunk split across an +8-byte boundary. +.It Li req-dem-type +Count requests that are demand (not prefetches) if set. +Count requests that are prefetches if not set. +.It Li req-ord-type +Count requests that are ordered. +.It Li mem-type0 , Li mem-type1 , Li mem-type2 +Forms a 3-bit number that specifies a memory type encoding: +.Bl -tag -width indent -compact +.It Li 0 +UC +.It Li 1 +USWC +.It Li 4 +WT +.It Li 5 +WP +.It Li 6 +WB +.El +Bit +.Li mem-type2 +is the MSB of this 3-bit number. +.El +The default qualifier has all the above bits set. +.Pp +This event is usually used along with the +.Li edge +qualifier to avoid multiple counting. +.It Li p4-bsq-cache-reference Op Li ,mask= Ns Ar qualifier +.Pq "TS event" +Count cache references as seen by the bus unit (2nd or 3rd level +cache references). +Qualifier +.Ar qualifier +is a +.Li + +separated list of the following keywords: +.Bl -tag -width indent -compact +.It Li rd-2ndl-hits +Count 2nd level cache hits in the shared state. +.It Li rd-2ndl-hite +Count 2nd level cache hits in the exclusive state. +.It Li rd-2ndl-hitm +Count 2nd level cache hits in the modified state. +.It Li rd-3rdl-hits +Count 3rd level cache hits in the shared state. +.It Li rd-3rdl-hite +Count 3rd level cache hits in the exclusive state. +.It Li rd-3rdl-hitm +Count 3rd level cache hits in the modified state. +.It Li rd-2ndl-miss +Count 2nd level cache misses. +.It Li rd-3rdl-miss +Count 3rd level cache misses.
+.It Li wr-2ndl-miss +Count write-back lookups from the data access cache that miss the 2nd +level cache. +.El +The default is to count all the above events. +.It Li p4-execution-event Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count the retirement of tagged uops selected through the execution +tagging mechanism. +Qualifier +.Ar flags +can contain the following strings separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li nbogus0 , Li nbogus1 , Li nbogus2 , Li nbogus3 +The marked uops are not bogus. +.It Li bogus0 , Li bogus1 , Li bogus2 , Li bogus3 +The marked uops are bogus. +.El +This event requires additional (upstream) events to be allocated to +perform the desired uop tagging. +The default is to set all the above flags. +This event can be used for precise event based sampling. +.It Li p4-front-end-event Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count the retirement of tagged uops selected through the front-end +tagging mechanism. +Qualifier +.Ar flags +can contain the following strings separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li nbogus +The marked uops are not bogus. +.It Li bogus +The marked uops are bogus. +.El +This event requires additional (upstream) events to be allocated to +perform the desired uop tagging. +The default is to select both kinds of events. +This event can be used for precise event based sampling. +.It Li p4-fsb-data-activity Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count each DBSY or DRDY event selected by qualifier +.Ar flags . +Qualifier +.Ar flags +is a +.Li + +separated set of the following flags: +.Bl -tag -width indent -compact +.It Li drdy-drv +Count when this processor is driving data onto the bus. +.It Li drdy-own +Count when this processor is reading data from the bus. +.It Li drdy-other +Count when data is on the bus but not being sampled by this processor. +.It Li dbsy-drv +Count when this processor reserves the bus for use in the next cycle +in order to drive data. 
+.It Li dbsy-own +Count when some agent reserves the bus for use in the next bus cycle +to drive data that this processor will sample. +.It Li dbsy-other +Count when some agent reserves the bus for use in the next bus cycle +to drive data that this processor will not sample. +.El +Flags +.Li drdy-own +and +.Li drdy-other +are mutually exclusive. +Flags +.Li dbsy-own +and +.Li dbsy-other +are mutually exclusive. +The default value for +.Ar flags +is +.Li drdy-drv+drdy-own+dbsy-drv+dbsy-own . +.It Li p4-global-power-events Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count cycles during which the processor is not stopped. +Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li running +Count cycles when the processor is active. +.El +.It Li p4-instr-retired Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count instructions retired during a clock cycle. +Qualifier +.Ar flags +comprises the following strings separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li nbogusntag +Count non-bogus instructions that are not tagged. +.It Li nbogustag +Count non-bogus instructions that are tagged. +.It Li bogusntag +Count bogus instructions that are not tagged. +.It Li bogustag +Count bogus instructions that are tagged. +.El +The default qualifier counts all the above kinds of instructions. +.It Li p4-ioq-active-entries Xo +.Op Li ,mask= Ns Ar qualifier +.Op Li ,busreqtype= Ns Ar req-type +.Xc +.Pq "TS event" +Count the number of entries (clipped at 15) in the IOQ that are +active. +The event masks are specified by qualifier +.Ar qualifier +and +.Ar req-type . +.Pp +Qualifier +.Ar qualifier +is a +.Li + +separated set of the following flags: +.Bl -tag -width indent -compact +.It Li all-read +Count read entries. +.It Li all-write +Count write entries. +.It Li mem-uc +Count entries accessing uncacheable memory. +.It Li mem-wc +Count entries accessing write-combining memory.
+.It Li mem-wt +Count entries accessing write-through memory. +.It Li mem-wp +Count entries accessing write-protected memory. +.It Li mem-wb +Count entries accessing write-back memory. +.It Li own +Count store requests driven by the processor (i.e., not by other +processors or by DMA). +.It Li other +Count store requests driven by other processors or by DMA. +.It Li prefetch +Include hardware and software prefetch requests in the count. +.El +The default value for +.Ar qualifier +is to enable all the above flags. +.Pp +The +.Ar req-type +qualifier is a 5-bit number that can be additionally used to select a +specific bus request type. +The default is 0. +.Pp +The +.Li edge +qualifier should not be used when counting cycles with this event. +The exact behaviour of this event depends on the processor revision. +.It Li p4-ioq-allocation Xo +.Op Li ,mask= Ns Ar qualifier +.Op Li ,busreqtype= Ns Ar req-type +.Xc +.Pq "TS event" +Count various types of transactions on the bus matching the flags set +in +.Ar qualifier +and +.Ar req-type . +.Pp +Qualifier +.Ar qualifier +is a +.Li + +separated set of the following flags: +.Bl -tag -width indent -compact +.It Li all-read +Count read entries. +.It Li all-write +Count write entries. +.It Li mem-uc +Count entries accessing uncacheable memory. +.It Li mem-wc +Count entries accessing write-combining memory. +.It Li mem-wt +Count entries accessing write-through memory. +.It Li mem-wp +Count entries accessing write-protected memory. +.It Li mem-wb +Count entries accessing write-back memory. +.It Li own +Count store requests driven by the processor (i.e., not by other +processors or by DMA). +.It Li other +Count store requests driven by other processors or by DMA. +.It Li prefetch +Include hardware and software prefetch requests in the count. +.El +The default value for +.Ar qualifier +is to enable all the above flags. +.Pp +The +.Ar req-type +qualifier is a 5-bit number that can be additionally used to select a +specific bus request type.
+The default is 0. +.Pp +The +.Li edge +qualifier is normally used with this event to prevent multiple +counting. +The exact behaviour of this event depends on the processor revision. +.It Li p4-itlb-reference Op Li ,mask= Ns Ar qualifier +.Pq "TS event" +Count translations using the instruction translation look-aside +buffer. +The +.Ar qualifier +argument is a list of the following strings separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li hit +Count ITLB hits. +.It Li miss +Count ITLB misses. +.It Li hit-uc +Count uncacheable ITLB hits. +.El +If no +.Ar qualifier +is specified, the default is to count all three kinds of ITLB +translations. +.It Li p4-load-port-replay Op Li ,mask= Ns Ar qualifier +.Pq "TS event" +Count replayed events at the load port. +Qualifier +.Ar qualifier +can take on one value: +.Bl -tag -width indent -compact +.It Li split-ld +Count split loads. +.El +The default value for +.Ar qualifier +is +.Li split-ld . +.It Li p4-mispred-branch-retired Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count mispredicted IA-32 branch instructions. +Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li nbogus +Count non-bogus retired branch instructions. +.El +.It Li p4-machine-clear Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count the number of pipeline clears seen by the processor. +Qualifier +.Ar flags +is a list of the following strings separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li clear +Count for a portion of the many cycles when the machine is being +cleared for any reason. +.It Li moclear +Count machine clears due to memory ordering issues. +.It Li smclear +Count machine clears due to self-modifying code. +.El +Use qualifier +.Li edge +to get a count of occurrences of machine clears. +The default qualifier is +.Li clear .
+.It Li p4-memory-cancel Op Li ,mask= Ns Ar event-list +.Pq "TS event" +Count the cancelling of various kinds of requests in the data cache +address control unit of the CPU. +The qualifier +.Ar event-list +is a list of the following strings separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li st-rb-full +Requests cancelled because no store request buffer was available. +.It Li 64k-conf +Requests that conflict due to 64K aliasing. +.El +If +.Ar event-list +is not specified, then the default is to count both kinds of events. +.It Li p4-memory-complete Op Li ,mask= Ns Ar event-list +.Pq "TS event" +Count the completion of load split, store split, uncacheable split and +uncacheable load operations selected by qualifier +.Ar event-list . +The qualifier +.Ar event-list +is a +.Li + +separated list of the following flags: +.Bl -tag -width indent -compact +.It Li lsc +Count load splits completed, excluding loads from uncacheable or +write-combining areas. +.It Li ssc +Count any split stores completed. +.El +The default is to count both kinds of operations. +.It Li p4-mob-load-replay Op Li ,mask= Ns Ar qualifier +.Pq "TS event" +Count load replays triggered by the memory order buffer. +Qualifier +.Ar qualifier +can be a +.Li + +separated list of the following flags: +.Bl -tag -width indent -compact +.It Li no-sta +Count replays because of unknown store addresses. +.It Li no-std +Count replays because of unknown store data. +.It Li partial-data +Count replays because of partially overlapped data accesses between +load and store operations. +.It Li unalgn-addr +Count replays because of mismatches in the lower 4 bits of load and +store operations. +.El +The default qualifier is +.Ar no-sta+no-std+partial-data+unalgn-addr . +.It Li p4-packed-dp-uop Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count packed double-precision uops. 
+Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li all +Count all uops operating on packed double-precision operands. +.El +.It Li p4-packed-sp-uop Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count packed single-precision uops. +Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li all +Count all uops operating on packed single-precision operands. +.El +.It Li p4-page-walk-type Op Li ,mask= Ns Ar qualifier +.Pq "TI event" +Count page walks performed by the page miss handler. +Qualifier +.Ar qualifier +can be a +.Li + +separated list of the following keywords: +.Bl -tag -width indent -compact +.It Li dtmiss +Count page walks for data TLB misses. +.It Li itmiss +Count page walks for instruction TLB misses. +.El +The default value for +.Ar qualifier +is +.Li dtmiss+itmiss . +.It Li p4-replay-event Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count the retirement of tagged uops selected through the replay +tagging mechanism. +Qualifier +.Ar flags +contains a +.Li + +separated set of the following strings: +.Bl -tag -width indent -compact +.It Li nbogus +The marked uops are not bogus. +.It Li bogus +The marked uops are bogus. +.El +This event requires additional (upstream) events to be allocated to +perform the desired uop tagging. +The default qualifier counts both kinds of uops. +This event can be used for precise event based sampling. +.It Li p4-resource-stall Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count the occurrence or latency of stalls in the allocator. +Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li sbfull +A stall due to the lack of store buffers. +.El +.It Li p4-response +.Pq "TI event" +Count different types of responses. +Further documentation on this event is not available. 
+.It Li p4-retired-branch-type Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count branches retired. +Qualifier +.Ar flags +contains a +.Li + +separated list of strings: +.Bl -tag -width indent -compact +.It Li conditional +Count conditional jumps. +.It Li call +Count direct and indirect call branches. +.It Li return +Count return branches. +.It Li indirect +Count returns, indirect calls or indirect jumps. +.El +The default qualifier counts all the above branch types. +.It Li p4-retired-mispred-branch-type Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count mispredicted branches retired. +Qualifier +.Ar flags +contains a +.Li + +separated list of strings: +.Bl -tag -width indent -compact +.It Li conditional +Count conditional jumps. +.It Li call +Count indirect call branches. +.It Li return +Count return branches. +.It Li indirect +Count returns, indirect calls or indirect jumps. +.El +The default qualifier counts all the above branch types. +.It Li p4-scalar-dp-uop Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count the number of scalar double-precision uops. +Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li all +Count the number of scalar double-precision uops. +.El +.It Li p4-scalar-sp-uop Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count the number of scalar single-precision uops. +Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li all +Count all uops operating on scalar single-precision operands. +.El +.It Li p4-snoop +.Pq "TI event" +Count snoop traffic. +Further documentation on this event is not available. +.It Li p4-sse-input-assist Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count the number of times an assist is required to handle problems +with the operands for SSE and SSE2 operations. 
+Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li all +Count assists for all SSE and SSE2 uops. +.El +.It Li p4-store-port-replay Op Li ,mask= Ns Ar qualifier +.Pq "TS event" +Count events replayed at the store port. +Qualifier +.Ar qualifier +can take on one value: +.Bl -tag -width indent -compact +.It Li split-st +Count split stores. +.El +The default value for +.Ar qualifier +is +.Li split-st . +.It Li p4-tc-deliver-mode Op Li ,mask= Ns Ar qualifier +.Pq "TI event" +Count the duration in cycles of operating modes of the trace cache and +decode engine. +The desired operating mode is selected by +.Ar qualifier , +which is a list of the following strings separated by +.Li "+" +characters: +.Bl -tag -width indent -compact +.It Li DD +Both logical processors are in deliver mode. +.It Li DB +Logical processor 0 is in deliver mode while logical processor 1 is in +build mode. +.It Li DI +Logical processor 0 is in deliver mode while logical processor 1 is +halted, or in machine clear, or transitioning to a long microcode +flow. +.It Li BD +Logical processor 0 is in build mode while logical processor 1 is in +deliver mode. +.It Li BB +Both logical processors are in build mode. +.It Li BI +Logical processor 0 is in build mode while logical processor 1 is +halted, or in machine clear or transitioning to a long microcode +flow. +.It Li ID +Logical processor 0 is halted, or in machine clear or transitioning to +a long microcode flow while logical processor 1 is in deliver mode. +.It Li IB +Logical processor 0 is halted, or in machine clear or transitioning to +a long microcode flow while logical processor 1 is in build mode. +.El +If there is only one logical processor in the processor package then +the qualifier for logical processor 1 is ignored. +If no qualifier is specified, the default qualifier is +.Li DD+DB+DI+BD+BB+BI+ID+IB . 
+.It Li p4-tc-ms-xfer Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count the number of times uop delivery changed from the trace cache to +MS ROM. +Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li cisc +Count TC to MS transfers. +.El +.It Li p4-uop-queue-writes Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count the number of valid uops written to the uop queue. +Qualifier +.Ar flags +is a list of the following strings, separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li from-tc-build +Count uops being written from the trace cache in build mode. +.It Li from-tc-deliver +Count uops being written from the trace cache in deliver mode. +.It Li from-rom +Count uops being written from microcode ROM. +.El +The default qualifier counts all the above kinds of uops. +.It Li p4-uop-type Op Li ,mask= Ns Ar flags +.Pq "TS event" +This event is used in conjunction with the front-end at-retirement +mechanism to tag load and store uops. +Qualifier +.Ar flags +comprises the following strings separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li tagloads +Mark uops that are load operations. +.It Li tagstores +Mark uops that are store operations. +.El +The default qualifier counts both kinds of uops. +.It Li p4-uops-retired Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count uops retired during a clock cycle. +Qualifier +.Ar flags +comprises the following strings separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li nbogus +Count marked uops that are not bogus. +.It Li bogus +Count marked uops that are bogus. +.El +The default qualifier counts both kinds of uops. +.It Li p4-wc-buffer Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count write-combining buffer operations. +Qualifier +.Ar flags +contains the following strings separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li wcb-evicts +WC buffer evictions due to any cause.
+.It Li wcb-full-evict +WC buffer evictions due to no WC buffer being available. +.El +The default qualifier counts both kinds of evictions. +.It Li p4-x87-assist Op Li ,mask= Ns Ar flags +.Pq "TS event" +Count the retirement of x87 instructions that required special +handling. +Qualifier +.Ar flags +contains the following strings separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li fpsu +Count instructions that saw an FP stack underflow. +.It Li fpso +Count instructions that saw an FP stack overflow. +.It Li poao +Count instructions that saw an x87 output overflow. +.It Li poau +Count instructions that saw an x87 output underflow. +.It Li prea +Count instructions that needed an x87 input assist. +.El +The default qualifier counts all the above types of instruction +retirements. +.It Li p4-x87-fp-uop Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count x87 floating-point uops. +Qualifier +.Ar flags +can take the following value (which is also the default): +.Bl -tag -width indent -compact +.It Li all +Count all x87 floating-point uops. +.El +If an instruction contains more than one x87 floating-point uop, then +all x87 floating-point uops will be counted. +This event does not count x87 floating-point data movement operations. +.It Li p4-x87-simd-moves-uop Op Li ,mask= Ns Ar flags +.Pq "TI event" +Count each x87 FPU, MMX, SSE, or SSE2 uop that loads data, stores +data or performs register-to-register moves. +This event does not count integer move uops. +Qualifier +.Ar flags +may contain the following keywords separated by +.Li + +characters: +.Bl -tag -width indent -compact +.It Li allp0 +Count all x87 and SIMD store and move uops. +.It Li allp2 +Count all x87 and SIMD load uops. +.El +The default is to count all uops. +.El +.Ss "Cascading P4 PMCs" +To be filled in. +.Ss "Precise Event Based Sampling" +To be filled in.
+.Sh IMPLEMENTATION NOTES +On the i386 architecture, +.Fx +has historically allowed the use of the RDTSC instruction from +user-mode (i.e., at a processor CPL of 3) by any process. +This behaviour is preserved by +.Xr hwpmc 4 . +.Sh RETURN VALUES +The +.Fn pmc_name_of_capability , +.Fn pmc_name_of_class , +.Fn pmc_name_of_cputype , +.Fn pmc_name_of_disposition , +.Fn pmc_name_of_event , +.Fn pmc_name_of_mode , +and +.Fn pmc_name_of_state +functions return a pointer to the human readable form of their argument. +These pointers may point to statically allocated storage and must +not be passed to +.Fn free . +In case of an error, these functions return +.Li NULL +and set the global variable +.Va errno . +.Pp +The functions +.Fn pmc_ncpu +and +.Fn pmc_npmc +return the number of CPUs and number of PMCs configured respectively; +in case of an error they return the value +.Li -1 +and set the global variable +.Va errno . +.Pp +All other functions return the value +.Li 0 +if successful; otherwise the value +.Li -1 +is returned and the global variable +.Va errno +is set to indicate the error. +.Sh ERRORS +A call to +.Fn pmc_init +may fail with the following errors in addition to those returned by +.Xr modfind 2 , +.Xr modstat 2 +and +.Xr hwpmc 4 : +.Bl -tag -width Er +.It Bq Er ENXIO +An unknown CPU type was encountered during initialization. +.It Bq Er EPROGMISMATCH +The version number of the +.Xr hwpmc 4 +kernel module did not match that compiled into the +.Xr pmc 3 +library. +.El +.Pp +A call to +.Fn pmc_name_of_capability , +.Fn pmc_name_of_disposition , +.Fn pmc_name_of_state , +.Fn pmc_name_of_event , +.Fn pmc_name_of_mode +and +.Fn pmc_name_of_class +may fail with the following error: +.Bl -tag -width Er +.It Bq Er EINVAL +An invalid argument was passed to the function. +.El +.Pp +A call to +.Fn pmc_cpuinfo +or +.Fn pmc_ncpu +may fail with the following error: +.Bl -tag -width Er +.It Bq Er ENXIO +The +.Xr pmc 3 +library has not been initialized.
+.El +.Pp +A call to +.Fn pmc_npmc +may fail with the following errors: +.Bl -tag -width Er +.It Bq Er EINVAL +The argument passed in was out of range. +.It Bq Er ENXIO +The +.Xr pmc 3 +library has not been initialized. +.El +.Pp +A call to +.Fn pmc_pmcinfo +may fail with the following errors, in addition to those returned by +.Xr hwpmc 4 : +.Bl -tag -width Er +.It Bq Er ENXIO +The +.Xr pmc 3 +library is not yet initialized. +.El +.Pp +A call to +.Fn pmc_allocate +may fail with the following errors, in addition to those returned by +.Xr hwpmc 4 : +.Bl -tag -width Er +.It Bq Er EINVAL +The +.Fa mode +argument passed in had an illegal value, or the event specification +.Fa ctrspec +was unrecognized for this CPU type. +.El +.Pp +Calls to +.Fn pmc_attach , +.Fn pmc_detach , +.Fn pmc_release , +.Fn pmc_start , +.Fn pmc_stop , +.Fn pmc_read , +.Fn pmc_write , +.Fn pmc_rw , +.Fn pmc_set , +.Fn pmc_configure_logfile , +.Fn pmc_get_driver_stats , +.Fn pmc_enable , +.Fn pmc_disable , +and +.Fn pmc_x86_get_msr +may fail with the errors described in +.Xr hwpmc 4 . +.Sh SEE ALSO +.Xr modfind 2 , +.Xr modstat 2 , +.Xr hwpmc 4 , +.Xr pmccontrol 8 , +.Xr pmcreport 8 , +.Xr pmcstat 8 +.Sh BUGS +The information returned by +.Fn pmc_cpuinfo , +.Fn pmc_ncpu +and possibly +.Fn pmc_npmc +should really be available all the time, through a better designed +interface. +.Pp +The API for +.Fn pmc_cpuinfo +and +.Fn pmc_pmcinfo +exposes too much of the underlying +.Xr hwpmc 4 +driver's internals to userland. diff --git a/lib/libpmc/pmc.h b/lib/libpmc/pmc.h new file mode 100644 index 0000000..98c4af2 --- /dev/null +++ b/lib/libpmc/pmc.h @@ -0,0 +1,79 @@ +/*- + * Copyright (c) 2003,2004 Joseph Koshy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1.
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _PMC_H_ +#define _PMC_H_ + +#include <sys/pmc.h> + +/* + * Prototypes + * + * Return value conventions, as documented in pmc(3): + * - the pmc_name_of_*() functions return a pointer to the human readable + * form of their argument; the pointer may refer to statically allocated + * storage and must not be passed to free(). On error they return NULL + * and set errno. + * - pmc_ncpu() and pmc_npmc() return a count, or -1 with errno set. + * - all other functions return 0 on success, or -1 with errno set to + * indicate the error. + */ + +int pmc_allocate(const char *_ctrspec, enum pmc_mode _mode, uint32_t _flags, + int _cpu, pmc_id_t *_pmcid); +int pmc_attach(pmc_id_t _pmcid, pid_t _pid); +int pmc_configure_logfile(int _fd); +int pmc_detach(pmc_id_t _pmcid, pid_t _pid); +int pmc_disable(int _cpu, int _pmc); +int pmc_enable(int _cpu, int _pmc); +int pmc_get_driver_stats(struct pmc_op_getdriverstats *_gms); +int pmc_init(void); +int pmc_read(pmc_id_t _pmc, pmc_value_t *_value); +int pmc_release(pmc_id_t _pmc); +int pmc_rw(pmc_id_t _pmc, pmc_value_t _newvalue, pmc_value_t *_oldvalue); +int pmc_set(pmc_id_t _pmc, pmc_value_t _value); +int pmc_start(pmc_id_t _pmc); +int pmc_stop(pmc_id_t _pmc); +int pmc_write(pmc_id_t _pmc, pmc_value_t _value); + +int pmc_ncpu(void); +int pmc_npmc(int _cpu); +int pmc_cpuinfo(const struct pmc_op_getcpuinfo **_cpu_info); +int pmc_pmcinfo(int _cpu, struct pmc_op_getpmcinfo **_pmc_info); + +const char *pmc_name_of_capability(uint32_t _c); +const char *pmc_name_of_class(enum pmc_class _pc); +const char *pmc_name_of_cputype(enum pmc_cputype _cp); +const char *pmc_name_of_disposition(enum pmc_disp _pd); +const char *pmc_name_of_event(enum pmc_event _pe); +const char *pmc_name_of_mode(enum pmc_mode _pm); +const char *pmc_name_of_state(enum pmc_state _ps); + +int pmc_event_names_of_class(enum pmc_class _cl, const char ***_eventnames, + int *_nevents); + +/* + * Architecture specific extensions + * + * pmc_x86_get_msr() is declared only when building for i386 or amd64. + */ + +#if __i386__ || __amd64__ +int pmc_x86_get_msr(pmc_id_t _pmc, uint32_t *_msr); +#endif /* __i386__ || __amd64__ */ + +#endif /* _PMC_H_ */ |