summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorrrs <rrs@FreeBSD.org>2015-12-10 01:52:29 +0000
committerrrs <rrs@FreeBSD.org>2015-12-10 01:52:29 +0000
commit7d03b5290e120264ab7a32f6b787241e05841c65 (patch)
tree360ed40977204adbb0b4dba0444489815d62e6ad
parent8f18910d2bd36d55a66bd317d2a7a88fa03f1c06 (diff)
downloadFreeBSD-src-7d03b5290e120264ab7a32f6b787241e05841c65.zip
FreeBSD-src-7d03b5290e120264ab7a32f6b787241e05841c65.tar.gz
Fix several typos and bugs within pmcstudy. Also highlight the one SB test
that is failing (and is likely a problem in the actual PMC definitions). Also add the -A option to run all canned tests. Sponsored by: Netflix Inc.
-rw-r--r--usr.sbin/pmcstudy/pmcstudy.84
-rw-r--r--usr.sbin/pmcstudy/pmcstudy.c1390
2 files changed, 722 insertions, 672 deletions
diff --git a/usr.sbin/pmcstudy/pmcstudy.8 b/usr.sbin/pmcstudy/pmcstudy.8
index ebe63c4..7c03897 100644
--- a/usr.sbin/pmcstudy/pmcstudy.8
+++ b/usr.sbin/pmcstudy/pmcstudy.8
@@ -32,7 +32,7 @@
.Nd Perform various studies on a system's overall PMCs.
.Sh SYNOPSIS
.Nm
-.Oo Fl i Ar inputfile | Fl T | Fl v | Fl m Ar max | Fl e exp | Fl Ar E | Fl h | fl H Oc
+.Oo Fl i Ar inputfile | Fl A | Fl T | Fl v | Fl m Ar max | Fl e exp | Fl Ar E | Fl h | fl H Oc
.Nm
.Fl i Ar inputfile
.Nm
@@ -128,6 +128,8 @@ test like
"UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD_P)".
.It Fl L
This option will list all known PMCs and their abbreviation (%NNN).
+.It Fl A
+Run all canned tests.
.El
.Sh SEE ALSO
.Xr pmc 3 ,
diff --git a/usr.sbin/pmcstudy/pmcstudy.c b/usr.sbin/pmcstudy/pmcstudy.c
index 06ded97..16c9f51 100644
--- a/usr.sbin/pmcstudy/pmcstudy.c
+++ b/usr.sbin/pmcstudy/pmcstudy.c
@@ -38,6 +38,9 @@
#include "eval_expr.h"
__FBSDID("$FreeBSD$");
+static int max_pmc_counters = 1;
+static int run_all = 0;
+
#define MAX_COUNTER_SLOTS 1024
#define MAX_NLEN 64
#define MAX_CPU 64
@@ -45,20 +48,20 @@ static int verbose = 0;
extern char **environ;
extern struct expression *master_exp;
-struct expression *master_exp = NULL;
+struct expression *master_exp=NULL;
#define PMC_INITIAL_ALLOC 512
extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
-int valid_pmc_cnt = 0;
+int valid_pmc_cnt=0;
extern int pmc_allocated_cnt;
-int pmc_allocated_cnt = 0;
+int pmc_allocated_cnt=0;
/*
* The following two varients on popen and pclose with
* the cavet that they get you the PID so that you
- * can supply it to pclose so it can send a SIGTERM
+ * can supply it to pclose so it can send a SIGTERM
* to the process.
*/
static FILE *
@@ -75,7 +78,7 @@ my_popen(const char *command, const char *dir, pid_t *p_pid)
if ((strcmp(dir, "r") != 0) &&
(strcmp(dir, "w") != 0)) {
errno = EINVAL;
- return (NULL);
+ return(NULL);
}
if (pipe(pdesin) < 0)
return (NULL);
@@ -94,14 +97,14 @@ my_popen(const char *command, const char *dir, pid_t *p_pid)
argv[3] = NULL;
switch (pid = fork()) {
- case -1: /* Error. */
+ case -1: /* Error. */
(void)close(pdesin[0]);
(void)close(pdesin[1]);
(void)close(pdesout[0]);
(void)close(pdesout[1]);
return (NULL);
/* NOTREACHED */
- case 0: /* Child. */
+ case 0: /* Child. */
/* Close out un-used sides */
(void)close(pdesin[1]);
(void)close(pdesout[0]);
@@ -129,8 +132,8 @@ my_popen(const char *command, const char *dir, pid_t *p_pid)
(void)close(pdesin[0]);
(void)close(pdesout[0]);
(void)close(pdesout[1]);
- return (io_out);
- } else {
+ return(io_out);
+ } else {
/* Prepare the input stream */
io_in = fdopen(pdesout[0], "r");
(void)close(pdesout[1]);
@@ -146,7 +149,7 @@ my_popen(const char *command, const char *dir, pid_t *p_pid)
* if already `pclosed', or waitpid returns an error.
*/
static void
-my_pclose(FILE * io, pid_t the_pid)
+my_pclose(FILE *io, pid_t the_pid)
{
int pstat;
pid_t pid;
@@ -164,41 +167,41 @@ my_pclose(FILE * io, pid_t the_pid)
struct counters {
struct counters *next_cpu;
- char counter_name[MAX_NLEN]; /* Name of counter */
- int cpu; /* CPU we are on */
- int pos; /* Index we are filling to. */
+ char counter_name[MAX_NLEN]; /* Name of counter */
+ int cpu; /* CPU we are on */
+ int pos; /* Index we are filling to. */
uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */
- uint64_t sum; /* Summary of entries */
+ uint64_t sum; /* Summary of entries */
};
extern struct counters *glob_cpu[MAX_CPU];
struct counters *glob_cpu[MAX_CPU];
extern struct counters *cnts;
-struct counters *cnts = NULL;
+struct counters *cnts=NULL;
extern int ncnts;
-int ncnts = 0;
+int ncnts=0;
-extern int (*expression) (struct counters *, int);
-int (*expression) (struct counters *, int);
+extern int (*expression)(struct counters *, int);
+int (*expression)(struct counters *, int);
-static const char *threshold = NULL;
+static const char *threshold=NULL;
static const char *command;
struct cpu_entry {
const char *name;
const char *thresh;
const char *command;
- int (*func) (struct counters *, int);
+ int (*func)(struct counters *, int);
+ int counters_required;
};
-
struct cpu_type {
char cputype[32];
int number;
struct cpu_entry *ents;
- void (*explain) (const char *name);
+ void (*explain)(const char *name);
};
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
@@ -207,7 +210,6 @@ static void
explain_name_sb(const char *name)
{
const char *mythresh;
-
if (strcmp(name, "allocstall1") == 0) {
printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
@@ -218,10 +220,10 @@ explain_name_sb(const char *name)
printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .2";
} else if (strcmp(name, "splitload") == 0) {
- printf("Examine MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
+ printf("Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "splitstore") == 0) {
- printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n");
+ printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n");
mythresh = "thresh >= .01";
} else if (strcmp(name, "contested") == 0) {
printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n");
@@ -279,7 +281,7 @@ explain_name_sb(const char *name)
} else {
printf("Unknown name:%s\n", name);
mythresh = "unknown entry";
- }
+ }
printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
@@ -287,7 +289,6 @@ static void
explain_name_ib(const char *name)
{
const char *mythresh;
-
if (strcmp(name, "br_miss") == 0) {
printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n");
printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n");
@@ -307,7 +308,7 @@ explain_name_ib(const char *name)
mythresh = "thresh >= .2";
} else if (strcmp(name, "itlbmiss") == 0) {
printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n");
- mythresh = "thresh > .05";
+ mythresh = "thresh > .05";
} else if (strcmp(name, "icachemiss") == 0) {
printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
@@ -325,7 +326,7 @@ explain_name_ib(const char *name)
printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "splitstore") == 0) {
- printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n");
+ printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n");
mythresh = "thresh >= .01";
} else if (strcmp(name, "aliasing_4k") == 0) {
printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
@@ -362,7 +363,7 @@ explain_name_ib(const char *name)
} else {
printf("Unknown name:%s\n", name);
mythresh = "unknown entry";
- }
+ }
printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
@@ -371,7 +372,6 @@ static void
explain_name_has(const char *name)
{
const char *mythresh;
-
if (strcmp(name, "eff1") == 0) {
printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh < .75";
@@ -380,7 +380,7 @@ explain_name_has(const char *name)
mythresh = "thresh > 1.0";
} else if (strcmp(name, "itlbmiss") == 0) {
printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n");
- mythresh = "thresh > .05";
+ mythresh = "thresh > .05";
} else if (strcmp(name, "icachemiss") == 0) {
printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh > .05";
@@ -406,10 +406,10 @@ explain_name_has(const char *name)
printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .05";
} else if (strcmp(name, "splitload") == 0) {
- printf("Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
+ printf("Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
mythresh = "thresh >= .1";
} else if (strcmp(name, "splitstore") == 0) {
- printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n");
+ printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n");
mythresh = "thresh >= .01";
} else if (strcmp(name, "aliasing_4k") == 0) {
printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n");
@@ -442,10 +442,12 @@ explain_name_has(const char *name)
} else {
printf("Unknown name:%s\n", name);
mythresh = "unknown entry";
- }
+ }
printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
+
+
static struct counters *
find_counter(struct counters *base, const char *name)
{
@@ -454,16 +456,16 @@ find_counter(struct counters *base, const char *name)
at = base;
len = strlen(name);
- while (at) {
+ while(at) {
if (strncmp(at->counter_name, name, len) == 0) {
- return (at);
+ return(at);
}
at = at->next_cpu;
}
printf("Can't find counter %s\n", name);
printf("We have:\n");
at = base;
- while (at) {
+ while(at) {
printf("- %s\n", at->counter_name);
at = at->next_cpu;
}
@@ -478,7 +480,6 @@ allocstall1(struct counters *cpu, int pos)
struct counters *partial;
struct counters *unhalt;
double un, par, res;
-
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
if (pos != -1) {
@@ -488,9 +489,9 @@ allocstall1(struct counters *cpu, int pos)
par = partial->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = par / un;
+ res = par/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
@@ -501,7 +502,6 @@ allocstall2(struct counters *cpu, int pos)
struct counters *partial;
struct counters *unhalt;
double un, par, res;
-
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
if (pos != -1) {
@@ -511,9 +511,9 @@ allocstall2(struct counters *cpu, int pos)
par = partial->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = par / un;
+ res = par/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
@@ -522,14 +522,12 @@ br_mispredict(struct counters *cpu, int pos)
struct counters *brctr;
struct counters *unhalt;
int ret;
-
/* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
double br, un, con, res;
-
con = 20.0;
-
+
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
- brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
+ brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
if (pos != -1) {
br = brctr->vals[pos] * 1.0;
un = unhalt->vals[pos] * 1.0;
@@ -537,9 +535,9 @@ br_mispredict(struct counters *cpu, int pos)
br = brctr->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (con * br) / un;
- ret = printf("%1.3f", res);
- return (ret);
+ res = (con * br)/un;
+ ret = printf("%1.3f", res);
+ return(ret);
}
static int
@@ -549,25 +547,22 @@ br_mispredictib(struct counters *cpu, int pos)
struct counters *unhalt;
struct counters *clear, *clear2, *clear3;
struct counters *uops;
- struct counters *recv;
+ struct counters *recv;
struct counters *iss;
-
/* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
int ret;
-
- /*
- * (BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES +
- * MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY -
- * UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 *
- * CPU_CLK_UNHALTED.THREAD)))
- *
+ /*
+ * (BR_MISP_RETIRED.ALL_BRANCHES /
+ * (BR_MISP_RETIRED.ALL_BRANCHES +
+ * MACHINE_CLEAR.COUNT) *
+ * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
+ *
*/
double br, cl, cl2, cl3, uo, re, un, con, res, is;
-
con = 4.0;
-
+
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
- brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
+ brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
@@ -593,11 +588,12 @@ br_mispredictib(struct counters *cpu, int pos)
is = iss->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (br / (br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
- ret = printf("%1.3f", res);
- return (ret);
+ res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
+ ret = printf("%1.3f", res);
+ return(ret);
}
+
static int
br_mispredict_broad(struct counters *cpu, int pos)
{
@@ -611,9 +607,9 @@ br_mispredict_broad(struct counters *cpu, int pos)
double br, cl, uo, uo_r, re, con, un, res;
con = 4.0;
-
+
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
- brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
+ brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
uops = find_counter(cpu, "UOPS_ISSUED.ANY");
uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
@@ -635,8 +631,8 @@ br_mispredict_broad(struct counters *cpu, int pos)
re = recv->sum * 1.0;
}
res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
- ret = printf("%1.3f", res);
- return (ret);
+ ret = printf("%1.3f", res);
+ return(ret);
}
static int
@@ -647,12 +643,9 @@ splitloadib(struct counters *cpu, int pos)
struct counters *l1d, *ldblock;
struct counters *unhalt;
double un, memd, res, l1, ldb;
-
- /*
- * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *
- * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P "pmcstat -s
- * CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s
- * MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
+ /*
+ * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
+ * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
*/
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
@@ -670,11 +663,12 @@ splitloadib(struct counters *cpu, int pos)
ldb = ldblock->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = ((l1 / memd) * ldb) / un;
+ res = ((l1 / memd) * ldb)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
+
static int
splitload(struct counters *cpu, int pos)
{
@@ -682,7 +676,31 @@ splitload(struct counters *cpu, int pos)
struct counters *mem;
struct counters *unhalt;
double con, un, memd, res;
+/* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
+
+ con = 5.0;
+ unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
+ mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
+ if (pos != -1) {
+ memd = mem->vals[pos] * 1.0;
+ un = unhalt->vals[pos] * 1.0;
+ } else {
+ memd = mem->sum * 1.0;
+ un = unhalt->sum * 1.0;
+ }
+ res = (memd * con)/un;
+ ret = printf("%1.3f", res);
+ return(ret);
+}
+
+static int
+splitload_sb(struct counters *cpu, int pos)
+{
+ int ret;
+ struct counters *mem;
+ struct counters *unhalt;
+ double con, un, memd, res;
/* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
con = 5.0;
@@ -695,23 +713,20 @@ splitload(struct counters *cpu, int pos)
memd = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (memd * con) / un;
+ res = (memd * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
+
static int
-splitstore(struct counters *cpu, int pos)
+splitstore_sb(struct counters *cpu, int pos)
{
- /*
- * 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES
- * (thresh > 0.01)
- */
+ /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
int ret;
struct counters *mem_split;
struct counters *mem_stores;
double memsplit, memstore, res;
-
mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
if (pos != -1) {
@@ -721,19 +736,40 @@ splitstore(struct counters *cpu, int pos)
memsplit = mem_split->sum * 1.0;
memstore = mem_stores->sum * 1.0;
}
- res = memsplit / memstore;
+ res = memsplit/memstore;
+ ret = printf("%1.3f", res);
+ return(ret);
+}
+
+
+
+static int
+splitstore(struct counters *cpu, int pos)
+{
+ /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
+ int ret;
+ struct counters *mem_split;
+ struct counters *mem_stores;
+ double memsplit, memstore, res;
+ mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
+ mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
+ if (pos != -1) {
+ memsplit = mem_split->vals[pos] * 1.0;
+ memstore = mem_stores->vals[pos] * 1.0;
+ } else {
+ memsplit = mem_split->sum * 1.0;
+ memstore = mem_stores->sum * 1.0;
+ }
+ res = memsplit/memstore;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
contested(struct counters *cpu, int pos)
{
- /*
- * 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) /
- * CPU_CLK_UNHALTED.THREAD_P (thresh >.05)
- */
+ /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
int ret;
struct counters *mem;
struct counters *unhalt;
@@ -749,18 +785,15 @@ contested(struct counters *cpu, int pos)
memd = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (memd * con) / un;
+ res = (memd * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
contested_has(struct counters *cpu, int pos)
{
- /*
- * 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) /
- * CPU_CLK_UNHALTED.THREAD_P (thresh >.05)
- */
+ /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
int ret;
struct counters *mem;
struct counters *unhalt;
@@ -776,18 +809,15 @@ contested_has(struct counters *cpu, int pos)
memd = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (memd * con) / un;
+ res = (memd * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
contestedbroad(struct counters *cpu, int pos)
{
- /*
- * 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) /
- * CPU_CLK_UNHALTED.THREAD_P (thresh >.05)
- */
+ /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
int ret;
struct counters *mem;
struct counters *mem2;
@@ -797,7 +827,7 @@ contestedbroad(struct counters *cpu, int pos)
con = 84.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
- mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
+ mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
if (pos != -1) {
memd = mem->vals[pos] * 1.0;
@@ -808,19 +838,16 @@ contestedbroad(struct counters *cpu, int pos)
memtoo = mem2->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = ((memd * con) + memtoo) / un;
+ res = ((memd * con) + memtoo)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
blockstoreforward(struct counters *cpu, int pos)
{
- /*
- * 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P
- * (thresh >= .05)
- */
+ /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
int ret;
struct counters *ldb;
struct counters *unhalt;
@@ -836,19 +863,17 @@ blockstoreforward(struct counters *cpu, int pos)
ld = ldb->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (ld * con) / un;
+ res = (ld * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
cache2(struct counters *cpu, int pos)
{
- /*
- * ** Suspect *** 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) +
- * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
- * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) /
- * CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
+ /* ** Suspect ***
+ * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
+ * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
*/
int ret;
struct counters *mem1, *mem2, *mem3;
@@ -874,17 +899,16 @@ cache2(struct counters *cpu, int pos)
me_3 = mem3->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3)) / un;
+ res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
datasharing(struct counters *cpu, int pos)
{
- /*
- * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/
- * CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
+ /*
+ * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
*/
int ret;
struct counters *mem;
@@ -901,9 +925,9 @@ datasharing(struct counters *cpu, int pos)
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (me * con) / un;
+ res = (me * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
@@ -911,9 +935,8 @@ datasharing(struct counters *cpu, int pos)
static int
datasharing_has(struct counters *cpu, int pos)
{
- /*
- * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/
- * CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
+ /*
+ * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
*/
int ret;
struct counters *mem;
@@ -930,9 +953,9 @@ datasharing_has(struct counters *cpu, int pos)
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (me * con) / un;
+ res = (me * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
@@ -940,9 +963,8 @@ datasharing_has(struct counters *cpu, int pos)
static int
cache2ib(struct counters *cpu, int pos)
{
- /*
- * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P
- * (thresh >.2)
+ /*
+ * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
*/
int ret;
struct counters *mem;
@@ -959,9 +981,9 @@ cache2ib(struct counters *cpu, int pos)
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (con * me) / un;
+ res = (con * me)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
@@ -969,9 +991,9 @@ cache2has(struct counters *cpu, int pos)
{
/*
* Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
- * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
- * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) /
- * CPU_CLK_UNHALTED.THREAD_P
+ * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
+ * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
+ * / CPU_CLK_UNHALTED.THREAD_P
*/
int ret;
struct counters *mem1, *mem2, *mem3;
@@ -996,17 +1018,17 @@ cache2has(struct counters *cpu, int pos)
me3 = mem3->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = ((me1 * con1) + (me2 * con2) + (me3 * con3)) / un;
+ res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
+
static int
cache2broad(struct counters *cpu, int pos)
{
- /*
- * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P
- * (thresh >.2)
+ /*
+ * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
*/
int ret;
struct counters *mem;
@@ -1023,19 +1045,16 @@ cache2broad(struct counters *cpu, int pos)
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (con * me) / un;
+ res = (con * me)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
cache1(struct counters *cpu, int pos)
{
- /*
- * 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) /
- * CPU_CLK_UNHALTED.THREAD_P (thresh >= .2)
- */
+ /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
int ret;
struct counters *mem;
struct counters *unhalt;
@@ -1051,18 +1070,15 @@ cache1(struct counters *cpu, int pos)
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (me * con) / un;
+ res = (me * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
cache1ib(struct counters *cpu, int pos)
{
- /*
- * 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) /
- * CPU_CLK_UNHALTED.THREAD_P (thresh >= .2)
- */
+ /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
int ret;
struct counters *mem;
struct counters *unhalt;
@@ -1078,19 +1094,16 @@ cache1ib(struct counters *cpu, int pos)
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (me * con) / un;
+ res = (me * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
cache1broad(struct counters *cpu, int pos)
{
- /*
- * 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) /
- * CPU_CLK_UNHALTED.THREAD_P (thresh >= .2)
- */
+ /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
int ret;
struct counters *mem;
struct counters *unhalt;
@@ -1106,20 +1119,16 @@ cache1broad(struct counters *cpu, int pos)
me = mem->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (me * con) / un;
+ res = (me * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
dtlb_missload(struct counters *cpu, int pos)
{
- /*
- * 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) +
- * DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t
- * >=.1)
- */
+ /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
int ret;
struct counters *dtlb_m, *dtlb_d;
struct counters *unhalt;
@@ -1138,40 +1147,39 @@ dtlb_missload(struct counters *cpu, int pos)
d2 = dtlb_d->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = ((d1 * con) + d2) / un;
+ res = ((d1 * con) + d2)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
dtlb_missstore(struct counters *cpu, int pos)
{
- /*
- * ((DTLB_STORE_MISSES.STLB_HIT * 7) +
- * DTLB_STORE_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t
- * >= .1)
+ /*
+ * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
+ * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
*/
- int ret;
- struct counters *dtsb_m, *dtsb_d;
- struct counters *unhalt;
- double con, un, d1, d2, res;
-
- con = 7.0;
- unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
- dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
- dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
- if (pos != -1) {
- d1 = dtsb_m->vals[pos] * 1.0;
- d2 = dtsb_d->vals[pos] * 1.0;
- un = unhalt->vals[pos] * 1.0;
- } else {
- d1 = dtsb_m->sum * 1.0;
- d2 = dtsb_d->sum * 1.0;
- un = unhalt->sum * 1.0;
- }
- res = ((d1 * con) + d2) / un;
- ret = printf("%1.3f", res);
- return (ret);
+ int ret;
+ struct counters *dtsb_m, *dtsb_d;
+ struct counters *unhalt;
+ double con, un, d1, d2, res;
+
+ con = 7.0;
+ unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
+ dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
+ dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
+ if (pos != -1) {
+ d1 = dtsb_m->vals[pos] * 1.0;
+ d2 = dtsb_d->vals[pos] * 1.0;
+ un = unhalt->vals[pos] * 1.0;
+ } else {
+ d1 = dtsb_m->sum * 1.0;
+ d2 = dtsb_d->sum * 1.0;
+ un = unhalt->sum * 1.0;
+ }
+ res = ((d1 * con) + d2)/un;
+ ret = printf("%1.3f", res);
+ return(ret);
}
static int
@@ -1192,19 +1200,16 @@ itlb_miss(struct counters *cpu, int pos)
d1 = itlb->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = d1 / un;
+ res = d1/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
itlb_miss_broad(struct counters *cpu, int pos)
{
- /*
- * (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) /
- * CPU_CLK_UNTHREAD_P
- */
+ /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */
int ret;
struct counters *itlb;
struct counters *unhalt;
@@ -1223,19 +1228,16 @@ itlb_miss_broad(struct counters *cpu, int pos)
un = unhalt->sum * 1.0;
k = four_k->sum * 1.0;
}
- res = (7.0 * k + d1) / un;
+ res = (7.0 * k + d1)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
icache_miss(struct counters *cpu, int pos)
{
- /*
- * (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) /
- * CPU_CLK_UNHALTED.THREAD_P IB
- */
+ /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
int ret;
struct counters *itlb, *icache;
@@ -1254,9 +1256,9 @@ icache_miss(struct counters *cpu, int pos)
ic = icache->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (ic - d1) / un;
+ res = (ic-d1)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
@@ -1280,16 +1282,16 @@ icache_miss_has(struct counters *cpu, int pos)
ic = icache->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (con * ic) / un;
+ res = (con * ic)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
lcp_stall(struct counters *cpu, int pos)
{
- /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
+ /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
int ret;
struct counters *ild;
struct counters *unhalt;
@@ -1304,9 +1306,9 @@ lcp_stall(struct counters *cpu, int pos)
d1 = ild->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = d1 / un;
+ res = d1/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
@@ -1314,10 +1316,7 @@ lcp_stall(struct counters *cpu, int pos)
static int
frontendstall(struct counters *cpu, int pos)
{
- /*
- * 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P *
- * 4) (thresh >= .15)
- */
+ /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
int ret;
struct counters *idq;
struct counters *unhalt;
@@ -1333,20 +1332,17 @@ frontendstall(struct counters *cpu, int pos)
id = idq->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = id / (un * con);
+ res = id/(un * con);
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
clears(struct counters *cpu, int pos)
{
- /*
- * 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC +
- * MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P
- * (thresh >= .02)
- */
-
+ /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
+ * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/
+
int ret;
struct counters *clr1, *clr2, *clr3;
struct counters *unhalt;
@@ -1357,7 +1353,7 @@ clears(struct counters *cpu, int pos)
clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
-
+
if (pos != -1) {
cl1 = clr1->vals[pos] * 1.0;
cl2 = clr2->vals[pos] * 1.0;
@@ -1369,11 +1365,13 @@ clears(struct counters *cpu, int pos)
cl3 = clr3->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = ((cl1 + cl2 + cl3) * con) / un;
+ res = ((cl1 + cl2 + cl3) * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
+
+
static int
clears_broad(struct counters *cpu, int pos)
{
@@ -1402,11 +1400,15 @@ clears_broad(struct counters *cpu, int pos)
un = unhalt->sum * 1.0;
}
/* Formula not listed but extrapulated to add the cy ?? */
- res = ((cl1 + cl2 + cl3 + cy) * con) / un;
+ res = ((cl1 + cl2 + cl3 + cy) * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
+
+
+
+
static int
microassist(struct counters *cpu, int pos)
{
@@ -1426,11 +1428,12 @@ microassist(struct counters *cpu, int pos)
id = idq->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = id / (un * con);
+ res = id/(un * con);
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
+
static int
microassist_broad(struct counters *cpu, int pos)
{
@@ -1457,19 +1460,17 @@ microassist_broad(struct counters *cpu, int pos)
uoi = uopiss->sum * 1.0;
uor = uopret->sum * 1.0;
}
- res = (uor / uoi) * (id / (un * con));
+ res = (uor/uoi) * (id/(un * con));
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
+
static int
aliasing(struct counters *cpu, int pos)
{
- /*
- * 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) /
- * CPU_CLK_UNHALTED.THREAD_P (thresh > .1)
- */
- int ret;
+ /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
+ int ret;
struct counters *ld;
struct counters *unhalt;
double un, lds, con, res;
@@ -1484,19 +1485,16 @@ aliasing(struct counters *cpu, int pos)
lds = ld->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (lds * con) / un;
+ res = (lds * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
aliasing_broad(struct counters *cpu, int pos)
{
- /*
- * 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) /
- * CPU_CLK_UNHALTED.THREAD_P (thresh > .1)
- */
- int ret;
+ /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
+ int ret;
struct counters *ld;
struct counters *unhalt;
double un, lds, con, res;
@@ -1511,9 +1509,9 @@ aliasing_broad(struct counters *cpu, int pos)
lds = ld->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (lds * con) / un;
+ res = (lds * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
@@ -1521,7 +1519,7 @@ static int
fpassists(struct counters *cpu, int pos)
{
/* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
- int ret;
+ int ret;
struct counters *fp;
struct counters *inst;
double un, fpd, res;
@@ -1535,19 +1533,16 @@ fpassists(struct counters *cpu, int pos)
fpd = fp->sum * 1.0;
un = inst->sum * 1.0;
}
- res = fpd / un;
+ res = fpd/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
otherassistavx(struct counters *cpu, int pos)
{
- /*
- * 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P
- * thresh .1
- */
- int ret;
+ /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
+ int ret;
struct counters *oth;
struct counters *unhalt;
double un, ot, con, res;
@@ -1562,24 +1557,21 @@ otherassistavx(struct counters *cpu, int pos)
ot = oth->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (ot * con) / un;
+ res = (ot * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
otherassistsse(struct counters *cpu, int pos)
{
- int ret;
+ int ret;
struct counters *oth;
struct counters *unhalt;
double un, ot, con, res;
- /*
- * 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P
- * thresh .1
- */
+ /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
con = 75.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
@@ -1590,24 +1582,21 @@ otherassistsse(struct counters *cpu, int pos)
ot = oth->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = (ot * con) / un;
+ res = (ot * con)/un;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
efficiency1(struct counters *cpu, int pos)
{
- int ret;
+ int ret;
struct counters *uops;
struct counters *unhalt;
double un, ot, con, res;
- /*
- * 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look
- * if thresh < .9
- */
+ /* 19 - UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9 */
con = 4.0;
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
@@ -1618,24 +1607,21 @@ efficiency1(struct counters *cpu, int pos)
ot = uops->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = ot / (con * un);
+ res = ot/(con * un);
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
static int
efficiency2(struct counters *cpu, int pos)
{
- int ret;
+ int ret;
struct counters *uops;
struct counters *unhalt;
double un, ot, res;
- /*
- * 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1.
- * (comp factor)
- */
+ /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
uops = find_counter(cpu, "INST_RETIRED.ANY_P");
if (pos != -1) {
@@ -1645,205 +1631,206 @@ efficiency2(struct counters *cpu, int pos)
ot = uops->sum * 1.0;
un = unhalt->sum * 1.0;
}
- res = un / ot;
+ res = un/ot;
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
-#define SANDY_BRIDGE_COUNT 20
+#define SANDY_BRIDGE_COUNT 20
static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
- /*01*/ {"allocstall1", "thresh > .05",
- "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
- allocstall1},
- /*02*/ {"allocstall2", "thresh > .05",
- "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1",
- allocstall2},
- /*03*/ {"br_miss", "thresh >= .2",
- "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
- br_mispredict},
- /*04*/ {"splitload", "thresh >= .1",
- "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
- splitload},
- /*05*/ {"splitstore", "thresh >= .01",
- "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
- splitstore},
- /*06*/ {"contested", "thresh >= .05",
- "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- contested},
- /*07*/ {"blockstorefwd", "thresh >= .05",
- "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- blockstoreforward},
- /*08*/ {"cache2", "thresh >= .2",
- "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- cache2},
- /*09*/ {"cache1", "thresh >= .2",
- "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- cache1},
- /*10*/ {"dtlbmissload", "thresh >= .1",
- "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- dtlb_missload},
- /*11*/ {"dtlbmissstore", "thresh >= .05",
- "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- dtlb_missstore},
- /*12*/ {"frontendstall", "thresh >= .15",
- "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- frontendstall},
- /*13*/ {"clears", "thresh >= .02",
- "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- clears},
- /*14*/ {"microassist", "thresh >= .05",
- "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- microassist},
- /*15*/ {"aliasing_4k", "thresh >= .1",
- "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- aliasing},
- /*16*/ {"fpassist", "look for a excessive value",
- "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
- fpassists},
- /*17*/ {"otherassistavx", "look for a excessive value",
- "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- otherassistavx},
- /*18*/ {"otherassistsse", "look for a excessive value",
- "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- otherassistsse},
- /*19*/ {"eff1", "thresh < .9",
- "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- efficiency1},
- /*20*/ {"eff2", "thresh > 1.0",
- "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- efficiency2},
+/*01*/ { "allocstall1", "thresh > .05",
+ "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
+ allocstall1, 2 },
+/*02 -- not defined for SB right now (PARTIAL_RAT_STALLS) */
+ { "allocstall2", "thresh > .05",
+ "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
+ allocstall2, 2 },
+/*03*/ { "br_miss", "thresh >= .2",
+ "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
+ br_mispredict, 2 },
+/*04*/ { "splitload", "thresh >= .1",
+ "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
+ splitload_sb, 2 },
+/* 05*/ { "splitstore", "thresh >= .01",
+ "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
+ splitstore_sb, 2 },
+/*06*/ { "contested", "thresh >= .05",
+ "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ contested, 2 },
+/*07*/ { "blockstorefwd", "thresh >= .05",
+ "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ blockstoreforward, 2 },
+/*08*/ { "cache2", "thresh >= .2",
+ "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ cache2, 4 },
+/*09*/ { "cache1", "thresh >= .2",
+ "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ cache1, 2 },
+/*10*/ { "dtlbmissload", "thresh >= .1",
+ "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ dtlb_missload, 3 },
+/*11*/ { "dtlbmissstore", "thresh >= .05",
+ "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ dtlb_missstore, 3 },
+/*12*/ { "frontendstall", "thresh >= .15",
+ "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ frontendstall, 2 },
+/*13*/ { "clears", "thresh >= .02",
+ "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ clears, 4 },
+/*14*/ { "microassist", "thresh >= .05",
+ "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ microassist, 2 },
+/*15*/ { "aliasing_4k", "thresh >= .1",
+ "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ aliasing, 2 },
+/*16*/ { "fpassist", "look for a excessive value",
+ "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
+ fpassists, 2 },
+/*17*/ { "otherassistavx", "look for a excessive value",
+ "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ otherassistavx, 2},
+/*18*/ { "otherassistsse", "look for a excessive value",
+ "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ otherassistsse, 2 },
+/*19*/ { "eff1", "thresh < .9",
+ "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ efficiency1, 2 },
+/*20*/ { "eff2", "thresh > 1.0",
+ "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ efficiency2, 2 },
};
#define IVY_BRIDGE_COUNT 21
static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
- /*1*/ {"eff1", "thresh < .75",
- "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- efficiency1},
- /*2*/ {"eff2", "thresh > 1.0",
- "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- efficiency2},
- /*3*/ {"itlbmiss", "thresh > .05",
- "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- itlb_miss},
- /*4*/ {"icachemiss", "thresh > .05",
- "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- icache_miss},
- /*5*/ {"lcpstall", "thresh > .05",
- "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- lcp_stall},
- /*6*/ {"cache1", "thresh >= .2",
- "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- cache1ib},
- /*7*/ {"cache2", "thresh >= .2",
- "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- cache2ib},
- /*8*/ {"contested", "thresh >= .05",
- "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- contested},
- /*9*/ {"datashare", "thresh >= .05",
- "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- datasharing},
- /*10*/ {"blockstorefwd", "thresh >= .05",
- "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- blockstoreforward},
- /*11*/ {"splitload", "thresh >= .1",
- "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
- splitloadib},
- /*12*/ {"splitstore", "thresh >= .01",
- "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
- splitstore},
- /*13*/ {"aliasing_4k", "thresh >= .1",
- "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- aliasing},
- /*14*/ {"dtlbmissload", "thresh >= .1",
- "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- dtlb_missload},
- /*15*/ {"dtlbmissstore", "thresh >= .05",
- "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- dtlb_missstore},
- /*16*/ {"br_miss", "thresh >= .2",
- "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
- br_mispredictib},
- /*17*/ {"clears", "thresh >= .02",
- "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- clears},
- /*18*/ {"microassist", "thresh >= .05",
- "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- microassist},
- /*19*/ {"fpassist", "look for a excessive value",
- "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
- fpassists},
- /*20*/ {"otherassistavx", "look for a excessive value",
- "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- otherassistavx},
- /*21*/ {"otherassistsse", "look for a excessive value",
- "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- otherassistsse},
+/*1*/ { "eff1", "thresh < .75",
+ "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ efficiency1, 2 },
+/*2*/ { "eff2", "thresh > 1.0",
+ "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ efficiency2, 2 },
+/*3*/ { "itlbmiss", "thresh > .05",
+ "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ itlb_miss, 2 },
+/*4*/ { "icachemiss", "thresh > .05",
+ "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ icache_miss, 3 },
+/*5*/ { "lcpstall", "thresh > .05",
+ "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ lcp_stall, 2 },
+/*6*/ { "cache1", "thresh >= .2",
+ "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ cache1ib, 2 },
+/*7*/ { "cache2", "thresh >= .2",
+ "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ cache2ib, 2 },
+/*8*/ { "contested", "thresh >= .05",
+ "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ contested, 2 },
+/*9*/ { "datashare", "thresh >= .05",
+ "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ datasharing, 2 },
+/*10*/ { "blockstorefwd", "thresh >= .05",
+ "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ blockstoreforward, 2 },
+/*11*/ { "splitload", "thresh >= .1",
+ "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
+ splitloadib, 4 },
+/*12*/ { "splitstore", "thresh >= .01",
+ "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
+ splitstore, 2 },
+/*13*/ { "aliasing_4k", "thresh >= .1",
+ "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ aliasing, 2 },
+/*14*/ { "dtlbmissload", "thresh >= .1",
+ "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ dtlb_missload , 3},
+/*15*/ { "dtlbmissstore", "thresh >= .05",
+ "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ dtlb_missstore, 3 },
+/*16*/ { "br_miss", "thresh >= .2",
+ "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
+ br_mispredictib, 8 },
+/*17*/ { "clears", "thresh >= .02",
+ "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ clears, 4 },
+/*18*/ { "microassist", "thresh >= .05",
+ "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ microassist, 2 },
+/*19*/ { "fpassist", "look for a excessive value",
+ "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
+ fpassists, 2 },
+/*20*/ { "otherassistavx", "look for a excessive value",
+ "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ otherassistavx , 2},
+/*21*/ { "otherassistsse", "look for a excessive value",
+ "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ otherassistsse, 2 },
};
#define HASWELL_COUNT 20
static struct cpu_entry haswell[HASWELL_COUNT] = {
- /*1*/ {"eff1", "thresh < .75",
- "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- efficiency1},
- /*2*/ {"eff2", "thresh > 1.0",
- "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- efficiency2},
- /*3*/ {"itlbmiss", "thresh > .05",
- "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- itlb_miss},
- /*4*/ {"icachemiss", "thresh > .05",
- "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1",
- icache_miss_has},
- /*5*/ {"lcpstall", "thresh > .05",
- "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- lcp_stall},
- /*6*/ {"cache1", "thresh >= .2",
- "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- cache1ib},
- /*7*/ {"cache2", "thresh >= .2",
- "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- cache2has},
- /*8*/ {"contested", "thresh >= .05",
- "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- contested_has},
- /*9*/ {"datashare", "thresh >= .05",
- "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- datasharing_has},
- /*10*/ {"blockstorefwd", "thresh >= .05",
- "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- blockstoreforward},
- /*11*/ {"splitload", "thresh >= .1",
- "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
- splitload},
- /*12*/ {"splitstore", "thresh >= .01",
- "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
- splitstore},
- /*13*/ {"aliasing_4k", "thresh >= .1",
- "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- aliasing},
- /*14*/ {"dtlbmissload", "thresh >= .1",
- "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- dtlb_missload},
- /*15*/ {"br_miss", "thresh >= .2",
- "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
- br_mispredict},
- /*16*/ {"clears", "thresh >= .02",
- "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- clears},
- /*17*/ {"microassist", "thresh >= .05",
- "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- microassist},
- /*18*/ {"fpassist", "look for a excessive value",
- "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
- fpassists},
- /*19*/ {"otherassistavx", "look for a excessive value",
- "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- otherassistavx},
- /*20*/ {"otherassistsse", "look for a excessive value",
- "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- otherassistsse},
+/*1*/ { "eff1", "thresh < .75",
+ "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ efficiency1, 2 },
+/*2*/ { "eff2", "thresh > 1.0",
+ "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ efficiency2, 2 },
+/*3*/ { "itlbmiss", "thresh > .05",
+ "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ itlb_miss, 2 },
+/*4*/ { "icachemiss", "thresh > .05",
+ "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ icache_miss_has, 2 },
+/*5*/ { "lcpstall", "thresh > .05",
+ "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ lcp_stall, 2 },
+/*6*/ { "cache1", "thresh >= .2",
+ "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ cache1ib, 2 },
+/*7*/ { "cache2", "thresh >= .2",
+ "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ cache2has, 4 },
+/*8*/ { "contested", "thresh >= .05",
+ "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ contested_has, 2 },
+/*9*/ { "datashare", "thresh >= .05",
+ "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ datasharing_has, 2 },
+/*10*/ { "blockstorefwd", "thresh >= .05",
+ "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ blockstoreforward, 2 },
+/*11*/ { "splitload", "thresh >= .1",
+ "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
+ splitload , 2},
+/*12*/ { "splitstore", "thresh >= .01",
+ "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
+ splitstore, 2 },
+/*13*/ { "aliasing_4k", "thresh >= .1",
+ "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ aliasing, 2 },
+/*14*/ { "dtlbmissload", "thresh >= .1",
+ "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ dtlb_missload, 3 },
+/*15*/ { "br_miss", "thresh >= .2",
+ "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
+ br_mispredict, 2 },
+/*16*/ { "clears", "thresh >= .02",
+ "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ clears, 4 },
+/*17*/ { "microassist", "thresh >= .05",
+ "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ microassist, 2 },
+/*18*/ { "fpassist", "look for a excessive value",
+ "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
+ fpassists, 2 },
+/*19*/ { "otherassistavx", "look for a excessive value",
+ "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ otherassistavx, 2 },
+/*20*/ { "otherassistsse", "look for a excessive value",
+ "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ otherassistsse, 2 },
};
@@ -1851,7 +1838,6 @@ static void
explain_name_broad(const char *name)
{
const char *mythresh;
-
if (strcmp(name, "eff1") == 0) {
printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
mythresh = "thresh < .75";
@@ -1860,9 +1846,9 @@ explain_name_broad(const char *name)
mythresh = "thresh > 1.0";
} else if (strcmp(name, "itlbmiss") == 0) {
printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
- mythresh = "thresh > .05";
+ mythresh = "thresh > .05";
} else if (strcmp(name, "icachemiss") == 0) {
- printf("Examine ( 36.0 * CACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n");
+ printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n");
mythresh = "thresh > .05";
} else if (strcmp(name, "lcpstall") == 0) {
printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
@@ -1913,66 +1899,67 @@ explain_name_broad(const char *name)
} else {
printf("Unknown name:%s\n", name);
mythresh = "unknown entry";
- }
+ }
printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
#define BROADWELL_COUNT 17
static struct cpu_entry broadwell[BROADWELL_COUNT] = {
- /*1*/ {"eff1", "thresh < .75",
- "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- efficiency1},
- /*2*/ {"eff2", "thresh > 1.0",
- "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- efficiency2},
- /*3*/ {"itlbmiss", "thresh > .05",
- "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
- itlb_miss_broad},
- /*4*/ {"icachemiss", "thresh > .05",
- "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1",
- icache_miss_has},
- /*5*/ {"lcpstall", "thresh > .05",
- "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- lcp_stall},
- /*6*/ {"cache1", "thresh >= .1",
- "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- cache1broad},
- /*7*/ {"cache2", "thresh >= .2",
- "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- cache2broad},
- /*8*/ {"contested", "thresh >= .05",
- "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
- contestedbroad},
- /*9*/ {"datashare", "thresh >= .05",
- "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- datasharing_has},
- /*10*/ {"blockstorefwd", "thresh >= .05",
- "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- blockstoreforward},
- /*11*/ {"aliasing_4k", "thresh >= .1",
- "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- aliasing_broad},
- /*12*/ {"dtlbmissload", "thresh >= .1",
- "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- dtlb_missload},
- /*13*/ {"br_miss", "thresh >= .2",
- "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
- br_mispredict_broad},
- /*14*/ {"clears", "thresh >= .02",
- "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- clears_broad},
- /*15*/ {"fpassist", "look for a excessive value",
- "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
- fpassists},
- /*16*/ {"otherassistavx", "look for a excessive value",
- "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
- otherassistavx},
- /*17*/ {"microassist", "thresh >= .2",
- "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1",
- microassist_broad},
+/*1*/ { "eff1", "thresh < .75",
+ "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ efficiency1, 2 },
+/*2*/ { "eff2", "thresh > 1.0",
+ "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ efficiency2, 2 },
+/*3*/ { "itlbmiss", "thresh > .05",
+ "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
+ itlb_miss_broad, 3 },
+/*4*/ { "icachemiss", "thresh > .05",
+ "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ icache_miss_has, 2 },
+/*5*/ { "lcpstall", "thresh > .05",
+ "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ lcp_stall, 2 },
+/*6*/ { "cache1", "thresh >= .1",
+ "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ cache1broad, 2 },
+/*7*/ { "cache2", "thresh >= .2",
+ "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ cache2broad, 2 },
+/*8*/ { "contested", "thresh >= .05",
+ "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
+ contestedbroad, 2 },
+/*9*/ { "datashare", "thresh >= .05",
+ "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ datasharing_has, 2 },
+/*10*/ { "blockstorefwd", "thresh >= .05",
+ "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ blockstoreforward, 2 },
+/*11*/ { "aliasing_4k", "thresh >= .1",
+ "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ aliasing_broad, 2 },
+/*12*/ { "dtlbmissload", "thresh >= .1",
+ "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ dtlb_missload, 3 },
+/*13*/ { "br_miss", "thresh >= .2",
+ "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
+ br_mispredict_broad, 7 },
+/*14*/ { "clears", "thresh >= .02",
+ "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ clears_broad, 5 },
+/*15*/ { "fpassist", "look for a excessive value",
+ "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
+ fpassists, 2 },
+/*16*/ { "otherassistavx", "look for a excessive value",
+ "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
+ otherassistavx, 2 },
+/*17*/ { "microassist", "thresh >= .2",
+ "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1",
+ microassist_broad, 4 },
};
+
static void
set_sandybridge(void)
{
@@ -2001,6 +1988,7 @@ set_haswell(void)
the_cpu.explain = explain_name_has;
}
+
static void
set_broadwell(void)
{
@@ -2010,41 +1998,57 @@ set_broadwell(void)
the_cpu.explain = explain_name_broad;
}
-static void
-set_expression(char *name)
+
+static int
+set_expression(const char *name)
{
int found = 0, i;
-
- for (i = 0; i < the_cpu.number; i++) {
+ for(i=0 ; i< the_cpu.number; i++) {
if (strcmp(name, the_cpu.ents[i].name) == 0) {
found = 1;
expression = the_cpu.ents[i].func;
command = the_cpu.ents[i].command;
threshold = the_cpu.ents[i].thresh;
+ if (the_cpu.ents[i].counters_required > max_pmc_counters) {
+ printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
+ the_cpu.ents[i].name,
+ the_cpu.ents[i].counters_required, max_pmc_counters);
+ printf("Sorry this test can not be run\n");
+ if (run_all == 0) {
+ exit(-1);
+ } else {
+ return(-1);
+ }
+ }
break;
}
}
if (!found) {
printf("For CPU type %s we have no expression:%s\n",
- the_cpu.cputype, name);
+ the_cpu.cputype, name);
exit(-1);
}
+ return(0);
}
+
+
+
+
static int
-validate_expression(char *name)
+validate_expression(char *name)
{
int i, found;
found = 0;
- for (i = 0; i < the_cpu.number; i++) {
+ for(i=0 ; i< the_cpu.number; i++) {
if (strcmp(name, the_cpu.ents[i].name) == 0) {
found = 1;
break;
}
}
if (!found) {
- return (-1);
+ return(-1);
}
return (0);
}
@@ -2052,9 +2056,9 @@ validate_expression(char *name)
static void
do_expression(struct counters *cpu, int pos)
{
- if (expression == NULL)
+ if (expression == NULL)
return;
- (*expression) (cpu, pos);
+ (*expression)(cpu, pos);
}
static void
@@ -2062,57 +2066,60 @@ process_header(int idx, char *p)
{
struct counters *up;
int i, len, nlen;
-
- /*
- * Given header element idx, at p in form 's/NN/nameof' process the
- * entry to pull out the name and the CPU number.
+ /*
+ * Given header element idx, at p in
+ * form 's/NN/nameof'
+ * process the entry to pull out the name and
+ * the CPU number.
*/
if (strncmp(p, "s/", 2)) {
printf("Check -- invalid header no s/ in %s\n",
- p);
+ p);
return;
}
up = &cnts[idx];
up->cpu = strtol(&p[2], NULL, 10);
len = strlen(p);
- for (i = 2; i < len; i++) {
+ for (i=2; i<len; i++) {
if (p[i] == '/') {
- nlen = strlen(&p[(i + 1)]);
- if (nlen < (MAX_NLEN - 1)) {
- strcpy(up->counter_name, &p[(i + 1)]);
+ nlen = strlen(&p[(i+1)]);
+ if (nlen < (MAX_NLEN-1)) {
+ strcpy(up->counter_name, &p[(i+1)]);
} else {
- strncpy(up->counter_name, &p[(i + 1)], (MAX_NLEN - 1));
+ strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
}
}
}
}
static void
-build_counters_from_header(FILE * io)
+build_counters_from_header(FILE *io)
{
char buffer[8192], *p;
int i, len, cnt;
size_t mlen;
- /*
- * We have a new start, lets setup our headers and cpus.
+ /* We have a new start, lets
+ * setup our headers and cpus.
*/
if (fgets(buffer, sizeof(buffer), io) == NULL) {
printf("First line can't be read from file err:%d\n", errno);
return;
}
/*
- * Ok output is an array of counters. Once we start to read the
- * values in we must put them in there slot to match there CPU and
- * counter being updated. We create a mass array of the counters,
- * filling in the CPU and counter name.
+ * Ok output is an array of counters. Once
+ * we start to read the values in we must
+ * put them in there slot to match there CPU and
+ * counter being updated. We create a mass array
+ * of the counters, filling in the CPU and
+ * counter name.
*/
/* How many do we get? */
len = strlen(buffer);
- for (i = 0, cnt = 0; i < len; i++) {
+ for (i=0, cnt=0; i<len; i++) {
if (strncmp(&buffer[i], "s/", 2) == 0) {
cnt++;
- for (; i < len; i++) {
+ for(;i<len;i++) {
if (buffer[i] == ' ')
break;
}
@@ -2126,10 +2133,10 @@ build_counters_from_header(FILE * io)
return;
}
memset(cnts, 0, mlen);
- for (i = 0, cnt = 0; i < len; i++) {
+ for (i=0, cnt=0; i<len; i++) {
if (strncmp(&buffer[i], "s/", 2) == 0) {
p = &buffer[i];
- for (; i < len; i++) {
+ for(;i<len;i++) {
if (buffer[i] == ' ') {
buffer[i] = 0;
break;
@@ -2140,22 +2147,22 @@ build_counters_from_header(FILE * io)
}
}
if (verbose)
- printf("We have %d entries\n", cnt);
+ printf("We have %d entries\n", cnt);
}
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;
static int
-read_a_line(FILE * io)
+read_a_line(FILE *io)
{
- char buffer[8192], *p, *stop;
+ char buffer[8192], *p, *stop;
int pos, i;
if (fgets(buffer, sizeof(buffer), io) == NULL) {
- return (0);
+ return(0);
}
p = buffer;
- for (i = 0; i < ncnts; i++) {
+ for (i=0; i<ncnts; i++) {
pos = cnts[i].pos;
cnts[i].vals[pos] = strtol(p, &stop, 0);
cnts[i].pos++;
@@ -2166,7 +2173,7 @@ read_a_line(FILE * io)
}
extern int cpu_count_out;
-int cpu_count_out = 0;
+int cpu_count_out=0;
static void
print_header(void)
@@ -2174,13 +2181,13 @@ print_header(void)
int i, cnt, printed_cnt;
printf("*********************************\n");
- for (i = 0, cnt = 0; i < MAX_CPU; i++) {
+ for(i=0, cnt=0; i<MAX_CPU; i++) {
if (glob_cpu[i]) {
cnt++;
}
- }
+ }
cpu_count_out = cnt;
- for (i = 0, printed_cnt = 0; i < MAX_CPU; i++) {
+ for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
if (glob_cpu[i]) {
printf("CPU%d", i);
printed_cnt++;
@@ -2200,7 +2207,7 @@ lace_cpus_together(void)
int i, j, lace_cpu;
struct counters *cpat, *at;
- for (i = 0; i < ncnts; i++) {
+ for(i=0; i<ncnts; i++) {
cpat = &cnts[i];
if (cpat->next_cpu) {
/* Already laced in */
@@ -2218,7 +2225,7 @@ lace_cpus_together(void)
continue;
}
/* Ok look forward for cpu->cpu and link in */
- for (j = (i + 1); j < ncnts; j++) {
+ for(j=(i+1); j<ncnts; j++) {
at = &cnts[j];
if (at->next_cpu) {
continue;
@@ -2239,19 +2246,15 @@ process_file(char *filename)
FILE *io;
int i;
int line_at, not_done;
- pid_t pid_of_command = 0;
+ pid_t pid_of_command=0;
- if (filename == NULL) {
+ if (filename == NULL) {
io = my_popen(command, "r", &pid_of_command);
- if (io == NULL) {
- printf("Can't popen the command %s\n", command);
- return;
- }
} else {
io = fopen(filename, "r");
if (io == NULL) {
printf("Can't process file %s err:%d\n",
- filename, errno);
+ filename, errno);
return;
}
}
@@ -2259,25 +2262,23 @@ process_file(char *filename)
if (cnts == NULL) {
/* Nothing we can do */
printf("Nothing to do -- no counters built\n");
- if (filename) {
+ if (io) {
fclose(io);
- } else {
- my_pclose(io, pid_of_command);
}
return;
}
lace_cpus_together();
print_header();
if (verbose) {
- for (i = 0; i < ncnts; i++) {
+ for (i=0; i<ncnts; i++) {
printf("Counter:%s cpu:%d index:%d\n",
- cnts[i].counter_name,
- cnts[i].cpu, i);
+ cnts[i].counter_name,
+ cnts[i].cpu, i);
}
}
line_at = 0;
not_done = 1;
- while (not_done) {
+ while(not_done) {
if (read_a_line(io)) {
line_at++;
} else {
@@ -2288,10 +2289,9 @@ process_file(char *filename)
}
if (filename == NULL) {
int cnt;
-
/* For the ones we dynamically open we print now */
- for (i = 0, cnt = 0; i < MAX_CPU; i++) {
- do_expression(glob_cpu[i], (line_at - 1));
+ for(i=0, cnt=0; i<MAX_CPU; i++) {
+ do_expression(glob_cpu[i], (line_at-1));
cnt++;
if (cnt == cpu_count_out) {
printf("\n");
@@ -2308,12 +2308,25 @@ process_file(char *filename)
my_pclose(io, pid_of_command);
}
}
-
#if defined(__amd64__)
#define cpuid(in,a,b,c,d)\
asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
+
+static __inline void
+do_cpuid(u_int ax, u_int cx, u_int *p)
+{
+ __asm __volatile("cpuid"
+ : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
+ : "0" (ax), "c" (cx) );
+}
+
#else
-#define cpuid(in, a, b, c, d)
+#define cpuid(in, a, b, c, d)
+static __inline void
+do_cpuid(u_int ax, u_int cx, u_int *p)
+{
+}
+
#endif
static void
@@ -2321,10 +2334,11 @@ get_cpuid_set(void)
{
unsigned long eax, ebx, ecx, edx;
int model;
- pid_t pid_of_command = 0;
+ pid_t pid_of_command=0;
size_t sz, len;
FILE *io;
char linebuf[1024], *str;
+ u_int reg[4];
eax = ebx = ecx = edx = 0;
@@ -2357,25 +2371,20 @@ get_cpuid_set(void)
printf("Intel Pentium P6\n");
goto not_supported;
break;
- case 0x3:
+ case 0x3:
case 0x5:
printf("Intel PII\n");
goto not_supported;
break;
- case 0x6:
- case 0x16:
+ case 0x6: case 0x16:
printf("Intel CL\n");
goto not_supported;
break;
- case 0x7:
- case 0x8:
- case 0xA:
- case 0xB:
+ case 0x7: case 0x8: case 0xA: case 0xB:
printf("Intel PIII\n");
goto not_supported;
break;
- case 0x9:
- case 0xD:
+ case 0x9: case 0xD:
printf("Intel PM\n");
goto not_supported;
break;
@@ -2396,10 +2405,14 @@ get_cpuid_set(void)
goto not_supported;
break;
case 0x1A:
- case 0x1E: /* Per Intel document 253669-032 9/2009, pages
- * A-2 and A-57 */
- case 0x1F: /* Per Intel document 253669-032 9/2009, pages
- * A-2 and A-57 */
+ case 0x1E: /*
+ * Per Intel document 253669-032 9/2009,
+ * pages A-2 and A-57
+ */
+ case 0x1F: /*
+ * Per Intel document 253669-032 9/2009,
+ * pages A-2 and A-57
+ */
printf("Intel COREI7\n");
goto not_supported;
break;
@@ -2455,7 +2468,7 @@ get_cpuid_set(void)
break;
case 0x4f:
case 0x56:
- printf("Intel BROADWEL (Xeon)L\n");
+ printf("Intel BROADWEL (Xeon)\n");
set_broadwell();
break;
@@ -2466,7 +2479,7 @@ get_cpuid_set(void)
break;
default:
printf("Intel model 0x%x is not known -- sorry\n",
- model);
+ model);
goto not_supported;
break;
}
@@ -2476,6 +2489,9 @@ get_cpuid_set(void)
goto not_supported;
break;
}
+ do_cpuid(0xa, 0, reg);
+ max_pmc_counters = (reg[3] & 0x0000000f) + 1;
+ printf("We have %d PMC counters to work with\n", max_pmc_counters);
/* Ok lets load the list of all known PMC's */
io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
if (valid_pmcs == NULL) {
@@ -2484,26 +2500,27 @@ get_cpuid_set(void)
sz = sizeof(char *) * pmc_allocated_cnt;
valid_pmcs = malloc(sz);
if (valid_pmcs == NULL) {
- printf("No memory allocation fails at startup?\n");
+ printf("No memory allocation fails at startup?\n");
exit(-1);
}
memset(valid_pmcs, 0, sz);
}
+
while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
if (linebuf[0] != '\t') {
/* sometimes headers ;-) */
continue;
}
len = strlen(linebuf);
- if (linebuf[(len - 1)] == '\n') {
+ if (linebuf[(len-1)] == '\n') {
/* Likely */
- linebuf[(len - 1)] = 0;
+ linebuf[(len-1)] = 0;
}
str = &linebuf[1];
len = strlen(str) + 1;
valid_pmcs[valid_pmc_cnt] = malloc(len);
if (valid_pmcs[valid_pmc_cnt] == NULL) {
- printf("No memory2 allocation fails at startup?\n");
+ printf("No memory2 allocation fails at startup?\n");
exit(-1);
}
memset(valid_pmcs[valid_pmc_cnt], 0, len);
@@ -2516,7 +2533,7 @@ get_cpuid_set(void)
sz = sizeof(char *) * (pmc_allocated_cnt * 2);
more = malloc(sz);
if (more == NULL) {
- printf("No memory3 allocation fails at startup?\n");
+ printf("No memory3 allocation fails at startup?\n");
exit(-1);
}
memset(more, 0, sz);
@@ -2526,10 +2543,10 @@ get_cpuid_set(void)
valid_pmcs = more;
}
}
- my_pclose(io, pid_of_command);
+ my_pclose(io, pid_of_command);
return;
not_supported:
- printf("Not supported\n");
+ printf("Not supported\n");
exit(-1);
}
@@ -2537,12 +2554,11 @@ static void
explain_all(void)
{
int i;
-
- printf("For CPU's of type %s the following expressions are available:\n", the_cpu.cputype);
+ printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
printf("-------------------------------------------------------------\n");
- for (i = 0; i < the_cpu.number; i++) {
+ for(i=0; i<the_cpu.number; i++){
printf("For -e %s ", the_cpu.ents[i].name);
- (*the_cpu.explain) (the_cpu.ents[i].name);
+ (*the_cpu.explain)(the_cpu.ents[i].name);
printf("----------------------------\n");
}
}
@@ -2551,7 +2567,7 @@ static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
FILE *io;
- pid_t pid_of_command = 0;
+ pid_t pid_of_command=0;
char my_command[1024];
char line[1024];
char resp[1024];
@@ -2559,12 +2575,12 @@ test_for_a_pmc(const char *pmc, int out_so_far)
if (out_so_far < 50) {
len = 50 - out_so_far;
- for (i = 0; i < len; i++) {
+ for(i=0; i<len; i++) {
printf(" ");
}
}
sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
- io = my_popen(my_command, "r", &pid_of_command);
+ io = my_popen(my_command, "r", &pid_of_command);
if (io == NULL) {
printf("Failed -- popen fails\n");
return;
@@ -2576,11 +2592,11 @@ test_for_a_pmc(const char *pmc, int out_so_far)
goto out;
}
llen = strlen(line);
- if (line[(llen - 1)] == '\n') {
- line[(llen - 1)] = 0;
+ if (line[(llen-1)] == '\n') {
+ line[(llen-1)] = 0;
llen--;
}
- for (i = 2; i < (llen - len); i++) {
+ for(i=2; i<(llen-len); i++) {
if (strncmp(&line[i], "ERROR", 5) == 0) {
printf("Failed %s\n", line);
goto out;
@@ -2592,9 +2608,9 @@ test_for_a_pmc(const char *pmc, int out_so_far)
goto out;
}
len = strlen(line);
- for (j = 0; j < len; j++) {
+ for (j=0; j<len; j++) {
if (line[j] == ' ') {
- j++;
+ j++;
} else {
break;
}
@@ -2602,7 +2618,7 @@ test_for_a_pmc(const char *pmc, int out_so_far)
printf("Pass");
len = strlen(&line[j]);
if (len < 20) {
- for (k = 0; k < (20 - len); k++) {
+ for(k=0; k<(20-len); k++) {
printf(" ");
}
}
@@ -2616,8 +2632,8 @@ test_for_a_pmc(const char *pmc, int out_so_far)
}
printf("Failed -- '%s' not '%s'\n", line, resp);
out:
- my_pclose(io, pid_of_command);
-
+ my_pclose(io, pid_of_command);
+
}
static int
@@ -2625,16 +2641,15 @@ add_it_to(char **vars, int cur_cnt, char *name)
{
int i;
size_t len;
-
- for (i = 0; i < cur_cnt; i++) {
+ for(i=0; i<cur_cnt; i++) {
if (strcmp(vars[i], name) == 0) {
/* Already have */
- return (0);
+ return(0);
}
}
if (vars[cur_cnt] != NULL) {
- printf("Cur_cnt:%d filled with %s??\n",
- cur_cnt, vars[cur_cnt]);
+ printf("Cur_cnt:%d filled with %s??\n",
+ cur_cnt, vars[cur_cnt]);
exit(-1);
}
/* Ok its new */
@@ -2646,16 +2661,18 @@ add_it_to(char **vars, int cur_cnt, char *name)
}
memset(vars[cur_cnt], 0, len);
strcpy(vars[cur_cnt], name);
- return (1);
+ return(1);
}
static char *
build_command_for_exp(struct expression *exp)
{
/*
- * Build the pmcstat command to handle the passed in expression.
- * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ where NNN and QQQ represent
- * the PMC's in the expression uniquely..
+ * Build the pmcstat command to handle
+ * the passed in expression.
+ * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
+ * where NNN and QQQ represent the PMC's in the expression
+ * uniquely..
*/
char forming[1024];
int cnt_pmc, alloced_pmcs, i;
@@ -2686,39 +2703,39 @@ build_command_for_exp(struct expression *exp)
at = exp;
while (at) {
if (at->type == TYPE_VALUE_PMC) {
- if (add_it_to(vars, alloced_pmcs, at->name)) {
+ if(add_it_to(vars, alloced_pmcs, at->name)) {
alloced_pmcs++;
}
}
at = at->next;
}
/* Now we have a unique list in vars so create our command */
- mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
- for (i = 0; i < alloced_pmcs; i++) {
+ mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
+ for(i=0; i<alloced_pmcs; i++) {
mal += strlen(vars[i]) + 4; /* var + " -s " */
}
- cmd = malloc((mal + 2));
+ cmd = malloc((mal+2));
if (cmd == NULL) {
printf("%s out of mem\n", __FUNCTION__);
exit(-1);
}
- memset(cmd, 0, (mal + 2));
+ memset(cmd, 0, (mal+2));
strcpy(cmd, "/usr/sbin/pmcstat -w 1");
at = exp;
- for (i = 0; i < alloced_pmcs; i++) {
+ for(i=0; i<alloced_pmcs; i++) {
sprintf(forming, " -s %s", vars[i]);
strcat(cmd, forming);
free(vars[i]);
vars[i] = NULL;
}
free(vars);
- return (cmd);
+ return(cmd);
}
static int
user_expr(struct counters *cpu, int pos)
{
- int ret;
+ int ret;
double res;
struct counters *var;
struct expression *at;
@@ -2741,7 +2758,7 @@ user_expr(struct counters *cpu, int pos)
}
res = run_expr(master_exp, 1, NULL);
ret = printf("%1.3f", res);
- return (ret);
+ return(ret);
}
@@ -2757,10 +2774,9 @@ static void
run_tests(void)
{
int i, lenout;
-
printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
printf("------------------------------------------------------------------------\n");
- for (i = 0; i < valid_pmc_cnt; i++) {
+ for(i=0; i<valid_pmc_cnt; i++) {
lenout = printf("%s", valid_pmcs[i]);
fflush(stdout);
test_for_a_pmc(valid_pmcs[i], lenout);
@@ -2770,12 +2786,11 @@ static void
list_all(void)
{
int i, cnt, j;
-
printf("PMC Abbreviation\n");
printf("--------------------------------------------------------------\n");
- for (i = 0; i < valid_pmc_cnt; i++) {
+ for(i=0; i<valid_pmc_cnt; i++) {
cnt = printf("%s", valid_pmcs[i]);
- for (j = cnt; j < 52; j++) {
+ for(j=cnt; j<52; j++) {
printf(" ");
}
printf("%%%d\n", i);
@@ -2787,23 +2802,27 @@ int
main(int argc, char **argv)
{
int i, j, cnt;
- char *filename = NULL;
- char *name = NULL;
+ char *filename=NULL;
+ const char *name=NULL;
int help_only = 0;
int test_mode = 0;
+ int test_at = 0;
get_cpuid_set();
memset(glob_cpu, 0, sizeof(glob_cpu));
- while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) {
+ while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
switch (i) {
+ case 'A':
+ run_all = 1;
+ break;
case 'L':
list_all();
- return (0);
+ return(0);
case 'H':
printf("**********************************\n");
explain_all();
printf("**********************************\n");
- return (0);
+ return(0);
break;
case 'T':
test_mode = 1;
@@ -2817,7 +2836,7 @@ main(int argc, char **argv)
case 'e':
if (validate_expression(optarg)) {
printf("Unknown expression %s\n", optarg);
- return (0);
+ return(0);
}
name = optarg;
set_expression(optarg);
@@ -2840,9 +2859,9 @@ main(int argc, char **argv)
break;
case '?':
default:
- use:
+ use:
printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
- argv[0]);
+ argv[0]);
printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
printf("-v -- verbose dump debug type things -- you don't want this\n");
printf("-m N -- maximum to collect is N measurments\n");
@@ -2851,56 +2870,85 @@ main(int argc, char **argv)
printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
printf("-H -- Don't run anything, just explain all canned expressions\n");
printf("-T -- Test all PMC's defined by this processor\n");
- return (0);
+ printf("-A -- Run all canned tests\n");
+ return(0);
break;
};
}
- if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) {
+ if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
+ (test_mode == 0) && (master_exp == NULL)) {
printf("Without setting an expression we cannot dynamically gather information\n");
printf("you must supply a filename (and you probably want verbosity)\n");
goto use;
}
+ if (run_all && max_to_collect > 10) {
+ max_to_collect = 3;
+ }
if (test_mode) {
run_tests();
- return (0);
+ return(0);
}
printf("*********************************\n");
- if (master_exp == NULL) {
- (*the_cpu.explain) (name);
- } else {
+ if ((master_exp == NULL) && name) {
+ (*the_cpu.explain)(name);
+ } else if (master_exp) {
printf("Examine your expression ");
print_exp(master_exp);
printf("User defined threshold\n");
}
if (help_only) {
- return (0);
+ return(0);
+ }
+ if (run_all) {
+ more:
+ name = the_cpu.ents[test_at].name;
+ printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
+ test_at++;
+ if (set_expression(name) == -1) {
+ if (test_at >= the_cpu.number) {
+ goto done;
+ } else
+ goto more;
+ }
+
}
process_file(filename);
if (verbose >= 2) {
- for (i = 0; i < ncnts; i++) {
+ for (i=0; i<ncnts; i++) {
printf("Counter:%s cpu:%d index:%d\n",
- cnts[i].counter_name,
- cnts[i].cpu, i);
- for (j = 0; j < cnts[i].pos; j++) {
+ cnts[i].counter_name,
+ cnts[i].cpu, i);
+ for(j=0; j<cnts[i].pos; j++) {
printf(" val - %ld\n", (long int)cnts[i].vals[j]);
}
printf(" sum - %ld\n", (long int)cnts[i].sum);
}
}
if (expression == NULL) {
- return (0);
+ return(0);
}
- for (i = 0, cnt = 0; i < MAX_CPU; i++) {
- if (glob_cpu[i]) {
- do_expression(glob_cpu[i], -1);
- cnt++;
- if (cnt == cpu_count_out) {
- printf("\n");
- break;
- } else {
- printf("\t");
+ if (max_to_collect > 1) {
+ for(i=0, cnt=0; i<MAX_CPU; i++) {
+ if (glob_cpu[i]) {
+ do_expression(glob_cpu[i], -1);
+ cnt++;
+ if (cnt == cpu_count_out) {
+ printf("\n");
+ break;
+ } else {
+ printf("\t");
+ }
}
}
}
- return (0);
+ if (run_all && (test_at < the_cpu.number)) {
+ memset(glob_cpu, 0, sizeof(glob_cpu));
+ ncnts = 0;
+ printf("*********************************\n");
+ goto more;
+ } else if (run_all) {
+ done:
+ printf("*********************************\n");
+ }
+ return(0);
}
OpenPOWER on IntegriCloud