diff options
author | rrs <rrs@FreeBSD.org> | 2015-12-10 01:52:29 +0000 |
---|---|---|
committer | rrs <rrs@FreeBSD.org> | 2015-12-10 01:52:29 +0000 |
commit | 7d03b5290e120264ab7a32f6b787241e05841c65 (patch) | |
tree | 360ed40977204adbb0b4dba0444489815d62e6ad /usr.sbin | |
parent | 8f18910d2bd36d55a66bd317d2a7a88fa03f1c06 (diff) | |
download | FreeBSD-src-7d03b5290e120264ab7a32f6b787241e05841c65.zip FreeBSD-src-7d03b5290e120264ab7a32f6b787241e05841c65.tar.gz |
Fix several typos and bugs within pmcstudy. Also highlight the one SB test
that is failing (and is likely a problem in the actual PMC definitions). Also
add the -A option to run all canned tests.
Sponsored by: Netflix Inc.
Diffstat (limited to 'usr.sbin')
-rw-r--r-- | usr.sbin/pmcstudy/pmcstudy.8 | 4 | ||||
-rw-r--r-- | usr.sbin/pmcstudy/pmcstudy.c | 1390 |
2 files changed, 722 insertions, 672 deletions
diff --git a/usr.sbin/pmcstudy/pmcstudy.8 b/usr.sbin/pmcstudy/pmcstudy.8 index ebe63c4..7c03897 100644 --- a/usr.sbin/pmcstudy/pmcstudy.8 +++ b/usr.sbin/pmcstudy/pmcstudy.8 @@ -32,7 +32,7 @@ .Nd Perform various studies on a system's overall PMCs. .Sh SYNOPSIS .Nm -.Oo Fl i Ar inputfile | Fl T | Fl v | Fl m Ar max | Fl e exp | Fl Ar E | Fl h | fl H Oc +.Oo Fl i Ar inputfile | Fl A | Fl T | Fl v | Fl m Ar max | Fl e exp | Fl Ar E | Fl h | fl H Oc .Nm .Fl i Ar inputfile .Nm @@ -128,6 +128,8 @@ test like "UOPS_RETIRED.RETIRE_SLOTS / (4 * CPU_CLK_UNHALTED.THREAD_P)". .It Fl L This option will list all known PMCs and their abbreviation (%NNN). +.It Fl A +Run all canned tests. .El .Sh SEE ALSO .Xr pmc 3 , diff --git a/usr.sbin/pmcstudy/pmcstudy.c b/usr.sbin/pmcstudy/pmcstudy.c index 06ded97..16c9f51 100644 --- a/usr.sbin/pmcstudy/pmcstudy.c +++ b/usr.sbin/pmcstudy/pmcstudy.c @@ -38,6 +38,9 @@ #include "eval_expr.h" __FBSDID("$FreeBSD$"); +static int max_pmc_counters = 1; +static int run_all = 0; + #define MAX_COUNTER_SLOTS 1024 #define MAX_NLEN 64 #define MAX_CPU 64 @@ -45,20 +48,20 @@ static int verbose = 0; extern char **environ; extern struct expression *master_exp; -struct expression *master_exp = NULL; +struct expression *master_exp=NULL; #define PMC_INITIAL_ALLOC 512 extern char **valid_pmcs; char **valid_pmcs = NULL; extern int valid_pmc_cnt; -int valid_pmc_cnt = 0; +int valid_pmc_cnt=0; extern int pmc_allocated_cnt; -int pmc_allocated_cnt = 0; +int pmc_allocated_cnt=0; /* * The following two varients on popen and pclose with * the cavet that they get you the PID so that you - * can supply it to pclose so it can send a SIGTERM + * can supply it to pclose so it can send a SIGTERM * to the process. 
*/ static FILE * @@ -75,7 +78,7 @@ my_popen(const char *command, const char *dir, pid_t *p_pid) if ((strcmp(dir, "r") != 0) && (strcmp(dir, "w") != 0)) { errno = EINVAL; - return (NULL); + return(NULL); } if (pipe(pdesin) < 0) return (NULL); @@ -94,14 +97,14 @@ my_popen(const char *command, const char *dir, pid_t *p_pid) argv[3] = NULL; switch (pid = fork()) { - case -1: /* Error. */ + case -1: /* Error. */ (void)close(pdesin[0]); (void)close(pdesin[1]); (void)close(pdesout[0]); (void)close(pdesout[1]); return (NULL); /* NOTREACHED */ - case 0: /* Child. */ + case 0: /* Child. */ /* Close out un-used sides */ (void)close(pdesin[1]); (void)close(pdesout[0]); @@ -129,8 +132,8 @@ my_popen(const char *command, const char *dir, pid_t *p_pid) (void)close(pdesin[0]); (void)close(pdesout[0]); (void)close(pdesout[1]); - return (io_out); - } else { + return(io_out); + } else { /* Prepare the input stream */ io_in = fdopen(pdesout[0], "r"); (void)close(pdesout[1]); @@ -146,7 +149,7 @@ my_popen(const char *command, const char *dir, pid_t *p_pid) * if already `pclosed', or waitpid returns an error. */ static void -my_pclose(FILE * io, pid_t the_pid) +my_pclose(FILE *io, pid_t the_pid) { int pstat; pid_t pid; @@ -164,41 +167,41 @@ my_pclose(FILE * io, pid_t the_pid) struct counters { struct counters *next_cpu; - char counter_name[MAX_NLEN]; /* Name of counter */ - int cpu; /* CPU we are on */ - int pos; /* Index we are filling to. */ + char counter_name[MAX_NLEN]; /* Name of counter */ + int cpu; /* CPU we are on */ + int pos; /* Index we are filling to. 
*/ uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ - uint64_t sum; /* Summary of entries */ + uint64_t sum; /* Summary of entries */ }; extern struct counters *glob_cpu[MAX_CPU]; struct counters *glob_cpu[MAX_CPU]; extern struct counters *cnts; -struct counters *cnts = NULL; +struct counters *cnts=NULL; extern int ncnts; -int ncnts = 0; +int ncnts=0; -extern int (*expression) (struct counters *, int); -int (*expression) (struct counters *, int); +extern int (*expression)(struct counters *, int); +int (*expression)(struct counters *, int); -static const char *threshold = NULL; +static const char *threshold=NULL; static const char *command; struct cpu_entry { const char *name; const char *thresh; const char *command; - int (*func) (struct counters *, int); + int (*func)(struct counters *, int); + int counters_required; }; - struct cpu_type { char cputype[32]; int number; struct cpu_entry *ents; - void (*explain) (const char *name); + void (*explain)(const char *name); }; extern struct cpu_type the_cpu; struct cpu_type the_cpu; @@ -207,7 +210,6 @@ static void explain_name_sb(const char *name) { const char *mythresh; - if (strcmp(name, "allocstall1") == 0) { printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; @@ -218,10 +220,10 @@ explain_name_sb(const char *name) printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .2"; } else if (strcmp(name, "splitload") == 0) { - printf("Examine MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); + printf("Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "splitstore") == 0) { - printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); + printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); mythresh = "thresh >= .01"; } else if (strcmp(name, "contested") == 0) { printf("Examine 
(MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); @@ -279,7 +281,7 @@ explain_name_sb(const char *name) } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; - } + } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } @@ -287,7 +289,6 @@ static void explain_name_ib(const char *name) { const char *mythresh; - if (strcmp(name, "br_miss") == 0) { printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); @@ -307,7 +308,7 @@ explain_name_ib(const char *name) mythresh = "thresh >= .2"; } else if (strcmp(name, "itlbmiss") == 0) { printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); - mythresh = "thresh > .05"; + mythresh = "thresh > .05"; } else if (strcmp(name, "icachemiss") == 0) { printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; @@ -325,7 +326,7 @@ explain_name_ib(const char *name) printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "splitstore") == 0) { - printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); + printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); mythresh = "thresh >= .01"; } else if (strcmp(name, "aliasing_4k") == 0) { printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); @@ -362,7 +363,7 @@ explain_name_ib(const char *name) } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; - } + } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } @@ -371,7 +372,6 @@ static void explain_name_has(const char *name) { const char *mythresh; - if (strcmp(name, "eff1") == 0) { printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 
*CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh < .75"; @@ -380,7 +380,7 @@ explain_name_has(const char *name) mythresh = "thresh > 1.0"; } else if (strcmp(name, "itlbmiss") == 0) { printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); - mythresh = "thresh > .05"; + mythresh = "thresh > .05"; } else if (strcmp(name, "icachemiss") == 0) { printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh > .05"; @@ -406,10 +406,10 @@ explain_name_has(const char *name) printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .05"; } else if (strcmp(name, "splitload") == 0) { - printf("Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); + printf("Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); mythresh = "thresh >= .1"; } else if (strcmp(name, "splitstore") == 0) { - printf("Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n"); + printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); mythresh = "thresh >= .01"; } else if (strcmp(name, "aliasing_4k") == 0) { printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); @@ -442,10 +442,12 @@ explain_name_has(const char *name) } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; - } + } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } + + static struct counters * find_counter(struct counters *base, const char *name) { @@ -454,16 +456,16 @@ find_counter(struct counters *base, const char *name) at = base; len = strlen(name); - while (at) { + while(at) { if (strncmp(at->counter_name, name, len) == 0) { - return (at); + return(at); } at = at->next_cpu; } printf("Can't find counter %s\n", name); printf("We have:\n"); at = base; - while (at) { + while(at) { printf("- %s\n", at->counter_name); at = at->next_cpu; } @@ -478,7 +480,6 @@ allocstall1(struct 
counters *cpu, int pos) struct counters *partial; struct counters *unhalt; double un, par, res; - unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); if (pos != -1) { @@ -488,9 +489,9 @@ allocstall1(struct counters *cpu, int pos) par = partial->sum * 1.0; un = unhalt->sum * 1.0; } - res = par / un; + res = par/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int @@ -501,7 +502,6 @@ allocstall2(struct counters *cpu, int pos) struct counters *partial; struct counters *unhalt; double un, par, res; - unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); if (pos != -1) { @@ -511,9 +511,9 @@ allocstall2(struct counters *cpu, int pos) par = partial->sum * 1.0; un = unhalt->sum * 1.0; } - res = par / un; + res = par/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int @@ -522,14 +522,12 @@ br_mispredict(struct counters *cpu, int pos) struct counters *brctr; struct counters *unhalt; int ret; - /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ double br, un, con, res; - con = 20.0; - + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); - brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); + brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); if (pos != -1) { br = brctr->vals[pos] * 1.0; un = unhalt->vals[pos] * 1.0; @@ -537,9 +535,9 @@ br_mispredict(struct counters *cpu, int pos) br = brctr->sum * 1.0; un = unhalt->sum * 1.0; } - res = (con * br) / un; - ret = printf("%1.3f", res); - return (ret); + res = (con * br)/un; + ret = printf("%1.3f", res); + return(ret); } static int @@ -549,25 +547,22 @@ br_mispredictib(struct counters *cpu, int pos) struct counters *unhalt; struct counters *clear, *clear2, *clear3; struct counters *uops; - struct counters *recv; + struct counters *recv; struct counters *iss; - /* "pmcstat -s 
CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ int ret; - - /* - * (BR_MISP_RETIRED.ALL_BRANCHES / (BR_MISP_RETIRED.ALL_BRANCHES + - * MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - - * UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * - * CPU_CLK_UNHALTED.THREAD))) - * + /* + * (BR_MISP_RETIRED.ALL_BRANCHES / + * (BR_MISP_RETIRED.ALL_BRANCHES + + * MACHINE_CLEAR.COUNT) * + * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) + * */ double br, cl, cl2, cl3, uo, re, un, con, res, is; - con = 4.0; - + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); - brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); + brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); @@ -593,11 +588,12 @@ br_mispredictib(struct counters *cpu, int pos) is = iss->sum * 1.0; un = unhalt->sum * 1.0; } - res = (br / (br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); - ret = printf("%1.3f", res); - return (ret); + res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); + ret = printf("%1.3f", res); + return(ret); } + static int br_mispredict_broad(struct counters *cpu, int pos) { @@ -611,9 +607,9 @@ br_mispredict_broad(struct counters *cpu, int pos) double br, cl, uo, uo_r, re, con, un, res; con = 4.0; - + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); - brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); + brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); uops = find_counter(cpu, "UOPS_ISSUED.ANY"); uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); @@ -635,8 
+631,8 @@ br_mispredict_broad(struct counters *cpu, int pos) re = recv->sum * 1.0; } res = br / (br + cl) * (uo - uo_r + con * re) / (un * con); - ret = printf("%1.3f", res); - return (ret); + ret = printf("%1.3f", res); + return(ret); } static int @@ -647,12 +643,9 @@ splitloadib(struct counters *cpu, int pos) struct counters *l1d, *ldblock; struct counters *unhalt; double un, memd, res, l1, ldb; - - /* - * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * - * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P "pmcstat -s - * CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s - * MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", + /* + * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P + * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", */ unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); @@ -670,11 +663,12 @@ splitloadib(struct counters *cpu, int pos) ldb = ldblock->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((l1 / memd) * ldb) / un; + res = ((l1 / memd) * ldb)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } + static int splitload(struct counters *cpu, int pos) { @@ -682,7 +676,31 @@ splitload(struct counters *cpu, int pos) struct counters *mem; struct counters *unhalt; double con, un, memd, res; +/* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ + + con = 5.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS"); + if (pos != -1) { + memd = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + memd = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (memd * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +splitload_sb(struct counters *cpu, int pos) +{ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, un, memd, res; /* 4 - 
(MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ con = 5.0; @@ -695,23 +713,20 @@ splitload(struct counters *cpu, int pos) memd = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (memd * con) / un; + res = (memd * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } + static int -splitstore(struct counters *cpu, int pos) +splitstore_sb(struct counters *cpu, int pos) { - /* - * 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES - * (thresh > 0.01) - */ + /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ int ret; struct counters *mem_split; struct counters *mem_stores; double memsplit, memstore, res; - mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); if (pos != -1) { @@ -721,19 +736,40 @@ splitstore(struct counters *cpu, int pos) memsplit = mem_split->sum * 1.0; memstore = mem_stores->sum * 1.0; } - res = memsplit / memstore; + res = memsplit/memstore; + ret = printf("%1.3f", res); + return(ret); +} + + + +static int +splitstore(struct counters *cpu, int pos) +{ + /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */ + int ret; + struct counters *mem_split; + struct counters *mem_stores; + double memsplit, memstore, res; + mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES"); + mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES"); + if (pos != -1) { + memsplit = mem_split->vals[pos] * 1.0; + memstore = mem_stores->vals[pos] * 1.0; + } else { + memsplit = mem_split->sum * 1.0; + memstore = mem_stores->sum * 1.0; + } + res = memsplit/memstore; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int contested(struct counters *cpu, int pos) { - /* - * 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / - * CPU_CLK_UNHALTED.THREAD_P (thresh >.05) - */ + /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P 
(thresh >.05) */ int ret; struct counters *mem; struct counters *unhalt; @@ -749,18 +785,15 @@ contested(struct counters *cpu, int pos) memd = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (memd * con) / un; + res = (memd * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int contested_has(struct counters *cpu, int pos) { - /* - * 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / - * CPU_CLK_UNHALTED.THREAD_P (thresh >.05) - */ + /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ int ret; struct counters *mem; struct counters *unhalt; @@ -776,18 +809,15 @@ contested_has(struct counters *cpu, int pos) memd = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (memd * con) / un; + res = (memd * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int contestedbroad(struct counters *cpu, int pos) { - /* - * 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / - * CPU_CLK_UNHALTED.THREAD_P (thresh >.05) - */ + /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ int ret; struct counters *mem; struct counters *mem2; @@ -797,7 +827,7 @@ contestedbroad(struct counters *cpu, int pos) con = 84.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); - mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); + mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); if (pos != -1) { memd = mem->vals[pos] * 1.0; @@ -808,19 +838,16 @@ contestedbroad(struct counters *cpu, int pos) memtoo = mem2->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((memd * con) + memtoo) / un; + res = ((memd * con) + memtoo)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int blockstoreforward(struct counters *cpu, int pos) { - /* - * 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P - * (thresh >= .05) - */ + /* 7 - 
(LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ int ret; struct counters *ldb; struct counters *unhalt; @@ -836,19 +863,17 @@ blockstoreforward(struct counters *cpu, int pos) ld = ldb->sum * 1.0; un = unhalt->sum * 1.0; } - res = (ld * con) / un; + res = (ld * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int cache2(struct counters *cpu, int pos) { - /* - * ** Suspect *** 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + - * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + - * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / - * CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + /* ** Suspect *** + * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem1, *mem2, *mem3; @@ -874,17 +899,16 @@ cache2(struct counters *cpu, int pos) me_3 = mem3->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3)) / un; + res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int datasharing(struct counters *cpu, int pos) { - /* - * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ - * CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + /* + * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem; @@ -901,9 +925,9 @@ datasharing(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (me * con) / un; + res = (me * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } @@ -911,9 +935,8 @@ datasharing(struct counters *cpu, int pos) static int datasharing_has(struct counters *cpu, int pos) { - /* - * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ - * CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + /* + * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; 
struct counters *mem; @@ -930,9 +953,9 @@ datasharing_has(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (me * con) / un; + res = (me * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } @@ -940,9 +963,8 @@ datasharing_has(struct counters *cpu, int pos) static int cache2ib(struct counters *cpu, int pos) { - /* - * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P - * (thresh >.2) + /* + * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem; @@ -959,9 +981,9 @@ cache2ib(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (con * me) / un; + res = (con * me)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int @@ -969,9 +991,9 @@ cache2has(struct counters *cpu, int pos) { /* * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ - * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + - * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) / - * CPU_CLK_UNHALTED.THREAD_P + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) + * / CPU_CLK_UNHALTED.THREAD_P */ int ret; struct counters *mem1, *mem2, *mem3; @@ -996,17 +1018,17 @@ cache2has(struct counters *cpu, int pos) me3 = mem3->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((me1 * con1) + (me2 * con2) + (me3 * con3)) / un; + res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } + static int cache2broad(struct counters *cpu, int pos) { - /* - * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P - * (thresh >.2) + /* + * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) */ int ret; struct counters *mem; @@ -1023,19 +1045,16 @@ cache2broad(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (con * me) / un; + res = (con * me)/un; ret = printf("%1.3f", res); - return 
(ret); + return(ret); } static int cache1(struct counters *cpu, int pos) { - /* - * 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / - * CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) - */ + /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ int ret; struct counters *mem; struct counters *unhalt; @@ -1051,18 +1070,15 @@ cache1(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (me * con) / un; + res = (me * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int cache1ib(struct counters *cpu, int pos) { - /* - * 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / - * CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) - */ + /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ int ret; struct counters *mem; struct counters *unhalt; @@ -1078,19 +1094,16 @@ cache1ib(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (me * con) / un; + res = (me * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int cache1broad(struct counters *cpu, int pos) { - /* - * 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / - * CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) - */ + /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ int ret; struct counters *mem; struct counters *unhalt; @@ -1106,20 +1119,16 @@ cache1broad(struct counters *cpu, int pos) me = mem->sum * 1.0; un = unhalt->sum * 1.0; } - res = (me * con) / un; + res = (me * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int dtlb_missload(struct counters *cpu, int pos) { - /* - * 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + - * DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t - * >=.1) - */ + /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ int ret; struct counters *dtlb_m, 
*dtlb_d; struct counters *unhalt; @@ -1138,40 +1147,39 @@ dtlb_missload(struct counters *cpu, int pos) d2 = dtlb_d->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((d1 * con) + d2) / un; + res = ((d1 * con) + d2)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int dtlb_missstore(struct counters *cpu, int pos) { - /* - * ((DTLB_STORE_MISSES.STLB_HIT * 7) + - * DTLB_STORE_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t - * >= .1) + /* + * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / + * CPU_CLK_UNHALTED.THREAD_P (t >= .1) */ - int ret; - struct counters *dtsb_m, *dtsb_d; - struct counters *unhalt; - double con, un, d1, d2, res; - - con = 7.0; - unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); - dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); - dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); - if (pos != -1) { - d1 = dtsb_m->vals[pos] * 1.0; - d2 = dtsb_d->vals[pos] * 1.0; - un = unhalt->vals[pos] * 1.0; - } else { - d1 = dtsb_m->sum * 1.0; - d2 = dtsb_d->sum * 1.0; - un = unhalt->sum * 1.0; - } - res = ((d1 * con) + d2) / un; - ret = printf("%1.3f", res); - return (ret); + int ret; + struct counters *dtsb_m, *dtsb_d; + struct counters *unhalt; + double con, un, d1, d2, res; + + con = 7.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); + dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); + if (pos != -1) { + d1 = dtsb_m->vals[pos] * 1.0; + d2 = dtsb_d->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + d1 = dtsb_m->sum * 1.0; + d2 = dtsb_d->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = ((d1 * con) + d2)/un; + ret = printf("%1.3f", res); + return(ret); } static int @@ -1192,19 +1200,16 @@ itlb_miss(struct counters *cpu, int pos) d1 = itlb->sum * 1.0; un = unhalt->sum * 1.0; } - res = d1 / un; + res = d1/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int 
itlb_miss_broad(struct counters *cpu, int pos) { - /* - * (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / - * CPU_CLK_UNTHREAD_P - */ + /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */ int ret; struct counters *itlb; struct counters *unhalt; @@ -1223,19 +1228,16 @@ itlb_miss_broad(struct counters *cpu, int pos) un = unhalt->sum * 1.0; k = four_k->sum * 1.0; } - res = (7.0 * k + d1) / un; + res = (7.0 * k + d1)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int icache_miss(struct counters *cpu, int pos) { - /* - * (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / - * CPU_CLK_UNHALTED.THREAD_P IB - */ + /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ int ret; struct counters *itlb, *icache; @@ -1254,9 +1256,9 @@ icache_miss(struct counters *cpu, int pos) ic = icache->sum * 1.0; un = unhalt->sum * 1.0; } - res = (ic - d1) / un; + res = (ic-d1)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } @@ -1280,16 +1282,16 @@ icache_miss_has(struct counters *cpu, int pos) ic = icache->sum * 1.0; un = unhalt->sum * 1.0; } - res = (con * ic) / un; + res = (con * ic)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int lcp_stall(struct counters *cpu, int pos) { - /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ + /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ int ret; struct counters *ild; struct counters *unhalt; @@ -1304,9 +1306,9 @@ lcp_stall(struct counters *cpu, int pos) d1 = ild->sum * 1.0; un = unhalt->sum * 1.0; } - res = d1 / un; + res = d1/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } @@ -1314,10 +1316,7 @@ lcp_stall(struct counters *cpu, int pos) static int frontendstall(struct counters *cpu, int pos) { - /* - * 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * - * 4) (thresh >= .15) - */ + /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ int ret; 
struct counters *idq; struct counters *unhalt; @@ -1333,20 +1332,17 @@ frontendstall(struct counters *cpu, int pos) id = idq->sum * 1.0; un = unhalt->sum * 1.0; } - res = id / (un * con); + res = id/(un * con); ret = printf("%1.3f", res); - return (ret); + return(ret); } static int clears(struct counters *cpu, int pos) { - /* - * 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + - * MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P - * (thresh >= .02) - */ - + /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) + * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ + int ret; struct counters *clr1, *clr2, *clr3; struct counters *unhalt; @@ -1357,7 +1353,7 @@ clears(struct counters *cpu, int pos) clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); - + if (pos != -1) { cl1 = clr1->vals[pos] * 1.0; cl2 = clr2->vals[pos] * 1.0; @@ -1369,11 +1365,13 @@ clears(struct counters *cpu, int pos) cl3 = clr3->sum * 1.0; un = unhalt->sum * 1.0; } - res = ((cl1 + cl2 + cl3) * con) / un; + res = ((cl1 + cl2 + cl3) * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } + + static int clears_broad(struct counters *cpu, int pos) { @@ -1402,11 +1400,15 @@ clears_broad(struct counters *cpu, int pos) un = unhalt->sum * 1.0; } /* Formula not listed but extrapulated to add the cy ?? 
*/ - res = ((cl1 + cl2 + cl3 + cy) * con) / un; + res = ((cl1 + cl2 + cl3 + cy) * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } + + + + static int microassist(struct counters *cpu, int pos) { @@ -1426,11 +1428,12 @@ microassist(struct counters *cpu, int pos) id = idq->sum * 1.0; un = unhalt->sum * 1.0; } - res = id / (un * con); + res = id/(un * con); ret = printf("%1.3f", res); - return (ret); + return(ret); } + static int microassist_broad(struct counters *cpu, int pos) { @@ -1457,19 +1460,17 @@ microassist_broad(struct counters *cpu, int pos) uoi = uopiss->sum * 1.0; uor = uopret->sum * 1.0; } - res = (uor / uoi) * (id / (un * con)); + res = (uor/uoi) * (id/(un * con)); ret = printf("%1.3f", res); - return (ret); + return(ret); } + static int aliasing(struct counters *cpu, int pos) { - /* - * 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / - * CPU_CLK_UNHALTED.THREAD_P (thresh > .1) - */ - int ret; + /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ + int ret; struct counters *ld; struct counters *unhalt; double un, lds, con, res; @@ -1484,19 +1485,16 @@ aliasing(struct counters *cpu, int pos) lds = ld->sum * 1.0; un = unhalt->sum * 1.0; } - res = (lds * con) / un; + res = (lds * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int aliasing_broad(struct counters *cpu, int pos) { - /* - * 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / - * CPU_CLK_UNHALTED.THREAD_P (thresh > .1) - */ - int ret; + /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ + int ret; struct counters *ld; struct counters *unhalt; double un, lds, con, res; @@ -1511,9 +1509,9 @@ aliasing_broad(struct counters *cpu, int pos) lds = ld->sum * 1.0; un = unhalt->sum * 1.0; } - res = (lds * con) / un; + res = (lds * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } @@ -1521,7 +1519,7 @@ static int fpassists(struct counters *cpu, int pos) { /* 16 - 
FP_ASSIST.ANY/INST_RETIRED.ANY_P */ - int ret; + int ret; struct counters *fp; struct counters *inst; double un, fpd, res; @@ -1535,19 +1533,16 @@ fpassists(struct counters *cpu, int pos) fpd = fp->sum * 1.0; un = inst->sum * 1.0; } - res = fpd / un; + res = fpd/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int otherassistavx(struct counters *cpu, int pos) { - /* - * 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P - * thresh .1 - */ - int ret; + /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ + int ret; struct counters *oth; struct counters *unhalt; double un, ot, con, res; @@ -1562,24 +1557,21 @@ otherassistavx(struct counters *cpu, int pos) ot = oth->sum * 1.0; un = unhalt->sum * 1.0; } - res = (ot * con) / un; + res = (ot * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int otherassistsse(struct counters *cpu, int pos) { - int ret; + int ret; struct counters *oth; struct counters *unhalt; double un, ot, con, res; - /* - * 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P - * thresh .1 - */ + /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ con = 75.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); @@ -1590,24 +1582,21 @@ otherassistsse(struct counters *cpu, int pos) ot = oth->sum * 1.0; un = unhalt->sum * 1.0; } - res = (ot * con) / un; + res = (ot * con)/un; ret = printf("%1.3f", res); - return (ret); + return(ret); } static int efficiency1(struct counters *cpu, int pos) { - int ret; + int ret; struct counters *uops; struct counters *unhalt; double un, ot, con, res; - /* - * 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look - * if thresh < .9 - */ + /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ con = 4.0; unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); uops = find_counter(cpu, 
"UOPS_RETIRED.RETIRE_SLOTS"); @@ -1618,24 +1607,21 @@ efficiency1(struct counters *cpu, int pos) ot = uops->sum * 1.0; un = unhalt->sum * 1.0; } - res = ot / (con * un); + res = ot/(con * un); ret = printf("%1.3f", res); - return (ret); + return(ret); } static int efficiency2(struct counters *cpu, int pos) { - int ret; + int ret; struct counters *uops; struct counters *unhalt; double un, ot, res; - /* - * 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. - * (comp factor) - */ + /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); uops = find_counter(cpu, "INST_RETIRED.ANY_P"); if (pos != -1) { @@ -1645,205 +1631,206 @@ efficiency2(struct counters *cpu, int pos) ot = uops->sum * 1.0; un = unhalt->sum * 1.0; } - res = un / ot; + res = un/ot; ret = printf("%1.3f", res); - return (ret); + return(ret); } -#define SANDY_BRIDGE_COUNT 20 +#define SANDY_BRIDGE_COUNT 20 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { - /*01*/ {"allocstall1", "thresh > .05", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", - allocstall1}, - /*02*/ {"allocstall2", "thresh > .05", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1", - allocstall2}, - /*03*/ {"br_miss", "thresh >= .2", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", - br_mispredict}, - /*04*/ {"splitload", "thresh >= .1", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", - splitload}, - /*05*/ {"splitstore", "thresh >= .01", - "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", - splitstore}, - /*06*/ {"contested", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - contested}, - /*07*/ {"blockstorefwd", "thresh >= .05", - "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", - 
blockstoreforward}, - /*08*/ {"cache2", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache2}, - /*09*/ {"cache1", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache1}, - /*10*/ {"dtlbmissload", "thresh >= .1", - "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missload}, - /*11*/ {"dtlbmissstore", "thresh >= .05", - "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missstore}, - /*12*/ {"frontendstall", "thresh >= .15", - "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", - frontendstall}, - /*13*/ {"clears", "thresh >= .02", - "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", - clears}, - /*14*/ {"microassist", "thresh >= .05", - "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", - microassist}, - /*15*/ {"aliasing_4k", "thresh >= .1", - "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - aliasing}, - /*16*/ {"fpassist", "look for a excessive value", - "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", - fpassists}, - /*17*/ {"otherassistavx", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistavx}, - /*18*/ {"otherassistsse", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistsse}, - /*19*/ {"eff1", "thresh < .9", - "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency1}, - /*20*/ {"eff2", "thresh > 1.0", - "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency2}, +/*01*/ { "allocstall1", "thresh > 
.05", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", + allocstall1, 2 }, +/* -- not defined for SB right (partial-rat_stalls) 02*/ + { "allocstall2", "thresh > .05", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1", + allocstall2, 2 }, +/*03*/ { "br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", + br_mispredict, 2 }, +/*04*/ { "splitload", "thresh >= .1", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", + splitload_sb, 2 }, +/* 05*/ { "splitstore", "thresh >= .01", + "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", + splitstore_sb, 2 }, +/*06*/ { "contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + contested, 2 }, +/*07*/ { "blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward, 2 }, +/*08*/ { "cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2, 4 }, +/*09*/ { "cache1", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1, 2 }, +/*10*/ { "dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload, 3 }, +/*11*/ { "dtlbmissstore", "thresh >= .05", + "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missstore, 3 }, +/*12*/ { "frontendstall", "thresh >= .15", + "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + frontendstall, 2 }, +/*13*/ { "clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s 
MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears, 4 }, +/*14*/ { "microassist", "thresh >= .05", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", + microassist, 2 }, +/*15*/ { "aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing, 2 }, +/*16*/ { "fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists, 2 }, +/*17*/ { "otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx, 2}, +/*18*/ { "otherassistsse", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistsse, 2 }, +/*19*/ { "eff1", "thresh < .9", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1, 2 }, +/*20*/ { "eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2, 2 }, }; #define IVY_BRIDGE_COUNT 21 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { - /*1*/ {"eff1", "thresh < .75", - "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency1}, - /*2*/ {"eff2", "thresh > 1.0", - "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency2}, - /*3*/ {"itlbmiss", "thresh > .05", - "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - itlb_miss}, - /*4*/ {"icachemiss", "thresh > .05", - "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - icache_miss}, - /*5*/ {"lcpstall", "thresh > .05", - "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", - lcp_stall}, - /*6*/ {"cache1", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache1ib}, - /*7*/ {"cache2", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s 
CPU_CLK_UNHALTED.THREAD_P -w 1", - cache2ib}, - /*8*/ {"contested", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - contested}, - /*9*/ {"datashare", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", - datasharing}, - /*10*/ {"blockstorefwd", "thresh >= .05", - "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", - blockstoreforward}, - /*11*/ {"splitload", "thresh >= .1", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", - splitloadib}, - /*12*/ {"splitstore", "thresh >= .01", - "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", - splitstore}, - /*13*/ {"aliasing_4k", "thresh >= .1", - "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - aliasing}, - /*14*/ {"dtlbmissload", "thresh >= .1", - "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missload}, - /*15*/ {"dtlbmissstore", "thresh >= .05", - "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missstore}, - /*16*/ {"br_miss", "thresh >= .2", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", - br_mispredictib}, - /*17*/ {"clears", "thresh >= .02", - "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", - clears}, - /*18*/ {"microassist", "thresh >= .05", - "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", - microassist}, - /*19*/ {"fpassist", "look for a excessive value", - "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", - fpassists}, - /*20*/ 
{"otherassistavx", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistavx}, - /*21*/ {"otherassistsse", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistsse}, +/*1*/ { "eff1", "thresh < .75", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1, 2 }, +/*2*/ { "eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2, 2 }, +/*3*/ { "itlbmiss", "thresh > .05", + "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + itlb_miss, 2 }, +/*4*/ { "icachemiss", "thresh > .05", + "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + icache_miss, 3 }, +/*5*/ { "lcpstall", "thresh > .05", + "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", + lcp_stall, 2 }, +/*6*/ { "cache1", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1ib, 2 }, +/*7*/ { "cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2ib, 2 }, +/*8*/ { "contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + contested, 2 }, +/*9*/ { "datashare", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + datasharing, 2 }, +/*10*/ { "blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward, 2 }, +/*11*/ { "splitload", "thresh >= .1", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", + splitloadib, 4 }, +/*12*/ { "splitstore", "thresh >= .01", + "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", + splitstore, 2 }, 
+/*13*/ { "aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing, 2 }, +/*14*/ { "dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload , 3}, +/*15*/ { "dtlbmissstore", "thresh >= .05", + "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missstore, 3 }, +/*16*/ { "br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", + br_mispredictib, 8 }, +/*17*/ { "clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears, 4 }, +/*18*/ { "microassist", "thresh >= .05", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", + microassist, 2 }, +/*19*/ { "fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists, 2 }, +/*20*/ { "otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx , 2}, +/*21*/ { "otherassistsse", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistsse, 2 }, }; #define HASWELL_COUNT 20 static struct cpu_entry haswell[HASWELL_COUNT] = { - /*1*/ {"eff1", "thresh < .75", - "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency1}, - /*2*/ {"eff2", "thresh > 1.0", - "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency2}, - /*3*/ {"itlbmiss", "thresh > .05", - "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - itlb_miss}, - /*4*/ 
{"icachemiss", "thresh > .05", - "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", - icache_miss_has}, - /*5*/ {"lcpstall", "thresh > .05", - "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", - lcp_stall}, - /*6*/ {"cache1", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache1ib}, - /*7*/ {"cache2", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache2has}, - /*8*/ {"contested", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", - contested_has}, - /*9*/ {"datashare", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", - datasharing_has}, - /*10*/ {"blockstorefwd", "thresh >= .05", - "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", - blockstoreforward}, - /*11*/ {"splitload", "thresh >= .1", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", - splitload}, - /*12*/ {"splitstore", "thresh >= .01", - "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", - splitstore}, - /*13*/ {"aliasing_4k", "thresh >= .1", - "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - aliasing}, - /*14*/ {"dtlbmissload", "thresh >= .1", - "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missload}, - /*15*/ {"br_miss", "thresh >= .2", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", - br_mispredict}, - /*16*/ {"clears", "thresh >= .02", - "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", - clears}, - /*17*/ {"microassist", "thresh >= .05", - "pmcstat -s IDQ.MS_UOPS,cmask=1 -s 
CPU_CLK_UNHALTED.THREAD_P -w 1", - microassist}, - /*18*/ {"fpassist", "look for a excessive value", - "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", - fpassists}, - /*19*/ {"otherassistavx", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistavx}, - /*20*/ {"otherassistsse", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistsse}, +/*1*/ { "eff1", "thresh < .75", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1, 2 }, +/*2*/ { "eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2, 2 }, +/*3*/ { "itlbmiss", "thresh > .05", + "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + itlb_miss, 2 }, +/*4*/ { "icachemiss", "thresh > .05", + "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", + icache_miss_has, 2 }, +/*5*/ { "lcpstall", "thresh > .05", + "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", + lcp_stall, 2 }, +/*6*/ { "cache1", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1ib, 2 }, +/*7*/ { "cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2has, 4 }, +/*8*/ { "contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + contested_has, 2 }, +/*9*/ { "datashare", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + datasharing_has, 2 }, +/*10*/ { "blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward, 2 }, +/*11*/ { "splitload", "thresh >= .1", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s 
MEM_UOPS_RETIRED.SPLIT_LOADS -w 1", + splitload , 2}, +/*12*/ { "splitstore", "thresh >= .01", + "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", + splitstore, 2 }, +/*13*/ { "aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing, 2 }, +/*14*/ { "dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload, 3 }, +/*15*/ { "br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", + br_mispredict, 2 }, +/*16*/ { "clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears, 4 }, +/*17*/ { "microassist", "thresh >= .05", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", + microassist, 2 }, +/*18*/ { "fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists, 2 }, +/*19*/ { "otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx, 2 }, +/*20*/ { "otherassistsse", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistsse, 2 }, }; @@ -1851,7 +1838,6 @@ static void explain_name_broad(const char *name) { const char *mythresh; - if (strcmp(name, "eff1") == 0) { printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); mythresh = "thresh < .75"; @@ -1860,9 +1846,9 @@ explain_name_broad(const char *name) mythresh = "thresh > 1.0"; } else if (strcmp(name, "itlbmiss") == 0) { printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); - mythresh = "thresh > .05"; + mythresh = "thresh > .05"; } else if (strcmp(name, "icachemiss") == 0) { - printf("Examine ( 
36.0 * CACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n"); + printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n"); mythresh = "thresh > .05"; } else if (strcmp(name, "lcpstall") == 0) { printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); @@ -1913,66 +1899,67 @@ explain_name_broad(const char *name) } else { printf("Unknown name:%s\n", name); mythresh = "unknown entry"; - } + } printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); } #define BROADWELL_COUNT 17 static struct cpu_entry broadwell[BROADWELL_COUNT] = { - /*1*/ {"eff1", "thresh < .75", - "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency1}, - /*2*/ {"eff2", "thresh > 1.0", - "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", - efficiency2}, - /*3*/ {"itlbmiss", "thresh > .05", - "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", - itlb_miss_broad}, - /*4*/ {"icachemiss", "thresh > .05", - "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1", - icache_miss_has}, - /*5*/ {"lcpstall", "thresh > .05", - "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", - lcp_stall}, - /*6*/ {"cache1", "thresh >= .1", - "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache1broad}, - /*7*/ {"cache2", "thresh >= .2", - "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", - cache2broad}, - /*8*/ {"contested", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", - contestedbroad}, - /*9*/ {"datashare", "thresh >= .05", - "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", - datasharing_has}, - /*10*/ {"blockstorefwd", "thresh >= .05", - "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", - 
blockstoreforward}, - /*11*/ {"aliasing_4k", "thresh >= .1", - "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", - aliasing_broad}, - /*12*/ {"dtlbmissload", "thresh >= .1", - "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", - dtlb_missload}, - /*13*/ {"br_miss", "thresh >= .2", - "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", - br_mispredict_broad}, - /*14*/ {"clears", "thresh >= .02", - "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", - clears_broad}, - /*15*/ {"fpassist", "look for a excessive value", - "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", - fpassists}, - /*16*/ {"otherassistavx", "look for a excessive value", - "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", - otherassistavx}, - /*17*/ {"microassist", "thresh >= .2", - "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", - microassist_broad}, +/*1*/ { "eff1", "thresh < .75", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1, 2 }, +/*2*/ { "eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2, 2 }, +/*3*/ { "itlbmiss", "thresh > .05", + "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", + itlb_miss_broad, 3 }, +/*4*/ { "icachemiss", "thresh > .05", + "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", + icache_miss_has, 2 }, +/*5*/ { "lcpstall", "thresh > .05", + "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", + lcp_stall, 2 }, +/*6*/ { "cache1", "thresh >= .1", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s 
CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1broad, 2 }, +/*7*/ { "cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2broad, 2 }, +/*8*/ { "contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", + contestedbroad, 2 }, +/*9*/ { "datashare", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + datasharing_has, 2 }, +/*10*/ { "blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward, 2 }, +/*11*/ { "aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing_broad, 2 }, +/*12*/ { "dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload, 3 }, +/*13*/ { "br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", + br_mispredict_broad, 7 }, +/*14*/ { "clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears_broad, 5 }, +/*15*/ { "fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists, 2 }, +/*16*/ { "otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx, 2 }, +/*17*/ { "microassist", "thresh >= .2", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", + microassist_broad, 4 }, }; + static void set_sandybridge(void) { @@ -2001,6 +1988,7 @@ 
set_haswell(void) the_cpu.explain = explain_name_has; } + static void set_broadwell(void) { @@ -2010,41 +1998,57 @@ set_broadwell(void) the_cpu.explain = explain_name_broad; } -static void -set_expression(char *name) + +static int +set_expression(const char *name) { int found = 0, i; - - for (i = 0; i < the_cpu.number; i++) { + for(i=0 ; i< the_cpu.number; i++) { if (strcmp(name, the_cpu.ents[i].name) == 0) { found = 1; expression = the_cpu.ents[i].func; command = the_cpu.ents[i].command; threshold = the_cpu.ents[i].thresh; + if (the_cpu.ents[i].counters_required > max_pmc_counters) { + printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n", + the_cpu.ents[i].name, + the_cpu.ents[i].counters_required, max_pmc_counters); + printf("Sorry this test can not be run\n"); + if (run_all == 0) { + exit(-1); + } else { + return(-1); + } + } break; } } if (!found) { printf("For CPU type %s we have no expression:%s\n", - the_cpu.cputype, name); + the_cpu.cputype, name); exit(-1); } + return(0); } + + + + static int -validate_expression(char *name) +validate_expression(char *name) { int i, found; found = 0; - for (i = 0; i < the_cpu.number; i++) { + for(i=0 ; i< the_cpu.number; i++) { if (strcmp(name, the_cpu.ents[i].name) == 0) { found = 1; break; } } if (!found) { - return (-1); + return(-1); } return (0); } @@ -2052,9 +2056,9 @@ validate_expression(char *name) static void do_expression(struct counters *cpu, int pos) { - if (expression == NULL) + if (expression == NULL) return; - (*expression) (cpu, pos); + (*expression)(cpu, pos); } static void @@ -2062,57 +2066,60 @@ process_header(int idx, char *p) { struct counters *up; int i, len, nlen; - - /* - * Given header element idx, at p in form 's/NN/nameof' process the - * entry to pull out the name and the CPU number. + /* + * Given header element idx, at p in + * form 's/NN/nameof' + * process the entry to pull out the name and + * the CPU number. 
*/ if (strncmp(p, "s/", 2)) { printf("Check -- invalid header no s/ in %s\n", - p); + p); return; } up = &cnts[idx]; up->cpu = strtol(&p[2], NULL, 10); len = strlen(p); - for (i = 2; i < len; i++) { + for (i=2; i<len; i++) { if (p[i] == '/') { - nlen = strlen(&p[(i + 1)]); - if (nlen < (MAX_NLEN - 1)) { - strcpy(up->counter_name, &p[(i + 1)]); + nlen = strlen(&p[(i+1)]); + if (nlen < (MAX_NLEN-1)) { + strcpy(up->counter_name, &p[(i+1)]); } else { - strncpy(up->counter_name, &p[(i + 1)], (MAX_NLEN - 1)); + strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); } } } } static void -build_counters_from_header(FILE * io) +build_counters_from_header(FILE *io) { char buffer[8192], *p; int i, len, cnt; size_t mlen; - /* - * We have a new start, lets setup our headers and cpus. + /* We have a new start, lets + * setup our headers and cpus. */ if (fgets(buffer, sizeof(buffer), io) == NULL) { printf("First line can't be read from file err:%d\n", errno); return; } /* - * Ok output is an array of counters. Once we start to read the - * values in we must put them in there slot to match there CPU and - * counter being updated. We create a mass array of the counters, - * filling in the CPU and counter name. + * Ok output is an array of counters. Once + * we start to read the values in we must + * put them in there slot to match there CPU and + * counter being updated. We create a mass array + * of the counters, filling in the CPU and + * counter name. */ /* How many do we get? 
*/ len = strlen(buffer); - for (i = 0, cnt = 0; i < len; i++) { + for (i=0, cnt=0; i<len; i++) { if (strncmp(&buffer[i], "s/", 2) == 0) { cnt++; - for (; i < len; i++) { + for(;i<len;i++) { if (buffer[i] == ' ') break; } @@ -2126,10 +2133,10 @@ build_counters_from_header(FILE * io) return; } memset(cnts, 0, mlen); - for (i = 0, cnt = 0; i < len; i++) { + for (i=0, cnt=0; i<len; i++) { if (strncmp(&buffer[i], "s/", 2) == 0) { p = &buffer[i]; - for (; i < len; i++) { + for(;i<len;i++) { if (buffer[i] == ' ') { buffer[i] = 0; break; @@ -2140,22 +2147,22 @@ build_counters_from_header(FILE * io) } } if (verbose) - printf("We have %d entries\n", cnt); + printf("We have %d entries\n", cnt); } extern int max_to_collect; int max_to_collect = MAX_COUNTER_SLOTS; static int -read_a_line(FILE * io) +read_a_line(FILE *io) { - char buffer[8192], *p, *stop; + char buffer[8192], *p, *stop; int pos, i; if (fgets(buffer, sizeof(buffer), io) == NULL) { - return (0); + return(0); } p = buffer; - for (i = 0; i < ncnts; i++) { + for (i=0; i<ncnts; i++) { pos = cnts[i].pos; cnts[i].vals[pos] = strtol(p, &stop, 0); cnts[i].pos++; @@ -2166,7 +2173,7 @@ read_a_line(FILE * io) } extern int cpu_count_out; -int cpu_count_out = 0; +int cpu_count_out=0; static void print_header(void) @@ -2174,13 +2181,13 @@ print_header(void) int i, cnt, printed_cnt; printf("*********************************\n"); - for (i = 0, cnt = 0; i < MAX_CPU; i++) { + for(i=0, cnt=0; i<MAX_CPU; i++) { if (glob_cpu[i]) { cnt++; } - } + } cpu_count_out = cnt; - for (i = 0, printed_cnt = 0; i < MAX_CPU; i++) { + for(i=0, printed_cnt=0; i<MAX_CPU; i++) { if (glob_cpu[i]) { printf("CPU%d", i); printed_cnt++; @@ -2200,7 +2207,7 @@ lace_cpus_together(void) int i, j, lace_cpu; struct counters *cpat, *at; - for (i = 0; i < ncnts; i++) { + for(i=0; i<ncnts; i++) { cpat = &cnts[i]; if (cpat->next_cpu) { /* Already laced in */ @@ -2218,7 +2225,7 @@ lace_cpus_together(void) continue; } /* Ok look forward for cpu->cpu and link in */ - 
for (j = (i + 1); j < ncnts; j++) { + for(j=(i+1); j<ncnts; j++) { at = &cnts[j]; if (at->next_cpu) { continue; @@ -2239,19 +2246,15 @@ process_file(char *filename) FILE *io; int i; int line_at, not_done; - pid_t pid_of_command = 0; + pid_t pid_of_command=0; - if (filename == NULL) { + if (filename == NULL) { io = my_popen(command, "r", &pid_of_command); - if (io == NULL) { - printf("Can't popen the command %s\n", command); - return; - } } else { io = fopen(filename, "r"); if (io == NULL) { printf("Can't process file %s err:%d\n", - filename, errno); + filename, errno); return; } } @@ -2259,25 +2262,23 @@ process_file(char *filename) if (cnts == NULL) { /* Nothing we can do */ printf("Nothing to do -- no counters built\n"); - if (filename) { + if (io) { fclose(io); - } else { - my_pclose(io, pid_of_command); } return; } lace_cpus_together(); print_header(); if (verbose) { - for (i = 0; i < ncnts; i++) { + for (i=0; i<ncnts; i++) { printf("Counter:%s cpu:%d index:%d\n", - cnts[i].counter_name, - cnts[i].cpu, i); + cnts[i].counter_name, + cnts[i].cpu, i); } } line_at = 0; not_done = 1; - while (not_done) { + while(not_done) { if (read_a_line(io)) { line_at++; } else { @@ -2288,10 +2289,9 @@ process_file(char *filename) } if (filename == NULL) { int cnt; - /* For the ones we dynamically open we print now */ - for (i = 0, cnt = 0; i < MAX_CPU; i++) { - do_expression(glob_cpu[i], (line_at - 1)); + for(i=0, cnt=0; i<MAX_CPU; i++) { + do_expression(glob_cpu[i], (line_at-1)); cnt++; if (cnt == cpu_count_out) { printf("\n"); @@ -2308,12 +2308,25 @@ process_file(char *filename) my_pclose(io, pid_of_command); } } - #if defined(__amd64__) #define cpuid(in,a,b,c,d)\ asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in)); + +static __inline void +do_cpuid(u_int ax, u_int cx, u_int *p) +{ + __asm __volatile("cpuid" + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax), "c" (cx) ); +} + #else -#define cpuid(in, a, b, c, d) +#define cpuid(in, a, b, c, d) 
+static __inline void +do_cpuid(u_int ax, u_int cx, u_int *p) +{ +} + #endif static void @@ -2321,10 +2334,11 @@ get_cpuid_set(void) { unsigned long eax, ebx, ecx, edx; int model; - pid_t pid_of_command = 0; + pid_t pid_of_command=0; size_t sz, len; FILE *io; char linebuf[1024], *str; + u_int reg[4]; eax = ebx = ecx = edx = 0; @@ -2357,25 +2371,20 @@ get_cpuid_set(void) printf("Intel Pentium P6\n"); goto not_supported; break; - case 0x3: + case 0x3: case 0x5: printf("Intel PII\n"); goto not_supported; break; - case 0x6: - case 0x16: + case 0x6: case 0x16: printf("Intel CL\n"); goto not_supported; break; - case 0x7: - case 0x8: - case 0xA: - case 0xB: + case 0x7: case 0x8: case 0xA: case 0xB: printf("Intel PIII\n"); goto not_supported; break; - case 0x9: - case 0xD: + case 0x9: case 0xD: printf("Intel PM\n"); goto not_supported; break; @@ -2396,10 +2405,14 @@ get_cpuid_set(void) goto not_supported; break; case 0x1A: - case 0x1E: /* Per Intel document 253669-032 9/2009, pages - * A-2 and A-57 */ - case 0x1F: /* Per Intel document 253669-032 9/2009, pages - * A-2 and A-57 */ + case 0x1E: /* + * Per Intel document 253669-032 9/2009, + * pages A-2 and A-57 + */ + case 0x1F: /* + * Per Intel document 253669-032 9/2009, + * pages A-2 and A-57 + */ printf("Intel COREI7\n"); goto not_supported; break; @@ -2455,7 +2468,7 @@ get_cpuid_set(void) break; case 0x4f: case 0x56: - printf("Intel BROADWEL (Xeon)L\n"); + printf("Intel BROADWEL (Xeon)\n"); set_broadwell(); break; @@ -2466,7 +2479,7 @@ get_cpuid_set(void) break; default: printf("Intel model 0x%x is not known -- sorry\n", - model); + model); goto not_supported; break; } @@ -2476,6 +2489,9 @@ get_cpuid_set(void) goto not_supported; break; } + do_cpuid(0xa, 0, reg); + max_pmc_counters = (reg[3] & 0x0000000f) + 1; + printf("We have %d PMC counters to work with\n", max_pmc_counters); /* Ok lets load the list of all known PMC's */ io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); if (valid_pmcs == NULL) { @@ 
-2484,26 +2500,27 @@ get_cpuid_set(void) sz = sizeof(char *) * pmc_allocated_cnt; valid_pmcs = malloc(sz); if (valid_pmcs == NULL) { - printf("No memory allocation fails at startup?\n"); + printf("No memory allocation fails at startup?\n"); exit(-1); } memset(valid_pmcs, 0, sz); } + while (fgets(linebuf, sizeof(linebuf), io) != NULL) { if (linebuf[0] != '\t') { /* sometimes headers ;-) */ continue; } len = strlen(linebuf); - if (linebuf[(len - 1)] == '\n') { + if (linebuf[(len-1)] == '\n') { /* Likely */ - linebuf[(len - 1)] = 0; + linebuf[(len-1)] = 0; } str = &linebuf[1]; len = strlen(str) + 1; valid_pmcs[valid_pmc_cnt] = malloc(len); if (valid_pmcs[valid_pmc_cnt] == NULL) { - printf("No memory2 allocation fails at startup?\n"); + printf("No memory2 allocation fails at startup?\n"); exit(-1); } memset(valid_pmcs[valid_pmc_cnt], 0, len); @@ -2516,7 +2533,7 @@ get_cpuid_set(void) sz = sizeof(char *) * (pmc_allocated_cnt * 2); more = malloc(sz); if (more == NULL) { - printf("No memory3 allocation fails at startup?\n"); + printf("No memory3 allocation fails at startup?\n"); exit(-1); } memset(more, 0, sz); @@ -2526,10 +2543,10 @@ get_cpuid_set(void) valid_pmcs = more; } } - my_pclose(io, pid_of_command); + my_pclose(io, pid_of_command); return; not_supported: - printf("Not supported\n"); + printf("Not supported\n"); exit(-1); } @@ -2537,12 +2554,11 @@ static void explain_all(void) { int i; - - printf("For CPU's of type %s the following expressions are available:\n", the_cpu.cputype); + printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); printf("-------------------------------------------------------------\n"); - for (i = 0; i < the_cpu.number; i++) { + for(i=0; i<the_cpu.number; i++){ printf("For -e %s ", the_cpu.ents[i].name); - (*the_cpu.explain) (the_cpu.ents[i].name); + (*the_cpu.explain)(the_cpu.ents[i].name); printf("----------------------------\n"); } } @@ -2551,7 +2567,7 @@ static void test_for_a_pmc(const char *pmc, int 
out_so_far) { FILE *io; - pid_t pid_of_command = 0; + pid_t pid_of_command=0; char my_command[1024]; char line[1024]; char resp[1024]; @@ -2559,12 +2575,12 @@ test_for_a_pmc(const char *pmc, int out_so_far) if (out_so_far < 50) { len = 50 - out_so_far; - for (i = 0; i < len; i++) { + for(i=0; i<len; i++) { printf(" "); } } sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc); - io = my_popen(my_command, "r", &pid_of_command); + io = my_popen(my_command, "r", &pid_of_command); if (io == NULL) { printf("Failed -- popen fails\n"); return; @@ -2576,11 +2592,11 @@ test_for_a_pmc(const char *pmc, int out_so_far) goto out; } llen = strlen(line); - if (line[(llen - 1)] == '\n') { - line[(llen - 1)] = 0; + if (line[(llen-1)] == '\n') { + line[(llen-1)] = 0; llen--; } - for (i = 2; i < (llen - len); i++) { + for(i=2; i<(llen-len); i++) { if (strncmp(&line[i], "ERROR", 5) == 0) { printf("Failed %s\n", line); goto out; @@ -2592,9 +2608,9 @@ test_for_a_pmc(const char *pmc, int out_so_far) goto out; } len = strlen(line); - for (j = 0; j < len; j++) { + for (j=0; j<len; j++) { if (line[j] == ' ') { - j++; + j++; } else { break; } @@ -2602,7 +2618,7 @@ test_for_a_pmc(const char *pmc, int out_so_far) printf("Pass"); len = strlen(&line[j]); if (len < 20) { - for (k = 0; k < (20 - len); k++) { + for(k=0; k<(20-len); k++) { printf(" "); } } @@ -2616,8 +2632,8 @@ test_for_a_pmc(const char *pmc, int out_so_far) } printf("Failed -- '%s' not '%s'\n", line, resp); out: - my_pclose(io, pid_of_command); - + my_pclose(io, pid_of_command); + } static int @@ -2625,16 +2641,15 @@ add_it_to(char **vars, int cur_cnt, char *name) { int i; size_t len; - - for (i = 0; i < cur_cnt; i++) { + for(i=0; i<cur_cnt; i++) { if (strcmp(vars[i], name) == 0) { /* Already have */ - return (0); + return(0); } } if (vars[cur_cnt] != NULL) { - printf("Cur_cnt:%d filled with %s??\n", - cur_cnt, vars[cur_cnt]); + printf("Cur_cnt:%d filled with %s??\n", + cur_cnt, vars[cur_cnt]); exit(-1); } /* Ok its new 
*/ @@ -2646,16 +2661,18 @@ add_it_to(char **vars, int cur_cnt, char *name) } memset(vars[cur_cnt], 0, len); strcpy(vars[cur_cnt], name); - return (1); + return(1); } static char * build_command_for_exp(struct expression *exp) { /* - * Build the pmcstat command to handle the passed in expression. - * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ where NNN and QQQ represent - * the PMC's in the expression uniquely.. + * Build the pmcstat command to handle + * the passed in expression. + * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ + * where NNN and QQQ represent the PMC's in the expression + * uniquely.. */ char forming[1024]; int cnt_pmc, alloced_pmcs, i; @@ -2686,39 +2703,39 @@ build_command_for_exp(struct expression *exp) at = exp; while (at) { if (at->type == TYPE_VALUE_PMC) { - if (add_it_to(vars, alloced_pmcs, at->name)) { + if(add_it_to(vars, alloced_pmcs, at->name)) { alloced_pmcs++; } } at = at->next; } /* Now we have a unique list in vars so create our command */ - mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ - for (i = 0; i < alloced_pmcs; i++) { + mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ + for(i=0; i<alloced_pmcs; i++) { mal += strlen(vars[i]) + 4; /* var + " -s " */ } - cmd = malloc((mal + 2)); + cmd = malloc((mal+2)); if (cmd == NULL) { printf("%s out of mem\n", __FUNCTION__); exit(-1); } - memset(cmd, 0, (mal + 2)); + memset(cmd, 0, (mal+2)); strcpy(cmd, "/usr/sbin/pmcstat -w 1"); at = exp; - for (i = 0; i < alloced_pmcs; i++) { + for(i=0; i<alloced_pmcs; i++) { sprintf(forming, " -s %s", vars[i]); strcat(cmd, forming); free(vars[i]); vars[i] = NULL; } free(vars); - return (cmd); + return(cmd); } static int user_expr(struct counters *cpu, int pos) { - int ret; + int ret; double res; struct counters *var; struct expression *at; @@ -2741,7 +2758,7 @@ user_expr(struct counters *cpu, int pos) } res = run_expr(master_exp, 1, NULL); ret = printf("%1.3f", res); - return (ret); + return(ret); } @@ -2757,10 +2774,9 @@ static void run_tests(void) { int i, lenout; - 
printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt); printf("------------------------------------------------------------------------\n"); - for (i = 0; i < valid_pmc_cnt; i++) { + for(i=0; i<valid_pmc_cnt; i++) { lenout = printf("%s", valid_pmcs[i]); fflush(stdout); test_for_a_pmc(valid_pmcs[i], lenout); @@ -2770,12 +2786,11 @@ static void list_all(void) { int i, cnt, j; - printf("PMC Abbreviation\n"); printf("--------------------------------------------------------------\n"); - for (i = 0; i < valid_pmc_cnt; i++) { + for(i=0; i<valid_pmc_cnt; i++) { cnt = printf("%s", valid_pmcs[i]); - for (j = cnt; j < 52; j++) { + for(j=cnt; j<52; j++) { printf(" "); } printf("%%%d\n", i); @@ -2787,23 +2802,27 @@ int main(int argc, char **argv) { int i, j, cnt; - char *filename = NULL; - char *name = NULL; + char *filename=NULL; + const char *name=NULL; int help_only = 0; int test_mode = 0; + int test_at = 0; get_cpuid_set(); memset(glob_cpu, 0, sizeof(glob_cpu)); - while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) { + while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) { switch (i) { + case 'A': + run_all = 1; + break; case 'L': list_all(); - return (0); + return(0); case 'H': printf("**********************************\n"); explain_all(); printf("**********************************\n"); - return (0); + return(0); break; case 'T': test_mode = 1; @@ -2817,7 +2836,7 @@ main(int argc, char **argv) case 'e': if (validate_expression(optarg)) { printf("Unknown expression %s\n", optarg); - return (0); + return(0); } name = optarg; set_expression(optarg); @@ -2840,9 +2859,9 @@ main(int argc, char **argv) break; case '?': default: - use: + use: printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? 
-H]\n", - argv[0]); + argv[0]); printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); printf("-v -- verbose dump debug type things -- you don't want this\n"); printf("-m N -- maximum to collect is N measurments\n"); @@ -2851,56 +2870,85 @@ main(int argc, char **argv) printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); printf("-H -- Don't run anything, just explain all canned expressions\n"); printf("-T -- Test all PMC's defined by this processor\n"); - return (0); + printf("-A -- Run all canned tests\n"); + return(0); break; }; } - if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) { + if ((run_all == 0) && (name == NULL) && (filename == NULL) && + (test_mode == 0) && (master_exp == NULL)) { printf("Without setting an expression we cannot dynamically gather information\n"); printf("you must supply a filename (and you probably want verbosity)\n"); goto use; } + if (run_all && max_to_collect > 10) { + max_to_collect = 3; + } if (test_mode) { run_tests(); - return (0); + return(0); } printf("*********************************\n"); - if (master_exp == NULL) { - (*the_cpu.explain) (name); - } else { + if ((master_exp == NULL) && name) { + (*the_cpu.explain)(name); + } else if (master_exp) { printf("Examine your expression "); print_exp(master_exp); printf("User defined threshold\n"); } if (help_only) { - return (0); + return(0); + } + if (run_all) { + more: + name = the_cpu.ents[test_at].name; + printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh); + test_at++; + if (set_expression(name) == -1) { + if (test_at >= the_cpu.number) { + goto done; + } else + goto more; + } + } process_file(filename); if (verbose >= 2) { - for (i = 0; i < ncnts; i++) { + for (i=0; i<ncnts; i++) { printf("Counter:%s cpu:%d index:%d\n", - cnts[i].counter_name, - cnts[i].cpu, i); - for (j = 0; j < cnts[i].pos; j++) { + cnts[i].counter_name, + cnts[i].cpu, i); + 
for(j=0; j<cnts[i].pos; j++) { printf(" val - %ld\n", (long int)cnts[i].vals[j]); } printf(" sum - %ld\n", (long int)cnts[i].sum); } } if (expression == NULL) { - return (0); + return(0); } - for (i = 0, cnt = 0; i < MAX_CPU; i++) { - if (glob_cpu[i]) { - do_expression(glob_cpu[i], -1); - cnt++; - if (cnt == cpu_count_out) { - printf("\n"); - break; - } else { - printf("\t"); + if (max_to_collect > 1) { + for(i=0, cnt=0; i<MAX_CPU; i++) { + if (glob_cpu[i]) { + do_expression(glob_cpu[i], -1); + cnt++; + if (cnt == cpu_count_out) { + printf("\n"); + break; + } else { + printf("\t"); + } } } } - return (0); + if (run_all && (test_at < the_cpu.number)) { + memset(glob_cpu, 0, sizeof(glob_cpu)); + ncnts = 0; + printf("*********************************\n"); + goto more; + } else if (run_all) { + done: + printf("*********************************\n"); + } + return(0); } |