diff options
Diffstat (limited to 'usr.sbin/pmcstudy/pmcstudy.c')
-rw-r--r-- | usr.sbin/pmcstudy/pmcstudy.c | 2954 |
1 files changed, 2954 insertions, 0 deletions
diff --git a/usr.sbin/pmcstudy/pmcstudy.c b/usr.sbin/pmcstudy/pmcstudy.c new file mode 100644 index 0000000..16c9f51 --- /dev/null +++ b/usr.sbin/pmcstudy/pmcstudy.c @@ -0,0 +1,2954 @@ +/*- + * Copyright (c) 2014, 2015 Netflix Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer, + * in this position and unchanged. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <sys/types.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <strings.h> +#include <sys/errno.h> +#include <signal.h> +#include <sys/wait.h> +#include <getopt.h> +#include "eval_expr.h" +__FBSDID("$FreeBSD$"); + +static int max_pmc_counters = 1; +static int run_all = 0; + +#define MAX_COUNTER_SLOTS 1024 +#define MAX_NLEN 64 +#define MAX_CPU 64 +static int verbose = 0; + +extern char **environ; +extern struct expression *master_exp; +struct expression *master_exp=NULL; + +#define PMC_INITIAL_ALLOC 512 +extern char **valid_pmcs; +char **valid_pmcs = NULL; +extern int valid_pmc_cnt; +int valid_pmc_cnt=0; +extern int pmc_allocated_cnt; +int pmc_allocated_cnt=0; + +/* + * The following two varients on popen and pclose with + * the cavet that they get you the PID so that you + * can supply it to pclose so it can send a SIGTERM + * to the process. + */ +static FILE * +my_popen(const char *command, const char *dir, pid_t *p_pid) +{ + FILE *io_out, *io_in; + int pdesin[2], pdesout[2]; + char *argv[4]; + pid_t pid; + char cmd[4]; + char cmd2[1024]; + char arg1[4]; + + if ((strcmp(dir, "r") != 0) && + (strcmp(dir, "w") != 0)) { + errno = EINVAL; + return(NULL); + } + if (pipe(pdesin) < 0) + return (NULL); + + if (pipe(pdesout) < 0) { + (void)close(pdesin[0]); + (void)close(pdesin[1]); + return (NULL); + } + strcpy(cmd, "sh"); + strcpy(arg1, "-c"); + strcpy(cmd2, command); + argv[0] = cmd; + argv[1] = arg1; + argv[2] = cmd2; + argv[3] = NULL; + + switch (pid = fork()) { + case -1: /* Error. */ + (void)close(pdesin[0]); + (void)close(pdesin[1]); + (void)close(pdesout[0]); + (void)close(pdesout[1]); + return (NULL); + /* NOTREACHED */ + case 0: /* Child. */ + /* Close out un-used sides */ + (void)close(pdesin[1]); + (void)close(pdesout[0]); + /* Now prepare the stdin of the process */ + close(0); + (void)dup(pdesin[0]); + (void)close(pdesin[0]); + /* Now prepare the stdout of the process */ + close(1); + (void)dup(pdesout[1]); + /* And lets do stderr just in case */ + close(2); + (void)dup(pdesout[1]); + (void)close(pdesout[1]); + /* Now run it */ + execve("/bin/sh", argv, environ); + exit(127); + /* NOTREACHED */ + } + /* Parent; assume fdopen can't fail. */ + /* Store the pid */ + *p_pid = pid; + if (strcmp(dir, "r") != 0) { + io_out = fdopen(pdesin[1], "w"); + (void)close(pdesin[0]); + (void)close(pdesout[0]); + (void)close(pdesout[1]); + return(io_out); + } else { + /* Prepare the input stream */ + io_in = fdopen(pdesout[0], "r"); + (void)close(pdesout[1]); + (void)close(pdesin[0]); + (void)close(pdesin[1]); + return (io_in); + } +} + +/* + * pclose -- + * Pclose returns -1 if stream is not associated with a `popened' command, + * if already `pclosed', or waitpid returns an error. + */ +static void +my_pclose(FILE *io, pid_t the_pid) +{ + int pstat; + pid_t pid; + + /* + * Find the appropriate file pointer and remove it from the list. + */ + (void)fclose(io); + /* Die if you are not dead! */ + kill(the_pid, SIGTERM); + do { + pid = wait4(the_pid, &pstat, 0, (struct rusage *)0); + } while (pid == -1 && errno == EINTR); +} + +struct counters { + struct counters *next_cpu; + char counter_name[MAX_NLEN]; /* Name of counter */ + int cpu; /* CPU we are on */ + int pos; /* Index we are filling to. */ + uint64_t vals[MAX_COUNTER_SLOTS]; /* Last 64 entries */ + uint64_t sum; /* Summary of entries */ +}; + +extern struct counters *glob_cpu[MAX_CPU]; +struct counters *glob_cpu[MAX_CPU]; + +extern struct counters *cnts; +struct counters *cnts=NULL; + +extern int ncnts; +int ncnts=0; + +extern int (*expression)(struct counters *, int); +int (*expression)(struct counters *, int); + +static const char *threshold=NULL; +static const char *command; + +struct cpu_entry { + const char *name; + const char *thresh; + const char *command; + int (*func)(struct counters *, int); + int counters_required; +}; + +struct cpu_type { + char cputype[32]; + int number; + struct cpu_entry *ents; + void (*explain)(const char *name); +}; +extern struct cpu_type the_cpu; +struct cpu_type the_cpu; + +static void +explain_name_sb(const char *name) +{ + const char *mythresh; + if (strcmp(name, "allocstall1") == 0) { + printf("Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "allocstall2") == 0) { + printf("Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "br_miss") == 0) { + printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "splitload") == 0) { + printf("Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "splitstore") == 0) { + printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); + mythresh = "thresh >= .01"; + } else if (strcmp(name, "contested") == 0) { + printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "blockstorefwd") == 0) { + printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "cache2") == 0) { + printf("Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"); + printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"); + printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"); + printf("**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "cache1") == 0) { + printf("Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "dtlbmissload") == 0) { + printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); + printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "frontendstall") == 0) { + printf("Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); + mythresh = "thresh >= .15"; + } else if (strcmp(name, "clears") == 0) { + printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); + printf(" MACHINE_CLEARS.SMC + \n"); + printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .02"; + } else if (strcmp(name, "microassist") == 0) { + printf("Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"); + printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "aliasing_4k") == 0) { + printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "fpassist") == 0) { + printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); + mythresh = "look for a excessive value"; + } else if (strcmp(name, "otherassistavx") == 0) { + printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "look for a excessive value"; + } else if (strcmp(name, "otherassistsse") == 0) { + printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "look for a excessive value"; + } else if (strcmp(name, "eff1") == 0) { + printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh < .9"; + } else if (strcmp(name, "eff2") == 0) { + printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); + mythresh = "thresh > 1.0"; + } else if (strcmp(name, "dtlbmissstore") == 0) { + printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); + printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh >= .05"; + } else { + printf("Unknown name:%s\n", name); + mythresh = "unknown entry"; + } + printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); +} + +static void +explain_name_ib(const char *name) +{ + const char *mythresh; + if (strcmp(name, "br_miss") == 0) { + printf("Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"); + printf(" MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"); + printf("/ (4 * CPU_CLK_UNHALTED.THREAD))))\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "eff1") == 0) { + printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh < .9"; + } else if (strcmp(name, "eff2") == 0) { + printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); + mythresh = "thresh > 1.0"; + } else if (strcmp(name, "cache1") == 0) { + printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "cache2") == 0) { + printf("Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "itlbmiss") == 0) { + printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "icachemiss") == 0) { + printf("Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "lcpstall") == 0) { + printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "datashare") == 0) { + printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "blockstorefwd") == 0) { + printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "splitload") == 0) { + printf("Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"); + printf(" LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "splitstore") == 0) { + printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); + mythresh = "thresh >= .01"; + } else if (strcmp(name, "aliasing_4k") == 0) { + printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "dtlbmissload") == 0) { + printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); + printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "dtlbmissstore") == 0) { + printf("Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"); + printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "contested") == 0) { + printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "clears") == 0) { + printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); + printf(" MACHINE_CLEARS.SMC + \n"); + printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .02"; + } else if (strcmp(name, "microassist") == 0) { + printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); + printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "fpassist") == 0) { + printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); + mythresh = "look for a excessive value"; + } else if (strcmp(name, "otherassistavx") == 0) { + printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "look for a excessive value"; + } else if (strcmp(name, "otherassistsse") == 0) { + printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "look for a excessive value"; + } else { + printf("Unknown name:%s\n", name); + mythresh = "unknown entry"; + } + printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); +} + + +static void +explain_name_has(const char *name) +{ + const char *mythresh; + if (strcmp(name, "eff1") == 0) { + printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh < .75"; + } else if (strcmp(name, "eff2") == 0) { + printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); + mythresh = "thresh > 1.0"; + } else if (strcmp(name, "itlbmiss") == 0) { + printf("Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "icachemiss") == 0) { + printf("Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "lcpstall") == 0) { + printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "cache1") == 0) { + printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "cache2") == 0) { + printf("Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"); + printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"); + printf(" (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"); + printf(" / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "contested") == 0) { + printf("Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "datashare") == 0) { + printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "blockstorefwd") == 0) { + printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "splitload") == 0) { + printf("Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "splitstore") == 0) { + printf("Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n"); + mythresh = "thresh >= .01"; + } else if (strcmp(name, "aliasing_4k") == 0) { + printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "dtlbmissload") == 0) { + printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); + printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "br_miss") == 0) { + printf("Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "clears") == 0) { + printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); + printf(" MACHINE_CLEARS.SMC + \n"); + printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .02"; + } else if (strcmp(name, "microassist") == 0) { + printf("Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); + printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "fpassist") == 0) { + printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); + mythresh = "look for a excessive value"; + } else if (strcmp(name, "otherassistavx") == 0) { + printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "look for a excessive value"; + } else if (strcmp(name, "otherassistsse") == 0) { + printf("Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "look for a excessive value"; + } else { + printf("Unknown name:%s\n", name); + mythresh = "unknown entry"; + } + printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); +} + + + +static struct counters * +find_counter(struct counters *base, const char *name) +{ + struct counters *at; + int len; + + at = base; + len = strlen(name); + while(at) { + if (strncmp(at->counter_name, name, len) == 0) { + return(at); + } + at = at->next_cpu; + } + printf("Can't find counter %s\n", name); + printf("We have:\n"); + at = base; + while(at) { + printf("- %s\n", at->counter_name); + at = at->next_cpu; + } + exit(-1); +} + +static int +allocstall1(struct counters *cpu, int pos) +{ +/* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/ + int ret; + struct counters *partial; + struct counters *unhalt; + double un, par, res; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW"); + if (pos != -1) { + par = partial->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + par = partial->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = par/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +allocstall2(struct counters *cpu, int pos) +{ +/* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ + int ret; + struct counters *partial; + struct counters *unhalt; + double un, par, res; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP"); + if (pos != -1) { + par = partial->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + par = partial->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = par/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +br_mispredict(struct counters *cpu, int pos) +{ + struct counters *brctr; + struct counters *unhalt; + int ret; +/* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ + double br, un, con, res; + con = 20.0; + + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); + if (pos != -1) { + br = brctr->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + br = brctr->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (con * br)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +br_mispredictib(struct counters *cpu, int pos) +{ + struct counters *brctr; + struct counters *unhalt; + struct counters *clear, *clear2, *clear3; + struct counters *uops; + struct counters *recv; + struct counters *iss; +/* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/ + int ret; + /* + * (BR_MISP_RETIRED.ALL_BRANCHES / + * (BR_MISP_RETIRED.ALL_BRANCHES + + * MACHINE_CLEAR.COUNT) * + * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD))) + * + */ + double br, cl, cl2, cl3, uo, re, un, con, res, is; + con = 4.0; + + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); + clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); + clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); + clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); + uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); + iss = find_counter(cpu, "UOPS_ISSUED.ANY"); + recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); + if (pos != -1) { + br = brctr->vals[pos] * 1.0; + cl = clear->vals[pos] * 1.0; + cl2 = clear2->vals[pos] * 1.0; + cl3 = clear3->vals[pos] * 1.0; + uo = uops->vals[pos] * 1.0; + re = recv->vals[pos] * 1.0; + is = iss->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + br = brctr->sum * 1.0; + cl = clear->sum * 1.0; + cl2 = clear2->sum * 1.0; + cl3 = clear3->sum * 1.0; + uo = uops->sum * 1.0; + re = recv->sum * 1.0; + is = iss->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un))); + ret = printf("%1.3f", res); + return(ret); +} + + +static int +br_mispredict_broad(struct counters *cpu, int pos) +{ + struct counters *brctr; + struct counters *unhalt; + struct counters *clear; + struct counters *uops; + struct counters *uops_ret; + struct counters *recv; + int ret; + double br, cl, uo, uo_r, re, con, un, res; + + con = 4.0; + + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES"); + clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); + uops = find_counter(cpu, "UOPS_ISSUED.ANY"); + uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); + recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES"); + + if (pos != -1) { + un = unhalt->vals[pos] * 1.0; + br = brctr->vals[pos] * 1.0; + cl = clear->vals[pos] * 1.0; + uo = uops->vals[pos] * 1.0; + uo_r = uops_ret->vals[pos] * 1.0; + re = recv->vals[pos] * 1.0; + } else { + un = unhalt->sum * 1.0; + br = brctr->sum * 1.0; + cl = clear->sum * 1.0; + uo = uops->sum * 1.0; + uo_r = uops_ret->sum * 1.0; + re = recv->sum * 1.0; + } + res = br / (br + cl) * (uo - uo_r + con * re) / (un * con); + ret = printf("%1.3f", res); + return(ret); +} + +static int +splitloadib(struct counters *cpu, int pos) +{ + int ret; + struct counters *mem; + struct counters *l1d, *ldblock; + struct counters *unhalt; + double un, memd, res, l1, ldb; + /* + * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P + * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", + */ + + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS"); + l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING"); + ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR"); + if (pos != -1) { + memd = mem->vals[pos] * 1.0; + l1 = l1d->vals[pos] * 1.0; + ldb = ldblock->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + memd = mem->sum * 1.0; + l1 = l1d->sum * 1.0; + ldb = ldblock->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = ((l1 / memd) * ldb)/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +splitload(struct counters *cpu, int pos) +{ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, un, memd, res; +/* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ + + con = 5.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS"); + if (pos != -1) { + memd = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + memd = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (memd * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +splitload_sb(struct counters *cpu, int pos) +{ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, un, memd, res; +/* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/ + + con = 5.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS"); + if (pos != -1) { + memd = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + memd = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (memd * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +splitstore_sb(struct counters *cpu, int pos) +{ + /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */ + int ret; + struct counters *mem_split; + struct counters *mem_stores; + double memsplit, memstore, res; + mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES"); + mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES"); + if (pos != -1) { + memsplit = mem_split->vals[pos] * 1.0; + memstore = mem_stores->vals[pos] * 1.0; + } else { + memsplit = mem_split->sum * 1.0; + memstore = mem_stores->sum * 1.0; + } + res = memsplit/memstore; + ret = printf("%1.3f", res); + return(ret); +} + + + +static int +splitstore(struct counters *cpu, int pos) +{ + /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */ + int ret; + struct counters *mem_split; + struct counters *mem_stores; + double memsplit, memstore, res; + mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES"); + mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES"); + if (pos != -1) { + memsplit = mem_split->vals[pos] * 1.0; + memstore = mem_stores->vals[pos] * 1.0; + } else { + memsplit = mem_split->sum * 1.0; + memstore = mem_stores->sum * 1.0; + } + res = memsplit/memstore; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +contested(struct counters *cpu, int pos) +{ + /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, un, memd, res; + + con = 60.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); + if (pos != -1) { + memd = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + memd = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (memd * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +contested_has(struct counters *cpu, int pos) +{ + /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, un, memd, res; + + con = 84.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); + if (pos != -1) { + memd = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + memd = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (memd * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +contestedbroad(struct counters *cpu, int pos) +{ + /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */ + int ret; + struct counters *mem; + struct counters *mem2; + struct counters *unhalt; + double con, un, memd, memtoo, res; + + con = 84.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); + mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS"); + + if (pos != -1) { + memd = mem->vals[pos] * 1.0; + memtoo = mem2->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + memd = mem->sum * 1.0; + memtoo = mem2->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = ((memd * con) + memtoo)/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +blockstoreforward(struct counters *cpu, int pos) +{ + /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/ + int ret; + struct counters *ldb; + struct counters *unhalt; + double con, un, ld, res; + + con = 13.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD"); + if (pos != -1) { + ld = ldb->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + ld = ldb->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (ld * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +cache2(struct counters *cpu, int pos) +{ + /* ** Suspect *** + * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + */ + int ret; + struct counters *mem1, *mem2, *mem3; + struct counters *unhalt; + double con1, con2, con3, un, me_1, me_2, me_3, res; + + con1 = 26.0; + con2 = 43.0; + con3 = 60.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); +/* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/ + mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); + mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); + mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); + if (pos != -1) { + me_1 = mem1->vals[pos] * 1.0; + me_2 = mem2->vals[pos] * 1.0; + me_3 = mem3->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + me_1 = mem1->sum * 1.0; + me_2 = mem2->sum * 1.0; + me_3 = mem3->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +datasharing(struct counters *cpu, int pos) +{ + /* + * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + */ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, res, me, un; + + con = 43.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); + if (pos != -1) { + me = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + me = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (me * con)/un; + ret = printf("%1.3f", res); + return(ret); + +} + + +static int +datasharing_has(struct counters *cpu, int pos) +{ + /* + * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + */ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, res, me, un; + + con = 72.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); + if (pos != -1) { + me = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + me = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (me * con)/un; + ret = printf("%1.3f", res); + return(ret); + +} + + +static int +cache2ib(struct counters *cpu, int pos) +{ + /* + * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + */ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, un, me, res; + + con = 29.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); + if (pos != -1) { + me = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + me = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (con * me)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +cache2has(struct counters *cpu, int pos) +{ + /* + * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \ + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + + * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84)) + * / CPU_CLK_UNHALTED.THREAD_P + */ + int ret; + struct counters *mem1, *mem2, *mem3; + struct counters *unhalt; + double con1, con2, con3, un, me1, me2, me3, res; + + con1 = 36.0; + con2 = 72.0; + con3 = 84.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT"); + mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT"); + mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM"); + if (pos != -1) { + me1 = mem1->vals[pos] * 1.0; + me2 = mem2->vals[pos] * 1.0; + me3 = mem3->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + me1 = mem1->sum * 1.0; + me2 = mem2->sum * 1.0; + me3 = mem3->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +cache2broad(struct counters *cpu, int pos) +{ + /* + * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2) + */ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, un, me, res; + + con = 36.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT"); + if (pos != -1) { + me = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + me = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (con * me)/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +cache1(struct counters *cpu, int pos) +{ + /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, un, me, res; + + con = 180.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS"); + if (pos != -1) { + me = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + me = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (me * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +cache1ib(struct counters *cpu, int pos) +{ + /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, un, me, res; + + con = 180.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM"); + if (pos != -1) { + me = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + me = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (me * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +cache1broad(struct counters *cpu, int pos) +{ + /* 9 - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */ + int ret; + struct counters *mem; + struct counters *unhalt; + double con, un, me, res; + + con = 180.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS"); + if (pos != -1) { + me = mem->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + me = mem->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (me * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +dtlb_missload(struct counters *cpu, int pos) +{ + /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */ + int ret; + struct counters *dtlb_m, *dtlb_d; + struct counters *unhalt; + double con, un, d1, d2, res; + + con = 7.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT"); + dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION"); + if (pos != -1) { + d1 = dtlb_m->vals[pos] * 1.0; + d2 = dtlb_d->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + d1 = dtlb_m->sum * 1.0; + d2 = dtlb_d->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = ((d1 * con) + d2)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +dtlb_missstore(struct counters *cpu, int pos) +{ + /* + * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / + * CPU_CLK_UNHALTED.THREAD_P (t >= .1) + */ + int ret; + struct counters *dtsb_m, *dtsb_d; + struct counters *unhalt; + double con, un, d1, d2, res; + + con = 7.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT"); + dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION"); + if (pos != -1) { + d1 = dtsb_m->vals[pos] * 1.0; + d2 = dtsb_d->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + d1 = dtsb_m->sum * 1.0; + d2 = dtsb_d->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = ((d1 * con) + d2)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +itlb_miss(struct counters *cpu, int pos) +{ + /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P IB */ + int ret; + struct counters *itlb; + struct counters *unhalt; + double un, d1, res; + + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); + if (pos != -1) { + d1 = itlb->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + d1 = itlb->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = d1/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +itlb_miss_broad(struct counters *cpu, int pos) +{ + /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P */ + int ret; + struct counters *itlb; + struct counters *unhalt; + struct counters *four_k; + double un, d1, res, k; + + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); + four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K"); + if (pos != -1) { + d1 = itlb->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + k = four_k->vals[pos] * 1.0; + } else { + d1 = itlb->sum * 1.0; + un = unhalt->sum * 1.0; + k = four_k->sum * 1.0; + } + res = (7.0 * k + d1)/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +icache_miss(struct counters *cpu, int pos) +{ + /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */ + + int ret; + struct counters *itlb, *icache; + struct counters *unhalt; + double un, d1, ic, res; + + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION"); + icache = find_counter(cpu, "ICACHE.IFETCH_STALL"); + if (pos != -1) { + d1 = itlb->vals[pos] * 1.0; + ic = icache->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + d1 = itlb->sum * 1.0; + ic = icache->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (ic-d1)/un; + ret = printf("%1.3f", res); + return(ret); + +} + +static int +icache_miss_has(struct counters *cpu, int pos) +{ + /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */ + + int ret; + struct counters *icache; + struct counters *unhalt; + double un, con, ic, res; + + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + icache = find_counter(cpu, "ICACHE.MISSES"); + con = 36.0; + if (pos != -1) { + ic = icache->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + ic = icache->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (con * ic)/un; + ret = printf("%1.3f", res); + return(ret); + +} + +static int +lcp_stall(struct counters *cpu, int pos) +{ + /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */ + int ret; + struct counters *ild; + struct counters *unhalt; + double un, d1, res; + + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + ild = find_counter(cpu, "ILD_STALL.LCP"); + if (pos != -1) { + d1 = ild->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + d1 = ild->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = d1/un; + ret = printf("%1.3f", res); + return(ret); + +} + + +static int +frontendstall(struct counters *cpu, int pos) +{ + /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */ + int ret; + struct counters *idq; + struct counters *unhalt; + double con, un, id, res; + + con = 4.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE"); + if (pos != -1) { + id = idq->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + id = idq->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = id/(un * con); + ret = printf("%1.3f", res); + return(ret); +} + +static int +clears(struct counters *cpu, int pos) +{ + /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 ) + * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/ + + int ret; + struct counters *clr1, *clr2, *clr3; + struct counters *unhalt; + double con, un, cl1, cl2, cl3, res; + + con = 100.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); + clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); + clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); + + if (pos != -1) { + cl1 = clr1->vals[pos] * 1.0; + cl2 = clr2->vals[pos] * 1.0; + cl3 = clr3->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + cl1 = clr1->sum * 1.0; + cl2 = clr2->sum * 1.0; + cl3 = clr3->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = ((cl1 + cl2 + cl3) * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + + + +static int +clears_broad(struct counters *cpu, int pos) +{ + int ret; + struct counters *clr1, *clr2, *clr3, *cyc; + struct counters *unhalt; + double con, un, cl1, cl2, cl3, cy, res; + + con = 100.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING"); + clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC"); + clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV"); + cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES"); + if (pos != -1) { + cl1 = clr1->vals[pos] * 1.0; + cl2 = clr2->vals[pos] * 1.0; + cl3 = clr3->vals[pos] * 1.0; + cy = cyc->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + cl1 = clr1->sum * 1.0; + cl2 = clr2->sum * 1.0; + cl3 = clr3->sum * 1.0; + cy = cyc->sum * 1.0; + un = unhalt->sum * 1.0; + } + /* Formula not listed but extrapulated to add the cy ?? */ + res = ((cl1 + cl2 + cl3 + cy) * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + + + + + +static int +microassist(struct counters *cpu, int pos) +{ + /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */ + int ret; + struct counters *idq; + struct counters *unhalt; + double un, id, res, con; + + con = 4.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + idq = find_counter(cpu, "IDQ.MS_UOPS"); + if (pos != -1) { + id = idq->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + id = idq->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = id/(un * con); + ret = printf("%1.3f", res); + return(ret); +} + + +static int +microassist_broad(struct counters *cpu, int pos) +{ + int ret; + struct counters *idq; + struct counters *unhalt; + struct counters *uopiss; + struct counters *uopret; + double un, id, res, con, uoi, uor; + + con = 4.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + idq = find_counter(cpu, "IDQ.MS_UOPS"); + uopiss = find_counter(cpu, "UOPS_ISSUED.ANY"); + uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); + if (pos != -1) { + id = idq->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + uoi = uopiss->vals[pos] * 1.0; + uor = uopret->vals[pos] * 1.0; + } else { + id = idq->sum * 1.0; + un = unhalt->sum * 1.0; + uoi = uopiss->sum * 1.0; + uor = uopret->sum * 1.0; + } + res = (uor/uoi) * (id/(un * con)); + ret = printf("%1.3f", res); + return(ret); +} + + +static int +aliasing(struct counters *cpu, int pos) +{ + /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ + int ret; + struct counters *ld; + struct counters *unhalt; + double un, lds, con, res; + + con = 5.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); + if (pos != -1) { + lds = ld->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + lds = ld->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (lds * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +aliasing_broad(struct counters *cpu, int pos) +{ + /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */ + int ret; + struct counters *ld; + struct counters *unhalt; + double un, lds, con, res; + + con = 7.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS"); + if (pos != -1) { + lds = ld->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + lds = ld->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (lds * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + + +static int +fpassists(struct counters *cpu, int pos) +{ + /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */ + int ret; + struct counters *fp; + struct counters *inst; + double un, fpd, res; + + inst = find_counter(cpu, "INST_RETIRED.ANY_P"); + fp = find_counter(cpu, "FP_ASSIST.ANY"); + if (pos != -1) { + fpd = fp->vals[pos] * 1.0; + un = inst->vals[pos] * 1.0; + } else { + fpd = fp->sum * 1.0; + un = inst->sum * 1.0; + } + res = fpd/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +otherassistavx(struct counters *cpu, int pos) +{ + /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ + int ret; + struct counters *oth; + struct counters *unhalt; + double un, ot, con, res; + + con = 75.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE"); + if (pos != -1) { + ot = oth->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + ot = oth->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (ot * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +otherassistsse(struct counters *cpu, int pos) +{ + + int ret; + struct counters *oth; + struct counters *unhalt; + double un, ot, con, res; + + /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/ + con = 75.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX"); + if (pos != -1) { + ot = oth->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + ot = oth->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = (ot * con)/un; + ret = printf("%1.3f", res); + return(ret); +} + +static int +efficiency1(struct counters *cpu, int pos) +{ + + int ret; + struct counters *uops; + struct counters *unhalt; + double un, ot, con, res; + + /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/ + con = 4.0; + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS"); + if (pos != -1) { + ot = uops->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + ot = uops->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = ot/(con * un); + ret = printf("%1.3f", res); + return(ret); +} + +static int +efficiency2(struct counters *cpu, int pos) +{ + + int ret; + struct counters *uops; + struct counters *unhalt; + double un, ot, res; + + /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/ + unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P"); + uops = find_counter(cpu, "INST_RETIRED.ANY_P"); + if (pos != -1) { + ot = uops->vals[pos] * 1.0; + un = unhalt->vals[pos] * 1.0; + } else { + ot = uops->sum * 1.0; + un = unhalt->sum * 1.0; + } + res = un/ot; + ret = printf("%1.3f", res); + return(ret); +} + +#define SANDY_BRIDGE_COUNT 20 +static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = { +/*01*/ { "allocstall1", "thresh > .05", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1", + allocstall1, 2 }, +/* -- not defined for SB right (partial-rat_stalls) 02*/ + { "allocstall2", "thresh > .05", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1", + allocstall2, 2 }, +/*03*/ { "br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", + br_mispredict, 2 }, +/*04*/ { "splitload", "thresh >= .1", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1", + splitload_sb, 2 }, +/* 05*/ { "splitstore", "thresh >= .01", + "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1", + splitstore_sb, 2 }, +/*06*/ { "contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + contested, 2 }, +/*07*/ { "blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward, 2 }, +/*08*/ { "cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2, 4 }, +/*09*/ { "cache1", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1, 2 }, +/*10*/ { "dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload, 3 }, +/*11*/ { "dtlbmissstore", "thresh >= .05", + "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missstore, 3 }, +/*12*/ { "frontendstall", "thresh >= .15", + "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + frontendstall, 2 }, +/*13*/ { "clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears, 4 }, +/*14*/ { "microassist", "thresh >= .05", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", + microassist, 2 }, +/*15*/ { "aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing, 2 }, +/*16*/ { "fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists, 2 }, +/*17*/ { "otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx, 2}, +/*18*/ { "otherassistsse", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistsse, 2 }, +/*19*/ { "eff1", "thresh < .9", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1, 2 }, +/*20*/ { "eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2, 2 }, +}; + + +#define IVY_BRIDGE_COUNT 21 +static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = { +/*1*/ { "eff1", "thresh < .75", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1, 2 }, +/*2*/ { "eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2, 2 }, +/*3*/ { "itlbmiss", "thresh > .05", + "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + itlb_miss, 2 }, +/*4*/ { "icachemiss", "thresh > .05", + "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + icache_miss, 3 }, +/*5*/ { "lcpstall", "thresh > .05", + "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", + lcp_stall, 2 }, +/*6*/ { "cache1", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1ib, 2 }, +/*7*/ { "cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2ib, 2 }, +/*8*/ { "contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + contested, 2 }, +/*9*/ { "datashare", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + datasharing, 2 }, +/*10*/ { "blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward, 2 }, +/*11*/ { "splitload", "thresh >= .1", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1", + splitloadib, 4 }, +/*12*/ { "splitstore", "thresh >= .01", + "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", + splitstore, 2 }, +/*13*/ { "aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing, 2 }, +/*14*/ { "dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload , 3}, +/*15*/ { "dtlbmissstore", "thresh >= .05", + "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missstore, 3 }, +/*16*/ { "br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", + br_mispredictib, 8 }, +/*17*/ { "clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears, 4 }, +/*18*/ { "microassist", "thresh >= .05", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", + microassist, 2 }, +/*19*/ { "fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists, 2 }, +/*20*/ { "otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx , 2}, +/*21*/ { "otherassistsse", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistsse, 2 }, +}; + +#define HASWELL_COUNT 20 +static struct cpu_entry haswell[HASWELL_COUNT] = { +/*1*/ { "eff1", "thresh < .75", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1, 2 }, +/*2*/ { "eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2, 2 }, +/*3*/ { "itlbmiss", "thresh > .05", + "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + itlb_miss, 2 }, +/*4*/ { "icachemiss", "thresh > .05", + "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", + icache_miss_has, 2 }, +/*5*/ { "lcpstall", "thresh > .05", + "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", + lcp_stall, 2 }, +/*6*/ { "cache1", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1ib, 2 }, +/*7*/ { "cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2has, 4 }, +/*8*/ { "contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1", + contested_has, 2 }, +/*9*/ { "datashare", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + datasharing_has, 2 }, +/*10*/ { "blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward, 2 }, +/*11*/ { "splitload", "thresh >= .1", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1", + splitload , 2}, +/*12*/ { "splitstore", "thresh >= .01", + "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1", + splitstore, 2 }, +/*13*/ { "aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing, 2 }, +/*14*/ { "dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload, 3 }, +/*15*/ { "br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1", + br_mispredict, 2 }, +/*16*/ { "clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears, 4 }, +/*17*/ { "microassist", "thresh >= .05", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1", + microassist, 2 }, +/*18*/ { "fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists, 2 }, +/*19*/ { "otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx, 2 }, +/*20*/ { "otherassistsse", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistsse, 2 }, +}; + + +static void +explain_name_broad(const char *name) +{ + const char *mythresh; + if (strcmp(name, "eff1") == 0) { + printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh < .75"; + } else if (strcmp(name, "eff2") == 0) { + printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n"); + mythresh = "thresh > 1.0"; + } else if (strcmp(name, "itlbmiss") == 0) { + printf("Examine (7 * ITLB_MISSES_STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "icachemiss") == 0) { + printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "lcpstall") == 0) { + printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "cache1") == 0) { + printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "cache2") == 0) { + printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "contested") == 0) { + printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "datashare") == 0) { + printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh > .05"; + } else if (strcmp(name, "blockstorefwd") == 0) { + printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .05"; + } else if (strcmp(name, "aliasing_4k") == 0) { + printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .1"; + } else if (strcmp(name, "dtlbmissload") == 0) { + printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"); + printf(" / CPU_CLK_UNHALTED.THREAD_P)\n"); + mythresh = "thresh >= .1"; + + } else if (strcmp(name, "br_miss") == 0) { + printf("Examine BR_MISP_RETIRED.ALL_BRANCHS_PS / (BR_MISP_RETIED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"); + printf(" (UOPS_ISSUEDF.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"); + printf("CPU_CLK_UNHALTED.THREAD * 4)\n"); + mythresh = "thresh >= .2"; + } else if (strcmp(name, "clears") == 0) { + printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"); + printf(" MACHINE_CLEARS.SMC + \n"); + printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "thresh >= .02"; + } else if (strcmp(name, "fpassist") == 0) { + printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n"); + mythresh = "look for a excessive value"; + } else if (strcmp(name, "otherassistavx") == 0) { + printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n"); + mythresh = "look for a excessive value"; + } else if (strcmp(name, "microassist") == 0) { + printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"); + printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n"); + mythresh = "thresh >= .05"; + } else { + printf("Unknown name:%s\n", name); + mythresh = "unknown entry"; + } + printf("If the value printed is %s we may have the ability to improve performance\n", mythresh); +} + + +#define BROADWELL_COUNT 17 +static struct cpu_entry broadwell[BROADWELL_COUNT] = { +/*1*/ { "eff1", "thresh < .75", + "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency1, 2 }, +/*2*/ { "eff2", "thresh > 1.0", + "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1", + efficiency2, 2 }, +/*3*/ { "itlbmiss", "thresh > .05", + "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1", + itlb_miss_broad, 3 }, +/*4*/ { "icachemiss", "thresh > .05", + "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1", + icache_miss_has, 2 }, +/*5*/ { "lcpstall", "thresh > .05", + "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1", + lcp_stall, 2 }, +/*6*/ { "cache1", "thresh >= .1", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache1broad, 2 }, +/*7*/ { "cache2", "thresh >= .2", + "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + cache2broad, 2 }, +/*8*/ { "contested", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1", + contestedbroad, 2 }, +/*9*/ { "datashare", "thresh >= .05", + "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1", + datasharing_has, 2 }, +/*10*/ { "blockstorefwd", "thresh >= .05", + "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1", + blockstoreforward, 2 }, +/*11*/ { "aliasing_4k", "thresh >= .1", + "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1", + aliasing_broad, 2 }, +/*12*/ { "dtlbmissload", "thresh >= .1", + "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1", + dtlb_missload, 3 }, +/*13*/ { "br_miss", "thresh >= .2", + "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1", + br_mispredict_broad, 7 }, +/*14*/ { "clears", "thresh >= .02", + "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1", + clears_broad, 5 }, +/*15*/ { "fpassist", "look for a excessive value", + "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1", + fpassists, 2 }, +/*16*/ { "otherassistavx", "look for a excessive value", + "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1", + otherassistavx, 2 }, +/*17*/ { "microassist", "thresh >= .2", + "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1", + microassist_broad, 4 }, +}; + + +static void +set_sandybridge(void) +{ + strcpy(the_cpu.cputype, "SandyBridge PMC"); + the_cpu.number = SANDY_BRIDGE_COUNT; + the_cpu.ents = sandy_bridge; + the_cpu.explain = explain_name_sb; +} + +static void +set_ivybridge(void) +{ + strcpy(the_cpu.cputype, "IvyBridge PMC"); + the_cpu.number = IVY_BRIDGE_COUNT; + the_cpu.ents = ivy_bridge; + the_cpu.explain = explain_name_ib; +} + + +static void +set_haswell(void) +{ + strcpy(the_cpu.cputype, "HASWELL PMC"); + the_cpu.number = HASWELL_COUNT; + the_cpu.ents = haswell; + the_cpu.explain = explain_name_has; +} + + +static void +set_broadwell(void) +{ + strcpy(the_cpu.cputype, "HASWELL PMC"); + the_cpu.number = BROADWELL_COUNT; + the_cpu.ents = broadwell; + the_cpu.explain = explain_name_broad; +} + + +static int +set_expression(const char *name) +{ + int found = 0, i; + for(i=0 ; i< the_cpu.number; i++) { + if (strcmp(name, the_cpu.ents[i].name) == 0) { + found = 1; + expression = the_cpu.ents[i].func; + command = the_cpu.ents[i].command; + threshold = the_cpu.ents[i].thresh; + if (the_cpu.ents[i].counters_required > max_pmc_counters) { + printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n", + the_cpu.ents[i].name, + the_cpu.ents[i].counters_required, max_pmc_counters); + printf("Sorry this test can not be run\n"); + if (run_all == 0) { + exit(-1); + } else { + return(-1); + } + } + break; + } + } + if (!found) { + printf("For CPU type %s we have no expression:%s\n", + the_cpu.cputype, name); + exit(-1); + } + return(0); +} + + + + + +static int +validate_expression(char *name) +{ + int i, found; + + found = 0; + for(i=0 ; i< the_cpu.number; i++) { + if (strcmp(name, the_cpu.ents[i].name) == 0) { + found = 1; + break; + } + } + if (!found) { + return(-1); + } + return (0); +} + +static void +do_expression(struct counters *cpu, int pos) +{ + if (expression == NULL) + return; + (*expression)(cpu, pos); +} + +static void +process_header(int idx, char *p) +{ + struct counters *up; + int i, len, nlen; + /* + * Given header element idx, at p in + * form 's/NN/nameof' + * process the entry to pull out the name and + * the CPU number. + */ + if (strncmp(p, "s/", 2)) { + printf("Check -- invalid header no s/ in %s\n", + p); + return; + } + up = &cnts[idx]; + up->cpu = strtol(&p[2], NULL, 10); + len = strlen(p); + for (i=2; i<len; i++) { + if (p[i] == '/') { + nlen = strlen(&p[(i+1)]); + if (nlen < (MAX_NLEN-1)) { + strcpy(up->counter_name, &p[(i+1)]); + } else { + strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1)); + } + } + } +} + +static void +build_counters_from_header(FILE *io) +{ + char buffer[8192], *p; + int i, len, cnt; + size_t mlen; + + /* We have a new start, lets + * setup our headers and cpus. + */ + if (fgets(buffer, sizeof(buffer), io) == NULL) { + printf("First line can't be read from file err:%d\n", errno); + return; + } + /* + * Ok output is an array of counters. Once + * we start to read the values in we must + * put them in there slot to match there CPU and + * counter being updated. We create a mass array + * of the counters, filling in the CPU and + * counter name. + */ + /* How many do we get? */ + len = strlen(buffer); + for (i=0, cnt=0; i<len; i++) { + if (strncmp(&buffer[i], "s/", 2) == 0) { + cnt++; + for(;i<len;i++) { + if (buffer[i] == ' ') + break; + } + } + } + mlen = sizeof(struct counters) * cnt; + cnts = malloc(mlen); + ncnts = cnt; + if (cnts == NULL) { + printf("No memory err:%d\n", errno); + return; + } + memset(cnts, 0, mlen); + for (i=0, cnt=0; i<len; i++) { + if (strncmp(&buffer[i], "s/", 2) == 0) { + p = &buffer[i]; + for(;i<len;i++) { + if (buffer[i] == ' ') { + buffer[i] = 0; + break; + } + } + process_header(cnt, p); + cnt++; + } + } + if (verbose) + printf("We have %d entries\n", cnt); +} +extern int max_to_collect; +int max_to_collect = MAX_COUNTER_SLOTS; + +static int +read_a_line(FILE *io) +{ + char buffer[8192], *p, *stop; + int pos, i; + + if (fgets(buffer, sizeof(buffer), io) == NULL) { + return(0); + } + p = buffer; + for (i=0; i<ncnts; i++) { + pos = cnts[i].pos; + cnts[i].vals[pos] = strtol(p, &stop, 0); + cnts[i].pos++; + cnts[i].sum += cnts[i].vals[pos]; + p = stop; + } + return (1); +} + +extern int cpu_count_out; +int cpu_count_out=0; + +static void +print_header(void) +{ + int i, cnt, printed_cnt; + + printf("*********************************\n"); + for(i=0, cnt=0; i<MAX_CPU; i++) { + if (glob_cpu[i]) { + cnt++; + } + } + cpu_count_out = cnt; + for(i=0, printed_cnt=0; i<MAX_CPU; i++) { + if (glob_cpu[i]) { + printf("CPU%d", i); + printed_cnt++; + } + if (printed_cnt == cnt) { + printf("\n"); + break; + } else { + printf("\t"); + } + } +} + +static void +lace_cpus_together(void) +{ + int i, j, lace_cpu; + struct counters *cpat, *at; + + for(i=0; i<ncnts; i++) { + cpat = &cnts[i]; + if (cpat->next_cpu) { + /* Already laced in */ + continue; + } + lace_cpu = cpat->cpu; + if (lace_cpu >= MAX_CPU) { + printf("CPU %d to big\n", lace_cpu); + continue; + } + if (glob_cpu[lace_cpu] == NULL) { + glob_cpu[lace_cpu] = cpat; + } else { + /* Already processed this cpu */ + continue; + } + /* Ok look forward for cpu->cpu and link in */ + for(j=(i+1); j<ncnts; j++) { + at = &cnts[j]; + if (at->next_cpu) { + continue; + } + if (at->cpu == lace_cpu) { + /* Found one */ + cpat->next_cpu = at; + cpat = at; + } + } + } +} + + +static void +process_file(char *filename) +{ + FILE *io; + int i; + int line_at, not_done; + pid_t pid_of_command=0; + + if (filename == NULL) { + io = my_popen(command, "r", &pid_of_command); + } else { + io = fopen(filename, "r"); + if (io == NULL) { + printf("Can't process file %s err:%d\n", + filename, errno); + return; + } + } + build_counters_from_header(io); + if (cnts == NULL) { + /* Nothing we can do */ + printf("Nothing to do -- no counters built\n"); + if (io) { + fclose(io); + } + return; + } + lace_cpus_together(); + print_header(); + if (verbose) { + for (i=0; i<ncnts; i++) { + printf("Counter:%s cpu:%d index:%d\n", + cnts[i].counter_name, + cnts[i].cpu, i); + } + } + line_at = 0; + not_done = 1; + while(not_done) { + if (read_a_line(io)) { + line_at++; + } else { + break; + } + if (line_at >= max_to_collect) { + not_done = 0; + } + if (filename == NULL) { + int cnt; + /* For the ones we dynamically open we print now */ + for(i=0, cnt=0; i<MAX_CPU; i++) { + do_expression(glob_cpu[i], (line_at-1)); + cnt++; + if (cnt == cpu_count_out) { + printf("\n"); + break; + } else { + printf("\t"); + } + } + } + } + if (filename) { + fclose(io); + } else { + my_pclose(io, pid_of_command); + } +} +#if defined(__amd64__) +#define cpuid(in,a,b,c,d)\ + asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in)); + +static __inline void +do_cpuid(u_int ax, u_int cx, u_int *p) +{ + __asm __volatile("cpuid" + : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) + : "0" (ax), "c" (cx) ); +} + +#else +#define cpuid(in, a, b, c, d) +static __inline void +do_cpuid(u_int ax, u_int cx, u_int *p) +{ +} + +#endif + +static void +get_cpuid_set(void) +{ + unsigned long eax, ebx, ecx, edx; + int model; + pid_t pid_of_command=0; + size_t sz, len; + FILE *io; + char linebuf[1024], *str; + u_int reg[4]; + + eax = ebx = ecx = edx = 0; + + cpuid(0, eax, ebx, ecx, edx); + if (ebx == 0x68747541) { + printf("AMD processors are not supported by this program\n"); + printf("Sorry\n"); + exit(0); + } else if (ebx == 0x6972794) { + printf("Cyrix processors are not supported by this program\n"); + printf("Sorry\n"); + exit(0); + } else if (ebx == 0x756e6547) { + printf("Genuine Intel\n"); + } else { + printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx); + exit(0); + } + cpuid(1, eax, ebx, ecx, edx); + model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4)); + printf("CPU model is 0x%x id:0x%lx\n", model, eax); + switch (eax & 0xF00) { + case 0x500: /* Pentium family processors */ + printf("Intel Pentium P5\n"); + goto not_supported; + break; + case 0x600: /* Pentium Pro, Celeron, Pentium II & III */ + switch (model) { + case 0x1: + printf("Intel Pentium P6\n"); + goto not_supported; + break; + case 0x3: + case 0x5: + printf("Intel PII\n"); + goto not_supported; + break; + case 0x6: case 0x16: + printf("Intel CL\n"); + goto not_supported; + break; + case 0x7: case 0x8: case 0xA: case 0xB: + printf("Intel PIII\n"); + goto not_supported; + break; + case 0x9: case 0xD: + printf("Intel PM\n"); + goto not_supported; + break; + case 0xE: + printf("Intel CORE\n"); + goto not_supported; + break; + case 0xF: + printf("Intel CORE2\n"); + goto not_supported; + break; + case 0x17: + printf("Intel CORE2EXTREME\n"); + goto not_supported; + break; + case 0x1C: /* Per Intel document 320047-002. */ + printf("Intel ATOM\n"); + goto not_supported; + break; + case 0x1A: + case 0x1E: /* + * Per Intel document 253669-032 9/2009, + * pages A-2 and A-57 + */ + case 0x1F: /* + * Per Intel document 253669-032 9/2009, + * pages A-2 and A-57 + */ + printf("Intel COREI7\n"); + goto not_supported; + break; + case 0x2E: + printf("Intel NEHALEM\n"); + goto not_supported; + break; + case 0x25: /* Per Intel document 253669-033US 12/2009. */ + case 0x2C: /* Per Intel document 253669-033US 12/2009. */ + printf("Intel WESTMERE\n"); + goto not_supported; + break; + case 0x2F: /* Westmere-EX, seen in wild */ + printf("Intel WESTMERE\n"); + goto not_supported; + break; + case 0x2A: /* Per Intel document 253669-039US 05/2011. */ + printf("Intel SANDYBRIDGE\n"); + set_sandybridge(); + break; + case 0x2D: /* Per Intel document 253669-044US 08/2012. */ + printf("Intel SANDYBRIDGE_XEON\n"); + set_sandybridge(); + break; + case 0x3A: /* Per Intel document 253669-043US 05/2012. */ + printf("Intel IVYBRIDGE\n"); + set_ivybridge(); + break; + case 0x3E: /* Per Intel document 325462-045US 01/2013. */ + printf("Intel IVYBRIDGE_XEON\n"); + set_ivybridge(); + break; + case 0x3F: /* Per Intel document 325462-045US 09/2014. */ + printf("Intel HASWELL (Xeon)\n"); + set_haswell(); + break; + case 0x3C: /* Per Intel document 325462-045US 01/2013. */ + case 0x45: + case 0x46: + printf("Intel HASWELL\n"); + set_haswell(); + break; + + case 0x4e: + case 0x5e: + printf("Intel SKY-LAKE\n"); + goto not_supported; + break; + case 0x3D: + case 0x47: + printf("Intel BROADWELL\n"); + set_broadwell(); + break; + case 0x4f: + case 0x56: + printf("Intel BROADWEL (Xeon)\n"); + set_broadwell(); + break; + + case 0x4D: + /* Per Intel document 330061-001 01/2014. */ + printf("Intel ATOM_SILVERMONT\n"); + goto not_supported; + break; + default: + printf("Intel model 0x%x is not known -- sorry\n", + model); + goto not_supported; + break; + } + break; + case 0xF00: /* P4 */ + printf("Intel unknown model %d\n", model); + goto not_supported; + break; + } + do_cpuid(0xa, 0, reg); + max_pmc_counters = (reg[3] & 0x0000000f) + 1; + printf("We have %d PMC counters to work with\n", max_pmc_counters); + /* Ok lets load the list of all known PMC's */ + io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command); + if (valid_pmcs == NULL) { + /* Likely */ + pmc_allocated_cnt = PMC_INITIAL_ALLOC; + sz = sizeof(char *) * pmc_allocated_cnt; + valid_pmcs = malloc(sz); + if (valid_pmcs == NULL) { + printf("No memory allocation fails at startup?\n"); + exit(-1); + } + memset(valid_pmcs, 0, sz); + } + + while (fgets(linebuf, sizeof(linebuf), io) != NULL) { + if (linebuf[0] != '\t') { + /* sometimes headers ;-) */ + continue; + } + len = strlen(linebuf); + if (linebuf[(len-1)] == '\n') { + /* Likely */ + linebuf[(len-1)] = 0; + } + str = &linebuf[1]; + len = strlen(str) + 1; + valid_pmcs[valid_pmc_cnt] = malloc(len); + if (valid_pmcs[valid_pmc_cnt] == NULL) { + printf("No memory2 allocation fails at startup?\n"); + exit(-1); + } + memset(valid_pmcs[valid_pmc_cnt], 0, len); + strcpy(valid_pmcs[valid_pmc_cnt], str); + valid_pmc_cnt++; + if (valid_pmc_cnt >= pmc_allocated_cnt) { + /* Got to expand -- unlikely */ + char **more; + + sz = sizeof(char *) * (pmc_allocated_cnt * 2); + more = malloc(sz); + if (more == NULL) { + printf("No memory3 allocation fails at startup?\n"); + exit(-1); + } + memset(more, 0, sz); + memcpy(more, valid_pmcs, sz); + pmc_allocated_cnt *= 2; + free(valid_pmcs); + valid_pmcs = more; + } + } + my_pclose(io, pid_of_command); + return; +not_supported: + printf("Not supported\n"); + exit(-1); +} + +static void +explain_all(void) +{ + int i; + printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype); + printf("-------------------------------------------------------------\n"); + for(i=0; i<the_cpu.number; i++){ + printf("For -e %s ", the_cpu.ents[i].name); + (*the_cpu.explain)(the_cpu.ents[i].name); + printf("----------------------------\n"); + } +} + +static void +test_for_a_pmc(const char *pmc, int out_so_far) +{ + FILE *io; + pid_t pid_of_command=0; + char my_command[1024]; + char line[1024]; + char resp[1024]; + int len, llen, i; + + if (out_so_far < 50) { + len = 50 - out_so_far; + for(i=0; i<len; i++) { + printf(" "); + } + } + sprintf(my_command, "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc); + io = my_popen(my_command, "r", &pid_of_command); + if (io == NULL) { + printf("Failed -- popen fails\n"); + return; + } + /* Setup what we expect */ + len = sprintf(resp, "%s", pmc); + if (fgets(line, sizeof(line), io) == NULL) { + printf("Failed -- no output from pmstat\n"); + goto out; + } + llen = strlen(line); + if (line[(llen-1)] == '\n') { + line[(llen-1)] = 0; + llen--; + } + for(i=2; i<(llen-len); i++) { + if (strncmp(&line[i], "ERROR", 5) == 0) { + printf("Failed %s\n", line); + goto out; + } else if (strncmp(&line[i], resp, len) == 0) { + int j, k; + + if (fgets(line, sizeof(line), io) == NULL) { + printf("Failed -- no second output from pmstat\n"); + goto out; + } + len = strlen(line); + for (j=0; j<len; j++) { + if (line[j] == ' ') { + j++; + } else { + break; + } + } + printf("Pass"); + len = strlen(&line[j]); + if (len < 20) { + for(k=0; k<(20-len); k++) { + printf(" "); + } + } + if (len) { + printf("%s", &line[j]); + } else { + printf("\n"); + } + goto out; + } + } + printf("Failed -- '%s' not '%s'\n", line, resp); +out: + my_pclose(io, pid_of_command); + +} + +static int +add_it_to(char **vars, int cur_cnt, char *name) +{ + int i; + size_t len; + for(i=0; i<cur_cnt; i++) { + if (strcmp(vars[i], name) == 0) { + /* Already have */ + return(0); + } + } + if (vars[cur_cnt] != NULL) { + printf("Cur_cnt:%d filled with %s??\n", + cur_cnt, vars[cur_cnt]); + exit(-1); + } + /* Ok its new */ + len = strlen(name) + 1; + vars[cur_cnt] = malloc(len); + if (vars[cur_cnt] == NULL) { + printf("No memory %s\n", __FUNCTION__); + exit(-1); + } + memset(vars[cur_cnt], 0, len); + strcpy(vars[cur_cnt], name); + return(1); +} + +static char * +build_command_for_exp(struct expression *exp) +{ + /* + * Build the pmcstat command to handle + * the passed in expression. + * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ + * where NNN and QQQ represent the PMC's in the expression + * uniquely.. + */ + char forming[1024]; + int cnt_pmc, alloced_pmcs, i; + struct expression *at; + char **vars, *cmd; + size_t mal; + + alloced_pmcs = cnt_pmc = 0; + /* first how many do we have */ + at = exp; + while (at) { + if (at->type == TYPE_VALUE_PMC) { + cnt_pmc++; + } + at = at->next; + } + if (cnt_pmc == 0) { + printf("No PMC's in your expression -- nothing to do!!\n"); + exit(0); + } + mal = cnt_pmc * sizeof(char *); + vars = malloc(mal); + if (vars == NULL) { + printf("No memory\n"); + exit(-1); + } + memset(vars, 0, mal); + at = exp; + while (at) { + if (at->type == TYPE_VALUE_PMC) { + if(add_it_to(vars, alloced_pmcs, at->name)) { + alloced_pmcs++; + } + } + at = at->next; + } + /* Now we have a unique list in vars so create our command */ + mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */ + for(i=0; i<alloced_pmcs; i++) { + mal += strlen(vars[i]) + 4; /* var + " -s " */ + } + cmd = malloc((mal+2)); + if (cmd == NULL) { + printf("%s out of mem\n", __FUNCTION__); + exit(-1); + } + memset(cmd, 0, (mal+2)); + strcpy(cmd, "/usr/sbin/pmcstat -w 1"); + at = exp; + for(i=0; i<alloced_pmcs; i++) { + sprintf(forming, " -s %s", vars[i]); + strcat(cmd, forming); + free(vars[i]); + vars[i] = NULL; + } + free(vars); + return(cmd); +} + +static int +user_expr(struct counters *cpu, int pos) +{ + int ret; + double res; + struct counters *var; + struct expression *at; + + at = master_exp; + while (at) { + if (at->type == TYPE_VALUE_PMC) { + var = find_counter(cpu, at->name); + if (var == NULL) { + printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name); + exit(-1); + } + if (pos != -1) { + at->value = var->vals[pos] * 1.0; + } else { + at->value = var->sum * 1.0; + } + } + at = at->next; + } + res = run_expr(master_exp, 1, NULL); + ret = printf("%1.3f", res); + return(ret); +} + + +static void +set_manual_exp(struct expression *exp) +{ + expression = user_expr; + command = build_command_for_exp(exp); + threshold = "User defined threshold"; +} + +static void +run_tests(void) +{ + int i, lenout; + printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt); + printf("------------------------------------------------------------------------\n"); + for(i=0; i<valid_pmc_cnt; i++) { + lenout = printf("%s", valid_pmcs[i]); + fflush(stdout); + test_for_a_pmc(valid_pmcs[i], lenout); + } +} +static void +list_all(void) +{ + int i, cnt, j; + printf("PMC Abbreviation\n"); + printf("--------------------------------------------------------------\n"); + for(i=0; i<valid_pmc_cnt; i++) { + cnt = printf("%s", valid_pmcs[i]); + for(j=cnt; j<52; j++) { + printf(" "); + } + printf("%%%d\n", i); + } +} + + +int +main(int argc, char **argv) +{ + int i, j, cnt; + char *filename=NULL; + const char *name=NULL; + int help_only = 0; + int test_mode = 0; + int test_at = 0; + + get_cpuid_set(); + memset(glob_cpu, 0, sizeof(glob_cpu)); + while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) { + switch (i) { + case 'A': + run_all = 1; + break; + case 'L': + list_all(); + return(0); + case 'H': + printf("**********************************\n"); + explain_all(); + printf("**********************************\n"); + return(0); + break; + case 'T': + test_mode = 1; + break; + case 'E': + master_exp = parse_expression(optarg); + if (master_exp) { + set_manual_exp(master_exp); + } + break; + case 'e': + if (validate_expression(optarg)) { + printf("Unknown expression %s\n", optarg); + return(0); + } + name = optarg; + set_expression(optarg); + break; + case 'm': + max_to_collect = strtol(optarg, NULL, 0); + if (max_to_collect > MAX_COUNTER_SLOTS) { + /* You can't collect more than max in array */ + max_to_collect = MAX_COUNTER_SLOTS; + } + break; + case 'v': + verbose++; + break; + case 'h': + help_only = 1; + break; + case 'i': + filename = optarg; + break; + case '?': + default: + use: + printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n", + argv[0]); + printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n"); + printf("-v -- verbose dump debug type things -- you don't want this\n"); + printf("-m N -- maximum to collect is N measurments\n"); + printf("-e expr-name -- Do expression expr-name\n"); + printf("-E 'your expression' -- Do your expression\n"); + printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n"); + printf("-H -- Don't run anything, just explain all canned expressions\n"); + printf("-T -- Test all PMC's defined by this processor\n"); + printf("-A -- Run all canned tests\n"); + return(0); + break; + }; + } + if ((run_all == 0) && (name == NULL) && (filename == NULL) && + (test_mode == 0) && (master_exp == NULL)) { + printf("Without setting an expression we cannot dynamically gather information\n"); + printf("you must supply a filename (and you probably want verbosity)\n"); + goto use; + } + if (run_all && max_to_collect > 10) { + max_to_collect = 3; + } + if (test_mode) { + run_tests(); + return(0); + } + printf("*********************************\n"); + if ((master_exp == NULL) && name) { + (*the_cpu.explain)(name); + } else if (master_exp) { + printf("Examine your expression "); + print_exp(master_exp); + printf("User defined threshold\n"); + } + if (help_only) { + return(0); + } + if (run_all) { + more: + name = the_cpu.ents[test_at].name; + printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh); + test_at++; + if (set_expression(name) == -1) { + if (test_at >= the_cpu.number) { + goto done; + } else + goto more; + } + + } + process_file(filename); + if (verbose >= 2) { + for (i=0; i<ncnts; i++) { + printf("Counter:%s cpu:%d index:%d\n", + cnts[i].counter_name, + cnts[i].cpu, i); + for(j=0; j<cnts[i].pos; j++) { + printf(" val - %ld\n", (long int)cnts[i].vals[j]); + } + printf(" sum - %ld\n", (long int)cnts[i].sum); + } + } + if (expression == NULL) { + return(0); + } + if (max_to_collect > 1) { + for(i=0, cnt=0; i<MAX_CPU; i++) { + if (glob_cpu[i]) { + do_expression(glob_cpu[i], -1); + cnt++; + if (cnt == cpu_count_out) { + printf("\n"); + break; + } else { + printf("\t"); + } + } + } + } + if (run_all && (test_at < the_cpu.number)) { + memset(glob_cpu, 0, sizeof(glob_cpu)); + ncnts = 0; + printf("*********************************\n"); + goto more; + } else if (run_all) { + done: + printf("*********************************\n"); + } + return(0); +} |