]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.sbin/pmcstudy/pmcstudy.c
zfs: merge openzfs/zfs@dbda45160
[FreeBSD/FreeBSD.git] / usr.sbin / pmcstudy / pmcstudy.c
1 /*-
2  * Copyright (c) 2014-2015 Netflix, Inc.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer,
9  *    in this position and unchanged.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. The name of the author may not be used to endorse or promote products
14  *    derived from this software without specific prior written permission
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 #include <sys/types.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <string.h>
32 #include <strings.h>
33 #include <sys/errno.h>
34 #include <signal.h>
35 #include <sys/wait.h>
36 #include <getopt.h>
37 #include "eval_expr.h"
/* Fixed sizing limits used by the declarations in this file. */
#define MAX_COUNTER_SLOTS 1024	/* elements in struct counters' vals[] */
#define MAX_NLEN 64		/* bytes in struct counters' counter_name[] */
#define MAX_CPU 64		/* elements in glob_cpu[] */
#define PMC_INITIAL_ALLOC 512

/* File-local settings and their start-up defaults. */
static int max_pmc_counters = 1;
static int run_all = 0;
static int verbose = 0;

/* Process environment, handed to execve() by my_popen(). */
extern char **environ;

/*
 * Each global below is declared extern and then defined right away, so
 * a declaration is always in scope ahead of the definition.
 */
extern struct expression *master_exp;
struct expression *master_exp = NULL;

extern char **valid_pmcs;
char **valid_pmcs = NULL;

extern int valid_pmc_cnt;
int valid_pmc_cnt = 0;

extern int pmc_allocated_cnt;
int pmc_allocated_cnt = 0;
57
58 /*
59  * The following two varients on popen and pclose with
60  * the cavet that they get you the PID so that you
61  * can supply it to pclose so it can send a SIGTERM 
62  *  to the process.
63  */
64 static FILE *
65 my_popen(const char *command, const char *dir, pid_t *p_pid)
66 {
67         FILE *io_out, *io_in;
68         int pdesin[2], pdesout[2];
69         char *argv[4];
70         pid_t pid;
71         char cmd[4];
72         char cmd2[1024];
73         char arg1[4];
74
75         if ((strcmp(dir, "r") != 0) &&
76             (strcmp(dir, "w") != 0)) {
77                 errno = EINVAL;
78                 return(NULL);
79         }
80         if (pipe(pdesin) < 0)
81                 return (NULL);
82
83         if (pipe(pdesout) < 0) {
84                 (void)close(pdesin[0]);
85                 (void)close(pdesin[1]);
86                 return (NULL);
87         }
88         strcpy(cmd, "sh");
89         strcpy(arg1, "-c");
90         strcpy(cmd2, command);
91         argv[0] = cmd;
92         argv[1] = arg1;
93         argv[2] = cmd2;
94         argv[3] = NULL;
95
96         switch (pid = fork()) {
97         case -1:                        /* Error. */
98                 (void)close(pdesin[0]);
99                 (void)close(pdesin[1]);
100                 (void)close(pdesout[0]);
101                 (void)close(pdesout[1]);
102                 return (NULL);
103                 /* NOTREACHED */
104         case 0:                         /* Child. */
105                 /* Close out un-used sides */
106                 (void)close(pdesin[1]);
107                 (void)close(pdesout[0]);
108                 /* Now prepare the stdin of the process */
109                 close(0);
110                 (void)dup(pdesin[0]);
111                 (void)close(pdesin[0]);
112                 /* Now prepare the stdout of the process */
113                 close(1);
114                 (void)dup(pdesout[1]);
115                 /* And lets do stderr just in case */
116                 close(2);
117                 (void)dup(pdesout[1]);
118                 (void)close(pdesout[1]);
119                 /* Now run it */
120                 execve("/bin/sh", argv, environ);
121                 exit(127);
122                 /* NOTREACHED */
123         }
124         /* Parent; assume fdopen can't fail. */
125         /* Store the pid */
126         *p_pid = pid;
127         if (strcmp(dir, "r") != 0) {
128                 io_out = fdopen(pdesin[1], "w");
129                 (void)close(pdesin[0]);
130                 (void)close(pdesout[0]);
131                 (void)close(pdesout[1]);
132                 return(io_out);
133         } else {
134                 /* Prepare the input stream */
135                 io_in = fdopen(pdesout[0], "r");
136                 (void)close(pdesout[1]);
137                 (void)close(pdesin[0]);
138                 (void)close(pdesin[1]);
139                 return (io_in);
140         }
141 }
142
/*
 * my_pclose --
 *	Counterpart of my_popen().  Closes the stream, sends SIGTERM to
 *	the process identified by the_pid and then waits for it, trying
 *	again whenever the wait is interrupted (EINTR).  Nothing is
 *	returned; the child's exit status is collected and discarded.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;

	(void)fclose(io);
	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);
	for (;;) {
		if (wait4(the_pid, &status, 0, (struct rusage *)0) != -1)
			break;
		if (errno != EINTR)
			break;
	}
}
164
165 struct counters {
166         struct counters *next_cpu;
167         char counter_name[MAX_NLEN];            /* Name of counter */
168         int cpu;                                /* CPU we are on */
169         int pos;                                /* Index we are filling to. */
170         uint64_t vals[MAX_COUNTER_SLOTS];       /* Last 64 entries */
171         uint64_t sum;                           /* Summary of entries */
172 };
173
174 extern struct counters *glob_cpu[MAX_CPU];
175 struct counters *glob_cpu[MAX_CPU];
176
177 extern struct counters *cnts;
178 struct counters *cnts=NULL;
179
180 extern int ncnts;
181 int ncnts=0;
182
183 extern int (*expression)(struct counters *, int);
184 int (*expression)(struct counters *, int);
185
186 static const char *threshold=NULL;
187 static const char *command;
188
/*
 * Description of one named expression: its name, threshold text,
 * command string, the routine that evaluates it and the number of
 * counters it requires.
 */
struct cpu_entry {
	/* Expression name */
	const char *name;
	/* Threshold description */
	const char *thresh;
	/* Command string associated with the expression */
	const char *command;
	/* Evaluation routine, called with a counter chain and a slot index */
	int (*func)(struct counters *, int);
	/* How many counters the expression needs */
	int counters_required;
};
196
/*
 * A CPU family: its type string, a table of cpu_entry records together
 * with a count, and the routine that explains an entry by name.
 */
struct cpu_type {
	/* CPU type string (at most 31 characters plus the terminator) */
	char cputype[32];
	/* Count that goes with ents -- presumably the number of entries; confirm where it is filled in */
	int number;
	/* Table of expressions */
	struct cpu_entry *ents;
	/* Prints an explanation for the expression called name */
	void (*explain)(const char *name);
};

/* The single cpu_type instance used by this program. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
205
/*
 * explain_name_sb --
 *	Print, on stdout, the formula behind the expression called
 *	"name" followed by a hint about the threshold to look for.
 *	(The "sb" table presumably targets Sandy Bridge -- confirm
 *	against the CPU selection code.)
 *
 *	name is matched exactly (strcmp) against the table below.  An
 *	unrecognised name prints "Unknown name:<name>" and the threshold
 *	text "unknown entry".
 *
 *	The "splitload" formula text now carries its opening
 *	parenthesis; it used to be printed unbalanced.
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *name;	/* expression name */
		const char *formula;	/* text printed verbatim */
		const char *thresh;	/* threshold hint */
	} table[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t nentries = sizeof(table) / sizeof(table[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nentries; i++) {
		if (strcmp(name, table[i].name) == 0)
			break;
	}
	if (i < nentries) {
		/* The formula text holds no conversions; emit it verbatim. */
		fputs(table[i].formula, stdout);
		mythresh = table[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
284
/*
 * explain_name_ib --
 *	Print, on stdout, the formula behind the expression called
 *	"name" followed by a hint about the threshold to look for.
 *	(The "ib" table presumably targets Ivy Bridge -- confirm against
 *	the CPU selection code.)
 *
 *	name is matched exactly (strcmp) against the table below.  An
 *	unrecognised name prints "Unknown name:<name>" and the threshold
 *	text "unknown entry".
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *name;	/* expression name */
		const char *formula;	/* text printed verbatim */
		const char *thresh;	/* threshold hint */
	} table[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nentries = sizeof(table) / sizeof(table[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nentries; i++) {
		if (strcmp(name, table[i].name) == 0)
			break;
	}
	if (i < nentries) {
		/* The formula text holds no conversions; emit it verbatim. */
		fputs(table[i].formula, stdout);
		mythresh = table[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
366
367
/*
 * explain_name_has --
 *	Print, on stdout, the formula behind the expression called
 *	"name" followed by a hint about the threshold to look for.
 *	(The "has" table presumably targets Haswell -- confirm against
 *	the CPU selection code.)
 *
 *	name is matched exactly (strcmp) against the table below.  An
 *	unrecognised name prints "Unknown name:<name>" and the threshold
 *	text "unknown entry".
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *name;	/* expression name */
		const char *formula;	/* text printed verbatim */
		const char *thresh;	/* threshold hint */
	} table[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t nentries = sizeof(table) / sizeof(table[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < nentries; i++) {
		if (strcmp(name, table[i].name) == 0)
			break;
	}
	if (i < nentries) {
		/* The formula text holds no conversions; emit it verbatim. */
		fputs(table[i].formula, stdout);
		mythresh = table[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
445
446
447
448 static struct counters *
449 find_counter(struct counters *base, const char *name)
450 {
451         struct counters *at;
452         int len;
453
454         at = base;
455         len = strlen(name);
456         while(at) {
457                 if (strncmp(at->counter_name, name, len) == 0) {
458                         return(at);
459                 }
460                 at = at->next_cpu;
461         }
462         printf("Can't find counter %s\n", name);
463         printf("We have:\n");
464         at = base;
465         while(at) {
466                 printf("- %s\n", at->counter_name);
467                 at = at->next_cpu;
468         }
469         exit(-1);
470 }
471
472 static int
473 allocstall1(struct counters *cpu, int pos)
474 {
475 /*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
476         int ret;
477         struct counters *partial;
478         struct counters *unhalt;
479         double un, par, res;
480         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
481         partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
482         if (pos != -1) {
483                 par = partial->vals[pos] * 1.0;
484                 un = unhalt->vals[pos] * 1.0;
485         } else {
486                 par = partial->sum * 1.0;
487                 un = unhalt->sum * 1.0;
488         }
489         res = par/un;
490         ret = printf("%1.3f", res);
491         return(ret);
492 }
493
494 static int
495 allocstall2(struct counters *cpu, int pos)
496 {
497 /*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
498         int ret;
499         struct counters *partial;
500         struct counters *unhalt;
501         double un, par, res;
502         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
503         partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
504         if (pos != -1) {
505                 par = partial->vals[pos] * 1.0;
506                 un = unhalt->vals[pos] * 1.0;
507         } else {
508                 par = partial->sum * 1.0;
509                 un = unhalt->sum * 1.0;
510         }
511         res = par/un;
512         ret = printf("%1.3f", res);
513         return(ret);
514 }
515
516 static int
517 br_mispredict(struct counters *cpu, int pos)
518 {
519         struct counters *brctr;
520         struct counters *unhalt;
521         int ret;
522 /*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
523         double br, un, con, res;
524         con = 20.0;
525         
526         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
527         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
528         if (pos != -1) {
529                 br = brctr->vals[pos] * 1.0;
530                 un = unhalt->vals[pos] * 1.0;
531         } else {
532                 br = brctr->sum * 1.0;
533                 un = unhalt->sum * 1.0;
534         }
535         res = (con * br)/un;
536         ret = printf("%1.3f", res);
537         return(ret);
538 }
539
540 static int
541 br_mispredictib(struct counters *cpu, int pos)
542 {
543         struct counters *brctr;
544         struct counters *unhalt;
545         struct counters *clear, *clear2, *clear3;
546         struct counters *uops;
547         struct counters *recv;  
548         struct counters *iss;
549 /*        "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
550         int ret;
551         /*  
552          * (BR_MISP_RETIRED.ALL_BRANCHES / 
553          *         (BR_MISP_RETIRED.ALL_BRANCHES +
554          *          MACHINE_CLEAR.COUNT) * 
555          *         ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
556          *
557          */
558         double br, cl, cl2, cl3, uo, re, un, con, res, is;
559         con = 4.0;
560         
561         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
562         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
563         clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
564         clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
565         clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
566         uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
567         iss = find_counter(cpu, "UOPS_ISSUED.ANY");
568         recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
569         if (pos != -1) {
570                 br = brctr->vals[pos] * 1.0;
571                 cl = clear->vals[pos] * 1.0;
572                 cl2 = clear2->vals[pos] * 1.0;
573                 cl3 = clear3->vals[pos] * 1.0;
574                 uo = uops->vals[pos] * 1.0;
575                 re = recv->vals[pos] * 1.0;
576                 is = iss->vals[pos] * 1.0;
577                 un = unhalt->vals[pos] * 1.0;
578         } else {
579                 br = brctr->sum * 1.0;
580                 cl = clear->sum * 1.0;
581                 cl2 = clear2->sum * 1.0;
582                 cl3 = clear3->sum * 1.0;
583                 uo = uops->sum * 1.0;
584                 re = recv->sum * 1.0;
585                 is = iss->sum * 1.0;
586                 un = unhalt->sum * 1.0;
587         }
588         res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
589         ret = printf("%1.3f", res);
590         return(ret);
591 }
592
593
594 static int
595 br_mispredict_broad(struct counters *cpu, int pos)
596 {
597         struct counters *brctr;
598         struct counters *unhalt;
599         struct counters *clear;
600         struct counters *uops;
601         struct counters *uops_ret;
602         struct counters *recv;
603         int ret;
604         double br, cl, uo, uo_r, re, con, un, res;
605
606         con = 4.0;
607         
608         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
609         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
610         clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
611         uops = find_counter(cpu, "UOPS_ISSUED.ANY");
612         uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
613         recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
614
615         if (pos != -1) {
616                 un = unhalt->vals[pos] * 1.0;
617                 br = brctr->vals[pos] * 1.0;
618                 cl = clear->vals[pos] * 1.0;
619                 uo = uops->vals[pos] * 1.0;
620                 uo_r = uops_ret->vals[pos] * 1.0;
621                 re = recv->vals[pos] * 1.0;
622         } else {
623                 un = unhalt->sum * 1.0;
624                 br = brctr->sum * 1.0;
625                 cl = clear->sum * 1.0;
626                 uo = uops->sum * 1.0;
627                 uo_r = uops_ret->sum * 1.0;
628                 re = recv->sum * 1.0;
629         }
630         res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
631         ret = printf("%1.3f", res);
632         return(ret);
633 }
634
635 static int
636 splitloadib(struct counters *cpu, int pos)
637 {
638         int ret;
639         struct counters *mem;
640         struct counters *l1d, *ldblock;
641         struct counters *unhalt;
642         double un, memd, res, l1, ldb;
643         /*  
644          * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
645          * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
646          */
647
648         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
649         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
650         l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
651         ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
652         if (pos != -1) {
653                 memd = mem->vals[pos] * 1.0;
654                 l1 = l1d->vals[pos] * 1.0;
655                 ldb = ldblock->vals[pos] * 1.0;
656                 un = unhalt->vals[pos] * 1.0;
657         } else {
658                 memd = mem->sum * 1.0;
659                 l1 = l1d->sum * 1.0;
660                 ldb = ldblock->sum * 1.0;
661                 un = unhalt->sum * 1.0;
662         }
663         res = ((l1 / memd) * ldb)/un;
664         ret = printf("%1.3f", res);
665         return(ret);
666 }
667
668
669 static int
670 splitload(struct counters *cpu, int pos)
671 {
672         int ret;
673         struct counters *mem;
674         struct counters *unhalt;
675         double con, un, memd, res;
676 /*  4  - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
677
678         con = 5.0;
679         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
680         mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
681         if (pos != -1) {
682                 memd = mem->vals[pos] * 1.0;
683                 un = unhalt->vals[pos] * 1.0;
684         } else {
685                 memd = mem->sum * 1.0;
686                 un = unhalt->sum * 1.0;
687         }
688         res = (memd * con)/un;
689         ret = printf("%1.3f", res);
690         return(ret);
691 }
692
693
694 static int
695 splitload_sb(struct counters *cpu, int pos)
696 {
697         int ret;
698         struct counters *mem;
699         struct counters *unhalt;
700         double con, un, memd, res;
701 /*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
702
703         con = 5.0;
704         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
705         mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
706         if (pos != -1) {
707                 memd = mem->vals[pos] * 1.0;
708                 un = unhalt->vals[pos] * 1.0;
709         } else {
710                 memd = mem->sum * 1.0;
711                 un = unhalt->sum * 1.0;
712         }
713         res = (memd * con)/un;
714         ret = printf("%1.3f", res);
715         return(ret);
716 }
717
718
719 static int
720 splitstore_sb(struct counters *cpu, int pos)
721 {
722         /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
723         int ret;
724         struct counters *mem_split;
725         struct counters *mem_stores;
726         double memsplit, memstore, res;
727         mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
728         mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
729         if (pos != -1) {
730                 memsplit = mem_split->vals[pos] * 1.0;
731                 memstore = mem_stores->vals[pos] * 1.0;
732         } else {
733                 memsplit = mem_split->sum * 1.0;
734                 memstore = mem_stores->sum * 1.0;
735         }
736         res = memsplit/memstore;
737         ret = printf("%1.3f", res);
738         return(ret);
739 }
740
741
742
743 static int
744 splitstore(struct counters *cpu, int pos)
745 {
746         /*  5  - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
747         int ret;
748         struct counters *mem_split;
749         struct counters *mem_stores;
750         double memsplit, memstore, res;
751         mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
752         mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
753         if (pos != -1) {
754                 memsplit = mem_split->vals[pos] * 1.0;
755                 memstore = mem_stores->vals[pos] * 1.0;
756         } else {
757                 memsplit = mem_split->sum * 1.0;
758                 memstore = mem_stores->sum * 1.0;
759         }
760         res = memsplit/memstore;
761         ret = printf("%1.3f", res);
762         return(ret);
763 }
764
765
766 static int
767 contested(struct counters *cpu, int pos)
768 {
769         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
770         int ret;
771         struct counters *mem;
772         struct counters *unhalt;
773         double con, un, memd, res;
774
775         con = 60.0;
776         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
777         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
778         if (pos != -1) {
779                 memd = mem->vals[pos] * 1.0;
780                 un = unhalt->vals[pos] * 1.0;
781         } else {
782                 memd = mem->sum * 1.0;
783                 un = unhalt->sum * 1.0;
784         }
785         res = (memd * con)/un;
786         ret = printf("%1.3f", res);
787         return(ret);
788 }
789
790 static int
791 contested_has(struct counters *cpu, int pos)
792 {
793         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
794         int ret;
795         struct counters *mem;
796         struct counters *unhalt;
797         double con, un, memd, res;
798
799         con = 84.0;
800         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
801         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
802         if (pos != -1) {
803                 memd = mem->vals[pos] * 1.0;
804                 un = unhalt->vals[pos] * 1.0;
805         } else {
806                 memd = mem->sum * 1.0;
807                 un = unhalt->sum * 1.0;
808         }
809         res = (memd * con)/un;
810         ret = printf("%1.3f", res);
811         return(ret);
812 }
813
814 static int
815 contestedbroad(struct counters *cpu, int pos)
816 {
817         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
818         int ret;
819         struct counters *mem;
820         struct counters *mem2;
821         struct counters *unhalt;
822         double con, un, memd, memtoo, res;
823
824         con = 84.0;
825         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
826         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
827         mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
828
829         if (pos != -1) {
830                 memd = mem->vals[pos] * 1.0;
831                 memtoo = mem2->vals[pos] * 1.0;
832                 un = unhalt->vals[pos] * 1.0;
833         } else {
834                 memd = mem->sum * 1.0;
835                 memtoo = mem2->sum * 1.0;
836                 un = unhalt->sum * 1.0;
837         }
838         res = ((memd * con) + memtoo)/un;
839         ret = printf("%1.3f", res);
840         return(ret);
841 }
842
843
844 static int
845 blockstoreforward(struct counters *cpu, int pos)
846 {
847         /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
848         int ret;
849         struct counters *ldb;
850         struct counters *unhalt;
851         double con, un, ld, res;
852
853         con = 13.0;
854         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
855         ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
856         if (pos != -1) {
857                 ld = ldb->vals[pos] * 1.0;
858                 un = unhalt->vals[pos] * 1.0;
859         } else {
860                 ld = ldb->sum * 1.0;
861                 un = unhalt->sum * 1.0;
862         }
863         res = (ld * con)/un;
864         ret = printf("%1.3f", res);
865         return(ret);
866 }
867
868 static int
869 cache2(struct counters *cpu, int pos)
870 {
871         /* ** Suspect ***
872          *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
873          *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
874          */
875         int ret;
876         struct counters *mem1, *mem2, *mem3;
877         struct counters *unhalt;
878         double con1, con2, con3, un, me_1, me_2, me_3, res;
879
880         con1 = 26.0;
881         con2 = 43.0;
882         con3 = 60.0;
883         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
884 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
885         mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
886         mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
887         mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
888         if (pos != -1) {
889                 me_1 = mem1->vals[pos] * 1.0;
890                 me_2 = mem2->vals[pos] * 1.0;
891                 me_3 = mem3->vals[pos] * 1.0;
892                 un = unhalt->vals[pos] * 1.0;
893         } else {
894                 me_1 = mem1->sum * 1.0;
895                 me_2 = mem2->sum * 1.0;
896                 me_3 = mem3->sum * 1.0;
897                 un = unhalt->sum * 1.0;
898         }
899         res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
900         ret = printf("%1.3f", res);
901         return(ret);
902 }
903
904 static int
905 datasharing(struct counters *cpu, int pos)
906 {
907         /* 
908          * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
909          */
910         int ret;
911         struct counters *mem;
912         struct counters *unhalt;
913         double con, res, me, un;
914
915         con = 43.0;
916         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
917         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
918         if (pos != -1) {
919                 me = mem->vals[pos] * 1.0;
920                 un = unhalt->vals[pos] * 1.0;
921         } else {
922                 me = mem->sum * 1.0;
923                 un = unhalt->sum * 1.0;
924         }
925         res = (me * con)/un;
926         ret = printf("%1.3f", res);
927         return(ret);
928
929 }
930
931
932 static int
933 datasharing_has(struct counters *cpu, int pos)
934 {
935         /* 
936          * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
937          */
938         int ret;
939         struct counters *mem;
940         struct counters *unhalt;
941         double con, res, me, un;
942
943         con = 72.0;
944         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
945         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
946         if (pos != -1) {
947                 me = mem->vals[pos] * 1.0;
948                 un = unhalt->vals[pos] * 1.0;
949         } else {
950                 me = mem->sum * 1.0;
951                 un = unhalt->sum * 1.0;
952         }
953         res = (me * con)/un;
954         ret = printf("%1.3f", res);
955         return(ret);
956
957 }
958
959
960 static int
961 cache2ib(struct counters *cpu, int pos)
962 {
963         /*
964          *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
965          */
966         int ret;
967         struct counters *mem;
968         struct counters *unhalt;
969         double con, un, me, res;
970
971         con = 29.0;
972         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
973         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
974         if (pos != -1) {
975                 me = mem->vals[pos] * 1.0;
976                 un = unhalt->vals[pos] * 1.0;
977         } else {
978                 me = mem->sum * 1.0;
979                 un = unhalt->sum * 1.0;
980         }
981         res = (con * me)/un; 
982         ret = printf("%1.3f", res);
983         return(ret);
984 }
985
986 static int
987 cache2has(struct counters *cpu, int pos)
988 {
989         /*
990          * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
991          *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
992          *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
993          *           / CPU_CLK_UNHALTED.THREAD_P
994          */
995         int ret;
996         struct counters *mem1, *mem2, *mem3;
997         struct counters *unhalt;
998         double con1, con2, con3, un, me1, me2, me3, res;
999
1000         con1 = 36.0;
1001         con2 = 72.0;
1002         con3 = 84.0;
1003         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1004         mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1005         mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1006         mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1007         if (pos != -1) {
1008                 me1 = mem1->vals[pos] * 1.0;
1009                 me2 = mem2->vals[pos] * 1.0;
1010                 me3 = mem3->vals[pos] * 1.0;
1011                 un = unhalt->vals[pos] * 1.0;
1012         } else {
1013                 me1 = mem1->sum * 1.0;
1014                 me2 = mem2->sum * 1.0;
1015                 me3 = mem3->sum * 1.0;
1016                 un = unhalt->sum * 1.0;
1017         }
1018         res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1019         ret = printf("%1.3f", res);
1020         return(ret);
1021 }
1022
1023
1024 static int
1025 cache2broad(struct counters *cpu, int pos)
1026 {
1027         /*
1028          *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1029          */
1030         int ret;
1031         struct counters *mem;
1032         struct counters *unhalt;
1033         double con, un, me, res;
1034
1035         con = 36.0;
1036         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1037         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1038         if (pos != -1) {
1039                 me = mem->vals[pos] * 1.0;
1040                 un = unhalt->vals[pos] * 1.0;
1041         } else {
1042                 me = mem->sum * 1.0;
1043                 un = unhalt->sum * 1.0;
1044         }
1045         res = (con * me)/un; 
1046         ret = printf("%1.3f", res);
1047         return(ret);
1048 }
1049
1050
1051 static int
1052 cache1(struct counters *cpu, int pos)
1053 {
1054         /*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1055         int ret;
1056         struct counters *mem;
1057         struct counters *unhalt;
1058         double con, un, me, res;
1059
1060         con = 180.0;
1061         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1062         mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1063         if (pos != -1) {
1064                 me = mem->vals[pos] * 1.0;
1065                 un = unhalt->vals[pos] * 1.0;
1066         } else {
1067                 me = mem->sum * 1.0;
1068                 un = unhalt->sum * 1.0;
1069         }
1070         res = (me * con)/un;
1071         ret = printf("%1.3f", res);
1072         return(ret);
1073 }
1074
1075 static int
1076 cache1ib(struct counters *cpu, int pos)
1077 {
1078         /*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1079         int ret;
1080         struct counters *mem;
1081         struct counters *unhalt;
1082         double con, un, me, res;
1083
1084         con = 180.0;
1085         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1086         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1087         if (pos != -1) {
1088                 me = mem->vals[pos] * 1.0;
1089                 un = unhalt->vals[pos] * 1.0;
1090         } else {
1091                 me = mem->sum * 1.0;
1092                 un = unhalt->sum * 1.0;
1093         }
1094         res = (me * con)/un;
1095         ret = printf("%1.3f", res);
1096         return(ret);
1097 }
1098
1099
1100 static int
1101 cache1broad(struct counters *cpu, int pos)
1102 {
1103         /*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1104         int ret;
1105         struct counters *mem;
1106         struct counters *unhalt;
1107         double con, un, me, res;
1108
1109         con = 180.0;
1110         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1111         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1112         if (pos != -1) {
1113                 me = mem->vals[pos] * 1.0;
1114                 un = unhalt->vals[pos] * 1.0;
1115         } else {
1116                 me = mem->sum * 1.0;
1117                 un = unhalt->sum * 1.0;
1118         }
1119         res = (me * con)/un;
1120         ret = printf("%1.3f", res);
1121         return(ret);
1122 }
1123
1124
1125 static int
1126 dtlb_missload(struct counters *cpu, int pos)
1127 {
1128         /* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1129         int ret;
1130         struct counters *dtlb_m, *dtlb_d;
1131         struct counters *unhalt;
1132         double con, un, d1, d2, res;
1133
1134         con = 7.0;
1135         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1136         dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1137         dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1138         if (pos != -1) {
1139                 d1 = dtlb_m->vals[pos] * 1.0;
1140                 d2 = dtlb_d->vals[pos] * 1.0;
1141                 un = unhalt->vals[pos] * 1.0;
1142         } else {
1143                 d1 = dtlb_m->sum * 1.0;
1144                 d2 = dtlb_d->sum * 1.0;
1145                 un = unhalt->sum * 1.0;
1146         }
1147         res = ((d1 * con) + d2)/un;
1148         ret = printf("%1.3f", res);
1149         return(ret);
1150 }
1151
1152 static int
1153 dtlb_missstore(struct counters *cpu, int pos)
1154 {
1155         /* 
1156          * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 
1157          * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 
1158          */
1159         int ret;
1160         struct counters *dtsb_m, *dtsb_d;
1161         struct counters *unhalt;
1162         double con, un, d1, d2, res;
1163
1164         con = 7.0;
1165         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1166         dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1167         dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1168         if (pos != -1) {
1169                 d1 = dtsb_m->vals[pos] * 1.0;
1170                 d2 = dtsb_d->vals[pos] * 1.0;
1171                 un = unhalt->vals[pos] * 1.0;
1172         } else {
1173                 d1 = dtsb_m->sum * 1.0;
1174                 d2 = dtsb_d->sum * 1.0;
1175                 un = unhalt->sum * 1.0;
1176         }
1177         res = ((d1 * con) + d2)/un;
1178         ret = printf("%1.3f", res);
1179         return(ret);
1180 }
1181
1182 static int
1183 itlb_miss(struct counters *cpu, int pos)
1184 {
1185         /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1186         int ret;
1187         struct counters *itlb;
1188         struct counters *unhalt;
1189         double un, d1, res;
1190
1191         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1192         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1193         if (pos != -1) {
1194                 d1 = itlb->vals[pos] * 1.0;
1195                 un = unhalt->vals[pos] * 1.0;
1196         } else {
1197                 d1 = itlb->sum * 1.0;
1198                 un = unhalt->sum * 1.0;
1199         }
1200         res = d1/un;
1201         ret = printf("%1.3f", res);
1202         return(ret);
1203 }
1204
1205
1206 static int
1207 itlb_miss_broad(struct counters *cpu, int pos)
1208 {
1209         /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
1210         int ret;
1211         struct counters *itlb;
1212         struct counters *unhalt;
1213         struct counters *four_k;
1214         double un, d1, res, k;
1215
1216         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1217         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1218         four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1219         if (pos != -1) {
1220                 d1 = itlb->vals[pos] * 1.0;
1221                 un = unhalt->vals[pos] * 1.0;
1222                 k = four_k->vals[pos] * 1.0;
1223         } else {
1224                 d1 = itlb->sum * 1.0;
1225                 un = unhalt->sum * 1.0;
1226                 k = four_k->sum * 1.0;
1227         }
1228         res = (7.0 * k + d1)/un;
1229         ret = printf("%1.3f", res);
1230         return(ret);
1231 }
1232
1233
1234 static int
1235 icache_miss(struct counters *cpu, int pos)
1236 {
1237         /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1238
1239         int ret;
1240         struct counters *itlb, *icache;
1241         struct counters *unhalt;
1242         double un, d1, ic, res;
1243
1244         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1245         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1246         icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1247         if (pos != -1) {
1248                 d1 = itlb->vals[pos] * 1.0;
1249                 ic = icache->vals[pos] * 1.0;
1250                 un = unhalt->vals[pos] * 1.0;
1251         } else {
1252                 d1 = itlb->sum * 1.0;
1253                 ic = icache->sum * 1.0;
1254                 un = unhalt->sum * 1.0;
1255         }
1256         res = (ic-d1)/un;
1257         ret = printf("%1.3f", res);
1258         return(ret);
1259
1260 }
1261
1262 static int
1263 icache_miss_has(struct counters *cpu, int pos)
1264 {
1265         /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1266
1267         int ret;
1268         struct counters *icache;
1269         struct counters *unhalt;
1270         double un, con, ic, res;
1271
1272         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1273         icache = find_counter(cpu, "ICACHE.MISSES");
1274         con = 36.0;
1275         if (pos != -1) {
1276                 ic = icache->vals[pos] * 1.0;
1277                 un = unhalt->vals[pos] * 1.0;
1278         } else {
1279                 ic = icache->sum * 1.0;
1280                 un = unhalt->sum * 1.0;
1281         }
1282         res = (con * ic)/un;
1283         ret = printf("%1.3f", res);
1284         return(ret);
1285
1286 }
1287
1288 static int
1289 lcp_stall(struct counters *cpu, int pos)
1290 {
1291          /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1292         int ret;
1293         struct counters *ild;
1294         struct counters *unhalt;
1295         double un, d1, res;
1296
1297         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1298         ild = find_counter(cpu, "ILD_STALL.LCP");
1299         if (pos != -1) {
1300                 d1 = ild->vals[pos] * 1.0;
1301                 un = unhalt->vals[pos] * 1.0;
1302         } else {
1303                 d1 = ild->sum * 1.0;
1304                 un = unhalt->sum * 1.0;
1305         }
1306         res = d1/un;
1307         ret = printf("%1.3f", res);
1308         return(ret);
1309
1310 }
1311
1312
1313 static int
1314 frontendstall(struct counters *cpu, int pos)
1315 {
1316       /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1317         int ret;
1318         struct counters *idq;
1319         struct counters *unhalt;
1320         double con, un, id, res;
1321
1322         con = 4.0;
1323         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1324         idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1325         if (pos != -1) {
1326                 id = idq->vals[pos] * 1.0;
1327                 un = unhalt->vals[pos] * 1.0;
1328         } else {
1329                 id = idq->sum * 1.0;
1330                 un = unhalt->sum * 1.0;
1331         }
1332         res = id/(un * con);
1333         ret = printf("%1.3f", res);
1334         return(ret);
1335 }
1336
1337 static int
1338 clears(struct counters *cpu, int pos)
1339 {
1340         /* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )  
1341          *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1342         
1343         int ret;
1344         struct counters *clr1, *clr2, *clr3;
1345         struct counters *unhalt;
1346         double con, un, cl1, cl2, cl3, res;
1347
1348         con = 100.0;
1349         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1350         clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1351         clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1352         clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1353         
1354         if (pos != -1) {
1355                 cl1 = clr1->vals[pos] * 1.0;
1356                 cl2 = clr2->vals[pos] * 1.0;
1357                 cl3 = clr3->vals[pos] * 1.0;
1358                 un = unhalt->vals[pos] * 1.0;
1359         } else {
1360                 cl1 = clr1->sum * 1.0;
1361                 cl2 = clr2->sum * 1.0;
1362                 cl3 = clr3->sum * 1.0;
1363                 un = unhalt->sum * 1.0;
1364         }
1365         res = ((cl1 + cl2 + cl3) * con)/un;
1366         ret = printf("%1.3f", res);
1367         return(ret);
1368 }
1369
1370
1371
1372 static int
1373 clears_broad(struct counters *cpu, int pos)
1374 {
1375         int ret;
1376         struct counters *clr1, *clr2, *clr3, *cyc;
1377         struct counters *unhalt;
1378         double con, un, cl1, cl2, cl3, cy, res;
1379
1380         con = 100.0;
1381         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1382         clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1383         clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1384         clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1385         cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1386         if (pos != -1) {
1387                 cl1 = clr1->vals[pos] * 1.0;
1388                 cl2 = clr2->vals[pos] * 1.0;
1389                 cl3 = clr3->vals[pos] * 1.0;
1390                 cy = cyc->vals[pos] * 1.0;
1391                 un = unhalt->vals[pos] * 1.0;
1392         } else {
1393                 cl1 = clr1->sum * 1.0;
1394                 cl2 = clr2->sum * 1.0;
1395                 cl3 = clr3->sum * 1.0;
1396                 cy = cyc->sum * 1.0;
1397                 un = unhalt->sum * 1.0;
1398         }
1399         /* Formula not listed but extrapulated to add the cy ?? */
1400         res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1401         ret = printf("%1.3f", res);
1402         return(ret);
1403 }
1404
1405
1406
1407
1408
1409 static int
1410 microassist(struct counters *cpu, int pos)
1411 {
1412         /* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1413         int ret;
1414         struct counters *idq;
1415         struct counters *unhalt;
1416         double un, id, res, con;
1417
1418         con = 4.0;
1419         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1420         idq = find_counter(cpu, "IDQ.MS_UOPS");
1421         if (pos != -1) {
1422                 id = idq->vals[pos] * 1.0;
1423                 un = unhalt->vals[pos] * 1.0;
1424         } else {
1425                 id = idq->sum * 1.0;
1426                 un = unhalt->sum * 1.0;
1427         }
1428         res = id/(un * con);
1429         ret = printf("%1.3f", res);
1430         return(ret);
1431 }
1432
1433
1434 static int
1435 microassist_broad(struct counters *cpu, int pos)
1436 {
1437         int ret;
1438         struct counters *idq;
1439         struct counters *unhalt;
1440         struct counters *uopiss;
1441         struct counters *uopret;
1442         double un, id, res, con, uoi, uor;
1443
1444         con = 4.0;
1445         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1446         idq = find_counter(cpu, "IDQ.MS_UOPS");
1447         uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1448         uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1449         if (pos != -1) {
1450                 id = idq->vals[pos] * 1.0;
1451                 un = unhalt->vals[pos] * 1.0;
1452                 uoi = uopiss->vals[pos] * 1.0;
1453                 uor = uopret->vals[pos] * 1.0;
1454         } else {
1455                 id = idq->sum * 1.0;
1456                 un = unhalt->sum * 1.0;
1457                 uoi = uopiss->sum * 1.0;
1458                 uor = uopret->sum * 1.0;
1459         }
1460         res = (uor/uoi) * (id/(un * con));
1461         ret = printf("%1.3f", res);
1462         return(ret);
1463 }
1464
1465
1466 static int
1467 aliasing(struct counters *cpu, int pos)
1468 {
1469         /* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1470         int ret;        
1471         struct counters *ld;
1472         struct counters *unhalt;
1473         double un, lds, con, res;
1474
1475         con = 5.0;
1476         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1477         ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1478         if (pos != -1) {
1479                 lds = ld->vals[pos] * 1.0;
1480                 un = unhalt->vals[pos] * 1.0;
1481         } else {
1482                 lds = ld->sum * 1.0;
1483                 un = unhalt->sum * 1.0;
1484         }
1485         res = (lds * con)/un;
1486         ret = printf("%1.3f", res);
1487         return(ret);
1488 }
1489
1490 static int
1491 aliasing_broad(struct counters *cpu, int pos)
1492 {
1493         /* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1494         int ret;        
1495         struct counters *ld;
1496         struct counters *unhalt;
1497         double un, lds, con, res;
1498
1499         con = 7.0;
1500         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1501         ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1502         if (pos != -1) {
1503                 lds = ld->vals[pos] * 1.0;
1504                 un = unhalt->vals[pos] * 1.0;
1505         } else {
1506                 lds = ld->sum * 1.0;
1507                 un = unhalt->sum * 1.0;
1508         }
1509         res = (lds * con)/un;
1510         ret = printf("%1.3f", res);
1511         return(ret);
1512 }
1513
1514
1515 static int
1516 fpassists(struct counters *cpu, int pos)
1517 {
1518         /* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1519         int ret;        
1520         struct counters *fp;
1521         struct counters *inst;
1522         double un, fpd, res;
1523
1524         inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1525         fp = find_counter(cpu, "FP_ASSIST.ANY");
1526         if (pos != -1) {
1527                 fpd = fp->vals[pos] * 1.0;
1528                 un = inst->vals[pos] * 1.0;
1529         } else {
1530                 fpd = fp->sum * 1.0;
1531                 un = inst->sum * 1.0;
1532         }
1533         res = fpd/un;
1534         ret = printf("%1.3f", res);
1535         return(ret);
1536 }
1537
1538 static int
1539 otherassistavx(struct counters *cpu, int pos)
1540 {
1541         /* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1542         int ret;        
1543         struct counters *oth;
1544         struct counters *unhalt;
1545         double un, ot, con, res;
1546
1547         con = 75.0;
1548         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1549         oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1550         if (pos != -1) {
1551                 ot = oth->vals[pos] * 1.0;
1552                 un = unhalt->vals[pos] * 1.0;
1553         } else {
1554                 ot = oth->sum * 1.0;
1555                 un = unhalt->sum * 1.0;
1556         }
1557         res = (ot * con)/un;
1558         ret = printf("%1.3f", res);
1559         return(ret);
1560 }
1561
1562 static int
1563 otherassistsse(struct counters *cpu, int pos)
1564 {
1565
1566         int ret;        
1567         struct counters *oth;
1568         struct counters *unhalt;
1569         double un, ot, con, res;
1570
1571         /* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1572         con = 75.0;
1573         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1574         oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1575         if (pos != -1) {
1576                 ot = oth->vals[pos] * 1.0;
1577                 un = unhalt->vals[pos] * 1.0;
1578         } else {
1579                 ot = oth->sum * 1.0;
1580                 un = unhalt->sum * 1.0;
1581         }
1582         res = (ot * con)/un;
1583         ret = printf("%1.3f", res);
1584         return(ret);
1585 }
1586
1587 static int
1588 efficiency1(struct counters *cpu, int pos)
1589 {
1590
1591         int ret;        
1592         struct counters *uops;
1593         struct counters *unhalt;
1594         double un, ot, con, res;
1595
1596         /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1597         con = 4.0;
1598         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1599         uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1600         if (pos != -1) {
1601                 ot = uops->vals[pos] * 1.0;
1602                 un = unhalt->vals[pos] * 1.0;
1603         } else {
1604                 ot = uops->sum * 1.0;
1605                 un = unhalt->sum * 1.0;
1606         }
1607         res = ot/(con * un);
1608         ret = printf("%1.3f", res);
1609         return(ret);
1610 }
1611
1612 static int
1613 efficiency2(struct counters *cpu, int pos)
1614 {
1615
1616         int ret;        
1617         struct counters *uops;
1618         struct counters *unhalt;
1619         double un, ot, res;
1620
1621         /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1622         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1623         uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1624         if (pos != -1) {
1625                 ot = uops->vals[pos] * 1.0;
1626                 un = unhalt->vals[pos] * 1.0;
1627         } else {
1628                 ot = uops->sum * 1.0;
1629                 un = unhalt->sum * 1.0;
1630         }
1631         res = un/ot;
1632         ret = printf("%1.3f", res);
1633         return(ret);
1634 }
1635
1636 #define SANDY_BRIDGE_COUNT 20   
1637 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1638 /*01*/  { "allocstall1", "thresh > .05", 
1639           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1640           allocstall1, 2 },
1641 /* -- not defined for SB right (partial-rat_stalls) 02*/
1642         { "allocstall2", "thresh > .05", 
1643           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1644           allocstall2, 2 },
1645 /*03*/  { "br_miss", "thresh >= .2", 
1646           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1647           br_mispredict, 2 },
1648 /*04*/  { "splitload", "thresh >= .1", 
1649           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1650           splitload_sb, 2 },
1651 /* 05*/ { "splitstore", "thresh >= .01", 
1652           "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1653           splitstore_sb, 2 },
1654 /*06*/  { "contested", "thresh >= .05", 
1655           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1656           contested, 2 },
1657 /*07*/  { "blockstorefwd", "thresh >= .05", 
1658           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1659           blockstoreforward, 2 },
1660 /*08*/  { "cache2", "thresh >= .2", 
1661           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1662           cache2, 4 },
1663 /*09*/  { "cache1", "thresh >= .2", 
1664           "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1665           cache1, 2 },
1666 /*10*/  { "dtlbmissload", "thresh >= .1", 
1667           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1668           dtlb_missload, 3 },
1669 /*11*/  { "dtlbmissstore", "thresh >= .05", 
1670           "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1671           dtlb_missstore, 3 },
1672 /*12*/  { "frontendstall", "thresh >= .15", 
1673           "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1674           frontendstall, 2 },
1675 /*13*/  { "clears", "thresh >= .02", 
1676           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1677           clears, 4 },
1678 /*14*/  { "microassist", "thresh >= .05", 
1679           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1680           microassist, 2 },
1681 /*15*/  { "aliasing_4k", "thresh >= .1", 
1682           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1683           aliasing, 2 },
1684 /*16*/  { "fpassist", "look for a excessive value", 
1685           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1686           fpassists, 2 },
1687 /*17*/  { "otherassistavx", "look for a excessive value", 
1688           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1689           otherassistavx, 2},
1690 /*18*/  { "otherassistsse", "look for a excessive value", 
1691           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1692           otherassistsse, 2 },
1693 /*19*/  { "eff1", "thresh < .9", 
1694           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1695           efficiency1, 2 },
1696 /*20*/  { "eff2", "thresh > 1.0", 
1697           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1698           efficiency2, 2 },
1699 };
1700
1701
1702 #define IVY_BRIDGE_COUNT 21
1703 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1704 /*1*/   { "eff1", "thresh < .75", 
1705           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1706           efficiency1, 2 },
1707 /*2*/   { "eff2", "thresh > 1.0", 
1708           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1709           efficiency2, 2 },
1710 /*3*/   { "itlbmiss", "thresh > .05", 
1711           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1712           itlb_miss, 2 },
1713 /*4*/   { "icachemiss", "thresh > .05", 
1714           "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1715           icache_miss, 3 },
1716 /*5*/   { "lcpstall", "thresh > .05", 
1717           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1718           lcp_stall, 2 },
1719 /*6*/   { "cache1", "thresh >= .2", 
1720           "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1721           cache1ib, 2 },
1722 /*7*/   { "cache2", "thresh >= .2", 
1723           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1724           cache2ib, 2 },
1725 /*8*/   { "contested", "thresh >= .05", 
1726           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1727           contested, 2 },
1728 /*9*/   { "datashare", "thresh >= .05",
1729           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1730           datasharing, 2 },
1731 /*10*/  { "blockstorefwd", "thresh >= .05", 
1732           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1733           blockstoreforward, 2 },
1734 /*11*/  { "splitload", "thresh >= .1", 
1735           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1736           splitloadib, 4 },
1737 /*12*/  { "splitstore", "thresh >= .01", 
1738           "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1739           splitstore, 2 },
1740 /*13*/  { "aliasing_4k", "thresh >= .1", 
1741           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1742           aliasing, 2 },
1743 /*14*/  { "dtlbmissload", "thresh >= .1", 
1744           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1745           dtlb_missload , 3},
1746 /*15*/  { "dtlbmissstore", "thresh >= .05", 
1747           "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1748           dtlb_missstore, 3 },
1749 /*16*/  { "br_miss", "thresh >= .2", 
1750           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1751           br_mispredictib, 8 },
1752 /*17*/  { "clears", "thresh >= .02", 
1753           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1754           clears, 4 },
1755 /*18*/  { "microassist", "thresh >= .05", 
1756           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1757           microassist, 2 },
1758 /*19*/  { "fpassist", "look for a excessive value", 
1759           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1760           fpassists, 2 },
1761 /*20*/  { "otherassistavx", "look for a excessive value", 
1762           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1763           otherassistavx , 2},
1764 /*21*/  { "otherassistsse", "look for a excessive value", 
1765           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1766           otherassistsse, 2 },
1767 };
1768
1769 #define HASWELL_COUNT 20
1770 static struct cpu_entry haswell[HASWELL_COUNT] = {
1771 /*1*/   { "eff1", "thresh < .75", 
1772           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1773           efficiency1, 2 },
1774 /*2*/   { "eff2", "thresh > 1.0", 
1775           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1776           efficiency2, 2 },
1777 /*3*/   { "itlbmiss", "thresh > .05", 
1778           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1779           itlb_miss, 2 },
1780 /*4*/   { "icachemiss", "thresh > .05", 
1781           "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1782           icache_miss_has, 2 },
1783 /*5*/   { "lcpstall", "thresh > .05", 
1784           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1785           lcp_stall, 2 },
1786 /*6*/   { "cache1", "thresh >= .2", 
1787           "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1788           cache1ib, 2 },
1789 /*7*/   { "cache2", "thresh >= .2", 
1790           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1791           cache2has, 4 },
1792 /*8*/   { "contested", "thresh >= .05", 
1793           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1794           contested_has, 2 },
1795 /*9*/   { "datashare", "thresh >= .05",
1796           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1797           datasharing_has, 2 },
1798 /*10*/  { "blockstorefwd", "thresh >= .05", 
1799           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1800           blockstoreforward, 2 },
1801 /*11*/  { "splitload", "thresh >= .1", 
1802           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1803           splitload , 2},
1804 /*12*/  { "splitstore", "thresh >= .01", 
1805           "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1806           splitstore, 2 },
1807 /*13*/  { "aliasing_4k", "thresh >= .1", 
1808           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1809           aliasing, 2 },
1810 /*14*/  { "dtlbmissload", "thresh >= .1", 
1811           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1812           dtlb_missload, 3 },
1813 /*15*/  { "br_miss", "thresh >= .2", 
1814           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1815           br_mispredict, 2 },
1816 /*16*/  { "clears", "thresh >= .02", 
1817           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1818           clears, 4 },
1819 /*17*/  { "microassist", "thresh >= .05", 
1820           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1821           microassist, 2 },
1822 /*18*/  { "fpassist", "look for a excessive value", 
1823           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1824           fpassists, 2 },
1825 /*19*/  { "otherassistavx", "look for a excessive value", 
1826           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1827           otherassistavx, 2 },
1828 /*20*/  { "otherassistsse", "look for a excessive value", 
1829           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1830           otherassistsse, 2 },
1831 };
1832
1833
/*
 * Print the formula behind the Broadwell test called "name", followed by
 * the threshold that suggests there is room for improvement.  Names that
 * are not recognised produce an "Unknown name" line and the threshold text
 * "unknown entry".
 *
 * The explanations live in a lookup table so that each test's text and
 * threshold sit side by side.  Misspelled event names in the "br_miss" and
 * "itlbmiss" formulas (ALL_BRANCHS_PS, BR_MISP_RETIED, UOPS_ISSUEDF,
 * ITLB_MISSES_STLB_HIT_4K) have been corrected.
 *
 * NOTE(review): the thresholds quoted here for "datashare" (> .05) and
 * "microassist" (>= .05) differ from the ones in the broadwell[] table
 * (>= .05 and >= .2) -- confirm which values are intended.
 */
static void
explain_name_broad(const char *name)
{
	static const struct {
		const char *name;	/* test name to match */
		const char *text;	/* explanation, newline terminated */
		const char *thresh;	/* threshold hint */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "cache2",
		  "Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n"
		  " (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n"
		  "CPU_CLK_UNHALTED.THREAD * 4)\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "microassist",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
	};
	const size_t nnotes = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh;
	size_t i;

	for (i = 0; i < nnotes; i++) {
		if (strcmp(name, notes[i].name) == 0)
			break;
	}
	if (i < nnotes) {
		printf("%s", notes[i].text);
		mythresh = notes[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
		mythresh = "unknown entry";
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1902
1903
1904 #define BROADWELL_COUNT 17
1905 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1906 /*1*/   { "eff1", "thresh < .75", 
1907           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1908           efficiency1, 2 }, 
1909 /*2*/   { "eff2", "thresh > 1.0", 
1910           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1911           efficiency2, 2 },
1912 /*3*/   { "itlbmiss", "thresh > .05", 
1913           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1914           itlb_miss_broad, 3 },
1915 /*4*/   { "icachemiss", "thresh > .05", 
1916           "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1917           icache_miss_has, 2 },
1918 /*5*/   { "lcpstall", "thresh > .05", 
1919           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1920           lcp_stall, 2 },
1921 /*6*/   { "cache1", "thresh >= .1", 
1922           "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1923           cache1broad, 2 },
1924 /*7*/   { "cache2", "thresh >= .2", 
1925           "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1926           cache2broad, 2 },
1927 /*8*/   { "contested", "thresh >= .05", 
1928           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1929           contestedbroad, 2 },
1930 /*9*/   { "datashare", "thresh >= .05",
1931           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1932           datasharing_has, 2 },
1933 /*10*/  { "blockstorefwd", "thresh >= .05", 
1934           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1935           blockstoreforward, 2 },
1936 /*11*/  { "aliasing_4k", "thresh >= .1", 
1937           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1938           aliasing_broad, 2 }, 
1939 /*12*/  { "dtlbmissload", "thresh >= .1", 
1940           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1941           dtlb_missload, 3 },
1942 /*13*/  { "br_miss", "thresh >= .2", 
1943           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1944           br_mispredict_broad, 7 },
1945 /*14*/  { "clears", "thresh >= .02", 
1946           "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1947           clears_broad, 5 },
1948 /*15*/  { "fpassist", "look for a excessive value", 
1949           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1950           fpassists, 2 },
1951 /*16*/  { "otherassistavx", "look for a excessive value", 
1952           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1953           otherassistavx, 2 },
1954 /*17*/  { "microassist", "thresh >= .2", 
1955           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
1956           microassist_broad, 4 },
1957 };
1958
1959
1960 static void
1961 set_sandybridge(void)
1962 {
1963         strcpy(the_cpu.cputype, "SandyBridge PMC");
1964         the_cpu.number = SANDY_BRIDGE_COUNT;
1965         the_cpu.ents = sandy_bridge;
1966         the_cpu.explain = explain_name_sb;
1967 }
1968
1969 static void
1970 set_ivybridge(void)
1971 {
1972         strcpy(the_cpu.cputype, "IvyBridge PMC");
1973         the_cpu.number = IVY_BRIDGE_COUNT;
1974         the_cpu.ents = ivy_bridge;
1975         the_cpu.explain = explain_name_ib;
1976 }
1977
1978
1979 static void
1980 set_haswell(void)
1981 {
1982         strcpy(the_cpu.cputype, "HASWELL PMC");
1983         the_cpu.number = HASWELL_COUNT;
1984         the_cpu.ents = haswell;
1985         the_cpu.explain = explain_name_has;
1986 }
1987
1988
1989 static void
1990 set_broadwell(void)
1991 {
1992         strcpy(the_cpu.cputype, "HASWELL PMC");
1993         the_cpu.number = BROADWELL_COUNT;
1994         the_cpu.ents = broadwell;
1995         the_cpu.explain = explain_name_broad;
1996 }
1997
1998
1999 static int
2000 set_expression(const char *name)
2001 {
2002         int found = 0, i;
2003         for(i=0 ; i< the_cpu.number; i++) {
2004                 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2005                         found = 1;
2006                         expression = the_cpu.ents[i].func;
2007                         command = the_cpu.ents[i].command;
2008                         threshold = the_cpu.ents[i].thresh;
2009                         if  (the_cpu.ents[i].counters_required > max_pmc_counters) {
2010                                 printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2011                                        the_cpu.ents[i].name,
2012                                        the_cpu.ents[i].counters_required, max_pmc_counters);
2013                                 printf("Sorry this test can not be run\n");
2014                                 if (run_all == 0) {
2015                                         exit(-1);
2016                                 } else {
2017                                         return(-1);
2018                                 }
2019                         }
2020                         break;
2021                 }
2022         }
2023         if (!found) {
2024                 printf("For CPU type %s we have no expression:%s\n",
2025                        the_cpu.cputype, name);
2026                 exit(-1);
2027         }
2028         return(0);
2029 }
2030
2031
2032
2033
2034
2035 static int
2036 validate_expression(char *name) 
2037 {
2038         int i, found;
2039
2040         found = 0;
2041         for(i=0 ; i< the_cpu.number; i++) {
2042                 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2043                         found = 1;
2044                         break;
2045                 }
2046         }
2047         if (!found) {
2048                 return(-1);
2049         }
2050         return (0);
2051 }
2052
2053 static void
2054 do_expression(struct counters *cpu, int pos)
2055 {
2056         if (expression == NULL) 
2057                 return;
2058         (*expression)(cpu, pos);
2059 }
2060
2061 static void
2062 process_header(int idx, char *p)
2063 {
2064         struct counters *up;
2065         int i, len, nlen;
2066         /* 
2067          * Given header element idx, at p in
2068          * form 's/NN/nameof'
2069          * process the entry to pull out the name and
2070          * the CPU number.
2071          */
2072         if (strncmp(p, "s/", 2)) {
2073                 printf("Check -- invalid header no s/ in %s\n",
2074                        p);
2075                 return;
2076         }
2077         up = &cnts[idx];
2078         up->cpu = strtol(&p[2], NULL, 10);
2079         len = strlen(p);
2080         for (i=2; i<len; i++) {
2081                 if (p[i] == '/') {
2082                         nlen = strlen(&p[(i+1)]);
2083                         if (nlen < (MAX_NLEN-1)) {
2084                                 strcpy(up->counter_name, &p[(i+1)]);
2085                         } else {
2086                                 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2087                         }
2088                 }
2089         }
2090 }
2091
2092 static void
2093 build_counters_from_header(FILE *io)
2094 {
2095         char buffer[8192], *p;
2096         int i, len, cnt;
2097         size_t mlen;
2098
2099         /* We have a new start, lets 
2100          * setup our headers and cpus.
2101          */
2102         if (fgets(buffer, sizeof(buffer), io) == NULL) {
2103                 printf("First line can't be read from file err:%d\n", errno);
2104                 return;
2105         }
2106         /*
2107          * Ok output is an array of counters. Once
2108          * we start to read the values in we must
2109          * put them in there slot to match there CPU and 
2110          * counter being updated. We create a mass array
2111          * of the counters, filling in the CPU and 
2112          * counter name. 
2113          */
2114         /* How many do we get? */
2115         len = strlen(buffer);
2116         for (i=0, cnt=0; i<len; i++) {
2117                 if (strncmp(&buffer[i], "s/", 2) == 0) {
2118                         cnt++;
2119                         for(;i<len;i++) {
2120                                 if (buffer[i] == ' ')
2121                                         break;
2122                         }
2123                 }
2124         }
2125         mlen = sizeof(struct counters) * cnt;
2126         cnts = malloc(mlen);
2127         ncnts = cnt;
2128         if (cnts == NULL) {
2129                 printf("No memory err:%d\n", errno);
2130                 return;
2131         }
2132         memset(cnts, 0, mlen);
2133         for (i=0, cnt=0; i<len; i++) {
2134                 if (strncmp(&buffer[i], "s/", 2) == 0) {
2135                         p = &buffer[i];
2136                         for(;i<len;i++) {
2137                                 if (buffer[i] == ' ') {
2138                                         buffer[i] = 0;
2139                                         break;
2140                                 }
2141                         }
2142                         process_header(cnt, p);
2143                         cnt++;
2144                 }
2145         }
2146         if (verbose)
2147                 printf("We have %d entries\n", cnt);    
2148 }
/*
 * Upper limit on the number of sample lines process_file() reads before
 * it stops; starts out as MAX_COUNTER_SLOTS.
 */
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;
2151
2152 static int
2153 read_a_line(FILE *io) 
2154 {
2155         char buffer[8192], *p, *stop;   
2156         int pos, i;
2157
2158         if (fgets(buffer, sizeof(buffer), io) == NULL) {
2159                 return(0);
2160         }
2161         p = buffer;
2162         for (i=0; i<ncnts; i++) {
2163                 pos = cnts[i].pos;
2164                 cnts[i].vals[pos] = strtol(p, &stop, 0);
2165                 cnts[i].pos++;
2166                 cnts[i].sum += cnts[i].vals[pos];
2167                 p = stop;
2168         }
2169         return (1);
2170 }
2171
/*
 * Number of non-NULL glob_cpu[] slots as counted by print_header();
 * process_file() uses it to know when a row of output is complete.
 */
extern int cpu_count_out;
int cpu_count_out=0;
2174
2175 static void
2176 print_header(void)
2177 {
2178         int i, cnt, printed_cnt;
2179
2180         printf("*********************************\n");
2181         for(i=0, cnt=0; i<MAX_CPU; i++) {
2182                 if (glob_cpu[i]) {
2183                         cnt++;
2184                 }
2185         }       
2186         cpu_count_out = cnt;
2187         for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2188                 if (glob_cpu[i]) {
2189                         printf("CPU%d", i);
2190                         printed_cnt++;
2191                 }
2192                 if (printed_cnt == cnt) {
2193                         printf("\n");
2194                         break;
2195                 } else {
2196                         printf("\t");
2197                 }
2198         }
2199 }
2200
2201 static void
2202 lace_cpus_together(void)
2203 {
2204         int i, j, lace_cpu;
2205         struct counters *cpat, *at;
2206
2207         for(i=0; i<ncnts; i++) {
2208                 cpat = &cnts[i];
2209                 if (cpat->next_cpu) {
2210                         /* Already laced in */
2211                         continue;
2212                 }
2213                 lace_cpu = cpat->cpu;
2214                 if (lace_cpu >= MAX_CPU) {
2215                         printf("CPU %d to big\n", lace_cpu);
2216                         continue;
2217                 }
2218                 if (glob_cpu[lace_cpu] == NULL) {
2219                         glob_cpu[lace_cpu] = cpat;
2220                 } else {
2221                         /* Already processed this cpu */
2222                         continue;
2223                 }
2224                 /* Ok look forward for cpu->cpu and link in */
2225                 for(j=(i+1); j<ncnts; j++) {
2226                         at = &cnts[j];
2227                         if (at->next_cpu) {
2228                                 continue;
2229                         }
2230                         if (at->cpu == lace_cpu) {
2231                                 /* Found one */
2232                                 cpat->next_cpu = at;
2233                                 cpat = at;
2234                         }
2235                 }
2236         }
2237 }
2238
2239
2240 static void
2241 process_file(char *filename)
2242 {
2243         FILE *io;
2244         int i;
2245         int line_at, not_done;
2246         pid_t pid_of_command=0;
2247
2248         if (filename ==  NULL) {
2249                 io = my_popen(command, "r", &pid_of_command);
2250                 if (io == NULL) {
2251                         printf("Can't popen the command %s\n", command);
2252                         return;
2253                 }
2254         } else {
2255                 io = fopen(filename, "r");
2256                 if (io == NULL) {
2257                         printf("Can't process file %s err:%d\n",
2258                                filename, errno);
2259                         return;
2260                 }
2261         }
2262         build_counters_from_header(io);
2263         if (cnts == NULL) {
2264                 /* Nothing we can do */
2265                 printf("Nothing to do -- no counters built\n");
2266                 if (filename) {
2267                         fclose(io);
2268                 } else {
2269                         my_pclose(io, pid_of_command);
2270                 }
2271                 return;
2272         }
2273         lace_cpus_together();
2274         print_header();
2275         if (verbose) {
2276                 for (i=0; i<ncnts; i++) {
2277                         printf("Counter:%s cpu:%d index:%d\n",
2278                                cnts[i].counter_name,
2279                                cnts[i].cpu, i);
2280                 }
2281         }
2282         line_at = 0;
2283         not_done = 1;
2284         while(not_done) {
2285                 if (read_a_line(io)) {
2286                         line_at++;
2287                 } else {
2288                         break;
2289                 }
2290                 if (line_at >= max_to_collect) {
2291                         not_done = 0;
2292                 }
2293                 if (filename == NULL) {
2294                         int cnt;
2295                         /* For the ones we dynamically open we print now */
2296                         for(i=0, cnt=0; i<MAX_CPU; i++) {
2297                                 do_expression(glob_cpu[i], (line_at-1));
2298                                 cnt++;
2299                                 if (cnt == cpu_count_out) {
2300                                         printf("\n");
2301                                         break;
2302                                 } else {
2303                                         printf("\t");
2304                                 }
2305                         }
2306                 }
2307         }
2308         if (filename) {
2309                 fclose(io);
2310         } else {
2311                 my_pclose(io, pid_of_command);
2312         }
2313 }
2314 #if defined(__amd64__)
2315 #define cpuid(in,a,b,c,d)\
2316   asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
2317
2318 static __inline void
2319 do_cpuid(u_int ax, u_int cx, u_int *p)
2320 {
2321         __asm __volatile("cpuid"
2322                          : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
2323                          :  "0" (ax), "c" (cx) );
2324 }
2325
2326 #else
2327 #define cpuid(in, a, b, c, d) 
2328 #define do_cpuid(ax, cx, p)
2329 #endif
2330
2331 static void
2332 get_cpuid_set(void)
2333 {
2334         unsigned long eax, ebx, ecx, edx;
2335         int model;
2336         pid_t pid_of_command=0;
2337         size_t sz, len;
2338         FILE *io;
2339         char linebuf[1024], *str;
2340         u_int reg[4];
2341
2342         eax = ebx = ecx = edx = 0;
2343
2344         cpuid(0, eax, ebx, ecx, edx);
2345         if (ebx == 0x68747541) {
2346                 printf("AMD processors are not supported by this program\n");
2347                 printf("Sorry\n");
2348                 exit(0);
2349         } else if (ebx == 0x6972794) {
2350                 printf("Cyrix processors are not supported by this program\n");
2351                 printf("Sorry\n");
2352                 exit(0);
2353         } else if (ebx == 0x756e6547) {
2354                 printf("Genuine Intel\n");
2355         } else {
2356                 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2357                 exit(0);
2358         }
2359         cpuid(1, eax, ebx, ecx, edx);
2360         model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2361         printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2362         switch (eax & 0xF00) {
2363         case 0x500:             /* Pentium family processors */
2364                 printf("Intel Pentium P5\n");
2365                 goto not_supported;
2366                 break;
2367         case 0x600:             /* Pentium Pro, Celeron, Pentium II & III */
2368                 switch (model) {
2369                 case 0x1:
2370                         printf("Intel Pentium P6\n");
2371                         goto not_supported;
2372                         break;
2373                 case 0x3: 
2374                 case 0x5:
2375                         printf("Intel PII\n");
2376                         goto not_supported;
2377                         break;
2378                 case 0x6: case 0x16:
2379                         printf("Intel CL\n");
2380                         goto not_supported;
2381                         break;
2382                 case 0x7: case 0x8: case 0xA: case 0xB:
2383                         printf("Intel PIII\n");
2384                         goto not_supported;
2385                         break;
2386                 case 0x9: case 0xD:
2387                         printf("Intel PM\n");
2388                         goto not_supported;
2389                         break;
2390                 case 0xE:
2391                         printf("Intel CORE\n");
2392                         goto not_supported;
2393                         break;
2394                 case 0xF:
2395                         printf("Intel CORE2\n");
2396                         goto not_supported;
2397                         break;
2398                 case 0x17:
2399                         printf("Intel CORE2EXTREME\n");
2400                         goto not_supported;
2401                         break;
2402                 case 0x1C:      /* Per Intel document 320047-002. */
2403                         printf("Intel ATOM\n");
2404                         goto not_supported;
2405                         break;
2406                 case 0x1A:
2407                 case 0x1E:      /*
2408                                  * Per Intel document 253669-032 9/2009,
2409                                  * pages A-2 and A-57
2410                                  */
2411                 case 0x1F:      /*
2412                                  * Per Intel document 253669-032 9/2009,
2413                                  * pages A-2 and A-57
2414                                  */
2415                         printf("Intel COREI7\n");
2416                         goto not_supported;
2417                         break;
2418                 case 0x2E:
2419                         printf("Intel NEHALEM\n");
2420                         goto not_supported;
2421                         break;
2422                 case 0x25:      /* Per Intel document 253669-033US 12/2009. */
2423                 case 0x2C:      /* Per Intel document 253669-033US 12/2009. */
2424                         printf("Intel WESTMERE\n");
2425                         goto not_supported;
2426                         break;
2427                 case 0x2F:      /* Westmere-EX, seen in wild */
2428                         printf("Intel WESTMERE\n");
2429                         goto not_supported;
2430                         break;
2431                 case 0x2A:      /* Per Intel document 253669-039US 05/2011. */
2432                         printf("Intel SANDYBRIDGE\n");
2433                         set_sandybridge();
2434                         break;
2435                 case 0x2D:      /* Per Intel document 253669-044US 08/2012. */
2436                         printf("Intel SANDYBRIDGE_XEON\n");
2437                         set_sandybridge();
2438                         break;
2439                 case 0x3A:      /* Per Intel document 253669-043US 05/2012. */
2440                         printf("Intel IVYBRIDGE\n");
2441                         set_ivybridge();
2442                         break;
2443                 case 0x3E:      /* Per Intel document 325462-045US 01/2013. */
2444                         printf("Intel IVYBRIDGE_XEON\n");
2445                         set_ivybridge();
2446                         break;
2447                 case 0x3F:      /* Per Intel document 325462-045US 09/2014. */
2448                         printf("Intel HASWELL (Xeon)\n");
2449                         set_haswell();
2450                         break;
2451                 case 0x3C:      /* Per Intel document 325462-045US 01/2013. */
2452                 case 0x45:
2453                 case 0x46:
2454                         printf("Intel HASWELL\n");
2455                         set_haswell();
2456                         break;
2457
2458                 case 0x4e:
2459                 case 0x5e:
2460                         printf("Intel SKY-LAKE\n");
2461                         goto not_supported;
2462                         break;
2463                 case 0x3D:
2464                 case 0x47:
2465                         printf("Intel BROADWELL\n");
2466                         set_broadwell();
2467                         break;
2468                 case 0x4f:
2469                 case 0x56:
2470                         printf("Intel BROADWEL (Xeon)\n");
2471                         set_broadwell();
2472                         break;
2473
2474                 case 0x4D:
2475                         /* Per Intel document 330061-001 01/2014. */
2476                         printf("Intel ATOM_SILVERMONT\n");
2477                         goto not_supported;
2478                         break;
2479                 default:
2480                         printf("Intel model 0x%x is not known -- sorry\n",
2481                                model);
2482                         goto not_supported;
2483                         break;
2484                 }
2485                 break;
2486         case 0xF00:             /* P4 */
2487                 printf("Intel unknown model %d\n", model);
2488                 goto not_supported;
2489                 break;
2490         }
2491         do_cpuid(0xa, 0, reg);
2492         max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2493         printf("We have %d PMC counters to work with\n", max_pmc_counters);
2494         /* Ok lets load the list of all known PMC's */
2495         io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2496         if (valid_pmcs == NULL) {
2497                 /* Likely */
2498                 pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2499                 sz = sizeof(char *) * pmc_allocated_cnt;
2500                 valid_pmcs = malloc(sz);
2501                 if (valid_pmcs == NULL) {
2502                         printf("No memory allocation fails at startup?\n");     
2503                         exit(-1);
2504                 }
2505                 memset(valid_pmcs, 0, sz);
2506         }
2507         
2508         while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2509                 if (linebuf[0] != '\t') {
2510                         /* sometimes headers ;-) */
2511                         continue;
2512                 }
2513                 len = strlen(linebuf);
2514                 if (linebuf[(len-1)] == '\n') {
2515                         /* Likely */
2516                         linebuf[(len-1)] = 0;
2517                 }
2518                 str = &linebuf[1];
2519                 len = strlen(str) + 1;
2520                 valid_pmcs[valid_pmc_cnt] = malloc(len);
2521                 if (valid_pmcs[valid_pmc_cnt] == NULL) {
2522                         printf("No memory2 allocation fails at startup?\n");    
2523                         exit(-1);
2524                 }
2525                 memset(valid_pmcs[valid_pmc_cnt], 0, len);
2526                 strcpy(valid_pmcs[valid_pmc_cnt], str);
2527                 valid_pmc_cnt++;
2528                 if (valid_pmc_cnt >= pmc_allocated_cnt) {
2529                         /* Got to expand -- unlikely */
2530                         char **more;
2531
2532                         sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2533                         more = malloc(sz);
2534                         if (more == NULL) {
2535                                 printf("No memory3 allocation fails at startup?\n");    
2536                                 exit(-1);
2537                         }
2538                         memset(more, 0, sz);
2539                         memcpy(more, valid_pmcs, sz);
2540                         pmc_allocated_cnt *= 2;
2541                         free(valid_pmcs);
2542                         valid_pmcs = more;
2543                 }
2544         }
2545         my_pclose(io, pid_of_command);  
2546         return;
2547 not_supported:
2548         printf("Not supported\n");      
2549         exit(-1);
2550 }
2551
2552 static void
2553 explain_all(void)
2554 {
2555         int i;
2556         printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2557         printf("-------------------------------------------------------------\n");
2558         for(i=0; i<the_cpu.number; i++){
2559                 printf("For -e %s ", the_cpu.ents[i].name);
2560                 (*the_cpu.explain)(the_cpu.ents[i].name);
2561                 printf("----------------------------\n");
2562         }
2563 }
2564
/*
 * Check whether one PMC can actually be counted on this machine.
 *
 * pmc:        name of the PMC to try.
 * out_so_far: number of characters the caller already printed on the
 *             current output line; the result is padded out to column 50.
 *
 * Runs "/usr/sbin/pmcstat -w .25 -c 0 -s <pmc>" through my_popen() and
 * inspects its output: the first line must contain the PMC name (from
 * offset 2 on) and must not contain "ERROR".  On a match the second line,
 * stripped of leading spaces and right-aligned to 20 columns, is printed
 * after "Pass"; otherwise a "Failed ..." line is printed.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command = 0;
	char my_command[1024];
	char line[1024];
	int len, llen, i;

	/* Pad the caller's output to a fixed column. */
	for (i = out_so_far; i < 50; i++) {
		printf(" ");
	}
	/* Bounded formatting: the name comes from external tool output. */
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* What we expect to find in the first output line */
	len = (int)strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = (int)strlen(line);
	if (llen > 0 && line[(llen - 1)] == '\n') {
		line[(llen - 1)] = 0;
		llen--;
	}
	for (i = 2; i < (llen - len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			const char *val;
			int k, vlen;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Skip every leading space.  Advancing one character
			 * per space matters: stepping twice would swallow the
			 * first digit after an odd-length run of spaces.
			 */
			val = line;
			while (*val == ' ') {
				val++;
			}
			printf("Pass");
			vlen = (int)strlen(val);
			for (k = vlen; k < 20; k++) {
				printf(" ");
			}
			if (vlen) {
				/* val still carries the line's own newline. */
				printf("%s", val);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);
}
2637
/*
 * Add `name` to the string table `vars` unless it is already present.
 *
 * vars:    table of heap-allocated strings; slot cur_cnt must be NULL.
 * cur_cnt: number of slots currently in use.
 * name:    string to add (copied).
 *
 * Returns 1 when a private copy of `name` was stored in vars[cur_cnt],
 * 0 when an equal string already exists in vars[0..cur_cnt-1].  Exits the
 * process if the target slot is unexpectedly occupied or on allocation
 * failure.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char *copy;
	size_t nbytes;
	int idx;

	for (idx = 0; idx < cur_cnt; idx++) {
		if (strcmp(vars[idx], name) != 0)
			continue;
		/* Already have */
		return (0);
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n", 
		       cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* Ok its new: duplicate it, terminator included. */
	nbytes = strlen(name) + 1;
	copy = malloc(nbytes);
	if (copy == NULL) {
		printf("No memory %s\n", __FUNCTION__);
		exit(-1);
	}
	memcpy(copy, name, nbytes);
	vars[cur_cnt] = copy;
	return (1);
}
2665
2666 static char *
2667 build_command_for_exp(struct expression *exp)
2668 {
2669         /*
2670          * Build the pmcstat command to handle
2671          * the passed in expression.
2672          * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2673          * where NNN and QQQ represent the PMC's in the expression
2674          * uniquely..
2675          */
2676         char forming[1024];
2677         int cnt_pmc, alloced_pmcs, i;
2678         struct expression *at;
2679         char **vars, *cmd;
2680         size_t mal;
2681
2682         alloced_pmcs = cnt_pmc = 0;
2683         /* first how many do we have */
2684         at = exp;
2685         while (at) {
2686                 if (at->type == TYPE_VALUE_PMC) {
2687                         cnt_pmc++;
2688                 }
2689                 at = at->next;
2690         }
2691         if (cnt_pmc == 0) {
2692                 printf("No PMC's in your expression -- nothing to do!!\n");
2693                 exit(0);
2694         }
2695         mal = cnt_pmc * sizeof(char *);
2696         vars = malloc(mal);
2697         if (vars == NULL) {
2698                 printf("No memory\n");
2699                 exit(-1);
2700         }
2701         memset(vars, 0, mal);
2702         at = exp;
2703         while (at) {
2704                 if (at->type == TYPE_VALUE_PMC) {
2705                         if(add_it_to(vars, alloced_pmcs, at->name)) {
2706                                 alloced_pmcs++;
2707                         }
2708                 }
2709                 at = at->next;
2710         }
2711         /* Now we have a unique list in vars so create our command */
2712         mal = 23; /*    "/usr/sbin/pmcstat -w 1"  + \0 */
2713         for(i=0; i<alloced_pmcs; i++) {
2714                 mal += strlen(vars[i]) + 4;     /* var + " -s " */
2715         }
2716         cmd = malloc((mal+2));
2717         if (cmd == NULL) {
2718                 printf("%s out of mem\n", __FUNCTION__);
2719                 exit(-1);
2720         }
2721         memset(cmd, 0, (mal+2));
2722         strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2723         at = exp;
2724         for(i=0; i<alloced_pmcs; i++) {
2725                 sprintf(forming, " -s %s", vars[i]);
2726                 strcat(cmd, forming);
2727                 free(vars[i]);
2728                 vars[i] = NULL;
2729         }
2730         free(vars);
2731         return(cmd);
2732 }
2733
2734 static int
2735 user_expr(struct counters *cpu, int pos)
2736 {
2737         int ret;        
2738         double res;
2739         struct counters *var;
2740         struct expression *at;
2741
2742         at = master_exp;
2743         while (at) {
2744                 if (at->type == TYPE_VALUE_PMC) {
2745                         var = find_counter(cpu, at->name);
2746                         if (var == NULL) {
2747                                 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2748                                 exit(-1);
2749                         }
2750                         if (pos != -1) {
2751                                 at->value = var->vals[pos] * 1.0;
2752                         } else {
2753                                 at->value = var->sum * 1.0;
2754                         }
2755                 }
2756                 at = at->next;
2757         }
2758         res = run_expr(master_exp, 1, NULL);
2759         ret = printf("%1.3f", res);
2760         return(ret);
2761 }
2762
2763
2764 static void
2765 set_manual_exp(struct expression *exp)
2766 {
2767         expression = user_expr;
2768         command = build_command_for_exp(exp);
2769         threshold = "User defined threshold";
2770 }
2771
2772 static void
2773 run_tests(void)
2774 {
2775         int i, lenout;
2776         printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2777         printf("------------------------------------------------------------------------\n");
2778         for(i=0; i<valid_pmc_cnt; i++) {
2779                 lenout = printf("%s", valid_pmcs[i]);
2780                 fflush(stdout);
2781                 test_for_a_pmc(valid_pmcs[i], lenout);
2782         }
2783 }
2784 static void
2785 list_all(void)
2786 {
2787         int i, cnt, j;
2788         printf("PMC                                               Abbreviation\n");
2789         printf("--------------------------------------------------------------\n");
2790         for(i=0; i<valid_pmc_cnt; i++) {
2791                 cnt = printf("%s", valid_pmcs[i]);
2792                 for(j=cnt; j<52; j++) {
2793                         printf(" ");
2794                 }
2795                 printf("%%%d\n", i);
2796         }
2797 }
2798
2799
2800 int
2801 main(int argc, char **argv)
2802 {
2803         int i, j, cnt;
2804         char *filename=NULL;
2805         const char *name=NULL;
2806         int help_only = 0;
2807         int test_mode = 0;
2808         int test_at = 0;
2809
2810         get_cpuid_set();
2811         memset(glob_cpu, 0, sizeof(glob_cpu));
2812         while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2813                 switch (i) {
2814                 case 'A':
2815                         run_all = 1;
2816                         break;
2817                 case 'L':
2818                         list_all();
2819                         return(0);
2820                 case 'H':
2821                         printf("**********************************\n");
2822                         explain_all();
2823                         printf("**********************************\n");
2824                         return(0);
2825                         break;
2826                 case 'T':
2827                         test_mode = 1;
2828                         break;
2829                 case 'E':
2830                         master_exp = parse_expression(optarg);
2831                         if (master_exp) {
2832                                 set_manual_exp(master_exp);
2833                         }
2834                         break;
2835                 case 'e':
2836                         if (validate_expression(optarg)) {
2837                                 printf("Unknown expression %s\n", optarg);
2838                                 return(0);
2839                         }
2840                         name = optarg;
2841                         set_expression(optarg);
2842                         break;
2843                 case 'm':
2844                         max_to_collect = strtol(optarg, NULL, 0);
2845                         if (max_to_collect > MAX_COUNTER_SLOTS) {
2846                                 /* You can't collect more than max in array */
2847                                 max_to_collect = MAX_COUNTER_SLOTS;
2848                         }
2849                         break;
2850                 case 'v':
2851                         verbose++;
2852                         break;
2853                 case 'h':
2854                         help_only = 1;
2855                         break;
2856                 case 'i':
2857                         filename = optarg;
2858                         break;
2859                 case '?':
2860                 default:
2861                 use:
2862                         printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2863                                argv[0]);
2864                         printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2865                         printf("-v -- verbose dump debug type things -- you don't want this\n");
2866                         printf("-m N -- maximum to collect is N measurements\n");
2867                         printf("-e expr-name -- Do expression expr-name\n");
2868                         printf("-E 'your expression' -- Do your expression\n");
2869                         printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2870                         printf("-H -- Don't run anything, just explain all canned expressions\n");
2871                         printf("-T -- Test all PMC's defined by this processor\n");
2872                         printf("-A -- Run all canned tests\n");
2873                         return(0);
2874                         break;
2875                 }
2876         }
2877         if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2878             (test_mode == 0) && (master_exp == NULL)) {
2879                 printf("Without setting an expression we cannot dynamically gather information\n");
2880                 printf("you must supply a filename (and you probably want verbosity)\n");
2881                 goto use;
2882         }
2883         if (run_all && max_to_collect > 10) {
2884                 max_to_collect = 3;
2885         }
2886         if (test_mode) {
2887                 run_tests();
2888                 return(0);
2889         }
2890         printf("*********************************\n");
2891         if ((master_exp == NULL) && name) {
2892                 (*the_cpu.explain)(name);
2893         } else if (master_exp) {
2894                 printf("Examine your expression ");
2895                 print_exp(master_exp);
2896                 printf("User defined threshold\n");
2897         }
2898         if (help_only) {
2899                 return(0);
2900         }
2901         if (run_all) {
2902         more:
2903                 name = the_cpu.ents[test_at].name;
2904                 printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2905                 test_at++;
2906                 if (set_expression(name) == -1) {
2907                         if (test_at >= the_cpu.number) {
2908                                 goto done;
2909                         } else
2910                                 goto more;
2911                 }
2912
2913         }
2914         process_file(filename);
2915         if (verbose >= 2) {
2916                 for (i=0; i<ncnts; i++) {
2917                         printf("Counter:%s cpu:%d index:%d\n",
2918                                cnts[i].counter_name,
2919                                cnts[i].cpu, i);
2920                         for(j=0; j<cnts[i].pos; j++) {
2921                                 printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2922                         }
2923                         printf(" sum - %ld\n", (long int)cnts[i].sum);
2924                 }
2925         }
2926         if (expression == NULL) {
2927                 return(0);
2928         }
2929         if (max_to_collect > 1) {
2930                 for(i=0, cnt=0; i<MAX_CPU; i++) {
2931                         if (glob_cpu[i]) {
2932                                 do_expression(glob_cpu[i], -1);
2933                                 cnt++;
2934                                 if (cnt == cpu_count_out) {
2935                                         printf("\n");
2936                                         break;
2937                                 } else {
2938                                         printf("\t");
2939                                 }
2940                         }
2941                 }
2942         }
2943         if (run_all && (test_at < the_cpu.number)) {
2944                 memset(glob_cpu, 0, sizeof(glob_cpu));
2945                 ncnts = 0;
2946                 printf("*********************************\n");
2947                 goto more;
2948         } else if (run_all) {
2949         done:
2950                 printf("*********************************\n");
2951         }
2952         return(0);      
2953 }