]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.sbin/pmcstudy/pmcstudy.c
sysctl(9): Fix a few mandoc related issues
[FreeBSD/FreeBSD.git] / usr.sbin / pmcstudy / pmcstudy.c
1 /*-
2  * Copyright (c) 2014-2015 Netflix, Inc.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer,
9  *    in this position and unchanged.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. The name of the author may not be used to endorse or promote products
14  *    derived from this software without specific prior written permission
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  */
27 #include <sys/types.h>
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <unistd.h>
31 #include <string.h>
32 #include <strings.h>
33 #include <sys/errno.h>
34 #include <signal.h>
35 #include <sys/wait.h>
36 #include <getopt.h>
37 #include "eval_expr.h"
38 __FBSDID("$FreeBSD$");
39
/* Sizing limits used by the counter bookkeeping below. */
#define MAX_COUNTER_SLOTS 1024	/* Elements in counters.vals[] */
#define MAX_NLEN 64		/* Bytes in counters.counter_name[] */
#define MAX_CPU 64		/* Elements in glob_cpu[] */
#define PMC_INITIAL_ALLOC 512

/* Run-time settings private to this file. */
static int max_pmc_counters = 1;
static int run_all = 0;
static int verbose = 0;

/* Process environment; handed to execve() by my_popen(). */
extern char **environ;

/*
 * Each global below is declared extern immediately before it is defined,
 * so the definition always has a prior declaration in scope.
 */
extern struct expression *master_exp;
struct expression *master_exp = NULL;

extern char **valid_pmcs;
char **valid_pmcs = NULL;
extern int valid_pmc_cnt;
int valid_pmc_cnt = 0;
extern int pmc_allocated_cnt;
int pmc_allocated_cnt = 0;
59
60 /*
61  * The following two varients on popen and pclose with
62  * the cavet that they get you the PID so that you
63  * can supply it to pclose so it can send a SIGTERM 
64  *  to the process.
65  */
66 static FILE *
67 my_popen(const char *command, const char *dir, pid_t *p_pid)
68 {
69         FILE *io_out, *io_in;
70         int pdesin[2], pdesout[2];
71         char *argv[4];
72         pid_t pid;
73         char cmd[4];
74         char cmd2[1024];
75         char arg1[4];
76
77         if ((strcmp(dir, "r") != 0) &&
78             (strcmp(dir, "w") != 0)) {
79                 errno = EINVAL;
80                 return(NULL);
81         }
82         if (pipe(pdesin) < 0)
83                 return (NULL);
84
85         if (pipe(pdesout) < 0) {
86                 (void)close(pdesin[0]);
87                 (void)close(pdesin[1]);
88                 return (NULL);
89         }
90         strcpy(cmd, "sh");
91         strcpy(arg1, "-c");
92         strcpy(cmd2, command);
93         argv[0] = cmd;
94         argv[1] = arg1;
95         argv[2] = cmd2;
96         argv[3] = NULL;
97
98         switch (pid = fork()) {
99         case -1:                        /* Error. */
100                 (void)close(pdesin[0]);
101                 (void)close(pdesin[1]);
102                 (void)close(pdesout[0]);
103                 (void)close(pdesout[1]);
104                 return (NULL);
105                 /* NOTREACHED */
106         case 0:                         /* Child. */
107                 /* Close out un-used sides */
108                 (void)close(pdesin[1]);
109                 (void)close(pdesout[0]);
110                 /* Now prepare the stdin of the process */
111                 close(0);
112                 (void)dup(pdesin[0]);
113                 (void)close(pdesin[0]);
114                 /* Now prepare the stdout of the process */
115                 close(1);
116                 (void)dup(pdesout[1]);
117                 /* And lets do stderr just in case */
118                 close(2);
119                 (void)dup(pdesout[1]);
120                 (void)close(pdesout[1]);
121                 /* Now run it */
122                 execve("/bin/sh", argv, environ);
123                 exit(127);
124                 /* NOTREACHED */
125         }
126         /* Parent; assume fdopen can't fail. */
127         /* Store the pid */
128         *p_pid = pid;
129         if (strcmp(dir, "r") != 0) {
130                 io_out = fdopen(pdesin[1], "w");
131                 (void)close(pdesin[0]);
132                 (void)close(pdesout[0]);
133                 (void)close(pdesout[1]);
134                 return(io_out);
135         } else {
136                 /* Prepare the input stream */
137                 io_in = fdopen(pdesout[0], "r");
138                 (void)close(pdesout[1]);
139                 (void)close(pdesin[0]);
140                 (void)close(pdesin[1]);
141                 return (io_in);
142         }
143 }
144
/*
 * my_pclose --
 *	Counterpart of my_popen(): close the stream "io", send SIGTERM to
 *	the child "the_pid" and collect it with wait4(), retrying when the
 *	wait is interrupted by a signal.  Nothing is returned; the child's
 *	exit status is discarded.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;
	pid_t reaped;

	(void)fclose(io);
	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);
	for (;;) {
		reaped = wait4(the_pid, &status, 0, (struct rusage *)0);
		/* Only an interrupted wait is worth repeating. */
		if (reaped != -1 || errno != EINTR)
			break;
	}
}
166
167 struct counters {
168         struct counters *next_cpu;
169         char counter_name[MAX_NLEN];            /* Name of counter */
170         int cpu;                                /* CPU we are on */
171         int pos;                                /* Index we are filling to. */
172         uint64_t vals[MAX_COUNTER_SLOTS];       /* Last 64 entries */
173         uint64_t sum;                           /* Summary of entries */
174 };
175
176 extern struct counters *glob_cpu[MAX_CPU];
177 struct counters *glob_cpu[MAX_CPU];
178
179 extern struct counters *cnts;
180 struct counters *cnts=NULL;
181
182 extern int ncnts;
183 int ncnts=0;
184
185 extern int (*expression)(struct counters *, int);
186 int (*expression)(struct counters *, int);
187
188 static const char *threshold=NULL;
189 static const char *command;
190
/*
 * One named metric: its threshold text, an associated command string and
 * the routine that evaluates it.
 */
struct cpu_entry {
	const char *name;			/* Metric name */
	const char *thresh;			/* Threshold description */
	const char *command;			/* Command string for this metric */
	int (*func)(struct counters *, int);	/* Evaluation routine */
	int counters_required;			/* Counters the metric needs */
};
198
/*
 * Per-CPU-model description: a type string, an entry table and the
 * routine that explains an entry by name.
 */
struct cpu_type {
	char cputype[32];			/* CPU type string */
	int number;				/* Presumably the length of ents[]; confirm */
	struct cpu_entry *ents;			/* Metric entries */
	void (*explain)(const char *name);	/* Prints help for one entry */
};

/* The single global instance, declared and then defined. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
207
/*
 * explain_name_sb --
 *	Print the formula behind the metric called "name", followed by a
 *	sentence giving the threshold at which the value deserves attention.
 *	A name that is not in the table is reported as unknown.
 *
 *	The "splitload" text now carries its opening parenthesis, which was
 *	missing from the printed formula.
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *key;	/* Metric name */
		const char *formula;	/* Text printed, newline(s) included */
		const char *thresh;	/* Threshold description */
	} tab[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t ntab = sizeof(tab) / sizeof(tab[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	/* Exact-match lookup; the first hit wins. */
	for (i = 0; i < ntab; i++) {
		if (strcmp(name, tab[i].key) == 0)
			break;
	}
	if (i < ntab) {
		fputs(tab[i].formula, stdout);
		mythresh = tab[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
286
/*
 * explain_name_ib --
 *	Print the formula behind the metric called "name", followed by a
 *	sentence giving the threshold at which the value deserves attention.
 *	A name that is not in the table is reported as unknown.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *key;	/* Metric name */
		const char *formula;	/* Text printed, newline(s) included */
		const char *thresh;	/* Threshold description */
	} tab[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t ntab = sizeof(tab) / sizeof(tab[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	/* Exact-match lookup; the first hit wins. */
	for (i = 0; i < ntab; i++) {
		if (strcmp(name, tab[i].key) == 0)
			break;
	}
	if (i < ntab) {
		fputs(tab[i].formula, stdout);
		mythresh = tab[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
368
369
/*
 * explain_name_has --
 *	Print the formula behind the metric called "name", followed by a
 *	sentence giving the threshold at which the value deserves attention.
 *	A name that is not in the table is reported as unknown.
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *key;	/* Metric name */
		const char *formula;	/* Text printed, newline(s) included */
		const char *thresh;	/* Threshold description */
	} tab[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t ntab = sizeof(tab) / sizeof(tab[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	/* Exact-match lookup; the first hit wins. */
	for (i = 0; i < ntab; i++) {
		if (strcmp(name, tab[i].key) == 0)
			break;
	}
	if (i < ntab) {
		fputs(tab[i].formula, stdout);
		mythresh = tab[i].thresh;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
447
448
449
450 static struct counters *
451 find_counter(struct counters *base, const char *name)
452 {
453         struct counters *at;
454         int len;
455
456         at = base;
457         len = strlen(name);
458         while(at) {
459                 if (strncmp(at->counter_name, name, len) == 0) {
460                         return(at);
461                 }
462                 at = at->next_cpu;
463         }
464         printf("Can't find counter %s\n", name);
465         printf("We have:\n");
466         at = base;
467         while(at) {
468                 printf("- %s\n", at->counter_name);
469                 at = at->next_cpu;
470         }
471         exit(-1);
472 }
473
474 static int
475 allocstall1(struct counters *cpu, int pos)
476 {
477 /*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
478         int ret;
479         struct counters *partial;
480         struct counters *unhalt;
481         double un, par, res;
482         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
483         partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
484         if (pos != -1) {
485                 par = partial->vals[pos] * 1.0;
486                 un = unhalt->vals[pos] * 1.0;
487         } else {
488                 par = partial->sum * 1.0;
489                 un = unhalt->sum * 1.0;
490         }
491         res = par/un;
492         ret = printf("%1.3f", res);
493         return(ret);
494 }
495
496 static int
497 allocstall2(struct counters *cpu, int pos)
498 {
499 /*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
500         int ret;
501         struct counters *partial;
502         struct counters *unhalt;
503         double un, par, res;
504         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
505         partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
506         if (pos != -1) {
507                 par = partial->vals[pos] * 1.0;
508                 un = unhalt->vals[pos] * 1.0;
509         } else {
510                 par = partial->sum * 1.0;
511                 un = unhalt->sum * 1.0;
512         }
513         res = par/un;
514         ret = printf("%1.3f", res);
515         return(ret);
516 }
517
518 static int
519 br_mispredict(struct counters *cpu, int pos)
520 {
521         struct counters *brctr;
522         struct counters *unhalt;
523         int ret;
524 /*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
525         double br, un, con, res;
526         con = 20.0;
527         
528         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
529         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
530         if (pos != -1) {
531                 br = brctr->vals[pos] * 1.0;
532                 un = unhalt->vals[pos] * 1.0;
533         } else {
534                 br = brctr->sum * 1.0;
535                 un = unhalt->sum * 1.0;
536         }
537         res = (con * br)/un;
538         ret = printf("%1.3f", res);
539         return(ret);
540 }
541
542 static int
543 br_mispredictib(struct counters *cpu, int pos)
544 {
545         struct counters *brctr;
546         struct counters *unhalt;
547         struct counters *clear, *clear2, *clear3;
548         struct counters *uops;
549         struct counters *recv;  
550         struct counters *iss;
551 /*        "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
552         int ret;
553         /*  
554          * (BR_MISP_RETIRED.ALL_BRANCHES / 
555          *         (BR_MISP_RETIRED.ALL_BRANCHES +
556          *          MACHINE_CLEAR.COUNT) * 
557          *         ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
558          *
559          */
560         double br, cl, cl2, cl3, uo, re, un, con, res, is;
561         con = 4.0;
562         
563         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
564         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
565         clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
566         clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
567         clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
568         uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
569         iss = find_counter(cpu, "UOPS_ISSUED.ANY");
570         recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
571         if (pos != -1) {
572                 br = brctr->vals[pos] * 1.0;
573                 cl = clear->vals[pos] * 1.0;
574                 cl2 = clear2->vals[pos] * 1.0;
575                 cl3 = clear3->vals[pos] * 1.0;
576                 uo = uops->vals[pos] * 1.0;
577                 re = recv->vals[pos] * 1.0;
578                 is = iss->vals[pos] * 1.0;
579                 un = unhalt->vals[pos] * 1.0;
580         } else {
581                 br = brctr->sum * 1.0;
582                 cl = clear->sum * 1.0;
583                 cl2 = clear2->sum * 1.0;
584                 cl3 = clear3->sum * 1.0;
585                 uo = uops->sum * 1.0;
586                 re = recv->sum * 1.0;
587                 is = iss->sum * 1.0;
588                 un = unhalt->sum * 1.0;
589         }
590         res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
591         ret = printf("%1.3f", res);
592         return(ret);
593 }
594
595
596 static int
597 br_mispredict_broad(struct counters *cpu, int pos)
598 {
599         struct counters *brctr;
600         struct counters *unhalt;
601         struct counters *clear;
602         struct counters *uops;
603         struct counters *uops_ret;
604         struct counters *recv;
605         int ret;
606         double br, cl, uo, uo_r, re, con, un, res;
607
608         con = 4.0;
609         
610         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
611         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
612         clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
613         uops = find_counter(cpu, "UOPS_ISSUED.ANY");
614         uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
615         recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
616
617         if (pos != -1) {
618                 un = unhalt->vals[pos] * 1.0;
619                 br = brctr->vals[pos] * 1.0;
620                 cl = clear->vals[pos] * 1.0;
621                 uo = uops->vals[pos] * 1.0;
622                 uo_r = uops_ret->vals[pos] * 1.0;
623                 re = recv->vals[pos] * 1.0;
624         } else {
625                 un = unhalt->sum * 1.0;
626                 br = brctr->sum * 1.0;
627                 cl = clear->sum * 1.0;
628                 uo = uops->sum * 1.0;
629                 uo_r = uops_ret->sum * 1.0;
630                 re = recv->sum * 1.0;
631         }
632         res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
633         ret = printf("%1.3f", res);
634         return(ret);
635 }
636
637 static int
638 splitloadib(struct counters *cpu, int pos)
639 {
640         int ret;
641         struct counters *mem;
642         struct counters *l1d, *ldblock;
643         struct counters *unhalt;
644         double un, memd, res, l1, ldb;
645         /*  
646          * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
647          * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
648          */
649
650         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
651         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
652         l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
653         ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
654         if (pos != -1) {
655                 memd = mem->vals[pos] * 1.0;
656                 l1 = l1d->vals[pos] * 1.0;
657                 ldb = ldblock->vals[pos] * 1.0;
658                 un = unhalt->vals[pos] * 1.0;
659         } else {
660                 memd = mem->sum * 1.0;
661                 l1 = l1d->sum * 1.0;
662                 ldb = ldblock->sum * 1.0;
663                 un = unhalt->sum * 1.0;
664         }
665         res = ((l1 / memd) * ldb)/un;
666         ret = printf("%1.3f", res);
667         return(ret);
668 }
669
670
671 static int
672 splitload(struct counters *cpu, int pos)
673 {
674         int ret;
675         struct counters *mem;
676         struct counters *unhalt;
677         double con, un, memd, res;
678 /*  4  - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
679
680         con = 5.0;
681         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
682         mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
683         if (pos != -1) {
684                 memd = mem->vals[pos] * 1.0;
685                 un = unhalt->vals[pos] * 1.0;
686         } else {
687                 memd = mem->sum * 1.0;
688                 un = unhalt->sum * 1.0;
689         }
690         res = (memd * con)/un;
691         ret = printf("%1.3f", res);
692         return(ret);
693 }
694
695
696 static int
697 splitload_sb(struct counters *cpu, int pos)
698 {
699         int ret;
700         struct counters *mem;
701         struct counters *unhalt;
702         double con, un, memd, res;
703 /*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
704
705         con = 5.0;
706         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
707         mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
708         if (pos != -1) {
709                 memd = mem->vals[pos] * 1.0;
710                 un = unhalt->vals[pos] * 1.0;
711         } else {
712                 memd = mem->sum * 1.0;
713                 un = unhalt->sum * 1.0;
714         }
715         res = (memd * con)/un;
716         ret = printf("%1.3f", res);
717         return(ret);
718 }
719
720
721 static int
722 splitstore_sb(struct counters *cpu, int pos)
723 {
724         /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
725         int ret;
726         struct counters *mem_split;
727         struct counters *mem_stores;
728         double memsplit, memstore, res;
729         mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
730         mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
731         if (pos != -1) {
732                 memsplit = mem_split->vals[pos] * 1.0;
733                 memstore = mem_stores->vals[pos] * 1.0;
734         } else {
735                 memsplit = mem_split->sum * 1.0;
736                 memstore = mem_stores->sum * 1.0;
737         }
738         res = memsplit/memstore;
739         ret = printf("%1.3f", res);
740         return(ret);
741 }
742
743
744
745 static int
746 splitstore(struct counters *cpu, int pos)
747 {
748         /*  5  - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
749         int ret;
750         struct counters *mem_split;
751         struct counters *mem_stores;
752         double memsplit, memstore, res;
753         mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
754         mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
755         if (pos != -1) {
756                 memsplit = mem_split->vals[pos] * 1.0;
757                 memstore = mem_stores->vals[pos] * 1.0;
758         } else {
759                 memsplit = mem_split->sum * 1.0;
760                 memstore = mem_stores->sum * 1.0;
761         }
762         res = memsplit/memstore;
763         ret = printf("%1.3f", res);
764         return(ret);
765 }
766
767
768 static int
769 contested(struct counters *cpu, int pos)
770 {
771         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
772         int ret;
773         struct counters *mem;
774         struct counters *unhalt;
775         double con, un, memd, res;
776
777         con = 60.0;
778         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
779         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
780         if (pos != -1) {
781                 memd = mem->vals[pos] * 1.0;
782                 un = unhalt->vals[pos] * 1.0;
783         } else {
784                 memd = mem->sum * 1.0;
785                 un = unhalt->sum * 1.0;
786         }
787         res = (memd * con)/un;
788         ret = printf("%1.3f", res);
789         return(ret);
790 }
791
792 static int
793 contested_has(struct counters *cpu, int pos)
794 {
795         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
796         int ret;
797         struct counters *mem;
798         struct counters *unhalt;
799         double con, un, memd, res;
800
801         con = 84.0;
802         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
803         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
804         if (pos != -1) {
805                 memd = mem->vals[pos] * 1.0;
806                 un = unhalt->vals[pos] * 1.0;
807         } else {
808                 memd = mem->sum * 1.0;
809                 un = unhalt->sum * 1.0;
810         }
811         res = (memd * con)/un;
812         ret = printf("%1.3f", res);
813         return(ret);
814 }
815
816 static int
817 contestedbroad(struct counters *cpu, int pos)
818 {
819         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
820         int ret;
821         struct counters *mem;
822         struct counters *mem2;
823         struct counters *unhalt;
824         double con, un, memd, memtoo, res;
825
826         con = 84.0;
827         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
828         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
829         mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
830
831         if (pos != -1) {
832                 memd = mem->vals[pos] * 1.0;
833                 memtoo = mem2->vals[pos] * 1.0;
834                 un = unhalt->vals[pos] * 1.0;
835         } else {
836                 memd = mem->sum * 1.0;
837                 memtoo = mem2->sum * 1.0;
838                 un = unhalt->sum * 1.0;
839         }
840         res = ((memd * con) + memtoo)/un;
841         ret = printf("%1.3f", res);
842         return(ret);
843 }
844
845
846 static int
847 blockstoreforward(struct counters *cpu, int pos)
848 {
849         /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
850         int ret;
851         struct counters *ldb;
852         struct counters *unhalt;
853         double con, un, ld, res;
854
855         con = 13.0;
856         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
857         ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
858         if (pos != -1) {
859                 ld = ldb->vals[pos] * 1.0;
860                 un = unhalt->vals[pos] * 1.0;
861         } else {
862                 ld = ldb->sum * 1.0;
863                 un = unhalt->sum * 1.0;
864         }
865         res = (ld * con)/un;
866         ret = printf("%1.3f", res);
867         return(ret);
868 }
869
870 static int
871 cache2(struct counters *cpu, int pos)
872 {
873         /* ** Suspect ***
874          *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
875          *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
876          */
877         int ret;
878         struct counters *mem1, *mem2, *mem3;
879         struct counters *unhalt;
880         double con1, con2, con3, un, me_1, me_2, me_3, res;
881
882         con1 = 26.0;
883         con2 = 43.0;
884         con3 = 60.0;
885         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
886 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
887         mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
888         mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
889         mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
890         if (pos != -1) {
891                 me_1 = mem1->vals[pos] * 1.0;
892                 me_2 = mem2->vals[pos] * 1.0;
893                 me_3 = mem3->vals[pos] * 1.0;
894                 un = unhalt->vals[pos] * 1.0;
895         } else {
896                 me_1 = mem1->sum * 1.0;
897                 me_2 = mem2->sum * 1.0;
898                 me_3 = mem3->sum * 1.0;
899                 un = unhalt->sum * 1.0;
900         }
901         res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
902         ret = printf("%1.3f", res);
903         return(ret);
904 }
905
906 static int
907 datasharing(struct counters *cpu, int pos)
908 {
909         /* 
910          * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
911          */
912         int ret;
913         struct counters *mem;
914         struct counters *unhalt;
915         double con, res, me, un;
916
917         con = 43.0;
918         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
919         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
920         if (pos != -1) {
921                 me = mem->vals[pos] * 1.0;
922                 un = unhalt->vals[pos] * 1.0;
923         } else {
924                 me = mem->sum * 1.0;
925                 un = unhalt->sum * 1.0;
926         }
927         res = (me * con)/un;
928         ret = printf("%1.3f", res);
929         return(ret);
930
931 }
932
933
934 static int
935 datasharing_has(struct counters *cpu, int pos)
936 {
937         /* 
938          * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
939          */
940         int ret;
941         struct counters *mem;
942         struct counters *unhalt;
943         double con, res, me, un;
944
945         con = 72.0;
946         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
947         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
948         if (pos != -1) {
949                 me = mem->vals[pos] * 1.0;
950                 un = unhalt->vals[pos] * 1.0;
951         } else {
952                 me = mem->sum * 1.0;
953                 un = unhalt->sum * 1.0;
954         }
955         res = (me * con)/un;
956         ret = printf("%1.3f", res);
957         return(ret);
958
959 }
960
961
962 static int
963 cache2ib(struct counters *cpu, int pos)
964 {
965         /*
966          *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
967          */
968         int ret;
969         struct counters *mem;
970         struct counters *unhalt;
971         double con, un, me, res;
972
973         con = 29.0;
974         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
975         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
976         if (pos != -1) {
977                 me = mem->vals[pos] * 1.0;
978                 un = unhalt->vals[pos] * 1.0;
979         } else {
980                 me = mem->sum * 1.0;
981                 un = unhalt->sum * 1.0;
982         }
983         res = (con * me)/un; 
984         ret = printf("%1.3f", res);
985         return(ret);
986 }
987
988 static int
989 cache2has(struct counters *cpu, int pos)
990 {
991         /*
992          * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
993          *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
994          *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
995          *           / CPU_CLK_UNHALTED.THREAD_P
996          */
997         int ret;
998         struct counters *mem1, *mem2, *mem3;
999         struct counters *unhalt;
1000         double con1, con2, con3, un, me1, me2, me3, res;
1001
1002         con1 = 36.0;
1003         con2 = 72.0;
1004         con3 = 84.0;
1005         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1006         mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1007         mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1008         mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1009         if (pos != -1) {
1010                 me1 = mem1->vals[pos] * 1.0;
1011                 me2 = mem2->vals[pos] * 1.0;
1012                 me3 = mem3->vals[pos] * 1.0;
1013                 un = unhalt->vals[pos] * 1.0;
1014         } else {
1015                 me1 = mem1->sum * 1.0;
1016                 me2 = mem2->sum * 1.0;
1017                 me3 = mem3->sum * 1.0;
1018                 un = unhalt->sum * 1.0;
1019         }
1020         res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1021         ret = printf("%1.3f", res);
1022         return(ret);
1023 }
1024
1025
1026 static int
1027 cache2broad(struct counters *cpu, int pos)
1028 {
1029         /*
1030          *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1031          */
1032         int ret;
1033         struct counters *mem;
1034         struct counters *unhalt;
1035         double con, un, me, res;
1036
1037         con = 36.0;
1038         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1039         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1040         if (pos != -1) {
1041                 me = mem->vals[pos] * 1.0;
1042                 un = unhalt->vals[pos] * 1.0;
1043         } else {
1044                 me = mem->sum * 1.0;
1045                 un = unhalt->sum * 1.0;
1046         }
1047         res = (con * me)/un; 
1048         ret = printf("%1.3f", res);
1049         return(ret);
1050 }
1051
1052
1053 static int
1054 cache1(struct counters *cpu, int pos)
1055 {
1056         /*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1057         int ret;
1058         struct counters *mem;
1059         struct counters *unhalt;
1060         double con, un, me, res;
1061
1062         con = 180.0;
1063         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1064         mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1065         if (pos != -1) {
1066                 me = mem->vals[pos] * 1.0;
1067                 un = unhalt->vals[pos] * 1.0;
1068         } else {
1069                 me = mem->sum * 1.0;
1070                 un = unhalt->sum * 1.0;
1071         }
1072         res = (me * con)/un;
1073         ret = printf("%1.3f", res);
1074         return(ret);
1075 }
1076
1077 static int
1078 cache1ib(struct counters *cpu, int pos)
1079 {
1080         /*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1081         int ret;
1082         struct counters *mem;
1083         struct counters *unhalt;
1084         double con, un, me, res;
1085
1086         con = 180.0;
1087         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1088         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1089         if (pos != -1) {
1090                 me = mem->vals[pos] * 1.0;
1091                 un = unhalt->vals[pos] * 1.0;
1092         } else {
1093                 me = mem->sum * 1.0;
1094                 un = unhalt->sum * 1.0;
1095         }
1096         res = (me * con)/un;
1097         ret = printf("%1.3f", res);
1098         return(ret);
1099 }
1100
1101
1102 static int
1103 cache1broad(struct counters *cpu, int pos)
1104 {
1105         /*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1106         int ret;
1107         struct counters *mem;
1108         struct counters *unhalt;
1109         double con, un, me, res;
1110
1111         con = 180.0;
1112         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1113         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1114         if (pos != -1) {
1115                 me = mem->vals[pos] * 1.0;
1116                 un = unhalt->vals[pos] * 1.0;
1117         } else {
1118                 me = mem->sum * 1.0;
1119                 un = unhalt->sum * 1.0;
1120         }
1121         res = (me * con)/un;
1122         ret = printf("%1.3f", res);
1123         return(ret);
1124 }
1125
1126
1127 static int
1128 dtlb_missload(struct counters *cpu, int pos)
1129 {
1130         /* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1131         int ret;
1132         struct counters *dtlb_m, *dtlb_d;
1133         struct counters *unhalt;
1134         double con, un, d1, d2, res;
1135
1136         con = 7.0;
1137         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1138         dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1139         dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1140         if (pos != -1) {
1141                 d1 = dtlb_m->vals[pos] * 1.0;
1142                 d2 = dtlb_d->vals[pos] * 1.0;
1143                 un = unhalt->vals[pos] * 1.0;
1144         } else {
1145                 d1 = dtlb_m->sum * 1.0;
1146                 d2 = dtlb_d->sum * 1.0;
1147                 un = unhalt->sum * 1.0;
1148         }
1149         res = ((d1 * con) + d2)/un;
1150         ret = printf("%1.3f", res);
1151         return(ret);
1152 }
1153
1154 static int
1155 dtlb_missstore(struct counters *cpu, int pos)
1156 {
1157         /* 
1158          * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 
1159          * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 
1160          */
1161         int ret;
1162         struct counters *dtsb_m, *dtsb_d;
1163         struct counters *unhalt;
1164         double con, un, d1, d2, res;
1165
1166         con = 7.0;
1167         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1168         dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1169         dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1170         if (pos != -1) {
1171                 d1 = dtsb_m->vals[pos] * 1.0;
1172                 d2 = dtsb_d->vals[pos] * 1.0;
1173                 un = unhalt->vals[pos] * 1.0;
1174         } else {
1175                 d1 = dtsb_m->sum * 1.0;
1176                 d2 = dtsb_d->sum * 1.0;
1177                 un = unhalt->sum * 1.0;
1178         }
1179         res = ((d1 * con) + d2)/un;
1180         ret = printf("%1.3f", res);
1181         return(ret);
1182 }
1183
1184 static int
1185 itlb_miss(struct counters *cpu, int pos)
1186 {
1187         /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1188         int ret;
1189         struct counters *itlb;
1190         struct counters *unhalt;
1191         double un, d1, res;
1192
1193         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1194         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1195         if (pos != -1) {
1196                 d1 = itlb->vals[pos] * 1.0;
1197                 un = unhalt->vals[pos] * 1.0;
1198         } else {
1199                 d1 = itlb->sum * 1.0;
1200                 un = unhalt->sum * 1.0;
1201         }
1202         res = d1/un;
1203         ret = printf("%1.3f", res);
1204         return(ret);
1205 }
1206
1207
1208 static int
1209 itlb_miss_broad(struct counters *cpu, int pos)
1210 {
1211         /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
1212         int ret;
1213         struct counters *itlb;
1214         struct counters *unhalt;
1215         struct counters *four_k;
1216         double un, d1, res, k;
1217
1218         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1219         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1220         four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1221         if (pos != -1) {
1222                 d1 = itlb->vals[pos] * 1.0;
1223                 un = unhalt->vals[pos] * 1.0;
1224                 k = four_k->vals[pos] * 1.0;
1225         } else {
1226                 d1 = itlb->sum * 1.0;
1227                 un = unhalt->sum * 1.0;
1228                 k = four_k->sum * 1.0;
1229         }
1230         res = (7.0 * k + d1)/un;
1231         ret = printf("%1.3f", res);
1232         return(ret);
1233 }
1234
1235
1236 static int
1237 icache_miss(struct counters *cpu, int pos)
1238 {
1239         /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1240
1241         int ret;
1242         struct counters *itlb, *icache;
1243         struct counters *unhalt;
1244         double un, d1, ic, res;
1245
1246         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1247         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1248         icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1249         if (pos != -1) {
1250                 d1 = itlb->vals[pos] * 1.0;
1251                 ic = icache->vals[pos] * 1.0;
1252                 un = unhalt->vals[pos] * 1.0;
1253         } else {
1254                 d1 = itlb->sum * 1.0;
1255                 ic = icache->sum * 1.0;
1256                 un = unhalt->sum * 1.0;
1257         }
1258         res = (ic-d1)/un;
1259         ret = printf("%1.3f", res);
1260         return(ret);
1261
1262 }
1263
1264 static int
1265 icache_miss_has(struct counters *cpu, int pos)
1266 {
1267         /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1268
1269         int ret;
1270         struct counters *icache;
1271         struct counters *unhalt;
1272         double un, con, ic, res;
1273
1274         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1275         icache = find_counter(cpu, "ICACHE.MISSES");
1276         con = 36.0;
1277         if (pos != -1) {
1278                 ic = icache->vals[pos] * 1.0;
1279                 un = unhalt->vals[pos] * 1.0;
1280         } else {
1281                 ic = icache->sum * 1.0;
1282                 un = unhalt->sum * 1.0;
1283         }
1284         res = (con * ic)/un;
1285         ret = printf("%1.3f", res);
1286         return(ret);
1287
1288 }
1289
1290 static int
1291 lcp_stall(struct counters *cpu, int pos)
1292 {
1293          /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1294         int ret;
1295         struct counters *ild;
1296         struct counters *unhalt;
1297         double un, d1, res;
1298
1299         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1300         ild = find_counter(cpu, "ILD_STALL.LCP");
1301         if (pos != -1) {
1302                 d1 = ild->vals[pos] * 1.0;
1303                 un = unhalt->vals[pos] * 1.0;
1304         } else {
1305                 d1 = ild->sum * 1.0;
1306                 un = unhalt->sum * 1.0;
1307         }
1308         res = d1/un;
1309         ret = printf("%1.3f", res);
1310         return(ret);
1311
1312 }
1313
1314
1315 static int
1316 frontendstall(struct counters *cpu, int pos)
1317 {
1318       /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1319         int ret;
1320         struct counters *idq;
1321         struct counters *unhalt;
1322         double con, un, id, res;
1323
1324         con = 4.0;
1325         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1326         idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1327         if (pos != -1) {
1328                 id = idq->vals[pos] * 1.0;
1329                 un = unhalt->vals[pos] * 1.0;
1330         } else {
1331                 id = idq->sum * 1.0;
1332                 un = unhalt->sum * 1.0;
1333         }
1334         res = id/(un * con);
1335         ret = printf("%1.3f", res);
1336         return(ret);
1337 }
1338
1339 static int
1340 clears(struct counters *cpu, int pos)
1341 {
1342         /* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )  
1343          *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1344         
1345         int ret;
1346         struct counters *clr1, *clr2, *clr3;
1347         struct counters *unhalt;
1348         double con, un, cl1, cl2, cl3, res;
1349
1350         con = 100.0;
1351         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1352         clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1353         clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1354         clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1355         
1356         if (pos != -1) {
1357                 cl1 = clr1->vals[pos] * 1.0;
1358                 cl2 = clr2->vals[pos] * 1.0;
1359                 cl3 = clr3->vals[pos] * 1.0;
1360                 un = unhalt->vals[pos] * 1.0;
1361         } else {
1362                 cl1 = clr1->sum * 1.0;
1363                 cl2 = clr2->sum * 1.0;
1364                 cl3 = clr3->sum * 1.0;
1365                 un = unhalt->sum * 1.0;
1366         }
1367         res = ((cl1 + cl2 + cl3) * con)/un;
1368         ret = printf("%1.3f", res);
1369         return(ret);
1370 }
1371
1372
1373
1374 static int
1375 clears_broad(struct counters *cpu, int pos)
1376 {
1377         int ret;
1378         struct counters *clr1, *clr2, *clr3, *cyc;
1379         struct counters *unhalt;
1380         double con, un, cl1, cl2, cl3, cy, res;
1381
1382         con = 100.0;
1383         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1384         clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1385         clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1386         clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1387         cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1388         if (pos != -1) {
1389                 cl1 = clr1->vals[pos] * 1.0;
1390                 cl2 = clr2->vals[pos] * 1.0;
1391                 cl3 = clr3->vals[pos] * 1.0;
1392                 cy = cyc->vals[pos] * 1.0;
1393                 un = unhalt->vals[pos] * 1.0;
1394         } else {
1395                 cl1 = clr1->sum * 1.0;
1396                 cl2 = clr2->sum * 1.0;
1397                 cl3 = clr3->sum * 1.0;
1398                 cy = cyc->sum * 1.0;
1399                 un = unhalt->sum * 1.0;
1400         }
1401         /* Formula not listed but extrapulated to add the cy ?? */
1402         res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1403         ret = printf("%1.3f", res);
1404         return(ret);
1405 }
1406
1407
1408
1409
1410
1411 static int
1412 microassist(struct counters *cpu, int pos)
1413 {
1414         /* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1415         int ret;
1416         struct counters *idq;
1417         struct counters *unhalt;
1418         double un, id, res, con;
1419
1420         con = 4.0;
1421         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1422         idq = find_counter(cpu, "IDQ.MS_UOPS");
1423         if (pos != -1) {
1424                 id = idq->vals[pos] * 1.0;
1425                 un = unhalt->vals[pos] * 1.0;
1426         } else {
1427                 id = idq->sum * 1.0;
1428                 un = unhalt->sum * 1.0;
1429         }
1430         res = id/(un * con);
1431         ret = printf("%1.3f", res);
1432         return(ret);
1433 }
1434
1435
1436 static int
1437 microassist_broad(struct counters *cpu, int pos)
1438 {
1439         int ret;
1440         struct counters *idq;
1441         struct counters *unhalt;
1442         struct counters *uopiss;
1443         struct counters *uopret;
1444         double un, id, res, con, uoi, uor;
1445
1446         con = 4.0;
1447         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1448         idq = find_counter(cpu, "IDQ.MS_UOPS");
1449         uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1450         uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1451         if (pos != -1) {
1452                 id = idq->vals[pos] * 1.0;
1453                 un = unhalt->vals[pos] * 1.0;
1454                 uoi = uopiss->vals[pos] * 1.0;
1455                 uor = uopret->vals[pos] * 1.0;
1456         } else {
1457                 id = idq->sum * 1.0;
1458                 un = unhalt->sum * 1.0;
1459                 uoi = uopiss->sum * 1.0;
1460                 uor = uopret->sum * 1.0;
1461         }
1462         res = (uor/uoi) * (id/(un * con));
1463         ret = printf("%1.3f", res);
1464         return(ret);
1465 }
1466
1467
1468 static int
1469 aliasing(struct counters *cpu, int pos)
1470 {
1471         /* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1472         int ret;        
1473         struct counters *ld;
1474         struct counters *unhalt;
1475         double un, lds, con, res;
1476
1477         con = 5.0;
1478         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1479         ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1480         if (pos != -1) {
1481                 lds = ld->vals[pos] * 1.0;
1482                 un = unhalt->vals[pos] * 1.0;
1483         } else {
1484                 lds = ld->sum * 1.0;
1485                 un = unhalt->sum * 1.0;
1486         }
1487         res = (lds * con)/un;
1488         ret = printf("%1.3f", res);
1489         return(ret);
1490 }
1491
1492 static int
1493 aliasing_broad(struct counters *cpu, int pos)
1494 {
1495         /* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1496         int ret;        
1497         struct counters *ld;
1498         struct counters *unhalt;
1499         double un, lds, con, res;
1500
1501         con = 7.0;
1502         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1503         ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1504         if (pos != -1) {
1505                 lds = ld->vals[pos] * 1.0;
1506                 un = unhalt->vals[pos] * 1.0;
1507         } else {
1508                 lds = ld->sum * 1.0;
1509                 un = unhalt->sum * 1.0;
1510         }
1511         res = (lds * con)/un;
1512         ret = printf("%1.3f", res);
1513         return(ret);
1514 }
1515
1516
1517 static int
1518 fpassists(struct counters *cpu, int pos)
1519 {
1520         /* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1521         int ret;        
1522         struct counters *fp;
1523         struct counters *inst;
1524         double un, fpd, res;
1525
1526         inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1527         fp = find_counter(cpu, "FP_ASSIST.ANY");
1528         if (pos != -1) {
1529                 fpd = fp->vals[pos] * 1.0;
1530                 un = inst->vals[pos] * 1.0;
1531         } else {
1532                 fpd = fp->sum * 1.0;
1533                 un = inst->sum * 1.0;
1534         }
1535         res = fpd/un;
1536         ret = printf("%1.3f", res);
1537         return(ret);
1538 }
1539
1540 static int
1541 otherassistavx(struct counters *cpu, int pos)
1542 {
1543         /* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1544         int ret;        
1545         struct counters *oth;
1546         struct counters *unhalt;
1547         double un, ot, con, res;
1548
1549         con = 75.0;
1550         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1551         oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1552         if (pos != -1) {
1553                 ot = oth->vals[pos] * 1.0;
1554                 un = unhalt->vals[pos] * 1.0;
1555         } else {
1556                 ot = oth->sum * 1.0;
1557                 un = unhalt->sum * 1.0;
1558         }
1559         res = (ot * con)/un;
1560         ret = printf("%1.3f", res);
1561         return(ret);
1562 }
1563
1564 static int
1565 otherassistsse(struct counters *cpu, int pos)
1566 {
1567
1568         int ret;        
1569         struct counters *oth;
1570         struct counters *unhalt;
1571         double un, ot, con, res;
1572
1573         /* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1574         con = 75.0;
1575         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1576         oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1577         if (pos != -1) {
1578                 ot = oth->vals[pos] * 1.0;
1579                 un = unhalt->vals[pos] * 1.0;
1580         } else {
1581                 ot = oth->sum * 1.0;
1582                 un = unhalt->sum * 1.0;
1583         }
1584         res = (ot * con)/un;
1585         ret = printf("%1.3f", res);
1586         return(ret);
1587 }
1588
1589 static int
1590 efficiency1(struct counters *cpu, int pos)
1591 {
1592
1593         int ret;        
1594         struct counters *uops;
1595         struct counters *unhalt;
1596         double un, ot, con, res;
1597
1598         /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1599         con = 4.0;
1600         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1601         uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1602         if (pos != -1) {
1603                 ot = uops->vals[pos] * 1.0;
1604                 un = unhalt->vals[pos] * 1.0;
1605         } else {
1606                 ot = uops->sum * 1.0;
1607                 un = unhalt->sum * 1.0;
1608         }
1609         res = ot/(con * un);
1610         ret = printf("%1.3f", res);
1611         return(ret);
1612 }
1613
1614 static int
1615 efficiency2(struct counters *cpu, int pos)
1616 {
1617
1618         int ret;        
1619         struct counters *uops;
1620         struct counters *unhalt;
1621         double un, ot, res;
1622
1623         /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1624         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1625         uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1626         if (pos != -1) {
1627                 ot = uops->vals[pos] * 1.0;
1628                 un = unhalt->vals[pos] * 1.0;
1629         } else {
1630                 ot = uops->sum * 1.0;
1631                 un = unhalt->sum * 1.0;
1632         }
1633         res = un/ot;
1634         ret = printf("%1.3f", res);
1635         return(ret);
1636 }
1637
1638 #define SANDY_BRIDGE_COUNT 20   
1639 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1640 /*01*/  { "allocstall1", "thresh > .05", 
1641           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1642           allocstall1, 2 },
1643 /* -- not defined for SB right (partial-rat_stalls) 02*/
1644         { "allocstall2", "thresh > .05", 
1645           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1646           allocstall2, 2 },
1647 /*03*/  { "br_miss", "thresh >= .2", 
1648           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1649           br_mispredict, 2 },
1650 /*04*/  { "splitload", "thresh >= .1", 
1651           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1652           splitload_sb, 2 },
1653 /* 05*/ { "splitstore", "thresh >= .01", 
1654           "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1655           splitstore_sb, 2 },
1656 /*06*/  { "contested", "thresh >= .05", 
1657           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1658           contested, 2 },
1659 /*07*/  { "blockstorefwd", "thresh >= .05", 
1660           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1661           blockstoreforward, 2 },
1662 /*08*/  { "cache2", "thresh >= .2", 
1663           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1664           cache2, 4 },
1665 /*09*/  { "cache1", "thresh >= .2", 
1666           "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1667           cache1, 2 },
1668 /*10*/  { "dtlbmissload", "thresh >= .1", 
1669           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1670           dtlb_missload, 3 },
1671 /*11*/  { "dtlbmissstore", "thresh >= .05", 
1672           "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1673           dtlb_missstore, 3 },
1674 /*12*/  { "frontendstall", "thresh >= .15", 
1675           "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1676           frontendstall, 2 },
1677 /*13*/  { "clears", "thresh >= .02", 
1678           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1679           clears, 4 },
1680 /*14*/  { "microassist", "thresh >= .05", 
1681           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1682           microassist, 2 },
1683 /*15*/  { "aliasing_4k", "thresh >= .1", 
1684           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1685           aliasing, 2 },
1686 /*16*/  { "fpassist", "look for a excessive value", 
1687           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1688           fpassists, 2 },
1689 /*17*/  { "otherassistavx", "look for a excessive value", 
1690           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1691           otherassistavx, 2},
1692 /*18*/  { "otherassistsse", "look for a excessive value", 
1693           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1694           otherassistsse, 2 },
1695 /*19*/  { "eff1", "thresh < .9", 
1696           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1697           efficiency1, 2 },
1698 /*20*/  { "eff2", "thresh > 1.0", 
1699           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1700           efficiency2, 2 },
1701 };
1702
1703
1704 #define IVY_BRIDGE_COUNT 21
1705 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1706 /*1*/   { "eff1", "thresh < .75", 
1707           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1708           efficiency1, 2 },
1709 /*2*/   { "eff2", "thresh > 1.0", 
1710           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1711           efficiency2, 2 },
1712 /*3*/   { "itlbmiss", "thresh > .05", 
1713           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1714           itlb_miss, 2 },
1715 /*4*/   { "icachemiss", "thresh > .05", 
1716           "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1717           icache_miss, 3 },
1718 /*5*/   { "lcpstall", "thresh > .05", 
1719           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1720           lcp_stall, 2 },
1721 /*6*/   { "cache1", "thresh >= .2", 
1722           "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1723           cache1ib, 2 },
1724 /*7*/   { "cache2", "thresh >= .2", 
1725           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1726           cache2ib, 2 },
1727 /*8*/   { "contested", "thresh >= .05", 
1728           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1729           contested, 2 },
1730 /*9*/   { "datashare", "thresh >= .05",
1731           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1732           datasharing, 2 },
1733 /*10*/  { "blockstorefwd", "thresh >= .05", 
1734           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1735           blockstoreforward, 2 },
1736 /*11*/  { "splitload", "thresh >= .1", 
1737           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1738           splitloadib, 4 },
1739 /*12*/  { "splitstore", "thresh >= .01", 
1740           "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1741           splitstore, 2 },
1742 /*13*/  { "aliasing_4k", "thresh >= .1", 
1743           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1744           aliasing, 2 },
1745 /*14*/  { "dtlbmissload", "thresh >= .1", 
1746           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1747           dtlb_missload , 3},
1748 /*15*/  { "dtlbmissstore", "thresh >= .05", 
1749           "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1750           dtlb_missstore, 3 },
1751 /*16*/  { "br_miss", "thresh >= .2", 
1752           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1753           br_mispredictib, 8 },
1754 /*17*/  { "clears", "thresh >= .02", 
1755           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1756           clears, 4 },
1757 /*18*/  { "microassist", "thresh >= .05", 
1758           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1759           microassist, 2 },
1760 /*19*/  { "fpassist", "look for a excessive value", 
1761           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1762           fpassists, 2 },
1763 /*20*/  { "otherassistavx", "look for a excessive value", 
1764           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1765           otherassistavx , 2},
1766 /*21*/  { "otherassistsse", "look for a excessive value", 
1767           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1768           otherassistsse, 2 },
1769 };
1770
1771 #define HASWELL_COUNT 20
1772 static struct cpu_entry haswell[HASWELL_COUNT] = {
1773 /*1*/   { "eff1", "thresh < .75", 
1774           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1775           efficiency1, 2 },
1776 /*2*/   { "eff2", "thresh > 1.0", 
1777           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1778           efficiency2, 2 },
1779 /*3*/   { "itlbmiss", "thresh > .05", 
1780           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1781           itlb_miss, 2 },
1782 /*4*/   { "icachemiss", "thresh > .05", 
1783           "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1784           icache_miss_has, 2 },
1785 /*5*/   { "lcpstall", "thresh > .05", 
1786           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1787           lcp_stall, 2 },
1788 /*6*/   { "cache1", "thresh >= .2", 
1789           "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1790           cache1ib, 2 },
1791 /*7*/   { "cache2", "thresh >= .2", 
1792           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1793           cache2has, 4 },
1794 /*8*/   { "contested", "thresh >= .05", 
1795           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1796           contested_has, 2 },
1797 /*9*/   { "datashare", "thresh >= .05",
1798           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1799           datasharing_has, 2 },
1800 /*10*/  { "blockstorefwd", "thresh >= .05", 
1801           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1802           blockstoreforward, 2 },
1803 /*11*/  { "splitload", "thresh >= .1", 
1804           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1805           splitload , 2},
1806 /*12*/  { "splitstore", "thresh >= .01", 
1807           "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1808           splitstore, 2 },
1809 /*13*/  { "aliasing_4k", "thresh >= .1", 
1810           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1811           aliasing, 2 },
1812 /*14*/  { "dtlbmissload", "thresh >= .1", 
1813           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1814           dtlb_missload, 3 },
1815 /*15*/  { "br_miss", "thresh >= .2", 
1816           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1817           br_mispredict, 2 },
1818 /*16*/  { "clears", "thresh >= .02", 
1819           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1820           clears, 4 },
1821 /*17*/  { "microassist", "thresh >= .05", 
1822           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1823           microassist, 2 },
1824 /*18*/  { "fpassist", "look for a excessive value", 
1825           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1826           fpassists, 2 },
1827 /*19*/  { "otherassistavx", "look for a excessive value", 
1828           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1829           otherassistavx, 2 },
1830 /*20*/  { "otherassistsse", "look for a excessive value", 
1831           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1832           otherassistsse, 2 },
1833 };
1834
1835
/*
 * Print, for a Broadwell test name, the formula that test evaluates and
 * the threshold at which the result suggests room for improvement.
 *
 * name - test name as it appears in the broadwell[] table ("eff1",
 *        "br_miss", ...).  An unrecognized name prints "Unknown name:"
 *        and uses "unknown entry" as the threshold text.
 *
 * The event names printed here are meant to be read (and possibly
 * copied) by the user, so they are spelled the way the events are
 * spelled in the pmcstat commands, and formula parentheses are balanced.
 */
static void
explain_name_broad(const char *name)
{
        const char *mythresh;

        if (strcmp(name, "eff1") == 0) {
                printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
                mythresh = "thresh < .75";
        } else if (strcmp(name, "eff2") == 0) {
                printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
                mythresh = "thresh > 1.0";
        } else if (strcmp(name, "itlbmiss") == 0) {
                /* Was "ITLB_MISSES_STLB_HIT_4K"; the event is ITLB_MISSES.STLB_HIT_4K. */
                printf("Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh > .05";
        } else if (strcmp(name, "icachemiss") == 0) {
                printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n");
                mythresh = "thresh > .05";
        } else if (strcmp(name, "lcpstall") == 0) {
                printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh > .05";
        } else if (strcmp(name, "cache1") == 0) {
                /*
                 * NOTE(review): the broadwell[] entry collects
                 * MEM_LOAD_UOPS_RETIRED.L3_MISS, not the event named
                 * here -- confirm which one the text should show.
                 */
                printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh >= .1";
        } else if (strcmp(name, "cache2") == 0) {
                printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n");
                mythresh = "thresh >= .2";
        } else if (strcmp(name, "contested") == 0) {
                printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh >= .05";
        } else if (strcmp(name, "datashare") == 0) {
                printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh > .05";
        } else if (strcmp(name, "blockstorefwd") == 0) {
                printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh >= .05";
        } else if (strcmp(name, "aliasing_4k") == 0) {
                printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh >= .1";
        } else if (strcmp(name, "dtlbmissload") == 0) {
                printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
                printf("         / CPU_CLK_UNHALTED.THREAD_P)\n");
                mythresh = "thresh >= .1";
        } else if (strcmp(name, "br_miss") == 0) {
                /*
                 * Fixed spellings (ALL_BRANCHS_PS, BR_MISP_RETIED,
                 * UOPS_ISSUEDF) and the stray closing parenthesis.
                 */
                printf("Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n");
                printf(" (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n");
                printf("(CPU_CLK_UNHALTED.THREAD * 4)\n");
                mythresh = "thresh >= .2";
        } else if (strcmp(name, "clears") == 0) {
                printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
                printf("          MACHINE_CLEARS.SMC + \n");
                printf("          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh >= .02";
        } else if (strcmp(name, "fpassist") == 0) {
                printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
                mythresh = "look for a excessive value";
        } else if (strcmp(name, "otherassistavx") == 0) {
                printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "look for a excessive value";
        } else if (strcmp(name, "microassist") == 0) {
                /*
                 * Closing parenthesis added.
                 * NOTE(review): the broadwell[] entry advertises
                 * "thresh >= .2" for this test -- confirm which
                 * threshold is intended.
                 */
                printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P))\n");
                printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
                mythresh = "thresh >= .05";
        } else {
                printf("Unknown name:%s\n", name);
                mythresh = "unknown entry";
        }
        printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1904
1905
1906 #define BROADWELL_COUNT 17
1907 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1908 /*1*/   { "eff1", "thresh < .75", 
1909           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1910           efficiency1, 2 }, 
1911 /*2*/   { "eff2", "thresh > 1.0", 
1912           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1913           efficiency2, 2 },
1914 /*3*/   { "itlbmiss", "thresh > .05", 
1915           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1916           itlb_miss_broad, 3 },
1917 /*4*/   { "icachemiss", "thresh > .05", 
1918           "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1919           icache_miss_has, 2 },
1920 /*5*/   { "lcpstall", "thresh > .05", 
1921           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1922           lcp_stall, 2 },
1923 /*6*/   { "cache1", "thresh >= .1", 
1924           "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1925           cache1broad, 2 },
1926 /*7*/   { "cache2", "thresh >= .2", 
1927           "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1928           cache2broad, 2 },
1929 /*8*/   { "contested", "thresh >= .05", 
1930           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1931           contestedbroad, 2 },
1932 /*9*/   { "datashare", "thresh >= .05",
1933           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1934           datasharing_has, 2 },
1935 /*10*/  { "blockstorefwd", "thresh >= .05", 
1936           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1937           blockstoreforward, 2 },
1938 /*11*/  { "aliasing_4k", "thresh >= .1", 
1939           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1940           aliasing_broad, 2 }, 
1941 /*12*/  { "dtlbmissload", "thresh >= .1", 
1942           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1943           dtlb_missload, 3 },
1944 /*13*/  { "br_miss", "thresh >= .2", 
1945           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1946           br_mispredict_broad, 7 },
1947 /*14*/  { "clears", "thresh >= .02", 
1948           "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1949           clears_broad, 5 },
1950 /*15*/  { "fpassist", "look for a excessive value", 
1951           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1952           fpassists, 2 },
1953 /*16*/  { "otherassistavx", "look for a excessive value", 
1954           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1955           otherassistavx, 2 },
1956 /*17*/  { "microassist", "thresh >= .2", 
1957           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
1958           microassist_broad, 4 },
1959 };
1960
1961
1962 static void
1963 set_sandybridge(void)
1964 {
1965         strcpy(the_cpu.cputype, "SandyBridge PMC");
1966         the_cpu.number = SANDY_BRIDGE_COUNT;
1967         the_cpu.ents = sandy_bridge;
1968         the_cpu.explain = explain_name_sb;
1969 }
1970
1971 static void
1972 set_ivybridge(void)
1973 {
1974         strcpy(the_cpu.cputype, "IvyBridge PMC");
1975         the_cpu.number = IVY_BRIDGE_COUNT;
1976         the_cpu.ents = ivy_bridge;
1977         the_cpu.explain = explain_name_ib;
1978 }
1979
1980
1981 static void
1982 set_haswell(void)
1983 {
1984         strcpy(the_cpu.cputype, "HASWELL PMC");
1985         the_cpu.number = HASWELL_COUNT;
1986         the_cpu.ents = haswell;
1987         the_cpu.explain = explain_name_has;
1988 }
1989
1990
1991 static void
1992 set_broadwell(void)
1993 {
1994         strcpy(the_cpu.cputype, "HASWELL PMC");
1995         the_cpu.number = BROADWELL_COUNT;
1996         the_cpu.ents = broadwell;
1997         the_cpu.explain = explain_name_broad;
1998 }
1999
2000
2001 static int
2002 set_expression(const char *name)
2003 {
2004         int found = 0, i;
2005         for(i=0 ; i< the_cpu.number; i++) {
2006                 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2007                         found = 1;
2008                         expression = the_cpu.ents[i].func;
2009                         command = the_cpu.ents[i].command;
2010                         threshold = the_cpu.ents[i].thresh;
2011                         if  (the_cpu.ents[i].counters_required > max_pmc_counters) {
2012                                 printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2013                                        the_cpu.ents[i].name,
2014                                        the_cpu.ents[i].counters_required, max_pmc_counters);
2015                                 printf("Sorry this test can not be run\n");
2016                                 if (run_all == 0) {
2017                                         exit(-1);
2018                                 } else {
2019                                         return(-1);
2020                                 }
2021                         }
2022                         break;
2023                 }
2024         }
2025         if (!found) {
2026                 printf("For CPU type %s we have no expression:%s\n",
2027                        the_cpu.cputype, name);
2028                 exit(-1);
2029         }
2030         return(0);
2031 }
2032
2033
2034
2035
2036
2037 static int
2038 validate_expression(char *name) 
2039 {
2040         int i, found;
2041
2042         found = 0;
2043         for(i=0 ; i< the_cpu.number; i++) {
2044                 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2045                         found = 1;
2046                         break;
2047                 }
2048         }
2049         if (!found) {
2050                 return(-1);
2051         }
2052         return (0);
2053 }
2054
2055 static void
2056 do_expression(struct counters *cpu, int pos)
2057 {
2058         if (expression == NULL) 
2059                 return;
2060         (*expression)(cpu, pos);
2061 }
2062
2063 static void
2064 process_header(int idx, char *p)
2065 {
2066         struct counters *up;
2067         int i, len, nlen;
2068         /* 
2069          * Given header element idx, at p in
2070          * form 's/NN/nameof'
2071          * process the entry to pull out the name and
2072          * the CPU number.
2073          */
2074         if (strncmp(p, "s/", 2)) {
2075                 printf("Check -- invalid header no s/ in %s\n",
2076                        p);
2077                 return;
2078         }
2079         up = &cnts[idx];
2080         up->cpu = strtol(&p[2], NULL, 10);
2081         len = strlen(p);
2082         for (i=2; i<len; i++) {
2083                 if (p[i] == '/') {
2084                         nlen = strlen(&p[(i+1)]);
2085                         if (nlen < (MAX_NLEN-1)) {
2086                                 strcpy(up->counter_name, &p[(i+1)]);
2087                         } else {
2088                                 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2089                         }
2090                 }
2091         }
2092 }
2093
2094 static void
2095 build_counters_from_header(FILE *io)
2096 {
2097         char buffer[8192], *p;
2098         int i, len, cnt;
2099         size_t mlen;
2100
2101         /* We have a new start, lets 
2102          * setup our headers and cpus.
2103          */
2104         if (fgets(buffer, sizeof(buffer), io) == NULL) {
2105                 printf("First line can't be read from file err:%d\n", errno);
2106                 return;
2107         }
2108         /*
2109          * Ok output is an array of counters. Once
2110          * we start to read the values in we must
2111          * put them in there slot to match there CPU and 
2112          * counter being updated. We create a mass array
2113          * of the counters, filling in the CPU and 
2114          * counter name. 
2115          */
2116         /* How many do we get? */
2117         len = strlen(buffer);
2118         for (i=0, cnt=0; i<len; i++) {
2119                 if (strncmp(&buffer[i], "s/", 2) == 0) {
2120                         cnt++;
2121                         for(;i<len;i++) {
2122                                 if (buffer[i] == ' ')
2123                                         break;
2124                         }
2125                 }
2126         }
2127         mlen = sizeof(struct counters) * cnt;
2128         cnts = malloc(mlen);
2129         ncnts = cnt;
2130         if (cnts == NULL) {
2131                 printf("No memory err:%d\n", errno);
2132                 return;
2133         }
2134         memset(cnts, 0, mlen);
2135         for (i=0, cnt=0; i<len; i++) {
2136                 if (strncmp(&buffer[i], "s/", 2) == 0) {
2137                         p = &buffer[i];
2138                         for(;i<len;i++) {
2139                                 if (buffer[i] == ' ') {
2140                                         buffer[i] = 0;
2141                                         break;
2142                                 }
2143                         }
2144                         process_header(cnt, p);
2145                         cnt++;
2146                 }
2147         }
2148         if (verbose)
2149                 printf("We have %d entries\n", cnt);    
2150 }
/*
 * Number of data lines after which process_file() stops reading;
 * starts out as MAX_COUNTER_SLOTS.
 */
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;
2153
2154 static int
2155 read_a_line(FILE *io) 
2156 {
2157         char buffer[8192], *p, *stop;   
2158         int pos, i;
2159
2160         if (fgets(buffer, sizeof(buffer), io) == NULL) {
2161                 return(0);
2162         }
2163         p = buffer;
2164         for (i=0; i<ncnts; i++) {
2165                 pos = cnts[i].pos;
2166                 cnts[i].vals[pos] = strtol(p, &stop, 0);
2167                 cnts[i].pos++;
2168                 cnts[i].sum += cnts[i].vals[pos];
2169                 p = stop;
2170         }
2171         return (1);
2172 }
2173
/*
 * Number of populated glob_cpu[] slots.  Set by print_header() and used
 * by process_file() to decide where each output row ends.
 */
extern int cpu_count_out;
int cpu_count_out=0;
2176
2177 static void
2178 print_header(void)
2179 {
2180         int i, cnt, printed_cnt;
2181
2182         printf("*********************************\n");
2183         for(i=0, cnt=0; i<MAX_CPU; i++) {
2184                 if (glob_cpu[i]) {
2185                         cnt++;
2186                 }
2187         }       
2188         cpu_count_out = cnt;
2189         for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2190                 if (glob_cpu[i]) {
2191                         printf("CPU%d", i);
2192                         printed_cnt++;
2193                 }
2194                 if (printed_cnt == cnt) {
2195                         printf("\n");
2196                         break;
2197                 } else {
2198                         printf("\t");
2199                 }
2200         }
2201 }
2202
2203 static void
2204 lace_cpus_together(void)
2205 {
2206         int i, j, lace_cpu;
2207         struct counters *cpat, *at;
2208
2209         for(i=0; i<ncnts; i++) {
2210                 cpat = &cnts[i];
2211                 if (cpat->next_cpu) {
2212                         /* Already laced in */
2213                         continue;
2214                 }
2215                 lace_cpu = cpat->cpu;
2216                 if (lace_cpu >= MAX_CPU) {
2217                         printf("CPU %d to big\n", lace_cpu);
2218                         continue;
2219                 }
2220                 if (glob_cpu[lace_cpu] == NULL) {
2221                         glob_cpu[lace_cpu] = cpat;
2222                 } else {
2223                         /* Already processed this cpu */
2224                         continue;
2225                 }
2226                 /* Ok look forward for cpu->cpu and link in */
2227                 for(j=(i+1); j<ncnts; j++) {
2228                         at = &cnts[j];
2229                         if (at->next_cpu) {
2230                                 continue;
2231                         }
2232                         if (at->cpu == lace_cpu) {
2233                                 /* Found one */
2234                                 cpat->next_cpu = at;
2235                                 cpat = at;
2236                         }
2237                 }
2238         }
2239 }
2240
2241
2242 static void
2243 process_file(char *filename)
2244 {
2245         FILE *io;
2246         int i;
2247         int line_at, not_done;
2248         pid_t pid_of_command=0;
2249
2250         if (filename ==  NULL) {
2251                 io = my_popen(command, "r", &pid_of_command);
2252         } else {
2253                 io = fopen(filename, "r");
2254                 if (io == NULL) {
2255                         printf("Can't process file %s err:%d\n",
2256                                filename, errno);
2257                         return;
2258                 }
2259         }
2260         build_counters_from_header(io);
2261         if (cnts == NULL) {
2262                 /* Nothing we can do */
2263                 printf("Nothing to do -- no counters built\n");
2264                 if (io) {
2265                         fclose(io);
2266                 }
2267                 return;
2268         }
2269         lace_cpus_together();
2270         print_header();
2271         if (verbose) {
2272                 for (i=0; i<ncnts; i++) {
2273                         printf("Counter:%s cpu:%d index:%d\n",
2274                                cnts[i].counter_name,
2275                                cnts[i].cpu, i);
2276                 }
2277         }
2278         line_at = 0;
2279         not_done = 1;
2280         while(not_done) {
2281                 if (read_a_line(io)) {
2282                         line_at++;
2283                 } else {
2284                         break;
2285                 }
2286                 if (line_at >= max_to_collect) {
2287                         not_done = 0;
2288                 }
2289                 if (filename == NULL) {
2290                         int cnt;
2291                         /* For the ones we dynamically open we print now */
2292                         for(i=0, cnt=0; i<MAX_CPU; i++) {
2293                                 do_expression(glob_cpu[i], (line_at-1));
2294                                 cnt++;
2295                                 if (cnt == cpu_count_out) {
2296                                         printf("\n");
2297                                         break;
2298                                 } else {
2299                                         printf("\t");
2300                                 }
2301                         }
2302                 }
2303         }
2304         if (filename) {
2305                 fclose(io);
2306         } else {
2307                 my_pclose(io, pid_of_command);
2308         }
2309 }
#if defined(__amd64__)
/*
 * cpuid(in, a, b, c, d): execute CPUID with EAX loaded from `in` and
 * store the resulting EAX/EBX/ECX/EDX into the lvalues a, b, c and d.
 * ECX is not loaded, so use do_cpuid() for leaves that take a sub-leaf.
 * The macro is a single statement without its own ';' so that it is
 * safe in any statement context.
 */
#define cpuid(in,a,b,c,d)\
  __asm __volatile("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in))

/*
 * Execute CPUID with EAX = ax and ECX = cx; the resulting
 * EAX, EBX, ECX and EDX are written to p[0]..p[3].
 * (unsigned int is the same type as the BSD u_int the callers use.)
 */
static __inline void
do_cpuid(unsigned int ax, unsigned int cx, unsigned int *p)
{
	__asm __volatile("cpuid"
			 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
			 :  "0" (ax), "c" (cx) );
}

#else
/* No CPUID here: both forms are no-op expressions that touch nothing. */
#define cpuid(in, a, b, c, d) ((void)0)
#define do_cpuid(ax, cx, p) ((void)0)
#endif
2326
2327 static void
2328 get_cpuid_set(void)
2329 {
2330         unsigned long eax, ebx, ecx, edx;
2331         int model;
2332         pid_t pid_of_command=0;
2333         size_t sz, len;
2334         FILE *io;
2335         char linebuf[1024], *str;
2336         u_int reg[4];
2337
2338         eax = ebx = ecx = edx = 0;
2339
2340         cpuid(0, eax, ebx, ecx, edx);
2341         if (ebx == 0x68747541) {
2342                 printf("AMD processors are not supported by this program\n");
2343                 printf("Sorry\n");
2344                 exit(0);
2345         } else if (ebx == 0x6972794) {
2346                 printf("Cyrix processors are not supported by this program\n");
2347                 printf("Sorry\n");
2348                 exit(0);
2349         } else if (ebx == 0x756e6547) {
2350                 printf("Genuine Intel\n");
2351         } else {
2352                 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2353                 exit(0);
2354         }
2355         cpuid(1, eax, ebx, ecx, edx);
2356         model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2357         printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2358         switch (eax & 0xF00) {
2359         case 0x500:             /* Pentium family processors */
2360                 printf("Intel Pentium P5\n");
2361                 goto not_supported;
2362                 break;
2363         case 0x600:             /* Pentium Pro, Celeron, Pentium II & III */
2364                 switch (model) {
2365                 case 0x1:
2366                         printf("Intel Pentium P6\n");
2367                         goto not_supported;
2368                         break;
2369                 case 0x3: 
2370                 case 0x5:
2371                         printf("Intel PII\n");
2372                         goto not_supported;
2373                         break;
2374                 case 0x6: case 0x16:
2375                         printf("Intel CL\n");
2376                         goto not_supported;
2377                         break;
2378                 case 0x7: case 0x8: case 0xA: case 0xB:
2379                         printf("Intel PIII\n");
2380                         goto not_supported;
2381                         break;
2382                 case 0x9: case 0xD:
2383                         printf("Intel PM\n");
2384                         goto not_supported;
2385                         break;
2386                 case 0xE:
2387                         printf("Intel CORE\n");
2388                         goto not_supported;
2389                         break;
2390                 case 0xF:
2391                         printf("Intel CORE2\n");
2392                         goto not_supported;
2393                         break;
2394                 case 0x17:
2395                         printf("Intel CORE2EXTREME\n");
2396                         goto not_supported;
2397                         break;
2398                 case 0x1C:      /* Per Intel document 320047-002. */
2399                         printf("Intel ATOM\n");
2400                         goto not_supported;
2401                         break;
2402                 case 0x1A:
2403                 case 0x1E:      /*
2404                                  * Per Intel document 253669-032 9/2009,
2405                                  * pages A-2 and A-57
2406                                  */
2407                 case 0x1F:      /*
2408                                  * Per Intel document 253669-032 9/2009,
2409                                  * pages A-2 and A-57
2410                                  */
2411                         printf("Intel COREI7\n");
2412                         goto not_supported;
2413                         break;
2414                 case 0x2E:
2415                         printf("Intel NEHALEM\n");
2416                         goto not_supported;
2417                         break;
2418                 case 0x25:      /* Per Intel document 253669-033US 12/2009. */
2419                 case 0x2C:      /* Per Intel document 253669-033US 12/2009. */
2420                         printf("Intel WESTMERE\n");
2421                         goto not_supported;
2422                         break;
2423                 case 0x2F:      /* Westmere-EX, seen in wild */
2424                         printf("Intel WESTMERE\n");
2425                         goto not_supported;
2426                         break;
2427                 case 0x2A:      /* Per Intel document 253669-039US 05/2011. */
2428                         printf("Intel SANDYBRIDGE\n");
2429                         set_sandybridge();
2430                         break;
2431                 case 0x2D:      /* Per Intel document 253669-044US 08/2012. */
2432                         printf("Intel SANDYBRIDGE_XEON\n");
2433                         set_sandybridge();
2434                         break;
2435                 case 0x3A:      /* Per Intel document 253669-043US 05/2012. */
2436                         printf("Intel IVYBRIDGE\n");
2437                         set_ivybridge();
2438                         break;
2439                 case 0x3E:      /* Per Intel document 325462-045US 01/2013. */
2440                         printf("Intel IVYBRIDGE_XEON\n");
2441                         set_ivybridge();
2442                         break;
2443                 case 0x3F:      /* Per Intel document 325462-045US 09/2014. */
2444                         printf("Intel HASWELL (Xeon)\n");
2445                         set_haswell();
2446                         break;
2447                 case 0x3C:      /* Per Intel document 325462-045US 01/2013. */
2448                 case 0x45:
2449                 case 0x46:
2450                         printf("Intel HASWELL\n");
2451                         set_haswell();
2452                         break;
2453
2454                 case 0x4e:
2455                 case 0x5e:
2456                         printf("Intel SKY-LAKE\n");
2457                         goto not_supported;
2458                         break;
2459                 case 0x3D:
2460                 case 0x47:
2461                         printf("Intel BROADWELL\n");
2462                         set_broadwell();
2463                         break;
2464                 case 0x4f:
2465                 case 0x56:
2466                         printf("Intel BROADWEL (Xeon)\n");
2467                         set_broadwell();
2468                         break;
2469
2470                 case 0x4D:
2471                         /* Per Intel document 330061-001 01/2014. */
2472                         printf("Intel ATOM_SILVERMONT\n");
2473                         goto not_supported;
2474                         break;
2475                 default:
2476                         printf("Intel model 0x%x is not known -- sorry\n",
2477                                model);
2478                         goto not_supported;
2479                         break;
2480                 }
2481                 break;
2482         case 0xF00:             /* P4 */
2483                 printf("Intel unknown model %d\n", model);
2484                 goto not_supported;
2485                 break;
2486         }
2487         do_cpuid(0xa, 0, reg);
2488         max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2489         printf("We have %d PMC counters to work with\n", max_pmc_counters);
2490         /* Ok lets load the list of all known PMC's */
2491         io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2492         if (valid_pmcs == NULL) {
2493                 /* Likely */
2494                 pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2495                 sz = sizeof(char *) * pmc_allocated_cnt;
2496                 valid_pmcs = malloc(sz);
2497                 if (valid_pmcs == NULL) {
2498                         printf("No memory allocation fails at startup?\n");     
2499                         exit(-1);
2500                 }
2501                 memset(valid_pmcs, 0, sz);
2502         }
2503         
2504         while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2505                 if (linebuf[0] != '\t') {
2506                         /* sometimes headers ;-) */
2507                         continue;
2508                 }
2509                 len = strlen(linebuf);
2510                 if (linebuf[(len-1)] == '\n') {
2511                         /* Likely */
2512                         linebuf[(len-1)] = 0;
2513                 }
2514                 str = &linebuf[1];
2515                 len = strlen(str) + 1;
2516                 valid_pmcs[valid_pmc_cnt] = malloc(len);
2517                 if (valid_pmcs[valid_pmc_cnt] == NULL) {
2518                         printf("No memory2 allocation fails at startup?\n");    
2519                         exit(-1);
2520                 }
2521                 memset(valid_pmcs[valid_pmc_cnt], 0, len);
2522                 strcpy(valid_pmcs[valid_pmc_cnt], str);
2523                 valid_pmc_cnt++;
2524                 if (valid_pmc_cnt >= pmc_allocated_cnt) {
2525                         /* Got to expand -- unlikely */
2526                         char **more;
2527
2528                         sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2529                         more = malloc(sz);
2530                         if (more == NULL) {
2531                                 printf("No memory3 allocation fails at startup?\n");    
2532                                 exit(-1);
2533                         }
2534                         memset(more, 0, sz);
2535                         memcpy(more, valid_pmcs, sz);
2536                         pmc_allocated_cnt *= 2;
2537                         free(valid_pmcs);
2538                         valid_pmcs = more;
2539                 }
2540         }
2541         my_pclose(io, pid_of_command);  
2542         return;
2543 not_supported:
2544         printf("Not supported\n");      
2545         exit(-1);
2546 }
2547
2548 static void
2549 explain_all(void)
2550 {
2551         int i;
2552         printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2553         printf("-------------------------------------------------------------\n");
2554         for(i=0; i<the_cpu.number; i++){
2555                 printf("For -e %s ", the_cpu.ents[i].name);
2556                 (*the_cpu.explain)(the_cpu.ents[i].name);
2557                 printf("----------------------------\n");
2558         }
2559 }
2560
/*
 * Check that a single PMC can be sampled and report Pass/Failed.
 *
 * pmc        - name of the PMC to try.
 * out_so_far - number of characters the caller already printed on the
 *              current line; the result is padded out to column 50.
 *
 * Runs "/usr/sbin/pmcstat -w .25 -c 0 -s <pmc>" and reads its first line.
 * Scanning from offset 2, the line is searched for either the text
 * "ERROR" (-> "Failed <line>") or the PMC name.  When the name is found
 * the second line is read and printed, right-aligned in a 20 character
 * field after "Pass", with its leading blanks removed.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	int len, llen, i, cmdlen;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for(i=0; i<len; i++) {
			printf(" ");
		}
	}
	cmdlen = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (cmdlen < 0 || (size_t)cmdlen >= sizeof(my_command)) {
		/* Never run a truncated command line */
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect: the PMC name itself */
	len = (int)strlen(pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = (int)strlen(line);
	if (line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	/*
	 * NOTE(review): the bound excludes offset llen-len, so a name that
	 * ends exactly at the end of the line is never compared -- confirm
	 * pmcstat always emits text after the name.
	 */
	for(i=2; i<(llen-len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], pmc, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Skip the leading blanks one character at a time;
			 * the terminating NUL stops the scan.
			 */
			j = 0;
			while (line[j] == ' ') {
				j++;
			}
			printf("Pass");
			len = (int)strlen(&line[j]);
			if (len < 20) {
				for(k=0; k<(20-len); k++) {
					printf(" ");
				}
			}
			if (len) {
				/* line still carries fgets' newline, if any */
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, pmc);
out:
	my_pclose(io, pid_of_command);
}
2633
/*
 * Add `name` to the string table `vars` unless it is already present.
 *
 * vars    - table of string pointers; entries [0, cur_cnt) are in use.
 * cur_cnt - number of entries in use; also the slot a new name goes into.
 * name    - string to add; a private heap copy is stored.
 *
 * Returns 1 when a copy was stored in vars[cur_cnt], 0 when an equal
 * string already exists in the table.  Exits the program when the target
 * slot is unexpectedly occupied or when memory cannot be allocated.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char *copy;
	int idx = 0;

	/* Duplicate check over the entries already in use */
	while (idx < cur_cnt) {
		if (strcmp(vars[idx], name) == 0)
			return (0);
		idx++;
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* New name: store a copy including its terminator */
	copy = malloc(strlen(name) + 1);
	if (copy == NULL) {
		printf("No memory %s\n", __func__);
		exit(-1);
	}
	strcpy(copy, name);
	vars[cur_cnt] = copy;
	return (1);
}
2661
2662 static char *
2663 build_command_for_exp(struct expression *exp)
2664 {
2665         /*
2666          * Build the pmcstat command to handle
2667          * the passed in expression.
2668          * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2669          * where NNN and QQQ represent the PMC's in the expression
2670          * uniquely..
2671          */
2672         char forming[1024];
2673         int cnt_pmc, alloced_pmcs, i;
2674         struct expression *at;
2675         char **vars, *cmd;
2676         size_t mal;
2677
2678         alloced_pmcs = cnt_pmc = 0;
2679         /* first how many do we have */
2680         at = exp;
2681         while (at) {
2682                 if (at->type == TYPE_VALUE_PMC) {
2683                         cnt_pmc++;
2684                 }
2685                 at = at->next;
2686         }
2687         if (cnt_pmc == 0) {
2688                 printf("No PMC's in your expression -- nothing to do!!\n");
2689                 exit(0);
2690         }
2691         mal = cnt_pmc * sizeof(char *);
2692         vars = malloc(mal);
2693         if (vars == NULL) {
2694                 printf("No memory\n");
2695                 exit(-1);
2696         }
2697         memset(vars, 0, mal);
2698         at = exp;
2699         while (at) {
2700                 if (at->type == TYPE_VALUE_PMC) {
2701                         if(add_it_to(vars, alloced_pmcs, at->name)) {
2702                                 alloced_pmcs++;
2703                         }
2704                 }
2705                 at = at->next;
2706         }
2707         /* Now we have a unique list in vars so create our command */
2708         mal = 23; /*    "/usr/sbin/pmcstat -w 1"  + \0 */
2709         for(i=0; i<alloced_pmcs; i++) {
2710                 mal += strlen(vars[i]) + 4;     /* var + " -s " */
2711         }
2712         cmd = malloc((mal+2));
2713         if (cmd == NULL) {
2714                 printf("%s out of mem\n", __FUNCTION__);
2715                 exit(-1);
2716         }
2717         memset(cmd, 0, (mal+2));
2718         strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2719         at = exp;
2720         for(i=0; i<alloced_pmcs; i++) {
2721                 sprintf(forming, " -s %s", vars[i]);
2722                 strcat(cmd, forming);
2723                 free(vars[i]);
2724                 vars[i] = NULL;
2725         }
2726         free(vars);
2727         return(cmd);
2728 }
2729
2730 static int
2731 user_expr(struct counters *cpu, int pos)
2732 {
2733         int ret;        
2734         double res;
2735         struct counters *var;
2736         struct expression *at;
2737
2738         at = master_exp;
2739         while (at) {
2740                 if (at->type == TYPE_VALUE_PMC) {
2741                         var = find_counter(cpu, at->name);
2742                         if (var == NULL) {
2743                                 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2744                                 exit(-1);
2745                         }
2746                         if (pos != -1) {
2747                                 at->value = var->vals[pos] * 1.0;
2748                         } else {
2749                                 at->value = var->sum * 1.0;
2750                         }
2751                 }
2752                 at = at->next;
2753         }
2754         res = run_expr(master_exp, 1, NULL);
2755         ret = printf("%1.3f", res);
2756         return(ret);
2757 }
2758
2759
2760 static void
2761 set_manual_exp(struct expression *exp)
2762 {
2763         expression = user_expr;
2764         command = build_command_for_exp(exp);
2765         threshold = "User defined threshold";
2766 }
2767
2768 static void
2769 run_tests(void)
2770 {
2771         int i, lenout;
2772         printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2773         printf("------------------------------------------------------------------------\n");
2774         for(i=0; i<valid_pmc_cnt; i++) {
2775                 lenout = printf("%s", valid_pmcs[i]);
2776                 fflush(stdout);
2777                 test_for_a_pmc(valid_pmcs[i], lenout);
2778         }
2779 }
2780 static void
2781 list_all(void)
2782 {
2783         int i, cnt, j;
2784         printf("PMC                                               Abbreviation\n");
2785         printf("--------------------------------------------------------------\n");
2786         for(i=0; i<valid_pmc_cnt; i++) {
2787                 cnt = printf("%s", valid_pmcs[i]);
2788                 for(j=cnt; j<52; j++) {
2789                         printf(" ");
2790                 }
2791                 printf("%%%d\n", i);
2792         }
2793 }
2794
2795
2796 int
2797 main(int argc, char **argv)
2798 {
2799         int i, j, cnt;
2800         char *filename=NULL;
2801         const char *name=NULL;
2802         int help_only = 0;
2803         int test_mode = 0;
2804         int test_at = 0;
2805
2806         get_cpuid_set();
2807         memset(glob_cpu, 0, sizeof(glob_cpu));
2808         while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2809                 switch (i) {
2810                 case 'A':
2811                         run_all = 1;
2812                         break;
2813                 case 'L':
2814                         list_all();
2815                         return(0);
2816                 case 'H':
2817                         printf("**********************************\n");
2818                         explain_all();
2819                         printf("**********************************\n");
2820                         return(0);
2821                         break;
2822                 case 'T':
2823                         test_mode = 1;
2824                         break;
2825                 case 'E':
2826                         master_exp = parse_expression(optarg);
2827                         if (master_exp) {
2828                                 set_manual_exp(master_exp);
2829                         }
2830                         break;
2831                 case 'e':
2832                         if (validate_expression(optarg)) {
2833                                 printf("Unknown expression %s\n", optarg);
2834                                 return(0);
2835                         }
2836                         name = optarg;
2837                         set_expression(optarg);
2838                         break;
2839                 case 'm':
2840                         max_to_collect = strtol(optarg, NULL, 0);
2841                         if (max_to_collect > MAX_COUNTER_SLOTS) {
2842                                 /* You can't collect more than max in array */
2843                                 max_to_collect = MAX_COUNTER_SLOTS;
2844                         }
2845                         break;
2846                 case 'v':
2847                         verbose++;
2848                         break;
2849                 case 'h':
2850                         help_only = 1;
2851                         break;
2852                 case 'i':
2853                         filename = optarg;
2854                         break;
2855                 case '?':
2856                 default:
2857                 use:
2858                         printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2859                                argv[0]);
2860                         printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2861                         printf("-v -- verbose dump debug type things -- you don't want this\n");
2862                         printf("-m N -- maximum to collect is N measurments\n");
2863                         printf("-e expr-name -- Do expression expr-name\n");
2864                         printf("-E 'your expression' -- Do your expression\n");
2865                         printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2866                         printf("-H -- Don't run anything, just explain all canned expressions\n");
2867                         printf("-T -- Test all PMC's defined by this processor\n");
2868                         printf("-A -- Run all canned tests\n");
2869                         return(0);
2870                         break;
2871                 }
2872         }
2873         if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2874             (test_mode == 0) && (master_exp == NULL)) {
2875                 printf("Without setting an expression we cannot dynamically gather information\n");
2876                 printf("you must supply a filename (and you probably want verbosity)\n");
2877                 goto use;
2878         }
2879         if (run_all && max_to_collect > 10) {
2880                 max_to_collect = 3;
2881         }
2882         if (test_mode) {
2883                 run_tests();
2884                 return(0);
2885         }
2886         printf("*********************************\n");
2887         if ((master_exp == NULL) && name) {
2888                 (*the_cpu.explain)(name);
2889         } else if (master_exp) {
2890                 printf("Examine your expression ");
2891                 print_exp(master_exp);
2892                 printf("User defined threshold\n");
2893         }
2894         if (help_only) {
2895                 return(0);
2896         }
2897         if (run_all) {
2898         more:
2899                 name = the_cpu.ents[test_at].name;
2900                 printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2901                 test_at++;
2902                 if (set_expression(name) == -1) {
2903                         if (test_at >= the_cpu.number) {
2904                                 goto done;
2905                         } else
2906                                 goto more;
2907                 }
2908
2909         }
2910         process_file(filename);
2911         if (verbose >= 2) {
2912                 for (i=0; i<ncnts; i++) {
2913                         printf("Counter:%s cpu:%d index:%d\n",
2914                                cnts[i].counter_name,
2915                                cnts[i].cpu, i);
2916                         for(j=0; j<cnts[i].pos; j++) {
2917                                 printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2918                         }
2919                         printf(" sum - %ld\n", (long int)cnts[i].sum);
2920                 }
2921         }
2922         if (expression == NULL) {
2923                 return(0);
2924         }
2925         if (max_to_collect > 1) {
2926                 for(i=0, cnt=0; i<MAX_CPU; i++) {
2927                         if (glob_cpu[i]) {
2928                                 do_expression(glob_cpu[i], -1);
2929                                 cnt++;
2930                                 if (cnt == cpu_count_out) {
2931                                         printf("\n");
2932                                         break;
2933                                 } else {
2934                                         printf("\t");
2935                                 }
2936                         }
2937                 }
2938         }
2939         if (run_all && (test_at < the_cpu.number)) {
2940                 memset(glob_cpu, 0, sizeof(glob_cpu));
2941                 ncnts = 0;
2942                 printf("*********************************\n");
2943                 goto more;
2944         } else if (run_all) {
2945         done:
2946                 printf("*********************************\n");
2947         }
2948         return(0);      
2949 }