]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.sbin/pmcstudy/pmcstudy.c
Copy libevent sources to contrib
[FreeBSD/FreeBSD.git] / usr.sbin / pmcstudy / pmcstudy.c
1 /*-
2  * Copyright (c) 2014, 2015 Netflix Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer,
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 #include <sys/types.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <sys/errno.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <getopt.h>
38 #include "eval_expr.h"
39 __FBSDID("$FreeBSD$");
40
/*
 * Sizing limits.  MAX_NLEN bounds a counter name, MAX_COUNTER_SLOTS the
 * number of per-counter samples and MAX_CPU the per-CPU table (see
 * struct counters and glob_cpu below).
 */
#define MAX_COUNTER_SLOTS 1024
#define MAX_NLEN 64
#define MAX_CPU 64

/* Initial size for the valid_pmcs table -- presumably; confirm at the allocation site. */
#define PMC_INITIAL_ALLOC 512

/* File-local settings, all starting from the defaults given here. */
static int max_pmc_counters = 1;
static int run_all = 0;
static int verbose = 0;

/* Process environment, handed to the shell started by my_popen(). */
extern char **environ;

/* Globals with external linkage: each is declared, then defined. */
extern struct expression *master_exp;
struct expression *master_exp = NULL;

extern char **valid_pmcs;
char **valid_pmcs = NULL;

extern int valid_pmc_cnt;
int valid_pmc_cnt = 0;

extern int pmc_allocated_cnt;
int pmc_allocated_cnt = 0;
60
61 /*
62  * The following two varients on popen and pclose with
63  * the cavet that they get you the PID so that you
64  * can supply it to pclose so it can send a SIGTERM 
65  *  to the process.
66  */
67 static FILE *
68 my_popen(const char *command, const char *dir, pid_t *p_pid)
69 {
70         FILE *io_out, *io_in;
71         int pdesin[2], pdesout[2];
72         char *argv[4];
73         pid_t pid;
74         char cmd[4];
75         char cmd2[1024];
76         char arg1[4];
77
78         if ((strcmp(dir, "r") != 0) &&
79             (strcmp(dir, "w") != 0)) {
80                 errno = EINVAL;
81                 return(NULL);
82         }
83         if (pipe(pdesin) < 0)
84                 return (NULL);
85
86         if (pipe(pdesout) < 0) {
87                 (void)close(pdesin[0]);
88                 (void)close(pdesin[1]);
89                 return (NULL);
90         }
91         strcpy(cmd, "sh");
92         strcpy(arg1, "-c");
93         strcpy(cmd2, command);
94         argv[0] = cmd;
95         argv[1] = arg1;
96         argv[2] = cmd2;
97         argv[3] = NULL;
98
99         switch (pid = fork()) {
100         case -1:                        /* Error. */
101                 (void)close(pdesin[0]);
102                 (void)close(pdesin[1]);
103                 (void)close(pdesout[0]);
104                 (void)close(pdesout[1]);
105                 return (NULL);
106                 /* NOTREACHED */
107         case 0:                         /* Child. */
108                 /* Close out un-used sides */
109                 (void)close(pdesin[1]);
110                 (void)close(pdesout[0]);
111                 /* Now prepare the stdin of the process */
112                 close(0);
113                 (void)dup(pdesin[0]);
114                 (void)close(pdesin[0]);
115                 /* Now prepare the stdout of the process */
116                 close(1);
117                 (void)dup(pdesout[1]);
118                 /* And lets do stderr just in case */
119                 close(2);
120                 (void)dup(pdesout[1]);
121                 (void)close(pdesout[1]);
122                 /* Now run it */
123                 execve("/bin/sh", argv, environ);
124                 exit(127);
125                 /* NOTREACHED */
126         }
127         /* Parent; assume fdopen can't fail. */
128         /* Store the pid */
129         *p_pid = pid;
130         if (strcmp(dir, "r") != 0) {
131                 io_out = fdopen(pdesin[1], "w");
132                 (void)close(pdesin[0]);
133                 (void)close(pdesout[0]);
134                 (void)close(pdesout[1]);
135                 return(io_out);
136         } else {
137                 /* Prepare the input stream */
138                 io_in = fdopen(pdesout[0], "r");
139                 (void)close(pdesout[1]);
140                 (void)close(pdesin[0]);
141                 (void)close(pdesin[1]);
142                 return (io_in);
143         }
144 }
145
/*
 * my_pclose --
 *      Close a stream obtained from my_popen(), send SIGTERM to the
 *      child identified by the_pid and wait for it to be reaped.
 *
 *      io      stream returned by my_popen(); NULL is tolerated.
 *      the_pid PID stored by my_popen().  Values <= 0 are ignored,
 *              because kill() and waitpid() would otherwise address a
 *              whole process group (or every process) rather than one
 *              child.
 *
 *      Nothing is returned; the child's exit status is discarded.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
        int pstat;
        pid_t pid;

        if (io != NULL)
                (void)fclose(io);
        if (the_pid <= 0)
                return;
        /* Die if you are not dead! */
        (void)kill(the_pid, SIGTERM);
        /* Reap the child, retrying when a signal interrupts the wait. */
        do {
                pid = waitpid(the_pid, &pstat, 0);
        } while (pid == -1 && errno == EINTR);
}
167
168 struct counters {
169         struct counters *next_cpu;
170         char counter_name[MAX_NLEN];            /* Name of counter */
171         int cpu;                                /* CPU we are on */
172         int pos;                                /* Index we are filling to. */
173         uint64_t vals[MAX_COUNTER_SLOTS];       /* Last 64 entries */
174         uint64_t sum;                           /* Summary of entries */
175 };
176
177 extern struct counters *glob_cpu[MAX_CPU];
178 struct counters *glob_cpu[MAX_CPU];
179
180 extern struct counters *cnts;
181 struct counters *cnts=NULL;
182
183 extern int ncnts;
184 int ncnts=0;
185
186 extern int (*expression)(struct counters *, int);
187 int (*expression)(struct counters *, int);
188
189 static const char *threshold=NULL;
190 static const char *command;
191
/*
 * Description of one named test.  Member order is kept as is so that
 * positional initializers elsewhere keep working.
 */
struct cpu_entry {
        const char *name;                       /* Name of the test */
        const char *thresh;                     /* Threshold text for the result */
        const char *command;                    /* Command string for the test */
        int (*func)(struct counters *, int);    /* Routine that prints the result */
        int counters_required;                  /* Number of counters needed */
};
199
/*
 * A CPU model: its type string, a table of tests and the routine that
 * explains a test by name (explain_name_sb() and friends have the
 * matching signature).
 */
struct cpu_type {
        char cputype[32];                       /* CPU type string */
        int number;                             /* Presumably the number of entries in ents -- confirm */
        struct cpu_entry *ents;                 /* Table of tests */
        void (*explain)(const char *name);      /* Prints the explanation for a test */
};

/* The CPU description in use. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
208
/*
 * explain_name_sb --
 *      Print the formula behind the test called "name" and the threshold
 *      at which its result becomes interesting ("sb" presumably stands
 *      for Sandy Bridge -- confirm against the CPU table).
 *
 *      name    test name, compared exactly with strcmp().
 *
 *      An unrecognised name prints "Unknown name:<name>" and reports the
 *      threshold as "unknown entry".  All output goes to stdout.
 */
static void
explain_name_sb(const char *name)
{
        static const struct {
                const char *key;        /* test name */
                const char *text;       /* formula text, newline terminated */
                const char *thresh;     /* threshold wording */
        } notes[] = {
                { "allocstall1",
                  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh > .05" },
                { "allocstall2",
                  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh > .05" },
                { "br_miss",
                  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .2" },
                /* The opening parenthesis was missing from this formula. */
                { "splitload",
                  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .1" },
                { "splitstore",
                  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
                  "thresh >= .01" },
                { "contested",
                  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .05" },
                { "blockstorefwd",
                  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .05" },
                { "cache2",
                  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
                  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
                  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
                  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
                  "thresh >= .2" },
                { "cache1",
                  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .2" },
                { "dtlbmissload",
                  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
                  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
                  "thresh >= .1" },
                { "frontendstall",
                  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
                  "thresh >= .15" },
                { "clears",
                  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
                  "          MACHINE_CLEARS.SMC + \n"
                  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .02" },
                { "microassist",
                  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
                  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
                  "thresh >= .05" },
                { "aliasing_4k",
                  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .1" },
                { "fpassist",
                  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
                  "look for a excessive value" },
                { "otherassistavx",
                  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
                  "look for a excessive value" },
                { "otherassistsse",
                  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
                  "look for a excessive value" },
                { "eff1",
                  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
                  "thresh < .9" },
                { "eff2",
                  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
                  "thresh > 1.0" },
                { "dtlbmissstore",
                  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
                  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
                  "thresh >= .05" },
        };
        const size_t count = sizeof(notes) / sizeof(notes[0]);
        const char *mythresh = "unknown entry";
        size_t i;

        for (i = 0; i < count; i++) {
                if (strcmp(name, notes[i].key) == 0)
                        break;
        }
        if (i < count) {
                fputs(notes[i].text, stdout);
                mythresh = notes[i].thresh;
        } else {
                printf("Unknown name:%s\n", name);
        }
        printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
287
/*
 * explain_name_ib --
 *      Print the formula behind the test called "name" and the threshold
 *      at which its result becomes interesting ("ib" presumably stands
 *      for Ivy Bridge -- confirm against the CPU table).
 *
 *      name    test name, compared exactly with strcmp().
 *
 *      An unrecognised name prints "Unknown name:<name>" and reports the
 *      threshold as "unknown entry".  All output goes to stdout.
 */
static void
explain_name_ib(const char *name)
{
        static const struct {
                const char *key;        /* test name */
                const char *text;       /* formula text, newline terminated */
                const char *thresh;     /* threshold wording */
        } notes[] = {
                { "br_miss",
                  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
                  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
                  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
                  "thresh >= .2" },
                { "eff1",
                  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
                  "thresh < .9" },
                { "eff2",
                  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
                  "thresh > 1.0" },
                { "cache1",
                  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .2" },
                { "cache2",
                  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .2" },
                { "itlbmiss",
                  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh > .05" },
                { "icachemiss",
                  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh > .05" },
                { "lcpstall",
                  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh > .05" },
                { "datashare",
                  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh > .05" },
                { "blockstorefwd",
                  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .05" },
                { "splitload",
                  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
                  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .1" },
                { "splitstore",
                  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
                  "thresh >= .01" },
                { "aliasing_4k",
                  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .1" },
                { "dtlbmissload",
                  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
                  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
                  "thresh >= .1" },
                { "dtlbmissstore",
                  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
                  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
                  "thresh >= .05" },
                { "contested",
                  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .05" },
                { "clears",
                  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
                  "          MACHINE_CLEARS.SMC + \n"
                  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .02" },
                { "microassist",
                  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
                  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
                  "thresh >= .05" },
                { "fpassist",
                  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
                  "look for a excessive value" },
                { "otherassistavx",
                  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
                  "look for a excessive value" },
                { "otherassistsse",
                  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
                  "look for a excessive value" },
        };
        const size_t count = sizeof(notes) / sizeof(notes[0]);
        const char *mythresh = "unknown entry";
        size_t i;

        for (i = 0; i < count; i++) {
                if (strcmp(name, notes[i].key) == 0)
                        break;
        }
        if (i < count) {
                fputs(notes[i].text, stdout);
                mythresh = notes[i].thresh;
        } else {
                printf("Unknown name:%s\n", name);
        }
        printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
369
370
/*
 * explain_name_has --
 *      Print the formula behind the test called "name" and the threshold
 *      at which its result becomes interesting ("has" presumably stands
 *      for Haswell -- confirm against the CPU table).
 *
 *      name    test name, compared exactly with strcmp().
 *
 *      An unrecognised name prints "Unknown name:<name>" and reports the
 *      threshold as "unknown entry".  All output goes to stdout.
 */
static void
explain_name_has(const char *name)
{
        static const struct {
                const char *key;        /* test name */
                const char *text;       /* formula text, newline terminated */
                const char *thresh;     /* threshold wording */
        } notes[] = {
                { "eff1",
                  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
                  "thresh < .75" },
                { "eff2",
                  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
                  "thresh > 1.0" },
                { "itlbmiss",
                  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh > .05" },
                { "icachemiss",
                  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh > .05" },
                { "lcpstall",
                  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh > .05" },
                { "cache1",
                  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .2" },
                { "cache2",
                  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
                  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
                  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
                  "          / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .2" },
                { "contested",
                  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .05" },
                { "datashare",
                  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh > .05" },
                { "blockstorefwd",
                  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .05" },
                { "splitload",
                  "Examine  (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .1" },
                { "splitstore",
                  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
                  "thresh >= .01" },
                { "aliasing_4k",
                  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .1" },
                { "dtlbmissload",
                  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
                  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
                  "thresh >= .1" },
                { "br_miss",
                  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
                  "thresh >= .2" },
                { "clears",
                  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
                  "          MACHINE_CLEARS.SMC + \n"
                  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
                  "thresh >= .02" },
                { "microassist",
                  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
                  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
                  "thresh >= .05" },
                { "fpassist",
                  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
                  "look for a excessive value" },
                { "otherassistavx",
                  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
                  "look for a excessive value" },
                { "otherassistsse",
                  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
                  "look for a excessive value" },
        };
        const size_t count = sizeof(notes) / sizeof(notes[0]);
        const char *mythresh = "unknown entry";
        size_t i;

        for (i = 0; i < count; i++) {
                if (strcmp(name, notes[i].key) == 0)
                        break;
        }
        if (i < count) {
                fputs(notes[i].text, stdout);
                mythresh = notes[i].thresh;
        } else {
                printf("Unknown name:%s\n", name);
        }
        printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
448
449
450
451 static struct counters *
452 find_counter(struct counters *base, const char *name)
453 {
454         struct counters *at;
455         int len;
456
457         at = base;
458         len = strlen(name);
459         while(at) {
460                 if (strncmp(at->counter_name, name, len) == 0) {
461                         return(at);
462                 }
463                 at = at->next_cpu;
464         }
465         printf("Can't find counter %s\n", name);
466         printf("We have:\n");
467         at = base;
468         while(at) {
469                 printf("- %s\n", at->counter_name);
470                 at = at->next_cpu;
471         }
472         exit(-1);
473 }
474
475 static int
476 allocstall1(struct counters *cpu, int pos)
477 {
478 /*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
479         int ret;
480         struct counters *partial;
481         struct counters *unhalt;
482         double un, par, res;
483         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
484         partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
485         if (pos != -1) {
486                 par = partial->vals[pos] * 1.0;
487                 un = unhalt->vals[pos] * 1.0;
488         } else {
489                 par = partial->sum * 1.0;
490                 un = unhalt->sum * 1.0;
491         }
492         res = par/un;
493         ret = printf("%1.3f", res);
494         return(ret);
495 }
496
497 static int
498 allocstall2(struct counters *cpu, int pos)
499 {
500 /*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
501         int ret;
502         struct counters *partial;
503         struct counters *unhalt;
504         double un, par, res;
505         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
506         partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
507         if (pos != -1) {
508                 par = partial->vals[pos] * 1.0;
509                 un = unhalt->vals[pos] * 1.0;
510         } else {
511                 par = partial->sum * 1.0;
512                 un = unhalt->sum * 1.0;
513         }
514         res = par/un;
515         ret = printf("%1.3f", res);
516         return(ret);
517 }
518
519 static int
520 br_mispredict(struct counters *cpu, int pos)
521 {
522         struct counters *brctr;
523         struct counters *unhalt;
524         int ret;
525 /*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
526         double br, un, con, res;
527         con = 20.0;
528         
529         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
530         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
531         if (pos != -1) {
532                 br = brctr->vals[pos] * 1.0;
533                 un = unhalt->vals[pos] * 1.0;
534         } else {
535                 br = brctr->sum * 1.0;
536                 un = unhalt->sum * 1.0;
537         }
538         res = (con * br)/un;
539         ret = printf("%1.3f", res);
540         return(ret);
541 }
542
543 static int
544 br_mispredictib(struct counters *cpu, int pos)
545 {
546         struct counters *brctr;
547         struct counters *unhalt;
548         struct counters *clear, *clear2, *clear3;
549         struct counters *uops;
550         struct counters *recv;  
551         struct counters *iss;
552 /*        "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
553         int ret;
554         /*  
555          * (BR_MISP_RETIRED.ALL_BRANCHES / 
556          *         (BR_MISP_RETIRED.ALL_BRANCHES +
557          *          MACHINE_CLEAR.COUNT) * 
558          *         ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
559          *
560          */
561         double br, cl, cl2, cl3, uo, re, un, con, res, is;
562         con = 4.0;
563         
564         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
565         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
566         clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
567         clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
568         clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
569         uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
570         iss = find_counter(cpu, "UOPS_ISSUED.ANY");
571         recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
572         if (pos != -1) {
573                 br = brctr->vals[pos] * 1.0;
574                 cl = clear->vals[pos] * 1.0;
575                 cl2 = clear2->vals[pos] * 1.0;
576                 cl3 = clear3->vals[pos] * 1.0;
577                 uo = uops->vals[pos] * 1.0;
578                 re = recv->vals[pos] * 1.0;
579                 is = iss->vals[pos] * 1.0;
580                 un = unhalt->vals[pos] * 1.0;
581         } else {
582                 br = brctr->sum * 1.0;
583                 cl = clear->sum * 1.0;
584                 cl2 = clear2->sum * 1.0;
585                 cl3 = clear3->sum * 1.0;
586                 uo = uops->sum * 1.0;
587                 re = recv->sum * 1.0;
588                 is = iss->sum * 1.0;
589                 un = unhalt->sum * 1.0;
590         }
591         res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
592         ret = printf("%1.3f", res);
593         return(ret);
594 }
595
596
597 static int
598 br_mispredict_broad(struct counters *cpu, int pos)
599 {
600         struct counters *brctr;
601         struct counters *unhalt;
602         struct counters *clear;
603         struct counters *uops;
604         struct counters *uops_ret;
605         struct counters *recv;
606         int ret;
607         double br, cl, uo, uo_r, re, con, un, res;
608
609         con = 4.0;
610         
611         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
612         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
613         clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
614         uops = find_counter(cpu, "UOPS_ISSUED.ANY");
615         uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
616         recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
617
618         if (pos != -1) {
619                 un = unhalt->vals[pos] * 1.0;
620                 br = brctr->vals[pos] * 1.0;
621                 cl = clear->vals[pos] * 1.0;
622                 uo = uops->vals[pos] * 1.0;
623                 uo_r = uops_ret->vals[pos] * 1.0;
624                 re = recv->vals[pos] * 1.0;
625         } else {
626                 un = unhalt->sum * 1.0;
627                 br = brctr->sum * 1.0;
628                 cl = clear->sum * 1.0;
629                 uo = uops->sum * 1.0;
630                 uo_r = uops_ret->sum * 1.0;
631                 re = recv->sum * 1.0;
632         }
633         res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
634         ret = printf("%1.3f", res);
635         return(ret);
636 }
637
638 static int
639 splitloadib(struct counters *cpu, int pos)
640 {
641         int ret;
642         struct counters *mem;
643         struct counters *l1d, *ldblock;
644         struct counters *unhalt;
645         double un, memd, res, l1, ldb;
646         /*  
647          * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
648          * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
649          */
650
651         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
652         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
653         l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
654         ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
655         if (pos != -1) {
656                 memd = mem->vals[pos] * 1.0;
657                 l1 = l1d->vals[pos] * 1.0;
658                 ldb = ldblock->vals[pos] * 1.0;
659                 un = unhalt->vals[pos] * 1.0;
660         } else {
661                 memd = mem->sum * 1.0;
662                 l1 = l1d->sum * 1.0;
663                 ldb = ldblock->sum * 1.0;
664                 un = unhalt->sum * 1.0;
665         }
666         res = ((l1 / memd) * ldb)/un;
667         ret = printf("%1.3f", res);
668         return(ret);
669 }
670
671
672 static int
673 splitload(struct counters *cpu, int pos)
674 {
675         int ret;
676         struct counters *mem;
677         struct counters *unhalt;
678         double con, un, memd, res;
679 /*  4  - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
680
681         con = 5.0;
682         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
683         mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
684         if (pos != -1) {
685                 memd = mem->vals[pos] * 1.0;
686                 un = unhalt->vals[pos] * 1.0;
687         } else {
688                 memd = mem->sum * 1.0;
689                 un = unhalt->sum * 1.0;
690         }
691         res = (memd * con)/un;
692         ret = printf("%1.3f", res);
693         return(ret);
694 }
695
696
697 static int
698 splitload_sb(struct counters *cpu, int pos)
699 {
700         int ret;
701         struct counters *mem;
702         struct counters *unhalt;
703         double con, un, memd, res;
704 /*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
705
706         con = 5.0;
707         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
708         mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
709         if (pos != -1) {
710                 memd = mem->vals[pos] * 1.0;
711                 un = unhalt->vals[pos] * 1.0;
712         } else {
713                 memd = mem->sum * 1.0;
714                 un = unhalt->sum * 1.0;
715         }
716         res = (memd * con)/un;
717         ret = printf("%1.3f", res);
718         return(ret);
719 }
720
721
722 static int
723 splitstore_sb(struct counters *cpu, int pos)
724 {
725         /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
726         int ret;
727         struct counters *mem_split;
728         struct counters *mem_stores;
729         double memsplit, memstore, res;
730         mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
731         mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
732         if (pos != -1) {
733                 memsplit = mem_split->vals[pos] * 1.0;
734                 memstore = mem_stores->vals[pos] * 1.0;
735         } else {
736                 memsplit = mem_split->sum * 1.0;
737                 memstore = mem_stores->sum * 1.0;
738         }
739         res = memsplit/memstore;
740         ret = printf("%1.3f", res);
741         return(ret);
742 }
743
744
745
746 static int
747 splitstore(struct counters *cpu, int pos)
748 {
749         /*  5  - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
750         int ret;
751         struct counters *mem_split;
752         struct counters *mem_stores;
753         double memsplit, memstore, res;
754         mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
755         mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
756         if (pos != -1) {
757                 memsplit = mem_split->vals[pos] * 1.0;
758                 memstore = mem_stores->vals[pos] * 1.0;
759         } else {
760                 memsplit = mem_split->sum * 1.0;
761                 memstore = mem_stores->sum * 1.0;
762         }
763         res = memsplit/memstore;
764         ret = printf("%1.3f", res);
765         return(ret);
766 }
767
768
769 static int
770 contested(struct counters *cpu, int pos)
771 {
772         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
773         int ret;
774         struct counters *mem;
775         struct counters *unhalt;
776         double con, un, memd, res;
777
778         con = 60.0;
779         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
780         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
781         if (pos != -1) {
782                 memd = mem->vals[pos] * 1.0;
783                 un = unhalt->vals[pos] * 1.0;
784         } else {
785                 memd = mem->sum * 1.0;
786                 un = unhalt->sum * 1.0;
787         }
788         res = (memd * con)/un;
789         ret = printf("%1.3f", res);
790         return(ret);
791 }
792
793 static int
794 contested_has(struct counters *cpu, int pos)
795 {
796         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
797         int ret;
798         struct counters *mem;
799         struct counters *unhalt;
800         double con, un, memd, res;
801
802         con = 84.0;
803         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
804         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
805         if (pos != -1) {
806                 memd = mem->vals[pos] * 1.0;
807                 un = unhalt->vals[pos] * 1.0;
808         } else {
809                 memd = mem->sum * 1.0;
810                 un = unhalt->sum * 1.0;
811         }
812         res = (memd * con)/un;
813         ret = printf("%1.3f", res);
814         return(ret);
815 }
816
817 static int
818 contestedbroad(struct counters *cpu, int pos)
819 {
820         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
821         int ret;
822         struct counters *mem;
823         struct counters *mem2;
824         struct counters *unhalt;
825         double con, un, memd, memtoo, res;
826
827         con = 84.0;
828         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
829         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
830         mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
831
832         if (pos != -1) {
833                 memd = mem->vals[pos] * 1.0;
834                 memtoo = mem2->vals[pos] * 1.0;
835                 un = unhalt->vals[pos] * 1.0;
836         } else {
837                 memd = mem->sum * 1.0;
838                 memtoo = mem2->sum * 1.0;
839                 un = unhalt->sum * 1.0;
840         }
841         res = ((memd * con) + memtoo)/un;
842         ret = printf("%1.3f", res);
843         return(ret);
844 }
845
846
847 static int
848 blockstoreforward(struct counters *cpu, int pos)
849 {
850         /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
851         int ret;
852         struct counters *ldb;
853         struct counters *unhalt;
854         double con, un, ld, res;
855
856         con = 13.0;
857         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
858         ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
859         if (pos != -1) {
860                 ld = ldb->vals[pos] * 1.0;
861                 un = unhalt->vals[pos] * 1.0;
862         } else {
863                 ld = ldb->sum * 1.0;
864                 un = unhalt->sum * 1.0;
865         }
866         res = (ld * con)/un;
867         ret = printf("%1.3f", res);
868         return(ret);
869 }
870
871 static int
872 cache2(struct counters *cpu, int pos)
873 {
874         /* ** Suspect ***
875          *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
876          *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
877          */
878         int ret;
879         struct counters *mem1, *mem2, *mem3;
880         struct counters *unhalt;
881         double con1, con2, con3, un, me_1, me_2, me_3, res;
882
883         con1 = 26.0;
884         con2 = 43.0;
885         con3 = 60.0;
886         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
887 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
888         mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
889         mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
890         mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
891         if (pos != -1) {
892                 me_1 = mem1->vals[pos] * 1.0;
893                 me_2 = mem2->vals[pos] * 1.0;
894                 me_3 = mem3->vals[pos] * 1.0;
895                 un = unhalt->vals[pos] * 1.0;
896         } else {
897                 me_1 = mem1->sum * 1.0;
898                 me_2 = mem2->sum * 1.0;
899                 me_3 = mem3->sum * 1.0;
900                 un = unhalt->sum * 1.0;
901         }
902         res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
903         ret = printf("%1.3f", res);
904         return(ret);
905 }
906
907 static int
908 datasharing(struct counters *cpu, int pos)
909 {
910         /* 
911          * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
912          */
913         int ret;
914         struct counters *mem;
915         struct counters *unhalt;
916         double con, res, me, un;
917
918         con = 43.0;
919         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
920         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
921         if (pos != -1) {
922                 me = mem->vals[pos] * 1.0;
923                 un = unhalt->vals[pos] * 1.0;
924         } else {
925                 me = mem->sum * 1.0;
926                 un = unhalt->sum * 1.0;
927         }
928         res = (me * con)/un;
929         ret = printf("%1.3f", res);
930         return(ret);
931
932 }
933
934
935 static int
936 datasharing_has(struct counters *cpu, int pos)
937 {
938         /* 
939          * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
940          */
941         int ret;
942         struct counters *mem;
943         struct counters *unhalt;
944         double con, res, me, un;
945
946         con = 72.0;
947         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
948         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
949         if (pos != -1) {
950                 me = mem->vals[pos] * 1.0;
951                 un = unhalt->vals[pos] * 1.0;
952         } else {
953                 me = mem->sum * 1.0;
954                 un = unhalt->sum * 1.0;
955         }
956         res = (me * con)/un;
957         ret = printf("%1.3f", res);
958         return(ret);
959
960 }
961
962
963 static int
964 cache2ib(struct counters *cpu, int pos)
965 {
966         /*
967          *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
968          */
969         int ret;
970         struct counters *mem;
971         struct counters *unhalt;
972         double con, un, me, res;
973
974         con = 29.0;
975         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
976         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
977         if (pos != -1) {
978                 me = mem->vals[pos] * 1.0;
979                 un = unhalt->vals[pos] * 1.0;
980         } else {
981                 me = mem->sum * 1.0;
982                 un = unhalt->sum * 1.0;
983         }
984         res = (con * me)/un; 
985         ret = printf("%1.3f", res);
986         return(ret);
987 }
988
989 static int
990 cache2has(struct counters *cpu, int pos)
991 {
992         /*
993          * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
994          *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
995          *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
996          *           / CPU_CLK_UNHALTED.THREAD_P
997          */
998         int ret;
999         struct counters *mem1, *mem2, *mem3;
1000         struct counters *unhalt;
1001         double con1, con2, con3, un, me1, me2, me3, res;
1002
1003         con1 = 36.0;
1004         con2 = 72.0;
1005         con3 = 84.0;
1006         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1007         mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1008         mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1009         mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1010         if (pos != -1) {
1011                 me1 = mem1->vals[pos] * 1.0;
1012                 me2 = mem2->vals[pos] * 1.0;
1013                 me3 = mem3->vals[pos] * 1.0;
1014                 un = unhalt->vals[pos] * 1.0;
1015         } else {
1016                 me1 = mem1->sum * 1.0;
1017                 me2 = mem2->sum * 1.0;
1018                 me3 = mem3->sum * 1.0;
1019                 un = unhalt->sum * 1.0;
1020         }
1021         res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1022         ret = printf("%1.3f", res);
1023         return(ret);
1024 }
1025
1026
1027 static int
1028 cache2broad(struct counters *cpu, int pos)
1029 {
1030         /*
1031          *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1032          */
1033         int ret;
1034         struct counters *mem;
1035         struct counters *unhalt;
1036         double con, un, me, res;
1037
1038         con = 36.0;
1039         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1040         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1041         if (pos != -1) {
1042                 me = mem->vals[pos] * 1.0;
1043                 un = unhalt->vals[pos] * 1.0;
1044         } else {
1045                 me = mem->sum * 1.0;
1046                 un = unhalt->sum * 1.0;
1047         }
1048         res = (con * me)/un; 
1049         ret = printf("%1.3f", res);
1050         return(ret);
1051 }
1052
1053
1054 static int
1055 cache1(struct counters *cpu, int pos)
1056 {
1057         /*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1058         int ret;
1059         struct counters *mem;
1060         struct counters *unhalt;
1061         double con, un, me, res;
1062
1063         con = 180.0;
1064         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1065         mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1066         if (pos != -1) {
1067                 me = mem->vals[pos] * 1.0;
1068                 un = unhalt->vals[pos] * 1.0;
1069         } else {
1070                 me = mem->sum * 1.0;
1071                 un = unhalt->sum * 1.0;
1072         }
1073         res = (me * con)/un;
1074         ret = printf("%1.3f", res);
1075         return(ret);
1076 }
1077
1078 static int
1079 cache1ib(struct counters *cpu, int pos)
1080 {
1081         /*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1082         int ret;
1083         struct counters *mem;
1084         struct counters *unhalt;
1085         double con, un, me, res;
1086
1087         con = 180.0;
1088         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1089         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1090         if (pos != -1) {
1091                 me = mem->vals[pos] * 1.0;
1092                 un = unhalt->vals[pos] * 1.0;
1093         } else {
1094                 me = mem->sum * 1.0;
1095                 un = unhalt->sum * 1.0;
1096         }
1097         res = (me * con)/un;
1098         ret = printf("%1.3f", res);
1099         return(ret);
1100 }
1101
1102
1103 static int
1104 cache1broad(struct counters *cpu, int pos)
1105 {
1106         /*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1107         int ret;
1108         struct counters *mem;
1109         struct counters *unhalt;
1110         double con, un, me, res;
1111
1112         con = 180.0;
1113         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1114         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1115         if (pos != -1) {
1116                 me = mem->vals[pos] * 1.0;
1117                 un = unhalt->vals[pos] * 1.0;
1118         } else {
1119                 me = mem->sum * 1.0;
1120                 un = unhalt->sum * 1.0;
1121         }
1122         res = (me * con)/un;
1123         ret = printf("%1.3f", res);
1124         return(ret);
1125 }
1126
1127
1128 static int
1129 dtlb_missload(struct counters *cpu, int pos)
1130 {
1131         /* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1132         int ret;
1133         struct counters *dtlb_m, *dtlb_d;
1134         struct counters *unhalt;
1135         double con, un, d1, d2, res;
1136
1137         con = 7.0;
1138         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1139         dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1140         dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
1141         if (pos != -1) {
1142                 d1 = dtlb_m->vals[pos] * 1.0;
1143                 d2 = dtlb_d->vals[pos] * 1.0;
1144                 un = unhalt->vals[pos] * 1.0;
1145         } else {
1146                 d1 = dtlb_m->sum * 1.0;
1147                 d2 = dtlb_d->sum * 1.0;
1148                 un = unhalt->sum * 1.0;
1149         }
1150         res = ((d1 * con) + d2)/un;
1151         ret = printf("%1.3f", res);
1152         return(ret);
1153 }
1154
1155 static int
1156 dtlb_missstore(struct counters *cpu, int pos)
1157 {
1158         /* 
1159          * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 
1160          * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 
1161          */
1162         int ret;
1163         struct counters *dtsb_m, *dtsb_d;
1164         struct counters *unhalt;
1165         double con, un, d1, d2, res;
1166
1167         con = 7.0;
1168         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1169         dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1170         dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
1171         if (pos != -1) {
1172                 d1 = dtsb_m->vals[pos] * 1.0;
1173                 d2 = dtsb_d->vals[pos] * 1.0;
1174                 un = unhalt->vals[pos] * 1.0;
1175         } else {
1176                 d1 = dtsb_m->sum * 1.0;
1177                 d2 = dtsb_d->sum * 1.0;
1178                 un = unhalt->sum * 1.0;
1179         }
1180         res = ((d1 * con) + d2)/un;
1181         ret = printf("%1.3f", res);
1182         return(ret);
1183 }
1184
1185 static int
1186 itlb_miss(struct counters *cpu, int pos)
1187 {
1188         /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1189         int ret;
1190         struct counters *itlb;
1191         struct counters *unhalt;
1192         double un, d1, res;
1193
1194         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1195         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1196         if (pos != -1) {
1197                 d1 = itlb->vals[pos] * 1.0;
1198                 un = unhalt->vals[pos] * 1.0;
1199         } else {
1200                 d1 = itlb->sum * 1.0;
1201                 un = unhalt->sum * 1.0;
1202         }
1203         res = d1/un;
1204         ret = printf("%1.3f", res);
1205         return(ret);
1206 }
1207
1208
1209 static int
1210 itlb_miss_broad(struct counters *cpu, int pos)
1211 {
1212         /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNTHREAD_P   */
1213         int ret;
1214         struct counters *itlb;
1215         struct counters *unhalt;
1216         struct counters *four_k;
1217         double un, d1, res, k;
1218
1219         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1220         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1221         four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
1222         if (pos != -1) {
1223                 d1 = itlb->vals[pos] * 1.0;
1224                 un = unhalt->vals[pos] * 1.0;
1225                 k = four_k->vals[pos] * 1.0;
1226         } else {
1227                 d1 = itlb->sum * 1.0;
1228                 un = unhalt->sum * 1.0;
1229                 k = four_k->sum * 1.0;
1230         }
1231         res = (7.0 * k + d1)/un;
1232         ret = printf("%1.3f", res);
1233         return(ret);
1234 }
1235
1236
1237 static int
1238 icache_miss(struct counters *cpu, int pos)
1239 {
1240         /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1241
1242         int ret;
1243         struct counters *itlb, *icache;
1244         struct counters *unhalt;
1245         double un, d1, ic, res;
1246
1247         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1248         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1249         icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1250         if (pos != -1) {
1251                 d1 = itlb->vals[pos] * 1.0;
1252                 ic = icache->vals[pos] * 1.0;
1253                 un = unhalt->vals[pos] * 1.0;
1254         } else {
1255                 d1 = itlb->sum * 1.0;
1256                 ic = icache->sum * 1.0;
1257                 un = unhalt->sum * 1.0;
1258         }
1259         res = (ic-d1)/un;
1260         ret = printf("%1.3f", res);
1261         return(ret);
1262
1263 }
1264
1265 static int
1266 icache_miss_has(struct counters *cpu, int pos)
1267 {
1268         /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1269
1270         int ret;
1271         struct counters *icache;
1272         struct counters *unhalt;
1273         double un, con, ic, res;
1274
1275         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1276         icache = find_counter(cpu, "ICACHE.MISSES");
1277         con = 36.0;
1278         if (pos != -1) {
1279                 ic = icache->vals[pos] * 1.0;
1280                 un = unhalt->vals[pos] * 1.0;
1281         } else {
1282                 ic = icache->sum * 1.0;
1283                 un = unhalt->sum * 1.0;
1284         }
1285         res = (con * ic)/un;
1286         ret = printf("%1.3f", res);
1287         return(ret);
1288
1289 }
1290
1291 static int
1292 lcp_stall(struct counters *cpu, int pos)
1293 {
1294          /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1295         int ret;
1296         struct counters *ild;
1297         struct counters *unhalt;
1298         double un, d1, res;
1299
1300         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1301         ild = find_counter(cpu, "ILD_STALL.LCP");
1302         if (pos != -1) {
1303                 d1 = ild->vals[pos] * 1.0;
1304                 un = unhalt->vals[pos] * 1.0;
1305         } else {
1306                 d1 = ild->sum * 1.0;
1307                 un = unhalt->sum * 1.0;
1308         }
1309         res = d1/un;
1310         ret = printf("%1.3f", res);
1311         return(ret);
1312
1313 }
1314
1315
1316 static int
1317 frontendstall(struct counters *cpu, int pos)
1318 {
1319       /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1320         int ret;
1321         struct counters *idq;
1322         struct counters *unhalt;
1323         double con, un, id, res;
1324
1325         con = 4.0;
1326         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1327         idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1328         if (pos != -1) {
1329                 id = idq->vals[pos] * 1.0;
1330                 un = unhalt->vals[pos] * 1.0;
1331         } else {
1332                 id = idq->sum * 1.0;
1333                 un = unhalt->sum * 1.0;
1334         }
1335         res = id/(un * con);
1336         ret = printf("%1.3f", res);
1337         return(ret);
1338 }
1339
1340 static int
1341 clears(struct counters *cpu, int pos)
1342 {
1343         /* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )  
1344          *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1345         
1346         int ret;
1347         struct counters *clr1, *clr2, *clr3;
1348         struct counters *unhalt;
1349         double con, un, cl1, cl2, cl3, res;
1350
1351         con = 100.0;
1352         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1353         clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1354         clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1355         clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1356         
1357         if (pos != -1) {
1358                 cl1 = clr1->vals[pos] * 1.0;
1359                 cl2 = clr2->vals[pos] * 1.0;
1360                 cl3 = clr3->vals[pos] * 1.0;
1361                 un = unhalt->vals[pos] * 1.0;
1362         } else {
1363                 cl1 = clr1->sum * 1.0;
1364                 cl2 = clr2->sum * 1.0;
1365                 cl3 = clr3->sum * 1.0;
1366                 un = unhalt->sum * 1.0;
1367         }
1368         res = ((cl1 + cl2 + cl3) * con)/un;
1369         ret = printf("%1.3f", res);
1370         return(ret);
1371 }
1372
1373
1374
1375 static int
1376 clears_broad(struct counters *cpu, int pos)
1377 {
1378         int ret;
1379         struct counters *clr1, *clr2, *clr3, *cyc;
1380         struct counters *unhalt;
1381         double con, un, cl1, cl2, cl3, cy, res;
1382
1383         con = 100.0;
1384         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1385         clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1386         clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1387         clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1388         cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
1389         if (pos != -1) {
1390                 cl1 = clr1->vals[pos] * 1.0;
1391                 cl2 = clr2->vals[pos] * 1.0;
1392                 cl3 = clr3->vals[pos] * 1.0;
1393                 cy = cyc->vals[pos] * 1.0;
1394                 un = unhalt->vals[pos] * 1.0;
1395         } else {
1396                 cl1 = clr1->sum * 1.0;
1397                 cl2 = clr2->sum * 1.0;
1398                 cl3 = clr3->sum * 1.0;
1399                 cy = cyc->sum * 1.0;
1400                 un = unhalt->sum * 1.0;
1401         }
1402         /* Formula not listed but extrapulated to add the cy ?? */
1403         res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1404         ret = printf("%1.3f", res);
1405         return(ret);
1406 }
1407
1408
1409
1410
1411
1412 static int
1413 microassist(struct counters *cpu, int pos)
1414 {
1415         /* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1416         int ret;
1417         struct counters *idq;
1418         struct counters *unhalt;
1419         double un, id, res, con;
1420
1421         con = 4.0;
1422         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1423         idq = find_counter(cpu, "IDQ.MS_UOPS");
1424         if (pos != -1) {
1425                 id = idq->vals[pos] * 1.0;
1426                 un = unhalt->vals[pos] * 1.0;
1427         } else {
1428                 id = idq->sum * 1.0;
1429                 un = unhalt->sum * 1.0;
1430         }
1431         res = id/(un * con);
1432         ret = printf("%1.3f", res);
1433         return(ret);
1434 }
1435
1436
1437 static int
1438 microassist_broad(struct counters *cpu, int pos)
1439 {
1440         int ret;
1441         struct counters *idq;
1442         struct counters *unhalt;
1443         struct counters *uopiss;
1444         struct counters *uopret;
1445         double un, id, res, con, uoi, uor;
1446
1447         con = 4.0;
1448         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1449         idq = find_counter(cpu, "IDQ.MS_UOPS");
1450         uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1451         uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1452         if (pos != -1) {
1453                 id = idq->vals[pos] * 1.0;
1454                 un = unhalt->vals[pos] * 1.0;
1455                 uoi = uopiss->vals[pos] * 1.0;
1456                 uor = uopret->vals[pos] * 1.0;
1457         } else {
1458                 id = idq->sum * 1.0;
1459                 un = unhalt->sum * 1.0;
1460                 uoi = uopiss->sum * 1.0;
1461                 uor = uopret->sum * 1.0;
1462         }
1463         res = (uor/uoi) * (id/(un * con));
1464         ret = printf("%1.3f", res);
1465         return(ret);
1466 }
1467
1468
1469 static int
1470 aliasing(struct counters *cpu, int pos)
1471 {
1472         /* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1473         int ret;        
1474         struct counters *ld;
1475         struct counters *unhalt;
1476         double un, lds, con, res;
1477
1478         con = 5.0;
1479         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1480         ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1481         if (pos != -1) {
1482                 lds = ld->vals[pos] * 1.0;
1483                 un = unhalt->vals[pos] * 1.0;
1484         } else {
1485                 lds = ld->sum * 1.0;
1486                 un = unhalt->sum * 1.0;
1487         }
1488         res = (lds * con)/un;
1489         ret = printf("%1.3f", res);
1490         return(ret);
1491 }
1492
1493 static int
1494 aliasing_broad(struct counters *cpu, int pos)
1495 {
1496         /* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1497         int ret;        
1498         struct counters *ld;
1499         struct counters *unhalt;
1500         double un, lds, con, res;
1501
1502         con = 7.0;
1503         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1504         ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1505         if (pos != -1) {
1506                 lds = ld->vals[pos] * 1.0;
1507                 un = unhalt->vals[pos] * 1.0;
1508         } else {
1509                 lds = ld->sum * 1.0;
1510                 un = unhalt->sum * 1.0;
1511         }
1512         res = (lds * con)/un;
1513         ret = printf("%1.3f", res);
1514         return(ret);
1515 }
1516
1517
1518 static int
1519 fpassists(struct counters *cpu, int pos)
1520 {
1521         /* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1522         int ret;        
1523         struct counters *fp;
1524         struct counters *inst;
1525         double un, fpd, res;
1526
1527         inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1528         fp = find_counter(cpu, "FP_ASSIST.ANY");
1529         if (pos != -1) {
1530                 fpd = fp->vals[pos] * 1.0;
1531                 un = inst->vals[pos] * 1.0;
1532         } else {
1533                 fpd = fp->sum * 1.0;
1534                 un = inst->sum * 1.0;
1535         }
1536         res = fpd/un;
1537         ret = printf("%1.3f", res);
1538         return(ret);
1539 }
1540
1541 static int
1542 otherassistavx(struct counters *cpu, int pos)
1543 {
1544         /* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1545         int ret;        
1546         struct counters *oth;
1547         struct counters *unhalt;
1548         double un, ot, con, res;
1549
1550         con = 75.0;
1551         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1552         oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1553         if (pos != -1) {
1554                 ot = oth->vals[pos] * 1.0;
1555                 un = unhalt->vals[pos] * 1.0;
1556         } else {
1557                 ot = oth->sum * 1.0;
1558                 un = unhalt->sum * 1.0;
1559         }
1560         res = (ot * con)/un;
1561         ret = printf("%1.3f", res);
1562         return(ret);
1563 }
1564
1565 static int
1566 otherassistsse(struct counters *cpu, int pos)
1567 {
1568
1569         int ret;        
1570         struct counters *oth;
1571         struct counters *unhalt;
1572         double un, ot, con, res;
1573
1574         /* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1575         con = 75.0;
1576         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1577         oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1578         if (pos != -1) {
1579                 ot = oth->vals[pos] * 1.0;
1580                 un = unhalt->vals[pos] * 1.0;
1581         } else {
1582                 ot = oth->sum * 1.0;
1583                 un = unhalt->sum * 1.0;
1584         }
1585         res = (ot * con)/un;
1586         ret = printf("%1.3f", res);
1587         return(ret);
1588 }
1589
1590 static int
1591 efficiency1(struct counters *cpu, int pos)
1592 {
1593
1594         int ret;        
1595         struct counters *uops;
1596         struct counters *unhalt;
1597         double un, ot, con, res;
1598
1599         /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1600         con = 4.0;
1601         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1602         uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1603         if (pos != -1) {
1604                 ot = uops->vals[pos] * 1.0;
1605                 un = unhalt->vals[pos] * 1.0;
1606         } else {
1607                 ot = uops->sum * 1.0;
1608                 un = unhalt->sum * 1.0;
1609         }
1610         res = ot/(con * un);
1611         ret = printf("%1.3f", res);
1612         return(ret);
1613 }
1614
1615 static int
1616 efficiency2(struct counters *cpu, int pos)
1617 {
1618
1619         int ret;        
1620         struct counters *uops;
1621         struct counters *unhalt;
1622         double un, ot, res;
1623
1624         /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1625         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1626         uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1627         if (pos != -1) {
1628                 ot = uops->vals[pos] * 1.0;
1629                 un = unhalt->vals[pos] * 1.0;
1630         } else {
1631                 ot = uops->sum * 1.0;
1632                 un = unhalt->sum * 1.0;
1633         }
1634         res = un/ot;
1635         ret = printf("%1.3f", res);
1636         return(ret);
1637 }
1638
1639 #define SANDY_BRIDGE_COUNT 20   
1640 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1641 /*01*/  { "allocstall1", "thresh > .05", 
1642           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1643           allocstall1, 2 },
1644 /* -- not defined for SB right (partial-rat_stalls) 02*/
1645         { "allocstall2", "thresh > .05", 
1646           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1647           allocstall2, 2 },
1648 /*03*/  { "br_miss", "thresh >= .2", 
1649           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1650           br_mispredict, 2 },
1651 /*04*/  { "splitload", "thresh >= .1", 
1652           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1653           splitload_sb, 2 },
1654 /* 05*/ { "splitstore", "thresh >= .01", 
1655           "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1656           splitstore_sb, 2 },
1657 /*06*/  { "contested", "thresh >= .05", 
1658           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1659           contested, 2 },
1660 /*07*/  { "blockstorefwd", "thresh >= .05", 
1661           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1662           blockstoreforward, 2 },
1663 /*08*/  { "cache2", "thresh >= .2", 
1664           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1665           cache2, 4 },
1666 /*09*/  { "cache1", "thresh >= .2", 
1667           "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1668           cache1, 2 },
1669 /*10*/  { "dtlbmissload", "thresh >= .1", 
1670           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1671           dtlb_missload, 3 },
1672 /*11*/  { "dtlbmissstore", "thresh >= .05", 
1673           "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1674           dtlb_missstore, 3 },
1675 /*12*/  { "frontendstall", "thresh >= .15", 
1676           "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1677           frontendstall, 2 },
1678 /*13*/  { "clears", "thresh >= .02", 
1679           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1680           clears, 4 },
1681 /*14*/  { "microassist", "thresh >= .05", 
1682           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1683           microassist, 2 },
1684 /*15*/  { "aliasing_4k", "thresh >= .1", 
1685           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1686           aliasing, 2 },
1687 /*16*/  { "fpassist", "look for a excessive value", 
1688           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1689           fpassists, 2 },
1690 /*17*/  { "otherassistavx", "look for a excessive value", 
1691           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1692           otherassistavx, 2},
1693 /*18*/  { "otherassistsse", "look for a excessive value", 
1694           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1695           otherassistsse, 2 },
1696 /*19*/  { "eff1", "thresh < .9", 
1697           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1698           efficiency1, 2 },
1699 /*20*/  { "eff2", "thresh > 1.0", 
1700           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1701           efficiency2, 2 },
1702 };
1703
1704
1705 #define IVY_BRIDGE_COUNT 21
1706 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1707 /*1*/   { "eff1", "thresh < .75", 
1708           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1709           efficiency1, 2 },
1710 /*2*/   { "eff2", "thresh > 1.0", 
1711           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1712           efficiency2, 2 },
1713 /*3*/   { "itlbmiss", "thresh > .05", 
1714           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1715           itlb_miss, 2 },
1716 /*4*/   { "icachemiss", "thresh > .05", 
1717           "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1718           icache_miss, 3 },
1719 /*5*/   { "lcpstall", "thresh > .05", 
1720           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1721           lcp_stall, 2 },
1722 /*6*/   { "cache1", "thresh >= .2", 
1723           "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1724           cache1ib, 2 },
1725 /*7*/   { "cache2", "thresh >= .2", 
1726           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1727           cache2ib, 2 },
1728 /*8*/   { "contested", "thresh >= .05", 
1729           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1730           contested, 2 },
1731 /*9*/   { "datashare", "thresh >= .05",
1732           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1733           datasharing, 2 },
1734 /*10*/  { "blockstorefwd", "thresh >= .05", 
1735           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1736           blockstoreforward, 2 },
1737 /*11*/  { "splitload", "thresh >= .1", 
1738           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1739           splitloadib, 4 },
1740 /*12*/  { "splitstore", "thresh >= .01", 
1741           "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1742           splitstore, 2 },
1743 /*13*/  { "aliasing_4k", "thresh >= .1", 
1744           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1745           aliasing, 2 },
1746 /*14*/  { "dtlbmissload", "thresh >= .1", 
1747           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1748           dtlb_missload , 3},
1749 /*15*/  { "dtlbmissstore", "thresh >= .05", 
1750           "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1751           dtlb_missstore, 3 },
1752 /*16*/  { "br_miss", "thresh >= .2", 
1753           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1754           br_mispredictib, 8 },
1755 /*17*/  { "clears", "thresh >= .02", 
1756           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1757           clears, 4 },
1758 /*18*/  { "microassist", "thresh >= .05", 
1759           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1760           microassist, 2 },
1761 /*19*/  { "fpassist", "look for a excessive value", 
1762           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1763           fpassists, 2 },
1764 /*20*/  { "otherassistavx", "look for a excessive value", 
1765           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1766           otherassistavx , 2},
1767 /*21*/  { "otherassistsse", "look for a excessive value", 
1768           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1769           otherassistsse, 2 },
1770 };
1771
1772 #define HASWELL_COUNT 20
1773 static struct cpu_entry haswell[HASWELL_COUNT] = {
1774 /*1*/   { "eff1", "thresh < .75", 
1775           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1776           efficiency1, 2 },
1777 /*2*/   { "eff2", "thresh > 1.0", 
1778           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1779           efficiency2, 2 },
1780 /*3*/   { "itlbmiss", "thresh > .05", 
1781           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1782           itlb_miss, 2 },
1783 /*4*/   { "icachemiss", "thresh > .05", 
1784           "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1785           icache_miss_has, 2 },
1786 /*5*/   { "lcpstall", "thresh > .05", 
1787           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1788           lcp_stall, 2 },
1789 /*6*/   { "cache1", "thresh >= .2", 
1790           "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1791           cache1ib, 2 },
1792 /*7*/   { "cache2", "thresh >= .2", 
1793           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1794           cache2has, 4 },
1795 /*8*/   { "contested", "thresh >= .05", 
1796           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1797           contested_has, 2 },
1798 /*9*/   { "datashare", "thresh >= .05",
1799           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1800           datasharing_has, 2 },
1801 /*10*/  { "blockstorefwd", "thresh >= .05", 
1802           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1803           blockstoreforward, 2 },
1804 /*11*/  { "splitload", "thresh >= .1", 
1805           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1806           splitload , 2},
1807 /*12*/  { "splitstore", "thresh >= .01", 
1808           "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1809           splitstore, 2 },
1810 /*13*/  { "aliasing_4k", "thresh >= .1", 
1811           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1812           aliasing, 2 },
1813 /*14*/  { "dtlbmissload", "thresh >= .1", 
1814           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1815           dtlb_missload, 3 },
1816 /*15*/  { "br_miss", "thresh >= .2", 
1817           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1818           br_mispredict, 2 },
1819 /*16*/  { "clears", "thresh >= .02", 
1820           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1821           clears, 4 },
1822 /*17*/  { "microassist", "thresh >= .05", 
1823           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1824           microassist, 2 },
1825 /*18*/  { "fpassist", "look for a excessive value", 
1826           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1827           fpassists, 2 },
1828 /*19*/  { "otherassistavx", "look for a excessive value", 
1829           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1830           otherassistavx, 2 },
1831 /*20*/  { "otherassistsse", "look for a excessive value", 
1832           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1833           otherassistsse, 2 },
1834 };
1835
1836
/*
 * Print a human readable explanation of the Broadwell test called 'name':
 * first the formula being examined, then a closing sentence quoting the
 * threshold of interest.  An unrecognised name prints "Unknown name:" and
 * uses the placeholder threshold text "unknown entry".
 *
 * The event names in the itlbmiss and br_miss text were misspelled
 * (ITLB_MISSES_STLB_HIT_4K, ALL_BRANCHS_PS, BR_MISP_RETIED, UOPS_ISSUEDF);
 * they are spelled here the way the events appear in the broadwell table.
 */
static void
explain_name_broad(const char *name)
{
        const char *mythresh;

        if (strcmp(name, "eff1") == 0) {
                printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
                mythresh = "thresh < .75";
        } else if (strcmp(name, "eff2") == 0) {
                printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
                mythresh = "thresh > 1.0";
        } else if (strcmp(name, "itlbmiss") == 0) {
                printf("Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh > .05";
        } else if (strcmp(name, "icachemiss") == 0) {
                printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n");
                mythresh = "thresh > .05";
        } else if (strcmp(name, "lcpstall") == 0) {
                printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh > .05";
        } else if (strcmp(name, "cache1") == 0) {
                printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh >= .1";
        } else if (strcmp(name, "cache2") == 0) {
                printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n");
                mythresh = "thresh >= .2";
        } else if (strcmp(name, "contested") == 0) {
                printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) +  MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh >= .05";
        } else if (strcmp(name, "datashare") == 0) {
                printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh > .05";
        } else if (strcmp(name, "blockstorefwd") == 0) {
                printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh >= .05";
        } else if (strcmp(name, "aliasing_4k") == 0) {
                printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh >= .1";
        } else if (strcmp(name, "dtlbmissload") == 0) {
                printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
                printf("         / CPU_CLK_UNHALTED.THREAD_P)\n");
                mythresh = "thresh >= .1";
        } else if (strcmp(name, "br_miss") == 0) {
                /* Event names corrected: ALL_BRANCHES_PS, BR_MISP_RETIRED, UOPS_ISSUED. */
                printf("Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n");
                printf(" (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n");
                printf("CPU_CLK_UNHALTED.THREAD * 4)\n");
                mythresh = "thresh >= .2";
        } else if (strcmp(name, "clears") == 0) {
                printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
                printf("          MACHINE_CLEARS.SMC + \n");
                printf("          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "thresh >= .02";
        } else if (strcmp(name, "fpassist") == 0) {
                printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
                mythresh = "look for a excessive value";
        } else if (strcmp(name, "otherassistavx") == 0) {
                printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
                mythresh = "look for a excessive value";
        } else if (strcmp(name, "microassist") == 0) {
                printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
                printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
                mythresh = "thresh >= .05";
        } else {
                printf("Unknown name:%s\n", name);
                mythresh = "unknown entry";
        }
        printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
1905
1906
1907 #define BROADWELL_COUNT 17
1908 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1909 /*1*/   { "eff1", "thresh < .75", 
1910           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1911           efficiency1, 2 }, 
1912 /*2*/   { "eff2", "thresh > 1.0", 
1913           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1914           efficiency2, 2 },
1915 /*3*/   { "itlbmiss", "thresh > .05", 
1916           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1917           itlb_miss_broad, 3 },
1918 /*4*/   { "icachemiss", "thresh > .05", 
1919           "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1920           icache_miss_has, 2 },
1921 /*5*/   { "lcpstall", "thresh > .05", 
1922           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1923           lcp_stall, 2 },
1924 /*6*/   { "cache1", "thresh >= .1", 
1925           "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1926           cache1broad, 2 },
1927 /*7*/   { "cache2", "thresh >= .2", 
1928           "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1929           cache2broad, 2 },
1930 /*8*/   { "contested", "thresh >= .05", 
1931           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
1932           contestedbroad, 2 },
1933 /*9*/   { "datashare", "thresh >= .05",
1934           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1935           datasharing_has, 2 },
1936 /*10*/  { "blockstorefwd", "thresh >= .05", 
1937           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1938           blockstoreforward, 2 },
1939 /*11*/  { "aliasing_4k", "thresh >= .1", 
1940           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1941           aliasing_broad, 2 }, 
1942 /*12*/  { "dtlbmissload", "thresh >= .1", 
1943           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1944           dtlb_missload, 3 },
1945 /*13*/  { "br_miss", "thresh >= .2", 
1946           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1947           br_mispredict_broad, 7 },
1948 /*14*/  { "clears", "thresh >= .02", 
1949           "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1950           clears_broad, 5 },
1951 /*15*/  { "fpassist", "look for a excessive value", 
1952           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1953           fpassists, 2 },
1954 /*16*/  { "otherassistavx", "look for a excessive value", 
1955           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1956           otherassistavx, 2 },
1957 /*17*/  { "microassist", "thresh >= .2", 
1958           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS  -w 1",
1959           microassist_broad, 4 },
1960 };
1961
1962
1963 static void
1964 set_sandybridge(void)
1965 {
1966         strcpy(the_cpu.cputype, "SandyBridge PMC");
1967         the_cpu.number = SANDY_BRIDGE_COUNT;
1968         the_cpu.ents = sandy_bridge;
1969         the_cpu.explain = explain_name_sb;
1970 }
1971
1972 static void
1973 set_ivybridge(void)
1974 {
1975         strcpy(the_cpu.cputype, "IvyBridge PMC");
1976         the_cpu.number = IVY_BRIDGE_COUNT;
1977         the_cpu.ents = ivy_bridge;
1978         the_cpu.explain = explain_name_ib;
1979 }
1980
1981
1982 static void
1983 set_haswell(void)
1984 {
1985         strcpy(the_cpu.cputype, "HASWELL PMC");
1986         the_cpu.number = HASWELL_COUNT;
1987         the_cpu.ents = haswell;
1988         the_cpu.explain = explain_name_has;
1989 }
1990
1991
1992 static void
1993 set_broadwell(void)
1994 {
1995         strcpy(the_cpu.cputype, "HASWELL PMC");
1996         the_cpu.number = BROADWELL_COUNT;
1997         the_cpu.ents = broadwell;
1998         the_cpu.explain = explain_name_broad;
1999 }
2000
2001
2002 static int
2003 set_expression(const char *name)
2004 {
2005         int found = 0, i;
2006         for(i=0 ; i< the_cpu.number; i++) {
2007                 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2008                         found = 1;
2009                         expression = the_cpu.ents[i].func;
2010                         command = the_cpu.ents[i].command;
2011                         threshold = the_cpu.ents[i].thresh;
2012                         if  (the_cpu.ents[i].counters_required > max_pmc_counters) {
2013                                 printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2014                                        the_cpu.ents[i].name,
2015                                        the_cpu.ents[i].counters_required, max_pmc_counters);
2016                                 printf("Sorry this test can not be run\n");
2017                                 if (run_all == 0) {
2018                                         exit(-1);
2019                                 } else {
2020                                         return(-1);
2021                                 }
2022                         }
2023                         break;
2024                 }
2025         }
2026         if (!found) {
2027                 printf("For CPU type %s we have no expression:%s\n",
2028                        the_cpu.cputype, name);
2029                 exit(-1);
2030         }
2031         return(0);
2032 }
2033
2034
2035
2036
2037
2038 static int
2039 validate_expression(char *name) 
2040 {
2041         int i, found;
2042
2043         found = 0;
2044         for(i=0 ; i< the_cpu.number; i++) {
2045                 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2046                         found = 1;
2047                         break;
2048                 }
2049         }
2050         if (!found) {
2051                 return(-1);
2052         }
2053         return (0);
2054 }
2055
2056 static void
2057 do_expression(struct counters *cpu, int pos)
2058 {
2059         if (expression == NULL) 
2060                 return;
2061         (*expression)(cpu, pos);
2062 }
2063
2064 static void
2065 process_header(int idx, char *p)
2066 {
2067         struct counters *up;
2068         int i, len, nlen;
2069         /* 
2070          * Given header element idx, at p in
2071          * form 's/NN/nameof'
2072          * process the entry to pull out the name and
2073          * the CPU number.
2074          */
2075         if (strncmp(p, "s/", 2)) {
2076                 printf("Check -- invalid header no s/ in %s\n",
2077                        p);
2078                 return;
2079         }
2080         up = &cnts[idx];
2081         up->cpu = strtol(&p[2], NULL, 10);
2082         len = strlen(p);
2083         for (i=2; i<len; i++) {
2084                 if (p[i] == '/') {
2085                         nlen = strlen(&p[(i+1)]);
2086                         if (nlen < (MAX_NLEN-1)) {
2087                                 strcpy(up->counter_name, &p[(i+1)]);
2088                         } else {
2089                                 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
2090                         }
2091                 }
2092         }
2093 }
2094
2095 static void
2096 build_counters_from_header(FILE *io)
2097 {
2098         char buffer[8192], *p;
2099         int i, len, cnt;
2100         size_t mlen;
2101
2102         /* We have a new start, lets 
2103          * setup our headers and cpus.
2104          */
2105         if (fgets(buffer, sizeof(buffer), io) == NULL) {
2106                 printf("First line can't be read from file err:%d\n", errno);
2107                 return;
2108         }
2109         /*
2110          * Ok output is an array of counters. Once
2111          * we start to read the values in we must
2112          * put them in there slot to match there CPU and 
2113          * counter being updated. We create a mass array
2114          * of the counters, filling in the CPU and 
2115          * counter name. 
2116          */
2117         /* How many do we get? */
2118         len = strlen(buffer);
2119         for (i=0, cnt=0; i<len; i++) {
2120                 if (strncmp(&buffer[i], "s/", 2) == 0) {
2121                         cnt++;
2122                         for(;i<len;i++) {
2123                                 if (buffer[i] == ' ')
2124                                         break;
2125                         }
2126                 }
2127         }
2128         mlen = sizeof(struct counters) * cnt;
2129         cnts = malloc(mlen);
2130         ncnts = cnt;
2131         if (cnts == NULL) {
2132                 printf("No memory err:%d\n", errno);
2133                 return;
2134         }
2135         memset(cnts, 0, mlen);
2136         for (i=0, cnt=0; i<len; i++) {
2137                 if (strncmp(&buffer[i], "s/", 2) == 0) {
2138                         p = &buffer[i];
2139                         for(;i<len;i++) {
2140                                 if (buffer[i] == ' ') {
2141                                         buffer[i] = 0;
2142                                         break;
2143                                 }
2144                         }
2145                         process_header(cnt, p);
2146                         cnt++;
2147                 }
2148         }
2149         if (verbose)
2150                 printf("We have %d entries\n", cnt);    
2151 }
/*
 * Number of data lines process_file() reads before it stops collecting;
 * starts out at MAX_COUNTER_SLOTS.
 */
extern int max_to_collect;
int max_to_collect = MAX_COUNTER_SLOTS;
2154
2155 static int
2156 read_a_line(FILE *io) 
2157 {
2158         char buffer[8192], *p, *stop;   
2159         int pos, i;
2160
2161         if (fgets(buffer, sizeof(buffer), io) == NULL) {
2162                 return(0);
2163         }
2164         p = buffer;
2165         for (i=0; i<ncnts; i++) {
2166                 pos = cnts[i].pos;
2167                 cnts[i].vals[pos] = strtol(p, &stop, 0);
2168                 cnts[i].pos++;
2169                 cnts[i].sum += cnts[i].vals[pos];
2170                 p = stop;
2171         }
2172         return (1);
2173 }
2174
/*
 * Number of CPUs with an entry in glob_cpu[]; set by print_header() and
 * used by process_file() to know when a row of output is complete.
 */
extern int cpu_count_out;
int cpu_count_out=0;
2177
2178 static void
2179 print_header(void)
2180 {
2181         int i, cnt, printed_cnt;
2182
2183         printf("*********************************\n");
2184         for(i=0, cnt=0; i<MAX_CPU; i++) {
2185                 if (glob_cpu[i]) {
2186                         cnt++;
2187                 }
2188         }       
2189         cpu_count_out = cnt;
2190         for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2191                 if (glob_cpu[i]) {
2192                         printf("CPU%d", i);
2193                         printed_cnt++;
2194                 }
2195                 if (printed_cnt == cnt) {
2196                         printf("\n");
2197                         break;
2198                 } else {
2199                         printf("\t");
2200                 }
2201         }
2202 }
2203
2204 static void
2205 lace_cpus_together(void)
2206 {
2207         int i, j, lace_cpu;
2208         struct counters *cpat, *at;
2209
2210         for(i=0; i<ncnts; i++) {
2211                 cpat = &cnts[i];
2212                 if (cpat->next_cpu) {
2213                         /* Already laced in */
2214                         continue;
2215                 }
2216                 lace_cpu = cpat->cpu;
2217                 if (lace_cpu >= MAX_CPU) {
2218                         printf("CPU %d to big\n", lace_cpu);
2219                         continue;
2220                 }
2221                 if (glob_cpu[lace_cpu] == NULL) {
2222                         glob_cpu[lace_cpu] = cpat;
2223                 } else {
2224                         /* Already processed this cpu */
2225                         continue;
2226                 }
2227                 /* Ok look forward for cpu->cpu and link in */
2228                 for(j=(i+1); j<ncnts; j++) {
2229                         at = &cnts[j];
2230                         if (at->next_cpu) {
2231                                 continue;
2232                         }
2233                         if (at->cpu == lace_cpu) {
2234                                 /* Found one */
2235                                 cpat->next_cpu = at;
2236                                 cpat = at;
2237                         }
2238                 }
2239         }
2240 }
2241
2242
2243 static void
2244 process_file(char *filename)
2245 {
2246         FILE *io;
2247         int i;
2248         int line_at, not_done;
2249         pid_t pid_of_command=0;
2250
2251         if (filename ==  NULL) {
2252                 io = my_popen(command, "r", &pid_of_command);
2253         } else {
2254                 io = fopen(filename, "r");
2255                 if (io == NULL) {
2256                         printf("Can't process file %s err:%d\n",
2257                                filename, errno);
2258                         return;
2259                 }
2260         }
2261         build_counters_from_header(io);
2262         if (cnts == NULL) {
2263                 /* Nothing we can do */
2264                 printf("Nothing to do -- no counters built\n");
2265                 if (io) {
2266                         fclose(io);
2267                 }
2268                 return;
2269         }
2270         lace_cpus_together();
2271         print_header();
2272         if (verbose) {
2273                 for (i=0; i<ncnts; i++) {
2274                         printf("Counter:%s cpu:%d index:%d\n",
2275                                cnts[i].counter_name,
2276                                cnts[i].cpu, i);
2277                 }
2278         }
2279         line_at = 0;
2280         not_done = 1;
2281         while(not_done) {
2282                 if (read_a_line(io)) {
2283                         line_at++;
2284                 } else {
2285                         break;
2286                 }
2287                 if (line_at >= max_to_collect) {
2288                         not_done = 0;
2289                 }
2290                 if (filename == NULL) {
2291                         int cnt;
2292                         /* For the ones we dynamically open we print now */
2293                         for(i=0, cnt=0; i<MAX_CPU; i++) {
2294                                 do_expression(glob_cpu[i], (line_at-1));
2295                                 cnt++;
2296                                 if (cnt == cpu_count_out) {
2297                                         printf("\n");
2298                                         break;
2299                                 } else {
2300                                         printf("\t");
2301                                 }
2302                         }
2303                 }
2304         }
2305         if (filename) {
2306                 fclose(io);
2307         } else {
2308                 my_pclose(io, pid_of_command);
2309         }
2310 }
2311 #if defined(__amd64__)
2312 #define cpuid(in,a,b,c,d)\
2313   asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
2314
2315 static __inline void
2316 do_cpuid(u_int ax, u_int cx, u_int *p)
2317 {
2318         __asm __volatile("cpuid"
2319                          : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
2320                          :  "0" (ax), "c" (cx) );
2321 }
2322
2323 #else
2324 #define cpuid(in, a, b, c, d) 
2325 #define do_cpuid(ax, cx, p)
2326 #endif
2327
2328 static void
2329 get_cpuid_set(void)
2330 {
2331         unsigned long eax, ebx, ecx, edx;
2332         int model;
2333         pid_t pid_of_command=0;
2334         size_t sz, len;
2335         FILE *io;
2336         char linebuf[1024], *str;
2337         u_int reg[4];
2338
2339         eax = ebx = ecx = edx = 0;
2340
2341         cpuid(0, eax, ebx, ecx, edx);
2342         if (ebx == 0x68747541) {
2343                 printf("AMD processors are not supported by this program\n");
2344                 printf("Sorry\n");
2345                 exit(0);
2346         } else if (ebx == 0x6972794) {
2347                 printf("Cyrix processors are not supported by this program\n");
2348                 printf("Sorry\n");
2349                 exit(0);
2350         } else if (ebx == 0x756e6547) {
2351                 printf("Genuine Intel\n");
2352         } else {
2353                 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2354                 exit(0);
2355         }
2356         cpuid(1, eax, ebx, ecx, edx);
2357         model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2358         printf("CPU model is 0x%x id:0x%lx\n", model, eax);
2359         switch (eax & 0xF00) {
2360         case 0x500:             /* Pentium family processors */
2361                 printf("Intel Pentium P5\n");
2362                 goto not_supported;
2363                 break;
2364         case 0x600:             /* Pentium Pro, Celeron, Pentium II & III */
2365                 switch (model) {
2366                 case 0x1:
2367                         printf("Intel Pentium P6\n");
2368                         goto not_supported;
2369                         break;
2370                 case 0x3: 
2371                 case 0x5:
2372                         printf("Intel PII\n");
2373                         goto not_supported;
2374                         break;
2375                 case 0x6: case 0x16:
2376                         printf("Intel CL\n");
2377                         goto not_supported;
2378                         break;
2379                 case 0x7: case 0x8: case 0xA: case 0xB:
2380                         printf("Intel PIII\n");
2381                         goto not_supported;
2382                         break;
2383                 case 0x9: case 0xD:
2384                         printf("Intel PM\n");
2385                         goto not_supported;
2386                         break;
2387                 case 0xE:
2388                         printf("Intel CORE\n");
2389                         goto not_supported;
2390                         break;
2391                 case 0xF:
2392                         printf("Intel CORE2\n");
2393                         goto not_supported;
2394                         break;
2395                 case 0x17:
2396                         printf("Intel CORE2EXTREME\n");
2397                         goto not_supported;
2398                         break;
2399                 case 0x1C:      /* Per Intel document 320047-002. */
2400                         printf("Intel ATOM\n");
2401                         goto not_supported;
2402                         break;
2403                 case 0x1A:
2404                 case 0x1E:      /*
2405                                  * Per Intel document 253669-032 9/2009,
2406                                  * pages A-2 and A-57
2407                                  */
2408                 case 0x1F:      /*
2409                                  * Per Intel document 253669-032 9/2009,
2410                                  * pages A-2 and A-57
2411                                  */
2412                         printf("Intel COREI7\n");
2413                         goto not_supported;
2414                         break;
2415                 case 0x2E:
2416                         printf("Intel NEHALEM\n");
2417                         goto not_supported;
2418                         break;
2419                 case 0x25:      /* Per Intel document 253669-033US 12/2009. */
2420                 case 0x2C:      /* Per Intel document 253669-033US 12/2009. */
2421                         printf("Intel WESTMERE\n");
2422                         goto not_supported;
2423                         break;
2424                 case 0x2F:      /* Westmere-EX, seen in wild */
2425                         printf("Intel WESTMERE\n");
2426                         goto not_supported;
2427                         break;
2428                 case 0x2A:      /* Per Intel document 253669-039US 05/2011. */
2429                         printf("Intel SANDYBRIDGE\n");
2430                         set_sandybridge();
2431                         break;
2432                 case 0x2D:      /* Per Intel document 253669-044US 08/2012. */
2433                         printf("Intel SANDYBRIDGE_XEON\n");
2434                         set_sandybridge();
2435                         break;
2436                 case 0x3A:      /* Per Intel document 253669-043US 05/2012. */
2437                         printf("Intel IVYBRIDGE\n");
2438                         set_ivybridge();
2439                         break;
2440                 case 0x3E:      /* Per Intel document 325462-045US 01/2013. */
2441                         printf("Intel IVYBRIDGE_XEON\n");
2442                         set_ivybridge();
2443                         break;
2444                 case 0x3F:      /* Per Intel document 325462-045US 09/2014. */
2445                         printf("Intel HASWELL (Xeon)\n");
2446                         set_haswell();
2447                         break;
2448                 case 0x3C:      /* Per Intel document 325462-045US 01/2013. */
2449                 case 0x45:
2450                 case 0x46:
2451                         printf("Intel HASWELL\n");
2452                         set_haswell();
2453                         break;
2454
2455                 case 0x4e:
2456                 case 0x5e:
2457                         printf("Intel SKY-LAKE\n");
2458                         goto not_supported;
2459                         break;
2460                 case 0x3D:
2461                 case 0x47:
2462                         printf("Intel BROADWELL\n");
2463                         set_broadwell();
2464                         break;
2465                 case 0x4f:
2466                 case 0x56:
2467                         printf("Intel BROADWEL (Xeon)\n");
2468                         set_broadwell();
2469                         break;
2470
2471                 case 0x4D:
2472                         /* Per Intel document 330061-001 01/2014. */
2473                         printf("Intel ATOM_SILVERMONT\n");
2474                         goto not_supported;
2475                         break;
2476                 default:
2477                         printf("Intel model 0x%x is not known -- sorry\n",
2478                                model);
2479                         goto not_supported;
2480                         break;
2481                 }
2482                 break;
2483         case 0xF00:             /* P4 */
2484                 printf("Intel unknown model %d\n", model);
2485                 goto not_supported;
2486                 break;
2487         }
2488         do_cpuid(0xa, 0, reg);
2489         max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2490         printf("We have %d PMC counters to work with\n", max_pmc_counters);
2491         /* Ok lets load the list of all known PMC's */
2492         io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2493         if (valid_pmcs == NULL) {
2494                 /* Likely */
2495                 pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2496                 sz = sizeof(char *) * pmc_allocated_cnt;
2497                 valid_pmcs = malloc(sz);
2498                 if (valid_pmcs == NULL) {
2499                         printf("No memory allocation fails at startup?\n");     
2500                         exit(-1);
2501                 }
2502                 memset(valid_pmcs, 0, sz);
2503         }
2504         
2505         while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2506                 if (linebuf[0] != '\t') {
2507                         /* sometimes headers ;-) */
2508                         continue;
2509                 }
2510                 len = strlen(linebuf);
2511                 if (linebuf[(len-1)] == '\n') {
2512                         /* Likely */
2513                         linebuf[(len-1)] = 0;
2514                 }
2515                 str = &linebuf[1];
2516                 len = strlen(str) + 1;
2517                 valid_pmcs[valid_pmc_cnt] = malloc(len);
2518                 if (valid_pmcs[valid_pmc_cnt] == NULL) {
2519                         printf("No memory2 allocation fails at startup?\n");    
2520                         exit(-1);
2521                 }
2522                 memset(valid_pmcs[valid_pmc_cnt], 0, len);
2523                 strcpy(valid_pmcs[valid_pmc_cnt], str);
2524                 valid_pmc_cnt++;
2525                 if (valid_pmc_cnt >= pmc_allocated_cnt) {
2526                         /* Got to expand -- unlikely */
2527                         char **more;
2528
2529                         sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2530                         more = malloc(sz);
2531                         if (more == NULL) {
2532                                 printf("No memory3 allocation fails at startup?\n");    
2533                                 exit(-1);
2534                         }
2535                         memset(more, 0, sz);
2536                         memcpy(more, valid_pmcs, sz);
2537                         pmc_allocated_cnt *= 2;
2538                         free(valid_pmcs);
2539                         valid_pmcs = more;
2540                 }
2541         }
2542         my_pclose(io, pid_of_command);  
2543         return;
2544 not_supported:
2545         printf("Not supported\n");      
2546         exit(-1);
2547 }
2548
2549 static void
2550 explain_all(void)
2551 {
2552         int i;
2553         printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2554         printf("-------------------------------------------------------------\n");
2555         for(i=0; i<the_cpu.number; i++){
2556                 printf("For -e %s ", the_cpu.ents[i].name);
2557                 (*the_cpu.explain)(the_cpu.ents[i].name);
2558                 printf("----------------------------\n");
2559         }
2560 }
2561
/*
 * Run pmcstat briefly against a single PMC and report Pass/Failed.
 *
 * pmc        - name of the PMC to try.
 * out_so_far - number of characters the caller already printed on the
 *              current line; the result is padded out to column 50.
 *
 * The first output line is searched for either "ERROR" or the PMC name.
 * When the name is found, the second line (leading blanks removed) is
 * printed after "Pass", right-aligned in a 20 character field.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
        FILE *io;
        pid_t pid_of_command=0; 
        char my_command[1024];
        char line[1024];
        char resp[1024];
        int len, llen, i;

        if (out_so_far < 50) {
                len = 50 - out_so_far;
                for(i=0; i<len; i++) {
                        printf(" ");
                }
        }
        /* Bounded formatting: a long name must not overrun the buffer */
        len = snprintf(my_command, sizeof(my_command),
                       "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
        if (len < 0 || (size_t)len >= sizeof(my_command)) {
                printf("Failed -- PMC name too long\n");
                return;
        }
        io = my_popen(my_command, "r", &pid_of_command);        
        if (io == NULL) {
                printf("Failed -- popen fails\n");
                return;
        }
        /* Setup what we expect (fits: it is shorter than my_command) */
        len = snprintf(resp, sizeof(resp), "%s", pmc);
        if (fgets(line, sizeof(line), io) == NULL) {
                printf("Failed -- no output from pmstat\n");
                goto out;
        }
        llen = (int)strlen(line);
        if (llen > 0 && line[(llen-1)] == '\n') {
                line[(llen-1)] = 0;
                llen--;
        }
        for(i=2; i<(llen-len); i++) {
                if (strncmp(&line[i], "ERROR", 5) == 0) {
                        printf("Failed %s\n", line);
                        goto out;
                } else if (strncmp(&line[i], resp, len) == 0) {
                        int j, k;

                        if (fgets(line, sizeof(line), io) == NULL) {
                                printf("Failed -- no second output from pmstat\n");
                                goto out;
                        }
                        /*
                         * Step over every leading blank one at a time;
                         * the terminating NUL stops the scan.
                         */
                        j = 0;
                        while (line[j] == ' ') {
                                j++;
                        }
                        printf("Pass");
                        len = (int)strlen(&line[j]);
                        if (len < 20) {
                                for(k=0; k<(20-len); k++) {
                                        printf(" ");
                                }
                        }
                        if (len) {
                                /* line still carries its own newline */
                                printf("%s", &line[j]);
                        } else {
                                printf("\n");
                        }
                        goto out;
                }
        }
        printf("Failed -- '%s' not '%s'\n", line, resp);
out:
        my_pclose(io, pid_of_command);          
        
}
2634
/*
 * Append a private copy of `name` to the string table `vars` unless an
 * equal string is already present in the first cur_cnt slots.
 *
 * vars    - table of string pointers; slot cur_cnt must be NULL.
 * cur_cnt - number of slots currently in use.
 * name    - string to add.
 *
 * Returns 1 when a new entry was stored in vars[cur_cnt], 0 when the
 * name was already present.  Exits the program if the target slot is
 * unexpectedly occupied or memory cannot be allocated.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
        char **slot = &vars[cur_cnt];
        size_t bytes;
        int idx = 0;

        while (idx < cur_cnt) {
                /* Already have */
                if (strcmp(vars[idx], name) == 0)
                        return (0);
                idx++;
        }
        if (*slot != NULL) {
                printf("Cur_cnt:%d filled with %s??\n", cur_cnt, *slot);
                exit(-1);
        }
        /* Ok its new: duplicate it, terminator included */
        bytes = strlen(name) + 1;
        *slot = malloc(bytes);
        if (*slot == NULL) {
                printf("No memory %s\n", __func__);
                exit(-1);
        }
        memcpy(*slot, name, bytes);
        return (1);
}
2662
2663 static char *
2664 build_command_for_exp(struct expression *exp)
2665 {
2666         /*
2667          * Build the pmcstat command to handle
2668          * the passed in expression.
2669          * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2670          * where NNN and QQQ represent the PMC's in the expression
2671          * uniquely..
2672          */
2673         char forming[1024];
2674         int cnt_pmc, alloced_pmcs, i;
2675         struct expression *at;
2676         char **vars, *cmd;
2677         size_t mal;
2678
2679         alloced_pmcs = cnt_pmc = 0;
2680         /* first how many do we have */
2681         at = exp;
2682         while (at) {
2683                 if (at->type == TYPE_VALUE_PMC) {
2684                         cnt_pmc++;
2685                 }
2686                 at = at->next;
2687         }
2688         if (cnt_pmc == 0) {
2689                 printf("No PMC's in your expression -- nothing to do!!\n");
2690                 exit(0);
2691         }
2692         mal = cnt_pmc * sizeof(char *);
2693         vars = malloc(mal);
2694         if (vars == NULL) {
2695                 printf("No memory\n");
2696                 exit(-1);
2697         }
2698         memset(vars, 0, mal);
2699         at = exp;
2700         while (at) {
2701                 if (at->type == TYPE_VALUE_PMC) {
2702                         if(add_it_to(vars, alloced_pmcs, at->name)) {
2703                                 alloced_pmcs++;
2704                         }
2705                 }
2706                 at = at->next;
2707         }
2708         /* Now we have a unique list in vars so create our command */
2709         mal = 23; /*    "/usr/sbin/pmcstat -w 1"  + \0 */
2710         for(i=0; i<alloced_pmcs; i++) {
2711                 mal += strlen(vars[i]) + 4;     /* var + " -s " */
2712         }
2713         cmd = malloc((mal+2));
2714         if (cmd == NULL) {
2715                 printf("%s out of mem\n", __FUNCTION__);
2716                 exit(-1);
2717         }
2718         memset(cmd, 0, (mal+2));
2719         strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2720         at = exp;
2721         for(i=0; i<alloced_pmcs; i++) {
2722                 sprintf(forming, " -s %s", vars[i]);
2723                 strcat(cmd, forming);
2724                 free(vars[i]);
2725                 vars[i] = NULL;
2726         }
2727         free(vars);
2728         return(cmd);
2729 }
2730
2731 static int
2732 user_expr(struct counters *cpu, int pos)
2733 {
2734         int ret;        
2735         double res;
2736         struct counters *var;
2737         struct expression *at;
2738
2739         at = master_exp;
2740         while (at) {
2741                 if (at->type == TYPE_VALUE_PMC) {
2742                         var = find_counter(cpu, at->name);
2743                         if (var == NULL) {
2744                                 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2745                                 exit(-1);
2746                         }
2747                         if (pos != -1) {
2748                                 at->value = var->vals[pos] * 1.0;
2749                         } else {
2750                                 at->value = var->sum * 1.0;
2751                         }
2752                 }
2753                 at = at->next;
2754         }
2755         res = run_expr(master_exp, 1, NULL);
2756         ret = printf("%1.3f", res);
2757         return(ret);
2758 }
2759
2760
2761 static void
2762 set_manual_exp(struct expression *exp)
2763 {
2764         expression = user_expr;
2765         command = build_command_for_exp(exp);
2766         threshold = "User defined threshold";
2767 }
2768
2769 static void
2770 run_tests(void)
2771 {
2772         int i, lenout;
2773         printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2774         printf("------------------------------------------------------------------------\n");
2775         for(i=0; i<valid_pmc_cnt; i++) {
2776                 lenout = printf("%s", valid_pmcs[i]);
2777                 fflush(stdout);
2778                 test_for_a_pmc(valid_pmcs[i], lenout);
2779         }
2780 }
2781 static void
2782 list_all(void)
2783 {
2784         int i, cnt, j;
2785         printf("PMC                                               Abbreviation\n");
2786         printf("--------------------------------------------------------------\n");
2787         for(i=0; i<valid_pmc_cnt; i++) {
2788                 cnt = printf("%s", valid_pmcs[i]);
2789                 for(j=cnt; j<52; j++) {
2790                         printf(" ");
2791                 }
2792                 printf("%%%d\n", i);
2793         }
2794 }
2795
2796
2797 int
2798 main(int argc, char **argv)
2799 {
2800         int i, j, cnt;
2801         char *filename=NULL;
2802         const char *name=NULL;
2803         int help_only = 0;
2804         int test_mode = 0;
2805         int test_at = 0;
2806
2807         get_cpuid_set();
2808         memset(glob_cpu, 0, sizeof(glob_cpu));
2809         while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2810                 switch (i) {
2811                 case 'A':
2812                         run_all = 1;
2813                         break;
2814                 case 'L':
2815                         list_all();
2816                         return(0);
2817                 case 'H':
2818                         printf("**********************************\n");
2819                         explain_all();
2820                         printf("**********************************\n");
2821                         return(0);
2822                         break;
2823                 case 'T':
2824                         test_mode = 1;
2825                         break;
2826                 case 'E':
2827                         master_exp = parse_expression(optarg);
2828                         if (master_exp) {
2829                                 set_manual_exp(master_exp);
2830                         }
2831                         break;
2832                 case 'e':
2833                         if (validate_expression(optarg)) {
2834                                 printf("Unknown expression %s\n", optarg);
2835                                 return(0);
2836                         }
2837                         name = optarg;
2838                         set_expression(optarg);
2839                         break;
2840                 case 'm':
2841                         max_to_collect = strtol(optarg, NULL, 0);
2842                         if (max_to_collect > MAX_COUNTER_SLOTS) {
2843                                 /* You can't collect more than max in array */
2844                                 max_to_collect = MAX_COUNTER_SLOTS;
2845                         }
2846                         break;
2847                 case 'v':
2848                         verbose++;
2849                         break;
2850                 case 'h':
2851                         help_only = 1;
2852                         break;
2853                 case 'i':
2854                         filename = optarg;
2855                         break;
2856                 case '?':
2857                 default:
2858                 use:
2859                         printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2860                                argv[0]);
2861                         printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2862                         printf("-v -- verbose dump debug type things -- you don't want this\n");
2863                         printf("-m N -- maximum to collect is N measurments\n");
2864                         printf("-e expr-name -- Do expression expr-name\n");
2865                         printf("-E 'your expression' -- Do your expression\n");
2866                         printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2867                         printf("-H -- Don't run anything, just explain all canned expressions\n");
2868                         printf("-T -- Test all PMC's defined by this processor\n");
2869                         printf("-A -- Run all canned tests\n");
2870                         return(0);
2871                         break;
2872                 }
2873         }
2874         if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2875             (test_mode == 0) && (master_exp == NULL)) {
2876                 printf("Without setting an expression we cannot dynamically gather information\n");
2877                 printf("you must supply a filename (and you probably want verbosity)\n");
2878                 goto use;
2879         }
2880         if (run_all && max_to_collect > 10) {
2881                 max_to_collect = 3;
2882         }
2883         if (test_mode) {
2884                 run_tests();
2885                 return(0);
2886         }
2887         printf("*********************************\n");
2888         if ((master_exp == NULL) && name) {
2889                 (*the_cpu.explain)(name);
2890         } else if (master_exp) {
2891                 printf("Examine your expression ");
2892                 print_exp(master_exp);
2893                 printf("User defined threshold\n");
2894         }
2895         if (help_only) {
2896                 return(0);
2897         }
2898         if (run_all) {
2899         more:
2900                 name = the_cpu.ents[test_at].name;
2901                 printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2902                 test_at++;
2903                 if (set_expression(name) == -1) {
2904                         if (test_at >= the_cpu.number) {
2905                                 goto done;
2906                         } else
2907                                 goto more;
2908                 }
2909
2910         }
2911         process_file(filename);
2912         if (verbose >= 2) {
2913                 for (i=0; i<ncnts; i++) {
2914                         printf("Counter:%s cpu:%d index:%d\n",
2915                                cnts[i].counter_name,
2916                                cnts[i].cpu, i);
2917                         for(j=0; j<cnts[i].pos; j++) {
2918                                 printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2919                         }
2920                         printf(" sum - %ld\n", (long int)cnts[i].sum);
2921                 }
2922         }
2923         if (expression == NULL) {
2924                 return(0);
2925         }
2926         if (max_to_collect > 1) {
2927                 for(i=0, cnt=0; i<MAX_CPU; i++) {
2928                         if (glob_cpu[i]) {
2929                                 do_expression(glob_cpu[i], -1);
2930                                 cnt++;
2931                                 if (cnt == cpu_count_out) {
2932                                         printf("\n");
2933                                         break;
2934                                 } else {
2935                                         printf("\t");
2936                                 }
2937                         }
2938                 }
2939         }
2940         if (run_all && (test_at < the_cpu.number)) {
2941                 memset(glob_cpu, 0, sizeof(glob_cpu));
2942                 ncnts = 0;
2943                 printf("*********************************\n");
2944                 goto more;
2945         } else if (run_all) {
2946         done:
2947                 printf("*********************************\n");
2948         }
2949         return(0);      
2950 }