]> CyberLeo.Net >> Repos - FreeBSD/releng/10.2.git/blob - usr.sbin/pmcstudy/pmcstudy.c
- Copy stable/10@285827 to releng/10.2 in preparation for 10.2-RC1
[FreeBSD/releng/10.2.git] / usr.sbin / pmcstudy / pmcstudy.c
1 /*-
2  * Copyright (c) 2014, 2015 Netflix Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer,
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 #include <sys/types.h>
29 #include <stdio.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <string.h>
33 #include <strings.h>
34 #include <sys/errno.h>
35 #include <signal.h>
36 #include <sys/wait.h>
37 #include <getopt.h>
38 #include "eval_expr.h"
39 __FBSDID("$FreeBSD$");
40
/*
 * Fixed table sizes: MAX_COUNTER_SLOTS bounds the per-counter value
 * array, MAX_NLEN bounds a counter name and MAX_CPU bounds the per-CPU
 * table of counter lists.
 */
#define MAX_COUNTER_SLOTS 1024
#define MAX_NLEN 64
#define MAX_CPU 64

/* Verbosity level; starts out quiet. */
static int verbose = 0;

extern char **environ;

/*
 * Each non-static global below is declared extern right before it is
 * defined (presumably to keep missing-declaration warnings quiet).
 */
extern struct expression *master_exp;
struct expression *master_exp = NULL;

/* NOTE(review): looks like the starting capacity of valid_pmcs -- confirm. */
#define PMC_INITIAL_ALLOC 512

extern char **valid_pmcs;
char **valid_pmcs = NULL;

extern int valid_pmc_cnt;
int valid_pmc_cnt = 0;

extern int pmc_allocated_cnt;
int pmc_allocated_cnt = 0;
57
/*
 * my_popen() and my_pclose() are variants of popen(3) and pclose(3),
 * with the caveat that my_popen() also reports the child's PID so that
 * it can be supplied to my_pclose(), which sends the process a SIGTERM.
 *
 * my_popen --
 *	Run "/bin/sh -c command" in a forked child and return a stdio
 *	stream connected to it.  With dir "r" the stream reads the child's
 *	stdout and stderr (both share one pipe) and the child's stdin is
 *	at end-of-file.  With dir "w" the stream writes to the child's
 *	stdin and nobody reads the child's output pipe.  Any other dir
 *	fails with EINVAL.
 *
 *	On success the child's PID is stored in *p_pid.  On failure NULL
 *	is returned with errno set, *p_pid is left untouched, and neither
 *	a descriptor nor a child process is left behind.
 */
static FILE *
my_popen(const char *command, const char *dir, pid_t *p_pid)
{
	FILE *io;
	int pdesin[2], pdesout[2];
	int keep_fd, reading, saved_errno;
	pid_t pid;

	if (strcmp(dir, "r") == 0) {
		reading = 1;
	} else if (strcmp(dir, "w") == 0) {
		reading = 0;
	} else {
		errno = EINVAL;
		return (NULL);
	}
	if (pipe(pdesin) < 0)
		return (NULL);

	if (pipe(pdesout) < 0) {
		/* close() may overwrite errno; report the pipe() failure. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		errno = saved_errno;
		return (NULL);
	}

	switch (pid = fork()) {
	case -1:			/* Error. */
		saved_errno = errno;
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		errno = saved_errno;
		return (NULL);
		/* NOTREACHED */
	case 0:				/* Child. */
		/* Close out un-used sides */
		(void)close(pdesin[1]);
		(void)close(pdesout[0]);
		/*
		 * Now prepare the stdin of the process.  dup2() is used
		 * so the target descriptor is explicit; the guard covers
		 * a parent that started with fd 0 closed, in which case
		 * the pipe end already is stdin and must not be closed.
		 */
		if (pdesin[0] != STDIN_FILENO) {
			if (dup2(pdesin[0], STDIN_FILENO) < 0)
				_exit(127);
			(void)close(pdesin[0]);
		}
		/* Now prepare stdout, and stderr just in case */
		if (dup2(pdesout[1], STDOUT_FILENO) < 0 ||
		    dup2(pdesout[1], STDERR_FILENO) < 0)
			_exit(127);
		if (pdesout[1] != STDOUT_FILENO &&
		    pdesout[1] != STDERR_FILENO)
			(void)close(pdesout[1]);
		/*
		 * Now run it.  The command string is passed straight
		 * through, so its length is not limited by a local
		 * buffer; execl() hands the current environment on.
		 */
		execl("/bin/sh", "sh", "-c", command, (char *)NULL);
		/*
		 * _exit() rather than exit(): the child must not flush
		 * stdio buffers inherited from the parent or run its
		 * atexit handlers.
		 */
		_exit(127);
		/* NOTREACHED */
	default:			/* Parent. */
		break;
	}

	/* Keep only the pipe end that backs the returned stream. */
	if (reading) {
		keep_fd = pdesout[0];
		(void)close(pdesout[1]);
		(void)close(pdesin[0]);
		(void)close(pdesin[1]);
		io = fdopen(keep_fd, "r");
	} else {
		keep_fd = pdesin[1];
		(void)close(pdesin[0]);
		(void)close(pdesout[0]);
		(void)close(pdesout[1]);
		io = fdopen(keep_fd, "w");
	}
	if (io == NULL) {
		/* No stream to hand back: drop the fd and reap the child. */
		saved_errno = errno;
		(void)close(keep_fd);
		(void)kill(pid, SIGTERM);
		while (waitpid(pid, NULL, 0) == -1 && errno == EINTR)
			continue;
		errno = saved_errno;
		return (NULL);
	}
	/* Store the pid */
	*p_pid = pid;
	return (io);
}
142
/*
 * my_pclose --
 *	Shut down a stream obtained from my_popen() together with its
 *	child process.  The stream is closed first, then SIGTERM is sent
 *	to the_pid unconditionally, and finally the child is waited for,
 *	retrying whenever the wait is interrupted (EINTR).  Nothing is
 *	returned: failures of fclose, kill and the wait are not reported.
 */
static void
my_pclose(FILE *io, pid_t the_pid)
{
	int status;

	(void)fclose(io);
	/* Die if you are not dead! */
	kill(the_pid, SIGTERM);
	for (;;) {
		if (waitpid(the_pid, &status, 0) != -1 || errno != EINTR)
			break;
	}
}
164
165 struct counters {
166         struct counters *next_cpu;
167         char counter_name[MAX_NLEN];            /* Name of counter */
168         int cpu;                                /* CPU we are on */
169         int pos;                                /* Index we are filling to. */
170         uint64_t vals[MAX_COUNTER_SLOTS];       /* Last 64 entries */
171         uint64_t sum;                           /* Summary of entries */
172 };
173
174 extern struct counters *glob_cpu[MAX_CPU];
175 struct counters *glob_cpu[MAX_CPU];
176
177 extern struct counters *cnts;
178 struct counters *cnts=NULL;
179
180 extern int ncnts;
181 int ncnts=0;
182
183 extern int (*expression)(struct counters *, int);
184 int (*expression)(struct counters *, int);
185
186 static const char *threshold=NULL;
187 static const char *command;
188
/*
 * Describes one selectable metric: its name, a threshold string, a
 * command string, and the routine that evaluates it on a counter list.
 */
struct cpu_entry {
	const char *name;				/* metric name */
	const char *thresh;				/* threshold text */
	const char *command;				/* command text */
	int (*func)(struct counters *, int);		/* evaluation routine */
};
195
196
/*
 * Describes one CPU family: a type string, a count, a pointer to
 * cpu_entry records and the routine that prints the explanation for a
 * metric name.
 */
struct cpu_type {
	char cputype[32];			/* CPU type string */
	int number;				/* presumably the entry count in ents */
	struct cpu_entry *ents;			/* metric entries */
	void (*explain)(const char *name);	/* explanation printer */
};

/* The single global instance; declared extern just before its definition. */
extern struct cpu_type the_cpu;
struct cpu_type the_cpu;
205
/*
 * Print, for the metric called name, the formula to examine and the
 * threshold at which the value suggests room for improvement.  The
 * known names live in a table that is searched in order with strcmp();
 * an unrecognized name is reported as unknown.  Output goes to stdout.
 */
static void
explain_name_sb(const char *name)
{
	static const struct {
		const char *key;	/* metric name */
		const char *formula;	/* text printed for the formula */
		const char *limit;	/* threshold wording */
	} notes[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "splitload",
		  "Examine MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t total = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < total; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < total) {
		/* The formula text contains no conversions; emit it verbatim. */
		fputs(notes[i].formula, stdout);
		mythresh = notes[i].limit;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
284
/*
 * Print, for the metric called name, the formula to examine and the
 * threshold at which the value suggests room for improvement.  The
 * known names live in a table that is searched in order with strcmp();
 * an unrecognized name is reported as unknown.  Output goes to stdout.
 */
static void
explain_name_ib(const char *name)
{
	static const struct {
		const char *key;	/* metric name */
		const char *formula;	/* text printed for the formula */
		const char *limit;	/* threshold wording */
	} notes[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  "         MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  "         LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t total = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < total; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < total) {
		/* The formula text contains no conversions; emit it verbatim. */
		fputs(notes[i].formula, stdout);
		mythresh = notes[i].limit;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
366
367
/*
 * Print, for the metric called name, the formula to examine and the
 * threshold at which the value suggests room for improvement.  The
 * known names live in a table that is searched in order with strcmp();
 * an unrecognized name is reported as unknown.  Output goes to stdout.
 */
static void
explain_name_has(const char *name)
{
	static const struct {
		const char *key;	/* metric name */
		const char *formula;	/* text printed for the formula */
		const char *limit;	/* threshold wording */
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  "         (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  "          / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine  (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  "         / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  "          MACHINE_CLEARS.SMC + \n"
		  "          MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t total = sizeof(notes) / sizeof(notes[0]);
	const char *mythresh = "unknown entry";
	size_t i;

	for (i = 0; i < total; i++) {
		if (strcmp(name, notes[i].key) == 0)
			break;
	}
	if (i < total) {
		/* The formula text contains no conversions; emit it verbatim. */
		fputs(notes[i].formula, stdout);
		mythresh = notes[i].limit;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
}
445
446
447 static struct counters *
448 find_counter(struct counters *base, const char *name)
449 {
450         struct counters *at;
451         int len;
452
453         at = base;
454         len = strlen(name);
455         while(at) {
456                 if (strncmp(at->counter_name, name, len) == 0) {
457                         return(at);
458                 }
459                 at = at->next_cpu;
460         }
461         printf("Can't find counter %s\n", name);
462         printf("We have:\n");
463         at = base;
464         while(at) {
465                 printf("- %s\n", at->counter_name);
466                 at = at->next_cpu;
467         }
468         exit(-1);
469 }
470
471 static int
472 allocstall1(struct counters *cpu, int pos)
473 {
474 /*  1  - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
475         int ret;
476         struct counters *partial;
477         struct counters *unhalt;
478         double un, par, res;
479         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
480         partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
481         if (pos != -1) {
482                 par = partial->vals[pos] * 1.0;
483                 un = unhalt->vals[pos] * 1.0;
484         } else {
485                 par = partial->sum * 1.0;
486                 un = unhalt->sum * 1.0;
487         }
488         res = par/un;
489         ret = printf("%1.3f", res);
490         return(ret);
491 }
492
493 static int
494 allocstall2(struct counters *cpu, int pos)
495 {
496 /*  2  - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
497         int ret;
498         struct counters *partial;
499         struct counters *unhalt;
500         double un, par, res;
501         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
502         partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
503         if (pos != -1) {
504                 par = partial->vals[pos] * 1.0;
505                 un = unhalt->vals[pos] * 1.0;
506         } else {
507                 par = partial->sum * 1.0;
508                 un = unhalt->sum * 1.0;
509         }
510         res = par/un;
511         ret = printf("%1.3f", res);
512         return(ret);
513 }
514
515 static int
516 br_mispredict(struct counters *cpu, int pos)
517 {
518         struct counters *brctr;
519         struct counters *unhalt;
520         int ret;
521 /*  3  - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
522         double br, un, con, res;
523         con = 20.0;
524         
525         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
526         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
527         if (pos != -1) {
528                 br = brctr->vals[pos] * 1.0;
529                 un = unhalt->vals[pos] * 1.0;
530         } else {
531                 br = brctr->sum * 1.0;
532                 un = unhalt->sum * 1.0;
533         }
534         res = (con * br)/un;
535         ret = printf("%1.3f", res);
536         return(ret);
537 }
538
539 static int
540 br_mispredictib(struct counters *cpu, int pos)
541 {
542         struct counters *brctr;
543         struct counters *unhalt;
544         struct counters *clear, *clear2, *clear3;
545         struct counters *uops;
546         struct counters *recv;  
547         struct counters *iss;
548 /*        "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
549         int ret;
550         /*  
551          * (BR_MISP_RETIRED.ALL_BRANCHES / 
552          *         (BR_MISP_RETIRED.ALL_BRANCHES +
553          *          MACHINE_CLEAR.COUNT) * 
554          *         ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
555          *
556          */
557         double br, cl, cl2, cl3, uo, re, un, con, res, is;
558         con = 4.0;
559         
560         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
561         brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
562         clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
563         clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
564         clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
565         uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
566         iss = find_counter(cpu, "UOPS_ISSUED.ANY");
567         recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
568         if (pos != -1) {
569                 br = brctr->vals[pos] * 1.0;
570                 cl = clear->vals[pos] * 1.0;
571                 cl2 = clear2->vals[pos] * 1.0;
572                 cl3 = clear3->vals[pos] * 1.0;
573                 uo = uops->vals[pos] * 1.0;
574                 re = recv->vals[pos] * 1.0;
575                 is = iss->vals[pos] * 1.0;
576                 un = unhalt->vals[pos] * 1.0;
577         } else {
578                 br = brctr->sum * 1.0;
579                 cl = clear->sum * 1.0;
580                 cl2 = clear2->sum * 1.0;
581                 cl3 = clear3->sum * 1.0;
582                 uo = uops->sum * 1.0;
583                 re = recv->sum * 1.0;
584                 is = iss->sum * 1.0;
585                 un = unhalt->sum * 1.0;
586         }
587         res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
588         ret = printf("%1.3f", res);
589         return(ret);
590 }
591
592 static int
593 splitloadib(struct counters *cpu, int pos)
594 {
595         int ret;
596         struct counters *mem;
597         struct counters *l1d, *ldblock;
598         struct counters *unhalt;
599         double un, memd, res, l1, ldb;
600         /*  
601          * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
602          * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
603          */
604
605         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
606         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
607         l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
608         ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
609         if (pos != -1) {
610                 memd = mem->vals[pos] * 1.0;
611                 l1 = l1d->vals[pos] * 1.0;
612                 ldb = ldblock->vals[pos] * 1.0;
613                 un = unhalt->vals[pos] * 1.0;
614         } else {
615                 memd = mem->sum * 1.0;
616                 l1 = l1d->sum * 1.0;
617                 ldb = ldblock->sum * 1.0;
618                 un = unhalt->sum * 1.0;
619         }
620         res = ((l1 / memd) * ldb)/un;
621         ret = printf("%1.3f", res);
622         return(ret);
623 }
624
625 static int
626 splitload(struct counters *cpu, int pos)
627 {
628         int ret;
629         struct counters *mem;
630         struct counters *unhalt;
631         double con, un, memd, res;
632 /*  4  - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
633
634         con = 5.0;
635         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
636         mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
637         if (pos != -1) {
638                 memd = mem->vals[pos] * 1.0;
639                 un = unhalt->vals[pos] * 1.0;
640         } else {
641                 memd = mem->sum * 1.0;
642                 un = unhalt->sum * 1.0;
643         }
644         res = (memd * con)/un;
645         ret = printf("%1.3f", res);
646         return(ret);
647 }
648
649 static int
650 splitstore(struct counters *cpu, int pos)
651 {
652         /*  5  - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
653         int ret;
654         struct counters *mem_split;
655         struct counters *mem_stores;
656         double memsplit, memstore, res;
657         mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
658         mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
659         if (pos != -1) {
660                 memsplit = mem_split->vals[pos] * 1.0;
661                 memstore = mem_stores->vals[pos] * 1.0;
662         } else {
663                 memsplit = mem_split->sum * 1.0;
664                 memstore = mem_stores->sum * 1.0;
665         }
666         res = memsplit/memstore;
667         ret = printf("%1.3f", res);
668         return(ret);
669 }
670
671
672 static int
673 contested(struct counters *cpu, int pos)
674 {
675         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
676         int ret;
677         struct counters *mem;
678         struct counters *unhalt;
679         double con, un, memd, res;
680
681         con = 60.0;
682         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
683         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
684         if (pos != -1) {
685                 memd = mem->vals[pos] * 1.0;
686                 un = unhalt->vals[pos] * 1.0;
687         } else {
688                 memd = mem->sum * 1.0;
689                 un = unhalt->sum * 1.0;
690         }
691         res = (memd * con)/un;
692         ret = printf("%1.3f", res);
693         return(ret);
694 }
695
696 static int
697 contested_has(struct counters *cpu, int pos)
698 {
699         /*  6  - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
700         int ret;
701         struct counters *mem;
702         struct counters *unhalt;
703         double con, un, memd, res;
704
705         con = 84.0;
706         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
707         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
708         if (pos != -1) {
709                 memd = mem->vals[pos] * 1.0;
710                 un = unhalt->vals[pos] * 1.0;
711         } else {
712                 memd = mem->sum * 1.0;
713                 un = unhalt->sum * 1.0;
714         }
715         res = (memd * con)/un;
716         ret = printf("%1.3f", res);
717         return(ret);
718 }
719
720
721 static int
722 blockstoreforward(struct counters *cpu, int pos)
723 {
724         /*  7  - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
725         int ret;
726         struct counters *ldb;
727         struct counters *unhalt;
728         double con, un, ld, res;
729
730         con = 13.0;
731         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
732         ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
733         if (pos != -1) {
734                 ld = ldb->vals[pos] * 1.0;
735                 un = unhalt->vals[pos] * 1.0;
736         } else {
737                 ld = ldb->sum * 1.0;
738                 un = unhalt->sum * 1.0;
739         }
740         res = (ld * con)/un;
741         ret = printf("%1.3f", res);
742         return(ret);
743 }
744
745 static int
746 cache2(struct counters *cpu, int pos)
747 {
748         /* ** Suspect ***
749          *  8  - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
750          *        (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
751          */
752         int ret;
753         struct counters *mem1, *mem2, *mem3;
754         struct counters *unhalt;
755         double con1, con2, con3, un, me_1, me_2, me_3, res;
756
757         con1 = 26.0;
758         con2 = 43.0;
759         con3 = 60.0;
760         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
761 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
762         mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
763         mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
764         mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
765         if (pos != -1) {
766                 me_1 = mem1->vals[pos] * 1.0;
767                 me_2 = mem2->vals[pos] * 1.0;
768                 me_3 = mem3->vals[pos] * 1.0;
769                 un = unhalt->vals[pos] * 1.0;
770         } else {
771                 me_1 = mem1->sum * 1.0;
772                 me_2 = mem2->sum * 1.0;
773                 me_3 = mem3->sum * 1.0;
774                 un = unhalt->sum * 1.0;
775         }
776         res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
777         ret = printf("%1.3f", res);
778         return(ret);
779 }
780
781 static int
782 datasharing(struct counters *cpu, int pos)
783 {
784         /* 
785          * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
786          */
787         int ret;
788         struct counters *mem;
789         struct counters *unhalt;
790         double con, res, me, un;
791
792         con = 43.0;
793         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
794         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
795         if (pos != -1) {
796                 me = mem->vals[pos] * 1.0;
797                 un = unhalt->vals[pos] * 1.0;
798         } else {
799                 me = mem->sum * 1.0;
800                 un = unhalt->sum * 1.0;
801         }
802         res = (me * con)/un;
803         ret = printf("%1.3f", res);
804         return(ret);
805
806 }
807
808
809 static int
810 datasharing_has(struct counters *cpu, int pos)
811 {
812         /* 
813          * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
814          */
815         int ret;
816         struct counters *mem;
817         struct counters *unhalt;
818         double con, res, me, un;
819
820         con = 72.0;
821         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
822         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
823         if (pos != -1) {
824                 me = mem->vals[pos] * 1.0;
825                 un = unhalt->vals[pos] * 1.0;
826         } else {
827                 me = mem->sum * 1.0;
828                 un = unhalt->sum * 1.0;
829         }
830         res = (me * con)/un;
831         ret = printf("%1.3f", res);
832         return(ret);
833
834 }
835
836
837 static int
838 cache2ib(struct counters *cpu, int pos)
839 {
840         /*
841          *  (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
842          */
843         int ret;
844         struct counters *mem;
845         struct counters *unhalt;
846         double con, un, me, res;
847
848         con = 29.0;
849         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
850         mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
851         if (pos != -1) {
852                 me = mem->vals[pos] * 1.0;
853                 un = unhalt->vals[pos] * 1.0;
854         } else {
855                 me = mem->sum * 1.0;
856                 un = unhalt->sum * 1.0;
857         }
858         res = (con * me)/un; 
859         ret = printf("%1.3f", res);
860         return(ret);
861 }
862
863 static int
864 cache2has(struct counters *cpu, int pos)
865 {
866         /*
867          * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
868          *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
869          *          (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
870          *           / CPU_CLK_UNHALTED.THREAD_P
871          */
872         int ret;
873         struct counters *mem1, *mem2, *mem3;
874         struct counters *unhalt;
875         double con1, con2, con3, un, me1, me2, me3, res;
876
877         con1 = 36.0;
878         con2 = 72.0;
879         con3 = 84.0;
880         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
881         mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
882         mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
883         mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
884         if (pos != -1) {
885                 me1 = mem1->vals[pos] * 1.0;
886                 me2 = mem2->vals[pos] * 1.0;
887                 me3 = mem3->vals[pos] * 1.0;
888                 un = unhalt->vals[pos] * 1.0;
889         } else {
890                 me1 = mem1->sum * 1.0;
891                 me2 = mem2->sum * 1.0;
892                 me3 = mem3->sum * 1.0;
893                 un = unhalt->sum * 1.0;
894         }
895         res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
896         ret = printf("%1.3f", res);
897         return(ret);
898 }
899
900 static int
901 cache1(struct counters *cpu, int pos)
902 {
903         /*  9  - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
904         int ret;
905         struct counters *mem;
906         struct counters *unhalt;
907         double con, un, me, res;
908
909         con = 180.0;
910         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
911         mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
912         if (pos != -1) {
913                 me = mem->vals[pos] * 1.0;
914                 un = unhalt->vals[pos] * 1.0;
915         } else {
916                 me = mem->sum * 1.0;
917                 un = unhalt->sum * 1.0;
918         }
919         res = (me * con)/un;
920         ret = printf("%1.3f", res);
921         return(ret);
922 }
923
924 static int
925 cache1ib(struct counters *cpu, int pos)
926 {
927         /*  9  - (MEM_LOAD_UOPS_L3_MISS_RETIRED.LCOAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
928         int ret;
929         struct counters *mem;
930         struct counters *unhalt;
931         double con, un, me, res;
932
933         con = 180.0;
934         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
935         mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
936         if (pos != -1) {
937                 me = mem->vals[pos] * 1.0;
938                 un = unhalt->vals[pos] * 1.0;
939         } else {
940                 me = mem->sum * 1.0;
941                 un = unhalt->sum * 1.0;
942         }
943         res = (me * con)/un;
944         ret = printf("%1.3f", res);
945         return(ret);
946 }
947
948
949 static int
950 dtlb_missload(struct counters *cpu, int pos)
951 {
952         /* 10  - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
953         int ret;
954         struct counters *dtlb_m, *dtlb_d;
955         struct counters *unhalt;
956         double con, un, d1, d2, res;
957
958         con = 7.0;
959         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
960         dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
961         dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
962         if (pos != -1) {
963                 d1 = dtlb_m->vals[pos] * 1.0;
964                 d2 = dtlb_d->vals[pos] * 1.0;
965                 un = unhalt->vals[pos] * 1.0;
966         } else {
967                 d1 = dtlb_m->sum * 1.0;
968                 d2 = dtlb_d->sum * 1.0;
969                 un = unhalt->sum * 1.0;
970         }
971         res = ((d1 * con) + d2)/un;
972         ret = printf("%1.3f", res);
973         return(ret);
974 }
975
976 static int
977 dtlb_missstore(struct counters *cpu, int pos)
978 {
979         /* 
980          * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) / 
981          * CPU_CLK_UNHALTED.THREAD_P (t >= .1) 
982          */
983         int ret;
984         struct counters *dtsb_m, *dtsb_d;
985         struct counters *unhalt;
986         double con, un, d1, d2, res;
987
988         con = 7.0;
989         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
990         dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
991         dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
992         if (pos != -1) {
993                 d1 = dtsb_m->vals[pos] * 1.0;
994                 d2 = dtsb_d->vals[pos] * 1.0;
995                 un = unhalt->vals[pos] * 1.0;
996         } else {
997                 d1 = dtsb_m->sum * 1.0;
998                 d2 = dtsb_d->sum * 1.0;
999                 un = unhalt->sum * 1.0;
1000         }
1001         res = ((d1 * con) + d2)/un;
1002         ret = printf("%1.3f", res);
1003         return(ret);
1004 }
1005
1006 static int
1007 itlb_miss(struct counters *cpu, int pos)
1008 {
1009         /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNTHREAD_P  IB */
1010         int ret;
1011         struct counters *itlb;
1012         struct counters *unhalt;
1013         double un, d1, res;
1014
1015         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1016         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1017         if (pos != -1) {
1018                 d1 = itlb->vals[pos] * 1.0;
1019                 un = unhalt->vals[pos] * 1.0;
1020         } else {
1021                 d1 = itlb->sum * 1.0;
1022                 un = unhalt->sum * 1.0;
1023         }
1024         res = d1/un;
1025         ret = printf("%1.3f", res);
1026         return(ret);
1027 }
1028
1029 static int
1030 icache_miss(struct counters *cpu, int pos)
1031 {
1032         /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1033
1034         int ret;
1035         struct counters *itlb, *icache;
1036         struct counters *unhalt;
1037         double un, d1, ic, res;
1038
1039         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1040         itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1041         icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1042         if (pos != -1) {
1043                 d1 = itlb->vals[pos] * 1.0;
1044                 ic = icache->vals[pos] * 1.0;
1045                 un = unhalt->vals[pos] * 1.0;
1046         } else {
1047                 d1 = itlb->sum * 1.0;
1048                 ic = icache->sum * 1.0;
1049                 un = unhalt->sum * 1.0;
1050         }
1051         res = (ic-d1)/un;
1052         ret = printf("%1.3f", res);
1053         return(ret);
1054
1055 }
1056
1057 static int
1058 icache_miss_has(struct counters *cpu, int pos)
1059 {
1060         /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1061
1062         int ret;
1063         struct counters *icache;
1064         struct counters *unhalt;
1065         double un, con, ic, res;
1066
1067         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1068         icache = find_counter(cpu, "ICACHE.MISSES");
1069         con = 36.0;
1070         if (pos != -1) {
1071                 ic = icache->vals[pos] * 1.0;
1072                 un = unhalt->vals[pos] * 1.0;
1073         } else {
1074                 ic = icache->sum * 1.0;
1075                 un = unhalt->sum * 1.0;
1076         }
1077         res = (con * ic)/un;
1078         ret = printf("%1.3f", res);
1079         return(ret);
1080
1081 }
1082
1083 static int
1084 lcp_stall(struct counters *cpu, int pos)
1085 {
1086          /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1087         int ret;
1088         struct counters *ild;
1089         struct counters *unhalt;
1090         double un, d1, res;
1091
1092         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1093         ild = find_counter(cpu, "ILD_STALL.LCP");
1094         if (pos != -1) {
1095                 d1 = ild->vals[pos] * 1.0;
1096                 un = unhalt->vals[pos] * 1.0;
1097         } else {
1098                 d1 = ild->sum * 1.0;
1099                 un = unhalt->sum * 1.0;
1100         }
1101         res = d1/un;
1102         ret = printf("%1.3f", res);
1103         return(ret);
1104
1105 }
1106
1107
1108 static int
1109 frontendstall(struct counters *cpu, int pos)
1110 {
1111       /* 12  -  IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1112         int ret;
1113         struct counters *idq;
1114         struct counters *unhalt;
1115         double con, un, id, res;
1116
1117         con = 4.0;
1118         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1119         idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
1120         if (pos != -1) {
1121                 id = idq->vals[pos] * 1.0;
1122                 un = unhalt->vals[pos] * 1.0;
1123         } else {
1124                 id = idq->sum * 1.0;
1125                 un = unhalt->sum * 1.0;
1126         }
1127         res = id/(un * con);
1128         ret = printf("%1.3f", res);
1129         return(ret);
1130 }
1131
1132 static int
1133 clears(struct counters *cpu, int pos)
1134 {
1135         /* 13  - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )  
1136          *         / CPU_CLK_UNHALTED.THREAD_P (thresh  >= .02)*/
1137         
1138         int ret;
1139         struct counters *clr1, *clr2, *clr3;
1140         struct counters *unhalt;
1141         double con, un, cl1, cl2, cl3, res;
1142
1143         con = 100.0;
1144         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1145         clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1146         clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1147         clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1148         
1149         if (pos != -1) {
1150                 cl1 = clr1->vals[pos] * 1.0;
1151                 cl2 = clr2->vals[pos] * 1.0;
1152                 cl3 = clr3->vals[pos] * 1.0;
1153                 un = unhalt->vals[pos] * 1.0;
1154         } else {
1155                 cl1 = clr1->sum * 1.0;
1156                 cl2 = clr2->sum * 1.0;
1157                 cl3 = clr3->sum * 1.0;
1158                 un = unhalt->sum * 1.0;
1159         }
1160         res = ((cl1 + cl2 + cl3) * con)/un;
1161         ret = printf("%1.3f", res);
1162         return(ret);
1163 }
1164
1165 static int
1166 microassist(struct counters *cpu, int pos)
1167 {
1168         /* 14  - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1169         int ret;
1170         struct counters *idq;
1171         struct counters *unhalt;
1172         double un, id, res, con;
1173
1174         con = 4.0;
1175         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1176         idq = find_counter(cpu, "IDQ.MS_UOPS");
1177         if (pos != -1) {
1178                 id = idq->vals[pos] * 1.0;
1179                 un = unhalt->vals[pos] * 1.0;
1180         } else {
1181                 id = idq->sum * 1.0;
1182                 un = unhalt->sum * 1.0;
1183         }
1184         res = id/(un * con);
1185         ret = printf("%1.3f", res);
1186         return(ret);
1187 }
1188
1189
1190 static int
1191 aliasing(struct counters *cpu, int pos)
1192 {
1193         /* 15  - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1194         int ret;        
1195         struct counters *ld;
1196         struct counters *unhalt;
1197         double un, lds, con, res;
1198
1199         con = 5.0;
1200         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1201         ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
1202         if (pos != -1) {
1203                 lds = ld->vals[pos] * 1.0;
1204                 un = unhalt->vals[pos] * 1.0;
1205         } else {
1206                 lds = ld->sum * 1.0;
1207                 un = unhalt->sum * 1.0;
1208         }
1209         res = (lds * con)/un;
1210         ret = printf("%1.3f", res);
1211         return(ret);
1212 }
1213
1214 static int
1215 fpassists(struct counters *cpu, int pos)
1216 {
1217         /* 16  - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1218         int ret;        
1219         struct counters *fp;
1220         struct counters *inst;
1221         double un, fpd, res;
1222
1223         inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1224         fp = find_counter(cpu, "FP_ASSIST.ANY");
1225         if (pos != -1) {
1226                 fpd = fp->vals[pos] * 1.0;
1227                 un = inst->vals[pos] * 1.0;
1228         } else {
1229                 fpd = fp->sum * 1.0;
1230                 un = inst->sum * 1.0;
1231         }
1232         res = fpd/un;
1233         ret = printf("%1.3f", res);
1234         return(ret);
1235 }
1236
1237 static int
1238 otherassistavx(struct counters *cpu, int pos)
1239 {
1240         /* 17  - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh  .1*/
1241         int ret;        
1242         struct counters *oth;
1243         struct counters *unhalt;
1244         double un, ot, con, res;
1245
1246         con = 75.0;
1247         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1248         oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
1249         if (pos != -1) {
1250                 ot = oth->vals[pos] * 1.0;
1251                 un = unhalt->vals[pos] * 1.0;
1252         } else {
1253                 ot = oth->sum * 1.0;
1254                 un = unhalt->sum * 1.0;
1255         }
1256         res = (ot * con)/un;
1257         ret = printf("%1.3f", res);
1258         return(ret);
1259 }
1260
1261 static int
1262 otherassistsse(struct counters *cpu, int pos)
1263 {
1264
1265         int ret;        
1266         struct counters *oth;
1267         struct counters *unhalt;
1268         double un, ot, con, res;
1269
1270         /* 18     (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P  thresh .1*/
1271         con = 75.0;
1272         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1273         oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
1274         if (pos != -1) {
1275                 ot = oth->vals[pos] * 1.0;
1276                 un = unhalt->vals[pos] * 1.0;
1277         } else {
1278                 ot = oth->sum * 1.0;
1279                 un = unhalt->sum * 1.0;
1280         }
1281         res = (ot * con)/un;
1282         ret = printf("%1.3f", res);
1283         return(ret);
1284 }
1285
1286 static int
1287 efficiency1(struct counters *cpu, int pos)
1288 {
1289
1290         int ret;        
1291         struct counters *uops;
1292         struct counters *unhalt;
1293         double un, ot, con, res;
1294
1295         /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1296         con = 4.0;
1297         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1298         uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
1299         if (pos != -1) {
1300                 ot = uops->vals[pos] * 1.0;
1301                 un = unhalt->vals[pos] * 1.0;
1302         } else {
1303                 ot = uops->sum * 1.0;
1304                 un = unhalt->sum * 1.0;
1305         }
1306         res = ot/(con * un);
1307         ret = printf("%1.3f", res);
1308         return(ret);
1309 }
1310
1311 static int
1312 efficiency2(struct counters *cpu, int pos)
1313 {
1314
1315         int ret;        
1316         struct counters *uops;
1317         struct counters *unhalt;
1318         double un, ot, res;
1319
1320         /* 20  - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1321         unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1322         uops = find_counter(cpu, "INST_RETIRED.ANY_P");
1323         if (pos != -1) {
1324                 ot = uops->vals[pos] * 1.0;
1325                 un = unhalt->vals[pos] * 1.0;
1326         } else {
1327                 ot = uops->sum * 1.0;
1328                 un = unhalt->sum * 1.0;
1329         }
1330         res = un/ot;
1331         ret = printf("%1.3f", res);
1332         return(ret);
1333 }
1334
1335 #define SANDY_BRIDGE_COUNT 20   
1336 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1337 /*01*/  { "allocstall1", "thresh > .05", 
1338           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1339           allocstall1 },
1340 /*02*/  { "allocstall2", "thresh > .05", 
1341           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1",
1342           allocstall2 },
1343 /*03*/  { "br_miss", "thresh >= .2", 
1344           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1345           br_mispredict },
1346 /*04*/  { "splitload", "thresh >= .1", 
1347           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1348           splitload },
1349 /*05*/  { "splitstore", "thresh >= .01", 
1350           "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1351           splitstore },
1352 /*06*/  { "contested", "thresh >= .05", 
1353           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1354           contested },
1355 /*07*/  { "blockstorefwd", "thresh >= .05", 
1356           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1357           blockstoreforward },
1358 /*08*/  { "cache2", "thresh >= .2", 
1359           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1360           cache2 },
1361 /*09*/  { "cache1", "thresh >= .2", 
1362           "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1363           cache1 },
1364 /*10*/  { "dtlbmissload", "thresh >= .1", 
1365           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1366           dtlb_missload },
1367 /*11*/  { "dtlbmissstore", "thresh >= .05", 
1368           "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1369           dtlb_missstore },
1370 /*12*/  { "frontendstall", "thresh >= .15", 
1371           "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1372           frontendstall },
1373 /*13*/  { "clears", "thresh >= .02", 
1374           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1375           clears },
1376 /*14*/  { "microassist", "thresh >= .05", 
1377           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1378           microassist },
1379 /*15*/  { "aliasing_4k", "thresh >= .1", 
1380           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1381           aliasing },
1382 /*16*/  { "fpassist", "look for a excessive value", 
1383           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1384           fpassists },
1385 /*17*/  { "otherassistavx", "look for a excessive value", 
1386           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1387           otherassistavx },
1388 /*18*/  { "otherassistsse", "look for a excessive value", 
1389           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1390           otherassistsse },
1391 /*19*/  { "eff1", "thresh < .9", 
1392           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1393           efficiency1 },
1394 /*20*/  { "eff2", "thresh > 1.0", 
1395           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1396           efficiency2 },
1397 };
1398
1399
1400 #define IVY_BRIDGE_COUNT 21
1401 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1402 /*1*/   { "eff1", "thresh < .75", 
1403           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1404           efficiency1 },
1405 /*2*/   { "eff2", "thresh > 1.0", 
1406           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1407           efficiency2 },
1408 /*3*/   { "itlbmiss", "thresh > .05", 
1409           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1410           itlb_miss },
1411 /*4*/   { "icachemiss", "thresh > .05", 
1412           "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1413           icache_miss },
1414 /*5*/   { "lcpstall", "thresh > .05", 
1415           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1416           lcp_stall },
1417 /*6*/   { "cache1", "thresh >= .2", 
1418           "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1419           cache1ib },
1420 /*7*/   { "cache2", "thresh >= .2", 
1421           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1422           cache2ib },
1423 /*8*/   { "contested", "thresh >= .05", 
1424           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1425           contested },
1426 /*9*/   { "datashare", "thresh >= .05",
1427           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1428           datasharing },
1429 /*10*/  { "blockstorefwd", "thresh >= .05", 
1430           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1431           blockstoreforward },
1432 /*11*/  { "splitload", "thresh >= .1", 
1433           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1434           splitloadib },
1435 /*12*/  { "splitstore", "thresh >= .01", 
1436           "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1437           splitstore },
1438 /*13*/  { "aliasing_4k", "thresh >= .1", 
1439           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1440           aliasing },
1441 /*14*/  { "dtlbmissload", "thresh >= .1", 
1442           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1443           dtlb_missload },
1444 /*15*/  { "dtlbmissstore", "thresh >= .05", 
1445           "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1446           dtlb_missstore },
1447 /*16*/  { "br_miss", "thresh >= .2", 
1448           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1449           br_mispredictib },
1450 /*17*/  { "clears", "thresh >= .02", 
1451           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1452           clears },
1453 /*18*/  { "microassist", "thresh >= .05", 
1454           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1455           microassist },
1456 /*19*/  { "fpassist", "look for a excessive value", 
1457           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1458           fpassists },
1459 /*20*/  { "otherassistavx", "look for a excessive value", 
1460           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1461           otherassistavx },
1462 /*21*/  { "otherassistsse", "look for a excessive value", 
1463           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1464           otherassistsse },
1465 };
1466
1467 #define HASWELL_COUNT 20
1468 static struct cpu_entry haswell[HASWELL_COUNT] = {
1469 /*1*/   { "eff1", "thresh < .75", 
1470           "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1471           efficiency1 },
1472 /*2*/   { "eff2", "thresh > 1.0", 
1473           "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1474           efficiency2 },
1475 /*3*/   { "itlbmiss", "thresh > .05", 
1476           "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1477           itlb_miss },
1478 /*4*/   { "icachemiss", "thresh > .05", 
1479           "pmcstat -s ICACHE.MISSES --s CPU_CLK_UNHALTED.THREAD_P -w 1",
1480           icache_miss_has },
1481 /*5*/   { "lcpstall", "thresh > .05", 
1482           "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1483           lcp_stall },
1484 /*6*/   { "cache1", "thresh >= .2", 
1485           "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1486           cache1ib },
1487 /*7*/   { "cache2", "thresh >= .2", 
1488           "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT  -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1489           cache2has },
1490 /*8*/   { "contested", "thresh >= .05", 
1491           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM  -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1492           contested_has },
1493 /*9*/   { "datashare", "thresh >= .05",
1494           "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1495           datasharing_has },
1496 /*10*/  { "blockstorefwd", "thresh >= .05", 
1497           "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1498           blockstoreforward },
1499 /*11*/  { "splitload", "thresh >= .1", 
1500           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1501           splitload },
1502 /*12*/  { "splitstore", "thresh >= .01", 
1503           "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1504           splitstore },
1505 /*13*/  { "aliasing_4k", "thresh >= .1", 
1506           "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1507           aliasing },
1508 /*14*/  { "dtlbmissload", "thresh >= .1", 
1509           "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1510           dtlb_missload },
1511 /*15*/  { "br_miss", "thresh >= .2", 
1512           "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1513           br_mispredict },
1514 /*16*/  { "clears", "thresh >= .02", 
1515           "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1516           clears },
1517 /*17*/  { "microassist", "thresh >= .05", 
1518           "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1519           microassist },
1520 /*18*/  { "fpassist", "look for a excessive value", 
1521           "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1522           fpassists },
1523 /*19*/  { "otherassistavx", "look for a excessive value", 
1524           "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1525           otherassistavx },
1526 /*20*/  { "otherassistsse", "look for a excessive value", 
1527           "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1528           otherassistsse },
1529 };
1530
1531
1532 static void
1533 set_sandybridge(void)
1534 {
1535         strcpy(the_cpu.cputype, "SandyBridge PMC");
1536         the_cpu.number = SANDY_BRIDGE_COUNT;
1537         the_cpu.ents = sandy_bridge;
1538         the_cpu.explain = explain_name_sb;
1539 }
1540
1541 static void
1542 set_ivybridge(void)
1543 {
1544         strcpy(the_cpu.cputype, "IvyBridge PMC");
1545         the_cpu.number = IVY_BRIDGE_COUNT;
1546         the_cpu.ents = ivy_bridge;
1547         the_cpu.explain = explain_name_ib;
1548 }
1549
1550
1551 static void
1552 set_haswell(void)
1553 {
1554         strcpy(the_cpu.cputype, "HASWELL PMC");
1555         the_cpu.number = HASWELL_COUNT;
1556         the_cpu.ents = haswell;
1557         the_cpu.explain = explain_name_has;
1558 }
1559
1560 static void
1561 set_expression(char *name)
1562 {
1563         int found = 0, i;
1564         for(i=0 ; i< the_cpu.number; i++) {
1565                 if (strcmp(name, the_cpu.ents[i].name) == 0) {
1566                         found = 1;
1567                         expression = the_cpu.ents[i].func;
1568                         command = the_cpu.ents[i].command;
1569                         threshold = the_cpu.ents[i].thresh;
1570                         break;
1571                 }
1572         }
1573         if (!found) {
1574                 printf("For CPU type %s we have no expression:%s\n",
1575                        the_cpu.cputype, name);
1576                 exit(-1);
1577         }
1578 }
1579
1580
1581
1582
1583
1584 static int
1585 validate_expression(char *name) 
1586 {
1587         int i, found;
1588
1589         found = 0;
1590         for(i=0 ; i< the_cpu.number; i++) {
1591                 if (strcmp(name, the_cpu.ents[i].name) == 0) {
1592                         found = 1;
1593                         break;
1594                 }
1595         }
1596         if (!found) {
1597                 return(-1);
1598         }
1599         return (0);
1600 }
1601
1602 static void
1603 do_expression(struct counters *cpu, int pos)
1604 {
1605         if (expression == NULL) 
1606                 return;
1607         (*expression)(cpu, pos);
1608 }
1609
1610 static void
1611 process_header(int idx, char *p)
1612 {
1613         struct counters *up;
1614         int i, len, nlen;
1615         /* 
1616          * Given header element idx, at p in
1617          * form 's/NN/nameof'
1618          * process the entry to pull out the name and
1619          * the CPU number.
1620          */
1621         if (strncmp(p, "s/", 2)) {
1622                 printf("Check -- invalid header no s/ in %s\n",
1623                        p);
1624                 return;
1625         }
1626         up = &cnts[idx];
1627         up->cpu = strtol(&p[2], NULL, 10);
1628         len = strlen(p);
1629         for (i=2; i<len; i++) {
1630                 if (p[i] == '/') {
1631                         nlen = strlen(&p[(i+1)]);
1632                         if (nlen < (MAX_NLEN-1)) {
1633                                 strcpy(up->counter_name, &p[(i+1)]);
1634                         } else {
1635                                 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
1636                         }
1637                 }
1638         }
1639 }
1640
1641 static void
1642 build_counters_from_header(FILE *io)
1643 {
1644         char buffer[8192], *p;
1645         int i, len, cnt;
1646         size_t mlen;
1647
1648         /* We have a new start, lets 
1649          * setup our headers and cpus.
1650          */
1651         if (fgets(buffer, sizeof(buffer), io) == NULL) {
1652                 printf("First line can't be read from file err:%d\n", errno);
1653                 return;
1654         }
1655         /*
1656          * Ok output is an array of counters. Once
1657          * we start to read the values in we must
1658          * put them in there slot to match there CPU and 
1659          * counter being updated. We create a mass array
1660          * of the counters, filling in the CPU and 
1661          * counter name. 
1662          */
1663         /* How many do we get? */
1664         len = strlen(buffer);
1665         for (i=0, cnt=0; i<len; i++) {
1666                 if (strncmp(&buffer[i], "s/", 2) == 0) {
1667                         cnt++;
1668                         for(;i<len;i++) {
1669                                 if (buffer[i] == ' ')
1670                                         break;
1671                         }
1672                 }
1673         }
1674         mlen = sizeof(struct counters) * cnt;
1675         cnts = malloc(mlen);
1676         ncnts = cnt;
1677         if (cnts == NULL) {
1678                 printf("No memory err:%d\n", errno);
1679                 return;
1680         }
1681         memset(cnts, 0, mlen);
1682         for (i=0, cnt=0; i<len; i++) {
1683                 if (strncmp(&buffer[i], "s/", 2) == 0) {
1684                         p = &buffer[i];
1685                         for(;i<len;i++) {
1686                                 if (buffer[i] == ' ') {
1687                                         buffer[i] = 0;
1688                                         break;
1689                                 }
1690                         }
1691                         process_header(cnt, p);
1692                         cnt++;
1693                 }
1694         }
1695         if (verbose)
1696                 printf("We have %d entries\n", cnt);    
1697 }
1698 extern int max_to_collect;
1699 int max_to_collect = MAX_COUNTER_SLOTS;
1700
1701 static int
1702 read_a_line(FILE *io) 
1703 {
1704         char buffer[8192], *p, *stop;   
1705         int pos, i;
1706
1707         if (fgets(buffer, sizeof(buffer), io) == NULL) {
1708                 return(0);
1709         }
1710         p = buffer;
1711         for (i=0; i<ncnts; i++) {
1712                 pos = cnts[i].pos;
1713                 cnts[i].vals[pos] = strtol(p, &stop, 0);
1714                 cnts[i].pos++;
1715                 cnts[i].sum += cnts[i].vals[pos];
1716                 p = stop;
1717         }
1718         return (1);
1719 }
1720
1721 extern int cpu_count_out;
1722 int cpu_count_out=0;
1723
1724 static void
1725 print_header(void)
1726 {
1727         int i, cnt, printed_cnt;
1728
1729         printf("*********************************\n");
1730         for(i=0, cnt=0; i<MAX_CPU; i++) {
1731                 if (glob_cpu[i]) {
1732                         cnt++;
1733                 }
1734         }       
1735         cpu_count_out = cnt;
1736         for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
1737                 if (glob_cpu[i]) {
1738                         printf("CPU%d", i);
1739                         printed_cnt++;
1740                 }
1741                 if (printed_cnt == cnt) {
1742                         printf("\n");
1743                         break;
1744                 } else {
1745                         printf("\t");
1746                 }
1747         }
1748 }
1749
1750 static void
1751 lace_cpus_together(void)
1752 {
1753         int i, j, lace_cpu;
1754         struct counters *cpat, *at;
1755
1756         for(i=0; i<ncnts; i++) {
1757                 cpat = &cnts[i];
1758                 if (cpat->next_cpu) {
1759                         /* Already laced in */
1760                         continue;
1761                 }
1762                 lace_cpu = cpat->cpu;
1763                 if (lace_cpu >= MAX_CPU) {
1764                         printf("CPU %d to big\n", lace_cpu);
1765                         continue;
1766                 }
1767                 if (glob_cpu[lace_cpu] == NULL) {
1768                         glob_cpu[lace_cpu] = cpat;
1769                 } else {
1770                         /* Already processed this cpu */
1771                         continue;
1772                 }
1773                 /* Ok look forward for cpu->cpu and link in */
1774                 for(j=(i+1); j<ncnts; j++) {
1775                         at = &cnts[j];
1776                         if (at->next_cpu) {
1777                                 continue;
1778                         }
1779                         if (at->cpu == lace_cpu) {
1780                                 /* Found one */
1781                                 cpat->next_cpu = at;
1782                                 cpat = at;
1783                         }
1784                 }
1785         }
1786 }
1787
1788
1789 static void
1790 process_file(char *filename)
1791 {
1792         FILE *io;
1793         int i;
1794         int line_at, not_done;
1795         pid_t pid_of_command=0;
1796
1797         if (filename ==  NULL) {
1798                 io = my_popen(command, "r", &pid_of_command);
1799         } else {
1800                 io = fopen(filename, "r");
1801                 if (io == NULL) {
1802                         printf("Can't process file %s err:%d\n",
1803                                filename, errno);
1804                         return;
1805                 }
1806         }
1807         build_counters_from_header(io);
1808         if (cnts == NULL) {
1809                 /* Nothing we can do */
1810                 printf("Nothing to do -- no counters built\n");
1811                 return;
1812         }
1813         lace_cpus_together();
1814         print_header();
1815         if (verbose) {
1816                 for (i=0; i<ncnts; i++) {
1817                         printf("Counter:%s cpu:%d index:%d\n",
1818                                cnts[i].counter_name,
1819                                cnts[i].cpu, i);
1820                 }
1821         }
1822         line_at = 0;
1823         not_done = 1;
1824         while(not_done) {
1825                 if (read_a_line(io)) {
1826                         line_at++;
1827                 } else {
1828                         break;
1829                 }
1830                 if (line_at >= max_to_collect) {
1831                         not_done = 0;
1832                 }
1833                 if (filename == NULL) {
1834                         int cnt;
1835                         /* For the ones we dynamically open we print now */
1836                         for(i=0, cnt=0; i<MAX_CPU; i++) {
1837                                 do_expression(glob_cpu[i], (line_at-1));
1838                                 cnt++;
1839                                 if (cnt == cpu_count_out) {
1840                                         printf("\n");
1841                                         break;
1842                                 } else {
1843                                         printf("\t");
1844                                 }
1845                         }
1846                 }
1847         }
1848         if (filename) {
1849                 fclose(io);
1850         } else {
1851                 my_pclose(io, pid_of_command);
1852         }
1853 }
1854 #if defined(__amd64__)
1855 #define cpuid(in,a,b,c,d)\
1856   asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
1857 #else
1858 #define cpuid(in, a, b, c, d) 
1859 #endif
1860
1861 static void
1862 get_cpuid_set(void)
1863 {
1864         unsigned long eax, ebx, ecx, edx;
1865         int model;
1866         pid_t pid_of_command=0;
1867         size_t sz, len;
1868         FILE *io;
1869         char linebuf[1024], *str;
1870
1871         eax = ebx = ecx = edx = 0;
1872
1873         cpuid(0, eax, ebx, ecx, edx);
1874         if (ebx == 0x68747541) {
1875                 printf("AMD processors are not supported by this program\n");
1876                 printf("Sorry\n");
1877                 exit(0);
1878         } else if (ebx == 0x6972794) {
1879                 printf("Cyrix processors are not supported by this program\n");
1880                 printf("Sorry\n");
1881                 exit(0);
1882         } else if (ebx == 0x756e6547) {
1883                 printf("Genuine Intel\n");
1884         } else {
1885                 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
1886                 exit(0);
1887         }
1888         cpuid(1, eax, ebx, ecx, edx);
1889         model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
1890         printf("CPU model is 0x%x id:0x%lx\n", model, eax);
1891         switch (eax & 0xF00) {
1892         case 0x500:             /* Pentium family processors */
1893                 printf("Intel Pentium P5\n");
1894                 goto not_supported;
1895                 break;
1896         case 0x600:             /* Pentium Pro, Celeron, Pentium II & III */
1897                 switch (model) {
1898                 case 0x1:
1899                         printf("Intel Pentium P6\n");
1900                         goto not_supported;
1901                         break;
1902                 case 0x3: 
1903                 case 0x5:
1904                         printf("Intel PII\n");
1905                         goto not_supported;
1906                         break;
1907                 case 0x6: case 0x16:
1908                         printf("Intel CL\n");
1909                         goto not_supported;
1910                         break;
1911                 case 0x7: case 0x8: case 0xA: case 0xB:
1912                         printf("Intel PIII\n");
1913                         goto not_supported;
1914                         break;
1915                 case 0x9: case 0xD:
1916                         printf("Intel PM\n");
1917                         goto not_supported;
1918                         break;
1919                 case 0xE:
1920                         printf("Intel CORE\n");
1921                         goto not_supported;
1922                         break;
1923                 case 0xF:
1924                         printf("Intel CORE2\n");
1925                         goto not_supported;
1926                         break;
1927                 case 0x17:
1928                         printf("Intel CORE2EXTREME\n");
1929                         goto not_supported;
1930                         break;
1931                 case 0x1C:      /* Per Intel document 320047-002. */
1932                         printf("Intel ATOM\n");
1933                         goto not_supported;
1934                         break;
1935                 case 0x1A:
1936                 case 0x1E:      /*
1937                                  * Per Intel document 253669-032 9/2009,
1938                                  * pages A-2 and A-57
1939                                  */
1940                 case 0x1F:      /*
1941                                  * Per Intel document 253669-032 9/2009,
1942                                  * pages A-2 and A-57
1943                                  */
1944                         printf("Intel COREI7\n");
1945                         goto not_supported;
1946                         break;
1947                 case 0x2E:
1948                         printf("Intel NEHALEM\n");
1949                         goto not_supported;
1950                         break;
1951                 case 0x25:      /* Per Intel document 253669-033US 12/2009. */
1952                 case 0x2C:      /* Per Intel document 253669-033US 12/2009. */
1953                         printf("Intel WESTMERE\n");
1954                         goto not_supported;
1955                         break;
1956                 case 0x2F:      /* Westmere-EX, seen in wild */
1957                         printf("Intel WESTMERE\n");
1958                         goto not_supported;
1959                         break;
1960                 case 0x2A:      /* Per Intel document 253669-039US 05/2011. */
1961                         printf("Intel SANDYBRIDGE\n");
1962                         set_sandybridge();
1963                         break;
1964                 case 0x2D:      /* Per Intel document 253669-044US 08/2012. */
1965                         printf("Intel SANDYBRIDGE_XEON\n");
1966                         set_sandybridge();
1967                         break;
1968                 case 0x3A:      /* Per Intel document 253669-043US 05/2012. */
1969                         printf("Intel IVYBRIDGE\n");
1970                         set_ivybridge();
1971                         break;
1972                 case 0x3E:      /* Per Intel document 325462-045US 01/2013. */
1973                         printf("Intel IVYBRIDGE_XEON\n");
1974                         set_ivybridge();
1975                         break;
1976                 case 0x3F:      /* Per Intel document 325462-045US 09/2014. */
1977                         printf("Intel HASWELL (Xeon)\n");
1978                         set_haswell();
1979                         break;
1980                 case 0x3C:      /* Per Intel document 325462-045US 01/2013. */
1981                 case 0x45:
1982                 case 0x46:
1983                         printf("Intel HASWELL\n");
1984                         set_haswell();
1985                         break;
1986                 case 0x4D:
1987                         /* Per Intel document 330061-001 01/2014. */
1988                         printf("Intel ATOM_SILVERMONT\n");
1989                         goto not_supported;
1990                         break;
1991                 default:
1992                         printf("Intel model 0x%x is not known -- sorry\n",
1993                                model);
1994                         goto not_supported;
1995                         break;
1996                 }
1997                 break;
1998         case 0xF00:             /* P4 */
1999                 printf("Intel unknown model %d\n", model);
2000                 goto not_supported;
2001                 break;
2002         }
2003         /* Ok lets load the list of all known PMC's */
2004         io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2005         if (valid_pmcs == NULL) {
2006                 /* Likely */
2007                 pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2008                 sz = sizeof(char *) * pmc_allocated_cnt;
2009                 valid_pmcs = malloc(sz);
2010                 if (valid_pmcs == NULL) {
2011                         printf("No memory allocation fails at startup?\n");     
2012                         exit(-1);
2013                 }
2014                 memset(valid_pmcs, 0, sz);
2015         }
2016         
2017         while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2018                 if (linebuf[0] != '\t') {
2019                         /* sometimes headers ;-) */
2020                         continue;
2021                 }
2022                 len = strlen(linebuf);
2023                 if (linebuf[(len-1)] == '\n') {
2024                         /* Likely */
2025                         linebuf[(len-1)] = 0;
2026                 }
2027                 str = &linebuf[1];
2028                 len = strlen(str) + 1;
2029                 valid_pmcs[valid_pmc_cnt] = malloc(len);
2030                 if (valid_pmcs[valid_pmc_cnt] == NULL) {
2031                         printf("No memory2 allocation fails at startup?\n");    
2032                         exit(-1);
2033                 }
2034                 memset(valid_pmcs[valid_pmc_cnt], 0, len);
2035                 strcpy(valid_pmcs[valid_pmc_cnt], str);
2036                 valid_pmc_cnt++;
2037                 if (valid_pmc_cnt >= pmc_allocated_cnt) {
2038                         /* Got to expand -- unlikely */
2039                         char **more;
2040
2041                         sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2042                         more = malloc(sz);
2043                         if (more == NULL) {
2044                                 printf("No memory3 allocation fails at startup?\n");    
2045                                 exit(-1);
2046                         }
2047                         memset(more, sz, 0);
2048                         memcpy(more, valid_pmcs, sz);
2049                         pmc_allocated_cnt *= 2;
2050                         free(valid_pmcs);
2051                         valid_pmcs = more;
2052                 }
2053         }
2054         my_pclose(io, pid_of_command);  
2055         return;
2056 not_supported:
2057         printf("Not supported\n");      
2058         exit(-1);
2059 }
2060
2061 static void
2062 explain_all(void)
2063 {
2064         int i;
2065         printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2066         printf("-------------------------------------------------------------\n");
2067         for(i=0; i<the_cpu.number; i++){
2068                 printf("For -e %s ", the_cpu.ents[i].name);
2069                 (*the_cpu.explain)(the_cpu.ents[i].name);
2070                 printf("----------------------------\n");
2071         }
2072 }
2073
/*
 * Try one PMC by running pmcstat on CPU 0 for a short interval and
 * report "Pass" or "Failed" on stdout.
 *
 * "pmc" is the event name and "out_so_far" the number of characters
 * the caller already printed on this line; the verdict is padded out
 * to column 50.  The first output line must mention the event name
 * (or contain "ERROR"); on a match the second line is printed after
 * "Pass" with its leading blanks removed.
 */
static void
test_for_a_pmc(const char *pmc, int out_so_far)
{
	FILE *io;
	pid_t pid_of_command=0;
	char my_command[1024];
	char line[1024];
	char resp[1024];
	int len, llen, i;

	if (out_so_far < 50) {
		len = 50 - out_so_far;
		for (i = 0; i < len; i++) {
			printf(" ");
		}
	}
	/* Bounded formatting: the name comes from external output. */
	len = snprintf(my_command, sizeof(my_command),
	    "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
	if (len < 0 || (size_t)len >= sizeof(my_command)) {
		printf("Failed -- PMC name too long\n");
		return;
	}
	io = my_popen(my_command, "r", &pid_of_command);
	if (io == NULL) {
		printf("Failed -- popen fails\n");
		return;
	}
	/* Setup what we expect (fits: the longer command above did). */
	len = snprintf(resp, sizeof(resp), "%s", pmc);
	if (fgets(line, sizeof(line), io) == NULL) {
		printf("Failed -- no output from pmstat\n");
		goto out;
	}
	llen = strlen(line);
	if (llen > 0 && line[(llen-1)] == '\n') {
		line[(llen-1)] = 0;
		llen--;
	}
	/*
	 * The bound is inclusive so a name that ends exactly at the end
	 * of the line is still recognised.
	 */
	for (i = 2; i <= (llen - len); i++) {
		if (strncmp(&line[i], "ERROR", 5) == 0) {
			printf("Failed %s\n", line);
			goto out;
		} else if (strncmp(&line[i], resp, len) == 0) {
			int j, k;

			if (fgets(line, sizeof(line), io) == NULL) {
				printf("Failed -- no second output from pmstat\n");
				goto out;
			}
			/*
			 * Skip leading blanks one at a time; stops at the
			 * terminator at the latest.
			 */
			for (j = 0; line[j] == ' '; j++)
				;
			printf("Pass");
			len = strlen(&line[j]);
			if (len < 20) {
				for (k = 0; k < (20 - len); k++) {
					printf(" ");
				}
			}
			if (len) {
				printf("%s", &line[j]);
			} else {
				printf("\n");
			}
			goto out;
		}
	}
	printf("Failed -- '%s' not '%s'\n", line, resp);
out:
	my_pclose(io, pid_of_command);
}
2146
/*
 * Add a private copy of "name" to the string list "vars" unless one
 * of the first "cur_cnt" entries already equals it.
 *
 * The copy is stored in vars[cur_cnt], which must still be NULL;
 * otherwise, or when memory runs out, the program exits with -1.
 *
 * Returns 1 when the name was added and 0 when it was already there.
 */
static int
add_it_to(char **vars, int cur_cnt, char *name)
{
	char *copy;
	size_t sz;
	int idx;

	for (idx = 0; idx < cur_cnt; idx++) {
		if (strcmp(vars[idx], name) == 0) {
			/* Already have */
			return (0);
		}
	}
	if (vars[cur_cnt] != NULL) {
		printf("Cur_cnt:%d filled with %s??\n",
		       cur_cnt, vars[cur_cnt]);
		exit(-1);
	}
	/* Ok its new */
	sz = strlen(name) + 1;
	copy = malloc(sz);
	if (copy == NULL) {
		printf("No memory %s\n", __func__);
		exit(-1);
	}
	/* sz includes the terminator. */
	memcpy(copy, name, sz);
	vars[cur_cnt] = copy;
	return (1);
}
2174
2175 static char *
2176 build_command_for_exp(struct expression *exp)
2177 {
2178         /*
2179          * Build the pmcstat command to handle
2180          * the passed in expression.
2181          * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2182          * where NNN and QQQ represent the PMC's in the expression
2183          * uniquely..
2184          */
2185         char forming[1024];
2186         int cnt_pmc, alloced_pmcs, i;
2187         struct expression *at;
2188         char **vars, *cmd;
2189         size_t mal;
2190
2191         alloced_pmcs = cnt_pmc = 0;
2192         /* first how many do we have */
2193         at = exp;
2194         while (at) {
2195                 if (at->type == TYPE_VALUE_PMC) {
2196                         cnt_pmc++;
2197                 }
2198                 at = at->next;
2199         }
2200         if (cnt_pmc == 0) {
2201                 printf("No PMC's in your expression -- nothing to do!!\n");
2202                 exit(0);
2203         }
2204         mal = cnt_pmc * sizeof(char *);
2205         vars = malloc(mal);
2206         if (vars == NULL) {
2207                 printf("No memory\n");
2208                 exit(-1);
2209         }
2210         memset(vars, 0, mal);
2211         at = exp;
2212         while (at) {
2213                 if (at->type == TYPE_VALUE_PMC) {
2214                         if(add_it_to(vars, alloced_pmcs, at->name)) {
2215                                 alloced_pmcs++;
2216                         }
2217                 }
2218                 at = at->next;
2219         }
2220         /* Now we have a unique list in vars so create our command */
2221         mal = 23; /*    "/usr/sbin/pmcstat -w 1"  + \0 */
2222         for(i=0; i<alloced_pmcs; i++) {
2223                 mal += strlen(vars[i]) + 4;     /* var + " -s " */
2224         }
2225         cmd = malloc((mal+2));
2226         if (cmd == NULL) {
2227                 printf("%s out of mem\n", __FUNCTION__);
2228                 exit(-1);
2229         }
2230         memset(cmd, 0, (mal+2));
2231         strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2232         at = exp;
2233         for(i=0; i<alloced_pmcs; i++) {
2234                 sprintf(forming, " -s %s", vars[i]);
2235                 strcat(cmd, forming);
2236                 free(vars[i]);
2237                 vars[i] = NULL;
2238         }
2239         free(vars);
2240         return(cmd);
2241 }
2242
2243 static int
2244 user_expr(struct counters *cpu, int pos)
2245 {
2246         int ret;        
2247         double res;
2248         struct counters *var;
2249         struct expression *at;
2250
2251         at = master_exp;
2252         while (at) {
2253                 if (at->type == TYPE_VALUE_PMC) {
2254                         var = find_counter(cpu, at->name);
2255                         if (var == NULL) {
2256                                 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2257                                 exit(-1);
2258                         }
2259                         if (pos != -1) {
2260                                 at->value = var->vals[pos] * 1.0;
2261                         } else {
2262                                 at->value = var->sum * 1.0;
2263                         }
2264                 }
2265                 at = at->next;
2266         }
2267         res = run_expr(master_exp, 1, NULL);
2268         ret = printf("%1.3f", res);
2269         return(ret);
2270 }
2271
2272
2273 static void
2274 set_manual_exp(struct expression *exp)
2275 {
2276         expression = user_expr;
2277         command = build_command_for_exp(exp);
2278         threshold = "User defined threshold";
2279 }
2280
2281 static void
2282 run_tests(void)
2283 {
2284         int i, lenout;
2285         printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2286         printf("------------------------------------------------------------------------\n");
2287         for(i=0; i<valid_pmc_cnt; i++) {
2288                 lenout = printf("%s", valid_pmcs[i]);
2289                 fflush(stdout);
2290                 test_for_a_pmc(valid_pmcs[i], lenout);
2291         }
2292 }
2293 static void
2294 list_all(void)
2295 {
2296         int i, cnt, j;
2297         printf("PMC                                               Abbreviation\n");
2298         printf("--------------------------------------------------------------\n");
2299         for(i=0; i<valid_pmc_cnt; i++) {
2300                 cnt = printf("%s", valid_pmcs[i]);
2301                 for(j=cnt; j<52; j++) {
2302                         printf(" ");
2303                 }
2304                 printf("%%%d\n", i);
2305         }
2306 }
2307
2308
2309 int
2310 main(int argc, char **argv)
2311 {
2312         int i, j, cnt;
2313         char *filename=NULL;
2314         char *name=NULL;
2315         int help_only = 0;
2316         int test_mode = 0;
2317
2318         get_cpuid_set();
2319         memset(glob_cpu, 0, sizeof(glob_cpu));
2320         while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) {
2321                 switch (i) {
2322                 case 'L':
2323                         list_all();
2324                         return(0);
2325                 case 'H':
2326                         printf("**********************************\n");
2327                         explain_all();
2328                         printf("**********************************\n");
2329                         return(0);
2330                         break;
2331                 case 'T':
2332                         test_mode = 1;
2333                         break;
2334                 case 'E':
2335                         master_exp = parse_expression(optarg);
2336                         if (master_exp) {
2337                                 set_manual_exp(master_exp);
2338                         }
2339                         break;
2340                 case 'e':
2341                         if (validate_expression(optarg)) {
2342                                 printf("Unknown expression %s\n", optarg);
2343                                 return(0);
2344                         }
2345                         name = optarg;
2346                         set_expression(optarg);
2347                         break;
2348                 case 'm':
2349                         max_to_collect = strtol(optarg, NULL, 0);
2350                         if (max_to_collect > MAX_COUNTER_SLOTS) {
2351                                 /* You can't collect more than max in array */
2352                                 max_to_collect = MAX_COUNTER_SLOTS;
2353                         }
2354                         break;
2355                 case 'v':
2356                         verbose++;
2357                         break;
2358                 case 'h':
2359                         help_only = 1;
2360                         break;
2361                 case 'i':
2362                         filename = optarg;
2363                         break;
2364                 case '?':
2365                 default:
2366                 use:
2367                         printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2368                                argv[0]);
2369                         printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2370                         printf("-v -- verbose dump debug type things -- you don't want this\n");
2371                         printf("-m N -- maximum to collect is N measurments\n");
2372                         printf("-e expr-name -- Do expression expr-name\n");
2373                         printf("-E 'your expression' -- Do your expression\n");
2374                         printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2375                         printf("-H -- Don't run anything, just explain all canned expressions\n");
2376                         printf("-T -- Test all PMC's defined by this processor\n");
2377                         return(0);
2378                         break;
2379                 };
2380         }
2381         if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) {
2382                 printf("Without setting an expression we cannot dynamically gather information\n");
2383                 printf("you must supply a filename (and you probably want verbosity)\n");
2384                 goto use;
2385         }
2386         if (test_mode) {
2387                 run_tests();
2388                 return(0);
2389         }
2390         printf("*********************************\n");
2391         if (master_exp == NULL) {
2392                 (*the_cpu.explain)(name);
2393         } else {
2394                 printf("Examine your expression ");
2395                 print_exp(master_exp);
2396                 printf("User defined threshold\n");
2397         }
2398         if (help_only) {
2399                 return(0);
2400         }
2401         process_file(filename);
2402         if (verbose >= 2) {
2403                 for (i=0; i<ncnts; i++) {
2404                         printf("Counter:%s cpu:%d index:%d\n",
2405                                cnts[i].counter_name,
2406                                cnts[i].cpu, i);
2407                         for(j=0; j<cnts[i].pos; j++) {
2408                                 printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2409                         }
2410                         printf(" sum - %ld\n", (long int)cnts[i].sum);
2411                 }
2412         }
2413         if (expression == NULL) {
2414                 return(0);
2415         }
2416         for(i=0, cnt=0; i<MAX_CPU; i++) {
2417                 if (glob_cpu[i]) {
2418                         do_expression(glob_cpu[i], -1);
2419                         cnt++;
2420                         if (cnt == cpu_count_out) {
2421                                 printf("\n");
2422                                 break;
2423                         } else {
2424                                 printf("\t");
2425                         }
2426                 }
2427         }
2428         return(0);      
2429 }