2 * Copyright (c) 2014, 2015 Netflix Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer,
10 * in this position and unchanged.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 * 3. The name of the author may not be used to endorse or promote products
15 * derived from this software without specific prior written permission
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #include <sys/types.h>
34 #include <sys/errno.h>
38 #include "eval_expr.h"
39 __FBSDID("$FreeBSD$");
41 #define MAX_COUNTER_SLOTS 1024
44 static int verbose = 0;
46 extern char **environ;
47 extern struct expression *master_exp;
48 struct expression *master_exp=NULL;
50 #define PMC_INITIAL_ALLOC 512
51 extern char **valid_pmcs;
52 char **valid_pmcs = NULL;
53 extern int valid_pmc_cnt;
55 extern int pmc_allocated_cnt;
56 int pmc_allocated_cnt=0;
59 * The following are two variants on popen and pclose, with
60 * the caveat that they get you the PID so that you
61 * can supply it to pclose so it can send a SIGTERM
65 my_popen(const char *command, const char *dir, pid_t *p_pid)
68 int pdesin[2], pdesout[2];
75 if ((strcmp(dir, "r") != 0) &&
76 (strcmp(dir, "w") != 0)) {
83 if (pipe(pdesout) < 0) {
84 (void)close(pdesin[0]);
85 (void)close(pdesin[1]);
90 strcpy(cmd2, command);
96 switch (pid = fork()) {
98 (void)close(pdesin[0]);
99 (void)close(pdesin[1]);
100 (void)close(pdesout[0]);
101 (void)close(pdesout[1]);
105 /* Close out un-used sides */
106 (void)close(pdesin[1]);
107 (void)close(pdesout[0]);
108 /* Now prepare the stdin of the process */
110 (void)dup(pdesin[0]);
111 (void)close(pdesin[0]);
112 /* Now prepare the stdout of the process */
114 (void)dup(pdesout[1]);
115 /* And lets do stderr just in case */
117 (void)dup(pdesout[1]);
118 (void)close(pdesout[1]);
120 execve("/bin/sh", argv, environ);
124 /* Parent; assume fdopen can't fail. */
127 if (strcmp(dir, "r") != 0) {
128 io_out = fdopen(pdesin[1], "w");
129 (void)close(pdesin[0]);
130 (void)close(pdesout[0]);
131 (void)close(pdesout[1]);
134 /* Prepare the input stream */
135 io_in = fdopen(pdesout[0], "r");
136 (void)close(pdesout[1]);
137 (void)close(pdesin[0]);
138 (void)close(pdesin[1]);
145 * Pclose returns -1 if stream is not associated with a `popened' command,
146 * if already `pclosed', or waitpid returns an error.
149 my_pclose(FILE *io, pid_t the_pid)
155 * Find the appropriate file pointer and remove it from the list.
158 /* Die if you are not dead! */
159 kill(the_pid, SIGTERM);
161 pid = wait4(the_pid, &pstat, 0, (struct rusage *)0);
162 } while (pid == -1 && errno == EINTR);
166 struct counters *next_cpu;
167 char counter_name[MAX_NLEN]; /* Name of counter */
168 int cpu; /* CPU we are on */
169 int pos; /* Index we are filling to. */
170 uint64_t vals[MAX_COUNTER_SLOTS]; /* Sampled entries, up to MAX_COUNTER_SLOTS of them */
171 uint64_t sum; /* Summary of entries */
174 extern struct counters *glob_cpu[MAX_CPU];
175 struct counters *glob_cpu[MAX_CPU];
177 extern struct counters *cnts;
178 struct counters *cnts=NULL;
183 extern int (*expression)(struct counters *, int);
184 int (*expression)(struct counters *, int);
186 static const char *threshold=NULL;
187 static const char *command;
193 int (*func)(struct counters *, int);
200 struct cpu_entry *ents;
201 void (*explain)(const char *name);
203 extern struct cpu_type the_cpu;
204 struct cpu_type the_cpu;
/*
 * Print a short explanation of the metric called `name` for the "_sb" CPU
 * flavour (presumably Sandy Bridge -- confirm against the cpu_type table):
 * first the PMC formula that the metric is computed from, then the
 * threshold beyond which the value suggests room for improvement.
 *
 * name - metric name as used on the command line (e.g. "splitload").
 *        Must not be NULL.
 *
 * An unrecognised name only produces "Unknown name:<name>"; the threshold
 * sentence is deliberately not printed in that case because there is no
 * threshold to report.
 */
static void
explain_name_sb(const char *name)
{
	/* One row per metric; lookup is by exact name, first match wins. */
	static const struct {
		const char *name;	/* metric name */
		const char *formula;	/* text printed verbatim */
		const char *thresh;	/* threshold description */
	} metrics[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/* The opening parenthesis was missing from this formula text. */
		{ "splitload",
		  "Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	size_t i;

	for (i = 0; i < sizeof(metrics) / sizeof(metrics[0]); i++) {
		if (strcmp(name, metrics[i].name) != 0)
			continue;
		/* "%s" keeps the table text from being parsed as a format. */
		printf("%s", metrics[i].formula);
		printf("If the value printed is %s we may have the ability to improve performance\n", metrics[i].thresh);
		return;
	}
	printf("Unknown name:%s\n", name);
}
/*
 * Print a short explanation of the metric called `name` for the "_ib" CPU
 * flavour (presumably Ivy Bridge -- confirm against the cpu_type table):
 * first the PMC formula that the metric is computed from, then the
 * threshold beyond which the value suggests room for improvement.
 *
 * name - metric name as used on the command line (e.g. "cache2").
 *        Must not be NULL.
 *
 * An unrecognised name only produces "Unknown name:<name>"; the threshold
 * sentence is deliberately not printed in that case because there is no
 * threshold to report.
 */
static void
explain_name_ib(const char *name)
{
	/* One row per metric; lookup is by exact name, first match wins. */
	static const struct {
		const char *name;	/* metric name */
		const char *formula;	/* text printed verbatim */
		const char *thresh;	/* threshold description */
	} metrics[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  " MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		/*
		 * The original text had an unbalanced parenthesis and no
		 * weight; the 29x factor follows the formula comment on
		 * cache2ib().  NOTE(review): confirm it matches the constant
		 * cache2ib() actually uses.
		 */
		{ "cache2",
		  "Examine (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  " LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	size_t i;

	for (i = 0; i < sizeof(metrics) / sizeof(metrics[0]); i++) {
		if (strcmp(name, metrics[i].name) != 0)
			continue;
		/* "%s" keeps the table text from being parsed as a format. */
		printf("%s", metrics[i].formula);
		printf("If the value printed is %s we may have the ability to improve performance\n", metrics[i].thresh);
		return;
	}
	printf("Unknown name:%s\n", name);
}
/*
 * Print a short explanation of the metric called `name` for the "_has" CPU
 * flavour (presumably Haswell -- confirm against the cpu_type table):
 * first the PMC formula that the metric is computed from, then the
 * threshold beyond which the value suggests room for improvement.
 *
 * name - metric name as used on the command line (e.g. "eff1").
 *        Must not be NULL.
 *
 * An unrecognised name only produces "Unknown name:<name>"; the threshold
 * sentence is deliberately not printed in that case because there is no
 * threshold to report.
 */
static void
explain_name_has(const char *name)
{
	/* One row per metric; lookup is by exact name, first match wins. */
	static const struct {
		const char *name;	/* metric name */
		const char *formula;	/* text printed verbatim */
		const char *thresh;	/* threshold description */
	} metrics[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  " / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	size_t i;

	for (i = 0; i < sizeof(metrics) / sizeof(metrics[0]); i++) {
		if (strcmp(name, metrics[i].name) != 0)
			continue;
		/* "%s" keeps the table text from being parsed as a format. */
		printf("%s", metrics[i].formula);
		printf("If the value printed is %s we may have the ability to improve performance\n", metrics[i].thresh);
		return;
	}
	printf("Unknown name:%s\n", name);
}
447 static struct counters *
448 find_counter(struct counters *base, const char *name)
456 if (strncmp(at->counter_name, name, len) == 0) {
461 printf("Can't find counter %s\n", name);
462 printf("We have:\n");
465 printf("- %s\n", at->counter_name);
472 allocstall1(struct counters *cpu, int pos)
474 /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
476 struct counters *partial;
477 struct counters *unhalt;
479 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
480 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
482 par = partial->vals[pos] * 1.0;
483 un = unhalt->vals[pos] * 1.0;
485 par = partial->sum * 1.0;
486 un = unhalt->sum * 1.0;
489 ret = printf("%1.3f", res);
494 allocstall2(struct counters *cpu, int pos)
496 /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
498 struct counters *partial;
499 struct counters *unhalt;
501 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
502 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
504 par = partial->vals[pos] * 1.0;
505 un = unhalt->vals[pos] * 1.0;
507 par = partial->sum * 1.0;
508 un = unhalt->sum * 1.0;
511 ret = printf("%1.3f", res);
516 br_mispredict(struct counters *cpu, int pos)
518 struct counters *brctr;
519 struct counters *unhalt;
521 /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
522 double br, un, con, res;
525 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
526 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
528 br = brctr->vals[pos] * 1.0;
529 un = unhalt->vals[pos] * 1.0;
531 br = brctr->sum * 1.0;
532 un = unhalt->sum * 1.0;
535 ret = printf("%1.3f", res);
540 br_mispredictib(struct counters *cpu, int pos)
542 struct counters *brctr;
543 struct counters *unhalt;
544 struct counters *clear, *clear2, *clear3;
545 struct counters *uops;
546 struct counters *recv;
547 struct counters *iss;
548 /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
551 * (BR_MISP_RETIRED.ALL_BRANCHES /
552 * (BR_MISP_RETIRED.ALL_BRANCHES +
553 * MACHINE_CLEAR.COUNT) *
554 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
557 double br, cl, cl2, cl3, uo, re, un, con, res, is;
560 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
561 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
562 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
563 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
564 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
565 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
566 iss = find_counter(cpu, "UOPS_ISSUED.ANY");
567 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
569 br = brctr->vals[pos] * 1.0;
570 cl = clear->vals[pos] * 1.0;
571 cl2 = clear2->vals[pos] * 1.0;
572 cl3 = clear3->vals[pos] * 1.0;
573 uo = uops->vals[pos] * 1.0;
574 re = recv->vals[pos] * 1.0;
575 is = iss->vals[pos] * 1.0;
576 un = unhalt->vals[pos] * 1.0;
578 br = brctr->sum * 1.0;
579 cl = clear->sum * 1.0;
580 cl2 = clear2->sum * 1.0;
581 cl3 = clear3->sum * 1.0;
582 uo = uops->sum * 1.0;
583 re = recv->sum * 1.0;
585 un = unhalt->sum * 1.0;
587 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
588 ret = printf("%1.3f", res);
593 splitloadib(struct counters *cpu, int pos)
596 struct counters *mem;
597 struct counters *l1d, *ldblock;
598 struct counters *unhalt;
599 double un, memd, res, l1, ldb;
601 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
602 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
605 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
606 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
607 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
608 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
610 memd = mem->vals[pos] * 1.0;
611 l1 = l1d->vals[pos] * 1.0;
612 ldb = ldblock->vals[pos] * 1.0;
613 un = unhalt->vals[pos] * 1.0;
615 memd = mem->sum * 1.0;
617 ldb = ldblock->sum * 1.0;
618 un = unhalt->sum * 1.0;
620 res = ((l1 / memd) * ldb)/un;
621 ret = printf("%1.3f", res);
626 splitload(struct counters *cpu, int pos)
629 struct counters *mem;
630 struct counters *unhalt;
631 double con, un, memd, res;
632 /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
635 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
636 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
638 memd = mem->vals[pos] * 1.0;
639 un = unhalt->vals[pos] * 1.0;
641 memd = mem->sum * 1.0;
642 un = unhalt->sum * 1.0;
644 res = (memd * con)/un;
645 ret = printf("%1.3f", res);
650 splitstore(struct counters *cpu, int pos)
652 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
654 struct counters *mem_split;
655 struct counters *mem_stores;
656 double memsplit, memstore, res;
657 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
658 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
660 memsplit = mem_split->vals[pos] * 1.0;
661 memstore = mem_stores->vals[pos] * 1.0;
663 memsplit = mem_split->sum * 1.0;
664 memstore = mem_stores->sum * 1.0;
666 res = memsplit/memstore;
667 ret = printf("%1.3f", res);
673 contested(struct counters *cpu, int pos)
675 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
677 struct counters *mem;
678 struct counters *unhalt;
679 double con, un, memd, res;
682 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
683 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
685 memd = mem->vals[pos] * 1.0;
686 un = unhalt->vals[pos] * 1.0;
688 memd = mem->sum * 1.0;
689 un = unhalt->sum * 1.0;
691 res = (memd * con)/un;
692 ret = printf("%1.3f", res);
697 contested_has(struct counters *cpu, int pos)
699 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
701 struct counters *mem;
702 struct counters *unhalt;
703 double con, un, memd, res;
706 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
707 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
709 memd = mem->vals[pos] * 1.0;
710 un = unhalt->vals[pos] * 1.0;
712 memd = mem->sum * 1.0;
713 un = unhalt->sum * 1.0;
715 res = (memd * con)/un;
716 ret = printf("%1.3f", res);
722 blockstoreforward(struct counters *cpu, int pos)
724 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
726 struct counters *ldb;
727 struct counters *unhalt;
728 double con, un, ld, res;
731 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
732 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
734 ld = ldb->vals[pos] * 1.0;
735 un = unhalt->vals[pos] * 1.0;
738 un = unhalt->sum * 1.0;
741 ret = printf("%1.3f", res);
746 cache2(struct counters *cpu, int pos)
749 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
750 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
753 struct counters *mem1, *mem2, *mem3;
754 struct counters *unhalt;
755 double con1, con2, con3, un, me_1, me_2, me_3, res;
760 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
761 /* Call for MEM_LOAD_RETIRED.L3_HIT possibly MEM_LOAD_UOPS_RETIRED.LLC_HIT ?*/
762 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
763 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
764 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
766 me_1 = mem1->vals[pos] * 1.0;
767 me_2 = mem2->vals[pos] * 1.0;
768 me_3 = mem3->vals[pos] * 1.0;
769 un = unhalt->vals[pos] * 1.0;
771 me_1 = mem1->sum * 1.0;
772 me_2 = mem2->sum * 1.0;
773 me_3 = mem3->sum * 1.0;
774 un = unhalt->sum * 1.0;
776 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
777 ret = printf("%1.3f", res);
782 datasharing(struct counters *cpu, int pos)
785 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
788 struct counters *mem;
789 struct counters *unhalt;
790 double con, res, me, un;
793 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
794 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
796 me = mem->vals[pos] * 1.0;
797 un = unhalt->vals[pos] * 1.0;
800 un = unhalt->sum * 1.0;
803 ret = printf("%1.3f", res);
810 datasharing_has(struct counters *cpu, int pos)
813 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
816 struct counters *mem;
817 struct counters *unhalt;
818 double con, res, me, un;
821 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
822 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
824 me = mem->vals[pos] * 1.0;
825 un = unhalt->vals[pos] * 1.0;
828 un = unhalt->sum * 1.0;
831 ret = printf("%1.3f", res);
838 cache2ib(struct counters *cpu, int pos)
841 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
844 struct counters *mem;
845 struct counters *unhalt;
846 double con, un, me, res;
849 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
850 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
852 me = mem->vals[pos] * 1.0;
853 un = unhalt->vals[pos] * 1.0;
856 un = unhalt->sum * 1.0;
859 ret = printf("%1.3f", res);
864 cache2has(struct counters *cpu, int pos)
867 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
868 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
869 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
870 * / CPU_CLK_UNHALTED.THREAD_P
873 struct counters *mem1, *mem2, *mem3;
874 struct counters *unhalt;
875 double con1, con2, con3, un, me1, me2, me3, res;
880 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
881 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
882 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
883 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
885 me1 = mem1->vals[pos] * 1.0;
886 me2 = mem2->vals[pos] * 1.0;
887 me3 = mem3->vals[pos] * 1.0;
888 un = unhalt->vals[pos] * 1.0;
890 me1 = mem1->sum * 1.0;
891 me2 = mem2->sum * 1.0;
892 me3 = mem3->sum * 1.0;
893 un = unhalt->sum * 1.0;
895 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
896 ret = printf("%1.3f", res);
901 cache1(struct counters *cpu, int pos)
903 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
905 struct counters *mem;
906 struct counters *unhalt;
907 double con, un, me, res;
910 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
911 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
913 me = mem->vals[pos] * 1.0;
914 un = unhalt->vals[pos] * 1.0;
917 un = unhalt->sum * 1.0;
920 ret = printf("%1.3f", res);
/*
 * cache1ib: looks up the CPU_CLK_UNHALTED.THREAD_P and
 * MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM counters in the `cpu` list,
 * reads their vals[pos] sample or their sum, and prints the result with
 * "%1.3f".
 * NOTE(review): presumably a special pos value selects the sums -- confirm.
 */
925 cache1ib(struct counters *cpu, int pos)
927 /* 9 - (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
929 struct counters *mem;
930 struct counters *unhalt;
931 double con, un, me, res;
934 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
935 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
937 me = mem->vals[pos] * 1.0;
938 un = unhalt->vals[pos] * 1.0;
941 un = unhalt->sum * 1.0;
944 ret = printf("%1.3f", res);
950 dtlb_missload(struct counters *cpu, int pos)
952 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
954 struct counters *dtlb_m, *dtlb_d;
955 struct counters *unhalt;
956 double con, un, d1, d2, res;
959 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
960 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
961 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
963 d1 = dtlb_m->vals[pos] * 1.0;
964 d2 = dtlb_d->vals[pos] * 1.0;
965 un = unhalt->vals[pos] * 1.0;
967 d1 = dtlb_m->sum * 1.0;
968 d2 = dtlb_d->sum * 1.0;
969 un = unhalt->sum * 1.0;
971 res = ((d1 * con) + d2)/un;
972 ret = printf("%1.3f", res);
977 dtlb_missstore(struct counters *cpu, int pos)
980 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
981 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
984 struct counters *dtsb_m, *dtsb_d;
985 struct counters *unhalt;
986 double con, un, d1, d2, res;
989 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
990 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
991 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
993 d1 = dtsb_m->vals[pos] * 1.0;
994 d2 = dtsb_d->vals[pos] * 1.0;
995 un = unhalt->vals[pos] * 1.0;
997 d1 = dtsb_m->sum * 1.0;
998 d2 = dtsb_d->sum * 1.0;
999 un = unhalt->sum * 1.0;
1001 res = ((d1 * con) + d2)/un;
1002 ret = printf("%1.3f", res);
/*
 * itlb_miss: looks up the CPU_CLK_UNHALTED.THREAD_P and
 * ITLB_MISSES.WALK_DURATION counters in the `cpu` list, reads their
 * vals[pos] sample or their sum, and prints the result with "%1.3f".
 * NOTE(review): presumably a special pos value selects the sums -- confirm.
 */
1007 itlb_miss(struct counters *cpu, int pos)
1009 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P IB */
1011 struct counters *itlb;
1012 struct counters *unhalt;
1015 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1016 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1018 d1 = itlb->vals[pos] * 1.0;
1019 un = unhalt->vals[pos] * 1.0;
1021 d1 = itlb->sum * 1.0;
1022 un = unhalt->sum * 1.0;
1025 ret = printf("%1.3f", res);
1030 icache_miss(struct counters *cpu, int pos)
1032 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1035 struct counters *itlb, *icache;
1036 struct counters *unhalt;
1037 double un, d1, ic, res;
1039 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1040 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1041 icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
1043 d1 = itlb->vals[pos] * 1.0;
1044 ic = icache->vals[pos] * 1.0;
1045 un = unhalt->vals[pos] * 1.0;
1047 d1 = itlb->sum * 1.0;
1048 ic = icache->sum * 1.0;
1049 un = unhalt->sum * 1.0;
1052 ret = printf("%1.3f", res);
1058 icache_miss_has(struct counters *cpu, int pos)
1060 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1063 struct counters *icache;
1064 struct counters *unhalt;
1065 double un, con, ic, res;
1067 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1068 icache = find_counter(cpu, "ICACHE.MISSES");
1071 ic = icache->vals[pos] * 1.0;
1072 un = unhalt->vals[pos] * 1.0;
1074 ic = icache->sum * 1.0;
1075 un = unhalt->sum * 1.0;
1077 res = (con * ic)/un;
1078 ret = printf("%1.3f", res);
1084 lcp_stall(struct counters *cpu, int pos)
1086 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1088 struct counters *ild;
1089 struct counters *unhalt;
1092 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1093 ild = find_counter(cpu, "ILD_STALL.LCP");
1095 d1 = ild->vals[pos] * 1.0;
1096 un = unhalt->vals[pos] * 1.0;
1098 d1 = ild->sum * 1.0;
1099 un = unhalt->sum * 1.0;
1102 ret = printf("%1.3f", res);
/*
 * frontendstall: expression handler taking a counter list (cpu) and a
 * sample index (pos).
 *
 * Looks up CPU_CLK_UNHALTED.THREAD_P and IDQ_UOPS_NOT_DELIVERED.CORE, loads
 * them as doubles from either vals[pos] or the accumulated sum, computes
 * res = id / (un * con) and prints it using the %1.3f format.
 *
 * NOTE(review): the assignment of con is not visible in this view; the
 * formula comment below indicates a factor of 4. The condition choosing
 * vals[pos] versus sum is not visible either.
 */
1109 frontendstall(struct counters *cpu, int pos)
1111 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1113 struct counters *idq;
1114 struct counters *unhalt;
1115 double con, un, id, res;
1118 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1119 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
/* Per-sample path: read slot pos of each counter. */
1121 id = idq->vals[pos] * 1.0;
1122 un = unhalt->vals[pos] * 1.0;
/* Summary path: read the accumulated sum of each counter. */
1124 id = idq->sum * 1.0;
1125 un = unhalt->sum * 1.0;
1127 res = id/(un * con);
1128 ret = printf("%1.3f", res);
/*
 * clears: expression handler taking a counter list (cpu) and a sample
 * index (pos).
 *
 * Looks up CPU_CLK_UNHALTED.THREAD_P plus the three MACHINE_CLEARS counters
 * (MEMORY_ORDERING, SMC, MASKMOV), loads them as doubles from either
 * vals[pos] or the accumulated sum, computes
 * res = ((cl1 + cl2 + cl3) * con) / un and prints it using %1.3f.
 *
 * NOTE(review): the assignment of con is not visible in this view; the
 * formula comment below indicates a weight of 100. The condition choosing
 * vals[pos] versus sum is not visible either.
 */
1133 clears(struct counters *cpu, int pos)
1135 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1136 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/
1139 struct counters *clr1, *clr2, *clr3;
1140 struct counters *unhalt;
1141 double con, un, cl1, cl2, cl3, res;
1144 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1145 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1146 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1147 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
/* Per-sample path: read slot pos of each counter. */
1150 cl1 = clr1->vals[pos] * 1.0;
1151 cl2 = clr2->vals[pos] * 1.0;
1152 cl3 = clr3->vals[pos] * 1.0;
1153 un = unhalt->vals[pos] * 1.0;
/* Summary path: read the accumulated sum of each counter. */
1155 cl1 = clr1->sum * 1.0;
1156 cl2 = clr2->sum * 1.0;
1157 cl3 = clr3->sum * 1.0;
1158 un = unhalt->sum * 1.0;
1160 res = ((cl1 + cl2 + cl3) * con)/un;
1161 ret = printf("%1.3f", res);
/*
 * microassist: expression handler taking a counter list (cpu) and a sample
 * index (pos).
 *
 * Looks up CPU_CLK_UNHALTED.THREAD_P and IDQ.MS_UOPS, loads them as doubles
 * from either vals[pos] or the accumulated sum, computes
 * res = id / (un * con) and prints it using the %1.3f format.
 *
 * NOTE(review): the formula comment below names IDQ.MS_CYCLES while the
 * lookup uses IDQ.MS_UOPS; the microassist table entries in this file
 * collect IDQ.MS_UOPS,cmask=1. Confirm that find_counter() matches that
 * header name.
 * NOTE(review): the assignment of con and the condition choosing vals[pos]
 * versus sum are not visible in this view.
 */
1166 microassist(struct counters *cpu, int pos)
1168 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1170 struct counters *idq;
1171 struct counters *unhalt;
1172 double un, id, res, con;
1175 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1176 idq = find_counter(cpu, "IDQ.MS_UOPS");
/* Per-sample path: read slot pos of each counter. */
1178 id = idq->vals[pos] * 1.0;
1179 un = unhalt->vals[pos] * 1.0;
/* Summary path: read the accumulated sum of each counter. */
1181 id = idq->sum * 1.0;
1182 un = unhalt->sum * 1.0;
1184 res = id/(un * con);
1185 ret = printf("%1.3f", res);
/*
 * aliasing: expression handler taking a counter list (cpu) and a sample
 * index (pos).
 *
 * Looks up CPU_CLK_UNHALTED.THREAD_P and LD_BLOCKS_PARTIAL.ADDRESS_ALIAS,
 * loads them as doubles from either vals[pos] or the accumulated sum,
 * computes res = (lds * con) / un and prints it using the %1.3f format.
 *
 * NOTE(review): the assignment of con is not visible in this view; the
 * formula comment below indicates a weight of 5. The condition choosing
 * vals[pos] versus sum is not visible either.
 */
1191 aliasing(struct counters *cpu, int pos)
1193 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1195 struct counters *ld;
1196 struct counters *unhalt;
1197 double un, lds, con, res;
1200 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1201 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
/* Per-sample path: read slot pos of each counter. */
1203 lds = ld->vals[pos] * 1.0;
1204 un = unhalt->vals[pos] * 1.0;
/* Summary path: read the accumulated sum of each counter. */
1206 lds = ld->sum * 1.0;
1207 un = unhalt->sum * 1.0;
1209 res = (lds * con)/un;
1210 ret = printf("%1.3f", res);
/*
 * fpassists: expression handler taking a counter list (cpu) and a sample
 * index (pos).
 *
 * Looks up INST_RETIRED.ANY_P and FP_ASSIST.ANY, loads them as doubles from
 * either vals[pos] or the accumulated sum, and prints res using the %1.3f
 * format. Note that the local named un holds the instruction count here,
 * not an unhalted-cycle count.
 *
 * NOTE(review): the condition choosing vals[pos] versus sum and the
 * statement computing res are not visible in this view; the formula comment
 * below describes the intended ratio.
 */
1215 fpassists(struct counters *cpu, int pos)
1217 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1219 struct counters *fp;
1220 struct counters *inst;
1221 double un, fpd, res;
1223 inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1224 fp = find_counter(cpu, "FP_ASSIST.ANY");
/* Per-sample path: read slot pos of each counter. */
1226 fpd = fp->vals[pos] * 1.0;
1227 un = inst->vals[pos] * 1.0;
/* Summary path: read the accumulated sum of each counter. */
1229 fpd = fp->sum * 1.0;
1230 un = inst->sum * 1.0;
1233 ret = printf("%1.3f", res);
/*
 * otherassistavx: expression handler taking a counter list (cpu) and a
 * sample index (pos).
 *
 * Looks up CPU_CLK_UNHALTED.THREAD_P and OTHER_ASSISTS.AVX_TO_SSE, loads
 * them as doubles from either vals[pos] or the accumulated sum, computes
 * res = (ot * con) / un and prints it using the %1.3f format.
 *
 * NOTE(review): the assignment of con is not visible in this view; the
 * formula comment below indicates a weight of 75. The condition choosing
 * vals[pos] versus sum is not visible either.
 */
1238 otherassistavx(struct counters *cpu, int pos)
1240 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
1242 struct counters *oth;
1243 struct counters *unhalt;
1244 double un, ot, con, res;
1247 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1248 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
/* Per-sample path: read slot pos of each counter. */
1250 ot = oth->vals[pos] * 1.0;
1251 un = unhalt->vals[pos] * 1.0;
/* Summary path: read the accumulated sum of each counter. */
1253 ot = oth->sum * 1.0;
1254 un = unhalt->sum * 1.0;
1256 res = (ot * con)/un;
1257 ret = printf("%1.3f", res);
/*
 * otherassistsse: expression handler taking a counter list (cpu) and a
 * sample index (pos).
 *
 * Looks up CPU_CLK_UNHALTED.THREAD_P and OTHER_ASSISTS.SSE_TO_AVX, loads
 * them as doubles from either vals[pos] or the accumulated sum, computes
 * res = (ot * con) / un and prints it using the %1.3f format.
 *
 * NOTE(review): the assignment of con is not visible in this view; the
 * formula comment below indicates a weight of 75. The condition choosing
 * vals[pos] versus sum is not visible either.
 */
1262 otherassistsse(struct counters *cpu, int pos)
1266 struct counters *oth;
1267 struct counters *unhalt;
1268 double un, ot, con, res;
1270 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
1272 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1273 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
/* Per-sample path: read slot pos of each counter. */
1275 ot = oth->vals[pos] * 1.0;
1276 un = unhalt->vals[pos] * 1.0;
/* Summary path: read the accumulated sum of each counter. */
1278 ot = oth->sum * 1.0;
1279 un = unhalt->sum * 1.0;
1281 res = (ot * con)/un;
1282 ret = printf("%1.3f", res);
/*
 * efficiency1: expression handler taking a counter list (cpu) and a sample
 * index (pos).
 *
 * Looks up CPU_CLK_UNHALTED.THREAD_P and UOPS_RETIRED.RETIRE_SLOTS, loads
 * them as doubles from either vals[pos] or the accumulated sum, computes
 * res = ot / (con * un) and prints it using the %1.3f format.
 *
 * NOTE(review): the assignment of con is not visible in this view; the
 * formula comment below indicates a factor of 4. The condition choosing
 * vals[pos] versus sum is not visible either.
 */
1287 efficiency1(struct counters *cpu, int pos)
1291 struct counters *uops;
1292 struct counters *unhalt;
1293 double un, ot, con, res;
1295 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1297 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1298 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
/* Per-sample path: read slot pos of each counter. */
1300 ot = uops->vals[pos] * 1.0;
1301 un = unhalt->vals[pos] * 1.0;
/* Summary path: read the accumulated sum of each counter. */
1303 ot = uops->sum * 1.0;
1304 un = unhalt->sum * 1.0;
1306 res = ot/(con * un);
1307 ret = printf("%1.3f", res);
/*
 * efficiency2: expression handler taking a counter list (cpu) and a sample
 * index (pos).
 *
 * Looks up CPU_CLK_UNHALTED.THREAD_P and INST_RETIRED.ANY_P, loads them as
 * doubles from either vals[pos] or the accumulated sum, and prints res
 * using the %1.3f format. The pointer named uops refers to the
 * INST_RETIRED.ANY_P counter in this function.
 *
 * NOTE(review): the declarations of un, ot, res and ret, the condition
 * choosing vals[pos] versus sum, and the statement computing res are not
 * visible in this view; the formula comment below describes the intended
 * cycles-per-instruction ratio.
 */
1312 efficiency2(struct counters *cpu, int pos)
1316 struct counters *uops;
1317 struct counters *unhalt;
1320 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1321 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1322 uops = find_counter(cpu, "INST_RETIRED.ANY_P");
/* Per-sample path: read slot pos of each counter. */
1324 ot = uops->vals[pos] * 1.0;
1325 un = unhalt->vals[pos] * 1.0;
/* Summary path: read the accumulated sum of each counter. */
1327 ot = uops->sum * 1.0;
1328 un = unhalt->sum * 1.0;
1331 ret = printf("%1.3f", res);
/*
 * Canned expression table for Sandy Bridge, sized by SANDY_BRIDGE_COUNT.
 *
 * Each entry lists, in order: the expression name matched by
 * set_expression() and validate_expression(), a threshold hint string, the
 * pmcstat command line that collects the counters the expression needs,
 * and the handler function that evaluates it.
 *
 * NOTE(review): most handler-function lines of the entries are not visible
 * in this view; only the blockstoreforward one is.
 */
1335 #define SANDY_BRIDGE_COUNT 20
1336 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1337 /*01*/ { "allocstall1", "thresh > .05",
1338 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1340 /*02*/ { "allocstall2", "thresh > .05",
1341 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES -w 1",
1343 /*03*/ { "br_miss", "thresh >= .2",
1344 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1346 /*04*/ { "splitload", "thresh >= .1",
1347 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1349 /*05*/ { "splitstore", "thresh >= .01",
1350 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1352 /*06*/ { "contested", "thresh >= .05",
1353 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1355 /*07*/ { "blockstorefwd", "thresh >= .05",
1356 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1357 blockstoreforward },
1358 /*08*/ { "cache2", "thresh >= .2",
1359 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1361 /*09*/ { "cache1", "thresh >= .2",
1362 "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1364 /*10*/ { "dtlbmissload", "thresh >= .1",
1365 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1367 /*11*/ { "dtlbmissstore", "thresh >= .05",
1368 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1370 /*12*/ { "frontendstall", "thresh >= .15",
1371 "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1373 /*13*/ { "clears", "thresh >= .02",
1374 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1376 /*14*/ { "microassist", "thresh >= .05",
1377 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1379 /*15*/ { "aliasing_4k", "thresh >= .1",
1380 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1382 /*16*/ { "fpassist", "look for a excessive value",
1383 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1385 /*17*/ { "otherassistavx", "look for a excessive value",
1386 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1388 /*18*/ { "otherassistsse", "look for a excessive value",
1389 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1391 /*19*/ { "eff1", "thresh < .9",
1392 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1394 /*20*/ { "eff2", "thresh > 1.0",
1395 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
/*
 * Canned expression table for Ivy Bridge, sized by IVY_BRIDGE_COUNT.
 *
 * Each entry lists, in order: the expression name matched by
 * set_expression() and validate_expression(), a threshold hint string, the
 * pmcstat command line that collects the counters the expression needs,
 * and the handler function that evaluates it.
 *
 * NOTE(review): most handler-function lines of the entries are not visible
 * in this view; only the blockstoreforward one is.
 */
1400 #define IVY_BRIDGE_COUNT 21
1401 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1402 /*1*/ { "eff1", "thresh < .75",
1403 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1405 /*2*/ { "eff2", "thresh > 1.0",
1406 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1408 /*3*/ { "itlbmiss", "thresh > .05",
1409 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1411 /*4*/ { "icachemiss", "thresh > .05",
1412 "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1414 /*5*/ { "lcpstall", "thresh > .05",
1415 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1417 /*6*/ { "cache1", "thresh >= .2",
1418 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1420 /*7*/ { "cache2", "thresh >= .2",
1421 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1423 /*8*/ { "contested", "thresh >= .05",
1424 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1426 /*9*/ { "datashare", "thresh >= .05",
1427 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1429 /*10*/ { "blockstorefwd", "thresh >= .05",
1430 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1431 blockstoreforward },
1432 /*11*/ { "splitload", "thresh >= .1",
1433 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1435 /*12*/ { "splitstore", "thresh >= .01",
1436 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1438 /*13*/ { "aliasing_4k", "thresh >= .1",
1439 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1441 /*14*/ { "dtlbmissload", "thresh >= .1",
1442 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1444 /*15*/ { "dtlbmissstore", "thresh >= .05",
1445 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1447 /*16*/ { "br_miss", "thresh >= .2",
1448 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1450 /*17*/ { "clears", "thresh >= .02",
1451 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1453 /*18*/ { "microassist", "thresh >= .05",
1454 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1456 /*19*/ { "fpassist", "look for a excessive value",
1457 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1459 /*20*/ { "otherassistavx", "look for a excessive value",
1460 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1462 /*21*/ { "otherassistsse", "look for a excessive value",
1463 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
/*
 * Canned expression table for Haswell, sized by HASWELL_COUNT.
 *
 * Each entry lists, in order: the expression name matched by
 * set_expression() and validate_expression(), a threshold hint string, the
 * pmcstat command line that collects the counters the expression needs,
 * and the handler function that evaluates it. Every counter in a command
 * line must be introduced with a single-dash -s option, since the string is
 * handed to pmcstat as is.
 *
 * NOTE(review): most handler-function lines of the entries are not visible
 * in this view; only the blockstoreforward one is.
 */
1467 #define HASWELL_COUNT 20
1468 static struct cpu_entry haswell[HASWELL_COUNT] = {
1469 /*1*/ { "eff1", "thresh < .75",
1470 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1472 /*2*/ { "eff2", "thresh > 1.0",
1473 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1475 /*3*/ { "itlbmiss", "thresh > .05",
1476 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1478 /*4*/ { "icachemiss", "thresh > .05",
1479 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1481 /*5*/ { "lcpstall", "thresh > .05",
1482 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1484 /*6*/ { "cache1", "thresh >= .2",
1485 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1487 /*7*/ { "cache2", "thresh >= .2",
1488 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1490 /*8*/ { "contested", "thresh >= .05",
1491 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1493 /*9*/ { "datashare", "thresh >= .05",
1494 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1496 /*10*/ { "blockstorefwd", "thresh >= .05",
1497 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1498 blockstoreforward },
1499 /*11*/ { "splitload", "thresh >= .1",
1500 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1502 /*12*/ { "splitstore", "thresh >= .01",
1503 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1505 /*13*/ { "aliasing_4k", "thresh >= .1",
1506 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1508 /*14*/ { "dtlbmissload", "thresh >= .1",
1509 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1511 /*15*/ { "br_miss", "thresh >= .2",
1512 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1514 /*16*/ { "clears", "thresh >= .02",
1515 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1517 /*17*/ { "microassist", "thresh >= .05",
1518 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1520 /*18*/ { "fpassist", "look for a excessive value",
1521 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1523 /*19*/ { "otherassistavx", "look for a excessive value",
1524 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1526 /*20*/ { "otherassistsse", "look for a excessive value",
1527 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
/*
 * set_sandybridge: select the Sandy Bridge expression set.
 *
 * Copies the label SandyBridge PMC into the_cpu.cputype and points the_cpu
 * at the sandy_bridge table, its SANDY_BRIDGE_COUNT entry count, and the
 * explain_name_sb explanation routine.
 */
1533 set_sandybridge(void)
1535 strcpy(the_cpu.cputype, "SandyBridge PMC");
1536 the_cpu.number = SANDY_BRIDGE_COUNT;
1537 the_cpu.ents = sandy_bridge;
1538 the_cpu.explain = explain_name_sb;
/*
 * Select the Ivy Bridge expression set: copies the label IvyBridge PMC into
 * the_cpu.cputype and points the_cpu at the ivy_bridge table, its
 * IVY_BRIDGE_COUNT entry count, and the explain_name_ib explanation routine.
 *
 * NOTE(review): the function header for these statements is not visible in
 * this view.
 */
1544 strcpy(the_cpu.cputype, "IvyBridge PMC");
1545 the_cpu.number = IVY_BRIDGE_COUNT;
1546 the_cpu.ents = ivy_bridge;
1547 the_cpu.explain = explain_name_ib;
/*
 * Select the Haswell expression set: copies the label HASWELL PMC into
 * the_cpu.cputype and points the_cpu at the haswell table, its
 * HASWELL_COUNT entry count, and the explain_name_has explanation routine.
 *
 * NOTE(review): the function header for these statements is not visible in
 * this view.
 */
1554 strcpy(the_cpu.cputype, "HASWELL PMC");
1555 the_cpu.number = HASWELL_COUNT;
1556 the_cpu.ents = haswell;
1557 the_cpu.explain = explain_name_has;
/*
 * set_expression: activate the canned expression called name.
 *
 * Scans the_cpu.ents for an entry whose name compares equal to name and,
 * on a match, loads the file-level expression, command and threshold
 * variables from that entry. The message at the end reports that the
 * current CPU type has no expression of that name.
 *
 * NOTE(review): the statements that leave the loop on a match and that
 * guard the final message are not visible in this view.
 */
1561 set_expression(char *name)
1564 for(i=0 ; i< the_cpu.number; i++) {
1565 if (strcmp(name, the_cpu.ents[i].name) == 0) {
/* Match: adopt the handler, collection command and threshold text. */
1567 expression = the_cpu.ents[i].func;
1568 command = the_cpu.ents[i].command;
1569 threshold = the_cpu.ents[i].thresh;
1574 printf("For CPU type %s we have no expression:%s\n",
1575 the_cpu.cputype, name);
/*
 * validate_expression: check whether name is one of the canned expressions
 * of the selected CPU by comparing it against every the_cpu.ents[i].name.
 *
 * NOTE(review): the return statements are not visible in this view; main()
 * treats a nonzero result as an unknown expression.
 */
1585 validate_expression(char *name)
1590 for(i=0 ; i< the_cpu.number; i++) {
1591 if (strcmp(name, the_cpu.ents[i].name) == 0) {
/*
 * do_expression: run the currently selected expression handler on the
 * counter list cpu for sample index pos.
 *
 * The handler is called through the file-level expression pointer, which is
 * first tested against NULL.
 * NOTE(review): the statement taken when expression is NULL is not visible
 * in this view.
 */
1603 do_expression(struct counters *cpu, int pos)
1605 if (expression == NULL)
1607 (*expression)(cpu, pos);
/*
 * process_header: parse one header token of the form s/NN/name into a
 * counters record.
 *
 * idx is the index of the header element and p points at its text. The
 * token must begin with s/ or it is reported as invalid. The CPU number is
 * parsed in base 10 from the text after that prefix, and the remainder
 * after a separator is stored as the counter name, truncated to
 * MAX_NLEN-1 bytes when it is too long.
 *
 * NOTE(review): the assignment of up, the computation of len and the test
 * that locates the separator inside the loop are not visible in this view.
 * NOTE(review): the truncating strncpy() does not write a terminator by
 * itself; presumably the record was zeroed beforehand, confirm.
 */
1611 process_header(int idx, char *p)
1613 struct counters *up;
1616 * Given header element idx, at p in
1617 * form 's/NN/nameof'
1618 * process the entry to pull out the name and
1621 if (strncmp(p, "s/", 2)) {
1622 printf("Check -- invalid header no s/ in %s\n",
1627 up->cpu = strtol(&p[2], NULL, 10);
1629 for (i=2; i<len; i++) {
1631 nlen = strlen(&p[(i+1)]);
1632 if (nlen < (MAX_NLEN-1)) {
1633 strcpy(up->counter_name, &p[(i+1)]);
1635 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
/*
 * build_counters_from_header: read the first line from io and build the
 * cnts array from it.
 *
 * The line is scanned twice for s/ tokens. The size computed from cnt is
 * used to allocate cnts, which is then cleared with memset(). The second
 * scan hands each token to process_header() together with its index. The
 * number of entries is printed at the end. Read and allocation failures
 * are reported with errno.
 *
 * NOTE(review): the statements that advance cnt, set p, and handle the
 * failure paths are not visible in this view.
 * NOTE(review): the header line is limited to the 8192-byte buffer.
 */
1642 build_counters_from_header(FILE *io)
1644 char buffer[8192], *p;
1648 /* We have a new start, lets
1649 * setup our headers and cpus.
1651 if (fgets(buffer, sizeof(buffer), io) == NULL) {
1652 printf("First line can't be read from file err:%d\n", errno);
1656 * Ok output is an array of counters. Once
1657 * we start to read the values in we must
1658 * put them in there slot to match there CPU and
1659 * counter being updated. We create a mass array
1660 * of the counters, filling in the CPU and
1663 /* How many do we get? */
1664 len = strlen(buffer);
1665 for (i=0, cnt=0; i<len; i++) {
1666 if (strncmp(&buffer[i], "s/", 2) == 0) {
1669 if (buffer[i] == ' ')
1674 mlen = sizeof(struct counters) * cnt;
1675 cnts = malloc(mlen);
1678 printf("No memory err:%d\n", errno);
1681 memset(cnts, 0, mlen);
1682 for (i=0, cnt=0; i<len; i++) {
1683 if (strncmp(&buffer[i], "s/", 2) == 0) {
1686 if (buffer[i] == ' ') {
1691 process_header(cnt, p);
1696 printf("We have %d entries\n", cnt);
/*
 * max_to_collect caps how many sample lines are gathered; it starts at
 * MAX_COUNTER_SLOTS.
 */
1698 extern int max_to_collect;
1699 int max_to_collect = MAX_COUNTER_SLOTS;
/*
 * read_a_line: read one line of sample output from io and distribute its
 * values over the cnts array.
 *
 * For each of the ncnts counters one number is parsed with strtol() (base
 * auto-detected), stored in that counter at vals[pos], and added to its
 * running sum.
 *
 * NOTE(review): how pos and p are set and advanced, and the return values,
 * are not visible in this view; process_file() treats a nonzero result as
 * the end of input.
 */
1702 read_a_line(FILE *io)
1704 char buffer[8192], *p, *stop;
1707 if (fgets(buffer, sizeof(buffer), io) == NULL) {
1711 for (i=0; i<ncnts; i++) {
1713 cnts[i].vals[pos] = strtol(p, &stop, 0);
1715 cnts[i].sum += cnts[i].vals[pos];
/*
 * cpu_count_out records how many CPUs were counted by the first loop of
 * the routine below.
 */
1721 extern int cpu_count_out;
1722 int cpu_count_out=0;
/*
 * Prints a separator line, then makes two passes over the MAX_CPU slots:
 * the first produces cnt, which is saved in cpu_count_out, and the second
 * tracks printed_cnt and compares it against cnt.
 *
 * NOTE(review): the function header and both loop bodies are not visible
 * in this view, so what is counted and printed per CPU must be confirmed
 * against the full source.
 */
1727 int i, cnt, printed_cnt;
1729 printf("*********************************\n");
1730 for(i=0, cnt=0; i<MAX_CPU; i++) {
1735 cpu_count_out = cnt;
1736 for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
1741 if (printed_cnt == cnt) {
/*
 * lace_cpus_together: chain the entries of cnts that belong to the same
 * CPU through their next_cpu pointers.
 *
 * For each of the ncnts counters: entries that already have next_cpu set
 * are treated as linked; a CPU number at or above MAX_CPU is reported as
 * too big; the first entry seen for a CPU is recorded in glob_cpu[]; later
 * entries with the same cpu value are then searched for and linked in.
 *
 * NOTE(review): the assignments of cpat and at, and the continue or exit
 * statements on the guarded paths, are not visible in this view.
 * NOTE(review): only the upper bound of the CPU number is checked in the
 * visible code.
 */
1751 lace_cpus_together(void)
1754 struct counters *cpat, *at;
1756 for(i=0; i<ncnts; i++) {
1758 if (cpat->next_cpu) {
1759 /* Already laced in */
1762 lace_cpu = cpat->cpu;
1763 if (lace_cpu >= MAX_CPU) {
1764 printf("CPU %d to big\n", lace_cpu);
1767 if (glob_cpu[lace_cpu] == NULL) {
1768 glob_cpu[lace_cpu] = cpat;
1770 /* Already processed this cpu */
1773 /* Ok look forward for cpu->cpu and link in */
1774 for(j=(i+1); j<ncnts; j++) {
1779 if (at->cpu == lace_cpu) {
1781 cpat->next_cpu = at;
/*
 * process_file: collect counter samples from a file or from a live
 * command.
 *
 * With filename set to NULL the file-level command string is started
 * through my_popen(); otherwise the named file is opened for reading. The
 * counters are built from the header line and laced together per CPU, then
 * lines are read with read_a_line() and line_at is compared against
 * max_to_collect. For the dynamically opened case each sample is printed
 * right away by calling do_expression() for the glob_cpu[] slots with
 * index line_at-1.
 *
 * NOTE(review): the loop structure, the error exits, the condition that
 * guards the counter dump, and the branch that chooses my_pclose() are not
 * visible in this view.
 */
1790 process_file(char *filename)
1794 int line_at, not_done;
1795 pid_t pid_of_command=0;
1797 if (filename == NULL) {
1798 io = my_popen(command, "r", &pid_of_command);
1800 io = fopen(filename, "r");
1802 printf("Can't process file %s err:%d\n",
1807 build_counters_from_header(io);
1809 /* Nothing we can do */
1810 printf("Nothing to do -- no counters built\n");
1816 lace_cpus_together();
1819 for (i=0; i<ncnts; i++) {
1820 printf("Counter:%s cpu:%d index:%d\n",
1821 cnts[i].counter_name,
1828 if (read_a_line(io)) {
1833 if (line_at >= max_to_collect) {
1836 if (filename == NULL) {
1838 /* For the ones we dynamically open we print now */
1839 for(i=0, cnt=0; i<MAX_CPU; i++) {
1840 do_expression(glob_cpu[i], (line_at-1));
1842 if (cnt == cpu_count_out) {
1854 my_pclose(io, pid_of_command);
/*
 * cpuid(in, a, b, c, d): on amd64 builds, executes the cpuid instruction
 * with in loaded into EAX and stores the resulting EAX, EBX, ECX and EDX
 * values into a, b, c and d. The second definition expands to nothing, so
 * the outputs keep whatever values the caller gave them.
 *
 * NOTE(review): the preprocessor lines separating the two definitions are
 * not visible in this view.
 */
1857 #if defined(__amd64__)
1858 #define cpuid(in,a,b,c,d)\
1859 asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
1861 #define cpuid(in, a, b, c, d)
/*
 * CPU identification and PMC list loading.
 *
 * Step 1: cpuid leaf 0 is used to classify the vendor from EBX, which holds
 * the first four bytes of the vendor string: 0x68747541 for AMD, 0x69727943
 * for Cyrix and 0x756e6547 for Intel. AMD and Cyrix are reported as
 * unsupported.
 *
 * Step 2: cpuid leaf 1 supplies the family (EAX bits 8 to 11, examined by
 * the outer switch) and a model number assembled from the extended model
 * bits 16 to 19 (high nibble) and the model bits 4 to 7 (low nibble). The
 * detected CPU name is printed.
 *
 * Step 3: the output of /usr/sbin/pmccontrol -L is read line by line. Lines
 * that do not start with a tab are skipped, the trailing newline is
 * removed, and each name is copied into a freshly allocated string stored
 * in valid_pmcs. The table starts with PMC_INITIAL_ALLOC slots and is
 * doubled when full.
 *
 * NOTE(review): the function header, the inner switch on model with most of
 * its case labels, the calls that select a CPU table, the assignment of
 * str, the increment of valid_pmc_cnt and the allocation and installation
 * of the grown table are not visible in this view.
 * NOTE(review): the result of my_popen() is used with no visible NULL
 * check.
 */
1867 unsigned long eax, ebx, ecx, edx;
1869 pid_t pid_of_command=0;
1872 char linebuf[1024], *str;
1874 eax = ebx = ecx = edx = 0;
1876 cpuid(0, eax, ebx, ecx, edx);
1877 if (ebx == 0x68747541) {
1878 printf("AMD processors are not supported by this program\n");
/* 0x69727943 is the byte sequence C y r i read as a little-endian word. */
1881 } else if (ebx == 0x69727943) {
1882 printf("Cyrix processors are not supported by this program\n");
1885 } else if (ebx == 0x756e6547) {
1886 printf("Genuine Intel\n");
1888 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
1891 cpuid(1, eax, ebx, ecx, edx);
1892 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
1893 printf("CPU model is 0x%x id:0x%lx\n", model, eax);
1894 switch (eax & 0xF00) {
1895 case 0x500: /* Pentium family processors */
1896 printf("Intel Pentium P5\n");
1899 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */
1902 printf("Intel Pentium P6\n");
1907 printf("Intel PII\n");
1910 case 0x6: case 0x16:
1911 printf("Intel CL\n");
1914 case 0x7: case 0x8: case 0xA: case 0xB:
1915 printf("Intel PIII\n");
1919 printf("Intel PM\n");
1923 printf("Intel CORE\n");
1927 printf("Intel CORE2\n");
1931 printf("Intel CORE2EXTREME\n");
1934 case 0x1C: /* Per Intel document 320047-002. */
1935 printf("Intel ATOM\n");
1940 * Per Intel document 253669-032 9/2009,
1941 * pages A-2 and A-57
1944 * Per Intel document 253669-032 9/2009,
1945 * pages A-2 and A-57
1947 printf("Intel COREI7\n");
1951 printf("Intel NEHALEM\n");
1954 case 0x25: /* Per Intel document 253669-033US 12/2009. */
1955 case 0x2C: /* Per Intel document 253669-033US 12/2009. */
1956 printf("Intel WESTMERE\n");
1959 case 0x2F: /* Westmere-EX, seen in wild */
1960 printf("Intel WESTMERE\n");
1963 case 0x2A: /* Per Intel document 253669-039US 05/2011. */
1964 printf("Intel SANDYBRIDGE\n");
1967 case 0x2D: /* Per Intel document 253669-044US 08/2012. */
1968 printf("Intel SANDYBRIDGE_XEON\n");
1971 case 0x3A: /* Per Intel document 253669-043US 05/2012. */
1972 printf("Intel IVYBRIDGE\n");
1975 case 0x3E: /* Per Intel document 325462-045US 01/2013. */
1976 printf("Intel IVYBRIDGE_XEON\n");
1979 case 0x3F: /* Per Intel document 325462-045US 09/2014. */
1980 printf("Intel HASWELL (Xeon)\n");
1983 case 0x3C: /* Per Intel document 325462-045US 01/2013. */
1986 printf("Intel HASWELL\n");
1990 /* Per Intel document 330061-001 01/2014. */
1991 printf("Intel ATOM_SILVERMONT\n");
1995 printf("Intel model 0x%x is not known -- sorry\n",
2001 case 0xF00: /* P4 */
2002 printf("Intel unknown model %d\n", model);
2006 /* Ok lets load the list of all known PMC's */
2007 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
2008 if (valid_pmcs == NULL) {
2010 pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2011 sz = sizeof(char *) * pmc_allocated_cnt;
2012 valid_pmcs = malloc(sz);
2013 if (valid_pmcs == NULL) {
2014 printf("No memory allocation fails at startup?\n");
2017 memset(valid_pmcs, 0, sz);
2020 while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2021 if (linebuf[0] != '\t') {
2022 /* sometimes headers ;-) */
2025 len = strlen(linebuf);
2026 if (linebuf[(len-1)] == '\n') {
2028 linebuf[(len-1)] = 0;
2031 len = strlen(str) + 1;
2032 valid_pmcs[valid_pmc_cnt] = malloc(len);
2033 if (valid_pmcs[valid_pmc_cnt] == NULL) {
2034 printf("No memory2 allocation fails at startup?\n");
2037 memset(valid_pmcs[valid_pmc_cnt], 0, len);
2038 strcpy(valid_pmcs[valid_pmc_cnt], str);
2040 if (valid_pmc_cnt >= pmc_allocated_cnt) {
2041 /* Got to expand -- unlikely */
2044 sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2047 printf("No memory3 allocation fails at startup?\n");
2050 memset(more, 0, sz);
/*
 * Copy only what the old table holds: sz is the doubled size and
 * pmc_allocated_cnt is not doubled until the next statement.
 */
2051 memcpy(more, valid_pmcs, sizeof(char *) * pmc_allocated_cnt);
2052 pmc_allocated_cnt *= 2;
2057 my_pclose(io, pid_of_command);
2060 printf("Not supported\n");
/*
 * Lists every canned expression of the selected CPU type: prints a heading
 * naming the_cpu.cputype, then for each of the the_cpu.number entries
 * prints its -e name, calls the_cpu.explain with that name, and prints a
 * separator line.
 *
 * NOTE(review): the function header and the declaration of i are not
 * visible in this view.
 */
2068 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2069 printf("-------------------------------------------------------------\n");
2070 for(i=0; i<the_cpu.number; i++){
2071 printf("For -e %s ", the_cpu.ents[i].name);
2072 (*the_cpu.explain)(the_cpu.ents[i].name);
2073 printf("----------------------------\n");
/*
 * test_for_a_pmc: check that a single PMC can be sampled on this machine.
 *
 * pmc is the counter name and out_so_far is the number of characters the
 * caller has already printed on the current line; when that is below 50 the
 * remaining columns are stepped over first. The routine then runs
 * /usr/sbin/pmcstat -w .25 -c 0 -s <pmc> through my_popen() and reads its
 * output:
 *   - the first line is searched for either the text ERROR, which is
 *     reported as a failure, or the counter name itself;
 *   - when the name is found a second line is read, leading spaces are
 *     skipped, and the value text is printed;
 *   - otherwise a mismatch between the line and the expected name is
 *     reported.
 * The pipe is closed with my_pclose().
 *
 * The command string is built with a bounded snprintf() because a PMC name
 * may be close to 1 KB (it is read through a 1024-byte line buffer) and the
 * fixed prefix would then not fit into my_command.
 *
 * NOTE(review): the declarations of line, resp and the loop indices, the
 * loop bodies that emit padding, and the jumps between the stages are not
 * visible in this view.
 */
2078 test_for_a_pmc(const char *pmc, int out_so_far)
2081 pid_t pid_of_command=0;
2082 char my_command[1024];
2087 if (out_so_far < 50) {
2088 len = 50 - out_so_far;
2089 for(i=0; i<len; i++) {
2093 snprintf(my_command, sizeof(my_command), "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
2094 io = my_popen(my_command, "r", &pid_of_command);
2096 printf("Failed -- popen fails\n");
2099 /* Setup what we expect */
2100 len = sprintf(resp, "%s", pmc);
2101 if (fgets(line, sizeof(line), io) == NULL) {
2102 printf("Failed -- no output from pmstat\n");
2105 llen = strlen(line);
2106 if (line[(llen-1)] == '\n') {
2110 for(i=2; i<(llen-len); i++) {
2111 if (strncmp(&line[i], "ERROR", 5) == 0) {
2112 printf("Failed %s\n", line);
2114 } else if (strncmp(&line[i], resp, len) == 0) {
2117 if (fgets(line, sizeof(line), io) == NULL) {
2118 printf("Failed -- no second output from pmstat\n");
2122 for (j=0; j<len; j++) {
2123 if (line[j] == ' ') {
2130 len = strlen(&line[j]);
2132 for(k=0; k<(20-len); k++) {
2137 printf("%s", &line[j]);
2144 printf("Failed -- '%s' not '%s'\n", line, resp);
2146 my_pclose(io, pid_of_command);
/*
 * add_it_to: add name to the string table vars unless it is already there.
 *
 * The first cur_cnt slots are compared against name. If no slot matches,
 * slot cur_cnt is expected to be empty (a diagnostic is printed when it is
 * not) and receives a newly allocated, zero-filled copy of name.
 *
 * NOTE(review): the return statements are not visible in this view;
 * build_command_for_exp() tests the result as a boolean, presumably
 * meaning that a new entry was added, confirm.
 */
2151 add_it_to(char **vars, int cur_cnt, char *name)
2155 for(i=0; i<cur_cnt; i++) {
2156 if (strcmp(vars[i], name) == 0) {
2161 if (vars[cur_cnt] != NULL) {
2162 printf("Cur_cnt:%d filled with %s??\n",
2163 cur_cnt, vars[cur_cnt]);
/* Store a private copy so the table owns its strings. */
2167 len = strlen(name) + 1;
2168 vars[cur_cnt] = malloc(len);
2169 if (vars[cur_cnt] == NULL) {
2170 printf("No memory %s\n", __FUNCTION__);
2173 memset(vars[cur_cnt], 0, len);
2174 strcpy(vars[cur_cnt], name);
/*
 * build_command_for_exp: build the pmcstat command line that collects
 * every PMC referenced by the expression list exp.
 *
 * Nodes of type TYPE_VALUE_PMC are counted first to size the vars table;
 * an expression without any PMC is reported as nothing to do. The names
 * are then passed to add_it_to(), which skips names already stored, so
 * vars ends up holding alloced_pmcs distinct names. The command buffer is
 * sized as 23 bytes for the fixed prefix and its terminator plus, per name,
 * its length and 4 bytes for the option text, with 2 spare bytes; the
 * prefix is copied in and one -s option is appended per name.
 *
 * NOTE(review): the list traversal statements, the allocation of vars, the
 * declaration and size of forming, and the return statement are not
 * visible in this view; set_manual_exp() stores the result as the command
 * string.
 * NOTE(review): forming is filled by an unbounded sprintf(); confirm it is
 * large enough for the longest PMC name.
 */
2179 build_command_for_exp(struct expression *exp)
2182 * Build the pmcstat command to handle
2183 * the passed in expression.
2184 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2185 * where NNN and QQQ represent the PMC's in the expression
2189 int cnt_pmc, alloced_pmcs, i;
2190 struct expression *at;
2194 alloced_pmcs = cnt_pmc = 0;
2195 /* first how many do we have */
2198 if (at->type == TYPE_VALUE_PMC) {
2204 printf("No PMC's in your expression -- nothing to do!!\n");
2207 mal = cnt_pmc * sizeof(char *);
2210 printf("No memory\n");
2213 memset(vars, 0, mal);
2216 if (at->type == TYPE_VALUE_PMC) {
2217 if(add_it_to(vars, alloced_pmcs, at->name)) {
2223 /* Now we have a unique list in vars so create our command */
2224 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
2225 for(i=0; i<alloced_pmcs; i++) {
2226 mal += strlen(vars[i]) + 4; /* var + " -s " */
2228 cmd = malloc((mal+2));
2230 printf("%s out of mem\n", __FUNCTION__);
2233 memset(cmd, 0, (mal+2));
2234 strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2236 for(i=0; i<alloced_pmcs; i++) {
2237 sprintf(forming, " -s %s", vars[i]);
2238 strcat(cmd, forming);
/*
 * user_expr: expression handler for a user-supplied expression.
 *
 * For every node of type TYPE_VALUE_PMC the named counter is looked up in
 * cpu with find_counter(); a failed lookup is reported. The value of the
 * node is loaded as a double from either vals[pos] or the accumulated sum.
 * The whole expression is then evaluated from master_exp with run_expr()
 * and the result printed using the %1.3f format.
 *
 * NOTE(review): the list traversal, the failure test after find_counter(),
 * and the condition choosing vals[pos] versus sum are not visible in this
 * view.
 */
2247 user_expr(struct counters *cpu, int pos)
2251 struct counters *var;
2252 struct expression *at;
2256 if (at->type == TYPE_VALUE_PMC) {
2257 var = find_counter(cpu, at->name);
2259 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
2263 at->value = var->vals[pos] * 1.0;
2265 at->value = var->sum * 1.0;
2270 res = run_expr(master_exp, 1, NULL);
2271 ret = printf("%1.3f", res);
/*
 * set_manual_exp: install a user-defined expression as the active one.
 *
 * Routes evaluation through user_expr, builds the matching pmcstat command
 * from exp with build_command_for_exp(), and sets the threshold text to a
 * fixed user-defined label.
 */
2277 set_manual_exp(struct expression *exp)
2279 expression = user_expr;
2280 command = build_command_for_exp(exp);
2281 threshold = "User defined threshold";
/*
 * Tests every known PMC: announces how many will be tried, then for each of
 * the valid_pmc_cnt names prints the name and hands it to test_for_a_pmc()
 * together with the number of characters just printed.
 *
 * NOTE(review): the function header and the local declarations are not
 * visible in this view.
 */
2288 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2289 printf("------------------------------------------------------------------------\n");
2290 for(i=0; i<valid_pmc_cnt; i++) {
2291 lenout = printf("%s", valid_pmcs[i]);
2293 test_for_a_pmc(valid_pmcs[i], lenout);
/*
 * Lists every known PMC with its abbreviation: after a heading, each of the
 * valid_pmc_cnt names is printed, the line is stepped out to column 52, and
 * the abbreviation is printed as a percent sign followed by the index of
 * the name.
 *
 * NOTE(review): the function header, the local declarations and the body
 * of the padding loop are not visible in this view.
 */
2300 printf("PMC Abbreviation\n");
2301 printf("--------------------------------------------------------------\n");
2302 for(i=0; i<valid_pmc_cnt; i++) {
2303 cnt = printf("%s", valid_pmcs[i]);
2304 for(j=cnt; j<52; j++) {
2307 printf("%%%d\n", i);
2313 main(int argc, char **argv)
2316 char *filename=NULL;
2322 memset(glob_cpu, 0, sizeof(glob_cpu));
2323 while ((i = getopt(argc, argv, "LHhvm:i:?e:TE:")) != -1) {
2329 printf("**********************************\n");
2331 printf("**********************************\n");
2338 master_exp = parse_expression(optarg);
2340 set_manual_exp(master_exp);
2344 if (validate_expression(optarg)) {
2345 printf("Unknown expression %s\n", optarg);
2349 set_expression(optarg);
2352 max_to_collect = strtol(optarg, NULL, 0);
2353 if (max_to_collect > MAX_COUNTER_SLOTS) {
2354 /* You can't collect more than max in array */
2355 max_to_collect = MAX_COUNTER_SLOTS;
2370 printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2372 printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2373 printf("-v -- verbose dump debug type things -- you don't want this\n");
2374 printf("-m N -- maximum to collect is N measurments\n");
2375 printf("-e expr-name -- Do expression expr-name\n");
2376 printf("-E 'your expression' -- Do your expression\n");
2377 printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2378 printf("-H -- Don't run anything, just explain all canned expressions\n");
2379 printf("-T -- Test all PMC's defined by this processor\n");
2384 if ((name == NULL) && (filename == NULL) && (test_mode == 0) && (master_exp == NULL)) {
2385 printf("Without setting an expression we cannot dynamically gather information\n");
2386 printf("you must supply a filename (and you probably want verbosity)\n");
2393 printf("*********************************\n");
2394 if (master_exp == NULL) {
2395 (*the_cpu.explain)(name);
2397 printf("Examine your expression ");
2398 print_exp(master_exp);
2399 printf("User defined threshold\n");
2404 process_file(filename);
2406 for (i=0; i<ncnts; i++) {
2407 printf("Counter:%s cpu:%d index:%d\n",
2408 cnts[i].counter_name,
2410 for(j=0; j<cnts[i].pos; j++) {
2411 printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2413 printf(" sum - %ld\n", (long int)cnts[i].sum);
2416 if (expression == NULL) {
2419 for(i=0, cnt=0; i<MAX_CPU; i++) {
2421 do_expression(glob_cpu[i], -1);
2423 if (cnt == cpu_count_out) {