2 * Copyright (c) 2014-2015 Netflix, Inc.
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer,
9 * in this position and unchanged.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. The name of the author may not be used to endorse or promote products
14 * derived from this software without specific prior written permission
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include <sys/types.h>
33 #include <sys/errno.h>
37 #include "eval_expr.h"
38 __FBSDID("$FreeBSD$");
40 static int max_pmc_counters = 1;
41 static int run_all = 0;
43 #define MAX_COUNTER_SLOTS 1024
46 static int verbose = 0;
48 extern char **environ;
49 extern struct expression *master_exp;
50 struct expression *master_exp=NULL;
52 #define PMC_INITIAL_ALLOC 512
53 extern char **valid_pmcs;
54 char **valid_pmcs = NULL;
55 extern int valid_pmc_cnt;
57 extern int pmc_allocated_cnt;
58 int pmc_allocated_cnt=0;
61 * The following are two variants of popen and pclose, with
62 * the caveat that they hand back the PID so that you
63 * can supply it to pclose so it can send a SIGTERM
67 my_popen(const char *command, const char *dir, pid_t *p_pid)
70 int pdesin[2], pdesout[2];
77 if ((strcmp(dir, "r") != 0) &&
78 (strcmp(dir, "w") != 0)) {
85 if (pipe(pdesout) < 0) {
86 (void)close(pdesin[0]);
87 (void)close(pdesin[1]);
92 strcpy(cmd2, command);
98 switch (pid = fork()) {
100 (void)close(pdesin[0]);
101 (void)close(pdesin[1]);
102 (void)close(pdesout[0]);
103 (void)close(pdesout[1]);
107 /* Close out un-used sides */
108 (void)close(pdesin[1]);
109 (void)close(pdesout[0]);
110 /* Now prepare the stdin of the process */
112 (void)dup(pdesin[0]);
113 (void)close(pdesin[0]);
114 /* Now prepare the stdout of the process */
116 (void)dup(pdesout[1]);
117 /* And lets do stderr just in case */
119 (void)dup(pdesout[1]);
120 (void)close(pdesout[1]);
122 execve("/bin/sh", argv, environ);
126 /* Parent; assume fdopen can't fail. */
129 if (strcmp(dir, "r") != 0) {
130 io_out = fdopen(pdesin[1], "w");
131 (void)close(pdesin[0]);
132 (void)close(pdesout[0]);
133 (void)close(pdesout[1]);
136 /* Prepare the input stream */
137 io_in = fdopen(pdesout[0], "r");
138 (void)close(pdesout[1]);
139 (void)close(pdesin[0]);
140 (void)close(pdesin[1]);
147 * Pclose returns -1 if stream is not associated with a `popened' command,
148 * if already `pclosed', or waitpid returns an error.
151 my_pclose(FILE *io, pid_t the_pid)
157 * Find the appropriate file pointer and remove it from the list.
160 /* Die if you are not dead! */
161 kill(the_pid, SIGTERM);
163 pid = wait4(the_pid, &pstat, 0, (struct rusage *)0);
164 } while (pid == -1 && errno == EINTR);
168 struct counters *next_cpu;
169 char counter_name[MAX_NLEN]; /* Name of counter */
170 int cpu; /* CPU we are on */
171 int pos; /* Index we are filling to. */
172 uint64_t vals[MAX_COUNTER_SLOTS]; /* Last MAX_COUNTER_SLOTS entries */
173 uint64_t sum; /* Summary of entries */
176 extern struct counters *glob_cpu[MAX_CPU];
177 struct counters *glob_cpu[MAX_CPU];
179 extern struct counters *cnts;
180 struct counters *cnts=NULL;
185 extern int (*expression)(struct counters *, int);
186 int (*expression)(struct counters *, int);
188 static const char *threshold=NULL;
189 static const char *command;
195 int (*func)(struct counters *, int);
196 int counters_required;
202 struct cpu_entry *ents;
203 void (*explain)(const char *name);
205 extern struct cpu_type the_cpu;
206 struct cpu_type the_cpu;
/*
 * Print the formula behind the named derived metric, followed by the
 * value range that suggests there is performance to be gained.
 *
 * name: metric name to explain; an unrecognised name is reported as
 *       "Unknown name:" and the closing hint uses "unknown entry".
 */
static void
explain_name_sb(const char *name)
{
	/* One row per metric: lookup key, formula text, threshold hint. */
	static const struct {
		const char *metric;
		const char *formula;
		const char *limit;
	} notes[] = {
		{ "allocstall1",
		  "Examine PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "allocstall2",
		  "Examine PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "splitload",
		  "Examine MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "cache2",
		  "Examine ((MEM_LOAD_RETIRED.L3_HIT * 26) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P\n"
		  "**Note we have it labeled MEM_LOAD_UOPS_RETIRED.LLC_HIT not MEM_LOAD_RETIRED.L3_HIT\n",
		  "thresh >= .2" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "frontendstall",
		  "Examine IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4)\n",
		  "thresh >= .15" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (CPU_CLK_UNHALTED.THREAD_P * 4)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
	};
	const size_t count = sizeof(notes) / sizeof(notes[0]);
	const char *limit = "unknown entry";
	size_t idx;

	/* Stop at the first row whose key matches exactly. */
	for (idx = 0; idx < count; idx++) {
		if (strcmp(name, notes[idx].metric) == 0)
			break;
	}

	if (idx < count) {
		printf("%s", notes[idx].formula);
		limit = notes[idx].limit;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", limit);
}
/*
 * Print the formula behind the named derived metric, followed by the
 * value range that suggests there is performance to be gained.
 *
 * name: metric name to explain; an unrecognised name is reported as
 *       "Unknown name:" and the closing hint uses "unknown entry".
 */
static void
explain_name_ib(const char *name)
{
	/* One row per metric: lookup key, formula text, threshold hint. */
	static const struct {
		const char *metric;
		const char *formula;
		const char *limit;
	} notes[] = {
		{ "br_miss",
		  "Examine ((BR_MISP_RETIRED.ALL_BRANCHES /(BR_MISP_RETIRED.ALL_BRANCHES +\n"
		  " MACHINE_CLEAR.COUNT) * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES)\n"
		  "/ (4 * CPU_CLK_UNHALTED.THREAD))))\n",
		  "thresh >= .2" },
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .9" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine (MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) *\n"
		  " LD_BLOCKS.NO_SR)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "dtlbmissstore",
		  "Examine (((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .05" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t count = sizeof(notes) / sizeof(notes[0]);
	const char *limit = "unknown entry";
	size_t idx;

	/* Stop at the first row whose key matches exactly. */
	for (idx = 0; idx < count; idx++) {
		if (strcmp(name, notes[idx].metric) == 0)
			break;
	}

	if (idx < count) {
		printf("%s", notes[idx].formula);
		limit = notes[idx].limit;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", limit);
}
/*
 * Print the formula behind the named derived metric, followed by the
 * value range that suggests there is performance to be gained.
 *
 * name: metric name to explain; an unrecognised name is reported as
 *       "Unknown name:" and the closing hint uses "unknown entry".
 */
static void
explain_name_has(const char *name)
{
	/* One row per metric: lookup key, formula text, threshold hint. */
	static const struct {
		const char *metric;
		const char *formula;
		const char *limit;
	} notes[] = {
		{ "eff1",
		  "Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh < .75" },
		{ "eff2",
		  "Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n",
		  "thresh > 1.0" },
		{ "itlbmiss",
		  "Examine ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "icachemiss",
		  "Examine (36 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "lcpstall",
		  "Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "cache1",
		  "Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "cache2",
		  "Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) + \n"
		  " (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))\n"
		  " / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .2" },
		{ "contested",
		  "Examine (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "datashare",
		  "Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh > .05" },
		{ "blockstorefwd",
		  "Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .05" },
		{ "splitload",
		  "Examine (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "splitstore",
		  "Examine MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES\n",
		  "thresh >= .01" },
		{ "aliasing_4k",
		  "Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .1" },
		{ "dtlbmissload",
		  "Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n"
		  " / CPU_CLK_UNHALTED.THREAD_P)\n",
		  "thresh >= .1" },
		{ "br_miss",
		  "Examine (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD\n",
		  "thresh >= .2" },
		{ "clears",
		  "Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n"
		  " MACHINE_CLEARS.SMC + \n"
		  " MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n",
		  "thresh >= .02" },
		{ "microassist",
		  "Examine IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n"
		  "***We use IDQ.MS_UOPS,cmask=1 to get cycles\n",
		  "thresh >= .05" },
		{ "fpassist",
		  "Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n",
		  "look for a excessive value" },
		{ "otherassistavx",
		  "Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
		{ "otherassistsse",
		  "Examine (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P\n",
		  "look for a excessive value" },
	};
	const size_t count = sizeof(notes) / sizeof(notes[0]);
	const char *limit = "unknown entry";
	size_t idx;

	/* Stop at the first row whose key matches exactly. */
	for (idx = 0; idx < count; idx++) {
		if (strcmp(name, notes[idx].metric) == 0)
			break;
	}

	if (idx < count) {
		printf("%s", notes[idx].formula);
		limit = notes[idx].limit;
	} else {
		printf("Unknown name:%s\n", name);
	}
	printf("If the value printed is %s we may have the ability to improve performance\n", limit);
}
450 static struct counters *
451 find_counter(struct counters *base, const char *name)
459 if (strncmp(at->counter_name, name, len) == 0) {
464 printf("Can't find counter %s\n", name);
465 printf("We have:\n");
468 printf("- %s\n", at->counter_name);
475 allocstall1(struct counters *cpu, int pos)
477 /* 1 - PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW/CPU_CLK_UNHALTED.THREAD_P (thresh > .05)*/
479 struct counters *partial;
480 struct counters *unhalt;
482 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
483 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW");
485 par = partial->vals[pos] * 1.0;
486 un = unhalt->vals[pos] * 1.0;
488 par = partial->sum * 1.0;
489 un = unhalt->sum * 1.0;
492 ret = printf("%1.3f", res);
497 allocstall2(struct counters *cpu, int pos)
499 /* 2 - PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP_CYCLES/CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
501 struct counters *partial;
502 struct counters *unhalt;
504 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
505 partial = find_counter(cpu, "PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP");
507 par = partial->vals[pos] * 1.0;
508 un = unhalt->vals[pos] * 1.0;
510 par = partial->sum * 1.0;
511 un = unhalt->sum * 1.0;
514 ret = printf("%1.3f", res);
519 br_mispredict(struct counters *cpu, int pos)
521 struct counters *brctr;
522 struct counters *unhalt;
524 /* 3 - (20 * BR_MISP_RETIRED.ALL_BRANCHES)/CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
525 double br, un, con, res;
528 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
529 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
531 br = brctr->vals[pos] * 1.0;
532 un = unhalt->vals[pos] * 1.0;
534 br = brctr->sum * 1.0;
535 un = unhalt->sum * 1.0;
538 ret = printf("%1.3f", res);
543 br_mispredictib(struct counters *cpu, int pos)
545 struct counters *brctr;
546 struct counters *unhalt;
547 struct counters *clear, *clear2, *clear3;
548 struct counters *uops;
549 struct counters *recv;
550 struct counters *iss;
551 /* "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",*/
554 * (BR_MISP_RETIRED.ALL_BRANCHES /
555 * (BR_MISP_RETIRED.ALL_BRANCHES +
556 * MACHINE_CLEAR.COUNT) *
557 * ((UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) / (4 * CPU_CLK_UNHALTED.THREAD)))
560 double br, cl, cl2, cl3, uo, re, un, con, res, is;
563 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
564 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
565 clear = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
566 clear2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
567 clear3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
568 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
569 iss = find_counter(cpu, "UOPS_ISSUED.ANY");
570 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
572 br = brctr->vals[pos] * 1.0;
573 cl = clear->vals[pos] * 1.0;
574 cl2 = clear2->vals[pos] * 1.0;
575 cl3 = clear3->vals[pos] * 1.0;
576 uo = uops->vals[pos] * 1.0;
577 re = recv->vals[pos] * 1.0;
578 is = iss->vals[pos] * 1.0;
579 un = unhalt->vals[pos] * 1.0;
581 br = brctr->sum * 1.0;
582 cl = clear->sum * 1.0;
583 cl2 = clear2->sum * 1.0;
584 cl3 = clear3->sum * 1.0;
585 uo = uops->sum * 1.0;
586 re = recv->sum * 1.0;
588 un = unhalt->sum * 1.0;
590 res = (br/(br + cl + cl2 + cl3) * ((is - uo + con * re) / (con * un)));
591 ret = printf("%1.3f", res);
597 br_mispredict_broad(struct counters *cpu, int pos)
599 struct counters *brctr;
600 struct counters *unhalt;
601 struct counters *clear;
602 struct counters *uops;
603 struct counters *uops_ret;
604 struct counters *recv;
606 double br, cl, uo, uo_r, re, con, un, res;
610 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
611 brctr = find_counter(cpu, "BR_MISP_RETIRED.ALL_BRANCHES");
612 clear = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
613 uops = find_counter(cpu, "UOPS_ISSUED.ANY");
614 uops_ret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
615 recv = find_counter(cpu, "INT_MISC.RECOVERY_CYCLES");
618 un = unhalt->vals[pos] * 1.0;
619 br = brctr->vals[pos] * 1.0;
620 cl = clear->vals[pos] * 1.0;
621 uo = uops->vals[pos] * 1.0;
622 uo_r = uops_ret->vals[pos] * 1.0;
623 re = recv->vals[pos] * 1.0;
625 un = unhalt->sum * 1.0;
626 br = brctr->sum * 1.0;
627 cl = clear->sum * 1.0;
628 uo = uops->sum * 1.0;
629 uo_r = uops_ret->sum * 1.0;
630 re = recv->sum * 1.0;
632 res = br / (br + cl) * (uo - uo_r + con * re) / (un * con);
633 ret = printf("%1.3f", res);
638 splitloadib(struct counters *cpu, int pos)
641 struct counters *mem;
642 struct counters *l1d, *ldblock;
643 struct counters *unhalt;
644 double un, memd, res, l1, ldb;
646 * ((L1D_PEND_MISS.PENDING / MEM_LOAD_UOPS_RETIRED.L1_MISS) * LD_BLOCKS.NO_SR) / CPU_CLK_UNHALTED.THREAD_P
647 * "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
650 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
651 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L1_MISS");
652 l1d = find_counter(cpu, "L1D_PEND_MISS.PENDING");
653 ldblock = find_counter(cpu, "LD_BLOCKS.NO_SR");
655 memd = mem->vals[pos] * 1.0;
656 l1 = l1d->vals[pos] * 1.0;
657 ldb = ldblock->vals[pos] * 1.0;
658 un = unhalt->vals[pos] * 1.0;
660 memd = mem->sum * 1.0;
662 ldb = ldblock->sum * 1.0;
663 un = unhalt->sum * 1.0;
665 res = ((l1 / memd) * ldb)/un;
666 ret = printf("%1.3f", res);
672 splitload(struct counters *cpu, int pos)
675 struct counters *mem;
676 struct counters *unhalt;
677 double con, un, memd, res;
678 /* 4 - (MEM_UOPS_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
681 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
682 mem = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_LOADS");
684 memd = mem->vals[pos] * 1.0;
685 un = unhalt->vals[pos] * 1.0;
687 memd = mem->sum * 1.0;
688 un = unhalt->sum * 1.0;
690 res = (memd * con)/un;
691 ret = printf("%1.3f", res);
697 splitload_sb(struct counters *cpu, int pos)
700 struct counters *mem;
701 struct counters *unhalt;
702 double con, un, memd, res;
703 /* 4 - (MEM_UOP_RETIRED.SPLIT_LOADS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .1)*/
706 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
707 mem = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_LOADS");
709 memd = mem->vals[pos] * 1.0;
710 un = unhalt->vals[pos] * 1.0;
712 memd = mem->sum * 1.0;
713 un = unhalt->sum * 1.0;
715 res = (memd * con)/un;
716 ret = printf("%1.3f", res);
722 splitstore_sb(struct counters *cpu, int pos)
724 /* 5 - MEM_UOP_RETIRED.SPLIT_STORES / MEM_UOP_RETIRED.ALL_STORES (thresh > 0.01) */
726 struct counters *mem_split;
727 struct counters *mem_stores;
728 double memsplit, memstore, res;
729 mem_split = find_counter(cpu, "MEM_UOP_RETIRED.SPLIT_STORES");
730 mem_stores = find_counter(cpu, "MEM_UOP_RETIRED.ALL_STORES");
732 memsplit = mem_split->vals[pos] * 1.0;
733 memstore = mem_stores->vals[pos] * 1.0;
735 memsplit = mem_split->sum * 1.0;
736 memstore = mem_stores->sum * 1.0;
738 res = memsplit/memstore;
739 ret = printf("%1.3f", res);
746 splitstore(struct counters *cpu, int pos)
748 /* 5 - MEM_UOPS_RETIRED.SPLIT_STORES / MEM_UOPS_RETIRED.ALL_STORES (thresh > 0.01) */
750 struct counters *mem_split;
751 struct counters *mem_stores;
752 double memsplit, memstore, res;
753 mem_split = find_counter(cpu, "MEM_UOPS_RETIRED.SPLIT_STORES");
754 mem_stores = find_counter(cpu, "MEM_UOPS_RETIRED.ALL_STORES");
756 memsplit = mem_split->vals[pos] * 1.0;
757 memstore = mem_stores->vals[pos] * 1.0;
759 memsplit = mem_split->sum * 1.0;
760 memstore = mem_stores->sum * 1.0;
762 res = memsplit/memstore;
763 ret = printf("%1.3f", res);
769 contested(struct counters *cpu, int pos)
771 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
773 struct counters *mem;
774 struct counters *unhalt;
775 double con, un, memd, res;
778 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
779 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
781 memd = mem->vals[pos] * 1.0;
782 un = unhalt->vals[pos] * 1.0;
784 memd = mem->sum * 1.0;
785 un = unhalt->sum * 1.0;
787 res = (memd * con)/un;
788 ret = printf("%1.3f", res);
793 contested_has(struct counters *cpu, int pos)
795 /* 6 - (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
797 struct counters *mem;
798 struct counters *unhalt;
799 double con, un, memd, res;
802 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
803 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
805 memd = mem->vals[pos] * 1.0;
806 un = unhalt->vals[pos] * 1.0;
808 memd = mem->sum * 1.0;
809 un = unhalt->sum * 1.0;
811 res = (memd * con)/un;
812 ret = printf("%1.3f", res);
817 contestedbroad(struct counters *cpu, int pos)
819 /* 6 - ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS) / CPU_CLK_UNHALTED.THREAD_P (thresh >.05) */
821 struct counters *mem;
822 struct counters *mem2;
823 struct counters *unhalt;
824 double con, un, memd, memtoo, res;
827 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
828 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
829 mem2 = find_counter(cpu,"MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS");
832 memd = mem->vals[pos] * 1.0;
833 memtoo = mem2->vals[pos] * 1.0;
834 un = unhalt->vals[pos] * 1.0;
836 memd = mem->sum * 1.0;
837 memtoo = mem2->sum * 1.0;
838 un = unhalt->sum * 1.0;
840 res = ((memd * con) + memtoo)/un;
841 ret = printf("%1.3f", res);
847 blockstoreforward(struct counters *cpu, int pos)
849 /* 7 - (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .05)*/
851 struct counters *ldb;
852 struct counters *unhalt;
853 double con, un, ld, res;
856 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
857 ldb = find_counter(cpu, "LD_BLOCKS_STORE_FORWARD");
859 ld = ldb->vals[pos] * 1.0;
860 un = unhalt->vals[pos] * 1.0;
863 un = unhalt->sum * 1.0;
866 ret = printf("%1.3f", res);
871 cache2(struct counters *cpu, int pos)
874 * 8 - ((MEM_LOAD_RETIRED.L3_HIT * 26) + (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 43) +
875 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 60)) / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
878 struct counters *mem1, *mem2, *mem3;
879 struct counters *unhalt;
880 double con1, con2, con3, un, me_1, me_2, me_3, res;
885 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
886 /* The formula calls for MEM_LOAD_RETIRED.L3_HIT; the counter used here is named MEM_LOAD_UOPS_RETIRED.LLC_HIT. */
887 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
888 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
889 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
891 me_1 = mem1->vals[pos] * 1.0;
892 me_2 = mem2->vals[pos] * 1.0;
893 me_3 = mem3->vals[pos] * 1.0;
894 un = unhalt->vals[pos] * 1.0;
896 me_1 = mem1->sum * 1.0;
897 me_2 = mem2->sum * 1.0;
898 me_3 = mem3->sum * 1.0;
899 un = unhalt->sum * 1.0;
901 res = ((me_1 * con1) + (me_2 * con2) + (me_3 * con3))/un;
902 ret = printf("%1.3f", res);
907 datasharing(struct counters *cpu, int pos)
910 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
913 struct counters *mem;
914 struct counters *unhalt;
915 double con, res, me, un;
918 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
919 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
921 me = mem->vals[pos] * 1.0;
922 un = unhalt->vals[pos] * 1.0;
925 un = unhalt->sum * 1.0;
928 ret = printf("%1.3f", res);
935 datasharing_has(struct counters *cpu, int pos)
938 * (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 43)/ CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
941 struct counters *mem;
942 struct counters *unhalt;
943 double con, res, me, un;
946 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
947 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
949 me = mem->vals[pos] * 1.0;
950 un = unhalt->vals[pos] * 1.0;
953 un = unhalt->sum * 1.0;
956 ret = printf("%1.3f", res);
963 cache2ib(struct counters *cpu, int pos)
966 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
969 struct counters *mem;
970 struct counters *unhalt;
971 double con, un, me, res;
974 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
975 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
977 me = mem->vals[pos] * 1.0;
978 un = unhalt->vals[pos] * 1.0;
981 un = unhalt->sum * 1.0;
984 ret = printf("%1.3f", res);
989 cache2has(struct counters *cpu, int pos)
992 * Examine ((MEM_LOAD_UOPS_RETIRED.LLC_HIT * 36) + \
993 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT * 72) +
994 * (MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84))
995 * / CPU_CLK_UNHALTED.THREAD_P
998 struct counters *mem1, *mem2, *mem3;
999 struct counters *unhalt;
1000 double con1, con2, con3, un, me1, me2, me3, res;
1005 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1006 mem1 = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.LLC_HIT");
1007 mem2 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT");
1008 mem3 = find_counter(cpu, "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM");
1010 me1 = mem1->vals[pos] * 1.0;
1011 me2 = mem2->vals[pos] * 1.0;
1012 me3 = mem3->vals[pos] * 1.0;
1013 un = unhalt->vals[pos] * 1.0;
1015 me1 = mem1->sum * 1.0;
1016 me2 = mem2->sum * 1.0;
1017 me3 = mem3->sum * 1.0;
1018 un = unhalt->sum * 1.0;
1020 res = ((me1 * con1) + (me2 * con2) + (me3 * con3))/un;
1021 ret = printf("%1.3f", res);
1027 cache2broad(struct counters *cpu, int pos)
1030 * (29 * MEM_LOAD_UOPS_RETIRED.LLC_HIT / CPU_CLK_UNHALTED.THREAD_P (thresh >.2)
1033 struct counters *mem;
1034 struct counters *unhalt;
1035 double con, un, me, res;
1038 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1039 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_HIT");
1041 me = mem->vals[pos] * 1.0;
1042 un = unhalt->vals[pos] * 1.0;
1044 me = mem->sum * 1.0;
1045 un = unhalt->sum * 1.0;
1047 res = (con * me)/un;
1048 ret = printf("%1.3f", res);
1054 cache1(struct counters *cpu, int pos)
1056 /* 9 - (MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1058 struct counters *mem;
1059 struct counters *unhalt;
1060 double con, un, me, res;
1063 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1064 mem = find_counter(cpu, "MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS");
1066 me = mem->vals[pos] * 1.0;
1067 un = unhalt->vals[pos] * 1.0;
1069 me = mem->sum * 1.0;
1070 un = unhalt->sum * 1.0;
1072 res = (me * con)/un;
1073 ret = printf("%1.3f", res);
1078 cache1ib(struct counters *cpu, int pos)
1080 /* 9 - (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1082 struct counters *mem;
1083 struct counters *unhalt;
1084 double con, un, me, res;
1087 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1088 mem = find_counter(cpu, "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM");
1090 me = mem->vals[pos] * 1.0;
1091 un = unhalt->vals[pos] * 1.0;
1093 me = mem->sum * 1.0;
1094 un = unhalt->sum * 1.0;
1096 res = (me * con)/un;
1097 ret = printf("%1.3f", res);
1103 cache1broad(struct counters *cpu, int pos)
1105 /* 9 - (MEM_LOAD_UOPS_RETIRED.L3_MISS * 180) / CPU_CLK_UNHALTED.THREAD_P (thresh >= .2) */
1107 struct counters *mem;
1108 struct counters *unhalt;
1109 double con, un, me, res;
1112 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1113 mem = find_counter(cpu, "MEM_LOAD_UOPS_RETIRED.L3_MISS");
1115 me = mem->vals[pos] * 1.0;
1116 un = unhalt->vals[pos] * 1.0;
1118 me = mem->sum * 1.0;
1119 un = unhalt->sum * 1.0;
1121 res = (me * con)/un;
1122 ret = printf("%1.3f", res);
/*
 * dtlb_missload: prints ((d1 * con) + d2) / un with "%1.3f", where d1 is
 * DTLB_LOAD_MISSES.STLB_HIT, d2 is DTLB_LOAD_MISSES.WALK_DURATION and un is
 * CPU_CLK_UNHALTED.THREAD_P.  cpu is handed to find_counter(); pos indexes
 * vals[].
 * NOTE(review): the assignment to con (7 per the formula comment) and the
 * test choosing vals[pos] versus sum are absent from this excerpt.
 */
1128 dtlb_missload(struct counters *cpu, int pos)
1130 /* 10 - ((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P (t >=.1) */
1132 struct counters *dtlb_m, *dtlb_d;
1133 struct counters *unhalt;
1134 double con, un, d1, d2, res;
1137 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1138 dtlb_m = find_counter(cpu, "DTLB_LOAD_MISSES.STLB_HIT");
1139 dtlb_d = find_counter(cpu, "DTLB_LOAD_MISSES.WALK_DURATION");
/* Values of the single sample stored at index pos. */
1141 d1 = dtlb_m->vals[pos] * 1.0;
1142 d2 = dtlb_d->vals[pos] * 1.0;
1143 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1145 d1 = dtlb_m->sum * 1.0;
1146 d2 = dtlb_d->sum * 1.0;
1147 un = unhalt->sum * 1.0;
1149 res = ((d1 * con) + d2)/un;
1150 ret = printf("%1.3f", res);
/*
 * dtlb_missstore: prints ((d1 * con) + d2) / un with "%1.3f", where d1 is
 * DTLB_STORE_MISSES.STLB_HIT, d2 is DTLB_STORE_MISSES.WALK_DURATION and un
 * is CPU_CLK_UNHALTED.THREAD_P.  cpu is handed to find_counter(); pos
 * indexes vals[].
 * NOTE(review): the assignment to con (7 per the formula text) and the test
 * choosing vals[pos] versus sum are absent from this excerpt.
 */
1155 dtlb_missstore(struct counters *cpu, int pos)
1158 * ((DTLB_STORE_MISSES.STLB_HIT * 7) + DTLB_STORE_MISSES.WALK_DURATION) /
1159 * CPU_CLK_UNHALTED.THREAD_P (t >= .1)
1162 struct counters *dtsb_m, *dtsb_d;
1163 struct counters *unhalt;
1164 double con, un, d1, d2, res;
1167 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1168 dtsb_m = find_counter(cpu, "DTLB_STORE_MISSES.STLB_HIT");
1169 dtsb_d = find_counter(cpu, "DTLB_STORE_MISSES.WALK_DURATION");
/* Values of the single sample stored at index pos. */
1171 d1 = dtsb_m->vals[pos] * 1.0;
1172 d2 = dtsb_d->vals[pos] * 1.0;
1173 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1175 d1 = dtsb_m->sum * 1.0;
1176 d2 = dtsb_d->sum * 1.0;
1177 un = unhalt->sum * 1.0;
1179 res = ((d1 * con) + d2)/un;
1180 ret = printf("%1.3f", res);
/*
 * itlb_miss: reads ITLB_MISSES.WALK_DURATION into d1 and
 * CPU_CLK_UNHALTED.THREAD_P into un, then prints res with "%1.3f".
 * cpu is handed to find_counter(); pos indexes vals[].
 * NOTE(review): the declarations of un/d1/res, the statement computing res
 * (presumably d1/un per the formula comment) and the test choosing
 * vals[pos] versus sum are absent from this excerpt.
 */
1185 itlb_miss(struct counters *cpu, int pos)
1187 /* ITLB_MISSES.WALK_DURATION / CPU_CLK_UNHALTED.THREAD_P IB */
1189 struct counters *itlb;
1190 struct counters *unhalt;
1193 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1194 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
/* Values of the single sample stored at index pos. */
1196 d1 = itlb->vals[pos] * 1.0;
1197 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1199 d1 = itlb->sum * 1.0;
1200 un = unhalt->sum * 1.0;
1203 ret = printf("%1.3f", res);
/*
 * itlb_miss_broad: prints (7.0 * k + d1) / un with "%1.3f", where k is
 * ITLB_MISSES.STLB_HIT_4K, d1 is ITLB_MISSES.WALK_DURATION and un is
 * CPU_CLK_UNHALTED.THREAD_P.  cpu is handed to find_counter(); pos indexes
 * vals[].
 * NOTE(review): the test choosing vals[pos] versus sum is absent from this
 * excerpt.
 */
1209 itlb_miss_broad(struct counters *cpu, int pos)
1211 /* (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P */
1213 struct counters *itlb;
1214 struct counters *unhalt;
1215 struct counters *four_k;
1216 double un, d1, res, k;
1218 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1219 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1220 four_k = find_counter(cpu, "ITLB_MISSES.STLB_HIT_4K");
/* Values of the single sample stored at index pos. */
1222 d1 = itlb->vals[pos] * 1.0;
1223 un = unhalt->vals[pos] * 1.0;
1224 k = four_k->vals[pos] * 1.0;
/* Values of the running totals. */
1226 d1 = itlb->sum * 1.0;
1227 un = unhalt->sum * 1.0;
1228 k = four_k->sum * 1.0;
1230 res = (7.0 * k + d1)/un;
1231 ret = printf("%1.3f", res);
/*
 * icache_miss: reads ITLB_MISSES.WALK_DURATION into d1, ICACHE.IFETCH_STALL
 * into ic and CPU_CLK_UNHALTED.THREAD_P into un, then prints res with
 * "%1.3f".  cpu is handed to find_counter(); pos indexes vals[].
 * NOTE(review): the statement computing res (presumably (ic - d1) / un per
 * the formula comment) and the test choosing vals[pos] versus sum are
 * absent from this excerpt.
 */
1237 icache_miss(struct counters *cpu, int pos)
1239 /* (ICACHE.IFETCH_STALL - ITLB_MISSES.WALK_DURATION) / CPU_CLK_UNHALTED.THREAD_P IB */
1242 struct counters *itlb, *icache;
1243 struct counters *unhalt;
1244 double un, d1, ic, res;
1246 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1247 itlb = find_counter(cpu, "ITLB_MISSES.WALK_DURATION");
1248 icache = find_counter(cpu, "ICACHE.IFETCH_STALL");
/* Values of the single sample stored at index pos. */
1250 d1 = itlb->vals[pos] * 1.0;
1251 ic = icache->vals[pos] * 1.0;
1252 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1254 d1 = itlb->sum * 1.0;
1255 ic = icache->sum * 1.0;
1256 un = unhalt->sum * 1.0;
1259 ret = printf("%1.3f", res);
/*
 * icache_miss_has: prints (con * ic) / un with "%1.3f", where ic is
 * ICACHE.MISSES and un is CPU_CLK_UNHALTED.THREAD_P.  cpu is handed to
 * find_counter(); pos indexes vals[].
 * NOTE(review): the assignment to con (36 per the formula comment) and the
 * test choosing vals[pos] versus sum are absent from this excerpt.
 */
1265 icache_miss_has(struct counters *cpu, int pos)
1267 /* (36 * ICACHE.MISSES) / CPU_CLK_UNHALTED.THREAD_P */
1270 struct counters *icache;
1271 struct counters *unhalt;
1272 double un, con, ic, res;
1274 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1275 icache = find_counter(cpu, "ICACHE.MISSES");
/* Values of the single sample stored at index pos. */
1278 ic = icache->vals[pos] * 1.0;
1279 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1281 ic = icache->sum * 1.0;
1282 un = unhalt->sum * 1.0;
1284 res = (con * ic)/un;
1285 ret = printf("%1.3f", res);
/*
 * lcp_stall: reads ILD_STALL.LCP into d1 and CPU_CLK_UNHALTED.THREAD_P into
 * un, then prints res with "%1.3f".  cpu is handed to find_counter(); pos
 * indexes vals[].
 * NOTE(review): the declarations of un/d1/res, the statement computing res
 * (presumably d1/un per the formula comment) and the test choosing
 * vals[pos] versus sum are absent from this excerpt.
 */
1291 lcp_stall(struct counters *cpu, int pos)
1293 /* ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P IB */
1295 struct counters *ild;
1296 struct counters *unhalt;
1299 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1300 ild = find_counter(cpu, "ILD_STALL.LCP");
/* Values of the single sample stored at index pos. */
1302 d1 = ild->vals[pos] * 1.0;
1303 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1305 d1 = ild->sum * 1.0;
1306 un = unhalt->sum * 1.0;
1309 ret = printf("%1.3f", res);
/*
 * frontendstall: prints id / (un * con) with "%1.3f", where id is
 * IDQ_UOPS_NOT_DELIVERED.CORE and un is CPU_CLK_UNHALTED.THREAD_P.  cpu is
 * handed to find_counter(); pos indexes vals[].
 * NOTE(review): the assignment to con (4 per the formula comment) and the
 * test choosing vals[pos] versus sum are absent from this excerpt.
 */
1316 frontendstall(struct counters *cpu, int pos)
1318 /* 12 - IDQ_UOPS_NOT_DELIVERED.CORE / (CPU_CLK_UNHALTED.THREAD_P * 4) (thresh >= .15) */
1320 struct counters *idq;
1321 struct counters *unhalt;
1322 double con, un, id, res;
1325 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1326 idq = find_counter(cpu, "IDQ_UOPS_NOT_DELIVERED.CORE");
/* Values of the single sample stored at index pos. */
1328 id = idq->vals[pos] * 1.0;
1329 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1331 id = idq->sum * 1.0;
1332 un = unhalt->sum * 1.0;
1334 res = id/(un * con);
1335 ret = printf("%1.3f", res);
/*
 * clears: prints ((cl1 + cl2 + cl3) * con) / un with "%1.3f", where the cl
 * values are the MACHINE_CLEARS MEMORY_ORDERING, SMC and MASKMOV counters
 * and un is CPU_CLK_UNHALTED.THREAD_P.  cpu is handed to find_counter();
 * pos indexes vals[].
 * NOTE(review): the assignment to con (100 per the formula comment) and the
 * test choosing vals[pos] versus sum are absent from this excerpt.
 */
1340 clears(struct counters *cpu, int pos)
1342 /* 13 - ((MACHINE_CLEARS.MEMORY_ORDERING + MACHINE_CLEARS.SMC + MACHINE_CLEARS.MASKMOV ) * 100 )
1343 * / CPU_CLK_UNHALTED.THREAD_P (thresh >= .02)*/
1346 struct counters *clr1, *clr2, *clr3;
1347 struct counters *unhalt;
1348 double con, un, cl1, cl2, cl3, res;
1351 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1352 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1353 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1354 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
/* Values of the single sample stored at index pos. */
1357 cl1 = clr1->vals[pos] * 1.0;
1358 cl2 = clr2->vals[pos] * 1.0;
1359 cl3 = clr3->vals[pos] * 1.0;
1360 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1362 cl1 = clr1->sum * 1.0;
1363 cl2 = clr2->sum * 1.0;
1364 cl3 = clr3->sum * 1.0;
1365 un = unhalt->sum * 1.0;
1367 res = ((cl1 + cl2 + cl3) * con)/un;
1368 ret = printf("%1.3f", res);
/*
 * clears_broad: prints ((cl1 + cl2 + cl3 + cy) * con) / un with "%1.3f".
 * The cl values are the MACHINE_CLEARS MEMORY_ORDERING, SMC and MASKMOV
 * counters, cy is MACHINE_CLEARS.CYCLES and un is CPU_CLK_UNHALTED.THREAD_P.
 * cpu is handed to find_counter(); pos indexes vals[].
 * NOTE(review): the assignment to con and the test choosing vals[pos]
 * versus sum are absent from this excerpt.
 */
1375 clears_broad(struct counters *cpu, int pos)
1378 struct counters *clr1, *clr2, *clr3, *cyc;
1379 struct counters *unhalt;
1380 double con, un, cl1, cl2, cl3, cy, res;
1383 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1384 clr1 = find_counter(cpu, "MACHINE_CLEARS.MEMORY_ORDERING");
1385 clr2 = find_counter(cpu, "MACHINE_CLEARS.SMC");
1386 clr3 = find_counter(cpu, "MACHINE_CLEARS.MASKMOV");
1387 cyc = find_counter(cpu, "MACHINE_CLEARS.CYCLES");
/* Values of the single sample stored at index pos. */
1389 cl1 = clr1->vals[pos] * 1.0;
1390 cl2 = clr2->vals[pos] * 1.0;
1391 cl3 = clr3->vals[pos] * 1.0;
1392 cy = cyc->vals[pos] * 1.0;
1393 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1395 cl1 = clr1->sum * 1.0;
1396 cl2 = clr2->sum * 1.0;
1397 cl3 = clr3->sum * 1.0;
1398 cy = cyc->sum * 1.0;
1399 un = unhalt->sum * 1.0;
1401 /* Formula not listed, but extrapolated to add the cycle count (cy) -- unverified. */
1402 res = ((cl1 + cl2 + cl3 + cy) * con)/un;
1403 ret = printf("%1.3f", res);
/*
 * microassist: prints id / (un * con) with "%1.3f", where id is read from
 * the counter named IDQ.MS_UOPS and un is CPU_CLK_UNHALTED.THREAD_P.
 * The formula comment names IDQ.MS_CYCLES; the pmcstat command lines in the
 * CPU tables request "IDQ.MS_UOPS,cmask=1", which explain_name_broad() says
 * is used to obtain cycles.
 * NOTE(review): the assignment to con and the test choosing vals[pos]
 * versus sum are absent from this excerpt; the formula comment shows no
 * constant in the divisor -- confirm con's value.
 */
1412 microassist(struct counters *cpu, int pos)
1414 /* 14 - IDQ.MS_CYCLES / CPU_CLK_UNHALTED.THREAD_P (thresh > .05) */
1416 struct counters *idq;
1417 struct counters *unhalt;
1418 double un, id, res, con;
1421 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1422 idq = find_counter(cpu, "IDQ.MS_UOPS");
/* Values of the single sample stored at index pos. */
1424 id = idq->vals[pos] * 1.0;
1425 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1427 id = idq->sum * 1.0;
1428 un = unhalt->sum * 1.0;
1430 res = id/(un * con);
1431 ret = printf("%1.3f", res);
/*
 * microassist_broad: prints (uor / uoi) * (id / (un * con)) with "%1.3f".
 * uor is UOPS_RETIRED.RETIRE_SLOTS, uoi is UOPS_ISSUED.ANY, id is
 * IDQ.MS_UOPS and un is CPU_CLK_UNHALTED.THREAD_P.  cpu is handed to
 * find_counter(); pos indexes vals[].
 * NOTE(review): the assignment to con (explain_name_broad() prints "4 *")
 * and the test choosing vals[pos] versus sum are absent from this excerpt.
 */
1437 microassist_broad(struct counters *cpu, int pos)
1440 struct counters *idq;
1441 struct counters *unhalt;
1442 struct counters *uopiss;
1443 struct counters *uopret;
1444 double un, id, res, con, uoi, uor;
1447 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1448 idq = find_counter(cpu, "IDQ.MS_UOPS");
1449 uopiss = find_counter(cpu, "UOPS_ISSUED.ANY");
1450 uopret = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
/* Values of the single sample stored at index pos. */
1452 id = idq->vals[pos] * 1.0;
1453 un = unhalt->vals[pos] * 1.0;
1454 uoi = uopiss->vals[pos] * 1.0;
1455 uor = uopret->vals[pos] * 1.0;
/* Values of the running totals. */
1457 id = idq->sum * 1.0;
1458 un = unhalt->sum * 1.0;
1459 uoi = uopiss->sum * 1.0;
1460 uor = uopret->sum * 1.0;
1462 res = (uor/uoi) * (id/(un * con));
1463 ret = printf("%1.3f", res);
/*
 * aliasing: prints (lds * con) / un with "%1.3f", where lds is
 * LD_BLOCKS_PARTIAL.ADDRESS_ALIAS and un is CPU_CLK_UNHALTED.THREAD_P.
 * cpu is handed to find_counter(); pos indexes vals[].
 * NOTE(review): the assignment to con (5 per the formula comment) and the
 * test choosing vals[pos] versus sum are absent from this excerpt.
 */
1469 aliasing(struct counters *cpu, int pos)
1471 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1473 struct counters *ld;
1474 struct counters *unhalt;
1475 double un, lds, con, res;
1478 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1479 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
/* Values of the single sample stored at index pos. */
1481 lds = ld->vals[pos] * 1.0;
1482 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1484 lds = ld->sum * 1.0;
1485 un = unhalt->sum * 1.0;
1487 res = (lds * con)/un;
1488 ret = printf("%1.3f", res);
/*
 * aliasing_broad: prints (lds * con) / un with "%1.3f", where lds is
 * LD_BLOCKS_PARTIAL.ADDRESS_ALIAS and un is CPU_CLK_UNHALTED.THREAD_P.
 * cpu is handed to find_counter(); pos indexes vals[].
 * NOTE(review): the assignment to con and the test choosing vals[pos]
 * versus sum are absent from this excerpt.  The formula comment below says
 * "* 5" while explain_name_broad() prints "* 7" for aliasing_4k -- confirm
 * which multiplier the code uses.
 */
1493 aliasing_broad(struct counters *cpu, int pos)
1495 /* 15 - (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 5) / CPU_CLK_UNHALTED.THREAD_P (thresh > .1) */
1497 struct counters *ld;
1498 struct counters *unhalt;
1499 double un, lds, con, res;
1502 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1503 ld = find_counter(cpu, "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS");
/* Values of the single sample stored at index pos. */
1505 lds = ld->vals[pos] * 1.0;
1506 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1508 lds = ld->sum * 1.0;
1509 un = unhalt->sum * 1.0;
1511 res = (lds * con)/un;
1512 ret = printf("%1.3f", res);
/*
 * fpassists: reads FP_ASSIST.ANY into fpd and INST_RETIRED.ANY_P into un,
 * then prints res with "%1.3f".  cpu is handed to find_counter(); pos
 * indexes vals[].
 * NOTE(review): the statement computing res (presumably fpd/un per the
 * formula comment) and the test choosing vals[pos] versus sum are absent
 * from this excerpt.
 */
1518 fpassists(struct counters *cpu, int pos)
1520 /* 16 - FP_ASSIST.ANY/INST_RETIRED.ANY_P */
1522 struct counters *fp;
1523 struct counters *inst;
1524 double un, fpd, res;
1526 inst = find_counter(cpu, "INST_RETIRED.ANY_P");
1527 fp = find_counter(cpu, "FP_ASSIST.ANY");
/* Values of the single sample stored at index pos. */
1529 fpd = fp->vals[pos] * 1.0;
1530 un = inst->vals[pos] * 1.0;
/* Values of the running totals. */
1532 fpd = fp->sum * 1.0;
1533 un = inst->sum * 1.0;
1536 ret = printf("%1.3f", res);
/*
 * otherassistavx: prints (ot * con) / un with "%1.3f", where ot is
 * OTHER_ASSISTS.AVX_TO_SSE and un is CPU_CLK_UNHALTED.THREAD_P.  cpu is
 * handed to find_counter(); pos indexes vals[].
 * NOTE(review): the assignment to con (75 per the formula comment) and the
 * test choosing vals[pos] versus sum are absent from this excerpt.
 */
1541 otherassistavx(struct counters *cpu, int pos)
1543 /* 17 - (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
1545 struct counters *oth;
1546 struct counters *unhalt;
1547 double un, ot, con, res;
1550 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1551 oth = find_counter(cpu, "OTHER_ASSISTS.AVX_TO_SSE");
/* Values of the single sample stored at index pos. */
1553 ot = oth->vals[pos] * 1.0;
1554 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1556 ot = oth->sum * 1.0;
1557 un = unhalt->sum * 1.0;
1559 res = (ot * con)/un;
1560 ret = printf("%1.3f", res);
/*
 * otherassistsse: prints (ot * con) / un with "%1.3f", where ot is
 * OTHER_ASSISTS.SSE_TO_AVX and un is CPU_CLK_UNHALTED.THREAD_P.  cpu is
 * handed to find_counter(); pos indexes vals[].
 * NOTE(review): the assignment to con (75 per the formula comment) and the
 * test choosing vals[pos] versus sum are absent from this excerpt.
 */
1565 otherassistsse(struct counters *cpu, int pos)
1569 struct counters *oth;
1570 struct counters *unhalt;
1571 double un, ot, con, res;
1573 /* 18 (OTHER_ASSISTS.SSE_TO_AVX * 75)/CPU_CLK_UNHALTED.THREAD_P thresh .1*/
1575 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1576 oth = find_counter(cpu, "OTHER_ASSISTS.SSE_TO_AVX");
/* Values of the single sample stored at index pos. */
1578 ot = oth->vals[pos] * 1.0;
1579 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1581 ot = oth->sum * 1.0;
1582 un = unhalt->sum * 1.0;
1584 res = (ot * con)/un;
1585 ret = printf("%1.3f", res);
/*
 * efficiency1: prints ot / (con * un) with "%1.3f", where ot is
 * UOPS_RETIRED.RETIRE_SLOTS and un is CPU_CLK_UNHALTED.THREAD_P.  cpu is
 * handed to find_counter(); pos indexes vals[].
 * NOTE(review): the assignment to con (4 per the formula comment) and the
 * test choosing vals[pos] versus sum are absent from this excerpt.
 */
1590 efficiency1(struct counters *cpu, int pos)
1594 struct counters *uops;
1595 struct counters *unhalt;
1596 double un, ot, con, res;
1598 /* 19 (UOPS_RETIRED.RETIRE_SLOTS/(4*CPU_CLK_UNHALTED.THREAD_P) look if thresh < .9*/
1600 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1601 uops = find_counter(cpu, "UOPS_RETIRED.RETIRE_SLOTS");
/* Values of the single sample stored at index pos. */
1603 ot = uops->vals[pos] * 1.0;
1604 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1606 ot = uops->sum * 1.0;
1607 un = unhalt->sum * 1.0;
1609 res = ot/(con * un);
1610 ret = printf("%1.3f", res);
/*
 * efficiency2: reads INST_RETIRED.ANY_P into ot and
 * CPU_CLK_UNHALTED.THREAD_P into un, then prints res with "%1.3f".
 * cpu is handed to find_counter(); pos indexes vals[].
 * NOTE(review): the declarations of un/ot/res, the statement computing res
 * (presumably un/ot per the formula comment) and the test choosing
 * vals[pos] versus sum are absent from this excerpt.
 */
1615 efficiency2(struct counters *cpu, int pos)
1619 struct counters *uops;
1620 struct counters *unhalt;
1623 /* 20 - CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P good if > 1. (comp factor)*/
1624 unhalt = find_counter(cpu, "CPU_CLK_UNHALTED.THREAD_P");
1625 uops = find_counter(cpu, "INST_RETIRED.ANY_P");
/* Values of the single sample stored at index pos. */
1627 ot = uops->vals[pos] * 1.0;
1628 un = unhalt->vals[pos] * 1.0;
/* Values of the running totals. */
1630 ot = uops->sum * 1.0;
1631 un = unhalt->sum * 1.0;
1634 ret = printf("%1.3f", res);
/*
 * Sandy Bridge test table (SANDY_BRIDGE_COUNT entries).  Each entry shown
 * starts with a test name, a threshold hint and the pmcstat command line;
 * entries whose last line is visible end with an evaluation function and a
 * counter count.
 * NOTE(review): presumably these initialise the name / thresh / command /
 * func / counters_required members read by set_expression() -- struct
 * cpu_entry is not defined in this excerpt, and most entry-closing lines
 * are absent from it.
 */
1638 #define SANDY_BRIDGE_COUNT 20
1639 static struct cpu_entry sandy_bridge[SANDY_BRIDGE_COUNT] = {
1640 /*01*/ { "allocstall1", "thresh > .05",
1641 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.SLOW_LEA_WINDOW -w 1",
1643 /* -- not defined for SB right (partial-rat_stalls) 02*/
1644 { "allocstall2", "thresh > .05",
1645 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s PARTIAL_RAT_STALLS.FLAGS_MERGE_UOP -w 1",
1647 /*03*/ { "br_miss", "thresh >= .2",
1648 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1650 /*04*/ { "splitload", "thresh >= .1",
1651 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOP_RETIRED.SPLIT_LOADS -w 1",
1653 /* 05*/ { "splitstore", "thresh >= .01",
1654 "pmcstat -s MEM_UOP_RETIRED.SPLIT_STORES -s MEM_UOP_RETIRED.ALL_STORES -w 1",
1656 /*06*/ { "contested", "thresh >= .05",
1657 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1659 /*07*/ { "blockstorefwd", "thresh >= .05",
1660 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1661 blockstoreforward, 2 },
1662 /*08*/ { "cache2", "thresh >= .2",
1663 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1665 /*09*/ { "cache1", "thresh >= .2",
1666 "pmcstat -s MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1668 /*10*/ { "dtlbmissload", "thresh >= .1",
1669 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1671 /*11*/ { "dtlbmissstore", "thresh >= .05",
1672 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1673 dtlb_missstore, 3 },
1674 /*12*/ { "frontendstall", "thresh >= .15",
1675 "pmcstat -s IDQ_UOPS_NOT_DELIVERED.CORE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1677 /*13*/ { "clears", "thresh >= .02",
1678 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1680 /*14*/ { "microassist", "thresh >= .05",
1681 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1683 /*15*/ { "aliasing_4k", "thresh >= .1",
1684 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1686 /*16*/ { "fpassist", "look for a excessive value",
1687 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1689 /*17*/ { "otherassistavx", "look for a excessive value",
1690 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1692 /*18*/ { "otherassistsse", "look for a excessive value",
1693 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1694 otherassistsse, 2 },
1695 /*19*/ { "eff1", "thresh < .9",
1696 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1698 /*20*/ { "eff2", "thresh > 1.0",
1699 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
/*
 * Ivy Bridge test table (IVY_BRIDGE_COUNT entries).  Each entry shown
 * starts with a test name, a threshold hint and the pmcstat command line;
 * entries whose last line is visible end with an evaluation function and a
 * counter count.
 * NOTE(review): presumably these initialise the name / thresh / command /
 * func / counters_required members read by set_expression() -- struct
 * cpu_entry is not defined in this excerpt, and most entry-closing lines
 * are absent from it.
 */
1704 #define IVY_BRIDGE_COUNT 21
1705 static struct cpu_entry ivy_bridge[IVY_BRIDGE_COUNT] = {
1706 /*1*/ { "eff1", "thresh < .75",
1707 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1709 /*2*/ { "eff2", "thresh > 1.0",
1710 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1712 /*3*/ { "itlbmiss", "thresh > .05",
1713 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1715 /*4*/ { "icachemiss", "thresh > .05",
1716 "pmcstat -s ICACHE.IFETCH_STALL -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1718 /*5*/ { "lcpstall", "thresh > .05",
1719 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1721 /*6*/ { "cache1", "thresh >= .2",
1722 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1724 /*7*/ { "cache2", "thresh >= .2",
1725 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1727 /*8*/ { "contested", "thresh >= .05",
1728 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1730 /*9*/ { "datashare", "thresh >= .05",
1731 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1733 /*10*/ { "blockstorefwd", "thresh >= .05",
1734 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1735 blockstoreforward, 2 },
1736 /*11*/ { "splitload", "thresh >= .1",
1737 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s L1D_PEND_MISS.PENDING -s MEM_LOAD_UOPS_RETIRED.L1_MISS -s LD_BLOCKS.NO_SR -w 1",
1739 /*12*/ { "splitstore", "thresh >= .01",
1740 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1742 /*13*/ { "aliasing_4k", "thresh >= .1",
1743 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1745 /*14*/ { "dtlbmissload", "thresh >= .1",
1746 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1748 /*15*/ { "dtlbmissstore", "thresh >= .05",
1749 "pmcstat -s DTLB_STORE_MISSES.STLB_HIT -s DTLB_STORE_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1750 dtlb_missstore, 3 },
1751 /*16*/ { "br_miss", "thresh >= .2",
1752 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
1753 br_mispredictib, 8 },
1754 /*17*/ { "clears", "thresh >= .02",
1755 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1757 /*18*/ { "microassist", "thresh >= .05",
1758 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1760 /*19*/ { "fpassist", "look for a excessive value",
1761 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1763 /*20*/ { "otherassistavx", "look for a excessive value",
1764 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1765 otherassistavx , 2},
1766 /*21*/ { "otherassistsse", "look for a excessive value",
1767 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1768 otherassistsse, 2 },
/*
 * Haswell test table (HASWELL_COUNT entries).  Each entry shown starts with
 * a test name, a threshold hint and the pmcstat command line; entries whose
 * last line is visible end with an evaluation function and a counter count.
 * NOTE(review): presumably these initialise the name / thresh / command /
 * func / counters_required members read by set_expression() -- struct
 * cpu_entry is not defined in this excerpt, and most entry-closing lines
 * are absent from it.
 */
1771 #define HASWELL_COUNT 20
1772 static struct cpu_entry haswell[HASWELL_COUNT] = {
1773 /*1*/ { "eff1", "thresh < .75",
1774 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1776 /*2*/ { "eff2", "thresh > 1.0",
1777 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1779 /*3*/ { "itlbmiss", "thresh > .05",
1780 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1782 /*4*/ { "icachemiss", "thresh > .05",
1783 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1784 icache_miss_has, 2 },
1785 /*5*/ { "lcpstall", "thresh > .05",
1786 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1788 /*6*/ { "cache1", "thresh >= .2",
1789 "pmcstat -s MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1791 /*7*/ { "cache2", "thresh >= .2",
1792 "pmcstat -s MEM_LOAD_UOPS_RETIRED.LLC_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1794 /*8*/ { "contested", "thresh >= .05",
1795 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1797 /*9*/ { "datashare", "thresh >= .05",
1798 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1799 datasharing_has, 2 },
1800 /*10*/ { "blockstorefwd", "thresh >= .05",
1801 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1802 blockstoreforward, 2 },
1803 /*11*/ { "splitload", "thresh >= .1",
1804 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s MEM_UOPS_RETIRED.SPLIT_LOADS -w 1",
1806 /*12*/ { "splitstore", "thresh >= .01",
1807 "pmcstat -s MEM_UOPS_RETIRED.SPLIT_STORES -s MEM_UOPS_RETIRED.ALL_STORES -w 1",
1809 /*13*/ { "aliasing_4k", "thresh >= .1",
1810 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1812 /*14*/ { "dtlbmissload", "thresh >= .1",
1813 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1815 /*15*/ { "br_miss", "thresh >= .2",
1816 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -w 1",
1818 /*16*/ { "clears", "thresh >= .02",
1819 "pmcstat -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1821 /*17*/ { "microassist", "thresh >= .05",
1822 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1824 /*18*/ { "fpassist", "look for a excessive value",
1825 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1827 /*19*/ { "otherassistavx", "look for a excessive value",
1828 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1829 otherassistavx, 2 },
1830 /*20*/ { "otherassistsse", "look for a excessive value",
1831 "pmcstat -s OTHER_ASSISTS.SSE_TO_AVX -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1832 otherassistsse, 2 },
/*
 * explain_name_broad: prints the formula to examine for the test called
 * name, then a closing line quoting the matching threshold text.  An
 * unrecognised name prints "Unknown name:" and uses "unknown entry" as the
 * threshold text.
 * Event names in the printed help were corrected where they were plainly
 * misspelled (ITLB_MISSES.STLB_HIT_4K, BR_MISP_RETIRED.ALL_BRANCHES_PS,
 * UOPS_ISSUED.ANY) so they can be matched against real counter names.
 * NOTE(review): some lines of this function are absent from the excerpt
 * (e.g. the "else" introducing the unknown-name case).
 */
1837 explain_name_broad(const char *name)
1839 const char *mythresh;
1840 if (strcmp(name, "eff1") == 0) {
1841 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS)/(4 *CPU_CLK_UNHALTED.THREAD_P)\n");
1842 mythresh = "thresh < .75";
1843 } else if (strcmp(name, "eff2") == 0) {
1844 printf("Examine CPU_CLK_UNHALTED.THREAD_P/INST_RETIRED.ANY_P\n");
1845 mythresh = "thresh > 1.0";
1846 } else if (strcmp(name, "itlbmiss") == 0) {
1847 printf("Examine (7 * ITLB_MISSES.STLB_HIT_4K + ITLB_MISSES.WALK_DURATION)/ CPU_CLK_UNHALTED.THREAD_P\n");
1848 mythresh = "thresh > .05";
1849 } else if (strcmp(name, "icachemiss") == 0) {
1850 printf("Examine ( 36.0 * ICACHE.MISSES)/ CPU_CLK_UNHALTED.THREAD_P ??? may not be right \n");
1851 mythresh = "thresh > .05";
1852 } else if (strcmp(name, "lcpstall") == 0) {
1853 printf("Examine ILD_STALL.LCP/CPU_CLK_UNHALTED.THREAD_P\n");
1854 mythresh = "thresh > .05";
1855 } else if (strcmp(name, "cache1") == 0) {
1856 printf("Examine (MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM * 180) / CPU_CLK_UNHALTED.THREAD_P\n");
1857 mythresh = "thresh >= .1";
1858 } else if (strcmp(name, "cache2") == 0) {
1859 printf("Examine (36.0 * MEM_LOAD_UOPS_RETIRED.L3_HIT / CPU_CLK_UNHALTED.THREAD_P)\n");
1860 mythresh = "thresh >= .2";
1861 } else if (strcmp(name, "contested") == 0) {
1862 printf("Examine ((MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM * 84) + MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS)/ CPU_CLK_UNHALTED.THREAD_P\n");
1863 mythresh = "thresh >= .05";
1864 } else if (strcmp(name, "datashare") == 0) {
1865 printf("Examine (MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT * 72)/CPU_CLK_UNHALTED.THREAD_P\n");
1866 mythresh = "thresh > .05";
1867 } else if (strcmp(name, "blockstorefwd") == 0) {
1868 printf("Examine (LD_BLOCKS_STORE_FORWARD * 13) / CPU_CLK_UNHALTED.THREAD_P\n");
1869 mythresh = "thresh >= .05";
1870 } else if (strcmp(name, "aliasing_4k") == 0) {
1871 printf("Examine (LD_BLOCKS_PARTIAL.ADDRESS_ALIAS * 7) / CPU_CLK_UNHALTED.THREAD_P\n");
1872 mythresh = "thresh >= .1";
1873 } else if (strcmp(name, "dtlbmissload") == 0) {
1874 printf("Examine (((DTLB_LOAD_MISSES.STLB_HIT * 7) + DTLB_LOAD_MISSES.WALK_DURATION)\n");
1875 printf(" / CPU_CLK_UNHALTED.THREAD_P)\n");
1876 mythresh = "thresh >= .1";
1878 } else if (strcmp(name, "br_miss") == 0) {
/* Event names below were previously misspelled in the printed text. */
1879 printf("Examine BR_MISP_RETIRED.ALL_BRANCHES_PS / (BR_MISP_RETIRED.ALL_BRANCHES_PS + MACHINE_CLEARS.COUNT) *\n");
1880 printf(" (UOPS_ISSUED.ANY - UOPS_RETIRED.RETIRE_SLOTS + 4 * INT_MISC.RECOVERY_CYCLES) /\n");
1881 printf("CPU_CLK_UNHALTED.THREAD * 4)\n");
1882 mythresh = "thresh >= .2";
1883 } else if (strcmp(name, "clears") == 0) {
1884 printf("Examine ((MACHINE_CLEARS.MEMORY_ORDERING + \n");
1885 printf(" MACHINE_CLEARS.SMC + \n");
1886 printf(" MACHINE_CLEARS.MASKMOV ) * 100 ) / CPU_CLK_UNHALTED.THREAD_P\n");
1887 mythresh = "thresh >= .02";
1888 } else if (strcmp(name, "fpassist") == 0) {
1889 printf("Examine FP_ASSIST.ANY/INST_RETIRED.ANY_P\n");
1890 mythresh = "look for a excessive value";
1891 } else if (strcmp(name, "otherassistavx") == 0) {
1892 printf("Examine (OTHER_ASSISTS.AVX_TO_SSE * 75)/CPU_CLK_UNHALTED.THREAD_P\n");
1893 mythresh = "look for a excessive value";
1894 } else if (strcmp(name, "microassist") == 0) {
1895 printf("Examine (UOPS_RETIRED.RETIRE_SLOTS/UOPS_ISSUED.ANY) * (IDQ.MS_CYCLES / (4 * CPU_CLK_UNHALTED.THREAD_P)\n");
1896 printf("***We use IDQ.MS_UOPS,cmask=1 to get cycles\n");
1897 mythresh = "thresh >= .05";
/* Fallback for a name that matched none of the tests above. */
1899 printf("Unknown name:%s\n", name);
1900 mythresh = "unknown entry";
1902 printf("If the value printed is %s we may have the ability to improve performance\n", mythresh);
/*
 * Broadwell test table (BROADWELL_COUNT entries).  Each entry shown starts
 * with a test name, a threshold hint and the pmcstat command line; entries
 * whose last line is visible end with an evaluation function and a counter
 * count.  set_expression() refuses a test whose counters_required exceeds
 * max_pmc_counters, so the count should cover every "-s" event in the
 * command.
 * NOTE(review): presumably the initialisers map to the name / thresh /
 * command / func / counters_required members -- struct cpu_entry is not
 * defined in this excerpt, and several entry-closing lines are absent.
 */
1906 #define BROADWELL_COUNT 17
1907 static struct cpu_entry broadwell[BROADWELL_COUNT] = {
1908 /*1*/ { "eff1", "thresh < .75",
1909 "pmcstat -s UOPS_RETIRED.RETIRE_SLOTS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1911 /*2*/ { "eff2", "thresh > 1.0",
1912 "pmcstat -s INST_RETIRED.ANY_P -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1914 /*3*/ { "itlbmiss", "thresh > .05",
1915 "pmcstat -s ITLB_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -s ITLB_MISSES.STLB_HIT_4K -w 1",
1916 itlb_miss_broad, 3 },
1917 /*4*/ { "icachemiss", "thresh > .05",
1918 "pmcstat -s ICACHE.MISSES -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1919 icache_miss_has, 2 },
1920 /*5*/ { "lcpstall", "thresh > .05",
1921 "pmcstat -s ILD_STALL.LCP -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1923 /*6*/ { "cache1", "thresh >= .1",
1924 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_MISS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1926 /*7*/ { "cache2", "thresh >= .2",
1927 "pmcstat -s MEM_LOAD_UOPS_RETIRED.L3_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1929 /*8*/ { "contested", "thresh >= .05",
1930 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM -s CPU_CLK_UNHALTED.THREAD_P -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS -w 1",
/* Three events are requested above, so three counters are required. */
1931 contestedbroad, 3 },
1932 /*9*/ { "datashare", "thresh >= .05",
1933 "pmcstat -s MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1934 datasharing_has, 2 },
1935 /*10*/ { "blockstorefwd", "thresh >= .05",
1936 "pmcstat -s LD_BLOCKS_STORE_FORWARD -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1937 blockstoreforward, 2 },
1938 /*11*/ { "aliasing_4k", "thresh >= .1",
1939 "pmcstat -s LD_BLOCKS_PARTIAL.ADDRESS_ALIAS -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1940 aliasing_broad, 2 },
1941 /*12*/ { "dtlbmissload", "thresh >= .1",
1942 "pmcstat -s DTLB_LOAD_MISSES.STLB_HIT_4K -s DTLB_LOAD_MISSES.WALK_DURATION -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1944 /*13*/ { "br_miss", "thresh >= .2",
1945 "pmcstat -s CPU_CLK_UNHALTED.THREAD_P -s BR_MISP_RETIRED.ALL_BRANCHES -s MACHINE_CLEARS.CYCLES -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -s INT_MISC.RECOVERY_CYCLES -w 1",
/* NOTE(review): six events are requested above but 7 counters are demanded -- confirm. */
1946 br_mispredict_broad, 7 },
1947 /*14*/ { "clears", "thresh >= .02",
1948 "pmcstat -s MACHINE_CLEARS.CYCLES -s MACHINE_CLEARS.MEMORY_ORDERING -s MACHINE_CLEARS.SMC -s MACHINE_CLEARS.MASKMOV -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1950 /*15*/ { "fpassist", "look for a excessive value",
1951 "pmcstat -s FP_ASSIST.ANY -s INST_RETIRED.ANY_P -w 1",
1953 /*16*/ { "otherassistavx", "look for a excessive value",
1954 "pmcstat -s OTHER_ASSISTS.AVX_TO_SSE -s CPU_CLK_UNHALTED.THREAD_P -w 1",
1955 otherassistavx, 2 },
1956 /*17*/ { "microassist", "thresh >= .2",
1957 "pmcstat -s IDQ.MS_UOPS,cmask=1 -s CPU_CLK_UNHALTED.THREAD_P -s UOPS_ISSUED.ANY -s UOPS_RETIRED.RETIRE_SLOTS -w 1",
1958 microassist_broad, 4 },
/*
 * CPU selection routines.  Each group of four statements fills the_cpu with
 * a display label (cputype), the number of table entries, the test table
 * and the explain callback for one CPU generation.  cputype is what
 * set_expression() prints in its "For CPU type %s" message.
 * The last group selects the Broadwell table and explain routine; its label
 * previously read "HASWELL PMC" (copied from the group above) and now names
 * Broadwell.
 * NOTE(review): only set_sandybridge's name line is present in this
 * excerpt; the headers of the three following routines are absent.
 */
1963 set_sandybridge(void)
1965 strcpy(the_cpu.cputype, "SandyBridge PMC");
1966 the_cpu.number = SANDY_BRIDGE_COUNT;
1967 the_cpu.ents = sandy_bridge;
1968 the_cpu.explain = explain_name_sb;
/* Ivy Bridge selection. */
1974 strcpy(the_cpu.cputype, "IvyBridge PMC");
1975 the_cpu.number = IVY_BRIDGE_COUNT;
1976 the_cpu.ents = ivy_bridge;
1977 the_cpu.explain = explain_name_ib;
/* Haswell selection. */
1984 strcpy(the_cpu.cputype, "HASWELL PMC");
1985 the_cpu.number = HASWELL_COUNT;
1986 the_cpu.ents = haswell;
1987 the_cpu.explain = explain_name_has;
/* Broadwell selection: label matches the table installed below. */
1994 strcpy(the_cpu.cputype, "Broadwell PMC");
1995 the_cpu.number = BROADWELL_COUNT;
1996 the_cpu.ents = broadwell;
1997 the_cpu.explain = explain_name_broad;
/*
 * set_expression: searches the_cpu.ents[] for an entry whose name equals
 * name and, on a match, installs that entry's func, command and thresh in
 * the globals expression, command and threshold.  If the entry needs more
 * counters than max_pmc_counters it reports that the test cannot be run.
 * When nothing matches it prints a "no expression" message naming the CPU
 * type.
 * NOTE(review): what happens after each message (exit or return) is not
 * shown in this excerpt.
 */
2002 set_expression(const char *name)
2005 for(i=0 ; i< the_cpu.number; i++) {
2006 if (strcmp(name, the_cpu.ents[i].name) == 0) {
2008 expression = the_cpu.ents[i].func;
2009 command = the_cpu.ents[i].command;
2010 threshold = the_cpu.ents[i].thresh;
/* Refuse tests that need more counters than are available. */
2011 if (the_cpu.ents[i].counters_required > max_pmc_counters) {
2012 printf("Test %s requires that the CPU have %d counters and this CPU has only %d\n",
2013 the_cpu.ents[i].name,
2014 the_cpu.ents[i].counters_required, max_pmc_counters);
2015 printf("Sorry this test can not be run\n");
/* No table entry carried this name. */
2026 printf("For CPU type %s we have no expression:%s\n",
2027 the_cpu.cputype, name);
/*
 * validate_expression: walks the_cpu.ents[] comparing each entry's name
 * with name.
 * NOTE(review): the action taken on a match and the return value are not
 * shown in this excerpt.
 */
2038 validate_expression(char *name)
2043 for(i=0 ; i< the_cpu.number; i++) {
2044 if (strcmp(name, the_cpu.ents[i].name) == 0) {
/*
 * do_expression: calls the function pointer stored in the global
 * expression with (cpu, pos).  The NULL test guards the call.
 * NOTE(review): the statement executed when expression is NULL is not
 * shown in this excerpt.
 */
2056 do_expression(struct counters *cpu, int pos)
2058 if (expression == NULL)
2060 (*expression)(cpu, pos);
/*
 * process_header: parses one header token p and fills in a counters record
 * (up).  The token must start with "s/"; the number after "s/" becomes
 * up->cpu and the text after a later '/' is copied into up->counter_name,
 * truncated to MAX_NLEN-1 characters when it is too long.
 * NOTE(review): the assignment of up (presumably from idx), the '/' test
 * inside the loop and the early return are absent from this excerpt.
 * strncpy() does not terminate the truncated name itself; termination
 * relies on the record having been zeroed beforehand (see the memset in
 * build_counters_from_header) -- confirm.
 */
2064 process_header(int idx, char *p)
2066 struct counters *up;
2069 * Given header element idx, at p in
2070 * form 's/NN/nameof'
2071 * process the entry to pull out the name and
2074 if (strncmp(p, "s/", 2)) {
2075 printf("Check -- invalid header no s/ in %s\n",
2080 up->cpu = strtol(&p[2], NULL, 10);
2082 for (i=2; i<len; i++) {
2084 nlen = strlen(&p[(i+1)]);
2085 if (nlen < (MAX_NLEN-1)) {
2086 strcpy(up->counter_name, &p[(i+1)]);
2088 strncpy(up->counter_name, &p[(i+1)], (MAX_NLEN-1));
/*
 * build_counters_from_header: reads the first line from io, scans it for
 * "s/" tokens, allocates a zeroed array of struct counters (cnts) and calls
 * process_header() on the tokens found.  Prints a message if the line
 * cannot be read or the allocation fails.
 * NOTE(review): several short lines are absent from this excerpt (counter
 * increments, the inner scan loops, the NULL test on cnts, early returns).
 */
2095 build_counters_from_header(FILE *io)
2097 char buffer[8192], *p;
2101 /* We have a new start, lets
2102 * setup our headers and cpus.
2104 if (fgets(buffer, sizeof(buffer), io) == NULL) {
2105 printf("First line can't be read from file err:%d\n", errno);
2109 * Ok output is an array of counters. Once
2110 * we start to read the values in we must
2111 * put them in their slot to match their CPU and
2112 * counter being updated. We create a mass array
2113 * of the counters, filling in the CPU and
2116 /* How many do we get? */
2117 len = strlen(buffer);
2118 for (i=0, cnt=0; i<len; i++) {
2119 if (strncmp(&buffer[i], "s/", 2) == 0) {
2122 if (buffer[i] == ' ')
2127 mlen = sizeof(struct counters) * cnt;
2128 cnts = malloc(mlen);
2131 printf("No memory err:%d\n", errno);
/* Zero every record before it is filled in. */
2134 memset(cnts, 0, mlen);
/* Second scan of the same line: hand each token to process_header(). */
2135 for (i=0, cnt=0; i<len; i++) {
2136 if (strncmp(&buffer[i], "s/", 2) == 0) {
2139 if (buffer[i] == ' ') {
2144 process_header(cnt, p);
2149 printf("We have %d entries\n", cnt);
/*
 * Line limit compared against line_at in process_file(); defaults to
 * MAX_COUNTER_SLOTS.
 */
2151 extern int max_to_collect;
2152 int max_to_collect = MAX_COUNTER_SLOTS;
/*
 * read_a_line: reads one line from io and, for each of the ncnts counters,
 * parses a number with strtol() (base 0) into cnts[i].vals[pos] and adds it
 * to cnts[i].sum.
 * NOTE(review): how p and pos are set up, how p advances between fields
 * and the return values are not shown in this excerpt.
 */
2155 read_a_line(FILE *io)
2157 char buffer[8192], *p, *stop;
2160 if (fgets(buffer, sizeof(buffer), io) == NULL) {
2164 for (i=0; i<ncnts; i++) {
2166 cnts[i].vals[pos] = strtol(p, &stop, 0);
/* Keep a running total next to the per-line sample. */
2168 cnts[i].sum += cnts[i].vals[pos];
/*
 * cpu_count_out is set from cnt by the routine below and is compared with
 * a count in process_file().
 */
2174 extern int cpu_count_out;
2175 int cpu_count_out=0;
/*
 * Header-printing routine: prints a row of asterisks, makes one pass over
 * the MAX_CPU slots that produces cnt, stores cnt in cpu_count_out, then
 * makes a second pass that tracks printed_cnt and tests it against cnt.
 * NOTE(review): the routine's name line and the bodies of both loops are
 * absent from this excerpt.
 */
2180 int i, cnt, printed_cnt;
2182 printf("*********************************\n");
2183 for(i=0, cnt=0; i<MAX_CPU; i++) {
2188 cpu_count_out = cnt;
2189 for(i=0, printed_cnt=0; i<MAX_CPU; i++) {
2194 if (printed_cnt == cnt) {
/*
 * lace_cpus_together: walks the ncnts counters and chains records with the
 * same cpu number through next_cpu.  The first record seen for a CPU is
 * stored in glob_cpu[cpu]; a CPU number >= MAX_CPU is reported.
 * NOTE(review): the assignments of cpat and at, the "continue" statements
 * and the advance of cpat along the chain are absent from this excerpt.
 */
2204 lace_cpus_together(void)
2207 struct counters *cpat, *at;
2209 for(i=0; i<ncnts; i++) {
2211 if (cpat->next_cpu) {
2212 /* Already laced in */
2215 lace_cpu = cpat->cpu;
/* Guard the glob_cpu[] index. */
2216 if (lace_cpu >= MAX_CPU) {
2217 printf("CPU %d to big\n", lace_cpu);
2220 if (glob_cpu[lace_cpu] == NULL) {
2221 glob_cpu[lace_cpu] = cpat;
2223 /* Already processed this cpu */
2226 /* Ok look forward for cpu->cpu and link in */
2227 for(j=(i+1); j<ncnts; j++) {
2232 if (at->cpu == lace_cpu) {
2234 cpat->next_cpu = at;
/*
 * process_file: opens the input -- my_popen(command, ...) when filename is
 * NULL, fopen(filename) otherwise -- builds the counter array from the
 * header line, links counters per CPU, then reads data lines with
 * read_a_line() until it reports a stop or line_at reaches max_to_collect.
 * For the command case, do_expression() is called on glob_cpu[] entries as
 * lines arrive, and the stream is closed with my_pclose().
 * NOTE(review): many short lines (declarations, loop header, conditions,
 * fclose path) are absent from this excerpt.
 */
2243 process_file(char *filename)
2247 int line_at, not_done;
2248 pid_t pid_of_command=0;
2250 if (filename == NULL) {
2251 io = my_popen(command, "r", &pid_of_command);
2253 io = fopen(filename, "r");
2255 printf("Can't process file %s err:%d\n",
2260 build_counters_from_header(io);
2262 /* Nothing we can do */
2263 printf("Nothing to do -- no counters built\n");
2269 lace_cpus_together();
2272 for (i=0; i<ncnts; i++) {
2273 printf("Counter:%s cpu:%d index:%d\n",
2274 cnts[i].counter_name,
2281 if (read_a_line(io)) {
2286 if (line_at >= max_to_collect) {
2289 if (filename == NULL) {
2291 /* For the ones we dynamically open we print now */
2292 for(i=0, cnt=0; i<MAX_CPU; i++) {
/* Evaluate using the line just read (index line_at-1). */
2293 do_expression(glob_cpu[i], (line_at-1));
2295 if (cnt == cpu_count_out) {
2307 my_pclose(io, pid_of_command);
/*
 * CPUID helpers.  On amd64, cpuid(in,a,b,c,d) executes the instruction with
 * in in EAX and stores EAX/EBX/ECX/EDX into a/b/c/d; do_cpuid() does the
 * same with ax and cx as inputs and writes the four results to p[0..3].
 * The second pair of definitions expands to nothing.
 * Note that the amd64 cpuid() expansion already ends with ';'.
 * NOTE(review): the #else/#endif lines and do_cpuid()'s braces are absent
 * from this excerpt.
 */
2310 #if defined(__amd64__)
2311 #define cpuid(in,a,b,c,d)\
2312 asm("cpuid": "=a" (a), "=b" (b), "=c" (c), "=d" (d) : "a" (in));
2314 static __inline void
2315 do_cpuid(u_int ax, u_int cx, u_int *p)
2317 __asm __volatile("cpuid"
2318 : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3])
2319 : "0" (ax), "c" (cx) );
/* Empty fallbacks for the other branch of the conditional. */
2323 #define cpuid(in, a, b, c, d)
2324 #define do_cpuid(ax, cx, p)
/*
 * Identify the host CPU and load the list of PMC names.
 * (The function header and a number of short lines are not part of
 * the visible text; only what the visible lines show is described.)
 *
 *  - cpuid leaf 0: EBX holds the first four bytes of the vendor
 *    string. AMD and Cyrix get a "not supported" message, Intel is
 *    recognised, anything else is reported as unknown.
 *  - cpuid leaf 1: `model` is built from EAX bits 19:16 (high nibble)
 *    and bits 7:4 (low nibble); a descriptive name is printed based
 *    on EAX & 0xF00 and the model.
 *  - cpuid leaf 0xa: max_pmc_counters is derived from the fourth
 *    output word.
 *  - "/usr/sbin/pmccontrol -L" is run; output lines that start with a
 *    tab are copied into valid_pmcs[], whose capacity doubles when it
 *    fills up.
 */
2330 unsigned long eax, ebx, ecx, edx;
2332 pid_t pid_of_command=0;
2335 char linebuf[1024], *str;
2338 eax = ebx = ecx = edx = 0;
2340 cpuid(0, eax, ebx, ecx, edx);
/* 0x68747541 is the ASCII text "Auth" read as a little-endian word. */
2341 if (ebx == 0x68747541) {
2342 printf("AMD processors are not supported by this program\n");
/*
 * "Cyri" as a little-endian word is 0x69727943. The constant used
 * here before, 0x6972794, had lost its last hex digit and could never
 * compare equal to a four-character vendor prefix.
 */
2345 } else if (ebx == 0x69727943) {
2346 printf("Cyrix processors are not supported by this program\n");
/* 0x756e6547 is the ASCII text "Genu" read as a little-endian word. */
2349 } else if (ebx == 0x756e6547) {
2350 printf("Genuine Intel\n");
2352 printf("Unknown processor type 0x%lx Only Intel AMD64 types are supported by this routine!\n", ebx);
2355 cpuid(1, eax, ebx, ecx, edx);
/* Bits 19:16 become the high nibble, bits 7:4 the low nibble. */
2356 model = (((eax & 0xF0000) >> 12) | ((eax & 0xF0) >> 4));
2357 printf("CPU model is 0x%x id:0x%lx\n", model, eax);
/* Outer dispatch is on EAX bits 11:8. */
2358 switch (eax & 0xF00) {
2359 case 0x500: /* Pentium family processors */
2360 printf("Intel Pentium P5\n");
2363 case 0x600: /* Pentium Pro, Celeron, Pentium II & III */
2366 printf("Intel Pentium P6\n");
2371 printf("Intel PII\n");
2374 case 0x6: case 0x16:
2375 printf("Intel CL\n");
2378 case 0x7: case 0x8: case 0xA: case 0xB:
2379 printf("Intel PIII\n");
2383 printf("Intel PM\n");
2387 printf("Intel CORE\n");
2391 printf("Intel CORE2\n");
2395 printf("Intel CORE2EXTREME\n");
2398 case 0x1C: /* Per Intel document 320047-002. */
2399 printf("Intel ATOM\n");
2404 * Per Intel document 253669-032 9/2009,
2405 * pages A-2 and A-57
2408 * Per Intel document 253669-032 9/2009,
2409 * pages A-2 and A-57
2411 printf("Intel COREI7\n");
2415 printf("Intel NEHALEM\n");
2418 case 0x25: /* Per Intel document 253669-033US 12/2009. */
2419 case 0x2C: /* Per Intel document 253669-033US 12/2009. */
2420 printf("Intel WESTMERE\n");
2423 case 0x2F: /* Westmere-EX, seen in wild */
2424 printf("Intel WESTMERE\n");
2427 case 0x2A: /* Per Intel document 253669-039US 05/2011. */
2428 printf("Intel SANDYBRIDGE\n");
2431 case 0x2D: /* Per Intel document 253669-044US 08/2012. */
2432 printf("Intel SANDYBRIDGE_XEON\n");
2435 case 0x3A: /* Per Intel document 253669-043US 05/2012. */
2436 printf("Intel IVYBRIDGE\n");
2439 case 0x3E: /* Per Intel document 325462-045US 01/2013. */
2440 printf("Intel IVYBRIDGE_XEON\n");
2443 case 0x3F: /* Per Intel document 325462-045US 09/2014. */
2444 printf("Intel HASWELL (Xeon)\n");
2447 case 0x3C: /* Per Intel document 325462-045US 01/2013. */
2450 printf("Intel HASWELL\n");
2456 printf("Intel SKY-LAKE\n");
2461 printf("Intel BROADWELL\n");
2466 printf("Intel BROADWEL (Xeon)\n");
2471 /* Per Intel document 330061-001 01/2014. */
2472 printf("Intel ATOM_SILVERMONT\n");
2476 printf("Intel model 0x%x is not known -- sorry\n",
2482 case 0xF00: /* P4 */
2483 printf("Intel unknown model %d\n", model);
/*
 * reg[3] receives the EDX output of leaf 0xa (see do_cpuid).
 * NOTE(review): confirm that the low nibble of EDX plus one is the
 * intended counter count for this leaf.
 */
2487 do_cpuid(0xa, 0, reg);
2488 max_pmc_counters = (reg[3] & 0x0000000f) + 1;
2489 printf("We have %d PMC counters to work with\n", max_pmc_counters);
2490 /* Ok lets load the list of all known PMC's */
2491 io = my_popen("/usr/sbin/pmccontrol -L", "r", &pid_of_command);
/* First use: allocate a zeroed table of PMC_INITIAL_ALLOC pointers. */
2492 if (valid_pmcs == NULL) {
2494 pmc_allocated_cnt = PMC_INITIAL_ALLOC;
2495 sz = sizeof(char *) * pmc_allocated_cnt;
2496 valid_pmcs = malloc(sz);
2497 if (valid_pmcs == NULL) {
2498 printf("No memory allocation fails at startup?\n");
2501 memset(valid_pmcs, 0, sz);
2504 while (fgets(linebuf, sizeof(linebuf), io) != NULL) {
2505 if (linebuf[0] != '\t') {
2506 /* sometimes headers ;-) */
2509 len = strlen(linebuf);
/* Drop the trailing newline that fgets keeps. */
2510 if (linebuf[(len-1)] == '\n') {
2512 linebuf[(len-1)] = 0;
/* Store a heap copy of the name in the next table slot. */
2515 len = strlen(str) + 1;
2516 valid_pmcs[valid_pmc_cnt] = malloc(len);
2517 if (valid_pmcs[valid_pmc_cnt] == NULL) {
2518 printf("No memory2 allocation fails at startup?\n");
2521 memset(valid_pmcs[valid_pmc_cnt], 0, len);
2522 strcpy(valid_pmcs[valid_pmc_cnt], str);
2524 if (valid_pmc_cnt >= pmc_allocated_cnt) {
2525 /* Got to expand -- unlikely */
2528 sz = sizeof(char *) * (pmc_allocated_cnt * 2);
2531 printf("No memory3 allocation fails at startup?\n");
2534 memset(more, 0, sz);
/*
 * Copy only what the old table holds. sz is the size of the new,
 * doubled table; using it as the copy length read past the end of
 * valid_pmcs. pmc_allocated_cnt still has the old capacity here
 * because it is doubled on the next line.
 */
2535 memcpy(more, valid_pmcs, sizeof(char *) * pmc_allocated_cnt);
2536 pmc_allocated_cnt *= 2;
2541 my_pclose(io, pid_of_command);
/* NOTE(review): presumably the branch for builds without amd64 support; the preprocessor lines are not visible. */
2544 printf("Not supported\n");
/*
 * Print a heading naming the CPU type, then walk all the_cpu.number
 * entries of the_cpu.ents[] and call the CPU-specific explain callback
 * on each entry's name, with a separator line after each.
 */
2552 printf("For CPU's of type %s the following expressions are available:\n",the_cpu.cputype);
2553 printf("-------------------------------------------------------------\n");
2554 for(i=0; i<the_cpu.number; i++){
2555 printf("For -e %s ", the_cpu.ents[i].name);
2556 (*the_cpu.explain)(the_cpu.ents[i].name);
2557 printf("----------------------------\n");
/*
 * test_for_a_pmc: run pmcstat briefly against one PMC and print the
 * outcome on the current output line.
 *
 * pmc        -- PMC name passed to pmcstat with -s.
 * out_so_far -- number of characters the caller has already printed
 *               on this line; when below 50 the difference is used as
 *               a loop count (presumably to pad the line -- the loop
 *               body is not visible).
 *
 * The first line of pmcstat output is searched for "ERROR" or for the
 * PMC name. When the name is found a second line is read and the part
 * of it starting at the first space found is printed.
 */
2562 test_for_a_pmc(const char *pmc, int out_so_far)
2565 pid_t pid_of_command=0;
2566 char my_command[1024];
2571 if (out_so_far < 50) {
2572 len = 50 - out_so_far;
2573 for(i=0; i<len; i++) {
/*
 * Bounded formatting: the PMC name originates from external tool
 * output, so an over-long name must not write past the end of
 * my_command. An over-long command is truncated instead.
 */
2577 snprintf(my_command, sizeof(my_command), "/usr/sbin/pmcstat -w .25 -c 0 -s %s", pmc);
2578 io = my_popen(my_command, "r", &pid_of_command);
2580 printf("Failed -- popen fails\n");
2583 /* Setup what we expect */
/* NOTE(review): resp is declared outside the visible lines; bound this write too once its size is confirmed. */
2584 len = sprintf(resp, "%s", pmc);
2585 if (fgets(line, sizeof(line), io) == NULL) {
2586 printf("Failed -- no output from pmstat\n");
2589 llen = strlen(line);
2590 if (line[(llen-1)] == '\n') {
/* Slide over the first line looking for an error marker or the PMC name. */
2594 for(i=2; i<(llen-len); i++) {
2595 if (strncmp(&line[i], "ERROR", 5) == 0) {
2596 printf("Failed %s\n", line);
2598 } else if (strncmp(&line[i], resp, len) == 0) {
2601 if (fgets(line, sizeof(line), io) == NULL) {
2602 printf("Failed -- no second output from pmstat\n");
2606 for (j=0; j<len; j++) {
2607 if (line[j] == ' ') {
2614 len = strlen(&line[j]);
/* NOTE(review): presumably pads the value to a 20-character field; loop body not visible. */
2616 for(k=0; k<(20-len); k++) {
2621 printf("%s", &line[j]);
2628 printf("Failed -- '%s' not '%s'\n", line, resp);
2630 my_pclose(io, pid_of_command);
/*
 * add_it_to: add a heap copy of `name` to the string table `vars`
 * unless an equal string is already among its first cur_cnt entries.
 *
 * vars    -- table of string pointers.
 * cur_cnt -- number of entries already in use; slot cur_cnt is where
 *            the new string goes and is expected to be NULL (a
 *            diagnostic is printed otherwise).
 * name    -- string to add.
 *
 * The return statements are not visible here; the caller in
 * build_command_for_exp tests the result for non-zero.
 */
2635 add_it_to(char **vars, int cur_cnt, char *name)
/* Duplicate check against the entries already stored. */
2639 for(i=0; i<cur_cnt; i++) {
2640 if (strcmp(vars[i], name) == 0) {
2645 if (vars[cur_cnt] != NULL) {
2646 printf("Cur_cnt:%d filled with %s??\n",
2647 cur_cnt, vars[cur_cnt]);
/* +1 leaves room for the terminating NUL. */
2651 len = strlen(name) + 1;
2652 vars[cur_cnt] = malloc(len);
2653 if (vars[cur_cnt] == NULL) {
2654 printf("No memory %s\n", __FUNCTION__);
2657 memset(vars[cur_cnt], 0, len);
2658 strcpy(vars[cur_cnt], name);
/*
 * build_command_for_exp: produce a heap-allocated pmcstat command line
 * with one " -s <name>" option for every distinct PMC referenced by
 * the expression. set_manual_exp stores the result in the global
 * `command`.
 *
 * exp -- the parsed expression whose TYPE_VALUE_PMC nodes are used.
 */
2663 build_command_for_exp(struct expression *exp)
2666 * Build the pmcstat command to handle
2667 * the passed in expression.
2668 * /usr/sbin/pmcstat -w 1 -s NNN -s QQQ
2669 * where NNN and QQQ represent the PMC's in the expression
2673 int cnt_pmc, alloced_pmcs, i;
2674 struct expression *at;
2678 alloced_pmcs = cnt_pmc = 0;
2679 /* first how many do we have */
2682 if (at->type == TYPE_VALUE_PMC) {
2688 printf("No PMC's in your expression -- nothing to do!!\n");
/* One pointer slot per PMC node; duplicates leave some slots unused. */
2691 mal = cnt_pmc * sizeof(char *);
2694 printf("No memory\n");
2697 memset(vars, 0, mal);
2700 if (at->type == TYPE_VALUE_PMC) {
2701 if(add_it_to(vars, alloced_pmcs, at->name)) {
2707 /* Now we have a unique list in vars so create our command */
2708 mal = 23; /* "/usr/sbin/pmcstat -w 1" + \0 */
2709 for(i=0; i<alloced_pmcs; i++) {
2710 mal += strlen(vars[i]) + 4; /* var + " -s " */
/* Two spare bytes are allocated beyond the computed length. */
2712 cmd = malloc((mal+2));
2714 printf("%s out of mem\n", __FUNCTION__);
2717 memset(cmd, 0, (mal+2));
2718 strcpy(cmd, "/usr/sbin/pmcstat -w 1");
2720 for(i=0; i<alloced_pmcs; i++) {
/* NOTE(review): forming is declared outside the visible lines; this sprintf is unbounded -- confirm its size covers the longest PMC name. */
2721 sprintf(forming, " -s %s", vars[i]);
2722 strcat(cmd, forming);
/*
 * user_expr: evaluate the user-supplied expression (master_exp) using
 * the counters of one CPU and print the result with three decimals.
 *
 * cpu -- counter list searched with find_counter for each PMC node.
 * pos -- index into a counter's vals[] array. The visible code also
 *        has a path that loads the counter's sum instead; the
 *        condition selecting it is not visible (main passes -1 to
 *        do_expression -- presumably that selects the sum; confirm).
 */
2731 user_expr(struct counters *cpu, int pos)
2735 struct counters *var;
2736 struct expression *at;
/* Load a value into every PMC node before the expression is run. */
2740 if (at->type == TYPE_VALUE_PMC) {
2741 var = find_counter(cpu, at->name);
2743 printf("%s:Can't find counter %s?\n", __FUNCTION__, at->name);
/* Multiplying by 1.0 converts the integer reading to floating point. */
2747 at->value = var->vals[pos] * 1.0;
2749 at->value = var->sum * 1.0;
2754 res = run_expr(master_exp, 1, NULL);
/* printf's return value (characters written) is kept in ret. */
2755 ret = printf("%1.3f", res);
/*
 * set_manual_exp: switch the tool to a user-defined expression.
 * Installs user_expr as the evaluation callback, builds the pmcstat
 * command line for `exp`, and sets the threshold description text.
 */
2761 set_manual_exp(struct expression *exp)
2763 expression = user_expr;
2764 command = build_command_for_exp(exp);
2765 threshold = "User defined threshold";
/*
 * Test every entry of valid_pmcs[]: print the PMC name, then let
 * test_for_a_pmc run it, passing the number of characters just
 * printed so the result can be aligned.
 */
2772 printf("Running tests on %d PMC's this may take some time\n", valid_pmc_cnt);
2773 printf("------------------------------------------------------------------------\n");
2774 for(i=0; i<valid_pmc_cnt; i++) {
/* printf returns the number of characters written. */
2775 lenout = printf("%s", valid_pmcs[i]);
2777 test_for_a_pmc(valid_pmcs[i], lenout);
/*
 * Print a two-column table of every entry in valid_pmcs[]: the PMC
 * name followed by an abbreviation of the form "%<index>".
 */
2784 printf("PMC Abbreviation\n");
2785 printf("--------------------------------------------------------------\n");
2786 for(i=0; i<valid_pmc_cnt; i++) {
2787 cnt = printf("%s", valid_pmcs[i]);
/* NOTE(review): presumably pads the name out to column 52; loop body not visible. */
2788 for(j=cnt; j<52; j++) {
/* "%%" emits a literal percent sign ahead of the index. */
2791 printf("%%%d\n", i);
/*
 * main: parse the command line, then explain expressions, or collect
 * samples (from a file or a spawned command) and print expression
 * results. The usage text printed below lists the options.
 * Case labels and several short statements are not part of the
 * visible text, so the notes below describe only what is shown.
 */
2797 main(int argc, char **argv)
2800 char *filename=NULL;
2801 const char *name=NULL;
2807 memset(glob_cpu, 0, sizeof(glob_cpu));
2808 while ((i = getopt(argc, argv, "ALHhvm:i:?e:TE:")) != -1) {
2817 printf("**********************************\n");
2819 printf("**********************************\n");
/* User-written expression: parse it and make it the active expression. */
2826 master_exp = parse_expression(optarg);
2828 set_manual_exp(master_exp);
/* Named expression: validate_expression returning non-zero means the name is unknown. */
2832 if (validate_expression(optarg)) {
2833 printf("Unknown expression %s\n", optarg);
2837 set_expression(optarg);
/* Sample limit from the option argument, clamped to the array capacity. */
2840 max_to_collect = strtol(optarg, NULL, 0);
2841 if (max_to_collect > MAX_COUNTER_SLOTS) {
2842 /* You can't collect more than max in array */
2843 max_to_collect = MAX_COUNTER_SLOTS;
2858 printf("Use %s [ -i inputfile -v -m max_to_collect -e expr -E -h -? -H]\n",
2860 printf("-i inputfile -- use source as inputfile not stdin (if stdin collect)\n");
2861 printf("-v -- verbose dump debug type things -- you don't want this\n");
2862 printf("-m N -- maximum to collect is N measurments\n");
2863 printf("-e expr-name -- Do expression expr-name\n");
2864 printf("-E 'your expression' -- Do your expression\n");
2865 printf("-h -- Don't do the expression I put in -e xxx just explain what it does and exit\n");
2866 printf("-H -- Don't run anything, just explain all canned expressions\n");
2867 printf("-T -- Test all PMC's defined by this processor\n");
2868 printf("-A -- Run all canned tests\n");
/* Nothing selected at all: no run-all, no named or user expression, no input file, no test mode. */
2873 if ((run_all == 0) && (name == NULL) && (filename == NULL) &&
2874 (test_mode == 0) && (master_exp == NULL)) {
2875 printf("Without setting an expression we cannot dynamically gather information\n");
2876 printf("you must supply a filename (and you probably want verbosity)\n");
/* Run-all mode with a sample limit above 10 is handled specially (body not visible). */
2879 if (run_all && max_to_collect > 10) {
2886 printf("*********************************\n");
/* Explain either the named canned expression or the user's own expression. */
2887 if ((master_exp == NULL) && name) {
2888 (*the_cpu.explain)(name);
2889 } else if (master_exp) {
2890 printf("Examine your expression ");
2891 print_exp(master_exp);
2892 printf("User defined threshold\n");
/* Run-all mode: select canned expression number test_at from the_cpu.ents[]. */
2899 name = the_cpu.ents[test_at].name;
2900 printf("***Test %s (threshold %s)****\n", name, the_cpu.ents[test_at].thresh);
2902 if (set_expression(name) == -1) {
/* test_at reaching the_cpu.number means the table of canned expressions is exhausted. */
2903 if (test_at >= the_cpu.number) {
2910 process_file(filename);
/* Dump each counter with its collected values and running sum. */
2912 for (i=0; i<ncnts; i++) {
2913 printf("Counter:%s cpu:%d index:%d\n",
2914 cnts[i].counter_name,
2916 for(j=0; j<cnts[i].pos; j++) {
2917 printf(" val - %ld\n", (long int)cnts[i].vals[j]);
2919 printf(" sum - %ld\n", (long int)cnts[i].sum);
2922 if (expression == NULL) {
/* With more than one sample allowed, evaluate once more per CPU, passing -1 as the position. */
2925 if (max_to_collect > 1) {
2926 for(i=0, cnt=0; i<MAX_CPU; i++) {
2928 do_expression(glob_cpu[i], -1);
2930 if (cnt == cpu_count_out) {
/* Run-all mode with expressions left: clear the per-CPU table before the next pass. */
2939 if (run_all && (test_at < the_cpu.number)) {
2940 memset(glob_cpu, 0, sizeof(glob_cpu));
2942 printf("*********************************\n");
2944 } else if (run_all) {
2946 printf("*********************************\n");