]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - usr.sbin/pmcstat/pmcpl_calltree.c
Merge branch 'releng/11.3' into releng-CDN/11.3
[FreeBSD/FreeBSD.git] / usr.sbin / pmcstat / pmcpl_calltree.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2012, Fabien Thomas
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 /*
30  * Process hwpmc(4) samples as calltree.
31  *
32  * Output file format compatible with Kcachegrind (kdesdk).
33  * Handle top mode with a sorted tree display.
34  */
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38
39 #include <sys/param.h>
40 #include <sys/endian.h>
41 #include <sys/queue.h>
42
43 #include <assert.h>
44 #include <curses.h>
45 #include <ctype.h>
46 #include <err.h>
47 #include <errno.h>
48 #include <fcntl.h>
49 #include <pmc.h>
50 #include <pmclog.h>
51 #include <stdint.h>
52 #include <stdio.h>
53 #include <stdlib.h>
54 #include <string.h>
55 #include <unistd.h>
56 #include <sysexits.h>
57
58 #include "pmcstat.h"
59 #include "pmcstat_log.h"
60 #include "pmcstat_top.h"
61 #include "pmcpl_calltree.h"
62
63 #define PMCPL_CT_GROWSIZE       4
64
65 static int pmcstat_skiplink = 0;
66
67 struct pmcpl_ct_node;
68
69 /* Get the sample value for PMC a. */
70 #define PMCPL_CT_SAMPLE(a, b) \
71         ((a) < (b)->npmcs ? (b)->sb[a] : 0)
72
73 /* Get the sample value in percent related to rsamples. */
74 #define PMCPL_CT_SAMPLEP(a, b) \
75         (PMCPL_CT_SAMPLE(a, b) * 100.0 / rsamples->sb[a])
76
77 struct pmcpl_ct_sample {
78         int             npmcs;          /* Max pmc index available. */
79         unsigned        *sb;            /* Sample buffer for 0..npmcs. */
80 };
81
82 struct pmcpl_ct_arc {
83         struct pmcpl_ct_sample  pcta_samples;
84         struct pmcpl_ct_sample  pcta_callid;
85         unsigned                pcta_call;
86         struct pmcpl_ct_node    *pcta_child;
87 };
88
89 struct pmcpl_ct_instr {
90         uintfptr_t              pctf_func;
91         struct pmcpl_ct_sample  pctf_samples;
92 };
93
94 /*
95  * Each calltree node is tracked by a pmcpl_ct_node struct.
96  */
97 struct pmcpl_ct_node {
98         struct pmcstat_image    *pct_image;
99         uintfptr_t              pct_func;
100
101         struct pmcstat_symbol   *pct_sym;
102         pmcstat_interned_string pct_ifl;
103         pmcstat_interned_string pct_ifn;
104
105         struct pmcpl_ct_sample  pct_samples;
106
107         int                     pct_narc;
108         int                     pct_arc_c;
109         struct pmcpl_ct_arc     *pct_arc;
110
111         /* TODO: optimize for large number of items. */
112         int                     pct_ninstr;
113         int                     pct_instr_c;
114         struct pmcpl_ct_instr   *pct_instr;
115
116 #define PMCPL_PCT_ADDR  0
117 #define PMCPL_PCT_NAME  1
118         char                    pct_type;
119 #define PMCPL_PCT_WHITE 0
120 #define PMCPL_PCT_GREY  1
121 #define PMCPL_PCT_BLACK 2
122         char                    pct_color;
123 };
124
125 struct pmcpl_ct_node_hash {
126         struct pmcpl_ct_node  *pch_ctnode;
127         STAILQ_ENTRY(pmcpl_ct_node_hash) pch_next;
128 };
129
130 static struct pmcpl_ct_sample pmcpl_ct_callid;
131
132 #define PMCPL_CT_MAXCOL         PMC_CALLCHAIN_DEPTH_MAX
133 #define PMCPL_CT_MAXLINE        1024    /* TODO: dynamic. */
134
135 struct pmcpl_ct_line {
136         unsigned        ln_sum;
137         unsigned        ln_index;
138 };
139
140 static struct pmcpl_ct_line     pmcpl_ct_topmax[PMCPL_CT_MAXLINE+1];
141 static struct pmcpl_ct_node
142     *pmcpl_ct_topscreen[PMCPL_CT_MAXCOL+1][PMCPL_CT_MAXLINE+1];
143
144 /*
145  * All nodes indexed by function/image name are placed in a hash table.
146  */
147 static STAILQ_HEAD(,pmcpl_ct_node_hash) pmcpl_ct_node_hash[PMCSTAT_NHASH];
148
149 /*
150  * Root node for the graph.
151  */
152 static struct pmcpl_ct_node *pmcpl_ct_root;
153
154 /*
155  * Prototypes
156  */
157
158 /*
159  * Initialize a samples.
160  */
161
162 static void
163 pmcpl_ct_samples_init(struct pmcpl_ct_sample *samples)
164 {
165
166         samples->npmcs = 0;
167         samples->sb = NULL;
168 }
169
170 /*
171  * Free a samples.
172  */
173
174 static void
175 pmcpl_ct_samples_free(struct pmcpl_ct_sample *samples)
176 {
177
178         samples->npmcs = 0;
179         free(samples->sb);
180         samples->sb = NULL;
181 }
182
183 /*
184  * Grow a sample block to store pmcstat_npmcs PMCs.
185  */
186
187 static void
188 pmcpl_ct_samples_grow(struct pmcpl_ct_sample *samples)
189 {
190         unsigned int npmcs;
191
192         /* Enough storage. */
193         if (pmcstat_npmcs <= samples->npmcs)
194                 return;
195
196         npmcs = samples->npmcs +
197             max(pmcstat_npmcs - samples->npmcs, PMCPL_CT_GROWSIZE);
198         samples->sb = reallocarray(samples->sb, npmcs, sizeof(unsigned));
199         if (samples->sb == NULL)
200                 errx(EX_SOFTWARE, "ERROR: out of memory");
201         bzero((char *)samples->sb + samples->npmcs * sizeof(unsigned),
202             (npmcs - samples->npmcs) * sizeof(unsigned));
203         samples->npmcs = npmcs;
204 }
205
206 /*
207  * Compute the sum of all root arcs.
208  */
209
210 static void
211 pmcpl_ct_samples_root(struct pmcpl_ct_sample *samples)
212 {
213         int i, pmcin;
214
215         pmcpl_ct_samples_init(samples);
216         pmcpl_ct_samples_grow(samples);
217
218         for (i = 0; i < pmcpl_ct_root->pct_narc; i++)
219                 for (pmcin = 0; pmcin < pmcstat_npmcs; pmcin++)
220                         samples->sb[pmcin] += PMCPL_CT_SAMPLE(pmcin,
221                             &pmcpl_ct_root->pct_arc[i].pcta_samples);
222 }
223
224 /*
225  * Grow the arc table.
226  */
227
228 static void
229 pmcpl_ct_arc_grow(int cursize, int *maxsize, struct pmcpl_ct_arc **items)
230 {
231         unsigned int nmaxsize;
232
233         if (cursize < *maxsize)
234                 return;
235
236         nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
237         *items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_arc));
238         if (*items == NULL)
239                 errx(EX_SOFTWARE, "ERROR: out of memory");
240         bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_arc),
241             (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_arc));
242         *maxsize = nmaxsize;
243 }
244
245 /*
246  * Grow the instr table.
247  */
248
249 static void
250 pmcpl_ct_instr_grow(int cursize, int *maxsize, struct pmcpl_ct_instr **items)
251 {
252         unsigned int nmaxsize;
253
254         if (cursize < *maxsize)
255                 return;
256
257         nmaxsize = *maxsize + max(cursize + 1 - *maxsize, PMCPL_CT_GROWSIZE);
258         *items = reallocarray(*items, nmaxsize, sizeof(struct pmcpl_ct_instr));
259         if (*items == NULL)
260                 errx(EX_SOFTWARE, "ERROR: out of memory");
261         bzero((char *)*items + *maxsize * sizeof(struct pmcpl_ct_instr),
262             (nmaxsize - *maxsize) * sizeof(struct pmcpl_ct_instr));
263         *maxsize = nmaxsize;
264 }
265
266 /*
267  * Add a new instruction sample to given node.
268  */
269
270 static void
271 pmcpl_ct_instr_add(struct pmcpl_ct_node *ct, int pmcin,
272     uintfptr_t pc, unsigned v)
273 {
274         int i;
275         struct pmcpl_ct_instr *in;
276
277         for (i = 0; i<ct->pct_ninstr; i++) {
278                 if (ct->pct_instr[i].pctf_func == pc) {
279                         in = &ct->pct_instr[i];
280                         pmcpl_ct_samples_grow(&in->pctf_samples);
281                         in->pctf_samples.sb[pmcin] += v;
282                         return;
283                 }
284         }
285
286         pmcpl_ct_instr_grow(ct->pct_ninstr, &ct->pct_instr_c, &ct->pct_instr);
287         in = &ct->pct_instr[ct->pct_ninstr];
288         in->pctf_func = pc;
289         pmcpl_ct_samples_init(&in->pctf_samples);
290         pmcpl_ct_samples_grow(&in->pctf_samples);
291         in->pctf_samples.sb[pmcin] = v;
292         ct->pct_ninstr++;
293 }
294
295 /*
296  * Allocate a new node.
297  */
298
299 static struct pmcpl_ct_node *
300 pmcpl_ct_node_allocate(void)
301 {
302         struct pmcpl_ct_node *ct;
303
304         if ((ct = malloc(sizeof(*ct))) == NULL)
305                 err(EX_OSERR, "ERROR: Cannot allocate callgraph node");
306
307         pmcpl_ct_samples_init(&ct->pct_samples);
308
309         ct->pct_sym     = NULL;
310         ct->pct_image   = NULL;
311         ct->pct_func    = 0;
312
313         ct->pct_narc    = 0;
314         ct->pct_arc_c   = 0;
315         ct->pct_arc     = NULL;
316
317         ct->pct_ninstr  = 0;
318         ct->pct_instr_c = 0;
319         ct->pct_instr   = NULL;
320
321         ct->pct_color   = PMCPL_PCT_WHITE;
322
323         return (ct);
324 }
325
326 /*
327  * Free a node.
328  */
329
330 static void
331 pmcpl_ct_node_free(struct pmcpl_ct_node *ct)
332 {
333         int i;
334
335         for (i = 0; i < ct->pct_narc; i++) {
336                 pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_samples);
337                 pmcpl_ct_samples_free(&ct->pct_arc[i].pcta_callid);
338         }
339
340         pmcpl_ct_samples_free(&ct->pct_samples);
341         free(ct->pct_arc);
342         free(ct->pct_instr);
343         free(ct);
344 }
345
346 /*
347  * Clear the graph tag on each node.
348  */
349 static void
350 pmcpl_ct_node_cleartag(void)
351 {
352         int i;
353         struct pmcpl_ct_node_hash *pch;
354
355         for (i = 0; i < PMCSTAT_NHASH; i++)
356                 STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next)
357                         pch->pch_ctnode->pct_color = PMCPL_PCT_WHITE;
358
359         pmcpl_ct_root->pct_color = PMCPL_PCT_WHITE;
360 }
361
362 /*
363  * Print the callchain line by line with maximum cost at top.
364  */ 
365
366 static int
367 pmcpl_ct_node_dumptop(int pmcin, struct pmcpl_ct_node *ct,
368     struct pmcpl_ct_sample *rsamples, int x, int *y)
369 {
370         int i, terminal;
371         struct pmcpl_ct_arc *arc;
372
373         if (ct->pct_color == PMCPL_PCT_GREY)
374                 return 0;
375
376         if (x >= PMCPL_CT_MAXCOL) {
377                 pmcpl_ct_topscreen[x][*y] = NULL;
378                 return 1;
379         }
380         pmcpl_ct_topscreen[x][*y] = ct;
381
382         /*
383          * Check if this is a terminal node.
384          * We need to check that some samples exist
385          * for at least one arc for that PMC.
386          */
387         terminal = 1;
388         for (i = 0; i < ct->pct_narc; i++) {
389                 arc = &ct->pct_arc[i];
390                 if (arc->pcta_child->pct_color != PMCPL_PCT_GREY &&
391                     PMCPL_CT_SAMPLE(pmcin,
392                     &arc->pcta_samples) != 0 &&
393                     PMCPL_CT_SAMPLEP(pmcin,
394                     &arc->pcta_samples) > pmcstat_threshold) {
395                         terminal = 0;
396                         break;
397                 }
398         }
399
400         if (ct->pct_narc == 0 || terminal) {
401                 pmcpl_ct_topscreen[x+1][*y] = NULL;
402                 if (*y >= PMCPL_CT_MAXLINE)
403                         return 1;
404                 *y = *y + 1;
405                 for (i=0; i < x; i++)
406                         pmcpl_ct_topscreen[i][*y] =
407                             pmcpl_ct_topscreen[i][*y - 1];
408                 return 0;
409         }
410
411         ct->pct_color = PMCPL_PCT_GREY;
412         for (i = 0; i < ct->pct_narc; i++) {
413                 if (PMCPL_CT_SAMPLE(pmcin,
414                     &ct->pct_arc[i].pcta_samples) == 0)
415                         continue;
416                 if (PMCPL_CT_SAMPLEP(pmcin,
417                     &ct->pct_arc[i].pcta_samples) > pmcstat_threshold) {
418                         if (pmcpl_ct_node_dumptop(pmcin,
419                                 ct->pct_arc[i].pcta_child,
420                                 rsamples, x+1, y)) {
421                                 ct->pct_color = PMCPL_PCT_BLACK;
422                                 return 1;
423                         }
424                 }
425         }
426         ct->pct_color = PMCPL_PCT_BLACK;
427
428         return 0;
429 }
430
431 /*
432  * Compare two top line by sum.
433  */
434 static int
435 pmcpl_ct_line_compare(const void *a, const void *b)
436 {
437         const struct pmcpl_ct_line *ct1, *ct2;
438
439         ct1 = (const struct pmcpl_ct_line *) a;
440         ct2 = (const struct pmcpl_ct_line *) b;
441
442         /* Sort in reverse order */
443         if (ct1->ln_sum < ct2->ln_sum)
444                 return (1);
445         if (ct1->ln_sum > ct2->ln_sum)
446                 return (-1);
447         return (0);
448 }
449
450 /*
451  * Format and display given PMC index.
452  */
453
454 static void
455 pmcpl_ct_node_printtop(struct pmcpl_ct_sample *rsamples, int pmcin, int maxy)
456 {
457 #undef  TS
458 #undef  TSI
459 #define TS(x, y)        (pmcpl_ct_topscreen[x][y])
460 #define TSI(x, y)       (pmcpl_ct_topscreen[x][pmcpl_ct_topmax[y].ln_index])
461
462         int v_attrs, ns_len, vs_len, is_len, width, indentwidth, x, y;
463         float v;
464         char ns[30], vs[10], is[20];
465         struct pmcpl_ct_node *ct;
466         const char *space = " ";
467
468         /*
469          * Sort by line cost.
470          */
471         for (y = 0; ; y++) {
472                 ct = TS(1, y);
473                 if (ct == NULL)
474                         break;
475
476                 pmcpl_ct_topmax[y].ln_sum = 0;
477                 pmcpl_ct_topmax[y].ln_index = y;
478                 for (x = 1; TS(x, y) != NULL; x++) {
479                         pmcpl_ct_topmax[y].ln_sum +=
480                             PMCPL_CT_SAMPLE(pmcin, &TS(x, y)->pct_samples);
481                 }
482         }
483         qsort(pmcpl_ct_topmax, y, sizeof(pmcpl_ct_topmax[0]),
484             pmcpl_ct_line_compare);
485         pmcpl_ct_topmax[y].ln_index = y;
486
487         for (y = 0; y < maxy; y++) {
488                 ct = TSI(1, y);
489                 if (ct == NULL)
490                         break;
491
492                 if (y > 0)
493                         PMCSTAT_PRINTW("\n");
494
495                 /* Output sum. */
496                 v = pmcpl_ct_topmax[y].ln_sum * 100.0 /
497                     rsamples->sb[pmcin];
498                 snprintf(vs, sizeof(vs), "%.1f", v);
499                 v_attrs = PMCSTAT_ATTRPERCENT(v);
500                 PMCSTAT_ATTRON(v_attrs);
501                 PMCSTAT_PRINTW("%5.5s ", vs);
502                 PMCSTAT_ATTROFF(v_attrs);
503
504                 width = indentwidth = 5 + 1;
505
506                 for (x = 1; (ct = TSI(x, y)) != NULL; x++) {
507
508                         vs[0] = '\0'; vs_len = 0;
509                         is[0] = '\0'; is_len = 0;
510
511                         /* Format value. */
512                         v = PMCPL_CT_SAMPLEP(pmcin, &ct->pct_samples);
513                         if (v > pmcstat_threshold)
514                                 vs_len  = snprintf(vs, sizeof(vs),
515                                     "(%.1f%%)", v);
516                         v_attrs = PMCSTAT_ATTRPERCENT(v);
517
518                         if (pmcstat_skiplink && v <= pmcstat_threshold) {
519                                 strlcpy(ns, ".", sizeof(ns));
520                                 ns_len = 1;
521                         } else {
522                         if (ct->pct_sym != NULL) {
523                                 ns_len = snprintf(ns, sizeof(ns), "%s",
524                                     pmcstat_string_unintern(ct->pct_sym->ps_name));
525                         } else
526                                 ns_len = snprintf(ns, sizeof(ns), "%p",
527                                     (void *)ct->pct_func);
528
529                         /* Format image. */
530                         if (x == 1 ||
531                             TSI(x-1, y)->pct_image != ct->pct_image)
532                                 is_len = snprintf(is, sizeof(is), "@%s",
533                                     pmcstat_string_unintern(ct->pct_image->pi_name));
534
535                         /* Check for line wrap. */
536                         width += ns_len + is_len + vs_len + 1;
537                         }
538                         if (width >= pmcstat_displaywidth) {
539                                 maxy--;
540                                 if (y >= maxy)
541                                         break;
542                                 PMCSTAT_PRINTW("\n%*s", indentwidth, space);
543                                 width = indentwidth + ns_len + is_len + vs_len;
544                         }
545
546                         PMCSTAT_ATTRON(v_attrs);
547                         PMCSTAT_PRINTW("%s%s%s ", ns, is, vs);
548                         PMCSTAT_ATTROFF(v_attrs);
549                 }
550         }
551 }
552
553 /*
554  * Output top mode snapshot.
555  */
556
557 void
558 pmcpl_ct_topdisplay(void)
559 {
560         int y;
561         struct pmcpl_ct_sample r, *rsamples;
562
563         rsamples = &r;
564         pmcpl_ct_samples_root(rsamples);
565         pmcpl_ct_node_cleartag();
566
567         PMCSTAT_PRINTW("%5.5s %s\n", "%SAMP", "CALLTREE");
568
569         y = 0;
570         if (pmcpl_ct_node_dumptop(pmcstat_pmcinfilter,
571             pmcpl_ct_root, rsamples, 0, &y))
572                 PMCSTAT_PRINTW("...\n");
573         pmcpl_ct_topscreen[1][y] = NULL;
574
575         pmcpl_ct_node_printtop(rsamples,
576             pmcstat_pmcinfilter, pmcstat_displayheight - 2);
577
578         pmcpl_ct_samples_free(rsamples);
579 }
580
581 /*
582  * Handle top mode keypress.
583  */
584
585 int
586 pmcpl_ct_topkeypress(int c, WINDOW *w)
587 {
588
589         switch (c) {
590         case 'f':
591                 pmcstat_skiplink = !pmcstat_skiplink;
592                 wprintw(w, "skip empty link %s",
593                     pmcstat_skiplink ? "on" : "off");
594                 break;
595         }
596
597         return 0;
598 }
599
600 /*
601  * Look for a callgraph node associated with pmc `pmcid' in the global
602  * hash table that corresponds to the given `pc' value in the process map
603  * `ppm'.
604  */
605
606 static void
607 pmcpl_ct_node_update(struct pmcpl_ct_node *parent,
608     struct pmcpl_ct_node *child, int pmcin, unsigned v, int cd)
609 {
610         struct pmcpl_ct_arc *arc;
611         int i;
612
613         assert(parent != NULL);
614
615         /*
616          * Find related arc in parent node and
617          * increment the sample count.
618          */
619         for (i = 0; i < parent->pct_narc; i++) {
620                 if (parent->pct_arc[i].pcta_child == child) {
621                         arc = &parent->pct_arc[i];
622                         pmcpl_ct_samples_grow(&arc->pcta_samples);
623                         arc->pcta_samples.sb[pmcin] += v;
624                         /* Estimate call count. */
625                         if (cd) {
626                         pmcpl_ct_samples_grow(&arc->pcta_callid);
627                         if (pmcpl_ct_callid.sb[pmcin] -
628                             arc->pcta_callid.sb[pmcin] > 1)
629                                 arc->pcta_call++;
630                         arc->pcta_callid.sb[pmcin] =
631                             pmcpl_ct_callid.sb[pmcin];
632                         }
633                         return;
634                 }
635         }
636
637         /*
638          * No arc found for us, add ourself to the parent.
639          */
640         pmcpl_ct_arc_grow(parent->pct_narc,
641             &parent->pct_arc_c, &parent->pct_arc);
642         arc = &parent->pct_arc[parent->pct_narc];
643         pmcpl_ct_samples_grow(&arc->pcta_samples);
644         arc->pcta_samples.sb[pmcin] = v;
645         arc->pcta_call = 1;
646         if (cd) {
647                 pmcpl_ct_samples_grow(&arc->pcta_callid);
648                 arc->pcta_callid.sb[pmcin] = pmcpl_ct_callid.sb[pmcin];
649         }
650         arc->pcta_child = child;
651         parent->pct_narc++;
652 }
653
654 /*
655  * Lookup by image/pc.
656  */
657
658 static struct pmcpl_ct_node *
659 pmcpl_ct_node_hash_lookup(struct pmcstat_image *image, uintfptr_t pc,
660     struct pmcstat_symbol *sym, char *fl, char *fn)
661 {
662         int i;
663         unsigned int hash;
664         struct pmcpl_ct_node *ct;
665         struct pmcpl_ct_node_hash *h;
666         pmcstat_interned_string ifl, ifn;
667
668         if (fn != NULL) {
669                 ifl = pmcstat_string_intern(fl);
670                 ifn = pmcstat_string_intern(fn);
671         } else {
672                 ifl = 0;
673                 ifn = 0;
674         }
675
676         for (hash = i = 0; i < (int)sizeof(uintfptr_t); i++)
677                 hash += (pc >> i) & 0xFF;
678
679         hash &= PMCSTAT_HASH_MASK;
680
681         STAILQ_FOREACH(h, &pmcpl_ct_node_hash[hash], pch_next) {
682                 ct = h->pch_ctnode;
683
684                 assert(ct != NULL);
685
686                 if (ct->pct_image == image && ct->pct_func == pc) {
687                         if (fn == NULL)
688                                 return (ct);
689                         if (ct->pct_type == PMCPL_PCT_NAME &&
690                             ct->pct_ifl == ifl && ct->pct_ifn == ifn)
691                                 return (ct);
692                 }
693         }
694
695         /*
696          * We haven't seen this (pmcid, pc) tuple yet, so allocate a
697          * new callgraph node and a new hash table entry for it.
698          */
699         ct = pmcpl_ct_node_allocate();
700         if ((h = malloc(sizeof(*h))) == NULL)
701                 err(EX_OSERR, "ERROR: Could not allocate callgraph node");
702
703         if (fn != NULL) {
704                 ct->pct_type = PMCPL_PCT_NAME;
705                 ct->pct_ifl = ifl;
706                 ct->pct_ifn = ifn;
707         } else
708                 ct->pct_type = PMCPL_PCT_ADDR;
709         ct->pct_image = image;
710         ct->pct_func = pc;
711         ct->pct_sym = sym;
712
713         h->pch_ctnode = ct;
714         STAILQ_INSERT_HEAD(&pmcpl_ct_node_hash[hash], h, pch_next);
715         return (ct);
716 }
717
718 /*
719  * Record a callchain.
720  */
721
722 void
723 pmcpl_ct_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr,
724     uint32_t nsamples, uintfptr_t *cc, int usermode, uint32_t cpu)
725 {
726         int i, n, pmcin;
727         uintfptr_t pc, loadaddress;
728         struct pmcstat_image *image;
729         struct pmcstat_symbol *sym;
730         struct pmcstat_pcmap *ppm[PMC_CALLCHAIN_DEPTH_MAX];
731         struct pmcstat_process *km;
732         struct pmcpl_ct_node *ct;
733         struct pmcpl_ct_node *ctl[PMC_CALLCHAIN_DEPTH_MAX+1];
734
735         (void) cpu;
736
737         assert(nsamples>0 && nsamples<=PMC_CALLCHAIN_DEPTH_MAX);
738
739         /* Get the PMC index. */
740         pmcin = pmcr->pr_pmcin;
741
742         /*
743          * Validate mapping for the callchain.
744          * Go from bottom to first invalid entry.
745          */
746         km = pmcstat_kernproc;
747         for (n = 0; n < (int)nsamples; n++) {
748                 ppm[n] = pmcstat_process_find_map(usermode ?
749                     pp : km, cc[n]);
750                 if (ppm[n] == NULL) {
751                         /* Detect full frame capture (kernel + user). */
752                         if (!usermode) {
753                                 ppm[n] = pmcstat_process_find_map(pp, cc[n]);
754                                 if (ppm[n] != NULL)
755                                         km = pp;
756                         }
757                 }
758                 if (ppm[n] == NULL)
759                         break;
760         }
761         if (n-- == 0) {
762                 pmcstat_stats.ps_callchain_dubious_frames++;
763                 pmcr->pr_dubious_frames++;
764                 return;
765         }
766
767         /* Increase the call generation counter. */
768         pmcpl_ct_samples_grow(&pmcpl_ct_callid);
769         pmcpl_ct_callid.sb[pmcin]++;
770
771         /*
772          * Build node list.
773          */
774         ctl[0] = pmcpl_ct_root;
775         for (i = 1; n >= 0; n--) {
776                 image = ppm[n]->ppm_image;
777                 loadaddress = ppm[n]->ppm_lowpc +
778                     image->pi_vaddr - image->pi_start;
779                 /* Convert to an offset in the image. */
780                 pc = cc[n] - loadaddress;
781                 /*
782                  * Try determine the function at this offset.  If we can't
783                  * find a function round leave the `pc' value alone.
784                  */
785                 if ((sym = pmcstat_symbol_search(image, pc)) != NULL)
786                         pc = sym->ps_start;
787                 else
788                         pmcstat_stats.ps_samples_unknown_function++;
789
790                 ct = pmcpl_ct_node_hash_lookup(image, pc, sym, NULL, NULL);
791                 if (ct == NULL) {
792                         pmcstat_stats.ps_callchain_dubious_frames++;
793                         continue;
794                 }
795                 ctl[i++] = ct;
796         }
797         /* No valid node found. */
798         if (i == 1)
799                 return;
800         n = i;
801
802         ct = ctl[0];
803         for (i = 1; i < n; i++)
804                 pmcpl_ct_node_update(ctl[i-1], ctl[i], pmcin, 1, 1);
805
806         /*
807          * Increment the sample count for this PMC.
808          */
809         pmcpl_ct_samples_grow(&ctl[n-1]->pct_samples);
810         ctl[n-1]->pct_samples.sb[pmcin]++;
811
812         /* Update per instruction sample if required. */
813         if (args.pa_ctdumpinstr)
814                 pmcpl_ct_instr_add(ctl[n-1], pmcin, cc[0] -
815                     (ppm[0]->ppm_lowpc + ppm[0]->ppm_image->pi_vaddr -
816                      ppm[0]->ppm_image->pi_start), 1);
817 }
818
819 /*
820  * Print node child cost.
821  */
822
823 static void
824 pmcpl_ct_node_printchild(struct pmcpl_ct_node *ct, uintfptr_t paddr,
825     int pline)
826 {
827         int i, j, line;
828         uintfptr_t addr;
829         struct pmcpl_ct_node *child;
830         char sourcefile[PATH_MAX];
831         char funcname[PATH_MAX];
832
833         /*
834          * Child cost.
835          * TODO: attach child cost to the real position in the function.
836          * TODO: cfn=<fn> / call <ncall> addr(<fn>) / addr(call <fn>) <arccost>
837          */
838         for (i=0 ; i<ct->pct_narc; i++) {
839                 child = ct->pct_arc[i].pcta_child;
840                 /* Object binary. */
841                 fprintf(args.pa_graphfile, "cob=%s\n",
842                     pmcstat_string_unintern(child->pct_image->pi_fullpath));
843                 /* Child function name. */
844                 addr = child->pct_image->pi_vaddr + child->pct_func;
845                 line = 0;
846                 /* Child function source file. */
847                 if (child->pct_type == PMCPL_PCT_NAME) {
848                         fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n",
849                             pmcstat_string_unintern(child->pct_ifl),
850                             pmcstat_string_unintern(child->pct_ifn));
851                 } else if (pmcstat_image_addr2line(child->pct_image, addr,
852                     sourcefile, sizeof(sourcefile), &line,
853                     funcname, sizeof(funcname))) {
854                         fprintf(args.pa_graphfile, "cfi=%s\ncfn=%s\n",
855                                 sourcefile, funcname);
856                 } else {
857                         if (child->pct_sym != NULL)
858                                 fprintf(args.pa_graphfile,
859                                     "cfi=???\ncfn=%s\n",
860                                     pmcstat_string_unintern(
861                                         child->pct_sym->ps_name));
862                         else
863                                 fprintf(args.pa_graphfile,
864                                     "cfi=???\ncfn=%p\n", (void *)addr);
865                 }
866
867                 /* Child function address, line and call count. */
868                 fprintf(args.pa_graphfile, "calls=%u %p %u\n",
869                     ct->pct_arc[i].pcta_call, (void *)addr, line);
870
871                 /*
872                  * Call address, line, sample.
873                  * TODO: Associate call address to the right location.
874                  */
875                 fprintf(args.pa_graphfile, "%p %u", (void *)paddr, pline);
876                 for (j = 0; j<pmcstat_npmcs; j++)
877                         fprintf(args.pa_graphfile, " %u",
878                             PMCPL_CT_SAMPLE(j, &ct->pct_arc[i].pcta_samples));
879                 fprintf(args.pa_graphfile, "\n");
880         }
881 }
882
883 /*
884  * Print node self cost.
885  */
886
887 static void
888 pmcpl_ct_node_printself(struct pmcpl_ct_node *ct)
889 {
890         int i, j, fline, line;
891         uintfptr_t faddr, addr;
892         char sourcefile[PATH_MAX];
893         char funcname[PATH_MAX];
894
895         /*
896          * Object binary.
897          */
898         fprintf(args.pa_graphfile, "ob=%s\n",
899             pmcstat_string_unintern(ct->pct_image->pi_fullpath));
900
901         /*
902          * Function name.
903          */
904         faddr = ct->pct_image->pi_vaddr + ct->pct_func;
905         fline = 0;
906         if (ct->pct_type == PMCPL_PCT_NAME) {
907                 fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n",
908                     pmcstat_string_unintern(ct->pct_ifl),
909                     pmcstat_string_unintern(ct->pct_ifn));
910         } else if (pmcstat_image_addr2line(ct->pct_image, faddr,
911             sourcefile, sizeof(sourcefile), &fline,
912             funcname, sizeof(funcname))) {
913                 fprintf(args.pa_graphfile, "fl=%s\nfn=%s\n",
914                     sourcefile, funcname);
915         } else {
916                 if (ct->pct_sym != NULL)
917                         fprintf(args.pa_graphfile, "fl=???\nfn=%s\n",
918                             pmcstat_string_unintern(ct->pct_sym->ps_name));
919                 else
920                         fprintf(args.pa_graphfile, "fl=???\nfn=%p\n",
921                             (void *)(ct->pct_image->pi_vaddr + ct->pct_func));
922         }
923
924         /*
925          * Self cost.
926          */
927         if (ct->pct_ninstr > 0) {
928                 /*
929                  * Per location cost.
930                  */
931                 for (i = 0; i < ct->pct_ninstr; i++) {
932                         addr = ct->pct_image->pi_vaddr +
933                             ct->pct_instr[i].pctf_func;
934                         line = 0;
935                         pmcstat_image_addr2line(ct->pct_image, addr,
936                             sourcefile, sizeof(sourcefile), &line,
937                             funcname, sizeof(funcname));
938                         fprintf(args.pa_graphfile, "%p %u",
939                             (void *)addr, line);
940                         for (j = 0; j<pmcstat_npmcs; j++)
941                                 fprintf(args.pa_graphfile, " %u",
942                                     PMCPL_CT_SAMPLE(j,
943                                     &ct->pct_instr[i].pctf_samples));
944                         fprintf(args.pa_graphfile, "\n");
945                 }
946         } else {
947                 /* Global cost function cost. */
948                 fprintf(args.pa_graphfile, "%p %u", (void *)faddr, fline);
949                 for (i = 0; i<pmcstat_npmcs ; i++)
950                         fprintf(args.pa_graphfile, " %u",
951                             PMCPL_CT_SAMPLE(i, &ct->pct_samples));
952                 fprintf(args.pa_graphfile, "\n");
953         }
954
955         pmcpl_ct_node_printchild(ct, faddr, fline);
956 }
957
958 static void
959 pmcpl_ct_printnode(struct pmcpl_ct_node *ct)
960 {
961         int i;
962
963         if (ct == pmcpl_ct_root) {
964                 fprintf(args.pa_graphfile, "fn=root\n");
965                 fprintf(args.pa_graphfile, "0x0 1");
966                 for (i = 0; i<pmcstat_npmcs ; i++)
967                         fprintf(args.pa_graphfile, " 0");
968                 fprintf(args.pa_graphfile, "\n");
969                 pmcpl_ct_node_printchild(ct, 0, 0);
970         } else
971                 pmcpl_ct_node_printself(ct);
972 }
973
974 /*
975  * Breadth first traversal.
976  */
977
978 static void
979 pmcpl_ct_bfs(struct pmcpl_ct_node *ct)
980 {
981         int i;
982         struct pmcpl_ct_node_hash *pch, *pchc;
983         struct pmcpl_ct_node *child;
984         STAILQ_HEAD(,pmcpl_ct_node_hash) q;
985
986         STAILQ_INIT(&q);
987         if ((pch = malloc(sizeof(*pch))) == NULL)
988                 err(EX_OSERR, "ERROR: Cannot allocate queue");
989         pch->pch_ctnode = ct;
990         STAILQ_INSERT_TAIL(&q, pch, pch_next);
991         ct->pct_color = PMCPL_PCT_BLACK;
992
993         while (!STAILQ_EMPTY(&q)) {
994                 pch = STAILQ_FIRST(&q);
995                 STAILQ_REMOVE_HEAD(&q, pch_next);
996                 pmcpl_ct_printnode(pch->pch_ctnode);
997                 for (i = 0; i<pch->pch_ctnode->pct_narc; i++) {
998                         child = pch->pch_ctnode->pct_arc[i].pcta_child;
999                         if (child->pct_color == PMCPL_PCT_WHITE) {
1000                                 child->pct_color = PMCPL_PCT_BLACK;
1001                                 if ((pchc = malloc(sizeof(*pchc))) == NULL)
1002                                         err(EX_OSERR,
1003                                             "ERROR: Cannot allocate queue");
1004                                 pchc->pch_ctnode = child;
1005                                 STAILQ_INSERT_TAIL(&q, pchc, pch_next);
1006                         }
1007                 }
1008                 free(pch);
1009         }
1010 }
1011
1012 /*
1013  * Detect and fix inlined location.
1014  */
1015
1016 static void
1017 _pmcpl_ct_expand_inline(struct pmcpl_ct_node *ct)
1018 {
1019         int i, j;
1020         unsigned fline, line, v;
1021         uintfptr_t faddr, addr, pc;
1022         char sourcefile[PATH_MAX];
1023         char ffuncname[PATH_MAX], funcname[PATH_MAX];
1024         char buffer[PATH_MAX];
1025         struct pmcpl_ct_node *child;
1026
1027         /*
1028          * Resolve parent and compare to each instr location.
1029          */
1030         faddr = ct->pct_image->pi_vaddr + ct->pct_func;
1031         fline = 0;
1032         if (!pmcstat_image_addr2line(ct->pct_image, faddr,
1033             sourcefile, sizeof(sourcefile), &fline,
1034             ffuncname, sizeof(ffuncname)))
1035                 return;
1036
1037         for (i = 0; i < ct->pct_ninstr; i++) {
1038                 addr = ct->pct_image->pi_vaddr +
1039                     ct->pct_instr[i].pctf_func;
1040                 line = 0;
1041                 if (!pmcstat_image_addr2line(ct->pct_image, addr,
1042                     sourcefile, sizeof(sourcefile), &line,
1043                     funcname, sizeof(funcname)))
1044                         continue;
1045
1046                 if (strcmp(funcname, ffuncname) == 0)
1047                         continue;
1048
1049                 /*
1050                  * - Lookup/create inline node by function name.
1051                  * - Move instr PMCs to the inline node.
1052                  * - Link nodes.
1053                  * The lookup create a specific node per image/pc.
1054                  */
1055                 if (args.pa_verbosity >= 2)
1056                         fprintf(args.pa_printfile,
1057                             "WARNING: inlined function at %p %s in %s\n",
1058                             (void *)addr, funcname, ffuncname);
1059
1060                 snprintf(buffer, sizeof(buffer), "%s@%s",
1061                         funcname, ffuncname);
1062                 child = pmcpl_ct_node_hash_lookup(ct->pct_image,
1063                     ct->pct_func, ct->pct_sym, sourcefile, buffer);
1064                 assert(child != NULL);
1065                 pc = ct->pct_instr[i].pctf_func;
1066                 for (j = 0; j<pmcstat_npmcs; j++) {
1067                         v = PMCPL_CT_SAMPLE(j,
1068                             &ct->pct_instr[i].pctf_samples);
1069                         if (v == 0)
1070                                 continue;
1071                         pmcpl_ct_instr_add(child, j, pc, v);
1072                         pmcpl_ct_node_update(ct, child, j, v, 0);
1073                         if (j < ct->pct_samples.npmcs)
1074                                 ct->pct_samples.sb[j] -=
1075                                     ct->pct_instr[i].pctf_samples.sb[j];
1076                         ct->pct_instr[i].pctf_samples.sb[j] = 0;
1077                 }
1078         }
1079 }
1080
1081 static void
1082 pmcpl_ct_expand_inline(void)
1083 {
1084         int i;
1085         struct pmcpl_ct_node_hash *pch;
1086
1087         if (!args.pa_ctdumpinstr)
1088                 return;
1089
1090         for (i = 0; i < PMCSTAT_NHASH; i++)
1091                 STAILQ_FOREACH(pch, &pmcpl_ct_node_hash[i], pch_next)
1092                         if (pch->pch_ctnode->pct_type == PMCPL_PCT_ADDR)
1093                                 _pmcpl_ct_expand_inline(pch->pch_ctnode);
1094 }
1095
1096 /*
1097  * Clean the PMC name for Kcachegrind formula
1098  */
1099
1100 static void
1101 pmcpl_ct_fixup_pmcname(char *s)
1102 {
1103         char *p;
1104
1105         for (p = s; *p; p++)
1106                 if (!isalnum(*p))
1107                         *p = '_';
1108 }
1109
1110 /*
1111  * Print a calltree (KCachegrind) for all PMCs.
1112  */
1113
1114 static void
1115 pmcpl_ct_print(void)
1116 {
1117         int i;
1118         char name[40];
1119         struct pmcpl_ct_sample rsamples;
1120
1121         pmcpl_ct_samples_root(&rsamples);
1122         pmcpl_ct_expand_inline();
1123
1124         fprintf(args.pa_graphfile,
1125                 "version: 1\n"
1126                 "creator: pmcstat\n"
1127                 "positions: instr line\n"
1128                 "events:");
1129         for (i=0; i<pmcstat_npmcs; i++) {
1130                 snprintf(name, sizeof(name), "%s_%d",
1131                     pmcstat_pmcindex_to_name(i), i);
1132                 pmcpl_ct_fixup_pmcname(name);
1133                 fprintf(args.pa_graphfile, " %s", name);
1134         }
1135         fprintf(args.pa_graphfile, "\nsummary:");
1136         for (i=0; i<pmcstat_npmcs ; i++)
1137                 fprintf(args.pa_graphfile, " %u",
1138                     PMCPL_CT_SAMPLE(i, &rsamples));
1139         fprintf(args.pa_graphfile, "\n");
1140         pmcpl_ct_bfs(pmcpl_ct_root);
1141         pmcpl_ct_samples_free(&rsamples);
1142 }
1143
1144 int
1145 pmcpl_ct_configure(char *opt)
1146 {
1147
1148         if (strncmp(opt, "skiplink=", 9) == 0) {
1149                 pmcstat_skiplink = atoi(opt+9);
1150         } else
1151                 return (0);
1152
1153         return (1);
1154 }
1155
1156 int
1157 pmcpl_ct_init(void)
1158 {
1159         int i;
1160
1161         pmcpl_ct_root = pmcpl_ct_node_allocate();
1162
1163         for (i = 0; i < PMCSTAT_NHASH; i++)
1164                 STAILQ_INIT(&pmcpl_ct_node_hash[i]);
1165
1166         pmcpl_ct_samples_init(&pmcpl_ct_callid);
1167
1168         return (0);
1169 }
1170
1171 void
1172 pmcpl_ct_shutdown(FILE *mf)
1173 {
1174         int i;
1175         struct pmcpl_ct_node_hash *pch, *pchtmp;
1176
1177         (void) mf;
1178
1179         if (args.pa_flags & FLAG_DO_CALLGRAPHS)
1180                 pmcpl_ct_print();
1181
1182         /*
1183          * Free memory.
1184          */
1185
1186         for (i = 0; i < PMCSTAT_NHASH; i++) {
1187                 STAILQ_FOREACH_SAFE(pch, &pmcpl_ct_node_hash[i], pch_next,
1188                     pchtmp) {
1189                         pmcpl_ct_node_free(pch->pch_ctnode);
1190                         free(pch);
1191                 }
1192         }
1193
1194         pmcpl_ct_node_free(pmcpl_ct_root);
1195         pmcpl_ct_root = NULL;
1196
1197         pmcpl_ct_samples_free(&pmcpl_ct_callid);
1198 }
1199