2 * Copyright (c) 2005-2007, Joseph Koshy
3 * Copyright (c) 2007 The FreeBSD Foundation
6 * Portions of this software were developed by A. Joseph Koshy under
7 * sponsorship from the FreeBSD Foundation and Google, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * Transform a hwpmc(4) log into human readable form, and into
33 * gprof(1) compatible profiles.
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
39 #include <sys/param.h>
40 #include <sys/endian.h>
41 #include <sys/cpuset.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
47 #include <sys/queue.h>
48 #include <sys/socket.h>
52 #include <netinet/in.h>
73 #include "pmcstat_log.h"
74 #include "pmcstat_top.h"
79 * pmcstat_initialize_logging() initialize this module, called first
80 * pmcstat_shutdown_logging() orderly shutdown, called last
81 * pmcstat_open_log() open an eventlog for processing
82 * pmcstat_process_log() print/convert an event log
83 * pmcstat_display_log() top mode display for the log
84 * pmcstat_close_log() finish processing an event log
86 * IMPLEMENTATION NOTES
88 * We correlate each 'callchain' or 'sample' entry seen in the event
89 * log back to an executable object in the system. Executable objects include:
91 * - program executables,
92 * - shared libraries loaded by the runtime loader,
93 * - dlopen()'ed objects loaded by the program,
94 * - the runtime loader itself,
95 * - the kernel and kernel modules.
97 * Each process that we know about is treated as a set of regions that
98 * map to executable objects. Processes are described by
99 * 'pmcstat_process' structures. Executable objects are tracked by
100 * 'pmcstat_image' structures. The kernel and kernel modules are
101 * common to all processes (they reside at the same virtual addresses
102 * for all processes). Individual processes can have their text
103 * segments and shared libraries loaded at process-specific locations.
105 * A given executable object can be in use by multiple processes
106 * (e.g., libc.so) and loaded at a different address in each.
107 * pmcstat_pcmap structures track per-image mappings.
109 * The sample log could have samples from multiple PMCs; we
110 * generate one 'gmon.out' profile per PMC.
112 * IMPLEMENTATION OF GMON OUTPUT
114 * Each executable object gets one 'gmon.out' profile, per PMC in
115 * use. Creation of 'gmon.out' profiles is done lazily. The
116 * 'gmon.out' profiles generated for a given sampling PMC are
117 * aggregates of all the samples for that particular executable object.
120 * IMPLEMENTATION OF SYSTEM-WIDE CALLGRAPH OUTPUT
122 * Each active pmcid has its own callgraph structure, described by a
123 * 'struct pmcstat_callgraph'. Given a process id and a list of pc
124 * values, we map each pc value to a tuple (image, symbol), where
125 * 'image' denotes an executable object and 'symbol' is the closest
126 * symbol that precedes the pc value. Each pc value in the list is
127 * also given a 'rank' that reflects its depth in the call stack.
/* List of known PMC records; entries are linked through pr_next. */
130 struct pmcstat_pmcs pmcstat_pmcs = LIST_HEAD_INITIALIZER(pmcstat_pmcs);
133 * All image descriptors are kept in a hash table.
135 struct pmcstat_image_hash_list pmcstat_image_hash[PMCSTAT_NHASH];
138 * All process descriptors are kept in a hash table.
140 struct pmcstat_process_hash_list pmcstat_process_hash[PMCSTAT_NHASH];
142 struct pmcstat_stats pmcstat_stats; /* statistics */
143 static int ps_samples_period; /* sample count between top refreshes */
145 struct pmcstat_process *pmcstat_kernproc; /* kernel 'process' */
147 #include "pmcpl_gprof.h"
148 #include "pmcpl_callgraph.h"
149 #include "pmcpl_annotate.h"
150 #include "pmcpl_annotate_cg.h"
151 #include "pmcpl_calltree.h"
/*
 * Table of output plugins, indexed by args.pa_plugin elsewhere in this
 * file.  Each entry supplies optional callbacks (pl_init, pl_shutdown,
 * pl_process, ...); callers test a callback against NULL before using it.
 * Of the entries visible here only "callgraph" and "calltree" provide
 * pl_topdisplay and pl_topkeypress.
 * NOTE(review): this listing omits some lines of the initializer (entry
 * braces, the name of the gmon-based entry, any terminating entry);
 * confirm against the complete file.
 */
153 static struct pmc_plugins plugins[] = {
158 .pl_name = "callgraph",
159 .pl_init = pmcpl_cg_init,
160 .pl_shutdown = pmcpl_cg_shutdown,
161 .pl_process = pmcpl_cg_process,
162 .pl_topkeypress = pmcpl_cg_topkeypress,
163 .pl_topdisplay = pmcpl_cg_topdisplay
167 .pl_shutdown = pmcpl_gmon_shutdown,
168 .pl_process = pmcpl_gmon_process,
169 .pl_initimage = pmcpl_gmon_initimage,
170 .pl_shutdownimage = pmcpl_gmon_shutdownimage,
171 .pl_newpmc = pmcpl_gmon_newpmc
174 .pl_name = "annotate",
175 .pl_process = pmcpl_annotate_process
178 .pl_name = "calltree",
179 .pl_configure = pmcpl_ct_configure,
180 .pl_init = pmcpl_ct_init,
181 .pl_shutdown = pmcpl_ct_shutdown,
182 .pl_process = pmcpl_ct_process,
183 .pl_topkeypress = pmcpl_ct_topkeypress,
184 .pl_topdisplay = pmcpl_ct_topdisplay
187 .pl_name = "annotate_cg",
188 .pl_process = pmcpl_annotate_cg_process
/*
 * Non-zero when PMC merging is enabled; toggled from the key handler and
 * consulted when formatting the PMC name and when choosing the PMC filter.
 */
196 static int pmcstat_mergepmc;
198 int pmcstat_pmcinfilter = 0; /* PMC filter for top mode. */
199 float pmcstat_threshold = 0.5; /* Cost filter for top mode. */
/* Forward declaration; the definition follows below. */
205 static void pmcstat_stats_reset(int _reset_global);
213 * PMC Top mode pause state.
215 static int pmcstat_pause;
/*
 * Reset collected statistics.  Walks every record on pmcstat_pmcs and
 * clears its per-PMC counters (pr_dubious_frames is the one visible
 * here), zeroes ps_samples_period, and zeroes the global pmcstat_stats
 * structure.
 * NOTE(review): the return type, braces and whatever test of
 * reset_global guards the bzero() are missing from this listing;
 * confirm against the complete file.
 */
218 pmcstat_stats_reset(int reset_global)
220 struct pmcstat_pmcrecord *pr;
222 /* Flush PMCs stats. */
223 LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) {
225 pr->pr_dubious_frames = 0;
227 ps_samples_period = 0;
229 /* Flush global stats. */
231 bzero(&pmcstat_stats, sizeof(struct pmcstat_stats));
235 * Resolve file name and line number for the given address.
/*
 * Resolve `addr' within `image' to a function name and a source file by
 * querying an addr2line child process kept in image->pi_addr2line.
 *
 * When no child exists yet, candidate paths derived from
 * image->pi_fullpath are formatted into imagepath and probed with
 * open(): a ".debug" file under /usr/lib/debug, a ".symbols" file, and
 * a plain "%s%s" path.  An `addr2line -Cfe "<path>"' command line is
 * then built and started with popen() in "r+" mode.  If popen() fails,
 * a warning is issued at most once (guarded by addr2line_warn).
 *
 * For each query the address is written to the pipe as "%p\n"; one line
 * is read into funcname (funcname_len bytes at most) and one line into
 * sourcefile (sourcefile_len bytes at most).  The code looks for '\n'
 * in funcname and for ':' in sourcefile, warning when the ':' separator
 * is missing.  A pipe in EOF or error state is closed with pclose() and
 * pi_addr2line is reset to NULL.
 *
 * NOTE(review): the return type, the return statements, the conditions
 * between the open() attempts, and the store through `sourceline' are
 * not visible in this listing; confirm against the complete file.
 */
238 pmcstat_image_addr2line(struct pmcstat_image *image, uintfptr_t addr,
239 char *sourcefile, size_t sourcefile_len, unsigned *sourceline,
240 char *funcname, size_t funcname_len)
242 static int addr2line_warn = 0;
244 char *sep, cmdline[PATH_MAX], imagepath[PATH_MAX];
248 if (image->pi_addr2line == NULL) {
249 /* Try default debug file location. */
250 snprintf(imagepath, sizeof(imagepath),
251 "/usr/lib/debug/%s%s.debug",
253 pmcstat_string_unintern(image->pi_fullpath));
254 fd = open(imagepath, O_RDONLY);
256 /* Old kernel symbol path. */
257 snprintf(imagepath, sizeof(imagepath), "%s%s.symbols",
259 pmcstat_string_unintern(image->pi_fullpath));
260 fd = open(imagepath, O_RDONLY);
262 snprintf(imagepath, sizeof(imagepath), "%s%s",
264 pmcstat_string_unintern(
265 image->pi_fullpath));
271 * New addr2line support recursive inline function with -i
272 * but the format does not add a marker when no more entries
275 snprintf(cmdline, sizeof(cmdline), "addr2line -Cfe \"%s\"",
277 image->pi_addr2line = popen(cmdline, "r+");
278 if (image->pi_addr2line == NULL) {
279 if (!addr2line_warn) {
282 "WARNING: addr2line is needed for source code information."
289 if (feof(image->pi_addr2line) || ferror(image->pi_addr2line)) {
290 warnx("WARNING: addr2line pipe error");
291 pclose(image->pi_addr2line);
292 image->pi_addr2line = NULL;
296 fprintf(image->pi_addr2line, "%p\n", (void *)addr);
298 if (fgets(funcname, funcname_len, image->pi_addr2line) == NULL) {
299 warnx("WARNING: addr2line function name read error");
302 sep = strchr(funcname, '\n');
306 if (fgets(sourcefile, sourcefile_len, image->pi_addr2line) == NULL) {
307 warnx("WARNING: addr2line source file read error");
310 sep = strchr(sourcefile, ':');
312 warnx("WARNING: addr2line source line separator missing");
324 * Given a pmcid in use, find its human-readable name.
/*
 * Search pmcstat_pmcs for the record whose pr_pmcid equals `pmcid' and
 * return its name as produced by pmcstat_string_unintern().
 * NOTE(review): the return type and the value returned when no record
 * matches are not visible in this listing.
 */
328 pmcstat_pmcid_to_name(pmc_id_t pmcid)
330 struct pmcstat_pmcrecord *pr;
332 LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
333 if (pr->pr_pmcid == pmcid)
334 return (pmcstat_string_unintern(pr->pr_pmcname));
340 * Convert PMC index to name.
/*
 * Search pmcstat_pmcs for the record whose pr_pmcin equals `pmcin' and
 * return its name as produced by pmcstat_string_unintern().
 * NOTE(review): the return type and the value returned when no record
 * matches are not visible in this listing.
 */
344 pmcstat_pmcindex_to_name(int pmcin)
346 struct pmcstat_pmcrecord *pr;
348 LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
349 if (pr->pr_pmcin == pmcin)
350 return pmcstat_string_unintern(pr->pr_pmcname);
356 * Return PMC record with given index.
/*
 * Search pmcstat_pmcs for the record whose pr_pmcin equals `pmcin'.
 * Callers in this file compare the result against NULL.
 * NOTE(review): the return statements themselves are not visible in
 * this listing.
 */
359 struct pmcstat_pmcrecord *
360 pmcstat_pmcindex_to_pmcr(int pmcin)
362 struct pmcstat_pmcrecord *pr;
364 LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
365 if (pr->pr_pmcin == pmcin)
372 * Print log entries as text.
/*
 * Print the event log as text.  Events are pulled from
 * args.pa_logparser with pmclog_read() for as long as it returns 0, and
 * each one is formatted according to ev.pl_type through
 * PMCSTAT_PRINT_ENTRY.  A callchain event additionally prints one "..."
 * entry per program counter.  An initialize event whose major version
 * (top byte of pl_version) differs from PMC_VERSION_MAJOR produces a
 * version warning when args.pa_verbosity is above zero.  Types not
 * handled explicitly are reported as "unknown event" on
 * args.pa_printfile.
 *
 * When the read loop stops, the parser state decides the result:
 * PMCLOG_EOF yields PMCSTAT_FINISHED, PMCLOG_REQUIRE_DATA yields
 * PMCSTAT_RUNNING, and any other state is reported as an event parsing
 * failure with the record number and offset.
 *
 * NOTE(review): the return type, local declarations, break statements
 * and the calls that emit the warning and error messages are not
 * visible in this listing.
 */
376 pmcstat_print_log(void)
381 while (pmclog_read(args.pa_logparser, &ev) == 0) {
382 assert(ev.pl_state == PMCLOG_OK);
383 switch (ev.pl_type) {
384 case PMCLOG_TYPE_CALLCHAIN:
385 PMCSTAT_PRINT_ENTRY("callchain",
386 "%d 0x%x %d %d %c", ev.pl_u.pl_cc.pl_pid,
387 ev.pl_u.pl_cc.pl_pmcid,
388 PMC_CALLCHAIN_CPUFLAGS_TO_CPU(ev.pl_u.pl_cc. \
389 pl_cpuflags), ev.pl_u.pl_cc.pl_npc,
390 PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(ev.pl_u.pl_cc.\
391 pl_cpuflags) ? 'u' : 's');
392 for (npc = 0; npc < ev.pl_u.pl_cc.pl_npc; npc++)
393 PMCSTAT_PRINT_ENTRY("...", "%p",
394 (void *) ev.pl_u.pl_cc.pl_pc[npc]);
396 case PMCLOG_TYPE_CLOSELOG:
397 PMCSTAT_PRINT_ENTRY("closelog",);
399 case PMCLOG_TYPE_DROPNOTIFY:
400 PMCSTAT_PRINT_ENTRY("drop",);
402 case PMCLOG_TYPE_INITIALIZE:
403 PMCSTAT_PRINT_ENTRY("initlog","0x%x \"%s\"",
404 ev.pl_u.pl_i.pl_version,
405 pmc_name_of_cputype(ev.pl_u.pl_i.pl_arch));
406 if ((ev.pl_u.pl_i.pl_version & 0xFF000000) !=
407 PMC_VERSION_MAJOR << 24 && args.pa_verbosity > 0)
409 "WARNING: Log version 0x%x != expected version 0x%x.",
410 ev.pl_u.pl_i.pl_version, PMC_VERSION);
412 case PMCLOG_TYPE_MAP_IN:
413 PMCSTAT_PRINT_ENTRY("map-in","%d %p \"%s\"",
414 ev.pl_u.pl_mi.pl_pid,
415 (void *) ev.pl_u.pl_mi.pl_start,
416 ev.pl_u.pl_mi.pl_pathname);
418 case PMCLOG_TYPE_MAP_OUT:
419 PMCSTAT_PRINT_ENTRY("map-out","%d %p %p",
420 ev.pl_u.pl_mo.pl_pid,
421 (void *) ev.pl_u.pl_mo.pl_start,
422 (void *) ev.pl_u.pl_mo.pl_end);
424 case PMCLOG_TYPE_PCSAMPLE:
425 PMCSTAT_PRINT_ENTRY("sample","0x%x %d %p %c",
426 ev.pl_u.pl_s.pl_pmcid,
428 (void *) ev.pl_u.pl_s.pl_pc,
429 ev.pl_u.pl_s.pl_usermode ? 'u' : 's');
431 case PMCLOG_TYPE_PMCALLOCATE:
432 PMCSTAT_PRINT_ENTRY("allocate","0x%x \"%s\" 0x%x",
433 ev.pl_u.pl_a.pl_pmcid,
434 ev.pl_u.pl_a.pl_evname,
435 ev.pl_u.pl_a.pl_flags);
437 case PMCLOG_TYPE_PMCALLOCATEDYN:
438 PMCSTAT_PRINT_ENTRY("allocatedyn","0x%x \"%s\" 0x%x",
439 ev.pl_u.pl_ad.pl_pmcid,
440 ev.pl_u.pl_ad.pl_evname,
441 ev.pl_u.pl_ad.pl_flags);
443 case PMCLOG_TYPE_PMCATTACH:
444 PMCSTAT_PRINT_ENTRY("attach","0x%x %d \"%s\"",
445 ev.pl_u.pl_t.pl_pmcid,
447 ev.pl_u.pl_t.pl_pathname);
449 case PMCLOG_TYPE_PMCDETACH:
450 PMCSTAT_PRINT_ENTRY("detach","0x%x %d",
451 ev.pl_u.pl_d.pl_pmcid,
452 ev.pl_u.pl_d.pl_pid);
454 case PMCLOG_TYPE_PROCCSW:
455 PMCSTAT_PRINT_ENTRY("cswval","0x%x %d %jd",
456 ev.pl_u.pl_c.pl_pmcid,
458 ev.pl_u.pl_c.pl_value);
460 case PMCLOG_TYPE_PROCEXEC:
461 PMCSTAT_PRINT_ENTRY("exec","0x%x %d %p \"%s\"",
462 ev.pl_u.pl_x.pl_pmcid,
464 (void *) ev.pl_u.pl_x.pl_entryaddr,
465 ev.pl_u.pl_x.pl_pathname);
467 case PMCLOG_TYPE_PROCEXIT:
468 PMCSTAT_PRINT_ENTRY("exitval","0x%x %d %jd",
469 ev.pl_u.pl_e.pl_pmcid,
471 ev.pl_u.pl_e.pl_value);
473 case PMCLOG_TYPE_PROCFORK:
474 PMCSTAT_PRINT_ENTRY("fork","%d %d",
475 ev.pl_u.pl_f.pl_oldpid,
476 ev.pl_u.pl_f.pl_newpid);
478 case PMCLOG_TYPE_USERDATA:
479 PMCSTAT_PRINT_ENTRY("userdata","0x%x",
480 ev.pl_u.pl_u.pl_userdata);
482 case PMCLOG_TYPE_SYSEXIT:
483 PMCSTAT_PRINT_ENTRY("exit","%d",
484 ev.pl_u.pl_se.pl_pid);
487 fprintf(args.pa_printfile, "unknown event (type %d).\n",
492 if (ev.pl_state == PMCLOG_EOF)
493 return (PMCSTAT_FINISHED);
494 else if (ev.pl_state == PMCLOG_REQUIRE_DATA)
495 return (PMCSTAT_RUNNING);
498 "ERROR: event parsing failed (record %jd, offset 0x%jx).",
499 (uintmax_t) ev.pl_count + 1, ev.pl_offset);
508 * Process a log file in offline analysis mode.
/*
 * Entry point for offline log processing.  When FLAG_DO_PRINT is set in
 * args.pa_flags the log is printed with pmcstat_print_log() and its
 * result returned; otherwise the work is delegated to
 * pmcstat_analyze_log(), which receives the plugin table and this
 * file's global state (statistics, kernel process, merge flag, PMC
 * count and samples-period counter).
 */
512 pmcstat_process_log(void)
516 * If analysis has not been asked for, just print the log to
517 * the current output file.
519 if (args.pa_flags & FLAG_DO_PRINT)
520 return (pmcstat_print_log());
522 return (pmcstat_analyze_log(&args, plugins, &pmcstat_stats, pmcstat_kernproc,
523 pmcstat_mergepmc, &pmcstat_npmcs, &ps_samples_period));
527 * Refresh top display.
/*
 * Redraw the top-mode header for the PMC selected by
 * pmcstat_pmcinfilter, then let the active plugin draw its body.
 *
 * The PMC name is shown as "[name]" when merging is enabled and as
 * "name.index" otherwise.  When ps_samples_period is positive, the
 * PMC's sample count is expressed as a percentage of it and printed
 * with the attributes chosen by PMCSTAT_ATTRPERCENT.  The count of
 * unresolved frames (pr_dubious_frames) follows, and the plugin's
 * pl_topdisplay callback is invoked when it is not NULL.
 *
 * NOTE(review): the return type, local declarations, the early exits
 * for pause mode and for a PMC not yet seen, and the fallback value of
 * `v' are not visible in this listing.
 */
531 pmcstat_refresh_top(void)
536 struct pmcstat_pmcrecord *pmcpr;
538 /* If in pause mode do not refresh display. */
542 /* Wait until the PMC shows up in the log. */
543 pmcpr = pmcstat_pmcindex_to_pmcr(pmcstat_pmcinfilter);
547 /* Format PMC name. */
548 if (pmcstat_mergepmc)
549 snprintf(pmcname, sizeof(pmcname), "[%s]",
550 pmcstat_string_unintern(pmcpr->pr_pmcname));
552 snprintf(pmcname, sizeof(pmcname), "%s.%d",
553 pmcstat_string_unintern(pmcpr->pr_pmcname),
554 pmcstat_pmcinfilter);
556 /* Format samples count. */
557 if (ps_samples_period > 0)
558 v = (pmcpr->pr_samples * 100.0) / ps_samples_period;
561 v_attrs = PMCSTAT_ATTRPERCENT(v);
563 PMCSTAT_PRINTBEGIN();
564 PMCSTAT_PRINTW("PMC: %s Samples: %u ",
567 PMCSTAT_ATTRON(v_attrs);
568 PMCSTAT_PRINTW("(%.1f%%) ", v);
569 PMCSTAT_ATTROFF(v_attrs);
570 PMCSTAT_PRINTW(", %u unresolved\n\n",
571 pmcpr->pr_dubious_frames);
572 if (plugins[args.pa_plugin].pl_topdisplay != NULL)
573 plugins[args.pa_plugin].pl_topdisplay();
578 * Find the next pmc index to display.
/*
 * Adjust pmcstat_pmcinfilter when PMC merging is enabled.  Starting
 * from the current index, each candidate record is looked up and
 * tested for being absent (NULL) or being its own pr_merge target.
 * The index is advanced, wrapping to 0 at pmcstat_npmcs, and the loop
 * stops once it has come back around to the starting index.
 * NOTE(review): the statement executed when the NULL / self-merge test
 * succeeds (presumably a break) and the `do' line are not visible in
 * this listing.
 */
582 pmcstat_changefilter(void)
585 struct pmcstat_pmcrecord *pmcr;
588 * Find the next merge target.
590 if (pmcstat_mergepmc) {
591 pmcin = pmcstat_pmcinfilter;
594 pmcr = pmcstat_pmcindex_to_pmcr(pmcstat_pmcinfilter);
595 if (pmcr == NULL || pmcr == pmcr->pr_merge)
598 pmcstat_pmcinfilter++;
599 if (pmcstat_pmcinfilter >= pmcstat_npmcs)
600 pmcstat_pmcinfilter = 0;
602 } while (pmcstat_pmcinfilter != pmcin);
/*
 * Handle a key press in top mode.  A one-line curses window is created
 * with newwin() and used to echo the key and describe the action taken.
 * Actions visible in this listing:
 *   - choose delta or accumulation mode (args.pa_topmode);
 *   - toggle PMC merging: the active plugin is shut down, statistics
 *     are reset without touching global stats, the plugin is
 *     re-initialised and the PMC filter is moved onto a merge target;
 *   - switch plugin: shut down the current one, advance until a plugin
 *     with a pl_topdisplay callback is found, reset stats, initialise
 *     it;
 *   - switch PMC: advance pmcstat_pmcinfilter with wrap-around and
 *     re-run pmcstat_changefilter();
 *   - toggle pause (pmcstat_pause);
 *   - exit;
 *   - otherwise forward the key and the window to the plugin's
 *     pl_topkeypress callback when it is not NULL.
 * NOTE(review): the return type, the code reading the key, the case
 * labels mapping keys to these actions, and the return statements are
 * not visible in this listing.
 */
611 pmcstat_keypress_log(void)
616 w = newwin(1, 0, 1, 0);
618 wprintw(w, "Key: %c => ", c);
621 wprintw(w, "enter mode 'd' or 'a' => ");
624 args.pa_topmode = PMCSTAT_TOP_DELTA;
625 wprintw(w, "switching to delta mode");
627 args.pa_topmode = PMCSTAT_TOP_ACCUM;
628 wprintw(w, "switching to accumulation mode");
632 pmcstat_mergepmc = !pmcstat_mergepmc;
634 * Changing merge state require data reset.
636 if (plugins[args.pa_plugin].pl_shutdown != NULL)
637 plugins[args.pa_plugin].pl_shutdown(NULL);
638 pmcstat_stats_reset(0);
639 if (plugins[args.pa_plugin].pl_init != NULL)
640 plugins[args.pa_plugin].pl_init();
642 /* Update filter to be on a merge target. */
643 pmcstat_changefilter();
644 wprintw(w, "merge PMC %s", pmcstat_mergepmc ? "on" : "off");
647 /* Close current plugin. */
648 if (plugins[args.pa_plugin].pl_shutdown != NULL)
649 plugins[args.pa_plugin].pl_shutdown(NULL);
651 /* Find next top display available. */
654 if (plugins[args.pa_plugin].pl_name == NULL)
656 } while (plugins[args.pa_plugin].pl_topdisplay == NULL);
658 /* Open new plugin. */
659 pmcstat_stats_reset(0);
660 if (plugins[args.pa_plugin].pl_init != NULL)
661 plugins[args.pa_plugin].pl_init();
662 wprintw(w, "switching to plugin %s",
663 plugins[args.pa_plugin].pl_name);
666 pmcstat_pmcinfilter++;
667 if (pmcstat_pmcinfilter >= pmcstat_npmcs)
668 pmcstat_pmcinfilter = 0;
669 pmcstat_changefilter();
670 wprintw(w, "switching to PMC %s.%d",
671 pmcstat_pmcindex_to_name(pmcstat_pmcinfilter),
672 pmcstat_pmcinfilter);
675 pmcstat_pause = !pmcstat_pause;
677 wprintw(w, "pause => press space again to continue");
680 wprintw(w, "exiting...");
684 if (plugins[args.pa_plugin].pl_topkeypress != NULL)
685 if (plugins[args.pa_plugin].pl_topkeypress(c, (void *)w))
/*
 * Top-mode display step: redraw through pmcstat_refresh_top() and, when
 * args.pa_topmode is PMCSTAT_TOP_DELTA, discard the data gathered so
 * far by shutting the active plugin down, resetting statistics (global
 * stats excluded, reset_global == 0) and initialising the plugin again.
 */
700 pmcstat_display_log(void)
703 pmcstat_refresh_top();
705 /* Reset everything if in delta mode. */
706 if (args.pa_topmode == PMCSTAT_TOP_DELTA) {
707 if (plugins[args.pa_plugin].pl_shutdown != NULL)
708 plugins[args.pa_plugin].pl_shutdown(NULL);
709 pmcstat_stats_reset(0);
710 if (plugins[args.pa_plugin].pl_init != NULL)
711 plugins[args.pa_plugin].pl_init();
716 * Configure a plugin.
/*
 * Apply one plugin option string.  An option starting with "threshold="
 * sets pmcstat_threshold from the text after the '=' using atof().
 * Options are offered to the active plugin's pl_configure callback
 * when it is not NULL; a zero return from the callback is reported as
 * an unknown option.
 * NOTE(review): the return type, the `else' linking the two branches
 * (presumably present) and the call that emits the error message are
 * not visible in this listing.
 */
720 pmcstat_pluginconfigure_log(char *opt)
723 if (strncmp(opt, "threshold=", 10) == 0) {
724 pmcstat_threshold = atof(opt+10);
726 if (plugins[args.pa_plugin].pl_configure != NULL) {
727 if (!plugins[args.pa_plugin].pl_configure(opt))
729 "ERROR: unknown option <%s>.", opt);
/*
 * Thin wrapper: forwards this file's args, plugin table and statistics
 * to pmcstat_shutdown_logging().
 */
735 pmcstat_log_shutdown_logging(void)
738 pmcstat_shutdown_logging(&args, plugins, &pmcstat_stats);
/*
 * Thin wrapper: hands pmcstat_initialize_logging() pointers to this
 * file's kernel-process pointer, PMC count and merge flag, together
 * with args and the plugin table.
 */
742 pmcstat_log_initialize_logging(void)
745 pmcstat_initialize_logging(&pmcstat_kernproc,
746 &args, plugins, &pmcstat_npmcs, &pmcstat_mergepmc);