2 * Copyright (c) 2003-2008 Joseph Koshy
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/types.h>
31 #include <sys/cpuset.h>
32 #include <sys/param.h>
33 #include <sys/socket.h>
53 #include "libpmcstat.h"
56 * Get PMC record by id, apply merge policy.
/*
 * pmcstat_lookup_pmcid: search the pmcstat_pmcs list for the record
 * whose pr_pmcid equals pmcid.
 *
 * The statements executed on a match (including any use of
 * pmcstat_mergepmc) and the fall-through return are not visible in
 * this view.  Callers in this file assert that the result is
 * non-NULL.
 */
59 static struct pmcstat_pmcrecord *
60 pmcstat_lookup_pmcid(pmc_id_t pmcid, int pmcstat_mergepmc)
62 struct pmcstat_pmcrecord *pr;
64 LIST_FOREACH(pr, &pmcstat_pmcs, pr_next) {
65 if (pr->pr_pmcid == pmcid) {
76 * Add a {pmcid,name} mapping.
/*
 * pmcstat_pmcid_add: register the interned event name ps for the PMC
 * identified by pmcid.
 *
 * The pmcstat_pmcs list is scanned first.  An entry with the same
 * pr_pmcid is treated as an existing PMC whose name is replaced; an
 * entry with the same interned name is presumably remembered in prm
 * as the merge target (both branch bodies are missing from this
 * view -- TODO confirm).  Otherwise a new record is allocated, given
 * the next index from *pmcstat_npmcs, inserted at the head of the
 * list and announced to both plugins.
 *
 * Exits through err(3) with EX_OSERR when the allocation fails.
 */
80 pmcstat_pmcid_add(pmc_id_t pmcid, pmcstat_interned_string ps,
81 struct pmcstat_args *args, struct pmc_plugins *plugins,
84 struct pmcstat_pmcrecord *pr, *prm;
86 /* Replace an existing name for the PMC. */
88 LIST_FOREACH(pr, &pmcstat_pmcs, pr_next)
89 if (pr->pr_pmcid == pmcid) {
92 } else if (pr->pr_pmcname == ps)
96 * Otherwise, allocate a new descriptor and call the
99 if ((pr = malloc(sizeof(*pr))) == NULL)
100 err(EX_OSERR, "ERROR: Cannot allocate pmc record");
102 pr->pr_pmcid = pmcid;
/* Take the next index and advance the caller-owned counter. */
104 pr->pr_pmcin = (*pmcstat_npmcs)++;
106 pr->pr_dubious_frames = 0;
/* With no merge target (prm is NULL) the record refers to itself. */
107 pr->pr_merge = prm == NULL ? pr : prm;
109 LIST_INSERT_HEAD(&pmcstat_pmcs, pr, pr_next);
/* Notify pa_pplugin first, then pa_plugin; a NULL pl_newpmc hook is skipped. */
111 if (plugins[args->pa_pplugin].pl_newpmc != NULL)
112 plugins[args->pa_pplugin].pl_newpmc(ps, pr);
113 if (plugins[args->pa_plugin].pl_newpmc != NULL)
114 plugins[args->pa_plugin].pl_newpmc(ps, pr);
118 * Unmap images in the range [start..end) associated with process
/*
 * pmcstat_image_unmap: remove the address range [start, end) from
 * the map list pp->pp_map.
 *
 * Every entry is asserted to satisfy ppm_lowpc < ppm_highpc.  An
 * entry that overlaps the range is removed outright, split in two
 * around the range, or trimmed at whichever end overlaps.  Splitting
 * allocates a second entry that shares ppm_image with the first.
 *
 * NOTE(review): the actions taken for entries lying entirely below
 * start or at/above end are on lines missing from this view.
 */
123 pmcstat_image_unmap(struct pmcstat_process *pp, uintfptr_t start,
126 struct pmcstat_pcmap *pcm, *pcmtmp, *pcmnew;
133 * - we could have the range completely in the middle of an
134 * existing pcmap; in this case we have to split the pcmap
135 * structure into two (i.e., generate a 'hole').
136 * - we could have the range covering multiple pcmaps; these
137 * will have to be removed.
138 * - we could have either 'start' or 'end' falling in the
139 * middle of a pcmap; in this case shorten the entry.
141 TAILQ_FOREACH_SAFE(pcm, &pp->pp_map, ppm_next, pcmtmp) {
142 assert(pcm->ppm_lowpc < pcm->ppm_highpc);
143 if (pcm->ppm_highpc <= start)
145 if (pcm->ppm_lowpc >= end)
147 if (pcm->ppm_lowpc >= start && pcm->ppm_highpc <= end) {
149 * The current pcmap is completely inside the
150 * unmapped range: remove it entirely.
152 TAILQ_REMOVE(&pp->pp_map, pcm, ppm_next);
154 } else if (pcm->ppm_lowpc < start && pcm->ppm_highpc > end) {
156 * Split this pcmap into two; curtail the
157 * current map to end at [start-1], and start
158 * the new one at [end].
160 if ((pcmnew = malloc(sizeof(*pcmnew))) == NULL)
162 "ERROR: Cannot split a map entry");
164 pcmnew->ppm_image = pcm->ppm_image;
166 pcmnew->ppm_lowpc = end;
167 pcmnew->ppm_highpc = pcm->ppm_highpc;
/* The existing entry keeps the lower part, [ppm_lowpc, start). */
169 pcm->ppm_highpc = start;
171 TAILQ_INSERT_AFTER(&pp->pp_map, pcm, pcmnew, ppm_next);
174 } else if (pcm->ppm_lowpc < start && pcm->ppm_highpc <= end)
/* Only the tail overlaps: cut the entry back so it ends at start. */
175 pcm->ppm_highpc = start;
176 else if (pcm->ppm_lowpc >= start && pcm->ppm_highpc > end)
/* Only the head overlaps: move the entry start up to end. */
177 pcm->ppm_lowpc = end;
184 * Convert a hwpmc(4) log to profile information. A system-wide
185 * callgraph is generated if FLAG_DO_CALLGRAPHS is set. gmon.out
186 * files usable by gprof(1) are created if FLAG_DO_GPROF is set.
/*
 * pmcstat_analyze_log: read events from args->pa_logparser for as
 * long as pmclog_read() returns 0, dispatching on ev.pl_type.
 *
 * Preconditions: FLAG_DO_ANALYSIS is set in args->pa_flags (asserted)
 * and elf_version(EV_CURRENT) does not return EV_NONE (otherwise the
 * function exits through err(3) with EX_UNAVAILABLE).
 *
 * Per event type, as far as this view shows:
 *   INITIALIZE      - compare the major version byte of the log with
 *                     PMC_VERSION_MAJOR; the mismatch warning also
 *                     requires pa_verbosity > 0.
 *   MAP_IN          - intern the path, obtain the image, determine
 *                     its type while it is still UNKNOWN, and link
 *                     it into the process unless INDETERMINABLE.
 *   MAP_OUT         - unmap [pl_start, pl_end) from the process; an
 *                     unknown process is tolerated.
 *   PCSAMPLE        - count the sample in ps_samples_total and
 *                     *ps_samples_period, look up the PMC record and
 *                     call the pl_process hooks.
 *   CALLCHAIN       - as PCSAMPLE, but samples whose CPU is not in
 *                     args->pa_cpumask are counted in
 *                     ps_samples_skipped.
 *   PMCALLOCATE,
 *   PMCALLOCATEDYN  - record the pmcid to event name association via
 *                     pmcstat_pmcid_add().
 *   PROCEXEC        - remove every entry from the process map, then
 *                     call pmcstat_process_exec() with the new path.
 *   PROCEXIT,
 *   SYSEXIT         - clear pp_isactive on the process.
 *   PROCFORK        - look up the new pid and link the images mapped
 *                     in the parent into ppnew.
 *
 * Returns PMCSTAT_FINISHED when the parser state is PMCLOG_EOF and
 * PMCSTAT_RUNNING when it is PMCLOG_REQUIRE_DATA; any other state
 * leads to the event parsing failed message.
 *
 * NOTE(review): the PCSAMPLE comment in the body spells hwpmc(4) as
 * hpwmc(4).
 */
189 pmcstat_analyze_log(struct pmcstat_args *args,
190 struct pmc_plugins *plugins,
191 struct pmcstat_stats *pmcstat_stats,
192 struct pmcstat_process *pmcstat_kernproc,
193 int pmcstat_mergepmc,
195 int *ps_samples_period)
197 uint32_t cpu, cpuflags;
200 struct pmcstat_image *image;
201 struct pmcstat_process *pp, *ppnew;
202 struct pmcstat_pcmap *ppm, *ppmtmp;
204 struct pmcstat_pmcrecord *pmcr;
205 pmcstat_interned_string image_path;
207 assert(args->pa_flags & FLAG_DO_ANALYSIS);
209 if (elf_version(EV_CURRENT) == EV_NONE)
210 err(EX_UNAVAILABLE, "Elf library initialization failed");
212 while (pmclog_read(args->pa_logparser, &ev) == 0) {
213 assert(ev.pl_state == PMCLOG_OK);
215 switch (ev.pl_type) {
216 case PMCLOG_TYPE_INITIALIZE:
217 if ((ev.pl_u.pl_i.pl_version & 0xFF000000) !=
218 PMC_VERSION_MAJOR << 24 && args->pa_verbosity > 0)
220 "WARNING: Log version 0x%x does not match compiled version 0x%x.",
221 ev.pl_u.pl_i.pl_version, PMC_VERSION_MAJOR);
224 case PMCLOG_TYPE_MAP_IN:
226 * Introduce an address range mapping for a
227 * userland process or the kernel (pid == -1).
229 * We always allocate a process descriptor so
230 * that subsequent samples seen for this
231 * address range are mapped to the current
232 * object being mapped in.
234 pid = ev.pl_u.pl_mi.pl_pid;
236 pp = pmcstat_kernproc;
238 pp = pmcstat_process_lookup(pid,
243 image_path = pmcstat_string_intern(ev.pl_u.pl_mi.
245 image = pmcstat_image_from_path(image_path, pid == -1,
247 if (image->pi_type == PMCSTAT_IMAGE_UNKNOWN)
248 pmcstat_image_determine_type(image, args);
249 if (image->pi_type != PMCSTAT_IMAGE_INDETERMINABLE)
250 pmcstat_image_link(pp, image,
251 ev.pl_u.pl_mi.pl_start);
254 case PMCLOG_TYPE_MAP_OUT:
256 * Remove an address map.
258 pid = ev.pl_u.pl_mo.pl_pid;
260 pp = pmcstat_kernproc;
262 pp = pmcstat_process_lookup(pid, 0);
264 if (pp == NULL) /* unknown process */
267 pmcstat_image_unmap(pp, ev.pl_u.pl_mo.pl_start,
268 ev.pl_u.pl_mo.pl_end);
271 case PMCLOG_TYPE_PCSAMPLE:
273 * Note: the `PCSAMPLE' log entry is not
274 * generated by hpwmc(4) after version 2.
278 * We bring in the gmon file for the image
279 * currently associated with the PMC & pid
280 * pair and increment the appropriate entry
283 pmcstat_stats->ps_samples_total++;
284 *ps_samples_period += 1;
286 pc = ev.pl_u.pl_s.pl_pc;
287 pp = pmcstat_process_lookup(ev.pl_u.pl_s.pl_pid,
290 /* Get PMC record. */
291 pmcr = pmcstat_lookup_pmcid(ev.pl_u.pl_s.pl_pmcid, pmcstat_mergepmc);
292 assert(pmcr != NULL);
296 * Call the plugins processing
297 * TODO: move pmcstat_process_find_map inside plugins
300 if (plugins[args->pa_pplugin].pl_process != NULL)
301 plugins[args->pa_pplugin].pl_process(
303 pmcstat_process_find_map(pp, pc) != NULL, 0);
/*
 * NOTE(review): the pa_plugin hook is invoked without the NULL check
 * that guards the pa_pplugin hook just above -- confirm that
 * pl_process is mandatory for the main plugin.
 */
304 plugins[args->pa_plugin].pl_process(
306 pmcstat_process_find_map(pp, pc) != NULL, 0);
309 case PMCLOG_TYPE_CALLCHAIN:
310 pmcstat_stats->ps_samples_total++;
311 *ps_samples_period += 1;
313 cpuflags = ev.pl_u.pl_cc.pl_cpuflags;
314 cpu = PMC_CALLCHAIN_CPUFLAGS_TO_CPU(cpuflags);
316 /* Filter on the CPU id. */
317 if (!CPU_ISSET(cpu, &(args->pa_cpumask))) {
318 pmcstat_stats->ps_samples_skipped++;
322 pp = pmcstat_process_lookup(ev.pl_u.pl_cc.pl_pid,
325 /* Get PMC record. */
326 pmcr = pmcstat_lookup_pmcid(ev.pl_u.pl_cc.pl_pmcid, pmcstat_mergepmc);
327 assert(pmcr != NULL);
331 * Call the plugins processing
334 if (plugins[args->pa_pplugin].pl_process != NULL)
335 plugins[args->pa_pplugin].pl_process(
337 ev.pl_u.pl_cc.pl_npc,
339 PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags),
341 plugins[args->pa_plugin].pl_process(
343 ev.pl_u.pl_cc.pl_npc,
345 PMC_CALLCHAIN_CPUFLAGS_TO_USERMODE(cpuflags),
349 case PMCLOG_TYPE_PMCALLOCATE:
351 * Record the association pmc id between this
354 pmcstat_pmcid_add(ev.pl_u.pl_a.pl_pmcid,
355 pmcstat_string_intern(ev.pl_u.pl_a.pl_evname),
356 args, plugins, pmcstat_npmcs);
359 case PMCLOG_TYPE_PMCALLOCATEDYN:
361 * Record the association pmc id between this
364 pmcstat_pmcid_add(ev.pl_u.pl_ad.pl_pmcid,
365 pmcstat_string_intern(ev.pl_u.pl_ad.pl_evname),
366 args, plugins, pmcstat_npmcs);
369 case PMCLOG_TYPE_PROCEXEC:
371 * Change the executable image associated with
374 pp = pmcstat_process_lookup(ev.pl_u.pl_x.pl_pid,
377 /* delete the current process map */
378 TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) {
379 TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
384 * Associate this process image.
386 image_path = pmcstat_string_intern(
387 ev.pl_u.pl_x.pl_pathname);
388 assert(image_path != NULL);
389 pmcstat_process_exec(pp, image_path,
390 ev.pl_u.pl_x.pl_entryaddr, args,
391 plugins, pmcstat_stats);
394 case PMCLOG_TYPE_PROCEXIT:
397 * Due to the way the log is generated, the
398 * last few samples corresponding to a process
399 * may appear in the log after the process
400 * exit event is recorded. Thus we keep the
401 * process' descriptor and associated data
402 * structures around, but mark the process as
405 pp = pmcstat_process_lookup(ev.pl_u.pl_e.pl_pid, 0);
408 pp->pp_isactive = 0; /* mark as a zombie */
411 case PMCLOG_TYPE_SYSEXIT:
412 pp = pmcstat_process_lookup(ev.pl_u.pl_se.pl_pid, 0);
415 pp->pp_isactive = 0; /* make a zombie */
418 case PMCLOG_TYPE_PROCFORK:
421 * Allocate a process descriptor for the new
425 pmcstat_process_lookup(ev.pl_u.pl_f.pl_newpid,
429 * If we had been tracking the parent, clone
432 pp = pmcstat_process_lookup(ev.pl_u.pl_f.pl_oldpid, 0);
435 TAILQ_FOREACH(ppm, &pp->pp_map, ppm_next)
436 pmcstat_image_link(ppnew, ppm->ppm_image,
440 default: /* other types of entries are not relevant */
445 if (ev.pl_state == PMCLOG_EOF)
446 return (PMCSTAT_FINISHED);
447 else if (ev.pl_state == PMCLOG_REQUIRE_DATA)
448 return (PMCSTAT_RUNNING);
/*
 * NOTE(review): the record number is cast to uintmax_t but printed
 * with the signed %jd conversion; %ju would match the cast.
 */
451 "ERROR: event parsing failed (record %jd, offset 0x%jx)",
452 (uintmax_t) ev.pl_count + 1, ev.pl_offset);
456 * Open a log file, for reading or writing.
458 * The function returns the fd of a successfully opened log or -1 in
/*
 * pmcstat_open_log: open the log named by path for reading or for
 * writing, as selected by mode.
 *
 *   - A path consisting of a single dash selects descriptor 0 when
 *     mode is PMCSTAT_OPEN_FOR_READ and descriptor 1 otherwise.
 *   - A path that contains a colon and does not begin with a slash
 *     or a dot is split at its last colon; the part before it is
 *     copied into hostname and the part after it is passed as the
 *     service to getaddrinfo() with AF_UNSPEC and SOCK_STREAM.  Each
 *     returned address is tried in turn: for reading the socket is
 *     bound and a connection accepted, for writing it is connected.
 *   - Any other path goes to open(2): O_RDONLY for reading, or
 *     O_WRONLY|O_CREAT|O_TRUNC with permission bits
 *     S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH for writing.
 *
 * The visible error path calls errx(3) with EX_OSERR, naming path
 * and the direction (reading or writing).
 *
 * NOTE(review): the existing comment restricts the host:port form to
 * logs opened for writing, yet the visible code does not test mode
 * before taking that branch and contains a PMCSTAT_OPEN_FOR_READ
 * path that binds and accepts -- confirm which is intended.
 */
463 pmcstat_open_log(const char *path, int mode)
467 const char *p, *errstr;
468 struct addrinfo hints, *res, *res0;
469 char hostname[MAXHOSTNAMELEN];
475 * If 'path' is "-" then open one of stdin or stdout depending
476 * on the value of 'mode'.
478 * If 'path' contains a ':' and does not start with a '/' or '.',
479 * and is being opened for writing, treat it as a "host:port"
480 * specification and open a network socket.
482 * Otherwise, treat 'path' as a file name and open that.
484 if (path[0] == '-' && path[1] == '\0')
485 fd = (mode == PMCSTAT_OPEN_FOR_READ) ? 0 : 1;
486 else if (path[0] != '/' &&
487 path[0] != '.' && strchr(path, ':') != NULL) {
489 p = strrchr(path, ':');
491 if (p == path || hlen >= sizeof(hostname)) {
492 errstr = strerror(EINVAL);
496 assert(hlen < sizeof(hostname));
/*
 * hlen was checked against sizeof(hostname) above, so the explicit
 * terminator written after the copy stays inside the buffer.
 */
497 (void) strncpy(hostname, path, hlen);
498 hostname[hlen] = '\0';
500 (void) memset(&hints, 0, sizeof(hints));
501 hints.ai_family = AF_UNSPEC;
502 hints.ai_socktype = SOCK_STREAM;
503 if ((error = getaddrinfo(hostname, p+1, &hints, &res0)) != 0) {
504 errstr = gai_strerror(error);
509 for (res = res0; res; res = res->ai_next) {
510 if ((fd = socket(res->ai_family, res->ai_socktype,
511 res->ai_protocol)) < 0) {
512 errstr = strerror(errno);
515 if (mode == PMCSTAT_OPEN_FOR_READ) {
516 if (bind(fd, res->ai_addr, res->ai_addrlen) < 0) {
517 errstr = strerror(errno);
523 cfd = accept(fd, NULL, NULL);
526 errstr = strerror(errno);
532 if (connect(fd, res->ai_addr, res->ai_addrlen) < 0) {
533 errstr = strerror(errno);
544 } else if ((fd = open(path, mode == PMCSTAT_OPEN_FOR_READ ?
545 O_RDONLY : (O_WRONLY|O_CREAT|O_TRUNC),
546 S_IRUSR|S_IWUSR|S_IRGRP|S_IROTH)) < 0)
547 errstr = strerror(errno);
551 errx(EX_OSERR, "ERROR: Cannot open \"%s\" for %s: %s.", path,
552 (mode == PMCSTAT_OPEN_FOR_READ ? "reading" : "writing"),
559 * Close a logfile, after first flushing all in-module queued data.
/*
 * pmcstat_close_log: when args->pa_logfd is not -1, call
 * pmc_close_logfile(); a negative result is fatal through err(3)
 * with EX_OSERR.
 *
 * The return value is chosen by FLAG_HAS_PIPE in args->pa_flags:
 * PMCSTAT_EXITING when the flag is set.  The value returned when it
 * is clear is on a line missing from this view.
 */
563 pmcstat_close_log(struct pmcstat_args *args)
565 /* If a local logfile is configured ask the kernel to stop
566 * and flush data. Kernel will close the file when data is flushed
567 * so keep the status to EXITING.
569 if (args->pa_logfd != -1) {
570 if (pmc_close_logfile() < 0)
571 err(EX_OSERR, "ERROR: logging failed");
574 return (args->pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING :
/*
 * pmcstat_initialize_logging: set up the state used while processing
 * a log.
 *
 *   - sets LD_TRACE_LOADED_OBJECTS_FMT1 in the environment,
 *     overwriting any existing value;
 *   - initializes the string table and every bucket of
 *     pmcstat_image_hash and pmcstat_process_hash;
 *   - allocates the process entry with pid -1 that stands for the
 *     kernel and stores it in *pmcstat_kernproc;
 *   - copies args->pa_mergepmc into *pmcstat_mergepmc;
 *   - calls the pl_init hook of pa_pplugin and then of pa_plugin,
 *     skipping a NULL hook.
 *
 * A setenv failure or a failed kernel process allocation exits
 * through err(3) with EX_OSERR.  pmcstat_npmcs is not referenced on
 * the lines visible in this view.
 */
583 pmcstat_initialize_logging(struct pmcstat_process **pmcstat_kernproc,
584 struct pmcstat_args *args, struct pmc_plugins *plugins,
585 int *pmcstat_npmcs, int *pmcstat_mergepmc)
587 struct pmcstat_process *pmcstat_kp;
590 /* use a convenient format for 'ldd' output */
591 if (setenv("LD_TRACE_LOADED_OBJECTS_FMT1","%o \"%p\" %x\n",1) != 0)
592 err(EX_OSERR, "ERROR: Cannot setenv");
594 /* Initialize hash tables */
595 pmcstat_string_initialize();
596 for (i = 0; i < PMCSTAT_NHASH; i++) {
597 LIST_INIT(&pmcstat_image_hash[i]);
598 LIST_INIT(&pmcstat_process_hash[i]);
602 * Create a fake 'process' entry for the kernel with pid -1.
603 * hwpmc(4) will subsequently inform us about where the kernel
604 * and any loaded kernel modules are mapped.
606 if ((pmcstat_kp = pmcstat_process_lookup((pid_t) -1,
607 PMCSTAT_ALLOCATE)) == NULL)
608 err(EX_OSERR, "ERROR: Cannot initialize logging");
610 *pmcstat_kernproc = pmcstat_kp;
615 /* Merge PMC with same name. */
616 *pmcstat_mergepmc = args->pa_mergepmc;
622 if (plugins[args->pa_pplugin].pl_init != NULL)
623 plugins[args->pa_pplugin].pl_init();
624 if (plugins[args->pa_plugin].pl_init != NULL)
625 plugins[args->pa_plugin].pl_init();
633 pmcstat_shutdown_logging(struct pmcstat_args *args,
634 struct pmc_plugins *plugins,
635 struct pmcstat_stats *pmcstat_stats)
637 struct pmcstat_image *pi, *pitmp;
638 struct pmcstat_process *pp, *pptmp;
639 struct pmcstat_pcmap *ppm, *ppmtmp;
643 /* determine where to send the map file */
645 if (args->pa_mapfilename != NULL)
646 mf = (strcmp(args->pa_mapfilename, "-") == 0) ?
647 args->pa_printfile : fopen(args->pa_mapfilename, "w");
649 if (mf == NULL && args->pa_flags & FLAG_DO_GPROF &&
650 args->pa_verbosity >= 2)
651 mf = args->pa_printfile;
654 (void) fprintf(mf, "MAP:\n");
657 * Shutdown the plugins
660 if (plugins[args->pa_plugin].pl_shutdown != NULL)
661 plugins[args->pa_plugin].pl_shutdown(mf);
662 if (plugins[args->pa_pplugin].pl_shutdown != NULL)
663 plugins[args->pa_pplugin].pl_shutdown(mf);
665 for (i = 0; i < PMCSTAT_NHASH; i++) {
666 LIST_FOREACH_SAFE(pi, &pmcstat_image_hash[i], pi_next,
668 if (plugins[args->pa_plugin].pl_shutdownimage != NULL)
669 plugins[args->pa_plugin].pl_shutdownimage(pi);
670 if (plugins[args->pa_pplugin].pl_shutdownimage != NULL)
671 plugins[args->pa_pplugin].pl_shutdownimage(pi);
673 free(pi->pi_symbols);
674 if (pi->pi_addr2line != NULL)
675 pclose(pi->pi_addr2line);
676 LIST_REMOVE(pi, pi_next);
680 LIST_FOREACH_SAFE(pp, &pmcstat_process_hash[i], pp_next,
682 TAILQ_FOREACH_SAFE(ppm, &pp->pp_map, ppm_next, ppmtmp) {
683 TAILQ_REMOVE(&pp->pp_map, ppm, ppm_next);
686 LIST_REMOVE(pp, pp_next);
691 pmcstat_string_shutdown();
694 * Print errors unless -q was specified. Print all statistics
697 #define PRINT(N,V) do { \
698 if (pmcstat_stats->ps_##V || args->pa_verbosity >= 2) \
699 (void) fprintf(args->pa_printfile, " %-40s %d\n",\
700 N, pmcstat_stats->ps_##V); \
703 if (args->pa_verbosity >= 1 && (args->pa_flags & FLAG_DO_ANALYSIS)) {
704 (void) fprintf(args->pa_printfile, "CONVERSION STATISTICS:\n");
705 PRINT("#exec/a.out", exec_aout);
706 PRINT("#exec/elf", exec_elf);
707 PRINT("#exec/unknown", exec_indeterminable);
708 PRINT("#exec handling errors", exec_errors);
709 PRINT("#samples/total", samples_total);
710 PRINT("#samples/unclaimed", samples_unknown_offset);
711 PRINT("#samples/unknown-object", samples_indeterminable);
712 PRINT("#samples/unknown-function", samples_unknown_function);
713 PRINT("#callchain/dubious-frames", callchain_dubious_frames);