4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
39 #include <sys/types.h>
40 #include <sys/modctl.h>
44 #include <sys/lockstat.h>
50 #define GETOPT_EOF EOF
53 #include <sys/resource.h>
55 #define mergesort(a, b, c, d) lsmergesort(a, b, c, d)
56 #define GETOPT_EOF (-1)
58 typedef uintptr_t pc_t;
61 #define LOCKSTAT_OPTSTR "x:bths:n:d:i:l:f:e:ckwWgCHEATID:RpPo:V"
63 #define LS_MAX_STACK_DEPTH 50
64 #define LS_MAX_EVENTS 64
66 typedef struct lsrec {
67 struct lsrec *ls_next; /* next in hash chain */
69 uintptr_t ls_lock; /* lock address */
71 char *ls_lock; /* lock name */
73 uintptr_t ls_caller; /* caller address */
74 uint32_t ls_count; /* cumulative event count */
75 uint32_t ls_event; /* type of event */
76 uintptr_t ls_refcnt; /* cumulative reference count */
77 uint64_t ls_time; /* cumulative event duration */
78 uint32_t ls_hist[64]; /* log2(duration) histogram */
79 uintptr_t ls_stack[LS_MAX_STACK_DEPTH];
82 typedef struct lsdata {
83 struct lsrec *lsd_next; /* next available */
84 int lsd_count; /* number of records */
88 * Definitions for the types of experiments which can be run. They are
89 * listed in increasing order of memory cost and processing time cost.
90 * The numerical value of each type is the number of bytes needed per record.
92 #define LS_BASIC offsetof(lsrec_t, ls_time)
93 #define LS_TIME offsetof(lsrec_t, ls_hist[0])
94 #define LS_HIST offsetof(lsrec_t, ls_stack[0])
95 #define LS_STACK(depth) offsetof(lsrec_t, ls_stack[depth])
97 static void report_stats(FILE *, lsrec_t **, size_t, uint64_t, uint64_t);
98 static void report_trace(FILE *, lsrec_t **);
100 extern int symtab_init(void);
101 extern char *addr_to_sym(uintptr_t, uintptr_t *, size_t *);
102 extern uintptr_t sym_to_addr(char *name);
103 extern size_t sym_size(char *name);
104 extern char *strtok_r(char *, const char *, char **);
106 #define DEFAULT_NRECS 10000
107 #define DEFAULT_HZ 97
109 #define MIN_AGGSIZE (16 * 1024)
110 #define MAX_AGGSIZE (32 * 1024 * 1024)
112 static int g_stkdepth;
113 static int g_topn = INT_MAX;
114 static hrtime_t g_elapsed;
115 static int g_rates = 0;
116 static int g_pflag = 0;
117 static int g_Pflag = 0;
118 static int g_wflag = 0;
119 static int g_Wflag = 0;
120 static int g_cflag = 0;
121 static int g_kflag = 0;
122 static int g_gflag = 0;
123 static int g_Vflag = 0;
124 static int g_tracing = 0;
125 static size_t g_recsize;
126 static size_t g_nrecs;
127 static int g_nrecs_used;
128 static uchar_t g_enabled[LS_MAX_EVENTS];
129 static hrtime_t g_min_duration[LS_MAX_EVENTS];
130 static dtrace_hdl_t *g_dtp;
131 static char *g_predicate;
132 static char *g_ipredicate;
134 static int g_proglen;
135 static int g_dropped;
137 typedef struct ls_event_info {
142 char ev_name[DTRACE_NAMELEN];
147 static ls_event_info_t g_event_info[LS_MAX_EVENTS] = {
148 { 'C', "Lock", "Adaptive mutex spin", "nsec",
149 "lockstat:::adaptive-spin" },
150 { 'C', "Lock", "Adaptive mutex block", "nsec",
151 "lockstat:::adaptive-block" },
152 { 'C', "Lock", "Spin lock spin", "nsec",
153 "lockstat:::spin-spin" },
154 { 'C', "Lock", "Thread lock spin", "nsec",
155 "lockstat:::thread-spin" },
156 { 'C', "Lock", "R/W writer blocked by writer", "nsec",
157 "lockstat:::rw-block", "arg2 == 0 && arg3 == 1" },
158 { 'C', "Lock", "R/W writer blocked by readers", "nsec",
159 "lockstat:::rw-block", "arg2 == 0 && arg3 == 0 && arg4" },
160 { 'C', "Lock", "R/W reader blocked by writer", "nsec",
161 "lockstat:::rw-block", "arg2 != 0 && arg3 == 1" },
162 { 'C', "Lock", "R/W reader blocked by write wanted", "nsec",
163 "lockstat:::rw-block", "arg2 != 0 && arg3 == 0 && arg4" },
164 { 'C', "Lock", "R/W writer spin on writer", "nsec",
165 "lockstat:::rw-spin", "arg2 == 0 && arg3 == 1" },
166 { 'C', "Lock", "R/W writer spin on readers", "nsec",
167 "lockstat:::rw-spin", "arg2 == 0 && arg3 == 0 && arg4" },
168 { 'C', "Lock", "R/W reader spin on writer", "nsec",
169 "lockstat:::rw-spin", "arg2 != 0 && arg3 == 1" },
170 { 'C', "Lock", "R/W reader spin on write wanted", "nsec",
171 "lockstat:::rw-spin", "arg2 != 0 && arg3 == 0 && arg4" },
172 { 'C', "Lock", "SX exclusive block", "nsec",
173 "lockstat:::sx-block", "arg2 == 0" },
174 { 'C', "Lock", "SX shared block", "nsec",
175 "lockstat:::sx-block", "arg2 != 0" },
176 { 'C', "Lock", "SX exclusive spin", "nsec",
177 "lockstat:::sx-spin", "arg2 == 0" },
178 { 'C', "Lock", "SX shared spin", "nsec",
179 "lockstat:::sx-spin", "arg2 != 0" },
180 { 'C', "Lock", "Unknown event (type 16)", "units" },
181 { 'C', "Lock", "Unknown event (type 17)", "units" },
182 { 'C', "Lock", "Unknown event (type 18)", "units" },
183 { 'C', "Lock", "Unknown event (type 19)", "units" },
184 { 'C', "Lock", "Unknown event (type 20)", "units" },
185 { 'C', "Lock", "Unknown event (type 21)", "units" },
186 { 'C', "Lock", "Unknown event (type 22)", "units" },
187 { 'C', "Lock", "Unknown event (type 23)", "units" },
188 { 'C', "Lock", "Unknown event (type 24)", "units" },
189 { 'C', "Lock", "Unknown event (type 25)", "units" },
190 { 'C', "Lock", "Unknown event (type 26)", "units" },
191 { 'C', "Lock", "Unknown event (type 27)", "units" },
192 { 'C', "Lock", "Unknown event (type 28)", "units" },
193 { 'C', "Lock", "Unknown event (type 29)", "units" },
194 { 'C', "Lock", "Unknown event (type 30)", "units" },
195 { 'C', "Lock", "Unknown event (type 31)", "units" },
196 { 'H', "Lock", "Adaptive mutex hold", "nsec",
197 "lockstat:::adaptive-release", NULL,
198 "lockstat:::adaptive-acquire" },
199 { 'H', "Lock", "Spin lock hold", "nsec",
200 "lockstat:::spin-release", NULL,
201 "lockstat:::spin-acquire" },
202 { 'H', "Lock", "R/W writer hold", "nsec",
203 "lockstat:::rw-release", "arg1 == 0",
204 "lockstat:::rw-acquire" },
205 { 'H', "Lock", "R/W reader hold", "nsec",
206 "lockstat:::rw-release", "arg1 == 1",
207 "lockstat:::rw-acquire" },
208 { 'H', "Lock", "SX shared hold", "nsec",
209 "lockstat:::sx-release", "arg1 == 0",
210 "lockstat:::sx-acquire" },
211 { 'H', "Lock", "SX exclusive hold", "nsec",
212 "lockstat:::sx-release", "arg1 == 1",
213 "lockstat:::sx-acquire" },
214 { 'H', "Lock", "Unknown event (type 38)", "units" },
215 { 'H', "Lock", "Unknown event (type 39)", "units" },
216 { 'H', "Lock", "Unknown event (type 40)", "units" },
217 { 'H', "Lock", "Unknown event (type 41)", "units" },
218 { 'H', "Lock", "Unknown event (type 42)", "units" },
219 { 'H', "Lock", "Unknown event (type 43)", "units" },
220 { 'H', "Lock", "Unknown event (type 44)", "units" },
221 { 'H', "Lock", "Unknown event (type 45)", "units" },
222 { 'H', "Lock", "Unknown event (type 46)", "units" },
223 { 'H', "Lock", "Unknown event (type 47)", "units" },
224 { 'H', "Lock", "Unknown event (type 48)", "units" },
225 { 'H', "Lock", "Unknown event (type 49)", "units" },
226 { 'H', "Lock", "Unknown event (type 50)", "units" },
227 { 'H', "Lock", "Unknown event (type 51)", "units" },
228 { 'H', "Lock", "Unknown event (type 52)", "units" },
229 { 'H', "Lock", "Unknown event (type 53)", "units" },
230 { 'H', "Lock", "Unknown event (type 54)", "units" },
231 { 'H', "Lock", "Unknown event (type 55)", "units" },
233 { 'I', "CPU+PIL", "Profiling interrupt", "nsec",
235 { 'I', "CPU+Pri_Class", "Profiling interrupt", "nsec",
237 "profile:::profile-97", NULL },
238 { 'I', "Lock", "Unknown event (type 57)", "units" },
239 { 'I', "Lock", "Unknown event (type 58)", "units" },
240 { 'I', "Lock", "Unknown event (type 59)", "units" },
241 { 'E', "Lock", "Recursive lock entry detected", "(N/A)",
242 "lockstat:::rw-release", NULL, "lockstat:::rw-acquire" },
243 { 'E', "Lock", "Lockstat enter failure", "(N/A)" },
244 { 'E', "Lock", "Lockstat exit failure", "nsec" },
245 { 'E', "Lock", "Lockstat record failure", "(N/A)" },
249 static char *g_pri_class[] = {
259 fail(int do_perror, const char *message, ...)
262 int save_errno = errno;
264 va_start(args, message);
265 (void) fprintf(stderr, "lockstat: ");
266 (void) vfprintf(stderr, message, args);
269 (void) fprintf(stderr, ": %s", strerror(save_errno));
270 (void) fprintf(stderr, "\n");
275 dfail(const char *message, ...)
279 va_start(args, message);
280 (void) fprintf(stderr, "lockstat: ");
281 (void) vfprintf(stderr, message, args);
283 (void) fprintf(stderr, ": %s\n",
284 dtrace_errmsg(g_dtp, dtrace_errno(g_dtp)));
290 show_events(char event_type, char *desc)
292 int i, first = -1, last;
294 for (i = 0; i < LS_MAX_EVENTS; i++) {
295 ls_event_info_t *evp = &g_event_info[i];
296 if (evp->ev_type != event_type ||
297 strncmp(evp->ev_desc, "Unknown event", 13) == 0)
304 (void) fprintf(stderr,
305 "\n%s events (lockstat -%c or lockstat -e %d-%d):\n\n",
306 desc, event_type, first, last);
308 for (i = first; i <= last; i++)
309 (void) fprintf(stderr,
310 "%4d = %s\n", i, g_event_info[i].ev_desc);
316 (void) fprintf(stderr,
317 "Usage: lockstat [options] command [args]\n"
318 "\nGeneral options:\n\n"
319 " -V print the corresponding D program\n"
320 "\nEvent selection options:\n\n"
321 " -C watch contention events [on by default]\n"
322 " -E watch error events [off by default]\n"
323 " -H watch hold events [off by default]\n"
324 " -I watch interrupt events [off by default]\n"
325 " -A watch all lock events [equivalent to -CH]\n"
326 " -e event_list only watch the specified events (shown below);\n"
327 " <event_list> is a comma-separated list of\n"
328 " events or ranges of events, e.g. 1,4-7,35\n"
329 " -i rate interrupt rate for -I [default: %d Hz]\n"
330 "\nData gathering options:\n\n"
331 " -b basic statistics (lock, caller, event count)\n"
332 " -t timing for all events [default]\n"
333 " -h histograms for event times\n"
334 " -s depth stack traces <depth> deep\n"
335 " -x opt[=val] enable or modify DTrace options\n"
336 "\nData filtering options:\n\n"
337 " -n nrecords maximum number of data records [default: %d]\n"
338 " -l lock[,size] only watch <lock>, which can be specified as a\n"
339 " symbolic name or hex address; <size> defaults\n"
340 " to the ELF symbol size if available, 1 if not\n"
341 " -f func[,size] only watch events generated by <func>\n"
342 " -d duration only watch events longer than <duration>\n"
343 " -T trace (rather than sample) events\n"
344 "\nData reporting options:\n\n"
346 " -c coalesce lock data for arrays like pse_mutex[]\n"
348 " -k coalesce PCs within functions\n"
349 " -g show total events generated by function\n"
350 " -w wherever: don't distinguish events by caller\n"
351 " -W whichever: don't distinguish events by lock\n"
352 " -R display rates rather than counts\n"
353 " -p parsable output format (awk(1)-friendly)\n"
354 " -P sort lock data by (count * avg_time) product\n"
355 " -D n only display top <n> events of each type\n"
356 " -o filename send output to <filename>\n",
357 DEFAULT_HZ, DEFAULT_NRECS);
359 show_events('C', "Contention");
360 show_events('H', "Hold-time");
361 show_events('I', "Interrupt");
362 show_events('E', "Error");
363 (void) fprintf(stderr, "\n");
369 lockcmp(lsrec_t *a, lsrec_t *b)
373 if (a->ls_event < b->ls_event)
375 if (a->ls_event > b->ls_event)
378 for (i = g_stkdepth - 1; i >= 0; i--) {
379 if (a->ls_stack[i] < b->ls_stack[i])
381 if (a->ls_stack[i] > b->ls_stack[i])
385 if (a->ls_caller < b->ls_caller)
387 if (a->ls_caller > b->ls_caller)
391 if (a->ls_lock < b->ls_lock)
393 if (a->ls_lock > b->ls_lock)
398 return (strcmp(a->ls_lock, b->ls_lock));
403 countcmp(lsrec_t *a, lsrec_t *b)
405 if (a->ls_event < b->ls_event)
407 if (a->ls_event > b->ls_event)
410 return (b->ls_count - a->ls_count);
414 timecmp(lsrec_t *a, lsrec_t *b)
416 if (a->ls_event < b->ls_event)
418 if (a->ls_event > b->ls_event)
421 if (a->ls_time < b->ls_time)
423 if (a->ls_time > b->ls_time)
430 lockcmp_anywhere(lsrec_t *a, lsrec_t *b)
432 if (a->ls_event < b->ls_event)
434 if (a->ls_event > b->ls_event)
438 if (a->ls_lock < b->ls_lock)
440 if (a->ls_lock > b->ls_lock)
445 return (strcmp(a->ls_lock, b->ls_lock));
450 lock_and_count_cmp_anywhere(lsrec_t *a, lsrec_t *b)
456 if (a->ls_event < b->ls_event)
458 if (a->ls_event > b->ls_event)
462 if (a->ls_lock < b->ls_lock)
464 if (a->ls_lock > b->ls_lock)
467 cmp = strcmp(a->ls_lock, b->ls_lock);
472 return (b->ls_count - a->ls_count);
476 sitecmp_anylock(lsrec_t *a, lsrec_t *b)
480 if (a->ls_event < b->ls_event)
482 if (a->ls_event > b->ls_event)
485 for (i = g_stkdepth - 1; i >= 0; i--) {
486 if (a->ls_stack[i] < b->ls_stack[i])
488 if (a->ls_stack[i] > b->ls_stack[i])
492 if (a->ls_caller < b->ls_caller)
494 if (a->ls_caller > b->ls_caller)
501 site_and_count_cmp_anylock(lsrec_t *a, lsrec_t *b)
505 if (a->ls_event < b->ls_event)
507 if (a->ls_event > b->ls_event)
510 for (i = g_stkdepth - 1; i >= 0; i--) {
511 if (a->ls_stack[i] < b->ls_stack[i])
513 if (a->ls_stack[i] > b->ls_stack[i])
517 if (a->ls_caller < b->ls_caller)
519 if (a->ls_caller > b->ls_caller)
522 return (b->ls_count - a->ls_count);
526 lsmergesort(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **a, lsrec_t **b, int n)
532 lsmergesort(cmp, a, b, m);
534 lsmergesort(cmp, a + m, b + m, n - m);
535 for (i = m; i > 0; i--)
537 for (j = m - 1; j < n - 1; j++)
538 b[n + m - j - 2] = a[j + 1];
540 *a++ = cmp(b[i], b[j]) < 0 ? b[i++] : b[j--];
545 coalesce(int (*cmp)(lsrec_t *, lsrec_t *), lsrec_t **lock, int n)
548 lsrec_t *target, *current;
552 for (i = 1; i < n; i++) {
554 if (cmp(current, target) != 0) {
558 current->ls_event = LS_MAX_EVENTS;
559 target->ls_count += current->ls_count;
560 target->ls_refcnt += current->ls_refcnt;
561 if (g_recsize < LS_TIME)
563 target->ls_time += current->ls_time;
564 if (g_recsize < LS_HIST)
566 for (j = 0; j < 64; j++)
567 target->ls_hist[j] += current->ls_hist[j];
572 coalesce_symbol(uintptr_t *addrp)
577 if (addr_to_sym(*addrp, &symoff, &symsize) != NULL && symoff < symsize)
582 predicate_add(char **pred, char *what, char *cmp, uintptr_t value)
596 newlen = len + strlen(what) + 32 + strlen("( && )");
597 new = malloc(newlen);
599 if (*pred[0] != '\0') {
601 (void) sprintf(new, "(%s) && (%s %s 0x%p)",
602 *pred, what, cmp, (void *)value);
604 (void) sprintf(new, "(%s) && (%s)", *pred, what);
608 (void) sprintf(new, "%s %s 0x%p",
609 what, cmp, (void *)value);
611 (void) sprintf(new, "%s", what);
620 predicate_destroy(char **pred)
627 filter_add(char **filt, char *what, uintptr_t base, uintptr_t size)
629 char buf[256], *c = buf, *new;
638 (void) sprintf(c, "%s(%s >= 0x%p && %s < 0x%p)", *filt[0] != '\0' ?
639 " || " : "", what, (void *)base, what, (void *)(base + size));
641 (void) sprintf(c, "%s(%s >= %p && %s < %p)", *filt[0] != '\0' ?
642 " || " : "", what, (void *)base, what, (void *)(base + size));
645 newlen = (len = strlen(*filt) + 1) + strlen(c);
646 new = malloc(newlen);
647 bcopy(*filt, new, len);
648 (void) strcat(new, c);
654 filter_destroy(char **filt)
661 dprog_add(const char *fmt, ...)
668 size = vsnprintf(&c, 1, fmt, args) + 1;
671 if (g_proglen == 0) {
674 offs = g_proglen - 1;
677 g_proglen = offs + size;
679 if ((g_prog = realloc(g_prog, g_proglen)) == NULL)
680 fail(1, "failed to reallocate program text");
683 (void) vsnprintf(&g_prog[offs], size, fmt, args);
688 * This function may read like an open sewer, but keep in mind that programs
689 * that generate other programs are rarely pretty. If one has the unenviable
690 * task of maintaining or -- worse -- extending this code, use the -V option
691 * to examine the D program as generated by this function.
694 dprog_addevent(int event)
696 ls_event_info_t *info = &g_event_info[event];
699 const char *arg0, *caller;
705 if (info->ev_name[0] == '\0')
708 if (info->ev_type == 'I') {
710 * For interrupt events, arg0 (normally the lock pointer) is
711 * the CPU address plus the current pil, and arg1 (normally
712 * the number of nanoseconds) is the number of nanoseconds
713 * late -- and it's stored in arg2.
716 arg0 = "(uintptr_t)curthread->t_cpu + \n"
717 "\t curthread->t_cpu->cpu_profile_pil";
719 arg0 = "(uintptr_t)(curthread->td_oncpu << 16) + \n"
720 "\t 0x01000000 + curthread->td_pri_class";
722 caller = "(uintptr_t)arg0";
726 arg0 = "(uintptr_t)arg0";
728 arg0 = "stringof(args[0]->lock_object.lo_name)";
733 if (g_recsize > LS_HIST) {
734 for (depth = 0; g_recsize > LS_STACK(depth); depth++)
738 (void) sprintf(stack, "\tstack(%d);\n", depth);
740 (void) sprintf(stack, ", stack(%d)", depth);
743 (void) sprintf(stack, "");
746 if (info->ev_acquire != NULL) {
748 * If this is a hold event, we need to generate an additional
749 * clause for the acquire; the clause for the release will be
750 * generated with the aggregating statement, below.
752 dprog_add("%s\n", info->ev_acquire);
753 predicate_add(&pred, info->ev_predicate, NULL, 0);
754 predicate_add(&pred, g_predicate, NULL, 0);
756 dprog_add("/%s/\n", pred);
759 (void) sprintf(buf, "self->ev%d[(uintptr_t)arg0]", event);
761 if (info->ev_type == 'H') {
762 dprog_add("\t%s = timestamp;\n", buf);
765 * If this isn't a hold event, it's the recursive
766 * error event. For this, we simply bump the
767 * thread-local, per-lock count.
769 dprog_add("\t%s++;\n", buf);
773 predicate_destroy(&pred);
776 if (info->ev_type == 'E') {
778 * If this is the recursive lock error event, we need
779 * to generate an additional clause to decrement the
780 * thread-local, per-lock count. This assures that we
781 * only execute the aggregating clause if we have
784 dprog_add("%s\n", info->ev_name);
785 dprog_add("/%s/\n{\n\t%s--;\n}\n\n", buf, buf);
788 predicate_add(&pred, buf, NULL, 0);
790 if (info->ev_type == 'H') {
791 (void) sprintf(buf, "timestamp -\n\t "
792 "self->ev%d[(uintptr_t)arg0]", event);
797 predicate_add(&pred, info->ev_predicate, NULL, 0);
798 if (info->ev_type != 'I')
799 predicate_add(&pred, g_predicate, NULL, 0);
801 predicate_add(&pred, g_ipredicate, NULL, 0);
804 if ((dur = g_min_duration[event]) != 0)
805 predicate_add(&pred, arg1, ">=", dur);
807 dprog_add("%s\n", info->ev_name);
810 dprog_add("/%s/\n", pred);
811 predicate_destroy(&pred);
816 dprog_add("\ttrace(%dULL);\n", event);
817 dprog_add("\ttrace(%s);\n", arg0);
818 dprog_add("\ttrace(%s);\n", caller);
822 * The ordering here is important: when we process the
823 * aggregate, we count on the fact that @avg appears before
824 * @hist in program order to assure that @avg is assigned the
825 * first aggregation variable ID and @hist assigned the
826 * second; see the comment in process_aggregate() for details.
828 dprog_add("\t@avg[%dULL, %s, %s%s] = avg(%s);\n",
829 event, arg0, caller, stack, arg1);
831 if (g_recsize >= LS_HIST) {
832 dprog_add("\t@hist[%dULL, %s, %s%s] = quantize"
833 "(%s);\n", event, arg0, caller, stack, arg1);
837 if (info->ev_acquire != NULL)
838 dprog_add("\tself->ev%d[arg0] = 0;\n", event);
847 dtrace_proginfo_t info;
850 (void) fprintf(stderr, "lockstat: vvvv D program vvvv\n");
851 (void) fputs(g_prog, stderr);
852 (void) fprintf(stderr, "lockstat: ^^^^ D program ^^^^\n");
855 if ((prog = dtrace_program_strcompile(g_dtp, g_prog,
856 DTRACE_PROBESPEC_NAME, 0, 0, NULL)) == NULL)
857 dfail("failed to compile program");
859 if (dtrace_program_exec(g_dtp, prog, &info) == -1)
860 dfail("failed to enable probes");
862 if (dtrace_go(g_dtp) != 0)
863 dfail("couldn't start tracing");
877 dtrace_optval_t val, status, agg;
878 struct sigaction act;
879 struct itimerspec ts;
883 if (dtrace_getopt(g_dtp, "statusrate", &status) == -1)
884 dfail("failed to get 'statusrate'");
886 if (dtrace_getopt(g_dtp, "aggrate", &agg) == -1)
887 dfail("failed to get 'statusrate'");
890 * We would want to awaken at a rate that is the GCD of the statusrate
891 * and the aggrate -- but that seems a bit absurd. Instead, we'll
892 * simply awaken at a rate that is the more frequent of the two, which
893 * assures that we're never later than the interval implied by the
894 * more frequent rate.
896 val = status < agg ? status : agg;
898 (void) sigemptyset(&act.sa_mask);
900 act.sa_handler = status_fire;
901 (void) sigaction(SIGUSR1, &act, NULL);
903 ev.sigev_notify = SIGEV_SIGNAL;
904 ev.sigev_signo = SIGUSR1;
906 if (timer_create(CLOCK_REALTIME, &ev, &tid) == -1)
907 dfail("cannot create CLOCK_REALTIME timer");
909 ts.it_value.tv_sec = val / NANOSEC;
910 ts.it_value.tv_nsec = val % NANOSEC;
911 ts.it_interval = ts.it_value;
913 if (timer_settime(tid, TIMER_RELTIME, &ts, NULL) == -1)
914 dfail("cannot set time on CLOCK_REALTIME timer");
920 if (!g_tracing && dtrace_aggregate_snap(g_dtp) != 0)
921 dfail("failed to snap aggregate");
923 if (dtrace_status(g_dtp) == -1)
924 dfail("dtrace_status()");
928 lsrec_fill(lsrec_t *lsrec, const dtrace_recdesc_t *rec, int nrecs, caddr_t data)
930 bzero(lsrec, g_recsize);
933 if ((g_recsize > LS_HIST && nrecs < 4) || (nrecs < 3))
934 fail(0, "truncated DTrace record");
936 if (rec->dtrd_size != sizeof (uint64_t))
937 fail(0, "bad event size in first record");
939 /* LINTED - alignment */
940 lsrec->ls_event = (uint32_t)*((uint64_t *)(data + rec->dtrd_offset));
944 if (rec->dtrd_size != sizeof (uintptr_t))
945 fail(0, "bad lock address size in second record");
947 /* LINTED - alignment */
948 lsrec->ls_lock = *((uintptr_t *)(data + rec->dtrd_offset));
951 lsrec->ls_lock = strdup((const char *)(data + rec->dtrd_offset));
955 if (rec->dtrd_size != sizeof (uintptr_t))
956 fail(0, "bad caller size in third record");
958 /* LINTED - alignment */
959 lsrec->ls_caller = *((uintptr_t *)(data + rec->dtrd_offset));
962 if (g_recsize > LS_HIST) {
966 frames = rec->dtrd_size / sizeof (pc_t);
967 /* LINTED - alignment */
968 stack = (pc_t *)(data + rec->dtrd_offset);
970 for (i = 1; i < frames; i++)
971 lsrec->ls_stack[i - 1] = stack[i];
977 count_aggregate(const dtrace_aggdata_t *agg, void *arg)
979 *((size_t *)arg) += 1;
981 return (DTRACE_AGGWALK_NEXT);
985 process_aggregate(const dtrace_aggdata_t *agg, void *arg)
987 const dtrace_aggdesc_t *aggdesc = agg->dtada_desc;
988 caddr_t data = agg->dtada_data;
989 lsdata_t *lsdata = arg;
990 lsrec_t *lsrec = lsdata->lsd_next;
991 const dtrace_recdesc_t *rec;
992 uint64_t *avg, *quantized;
995 assert(lsdata->lsd_count < g_nrecs);
998 * Aggregation variable IDs are guaranteed to be generated in program
999 * order, and they are guaranteed to start from DTRACE_AGGVARIDNONE
1000 * plus one. As "avg" appears before "hist" in program order, we know
1001 * that "avg" will be allocated the first aggregation variable ID, and
1002 * "hist" will be allocated the second aggregation variable ID -- and
1003 * we therefore use the aggregation variable ID to differentiate the
1006 if (aggdesc->dtagd_varid > DTRACE_AGGVARIDNONE + 1) {
1008 * If this is the histogram entry. We'll copy the quantized
1009 * data into lc_hist, and jump over the rest.
1011 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1013 if (aggdesc->dtagd_varid != DTRACE_AGGVARIDNONE + 2)
1014 fail(0, "bad variable ID in aggregation record");
1016 if (rec->dtrd_size !=
1017 DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
1018 fail(0, "bad quantize size in aggregation record");
1020 /* LINTED - alignment */
1021 quantized = (uint64_t *)(data + rec->dtrd_offset);
1023 for (i = DTRACE_QUANTIZE_ZEROBUCKET, j = 0;
1024 i < DTRACE_QUANTIZE_NBUCKETS; i++, j++)
1025 lsrec->ls_hist[j] = quantized[i];
1030 lsrec_fill(lsrec, &aggdesc->dtagd_rec[1],
1031 aggdesc->dtagd_nrecs - 1, data);
1033 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1035 if (rec->dtrd_size != 2 * sizeof (uint64_t))
1036 fail(0, "bad avg size in aggregation record");
1038 /* LINTED - alignment */
1039 avg = (uint64_t *)(data + rec->dtrd_offset);
1040 lsrec->ls_count = (uint32_t)avg[0];
1041 lsrec->ls_time = (uintptr_t)avg[1];
1043 if (g_recsize >= LS_HIST)
1044 return (DTRACE_AGGWALK_NEXT);
1047 lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
1048 lsdata->lsd_count++;
1050 return (DTRACE_AGGWALK_NEXT);
1054 process_trace(const dtrace_probedata_t *pdata, void *arg)
1056 lsdata_t *lsdata = arg;
1057 lsrec_t *lsrec = lsdata->lsd_next;
1058 dtrace_eprobedesc_t *edesc = pdata->dtpda_edesc;
1059 caddr_t data = pdata->dtpda_data;
1061 if (lsdata->lsd_count >= g_nrecs)
1062 return (DTRACE_CONSUME_NEXT);
1064 lsrec_fill(lsrec, edesc->dtepd_rec, edesc->dtepd_nrecs, data);
1066 lsdata->lsd_next = (lsrec_t *)((uintptr_t)lsrec + g_recsize);
1067 lsdata->lsd_count++;
1069 return (DTRACE_CONSUME_NEXT);
1073 process_data(FILE *out, char *data)
1077 /* LINTED - alignment */
1078 lsdata.lsd_next = (lsrec_t *)data;
1079 lsdata.lsd_count = 0;
1082 if (dtrace_consume(g_dtp, out,
1083 process_trace, NULL, &lsdata) != 0)
1084 dfail("failed to consume buffer");
1086 return (lsdata.lsd_count);
1089 if (dtrace_aggregate_walk_keyvarsorted(g_dtp,
1090 process_aggregate, &lsdata) != 0)
1091 dfail("failed to walk aggregate");
1093 return (lsdata.lsd_count);
1098 drophandler(const dtrace_dropdata_t *data, void *arg)
1101 (void) fprintf(stderr, "lockstat: warning: %s", data->dtdda_msg);
1102 return (DTRACE_HANDLE_OK);
1106 main(int argc, char **argv)
1109 lsrec_t *lsp, **current, **first, **sort_buf, **merge_buf;
1116 char *addrp, *offp, *sizep, *evp, *lastp, *p;
1119 int events_specified = 0;
1122 char *filt = NULL, *ifilt = NULL;
1123 static uint64_t ev_count[LS_MAX_EVENTS + 1];
1124 static uint64_t ev_time[LS_MAX_EVENTS + 1];
1125 dtrace_optval_t aggsize;
1131 if ((g_dtp = dtrace_open(DTRACE_VERSION, 0, &err)) == NULL) {
1132 fail(0, "cannot open dtrace library: %s",
1133 dtrace_errmsg(NULL, err));
1136 if (dtrace_handle_drop(g_dtp, &drophandler, NULL) == -1)
1137 dfail("couldn't establish drop handler");
1139 if (symtab_init() == -1)
1140 fail(1, "can't load kernel symbols");
1142 g_nrecs = DEFAULT_NRECS;
1144 while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
1147 g_recsize = LS_BASIC;
1151 g_recsize = LS_TIME;
1155 g_recsize = LS_HIST;
1159 if (!isdigit(optarg[0]))
1161 g_stkdepth = atoi(optarg);
1162 if (g_stkdepth > LS_MAX_STACK_DEPTH)
1163 fail(0, "max stack depth is %d",
1164 LS_MAX_STACK_DEPTH);
1165 g_recsize = LS_STACK(g_stkdepth);
1169 if (!isdigit(optarg[0]))
1171 g_nrecs = atoi(optarg);
1175 if (!isdigit(optarg[0]))
1177 duration = atoll(optarg);
1180 * XXX -- durations really should be per event
1181 * since the units are different, but it's hard
1182 * to express this nicely in the interface.
1183 * Not clear yet what the cleanest solution is.
1185 for (i = 0; i < LS_MAX_EVENTS; i++)
1186 if (g_event_info[i].ev_type != 'E')
1187 g_min_duration[i] = duration;
1192 if (!isdigit(optarg[0]))
1198 fail(0, "max interrupt rate is %d Hz", MAX_HZ);
1200 for (j = 0; j < LS_MAX_EVENTS; j++)
1201 if (strcmp(g_event_info[j].ev_desc,
1202 "Profiling interrupt") == 0)
1205 (void) sprintf(g_event_info[j].ev_name,
1206 "profile:::profile-%d", i);
1211 addrp = strtok(optarg, ",");
1212 sizep = strtok(NULL, ",");
1213 addrp = strtok(optarg, ",+");
1214 offp = strtok(NULL, ",");
1216 size = sizep ? strtoul(sizep, NULL, 0) : 1;
1217 off = offp ? strtoul(offp, NULL, 0) : 0;
1219 if (addrp[0] == '0') {
1220 addr = strtoul(addrp, NULL, 16) + off;
1222 addr = sym_to_addr(addrp) + off;
1224 size = sym_size(addrp) - off;
1225 if (addr - off == 0)
1226 fail(0, "symbol '%s' not found", addrp);
1233 filter_add(&filt, "arg0", addr, size);
1235 filter_add(&filt, "caller", addr, size);
1236 filter_add(&ifilt, "arg0", addr, size);
1241 evp = strtok_r(optarg, ",", &lastp);
1246 (void) strtok(evp, "-");
1247 evp2 = strtok(NULL, "-");
1249 ev2 = evp2 ? atoi(evp2) : ev1;
1250 if ((uint_t)ev1 >= LS_MAX_EVENTS ||
1251 (uint_t)ev2 >= LS_MAX_EVENTS || ev1 > ev2)
1252 fail(0, "-e events out of range");
1253 for (i = ev1; i <= ev2; i++)
1255 evp = strtok_r(NULL, ",", &lastp);
1257 events_specified = 1;
1286 for (i = 0; i < LS_MAX_EVENTS; i++)
1287 if (g_event_info[i].ev_type == c)
1289 events_specified = 1;
1293 for (i = 0; i < LS_MAX_EVENTS; i++)
1294 if (strchr("CH", g_event_info[i].ev_type))
1296 events_specified = 1;
1304 if (!isdigit(optarg[0]))
1306 g_topn = atoi(optarg);
1322 if ((out = fopen(optarg, "w")) == NULL)
1323 fail(1, "error opening file");
1331 if (strchr(LOCKSTAT_OPTSTR, c) == NULL)
1337 predicate_add(&g_predicate, filt, NULL, 0);
1338 filter_destroy(&filt);
1341 if (ifilt != NULL) {
1342 predicate_add(&g_ipredicate, ifilt, NULL, 0);
1343 filter_destroy(&ifilt);
1346 if (g_recsize == 0) {
1348 g_stkdepth = LS_MAX_STACK_DEPTH;
1349 g_recsize = LS_STACK(g_stkdepth);
1351 g_recsize = LS_TIME;
1355 if (g_gflag && g_recsize <= LS_STACK(0))
1356 fail(0, "'-g' requires at least '-s 1' data gathering");
1359 * Make sure the alignment is reasonable
1361 g_recsize = -(-g_recsize & -sizeof (uint64_t));
1363 for (i = 0; i < LS_MAX_EVENTS; i++) {
1365 * If no events were specified, enable -C.
1367 if (!events_specified && g_event_info[i].ev_type == 'C')
1371 for (i = 0; i < LS_MAX_EVENTS; i++) {
1375 if (g_event_info[i].ev_acquire != NULL) {
1377 * If we've enabled a hold event, we must explicitly
1378 * allocate dynamic variable space.
1387 * Make sure there are remaining arguments to specify a child command
1393 if ((ncpus = sysconf(_SC_NPROCESSORS_ONLN)) == -1)
1394 dfail("couldn't determine number of online CPUs");
1397 * By default, we set our data buffer size to be the number of records
1398 * multiplied by the size of the record, doubled to account for some
1399 * DTrace slop and divided by the number of CPUs. We silently clamp
1400 * the aggregation size at both a minimum and a maximum to prevent
1401 * absurdly low or high values.
1403 if ((aggsize = (g_nrecs * g_recsize * 2) / ncpus) < MIN_AGGSIZE)
1404 aggsize = MIN_AGGSIZE;
1406 if (aggsize > MAX_AGGSIZE)
1407 aggsize = MAX_AGGSIZE;
1409 (void) sprintf(aggstr, "%lld", (long long)aggsize);
1412 if (dtrace_setopt(g_dtp, "bufsize", "4k") == -1)
1413 dfail("failed to set 'bufsize'");
1415 if (dtrace_setopt(g_dtp, "aggsize", aggstr) == -1)
1416 dfail("failed to set 'aggsize'");
1420 * If we're using dynamic variables, we set our
1421 * dynamic variable size to be one megabyte per CPU,
1422 * with a hard-limit of 32 megabytes. This may still
1423 * be too small in some cases, but it can be tuned
1424 * manually via -x if need be.
1426 (void) sprintf(aggstr, "%ldm", ncpus < 32 ? ncpus : 32);
1428 if (dtrace_setopt(g_dtp, "dynvarsize", aggstr) == -1)
1429 dfail("failed to set 'dynvarsize'");
1432 if (dtrace_setopt(g_dtp, "bufsize", aggstr) == -1)
1433 dfail("failed to set 'bufsize'");
1436 if (dtrace_setopt(g_dtp, "statusrate", "10sec") == -1)
1437 dfail("failed to set 'statusrate'");
1440 while ((c = getopt(argc, argv, LOCKSTAT_OPTSTR)) != GETOPT_EOF) {
1443 if ((p = strchr(optarg, '=')) != NULL)
1446 if (dtrace_setopt(g_dtp, optarg, p) != 0)
1447 dfail("failed to set -x %s", optarg);
1458 g_elapsed = -gethrtime();
1461 * Spawn the specified command and wait for it to complete.
1465 fail(1, "cannot fork");
1467 (void) dtrace_close(g_dtp);
1468 (void) execvp(argv[0], &argv[0]);
1474 while (waitpid(child, &status, WEXITED) != child)
1476 while (waitpid(child, &status, 0) != child)
1480 g_elapsed += gethrtime();
1482 if (WIFEXITED(status)) {
1483 if (WEXITSTATUS(status) != 0) {
1484 if (exec_errno != 0) {
1486 fail(1, "could not execute %s", argv[0]);
1488 (void) fprintf(stderr,
1489 "lockstat: warning: %s exited with code %d\n",
1490 argv[0], WEXITSTATUS(status));
1493 (void) fprintf(stderr,
1494 "lockstat: warning: %s died on signal %d\n",
1495 argv[0], WTERMSIG(status));
1498 if (dtrace_stop(g_dtp) == -1)
1499 dfail("failed to stop dtrace");
1502 * Before we read out the results, we need to allocate our buffer.
1503 * If we're tracing, then we'll just use the precalculated size. If
1504 * we're not, then we'll take a snapshot of the aggregate, and walk
1505 * it to count the number of records.
1508 if (dtrace_aggregate_snap(g_dtp) != 0)
1509 dfail("failed to snap aggregate");
1513 if (dtrace_aggregate_walk(g_dtp,
1514 count_aggregate, &g_nrecs) != 0)
1515 dfail("failed to walk aggregate");
1519 if ((data_buf = memalign(sizeof (uint64_t),
1520 (g_nrecs + 1) * g_recsize)) == NULL)
1522 if (posix_memalign((void **)&data_buf, sizeof (uint64_t),
1523 (g_nrecs + 1) * g_recsize) )
1525 fail(1, "Memory allocation failed");
1528 * Read out the DTrace data.
1530 g_nrecs_used = process_data(out, data_buf);
1532 if (g_nrecs_used > g_nrecs || g_dropped)
1533 (void) fprintf(stderr, "lockstat: warning: "
1534 "ran out of data records (use -n for more)\n");
1536 /* LINTED - alignment */
1537 for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1538 /* LINTED - alignment */
1539 lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
1540 ev_count[lsp->ls_event] += lsp->ls_count;
1541 ev_time[lsp->ls_event] += lsp->ls_time;
1545 * If -g was specified, convert stacks into individual records.
1548 lsrec_t *newlsp, *oldlsp;
1551 newlsp = memalign(sizeof (uint64_t),
1552 g_nrecs_used * LS_TIME * (g_stkdepth + 1));
1554 posix_memalign((void **)&newlsp, sizeof (uint64_t),
1555 g_nrecs_used * LS_TIME * (g_stkdepth + 1));
1558 fail(1, "Cannot allocate space for -g processing");
1560 /* LINTED - alignment */
1561 for (i = 0, oldlsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1562 /* LINTED - alignment */
1563 oldlsp = (lsrec_t *)((char *)oldlsp + g_recsize)) {
1565 int caller_in_stack = 0;
1567 if (oldlsp->ls_count == 0)
1570 for (fr = 0; fr < g_stkdepth; fr++) {
1571 if (oldlsp->ls_stack[fr] == 0)
1573 if (oldlsp->ls_stack[fr] == oldlsp->ls_caller)
1574 caller_in_stack = 1;
1575 bcopy(oldlsp, lsp, LS_TIME);
1576 lsp->ls_caller = oldlsp->ls_stack[fr];
1578 lsp->ls_lock = strdup(oldlsp->ls_lock);
1580 /* LINTED - alignment */
1581 lsp = (lsrec_t *)((char *)lsp + LS_TIME);
1583 if (!caller_in_stack) {
1584 bcopy(oldlsp, lsp, LS_TIME);
1585 /* LINTED - alignment */
1586 lsp = (lsrec_t *)((char *)lsp + LS_TIME);
1589 free(oldlsp->ls_lock);
1592 g_nrecs = g_nrecs_used =
1593 ((uintptr_t)lsp - (uintptr_t)newlsp) / LS_TIME;
1594 g_recsize = LS_TIME;
1597 data_buf = (char *)newlsp;
1600 if ((sort_buf = calloc(2 * (g_nrecs + 1),
1601 sizeof (void *))) == NULL)
1602 fail(1, "Sort buffer allocation failed");
1603 merge_buf = sort_buf + (g_nrecs + 1);
1606 * Build the sort buffer, discarding zero-count records along the way.
1608 /* LINTED - alignment */
1609 for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1610 /* LINTED - alignment */
1611 lsp = (lsrec_t *)((char *)lsp + g_recsize)) {
1612 if (lsp->ls_count == 0)
1613 lsp->ls_event = LS_MAX_EVENTS;
1617 if (g_nrecs_used == 0)
1621 * Add a sentinel after the last record
1624 lsp->ls_event = LS_MAX_EVENTS;
1627 report_trace(out, sort_buf);
1632 * Application of -g may have resulted in multiple records
1633 * with the same signature; coalesce them.
1636 mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
1637 coalesce(lockcmp, sort_buf, g_nrecs_used);
1641 * Coalesce locks within the same symbol if -c option specified.
1642 * Coalesce PCs within the same function if -k option specified.
1644 if (g_cflag || g_kflag) {
1645 for (i = 0; i < g_nrecs_used; i++) {
1650 coalesce_symbol(&lsp->ls_lock);
1653 for (fr = 0; fr < g_stkdepth; fr++)
1654 coalesce_symbol(&lsp->ls_stack[fr]);
1655 coalesce_symbol(&lsp->ls_caller);
1658 mergesort(lockcmp, sort_buf, merge_buf, g_nrecs_used);
1659 coalesce(lockcmp, sort_buf, g_nrecs_used);
1663 * Coalesce callers if -w option specified
1666 mergesort(lock_and_count_cmp_anywhere,
1667 sort_buf, merge_buf, g_nrecs_used);
1668 coalesce(lockcmp_anywhere, sort_buf, g_nrecs_used);
1672 * Coalesce locks if -W option specified
1675 mergesort(site_and_count_cmp_anylock,
1676 sort_buf, merge_buf, g_nrecs_used);
1677 coalesce(sitecmp_anylock, sort_buf, g_nrecs_used);
1681 * Sort data by contention count (ls_count) or total time (ls_time),
1682 * depending on g_Pflag. Override g_Pflag if time wasn't measured.
1684 if (g_recsize < LS_TIME)
1688 mergesort(timecmp, sort_buf, merge_buf, g_nrecs_used);
1690 mergesort(countcmp, sort_buf, merge_buf, g_nrecs_used);
1693 * Display data by event type
1695 first = &sort_buf[0];
1696 while ((event = (*first)->ls_event) < LS_MAX_EVENTS) {
1698 while ((lsp = *current)->ls_event == event)
1700 report_stats(out, first, current - first, ev_count[event],
1707 * Free lock name buffers
1709 for (i = 0, lsp = (lsrec_t *)data_buf; i < g_nrecs_used; i++,
1710 lsp = (lsrec_t *)((char *)lsp + g_recsize))
1718 format_symbol(char *buf, uintptr_t addr, int show_size)
1724 symname = addr_to_sym(addr, &symoff, &symsize);
1726 if (show_size && symoff == 0)
1727 (void) sprintf(buf, "%s[%ld]", symname, (long)symsize);
1728 else if (symoff == 0)
1729 (void) sprintf(buf, "%s", symname);
1730 else if (symoff < 16 && bcmp(symname, "cpu[", 4) == 0) /* CPU+PIL */
1732 (void) sprintf(buf, "%s+%ld", symname, (long)symoff);
1734 (void) sprintf(buf, "%s+%s", symname, g_pri_class[(int)symoff]);
1736 else if (symoff <= symsize || (symoff < 256 && addr != symoff))
1737 (void) sprintf(buf, "%s+0x%llx", symname,
1738 (unsigned long long)symoff);
1740 (void) sprintf(buf, "0x%llx", (unsigned long long)addr);
1745 report_stats(FILE *out, lsrec_t **sort_buf, size_t nrecs, uint64_t total_count,
1746 uint64_t total_time)
1748 uint32_t event = sort_buf[0]->ls_event;
1750 double ptotal = 0.0;
1754 int first_bin, last_bin, max_bin_count, total_bin_count;
1757 char lhdr[80], chdr[80];
1759 rectype = g_recsize;
1762 (void) fprintf(out, "%20llu %s\n",
1763 g_rates == 0 ? total_count :
1764 ((unsigned long long)total_count * NANOSEC) / g_elapsed,
1765 g_event_info[event].ev_desc);
1769 (void) sprintf(lhdr, "%s%s",
1770 g_Wflag ? "Hottest " : "", g_event_info[event].ev_lhdr);
1771 (void) sprintf(chdr, "%s%s",
1772 g_wflag ? "Hottest " : "", "Caller");
1776 "\n%s: %.0f events in %.3f seconds (%.0f events/sec)\n\n",
1777 g_event_info[event].ev_desc, (double)total_count,
1778 (double)g_elapsed / NANOSEC,
1779 (double)total_count * NANOSEC / g_elapsed);
1781 if (!g_pflag && rectype < LS_HIST) {
1782 (void) sprintf(buf, "%s", g_event_info[event].ev_units);
1783 (void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
1784 g_rates ? "ops/s" : "Count",
1785 g_gflag ? "genr" : "indv",
1786 "cuml", "rcnt", rectype >= LS_TIME ? buf : "", lhdr, chdr);
1787 (void) fprintf(out, "---------------------------------"
1788 "----------------------------------------------\n");
1792 for (i = 0; i < nrecs; i++) {
1795 if (displayed++ >= g_topn)
1801 (void) fprintf(out, "%u %u",
1802 lsp->ls_event, lsp->ls_count);
1804 (void) fprintf(out, " %s",
1805 format_symbol(buf, lsp->ls_lock, g_cflag));
1807 (void) fprintf(out, " %s", lsp->ls_lock);
1809 (void) fprintf(out, " %s",
1810 format_symbol(buf, lsp->ls_caller, 0));
1811 (void) fprintf(out, " %f",
1812 (double)lsp->ls_refcnt / lsp->ls_count);
1813 if (rectype >= LS_TIME)
1814 (void) fprintf(out, " %llu",
1815 (unsigned long long)lsp->ls_time);
1816 if (rectype >= LS_HIST) {
1817 for (j = 0; j < 64; j++)
1818 (void) fprintf(out, " %u",
1821 for (j = 0; j < LS_MAX_STACK_DEPTH; j++) {
1822 if (rectype <= LS_STACK(j) ||
1823 lsp->ls_stack[j] == 0)
1825 (void) fprintf(out, " %s",
1826 format_symbol(buf, lsp->ls_stack[j], 0));
1828 (void) fprintf(out, "\n");
1832 if (rectype >= LS_HIST) {
1833 (void) fprintf(out, "---------------------------------"
1834 "----------------------------------------------\n");
1835 (void) sprintf(buf, "%s",
1836 g_event_info[event].ev_units);
1837 (void) fprintf(out, "%5s %4s %4s %4s %8s %-22s %-24s\n",
1838 g_rates ? "ops/s" : "Count",
1839 g_gflag ? "genr" : "indv",
1840 "cuml", "rcnt", buf, lhdr, chdr);
1843 if (g_Pflag && total_time != 0)
1844 percent = (lsp->ls_time * 100.00) / total_time;
1846 percent = (lsp->ls_count * 100.00) / total_count;
1850 if (rectype >= LS_TIME)
1851 (void) sprintf(buf, "%llu",
1852 (unsigned long long)(lsp->ls_time / lsp->ls_count));
1856 (void) fprintf(out, "%5llu ",
1857 g_rates == 0 ? lsp->ls_count :
1858 ((uint64_t)lsp->ls_count * NANOSEC) / g_elapsed);
1860 (void) fprintf(out, "%3.0f%% ", percent);
1863 (void) fprintf(out, "---- ");
1865 (void) fprintf(out, "%3.0f%% ", ptotal);
1867 (void) fprintf(out, "%4.2f %8s ",
1868 (double)lsp->ls_refcnt / lsp->ls_count, buf);
1871 (void) fprintf(out, "%-22s ",
1872 format_symbol(buf, lsp->ls_lock, g_cflag));
1874 (void) fprintf(out, "%-22s ", lsp->ls_lock);
1877 (void) fprintf(out, "%-24s\n",
1878 format_symbol(buf, lsp->ls_caller, 0));
1880 if (rectype < LS_HIST)
1883 (void) fprintf(out, "\n");
1884 (void) fprintf(out, "%10s %31s %-9s %-24s\n",
1885 g_event_info[event].ev_units,
1886 "------ Time Distribution ------",
1887 g_rates ? "ops/s" : "count",
1888 rectype > LS_STACK(0) ? "Stack" : "");
1891 while (lsp->ls_hist[first_bin] == 0)
1895 while (lsp->ls_hist[last_bin] == 0)
1899 total_bin_count = 0;
1900 for (j = first_bin; j <= last_bin; j++) {
1901 total_bin_count += lsp->ls_hist[j];
1902 if (lsp->ls_hist[j] > max_bin_count)
1903 max_bin_count = lsp->ls_hist[j];
1907 * If we went a few frames below the caller, ignore them
1909 for (fr = 3; fr > 0; fr--)
1910 if (lsp->ls_stack[fr] == lsp->ls_caller)
1913 for (j = first_bin; j <= last_bin; j++) {
1914 uint_t depth = (lsp->ls_hist[j] * 30) / total_bin_count;
1915 (void) fprintf(out, "%10llu |%s%s %-9u ",
1917 "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@" + 30 - depth,
1919 g_rates == 0 ? lsp->ls_hist[j] :
1920 (uint_t)(((uint64_t)lsp->ls_hist[j] * NANOSEC) /
1922 if (rectype <= LS_STACK(fr) || lsp->ls_stack[fr] == 0) {
1923 (void) fprintf(out, "\n");
1926 (void) fprintf(out, "%-24s\n",
1927 format_symbol(buf, lsp->ls_stack[fr], 0));
1930 while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
1931 (void) fprintf(out, "%15s %-36s %-24s\n", "", "",
1932 format_symbol(buf, lsp->ls_stack[fr], 0));
1938 (void) fprintf(out, "---------------------------------"
1939 "----------------------------------------------\n");
1945 report_trace(FILE *out, lsrec_t **sort_buf)
1950 char buf[256], buf2[256];
1952 rectype = g_recsize;
1955 (void) fprintf(out, "%5s %7s %11s %-24s %-24s\n",
1956 "Event", "Time", "Owner", "Lock", "Caller");
1957 (void) fprintf(out, "---------------------------------"
1958 "----------------------------------------------\n");
1961 for (i = 0; i < g_nrecs_used; i++) {
1965 if (lsp->ls_event >= LS_MAX_EVENTS || lsp->ls_count == 0)
1968 (void) fprintf(out, "%2d %10llu %11p %-24s %-24s\n",
1969 lsp->ls_event, (unsigned long long)lsp->ls_time,
1970 (void *)lsp->ls_next,
1972 format_symbol(buf, lsp->ls_lock, 0),
1976 format_symbol(buf2, lsp->ls_caller, 0));
1978 if (rectype <= LS_STACK(0))
1982 * If we went a few frames below the caller, ignore them
1984 for (fr = 3; fr > 0; fr--)
1985 if (lsp->ls_stack[fr] == lsp->ls_caller)
1988 while (rectype > LS_STACK(fr) && lsp->ls_stack[fr] != 0) {
1989 (void) fprintf(out, "%53s %-24s\n", "",
1990 format_symbol(buf, lsp->ls_stack[fr], 0));
1993 (void) fprintf(out, "\n");