]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c
dtrace: Add the 'oformat' libdtrace option
[FreeBSD/FreeBSD.git] / cddl / contrib / opensolaris / lib / libdtrace / common / dt_consume.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25
26 /*
27  * Copyright (c) 2023, Domagoj Stolfa. All rights reserved.
28  * Copyright (c) 2017, Joyent, Inc. All rights reserved.
29  * Copyright (c) 2012 by Delphix. All rights reserved.
30  */
31
32 #include <stdlib.h>
33 #include <strings.h>
34 #include <errno.h>
35 #include <unistd.h>
36 #include <limits.h>
37 #include <assert.h>
38 #include <ctype.h>
39 #ifdef illumos
40 #include <alloca.h>
41 #endif
42 #include <dt_impl.h>
43 #include <dt_pq.h>
44 #include <dt_oformat.h>
45 #ifndef illumos
46 #include <libproc_compat.h>
47 #endif
48
/* Mask that keeps only the low-order 32 bits of a 64-bit quantity. */
#define DT_MASK_LO              0x00000000FFFFFFFFULL

/*
 * Shorthand for dt_print_sym() with NULL passed for its second and third
 * arguments.
 */
#define dt_format_sym(dtp, addr)        \
        dt_print_sym((dtp), NULL, NULL, (addr))
52
/*
 * Output arguments of the dt_format_*_prepare() helpers.  first_bin and
 * last_bin give the inclusive range of buckets to emit; the union carries
 * extra per-action state for lquantize() and llquantize().
 */
typedef struct dt_prepare_args {
        int first_bin;
        int last_bin;
        union {
                struct lquantize_args {
                        int base;
                        uint16_t step;
                        uint16_t levels;
                } lquantize;
                struct llquantize_args {
                        int64_t next;
                        int64_t step;
                        int64_t value;
                        int levels;
                        int order;
                        uint16_t factor;
                        uint16_t low;
                        uint16_t high;
                        uint16_t nsteps;
                } llquantize;
        } u;
} dt_prepare_args_t;

/*
 * Accessor shorthands, so that callers can write args->lquantize_step
 * instead of args->u.lquantize.step.
 */
#define lquantize_step          u.lquantize.step
#define lquantize_levels        u.lquantize.levels
#define lquantize_base          u.lquantize.base

#define llquantize_next         u.llquantize.next
#define llquantize_step         u.llquantize.step
#define llquantize_value        u.llquantize.value
#define llquantize_levels       u.llquantize.levels
#define llquantize_order        u.llquantize.order
#define llquantize_factor       u.llquantize.factor
#define llquantize_low          u.llquantize.low
#define llquantize_high         u.llquantize.high
#define llquantize_nsteps       u.llquantize.nsteps
87
88 /*
89  * We declare this here because (1) we need it and (2) we want to avoid a
90  * dependency on libm in libdtrace.
91  */
static long double
dt_fabsl(long double x)
{
        /* Only strictly negative inputs are negated. */
        return (x < 0 ? -x : x);
}
100
/*
 * Return the number of characters needed to print val in decimal, counting
 * a leading '-' for negative values.  The result is never less than 4.
 */
static int
dt_ndigits(long long val)
{
        int rval = 1;

        if (val < 0) {
                /*
                 * INT64_MIN cannot be negated; INT64_MAX has the same number
                 * of decimal digits, so it stands in for it.
                 */
                val = val == INT64_MIN ? INT64_MAX : -val;
                rval++;
        }

        /*
         * Count digits by dividing the value down rather than multiplying a
         * comparand up:  this counts exact powers of ten correctly and never
         * overflows.
         */
        while (val >= 10) {
                val /= 10;
                rval++;
        }

        return (rval < 4 ? 4 : rval);
}
119
120 /*
121  * 128-bit arithmetic functions needed to support the stddev() aggregating
122  * action.
123  */
/*
 * Return 1 if the 128-bit value a is strictly greater than b, else 0.
 * Element [1] holds the high-order 64 bits and element [0] the low-order.
 */
static int
dt_gt_128(uint64_t *a, uint64_t *b)
{
        /* The high words decide unless they tie. */
        if (a[1] != b[1])
                return (a[1] > b[1]);

        return (a[0] > b[0]);
}
129
/*
 * Return 1 if the 128-bit value a is greater than or equal to b, else 0.
 * Element [1] holds the high-order 64 bits and element [0] the low-order.
 */
static int
dt_ge_128(uint64_t *a, uint64_t *b)
{
        /* The high words decide unless they tie. */
        if (a[1] != b[1])
                return (a[1] > b[1]);

        return (a[0] >= b[0]);
}
135
/*
 * Return 1 if the 128-bit value a is less than or equal to b, else 0.
 * Element [1] holds the high-order 64 bits and element [0] the low-order.
 */
static int
dt_le_128(uint64_t *a, uint64_t *b)
{
        /* The high words decide unless they tie. */
        if (a[1] != b[1])
                return (a[1] < b[1]);

        return (a[0] <= b[0]);
}
141
/*
 * Shift the 128-bit value in a by b, in place.  If b is positive, shift
 * left; if b is negative, shift right.  a[0] is the low-order word and a[1]
 * the high-order word.  Vacated bits are filled with zeroes, and a shift of
 * 128 bits or more in either direction yields zero.
 */
static void
dt_shift_128(uint64_t *a, int b)
{
        unsigned int n;

        if (b == 0)
                return;

        /* Take the magnitude in unsigned arithmetic so INT_MIN is safe. */
        n = b < 0 ? 0U - (unsigned int)b : (unsigned int)b;

        if (n >= 128) {
                a[0] = 0;
                a[1] = 0;
                return;
        }

        if (b < 0) {
                if (n >= 64) {
                        a[0] = a[1] >> (n - 64);
                        a[1] = 0;
                } else {
                        /*
                         * Here 0 < n < 64, so both shift counts are in
                         * range.  The left shift keeps exactly the low n
                         * bits of the high word, which are the bits that
                         * cross into the low word.
                         */
                        a[0] = (a[0] >> n) | (a[1] << (64 - n));
                        a[1] >>= n;
                }
        } else {
                if (n >= 64) {
                        a[1] = a[0] << (n - 64);
                        a[0] = 0;
                } else {
                        /* The top n bits of the low word carry upward. */
                        a[1] = (a[1] << n) | (a[0] >> (64 - n));
                        a[0] <<= n;
                }
        }
}
178
/*
 * Return the zero-based position of the highest set bit in the 128-bit
 * value a (0 when a is 0 or 1).  The caller's value is left untouched.
 */
static int
dt_nbits_128(uint64_t *a)
{
        uint64_t zero[2] = { 0, 0 };
        uint64_t scratch[2];
        int count;

        scratch[0] = a[0];
        scratch[1] = a[1];

        /*
         * The first right shift is deliberately not counted; each further
         * shift needed to reach zero adds one.
         */
        dt_shift_128(scratch, -1);

        for (count = 0; dt_gt_128(scratch, zero); count++)
                dt_shift_128(scratch, -1);

        return (count);
}
197
/*
 * Compute minuend - subtrahend on 128-bit values (element [0] low word,
 * element [1] high word) and store the result in difference.  The result is
 * built in locals first, so difference may alias either operand.
 */
static void
dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
{
        /* A borrow is needed when the low-word subtraction wraps. */
        uint64_t borrow = (minuend[0] < subtrahend[0]) ? 1 : 0;
        uint64_t lo = minuend[0] - subtrahend[0];
        uint64_t hi = minuend[1] - subtrahend[1] - borrow;

        difference[0] = lo;
        difference[1] = hi;
}
210
/*
 * Compute addend1 + addend2 on 128-bit values (element [0] low word,
 * element [1] high word) and store the result in sum.  The result is built
 * in locals first, so sum may alias either operand.
 */
static void
dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
        uint64_t lo = addend1[0] + addend2[0];
        /* The low-word addition wrapped if the result shrank. */
        uint64_t carry = (lo < addend1[0] || lo < addend2[0]) ? 1 : 0;
        uint64_t hi = addend1[1] + addend2[1] + carry;

        sum[0] = lo;
        sum[1] = hi;
}
223
224 /*
225  * The basic idea is to break the 2 64-bit values into 4 32-bit values,
226  * use native multiplication on those, and then re-combine into the
227  * resulting 128-bit value.
228  *
229  * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
230  *     hi1 * hi2 << 64 +
231  *     hi1 * lo2 << 32 +
232  *     hi2 * lo1 << 32 +
233  *     lo1 * lo2
234  */
235 static void
236 dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
237 {
238         uint64_t hi1, hi2, lo1, lo2;
239         uint64_t tmp[2];
240
241         hi1 = factor1 >> 32;
242         hi2 = factor2 >> 32;
243
244         lo1 = factor1 & DT_MASK_LO;
245         lo2 = factor2 & DT_MASK_LO;
246
247         product[0] = lo1 * lo2;
248         product[1] = hi1 * hi2;
249
250         tmp[0] = hi1 * lo2;
251         tmp[1] = 0;
252         dt_shift_128(tmp, 32);
253         dt_add_128(product, tmp, product);
254
255         tmp[0] = hi2 * lo1;
256         tmp[1] = 0;
257         dt_shift_128(tmp, 32);
258         dt_add_128(product, tmp, product);
259 }
260
261 /*
262  * This is long-hand division.
263  *
264  * We initialize subtrahend by shifting divisor left as far as possible. We
265  * loop, comparing subtrahend to dividend:  if subtrahend is smaller, we
266  * subtract and set the appropriate bit in the result.  We then shift
267  * subtrahend right by one bit for the next comparison.
268  */
static void
dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
{
        uint64_t q[2] = { 0, 0 };
        uint64_t bit[2] = { 1, 0 };
        uint64_t rem[2];
        uint64_t sub[2];
        uint64_t div128[2];
        uint64_t d;
        int width = 0;

        assert(divisor != 0);

        /* Work on copies, so quotient may alias dividend. */
        rem[0] = dividend[0];
        rem[1] = dividend[1];

        div128[0] = sub[0] = divisor;
        div128[1] = sub[1] = 0;

        /* Count the significant bits in the divisor. */
        for (d = divisor; d > 0; d >>= 1)
                width++;

        /*
         * Line the divisor's top bit up with bit 127, and move the quotient
         * bit marker by the same amount.
         */
        dt_shift_128(sub, 128 - width);
        dt_shift_128(bit, 128 - width);

        while (dt_ge_128(rem, div128)) {
                if (dt_ge_128(rem, sub)) {
                        dt_subtract_128(rem, sub, rem);
                        q[0] |= bit[0];
                        q[1] |= bit[1];
                }

                dt_shift_128(sub, -1);
                dt_shift_128(bit, -1);
        }

        quotient[0] = q[0];
        quotient[1] = q[1];
}
312
313 /*
314  * This is the long-hand method of calculating a square root.
315  * The algorithm is as follows:
316  *
317  * 1. Group the digits by 2 from the right.
318  * 2. Over the leftmost group, find the largest single-digit number
319  *    whose square is less than that group.
320  * 3. Subtract the result of the previous step (2 or 4, depending) and
321  *    bring down the next two-digit group.
322  * 4. For the result R we have so far, find the largest single-digit number
323  *    x such that 2 * R * 10 * x + x^2 is less than the result from step 3.
324  *    (Note that this is doubling R and performing a decimal left-shift by 1
325  *    and searching for the appropriate decimal to fill the one's place.)
326  *    The value x is the next digit in the square root.
327  * Repeat steps 3 and 4 until the desired precision is reached.  (We're
328  * dealing with integers, so the above is sufficient.)
329  *
330  * In decimal, the square root of 582,734 would be calculated as so:
331  *
332  *     __7__6__3
333  *    | 58 27 34
334  *     -49       (7^2 == 49 => 7 is the first digit in the square root)
335  *      --
336  *       9 27    (Subtract and bring down the next group.)
337  * 146   8 76    (2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in
338  *      -----     the square root)
339  *         51 34 (Subtract and bring down the next group.)
340  * 1523    45 69 (2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit in
341  *         -----  the square root)
342  *          5 65 (remainder)
343  *
344  * The above algorithm applies similarly in binary, but note that the
345  * only possible non-zero value for x in step 4 is 1, so step 4 becomes a
346  * simple decision: is 2 * R * 2 * 1 + 1^2 (aka R << 2 + 1) less than the
347  * preceding difference?
348  *
349  * In binary, the square root of 11011011 would be calculated as so:
350  *
351  *     __1__1__1__0
352  *    | 11 01 10 11
353  *      01          (0 << 2 + 1 == 1 < 11 => this bit is 1)
354  *      --
355  *      10 01 10 11
356  * 101   1 01       (1 << 2 + 1 == 101 < 1001 => next bit is 1)
357  *      -----
358  *       1 00 10 11
359  * 1101    11 01    (11 << 2 + 1 == 1101 < 10010 => next bit is 1)
360  *       -------
361  *          1 01 11
362  * 11101    1 11 01 (111 << 2 + 1 == 11101 > 10111 => last bit is 0)
363  *
364  */
365 static uint64_t
366 dt_sqrt_128(uint64_t *square)
367 {
368         uint64_t result[2] = { 0, 0 };
369         uint64_t diff[2] = { 0, 0 };
370         uint64_t one[2] = { 1, 0 };
371         uint64_t next_pair[2];
372         uint64_t next_try[2];
373         uint64_t bit_pairs, pair_shift;
374         int i;
375
376         bit_pairs = dt_nbits_128(square) / 2;
377         pair_shift = bit_pairs * 2;
378
379         for (i = 0; i <= bit_pairs; i++) {
380                 /*
381                  * Bring down the next pair of bits.
382                  */
383                 next_pair[0] = square[0];
384                 next_pair[1] = square[1];
385                 dt_shift_128(next_pair, -pair_shift);
386                 next_pair[0] &= 0x3;
387                 next_pair[1] = 0;
388
389                 dt_shift_128(diff, 2);
390                 dt_add_128(diff, next_pair, diff);
391
392                 /*
393                  * next_try = R << 2 + 1
394                  */
395                 next_try[0] = result[0];
396                 next_try[1] = result[1];
397                 dt_shift_128(next_try, 2);
398                 dt_add_128(next_try, one, next_try);
399
400                 if (dt_le_128(next_try, diff)) {
401                         dt_subtract_128(diff, next_try, diff);
402                         dt_shift_128(result, 1);
403                         dt_add_128(result, one, result);
404                 } else {
405                         dt_shift_128(result, 1);
406                 }
407
408                 pair_shift -= 2;
409         }
410
411         assert(result[1] == 0);
412
413         return (result[0]);
414 }
415
/*
 * Compute the integer standard deviation for a stddev() aggregation.
 *
 * As used here, data[0] is the number of samples, data[1] the (signed) sum
 * of the samples, and data[2..3] the 128-bit sum of their squares.  normal
 * is the normalization factor and must be non-zero.  Returns 0 when there
 * are no samples.
 */
uint64_t
dt_stddev(uint64_t *data, uint64_t normal)
{
        uint64_t mean_of_squares[2];
        uint64_t square_of_mean[2];
        uint64_t variance[2];
        int64_t mean;

        if (data[0] == 0)
                return (0);

        /*
         * The standard deviation is approximated as
         * sqrt(average(x**2) - average(x)**2).  The sum of squares is
         * normalized by normal**2 -- hence the two divisions by normal --
         * before being averaged over the sample count.
         */
        dt_divide_128(data + 2, normal, mean_of_squares);
        dt_divide_128(mean_of_squares, normal, mean_of_squares);
        dt_divide_128(mean_of_squares, data[0], mean_of_squares);

        mean = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];

        /* Squaring discards the sign, so square the magnitude. */
        if (mean < 0)
                mean = -mean;

        dt_multiply_128((uint64_t)mean, (uint64_t)mean, square_of_mean);

        dt_subtract_128(mean_of_squares, square_of_mean, variance);

        return (dt_sqrt_128(variance));
}
448
449 static int
450 dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
451     dtrace_bufdesc_t *buf, size_t offs)
452 {
453         dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
454         dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
455         char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;
456         dtrace_flowkind_t flow = DTRACEFLOW_NONE;
457         const char *str = NULL;
458         static const char *e_str[2] = { " -> ", " => " };
459         static const char *r_str[2] = { " <- ", " <= " };
460         static const char *ent = "entry", *ret = "return";
461         static int entlen = 0, retlen = 0;
462         dtrace_epid_t next, id = epd->dtepd_epid;
463         int rval;
464
465         if (entlen == 0) {
466                 assert(retlen == 0);
467                 entlen = strlen(ent);
468                 retlen = strlen(ret);
469         }
470
471         /*
472          * If the name of the probe is "entry" or ends with "-entry", we
473          * treat it as an entry; if it is "return" or ends with "-return",
474          * we treat it as a return.  (This allows application-provided probes
475          * like "method-entry" or "function-entry" to participate in flow
476          * indentation -- without accidentally misinterpreting popular probe
477          * names like "carpentry", "gentry" or "Coventry".)
478          */
479         if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&
480             (sub == n || sub[-1] == '-')) {
481                 flow = DTRACEFLOW_ENTRY;
482                 str = e_str[strcmp(p, "syscall") == 0];
483         } else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&
484             (sub == n || sub[-1] == '-')) {
485                 flow = DTRACEFLOW_RETURN;
486                 str = r_str[strcmp(p, "syscall") == 0];
487         }
488
489         /*
490          * If we're going to indent this, we need to check the ID of our last
491          * call.  If we're looking at the same probe ID but a different EPID,
492          * we _don't_ want to indent.  (Yes, there are some minor holes in
493          * this scheme -- it's a heuristic.)
494          */
495         if (flow == DTRACEFLOW_ENTRY) {
496                 if ((last != DTRACE_EPIDNONE && id != last &&
497                     pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
498                         flow = DTRACEFLOW_NONE;
499         }
500
501         /*
502          * If we're going to unindent this, it's more difficult to see if
503          * we don't actually want to unindent it -- we need to look at the
504          * _next_ EPID.
505          */
506         if (flow == DTRACEFLOW_RETURN) {
507                 offs += epd->dtepd_size;
508
509                 do {
510                         if (offs >= buf->dtbd_size)
511                                 goto out;
512
513                         next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
514
515                         if (next == DTRACE_EPIDNONE)
516                                 offs += sizeof (id);
517                 } while (next == DTRACE_EPIDNONE);
518
519                 if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
520                         return (rval);
521
522                 if (next != id && npd->dtpd_id == pd->dtpd_id)
523                         flow = DTRACEFLOW_NONE;
524         }
525
526 out:
527         if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
528                 data->dtpda_prefix = str;
529         } else {
530                 data->dtpda_prefix = "| ";
531         }
532
533         if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
534                 data->dtpda_indent -= 2;
535
536         data->dtpda_flow = flow;
537
538         return (0);
539 }
540
541 static int
542 dt_nullprobe()
543 {
544         return (DTRACE_CONSUME_THIS);
545 }
546
547 static int
548 dt_nullrec()
549 {
550         return (DTRACE_CONSUME_NEXT);
551 }
552
553 static void
554 dt_quantize_total(dtrace_hdl_t *dtp, int64_t datum, long double *total)
555 {
556         long double val = dt_fabsl((long double)datum);
557
558         if (dtp->dt_options[DTRACEOPT_AGGZOOM] == DTRACEOPT_UNSET) {
559                 *total += val;
560                 return;
561         }
562
563         /*
564          * If we're zooming in on an aggregation, we want the height of the
565          * highest value to be approximately 95% of total bar height -- so we
566          * adjust up by the reciprocal of DTRACE_AGGZOOM_MAX when comparing to
567          * our highest value.
568          */
569         val *= 1 / DTRACE_AGGZOOM_MAX;
570
571         if (*total < val)
572                 *total = val;
573 }
574
575 static int
576 dt_print_quanthdr(dtrace_hdl_t *dtp, FILE *fp, int width)
577 {
578         return (dt_printf(dtp, fp, "\n%*s %41s %-9s\n",
579             width ? width : 16, width ? "key" : "value",
580             "------------- Distribution -------------", "count"));
581 }
582
583 static int
584 dt_print_quanthdr_packed(dtrace_hdl_t *dtp, FILE *fp, int width,
585     const dtrace_aggdata_t *aggdata, dtrace_actkind_t action)
586 {
587         int min = aggdata->dtada_minbin, max = aggdata->dtada_maxbin;
588         int minwidth, maxwidth, i;
589
590         assert(action == DTRACEAGG_QUANTIZE || action == DTRACEAGG_LQUANTIZE);
591
592         if (action == DTRACEAGG_QUANTIZE) {
593                 if (min != 0 && min != DTRACE_QUANTIZE_ZEROBUCKET)
594                         min--;
595
596                 if (max < DTRACE_QUANTIZE_NBUCKETS - 1)
597                         max++;
598
599                 minwidth = dt_ndigits(DTRACE_QUANTIZE_BUCKETVAL(min));
600                 maxwidth = dt_ndigits(DTRACE_QUANTIZE_BUCKETVAL(max));
601         } else {
602                 maxwidth = 8;
603                 minwidth = maxwidth - 1;
604                 max++;
605         }
606
607         if (dt_printf(dtp, fp, "\n%*s %*s .",
608             width, width > 0 ? "key" : "", minwidth, "min") < 0)
609                 return (-1);
610
611         for (i = min; i <= max; i++) {
612                 if (dt_printf(dtp, fp, "-") < 0)
613                         return (-1);
614         }
615
616         return (dt_printf(dtp, fp, ". %*s | count\n", -maxwidth, "max"));
617 }
618
/*
 * Aggregation bars are drawn with a subset of the Unicode Block Elements
 * (U+2588 through U+258F, inclusive).  Each of these code points encodes as
 * a 3-byte UTF-8 sequence, which the BYTE0/BYTE1/BYTE2 macros produce.
 */
#define DTRACE_AGGUTF8_FULL     0x2588
#define DTRACE_AGGUTF8_BASE     0x258f
#define DTRACE_AGGUTF8_LEVELS   8

/* Lead byte:  0xe0 combined with the bits above bit 11. */
#define DTRACE_AGGUTF8_BYTE0(val)       (0xe0 | ((val) >> 12))
/* First continuation byte:  0x80 combined with bits 6 through 11. */
#define DTRACE_AGGUTF8_BYTE1(val)       (0x80 | (((val) >> 6) & 0x3f))
/* Second continuation byte:  0x80 combined with bits 0 through 5. */
#define DTRACE_AGGUTF8_BYTE2(val)       (0x80 | ((val) & 0x3f))
631
632 static int
633 dt_print_quantline_utf8(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
634     uint64_t normal, long double total)
635 {
636         uint_t len = 40, i, whole, partial;
637         long double f = (dt_fabsl((long double)val) * len) / total;
638         const char *spaces = "                                        ";
639
640         whole = (uint_t)f;
641         partial = (uint_t)((f - (long double)(uint_t)f) *
642             (long double)DTRACE_AGGUTF8_LEVELS);
643
644         if (dt_printf(dtp, fp, "|") < 0)
645                 return (-1);
646
647         for (i = 0; i < whole; i++) {
648                 if (dt_printf(dtp, fp, "%c%c%c",
649                     DTRACE_AGGUTF8_BYTE0(DTRACE_AGGUTF8_FULL),
650                     DTRACE_AGGUTF8_BYTE1(DTRACE_AGGUTF8_FULL),
651                     DTRACE_AGGUTF8_BYTE2(DTRACE_AGGUTF8_FULL)) < 0)
652                         return (-1);
653         }
654
655         if (partial != 0) {
656                 partial = DTRACE_AGGUTF8_BASE - (partial - 1);
657
658                 if (dt_printf(dtp, fp, "%c%c%c",
659                     DTRACE_AGGUTF8_BYTE0(partial),
660                     DTRACE_AGGUTF8_BYTE1(partial),
661                     DTRACE_AGGUTF8_BYTE2(partial)) < 0)
662                         return (-1);
663
664                 i++;
665         }
666
667         return (dt_printf(dtp, fp, "%s %-9lld\n", spaces + i,
668             (long long)val / normal));
669 }
670
671 static int
672 dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
673     uint64_t normal, long double total, char positives, char negatives)
674 {
675         long double f;
676         uint_t depth, len = 40;
677
678         const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
679         const char *spaces = "                                        ";
680
681         assert(strlen(ats) == len && strlen(spaces) == len);
682         assert(!(total == 0 && (positives || negatives)));
683         assert(!(val < 0 && !negatives));
684         assert(!(val > 0 && !positives));
685         assert(!(val != 0 && total == 0));
686
687         if (!negatives) {
688                 if (positives) {
689                         if (dtp->dt_encoding == DT_ENCODING_UTF8) {
690                                 return (dt_print_quantline_utf8(dtp, fp, val,
691                                     normal, total));
692                         }
693
694                         f = (dt_fabsl((long double)val) * len) / total;
695                         depth = (uint_t)(f + 0.5);
696                 } else {
697                         depth = 0;
698                 }
699
700                 return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,
701                     spaces + depth, (long long)val / normal));
702         }
703
704         if (!positives) {
705                 f = (dt_fabsl((long double)val) * len) / total;
706                 depth = (uint_t)(f + 0.5);
707
708                 return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,
709                     ats + len - depth, (long long)val / normal));
710         }
711
712         /*
713          * If we're here, we have both positive and negative bucket values.
714          * To express this graphically, we're going to generate both positive
715          * and negative bars separated by a centerline.  These bars are half
716          * the size of normal quantize()/lquantize() bars, so we divide the
717          * length in half before calculating the bar length.
718          */
719         len /= 2;
720         ats = &ats[len];
721         spaces = &spaces[len];
722
723         f = (dt_fabsl((long double)val) * len) / total;
724         depth = (uint_t)(f + 0.5);
725
726         if (val <= 0) {
727                 return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,
728                     ats + len - depth, len, "", (long long)val / normal));
729         } else {
730                 return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",
731                     ats + len - depth, spaces + depth,
732                     (long long)val / normal));
733         }
734 }
735
/*
 * Packed aggregations are also drawn with Unicode Block Elements, using
 * the subset U+2581 through U+2588 (inclusive):  DTRACE_AGGPACK_LEVELS
 * code points starting at DTRACE_AGGPACK_BASE.
 */
#define DTRACE_AGGPACK_BASE             0x2581
#define DTRACE_AGGPACK_LEVELS           8
743
744 static int
745 dt_print_packed(dtrace_hdl_t *dtp, FILE *fp,
746     long double datum, long double total)
747 {
748         static boolean_t utf8_checked = B_FALSE;
749         static boolean_t utf8;
750         char *ascii = "__xxxxXX";
751         char *neg = "vvvvVV";
752         unsigned int len;
753         long double val;
754
755         if (!utf8_checked) {
756                 char *term;
757
758                 /*
759                  * We want to determine if we can reasonably emit UTF-8 for our
760                  * packed aggregation.  To do this, we will check for terminals
761                  * that are known to be primitive to emit UTF-8 on these.
762                  */
763                 utf8_checked = B_TRUE;
764
765                 if (dtp->dt_encoding == DT_ENCODING_ASCII) {
766                         utf8 = B_FALSE;
767                 } else if (dtp->dt_encoding == DT_ENCODING_UTF8) {
768                         utf8 = B_TRUE;
769                 } else if ((term = getenv("TERM")) != NULL &&
770                     (strcmp(term, "sun") == 0 ||
771                     strcmp(term, "sun-color") == 0 ||
772                     strcmp(term, "dumb") == 0)) {
773                         utf8 = B_FALSE;
774                 } else {
775                         utf8 = B_TRUE;
776                 }
777         }
778
779         if (datum == 0)
780                 return (dt_printf(dtp, fp, " "));
781
782         if (datum < 0) {
783                 len = strlen(neg);
784                 val = dt_fabsl(datum * (len - 1)) / total;
785                 return (dt_printf(dtp, fp, "%c", neg[(uint_t)(val + 0.5)]));
786         }
787
788         if (utf8) {
789                 int block = DTRACE_AGGPACK_BASE + (unsigned int)(((datum *
790                     (DTRACE_AGGPACK_LEVELS - 1)) / total) + 0.5);
791
792                 return (dt_printf(dtp, fp, "%c%c%c",
793                     DTRACE_AGGUTF8_BYTE0(block),
794                     DTRACE_AGGUTF8_BYTE1(block),
795                     DTRACE_AGGUTF8_BYTE2(block)));
796         }
797
798         len = strlen(ascii);
799         val = (datum * (len - 1)) / total;
800         return (dt_printf(dtp, fp, "%c", ascii[(uint_t)(val + 0.5)]));
801 }
802
803 static const int64_t *
804 dt_format_quantize_prepare(dtrace_hdl_t *dtp, const void *addr, size_t size,
805     dt_prepare_args_t *args)
806 {
807         const int64_t *data = addr;
808         int first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
809
810         if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t)) {
811                 (void) dt_set_errno(dtp, EDT_DMISMATCH);
812                 return (NULL);
813         }
814
815         while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
816                 first_bin++;
817
818         if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {
819                 /*
820                  * There isn't any data.  This is possible if the aggregation
821                  * has been clear()'d or if negative increment values have been
822                  * used.  Regardless, we'll print the buckets around 0.
823                  */
824                 first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;
825                 last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;
826         } else {
827                 if (first_bin > 0)
828                         first_bin--;
829
830                 while (last_bin > 0 && data[last_bin] == 0)
831                         last_bin--;
832
833                 if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
834                         last_bin++;
835         }
836
837         args->first_bin = first_bin;
838         args->last_bin = last_bin;
839         return (data);
840 }
841
842 int
843 dt_format_quantize(dtrace_hdl_t *dtp, const void *addr, size_t size,
844     uint64_t normal)
845 {
846         const int64_t *data;
847         dt_prepare_args_t args = { 0 };
848         int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
849
850         data = dt_format_quantize_prepare(dtp, addr, size, &args);
851         /* dt_errno is set for us */
852         if (data == NULL)
853                 return (-1);
854
855         first_bin = args.first_bin;
856         last_bin = args.last_bin;
857
858         xo_open_list("buckets");
859         for (i = first_bin; i <= last_bin; i++) {
860                 long long value = (long long)DTRACE_QUANTIZE_BUCKETVAL(i);
861                 xo_open_instance("buckets");
862                 xo_emit("{:value/%lld} {:count/%lld}", value,
863                     (long long)data[i] / normal);
864                 xo_close_instance("buckets");
865         }
866         xo_close_list("buckets");
867
868         return (0);
869 }
870
871 int
872 dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
873     size_t size, uint64_t normal)
874 {
875         const int64_t *data;
876         dt_prepare_args_t args = { 0 };
877         int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
878         long double total = 0;
879         char positives = 0, negatives = 0;
880
881         data = dt_format_quantize_prepare(dtp, addr, size, &args);
882         /* dt_errno is set for us */
883         if (data == NULL)
884                 return (-1);
885
886         first_bin = args.first_bin;
887         last_bin = args.last_bin;
888
889         for (i = first_bin; i <= last_bin; i++) {
890                 positives |= (data[i] > 0);
891                 negatives |= (data[i] < 0);
892                 dt_quantize_total(dtp, data[i], &total);
893         }
894
895         if (dt_print_quanthdr(dtp, fp, 0) < 0)
896                 return (-1);
897
898         for (i = first_bin; i <= last_bin; i++) {
899                 if (dt_printf(dtp, fp, "%16lld ",
900                     (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)
901                         return (-1);
902
903                 if (dt_print_quantline(dtp, fp, data[i], normal, total,
904                     positives, negatives) < 0)
905                         return (-1);
906         }
907
908         return (0);
909 }
910
911 int
912 dt_print_quantize_packed(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
913     size_t size, const dtrace_aggdata_t *aggdata)
914 {
915         const int64_t *data = addr;
916         long double total = 0, count = 0;
917         int min = aggdata->dtada_minbin, max = aggdata->dtada_maxbin, i;
918         int64_t minval, maxval;
919
920         if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
921                 return (dt_set_errno(dtp, EDT_DMISMATCH));
922
923         if (min != 0 && min != DTRACE_QUANTIZE_ZEROBUCKET)
924                 min--;
925
926         if (max < DTRACE_QUANTIZE_NBUCKETS - 1)
927                 max++;
928
929         minval = DTRACE_QUANTIZE_BUCKETVAL(min);
930         maxval = DTRACE_QUANTIZE_BUCKETVAL(max);
931
932         if (dt_printf(dtp, fp, " %*lld :", dt_ndigits(minval),
933             (long long)minval) < 0)
934                 return (-1);
935
936         for (i = min; i <= max; i++) {
937                 dt_quantize_total(dtp, data[i], &total);
938                 count += data[i];
939         }
940
941         for (i = min; i <= max; i++) {
942                 if (dt_print_packed(dtp, fp, data[i], total) < 0)
943                         return (-1);
944         }
945
946         if (dt_printf(dtp, fp, ": %*lld | %lld\n",
947             -dt_ndigits(maxval), (long long)maxval, (long long)count) < 0)
948                 return (-1);
949
950         return (0);
951 }
952
953 static const int64_t *
954 dt_format_lquantize_prepare(dtrace_hdl_t *dtp, const void *addr, size_t size,
955     dt_prepare_args_t *args)
956 {
957         const int64_t *data = addr;
958         int first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1, base;
959         uint64_t arg;
960         uint16_t step, levels;
961
962         if (size < sizeof (uint64_t)) {
963                 (void) dt_set_errno(dtp, EDT_DMISMATCH);
964                 return (NULL);
965         }
966
967         arg = *data++;
968         size -= sizeof (uint64_t);
969
970         base = DTRACE_LQUANTIZE_BASE(arg);
971         step = DTRACE_LQUANTIZE_STEP(arg);
972         levels = DTRACE_LQUANTIZE_LEVELS(arg);
973
974         first_bin = 0;
975         last_bin = levels + 1;
976
977         if (size != sizeof (uint64_t) * (levels + 2)) {
978                 (void) dt_set_errno(dtp, EDT_DMISMATCH);
979                 return (NULL);
980         }
981
982         while (first_bin <= levels + 1 && data[first_bin] == 0)
983                 first_bin++;
984
985         if (first_bin > levels + 1) {
986                 first_bin = 0;
987                 last_bin = 2;
988         } else {
989                 if (first_bin > 0)
990                         first_bin--;
991
992                 while (last_bin > 0 && data[last_bin] == 0)
993                         last_bin--;
994
995                 if (last_bin < levels + 1)
996                         last_bin++;
997         }
998
999         args->first_bin = first_bin;
1000         args->last_bin = last_bin;
1001         args->lquantize_base = base;
1002         args->lquantize_step = step;
1003         args->lquantize_levels = levels;
1004         return (data);
1005 }
1006
1007 int
1008 dt_format_lquantize(dtrace_hdl_t *dtp, const void *addr, size_t size,
1009     uint64_t normal)
1010 {
1011         const int64_t *data;
1012         dt_prepare_args_t args = { 0 };
1013         int i, first_bin, last_bin, base;
1014         uint16_t step, levels;
1015
1016         data = dt_format_lquantize_prepare(dtp, addr, size, &args);
1017         /* dt_errno is set for us */
1018         if (data == NULL)
1019                 return (-1);
1020
1021         first_bin = args.first_bin;
1022         last_bin = args.last_bin;
1023         step = args.lquantize_step;
1024         levels = args.lquantize_levels;
1025         base = args.lquantize_base;
1026
1027         xo_open_list("buckets");
1028         for (i = first_bin; i <= last_bin; i++) {
1029                 char c[32];
1030                 int err;
1031
1032                 xo_open_instance("buckets");
1033                 if (i == 0) {
1034                         xo_emit("{:value/%d} {:operator/%s}", base, "<");
1035                 } else if (i == levels + 1) {
1036                         xo_emit("{:value/%d} {:operator/%s}",
1037                             base + (levels * step), ">=");
1038                 } else {
1039                         xo_emit("{:value/%d}", base + (i - 1) * step);
1040                 }
1041
1042                 xo_emit("{:count/%lld}", (long long)data[i] / normal);
1043                 xo_close_instance("buckets");
1044         }
1045         xo_close_list("buckets");
1046
1047         return (0);
1048 }
1049
1050 int
1051 dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
1052     size_t size, uint64_t normal)
1053 {
1054         const int64_t *data;
1055         dt_prepare_args_t args = { 0 };
1056         int i, first_bin, last_bin, base;
1057         uint64_t arg;
1058         long double total = 0;
1059         uint16_t step, levels;
1060         char positives = 0, negatives = 0;
1061
1062         data = dt_format_lquantize_prepare(dtp, addr, size, &args);
1063         /* dt_errno is set for us */
1064         if (data == NULL)
1065                 return (-1);
1066
1067         first_bin = args.first_bin;
1068         last_bin = args.last_bin;
1069         step = args.lquantize_step;
1070         levels = args.lquantize_levels;
1071         base = args.lquantize_base;
1072
1073         for (i = first_bin; i <= last_bin; i++) {
1074                 positives |= (data[i] > 0);
1075                 negatives |= (data[i] < 0);
1076                 dt_quantize_total(dtp, data[i], &total);
1077         }
1078
1079         if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
1080             "------------- Distribution -------------", "count") < 0)
1081                 return (-1);
1082
1083         for (i = first_bin; i <= last_bin; i++) {
1084                 char c[32];
1085                 int err;
1086
1087                 if (i == 0) {
1088                         (void) snprintf(c, sizeof (c), "< %d", base);
1089                         err = dt_printf(dtp, fp, "%16s ", c);
1090                 } else if (i == levels + 1) {
1091                         (void) snprintf(c, sizeof (c), ">= %d",
1092                             base + (levels * step));
1093                         err = dt_printf(dtp, fp, "%16s ", c);
1094                 } else {
1095                         err = dt_printf(dtp, fp, "%16d ",
1096                             base + (i - 1) * step);
1097                 }
1098
1099                 if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,
1100                     total, positives, negatives) < 0)
1101                         return (-1);
1102         }
1103
1104         return (0);
1105 }
1106
1107 /*ARGSUSED*/
1108 int
1109 dt_print_lquantize_packed(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
1110     size_t size, const dtrace_aggdata_t *aggdata)
1111 {
1112         const int64_t *data = addr;
1113         long double total = 0, count = 0;
1114         int min, max, base, err;
1115         uint64_t arg;
1116         uint16_t step, levels;
1117         char c[32];
1118         unsigned int i;
1119
1120         if (size < sizeof (uint64_t))
1121                 return (dt_set_errno(dtp, EDT_DMISMATCH));
1122
1123         arg = *data++;
1124         size -= sizeof (uint64_t);
1125
1126         base = DTRACE_LQUANTIZE_BASE(arg);
1127         step = DTRACE_LQUANTIZE_STEP(arg);
1128         levels = DTRACE_LQUANTIZE_LEVELS(arg);
1129
1130         if (size != sizeof (uint64_t) * (levels + 2))
1131                 return (dt_set_errno(dtp, EDT_DMISMATCH));
1132
1133         min = 0;
1134         max = levels + 1;
1135
1136         if (min == 0) {
1137                 (void) snprintf(c, sizeof (c), "< %d", base);
1138                 err = dt_printf(dtp, fp, "%8s :", c);
1139         } else {
1140                 err = dt_printf(dtp, fp, "%8d :", base + (min - 1) * step);
1141         }
1142
1143         if (err < 0)
1144                 return (-1);
1145
1146         for (i = min; i <= max; i++) {
1147                 dt_quantize_total(dtp, data[i], &total);
1148                 count += data[i];
1149         }
1150
1151         for (i = min; i <= max; i++) {
1152                 if (dt_print_packed(dtp, fp, data[i], total) < 0)
1153                         return (-1);
1154         }
1155
1156         (void) snprintf(c, sizeof (c), ">= %d", base + (levels * step));
1157         return (dt_printf(dtp, fp, ": %-8s | %lld\n", c, (long long)count));
1158 }
1159
1160 static const int64_t *
1161 dt_format_llquantize_prepare(dtrace_hdl_t *dtp, const void *addr, size_t size,
1162     dt_prepare_args_t *args)
1163 {
1164         int i, first_bin, last_bin, bin = 1, order, levels;
1165         uint16_t factor, low, high, nsteps;
1166         const int64_t *data = addr;
1167         int64_t value = 1, next, step;
1168         uint64_t arg;
1169
1170         if (size < sizeof(uint64_t)) {
1171                 (void) dt_set_errno(dtp, EDT_DMISMATCH);
1172                 return (NULL);
1173         }
1174
1175         arg = *data++;
1176         size -= sizeof (uint64_t);
1177
1178         factor = DTRACE_LLQUANTIZE_FACTOR(arg);
1179         low = DTRACE_LLQUANTIZE_LOW(arg);
1180         high = DTRACE_LLQUANTIZE_HIGH(arg);
1181         nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
1182
1183         /*
1184          * We don't expect to be handed invalid llquantize() parameters here,
1185          * but sanity check them (to a degree) nonetheless.
1186          */
1187         if (size > INT32_MAX || factor < 2 || low >= high ||
1188             nsteps == 0 || factor > nsteps) {
1189                 (void) dt_set_errno(dtp, EDT_DMISMATCH);
1190                 return (NULL);
1191         }
1192
1193         levels = (int)size / sizeof (uint64_t);
1194
1195         first_bin = 0;
1196         last_bin = levels - 1;
1197
1198         while (first_bin < levels && data[first_bin] == 0)
1199                 first_bin++;
1200
1201         if (first_bin == levels) {
1202                 first_bin = 0;
1203                 last_bin = 1;
1204         } else {
1205                 if (first_bin > 0)
1206                         first_bin--;
1207
1208                 while (last_bin > 0 && data[last_bin] == 0)
1209                         last_bin--;
1210
1211                 if (last_bin < levels - 1)
1212                         last_bin++;
1213         }
1214
1215         for (order = 0; order < low; order++)
1216                 value *= factor;
1217
1218         next = value * factor;
1219         step = next > nsteps ? next / nsteps : 1;
1220
1221         args->first_bin = first_bin;
1222         args->last_bin = last_bin;
1223         args->llquantize_factor = factor;
1224         args->llquantize_low = low;
1225         args->llquantize_high = high;
1226         args->llquantize_nsteps = nsteps;
1227         args->llquantize_levels = levels;
1228         args->llquantize_order = order;
1229         args->llquantize_next = next;
1230         args->llquantize_step = step;
1231         args->llquantize_value = value;
1232
1233         return (data);
1234 }
1235
1236 int
1237 dt_format_llquantize(dtrace_hdl_t *dtp, const void *addr, size_t size,
1238     uint64_t normal)
1239 {
1240         int first_bin, last_bin, bin = 1, order, levels;
1241         uint16_t factor, low, high, nsteps;
1242         const int64_t *data;
1243         dt_prepare_args_t args = { 0 };
1244         int64_t value = 1, next, step;
1245         uint64_t arg;
1246         char c[32];
1247
1248         data = dt_format_llquantize_prepare(dtp, addr, size, &args);
1249         /* dt_errno is set for us */
1250         if (data == NULL)
1251                 return (-1);
1252
1253         first_bin = args.first_bin;
1254         last_bin = args.last_bin;
1255         factor = args.llquantize_factor;
1256         low = args.llquantize_low;
1257         high = args.llquantize_high;
1258         nsteps = args.llquantize_nsteps;
1259         levels = args.llquantize_levels;
1260         order = args.llquantize_order;
1261         next = args.llquantize_next;
1262         step = args.llquantize_step;
1263         value = args.llquantize_value;
1264
1265         xo_open_list("buckets");
1266         if (first_bin == 0) {
1267                 /*
1268                  * We have to represent < value somehow in JSON, so we bundle an
1269                  * optional "operator" in llquantize buckets.
1270                  */
1271                 xo_open_instance("buckets");
1272                 xo_emit("{:value/%lld} {:count/%lld} {:operator/%s}",
1273                     (long long)value, (long long)data[0] / normal, "<");
1274                 xo_close_instance("buckets");
1275         }
1276
1277         while (order <= high) {
1278                 if (bin >= first_bin && bin <= last_bin) {
1279                         xo_open_instance("buckets");
1280                         xo_emit("{:value/%lld} {:count/%lld}", (long long)value,
1281                             (long long)data[bin] / normal);
1282                         xo_close_instance("buckets");
1283                 }
1284
1285                 assert(value < next);
1286                 bin++;
1287
1288                 if ((value += step) != next)
1289                         continue;
1290
1291                 next = value * factor;
1292                 step = next > nsteps ? next / nsteps : 1;
1293                 order++;
1294         }
1295
1296         if (last_bin < bin) {
1297                 xo_close_list("buckets");
1298                 return (0);
1299         }
1300
1301         assert(last_bin == bin);
1302         xo_open_instance("buckets");
1303         xo_emit("{:value/%lld} {:count/%lld} {:operator/%s}", (long long)value,
1304             (long long)data[bin] / normal, ">=");
1305         xo_close_instance("buckets");
1306
1307         xo_close_list("buckets");
1308         return (0);
1309 }
1310
1311 int
1312 dt_print_llquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
1313     size_t size, uint64_t normal)
1314 {
1315         int i, first_bin, last_bin, bin = 1, order, levels;
1316         uint16_t factor, low, high, nsteps;
1317         const int64_t *data;
1318         dt_prepare_args_t args = { 0 };
1319         int64_t value = 1, next, step;
1320         char positives = 0, negatives = 0;
1321         long double total = 0;
1322         uint64_t arg;
1323         char c[32];
1324
1325         data = dt_format_llquantize_prepare(dtp, addr, size, &args);
1326         /* dt_errno is set for us */
1327         if (data == NULL)
1328                 return (-1);
1329
1330         first_bin = args.first_bin;
1331         last_bin = args.last_bin;
1332         factor = args.llquantize_factor;
1333         low = args.llquantize_low;
1334         high = args.llquantize_high;
1335         nsteps = args.llquantize_nsteps;
1336         levels = args.llquantize_levels;
1337         order = args.llquantize_order;
1338         next = args.llquantize_next;
1339         step = args.llquantize_step;
1340         value = args.llquantize_value;
1341
1342         for (i = first_bin; i <= last_bin; i++) {
1343                 positives |= (data[i] > 0);
1344                 negatives |= (data[i] < 0);
1345                 dt_quantize_total(dtp, data[i], &total);
1346         }
1347
1348         if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
1349             "------------- Distribution -------------", "count") < 0)
1350                 return (-1);
1351
1352         if (first_bin == 0) {
1353                 (void) snprintf(c, sizeof (c), "< %lld", (long long)value);
1354
1355                 if (dt_printf(dtp, fp, "%16s ", c) < 0)
1356                         return (-1);
1357
1358                 if (dt_print_quantline(dtp, fp, data[0], normal,
1359                     total, positives, negatives) < 0)
1360                         return (-1);
1361         }
1362
1363         while (order <= high) {
1364                 if (bin >= first_bin && bin <= last_bin) {
1365                         if (dt_printf(dtp, fp, "%16lld ", (long long)value) < 0)
1366                                 return (-1);
1367
1368                         if (dt_print_quantline(dtp, fp, data[bin],
1369                             normal, total, positives, negatives) < 0)
1370                                 return (-1);
1371                 }
1372
1373                 assert(value < next);
1374                 bin++;
1375
1376                 if ((value += step) != next)
1377                         continue;
1378
1379                 next = value * factor;
1380                 step = next > nsteps ? next / nsteps : 1;
1381                 order++;
1382         }
1383
1384         if (last_bin < bin)
1385                 return (0);
1386
1387         assert(last_bin == bin);
1388         (void) snprintf(c, sizeof (c), ">= %lld", (long long)value);
1389
1390         if (dt_printf(dtp, fp, "%16s ", c) < 0)
1391                 return (-1);
1392
1393         return (dt_print_quantline(dtp, fp, data[bin], normal,
1394             total, positives, negatives));
1395 }
1396
1397 static int
1398 dt_format_average(dtrace_hdl_t *dtp, caddr_t addr, size_t size, uint64_t normal)
1399 {
1400         int64_t *data = (int64_t *)addr;
1401
1402         xo_emit("{:average/%lld}",
1403             data[0] ? (long long)(data[1] / (int64_t)normal / data[0]) : 0);
1404         return (0);
1405 }
1406
1407 /*ARGSUSED*/
1408 static int
1409 dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
1410     size_t size, uint64_t normal)
1411 {
1412         /* LINTED - alignment */
1413         int64_t *data = (int64_t *)addr;
1414
1415         return (dt_printf(dtp, fp, " %16lld", data[0] ?
1416             (long long)(data[1] / (int64_t)normal / data[0]) : 0));
1417 }
1418
1419 static int
1420 dt_format_stddev(dtrace_hdl_t *dtp, caddr_t addr, size_t size, uint64_t normal)
1421 {
1422         uint64_t *data = (uint64_t *)addr;
1423
1424         xo_emit("{:stddev/%llu}",
1425             data[0] ? (unsigned long long)dt_stddev(data, normal) : 0);
1426         return (0);
1427 }
1428
1429 /*ARGSUSED*/
1430 static int
1431 dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
1432     size_t size, uint64_t normal)
1433 {
1434         /* LINTED - alignment */
1435         uint64_t *data = (uint64_t *)addr;
1436
1437         return (dt_printf(dtp, fp, " %16llu", data[0] ?
1438             (unsigned long long) dt_stddev(data, normal) : 0));
1439 }
1440
1441 /*ARGSUSED*/
1442 static int
1443 dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
1444     size_t nbytes, int width, int quiet, int forceraw)
1445 {
1446         /*
1447          * If the byte stream is a series of printable characters, followed by
1448          * a terminating byte, we print it out as a string.  Otherwise, we
1449          * assume that it's something else and just print the bytes.
1450          */
1451         int i, j, margin = 5;
1452         char *c = (char *)addr;
1453
1454         if (nbytes == 0)
1455                 return (0);
1456
1457         if (forceraw)
1458                 goto raw;
1459
1460         if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
1461                 goto raw;
1462
1463         for (i = 0; i < nbytes; i++) {
1464                 /*
1465                  * We define a "printable character" to be one for which
1466                  * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
1467                  * or a character which is either backspace or the bell.
1468                  * Backspace and the bell are regrettably special because
1469                  * they fail the first two tests -- and yet they are entirely
1470                  * printable.  These are the only two control characters that
1471                  * have meaning for the terminal and for which isprint(3C) and
1472                  * isspace(3C) return 0.
1473                  */
1474                 if (isprint(c[i]) || isspace(c[i]) ||
1475                     c[i] == '\b' || c[i] == '\a')
1476                         continue;
1477
1478                 if (c[i] == '\0' && i > 0) {
1479                         /*
1480                          * This looks like it might be a string.  Before we
1481                          * assume that it is indeed a string, check the
1482                          * remainder of the byte range; if it contains
1483                          * additional non-nul characters, we'll assume that
1484                          * it's a binary stream that just happens to look like
1485                          * a string, and we'll print out the individual bytes.
1486                          */
1487                         for (j = i + 1; j < nbytes; j++) {
1488                                 if (c[j] != '\0')
1489                                         break;
1490                         }
1491
1492                         if (j != nbytes)
1493                                 break;
1494
1495                         if (quiet) {
1496                                 return (dt_printf(dtp, fp, "%s", c));
1497                         } else {
1498                                 return (dt_printf(dtp, fp, " %s%*s",
1499                                     width < 0 ? " " : "", width, c));
1500                         }
1501                 }
1502
1503                 break;
1504         }
1505
1506         if (i == nbytes) {
1507                 /*
1508                  * The byte range is all printable characters, but there is
1509                  * no trailing nul byte.  We'll assume that it's a string and
1510                  * print it as such.
1511                  */
1512                 char *s = alloca(nbytes + 1);
1513                 bcopy(c, s, nbytes);
1514                 s[nbytes] = '\0';
1515                 return (dt_printf(dtp, fp, "  %-*s", width, s));
1516         }
1517
1518 raw:
1519         if (dt_printf(dtp, fp, "\n%*s      ", margin, "") < 0)
1520                 return (-1);
1521
1522         for (i = 0; i < 16; i++)
1523                 if (dt_printf(dtp, fp, "  %c", "0123456789abcdef"[i]) < 0)
1524                         return (-1);
1525
1526         if (dt_printf(dtp, fp, "  0123456789abcdef\n") < 0)
1527                 return (-1);
1528
1529
1530         for (i = 0; i < nbytes; i += 16) {
1531                 if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
1532                         return (-1);
1533
1534                 for (j = i; j < i + 16 && j < nbytes; j++) {
1535                         if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
1536                                 return (-1);
1537                 }
1538
1539                 while (j++ % 16) {
1540                         if (dt_printf(dtp, fp, "   ") < 0)
1541                                 return (-1);
1542                 }
1543
1544                 if (dt_printf(dtp, fp, "  ") < 0)
1545                         return (-1);
1546
1547                 for (j = i; j < i + 16 && j < nbytes; j++) {
1548                         if (dt_printf(dtp, fp, "%c",
1549                             c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
1550                                 return (-1);
1551                 }
1552
1553                 if (dt_printf(dtp, fp, "\n") < 0)
1554                         return (-1);
1555         }
1556
1557         return (0);
1558 }
1559
1560 int
1561 dt_format_stack(dtrace_hdl_t *dtp, caddr_t addr, int depth, int size)
1562 {
1563         dtrace_syminfo_t dts;
1564         GElf_Sym sym;
1565         int i;
1566         uint64_t pc;
1567
1568         xo_open_list("stack-frames");
1569         for (i = 0; i < depth; i++) {
1570                 switch (size) {
1571                 case sizeof (uint32_t):
1572                         pc = *((uint32_t *)addr);
1573                         break;
1574
1575                 case sizeof (uint64_t):
1576                         pc = *((uint64_t *)addr);
1577                         break;
1578
1579                 default:
1580                         return (dt_set_errno(dtp, EDT_BADSTACKPC));
1581                 }
1582
1583                 if (pc == 0)
1584                         break;
1585
1586                 addr += size;
1587
1588                 xo_open_instance("stack-frames");
1589                 if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1590                         if (pc > sym.st_value) {
1591                                 xo_emit("{:symbol/%s`%s+0x%llx} {:module/%s} "
1592                                         "{:name/%s} {:offset/0x%llx}",
1593                                     dts.dts_object, dts.dts_name,
1594                                     (u_longlong_t)(pc - sym.st_value),
1595                                     dts.dts_object, dts.dts_name,
1596                                     (u_longlong_t)(pc - sym.st_value));
1597                         } else {
1598                                 xo_emit("{:symbol/%s`%s} {:module/%s} "
1599                                         "{:name/%s}",
1600                                     dts.dts_object, dts.dts_name,
1601                                     dts.dts_object, dts.dts_name);
1602                         }
1603                 } else {
1604                         /*
1605                          * We'll repeat the lookup, but this time we'll specify
1606                          * a NULL GElf_Sym -- indicating that we're only
1607                          * interested in the containing module.
1608                          */
1609                         if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1610                                 xo_emit("{:symbol/%s`0x%llx} {:module/%s} "
1611                                         "{:offset/0x%llx}",
1612                                     dts.dts_object, (u_longlong_t)pc,
1613                                     dts.dts_object, (u_longlong_t)pc);
1614                         } else {
1615                                 xo_emit("{:symbol/0x%llx} {:offset/0x%llx}",
1616                                     (u_longlong_t)pc, (u_longlong_t)pc);
1617                         }
1618                 }
1619                 xo_close_instance("stack-frames");
1620         }
1621         xo_close_list("stack-frames");
1622
1623         return (0);
1624 }
1625
1626 int
1627 dt_format_ustack(dtrace_hdl_t *dtp, caddr_t addr, uint64_t arg)
1628 {
1629         uint64_t *pc = (uint64_t *)addr;
1630         uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
1631         uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
1632         const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
1633         const char *str = strsize ? strbase : NULL;
1634         int err = 0;
1635
1636         char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
1637         struct ps_prochandle *P;
1638         GElf_Sym sym;
1639         int i, indent;
1640         pid_t pid;
1641
1642         if (depth == 0)
1643                 return (0);
1644
1645         pid = (pid_t)*pc++;
1646
1647         /*
1648          * Ultimately, we need to add an entry point in the library vector for
1649          * determining <symbol, offset> from <pid, address>.  For now, if
1650          * this is a vector open, we just print the raw address or string.
1651          */
1652         if (dtp->dt_vector == NULL)
1653                 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1654         else
1655                 P = NULL;
1656
1657         if (P != NULL)
1658                 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1659
1660         xo_open_list("ustack-frames");
1661         for (i = 0; i < depth && pc[i] != 0; i++) {
1662                 const prmap_t *map;
1663
1664                 xo_open_instance("ustack-frames");
1665                 if (P != NULL && Plookup_by_addr(P, pc[i],
1666                     name, sizeof (name), &sym) == 0) {
1667                         (void) Pobjname(P, pc[i], objname, sizeof (objname));
1668
1669                         if (pc[i] > sym.st_value) {
1670                                 xo_emit("{:symbol/%s`%s+0x%llx} {:module/%s} "
1671                                         "{:name/%s} {:offset/0x%llx}",
1672                                     dt_basename(objname), name,
1673                                     (u_longlong_t)(pc[i] - sym.st_value),
1674                                     dt_basename(objname), name,
1675                                     (u_longlong_t)(pc[i] - sym.st_value));
1676                         } else {
1677                                 xo_emit("{:symbol/%s`%s} {:module/%s} "
1678                                         "{:name/%s}",
1679                                     dt_basename(objname), name,
1680                                     dt_basename(objname), name);
1681                         }
1682                 } else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
1683                     (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
1684                     (map->pr_mflags & MA_WRITE)))) {
1685                         /*
1686                          * If the current string pointer in the string table
1687                          * does not point to an empty string _and_ the program
1688                          * counter falls in a writable region, we'll use the
1689                          * string from the string table instead of the raw
1690                          * address.  This last condition is necessary because
1691                          * some (broken) ustack helpers will return a string
1692                          * even for a program counter that they can't
1693                          * identify.  If we have a string for a program
1694                          * counter that falls in a segment that isn't
1695                          * writable, we assume that we have fallen into this
1696                          * case and we refuse to use the string.
1697                          */
1698                         xo_emit("{:symbol/%s}", str);
1699                 } else {
1700                         if (P != NULL && Pobjname(P, pc[i], objname,
1701                             sizeof (objname)) != 0) {
1702                                 xo_emit("{:symbol/%s`0x%llx} {:module/%s} "
1703                                         "{:offset/0x%llx}",
1704                                     dt_basename(objname), (u_longlong_t)pc[i],
1705                                     dt_basename(objname), (u_longlong_t)pc[i]);
1706                         } else {
1707                                 xo_emit("{:symbol/0x%llx} {:offset/0x%llx}",
1708                                     (u_longlong_t)pc[i], (u_longlong_t)pc[i]);
1709                         }
1710                 }
1711
1712                 if (str != NULL && str[0] == '@') {
1713                         /*
1714                          * If the first character of the string is an "at" sign,
1715                          * then the string is inferred to be an annotation --
1716                          * and it is printed out beneath the frame and offset
1717                          * with brackets.
1718                          */
1719                         xo_emit("{:annotation/%s}", &str[1]);
1720                 }
1721
1722                 if (str != NULL) {
1723                         str += strlen(str) + 1;
1724                         if (str - strbase >= strsize)
1725                                 str = NULL;
1726                 }
1727                 xo_close_instance("ustack-frames");
1728         }
1729         xo_close_list("ustack-frames");
1730
1731         if (P != NULL) {
1732                 dt_proc_unlock(dtp, P);
1733                 dt_proc_release(dtp, P);
1734         }
1735
1736         return (err);
1737 }
1738
1739 int
1740 dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
1741     caddr_t addr, int depth, int size)
1742 {
1743         dtrace_syminfo_t dts;
1744         GElf_Sym sym;
1745         int i, indent;
1746         char c[PATH_MAX * 2];
1747         uint64_t pc;
1748
1749         if (dt_printf(dtp, fp, "\n") < 0)
1750                 return (-1);
1751
1752         if (format == NULL)
1753                 format = "%s";
1754
1755         if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
1756                 indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
1757         else
1758                 indent = _dtrace_stkindent;
1759
1760         for (i = 0; i < depth; i++) {
1761                 switch (size) {
1762                 case sizeof (uint32_t):
1763                         /* LINTED - alignment */
1764                         pc = *((uint32_t *)addr);
1765                         break;
1766
1767                 case sizeof (uint64_t):
1768                         /* LINTED - alignment */
1769                         pc = *((uint64_t *)addr);
1770                         break;
1771
1772                 default:
1773                         return (dt_set_errno(dtp, EDT_BADSTACKPC));
1774                 }
1775
1776                 if (pc == 0)
1777                         break;
1778
1779                 addr += size;
1780
1781                 if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
1782                         return (-1);
1783
1784                 if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1785                         if (pc > sym.st_value) {
1786                                 (void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
1787                                     dts.dts_object, dts.dts_name,
1788                                     (u_longlong_t)(pc - sym.st_value));
1789                         } else {
1790                                 (void) snprintf(c, sizeof (c), "%s`%s",
1791                                     dts.dts_object, dts.dts_name);
1792                         }
1793                 } else {
1794                         /*
1795                          * We'll repeat the lookup, but this time we'll specify
1796                          * a NULL GElf_Sym -- indicating that we're only
1797                          * interested in the containing module.
1798                          */
1799                         if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1800                                 (void) snprintf(c, sizeof (c), "%s`0x%llx",
1801                                     dts.dts_object, (u_longlong_t)pc);
1802                         } else {
1803                                 (void) snprintf(c, sizeof (c), "0x%llx",
1804                                     (u_longlong_t)pc);
1805                         }
1806                 }
1807
1808                 if (dt_printf(dtp, fp, format, c) < 0)
1809                         return (-1);
1810
1811                 if (dt_printf(dtp, fp, "\n") < 0)
1812                         return (-1);
1813         }
1814
1815         return (0);
1816 }
1817
1818 int
1819 dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
1820     caddr_t addr, uint64_t arg)
1821 {
1822         /* LINTED - alignment */
1823         uint64_t *pc = (uint64_t *)addr;
1824         uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
1825         uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
1826         const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
1827         const char *str = strsize ? strbase : NULL;
1828         int err = 0;
1829
1830         char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
1831         struct ps_prochandle *P;
1832         GElf_Sym sym;
1833         int i, indent;
1834         pid_t pid;
1835
1836         if (depth == 0)
1837                 return (0);
1838
1839         pid = (pid_t)*pc++;
1840
1841         if (dt_printf(dtp, fp, "\n") < 0)
1842                 return (-1);
1843
1844         if (format == NULL)
1845                 format = "%s";
1846
1847         if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
1848                 indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
1849         else
1850                 indent = _dtrace_stkindent;
1851
1852         /*
1853          * Ultimately, we need to add an entry point in the library vector for
1854          * determining <symbol, offset> from <pid, address>.  For now, if
1855          * this is a vector open, we just print the raw address or string.
1856          */
1857         if (dtp->dt_vector == NULL)
1858                 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1859         else
1860                 P = NULL;
1861
1862         if (P != NULL)
1863                 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1864
1865         for (i = 0; i < depth && pc[i] != 0; i++) {
1866                 const prmap_t *map;
1867
1868                 if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1869                         break;
1870
1871                 if (P != NULL && Plookup_by_addr(P, pc[i],
1872                     name, sizeof (name), &sym) == 0) {
1873                         (void) Pobjname(P, pc[i], objname, sizeof (objname));
1874
1875                         if (pc[i] > sym.st_value) {
1876                                 (void) snprintf(c, sizeof (c),
1877                                     "%s`%s+0x%llx", dt_basename(objname), name,
1878                                     (u_longlong_t)(pc[i] - sym.st_value));
1879                         } else {
1880                                 (void) snprintf(c, sizeof (c),
1881                                     "%s`%s", dt_basename(objname), name);
1882                         }
1883                 } else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
1884                     (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
1885                     (map->pr_mflags & MA_WRITE)))) {
1886                         /*
1887                          * If the current string pointer in the string table
1888                          * does not point to an empty string _and_ the program
1889                          * counter falls in a writable region, we'll use the
1890                          * string from the string table instead of the raw
1891                          * address.  This last condition is necessary because
1892                          * some (broken) ustack helpers will return a string
1893                          * even for a program counter that they can't
1894                          * identify.  If we have a string for a program
1895                          * counter that falls in a segment that isn't
1896                          * writable, we assume that we have fallen into this
1897                          * case and we refuse to use the string.
1898                          */
1899                         (void) snprintf(c, sizeof (c), "%s", str);
1900                 } else {
1901                         if (P != NULL && Pobjname(P, pc[i], objname,
1902                             sizeof (objname)) != 0) {
1903                                 (void) snprintf(c, sizeof (c), "%s`0x%llx",
1904                                     dt_basename(objname), (u_longlong_t)pc[i]);
1905                         } else {
1906                                 (void) snprintf(c, sizeof (c), "0x%llx",
1907                                     (u_longlong_t)pc[i]);
1908                         }
1909                 }
1910
1911                 if ((err = dt_printf(dtp, fp, format, c)) < 0)
1912                         break;
1913
1914                 if ((err = dt_printf(dtp, fp, "\n")) < 0)
1915                         break;
1916
1917                 if (str != NULL && str[0] == '@') {
1918                         /*
1919                          * If the first character of the string is an "at" sign,
1920                          * then the string is inferred to be an annotation --
1921                          * and it is printed out beneath the frame and offset
1922                          * with brackets.
1923                          */
1924                         if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1925                                 break;
1926
1927                         (void) snprintf(c, sizeof (c), "  [ %s ]", &str[1]);
1928
1929                         if ((err = dt_printf(dtp, fp, format, c)) < 0)
1930                                 break;
1931
1932                         if ((err = dt_printf(dtp, fp, "\n")) < 0)
1933                                 break;
1934                 }
1935
1936                 if (str != NULL) {
1937                         str += strlen(str) + 1;
1938                         if (str - strbase >= strsize)
1939                                 str = NULL;
1940                 }
1941         }
1942
1943         if (P != NULL) {
1944                 dt_proc_unlock(dtp, P);
1945                 dt_proc_release(dtp, P);
1946         }
1947
1948         return (err);
1949 }
1950
1951 static int
1952 dt_format_usym(dtrace_hdl_t *dtp, caddr_t addr, dtrace_actkind_t act)
1953 {
1954         uint64_t pid = ((uint64_t *)addr)[0];
1955         uint64_t pc = ((uint64_t *)addr)[1];
1956         char *s;
1957         int n, len = 256;
1958
1959         if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1960                 struct ps_prochandle *P;
1961
1962                 if ((P = dt_proc_grab(dtp, pid,
1963                     PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
1964                         GElf_Sym sym;
1965
1966                         dt_proc_lock(dtp, P);
1967
1968                         if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
1969                                 pc = sym.st_value;
1970
1971                         dt_proc_unlock(dtp, P);
1972                         dt_proc_release(dtp, P);
1973                 }
1974         }
1975
1976         do {
1977                 n = len;
1978                 s = alloca(n);
1979         } while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
1980
1981         xo_emit("{:usym/%s}", s);
1982         return (0);
1983 }
1984
1985
1986 static int
1987 dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)
1988 {
1989         /* LINTED - alignment */
1990         uint64_t pid = ((uint64_t *)addr)[0];
1991         /* LINTED - alignment */
1992         uint64_t pc = ((uint64_t *)addr)[1];
1993         const char *format = "  %-50s";
1994         char *s;
1995         int n, len = 256;
1996
1997         if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1998                 struct ps_prochandle *P;
1999
2000                 if ((P = dt_proc_grab(dtp, pid,
2001                     PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
2002                         GElf_Sym sym;
2003
2004                         dt_proc_lock(dtp, P);
2005
2006                         if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
2007                                 pc = sym.st_value;
2008
2009                         dt_proc_unlock(dtp, P);
2010                         dt_proc_release(dtp, P);
2011                 }
2012         }
2013
2014         do {
2015                 n = len;
2016                 s = alloca(n);
2017         } while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
2018
2019         return (dt_printf(dtp, fp, format, s));
2020 }
2021
2022 int
2023 dt_format_umod(dtrace_hdl_t *dtp, caddr_t addr)
2024 {
2025         uint64_t pid = ((uint64_t *)addr)[0];
2026         uint64_t pc = ((uint64_t *)addr)[1];
2027         int err = 0;
2028
2029         char objname[PATH_MAX];
2030         struct ps_prochandle *P;
2031
2032         /*
2033          * See the comment in dt_print_ustack() for the rationale for
2034          * printing raw addresses in the vectored case.
2035          */
2036         if (dtp->dt_vector == NULL)
2037                 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
2038         else
2039                 P = NULL;
2040
2041         if (P != NULL)
2042                 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
2043
2044         if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {
2045                 xo_emit("{:umod/%s}", dt_basename(objname));
2046         } else {
2047                 xo_emit("{:umod/0x%llx}", (u_longlong_t)pc);
2048         }
2049
2050         if (P != NULL) {
2051                 dt_proc_unlock(dtp, P);
2052                 dt_proc_release(dtp, P);
2053         }
2054
2055         return (0);
2056 }
2057
2058 int
2059 dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
2060 {
2061         /* LINTED - alignment */
2062         uint64_t pid = ((uint64_t *)addr)[0];
2063         /* LINTED - alignment */
2064         uint64_t pc = ((uint64_t *)addr)[1];
2065         int err = 0;
2066
2067         char objname[PATH_MAX], c[PATH_MAX * 2];
2068         struct ps_prochandle *P;
2069
2070         if (format == NULL)
2071                 format = "  %-50s";
2072
2073         /*
2074          * See the comment in dt_print_ustack() for the rationale for
2075          * printing raw addresses in the vectored case.
2076          */
2077         if (dtp->dt_vector == NULL)
2078                 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
2079         else
2080                 P = NULL;
2081
2082         if (P != NULL)
2083                 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
2084
2085         if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {
2086                 (void) snprintf(c, sizeof (c), "%s", dt_basename(objname));
2087         } else {
2088                 (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
2089         }
2090
2091         err = dt_printf(dtp, fp, format, c);
2092
2093         if (P != NULL) {
2094                 dt_proc_unlock(dtp, P);
2095                 dt_proc_release(dtp, P);
2096         }
2097
2098         return (err);
2099 }
2100
2101 static int
2102 dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
2103 {
2104         /* LINTED - alignment */
2105         uint64_t pc = *((uint64_t *)addr);
2106         dtrace_syminfo_t dts;
2107         GElf_Sym sym;
2108         char c[PATH_MAX * 2];
2109
2110         if (format == NULL)
2111                 format = "  %-50s";
2112
2113         if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
2114                 if (dtp->dt_oformat)
2115                         xo_emit("{:sym/%s`%s} {:object/%s} {:name/%s}",
2116                             dts.dts_object, dts.dts_name, dts.dts_object,
2117                             dts.dts_name);
2118                 else
2119                         (void) snprintf(c, sizeof (c), "%s`%s",
2120                             dts.dts_object, dts.dts_name);
2121         } else {
2122                 /*
2123                  * We'll repeat the lookup, but this time we'll specify a
2124                  * NULL GElf_Sym -- indicating that we're only interested in
2125                  * the containing module.
2126                  */
2127                 if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
2128                         if (dtp->dt_oformat)
2129                                 xo_emit("{:sym/%s`0x%llx} {:object/%s} "
2130                                         "{:offset/0x%llx}",
2131                                     dts.dts_object, (u_longlong_t)pc,
2132                                     dts.dts_object, (u_longlong_t)pc);
2133                         else
2134                                 (void) snprintf(c, sizeof (c), "%s`0x%llx",
2135                                     dts.dts_object, (u_longlong_t)pc);
2136                 } else {
2137                         if (dtp->dt_oformat)
2138                                 xo_emit("{:sym/0x%llx} {:offset/0x%llx}",
2139                                     (u_longlong_t)pc, (u_longlong_t)pc);
2140                         else
2141                                 (void) snprintf(c, sizeof (c), "0x%llx",
2142                                     (u_longlong_t)pc);
2143                 }
2144         }
2145
2146         if (dtp->dt_oformat != 0 && dt_printf(dtp, fp, format, c) < 0)
2147                 return (-1);
2148
2149         return (0);
2150 }
2151
2152 int
2153 dt_format_mod(dtrace_hdl_t *dtp, caddr_t addr)
2154 {
2155         /* LINTED - alignment */
2156         uint64_t pc = *((uint64_t *)addr);
2157         dtrace_syminfo_t dts;
2158
2159         if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
2160                 xo_emit("{:mod/%s}", dts.dts_object);
2161         } else {
2162                 xo_emit("{:mod/0x%llx}", (u_longlong_t)pc);
2163         }
2164
2165         return (0);
2166 }
2167
2168 int
2169 dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
2170 {
2171         /* LINTED - alignment */
2172         uint64_t pc = *((uint64_t *)addr);
2173         dtrace_syminfo_t dts;
2174         char c[PATH_MAX * 2];
2175
2176         if (format == NULL)
2177                 format = "  %-50s";
2178
2179         if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
2180                 (void) snprintf(c, sizeof (c), "%s", dts.dts_object);
2181         } else {
2182                 (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
2183         }
2184
2185         if (dt_printf(dtp, fp, format, c) < 0)
2186                 return (-1);
2187
2188         return (0);
2189 }
2190
2191 static char *
2192 dt_format_bytes_get(dtrace_hdl_t *dtp, caddr_t addr, size_t nbytes)
2193 {
2194         char *s = dt_alloc(dtp, nbytes * 2 + 2 + 1); /* 2 bytes per byte + 0x + '\0' */
2195         char t[6];
2196         char *c = (char *)addr;
2197         size_t i, j;
2198
2199         if (s == NULL)
2200                 return (NULL);
2201
2202         /*
2203          * XXX: Some duplication with dt_print_bytes().
2204          */
2205         for (i = 0; i < nbytes; i++) {
2206                 if (isprint(c[i]) || isspace(c[i]) || c[i] == '\b' || c[i] == '\a')
2207                         continue;
2208
2209                 if (c[i] == '\0' && i > 0) {
2210                         for (j = i + 1; j < nbytes; j++) {
2211                                 if (c[j] != '\0')
2212                                         break;
2213                         }
2214
2215                         if (j != nbytes)
2216                                 break;
2217
2218                         memcpy(s, c, nbytes);
2219                         return (s);
2220                 }
2221
2222                 break;
2223         }
2224
2225         if (i == nbytes) {
2226                 memcpy(s, c, nbytes);
2227                 s[nbytes] = '\0';
2228                 return (s);
2229         }
2230
2231         s[0] = '0';
2232         s[1] = 'x';
2233         for (i = 0; i < nbytes; i++) {
2234                 snprintf(t, sizeof(t), "%02x", (uchar_t)c[i]);
2235                 memcpy(s + (i * 2) + 2, t, 2);
2236         }
2237
2238         s[nbytes * 2 + 2] = 0;
2239         return (s);
2240 }
2241
2242 static int
2243 dt_format_memory(dtrace_hdl_t *dtp, caddr_t addr)
2244 {
2245
2246         size_t nbytes = *((uintptr_t *) addr);
2247         char *s;
2248
2249         s = dt_format_bytes_get(dtp, addr + sizeof(uintptr_t), nbytes);
2250         if (s == NULL)
2251                 return (-1);
2252
2253         xo_emit("{:printm/%s}", s);
2254         dt_free(dtp, s);
2255
2256         return (0);
2257 }
2258
2259 static int
2260 dt_print_memory(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
2261 {
2262         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
2263         size_t nbytes = *((uintptr_t *) addr);
2264
2265         return (dt_print_bytes(dtp, fp, addr + sizeof(uintptr_t),
2266             nbytes, 50, quiet, 1));
2267 }
2268
2269 typedef struct dt_normal {
2270         dtrace_aggvarid_t dtnd_id;
2271         uint64_t dtnd_normal;
2272 } dt_normal_t;
2273
2274 static int
2275 dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
2276 {
2277         dt_normal_t *normal = arg;
2278         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2279         dtrace_aggvarid_t id = normal->dtnd_id;
2280
2281         if (agg->dtagd_nrecs == 0)
2282                 return (DTRACE_AGGWALK_NEXT);
2283
2284         if (agg->dtagd_varid != id)
2285                 return (DTRACE_AGGWALK_NEXT);
2286
2287         ((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
2288         return (DTRACE_AGGWALK_NORMALIZE);
2289 }
2290
2291 static int
2292 dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
2293 {
2294         dt_normal_t normal;
2295         caddr_t addr;
2296
2297         /*
2298          * We (should) have two records:  the aggregation ID followed by the
2299          * normalization value.
2300          */
2301         addr = base + rec->dtrd_offset;
2302
2303         if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
2304                 return (dt_set_errno(dtp, EDT_BADNORMAL));
2305
2306         /* LINTED - alignment */
2307         normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
2308         rec++;
2309
2310         if (rec->dtrd_action != DTRACEACT_LIBACT)
2311                 return (dt_set_errno(dtp, EDT_BADNORMAL));
2312
2313         if (rec->dtrd_arg != DT_ACT_NORMALIZE)
2314                 return (dt_set_errno(dtp, EDT_BADNORMAL));
2315
2316         addr = base + rec->dtrd_offset;
2317
2318         switch (rec->dtrd_size) {
2319         case sizeof (uint64_t):
2320                 /* LINTED - alignment */
2321                 normal.dtnd_normal = *((uint64_t *)addr);
2322                 break;
2323         case sizeof (uint32_t):
2324                 /* LINTED - alignment */
2325                 normal.dtnd_normal = *((uint32_t *)addr);
2326                 break;
2327         case sizeof (uint16_t):
2328                 /* LINTED - alignment */
2329                 normal.dtnd_normal = *((uint16_t *)addr);
2330                 break;
2331         case sizeof (uint8_t):
2332                 normal.dtnd_normal = *((uint8_t *)addr);
2333                 break;
2334         default:
2335                 return (dt_set_errno(dtp, EDT_BADNORMAL));
2336         }
2337
2338         (void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
2339
2340         return (0);
2341 }
2342
2343 static int
2344 dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
2345 {
2346         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2347         dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
2348
2349         if (agg->dtagd_nrecs == 0)
2350                 return (DTRACE_AGGWALK_NEXT);
2351
2352         if (agg->dtagd_varid != id)
2353                 return (DTRACE_AGGWALK_NEXT);
2354
2355         return (DTRACE_AGGWALK_DENORMALIZE);
2356 }
2357
2358 static int
2359 dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)
2360 {
2361         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2362         dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
2363
2364         if (agg->dtagd_nrecs == 0)
2365                 return (DTRACE_AGGWALK_NEXT);
2366
2367         if (agg->dtagd_varid != id)
2368                 return (DTRACE_AGGWALK_NEXT);
2369
2370         return (DTRACE_AGGWALK_CLEAR);
2371 }
2372
2373 typedef struct dt_trunc {
2374         dtrace_aggvarid_t dttd_id;
2375         uint64_t dttd_remaining;
2376 } dt_trunc_t;
2377
2378 static int
2379 dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)
2380 {
2381         dt_trunc_t *trunc = arg;
2382         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2383         dtrace_aggvarid_t id = trunc->dttd_id;
2384
2385         if (agg->dtagd_nrecs == 0)
2386                 return (DTRACE_AGGWALK_NEXT);
2387
2388         if (agg->dtagd_varid != id)
2389                 return (DTRACE_AGGWALK_NEXT);
2390
2391         if (trunc->dttd_remaining == 0)
2392                 return (DTRACE_AGGWALK_REMOVE);
2393
2394         trunc->dttd_remaining--;
2395         return (DTRACE_AGGWALK_NEXT);
2396 }
2397
2398 static int
2399 dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
2400 {
2401         dt_trunc_t trunc;
2402         caddr_t addr;
2403         int64_t remaining;
2404         int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);
2405
2406         /*
2407          * We (should) have two records:  the aggregation ID followed by the
2408          * number of aggregation entries after which the aggregation is to be
2409          * truncated.
2410          */
2411         addr = base + rec->dtrd_offset;
2412
2413         if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
2414                 return (dt_set_errno(dtp, EDT_BADTRUNC));
2415
2416         /* LINTED - alignment */
2417         trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
2418         rec++;
2419
2420         if (rec->dtrd_action != DTRACEACT_LIBACT)
2421                 return (dt_set_errno(dtp, EDT_BADTRUNC));
2422
2423         if (rec->dtrd_arg != DT_ACT_TRUNC)
2424                 return (dt_set_errno(dtp, EDT_BADTRUNC));
2425
2426         addr = base + rec->dtrd_offset;
2427
2428         switch (rec->dtrd_size) {
2429         case sizeof (uint64_t):
2430                 /* LINTED - alignment */
2431                 remaining = *((int64_t *)addr);
2432                 break;
2433         case sizeof (uint32_t):
2434                 /* LINTED - alignment */
2435                 remaining = *((int32_t *)addr);
2436                 break;
2437         case sizeof (uint16_t):
2438                 /* LINTED - alignment */
2439                 remaining = *((int16_t *)addr);
2440                 break;
2441         case sizeof (uint8_t):
2442                 remaining = *((int8_t *)addr);
2443                 break;
2444         default:
2445                 return (dt_set_errno(dtp, EDT_BADNORMAL));
2446         }
2447
2448         if (remaining < 0) {
2449                 func = dtrace_aggregate_walk_valsorted;
2450                 remaining = -remaining;
2451         } else {
2452                 func = dtrace_aggregate_walk_valrevsorted;
2453         }
2454
2455         assert(remaining >= 0);
2456         trunc.dttd_remaining = remaining;
2457
2458         (void) func(dtp, dt_trunc_agg, &trunc);
2459
2460         return (0);
2461 }
2462
2463 static int
2464 dt_format_datum(dtrace_hdl_t *dtp, dtrace_recdesc_t *rec, caddr_t addr,
2465     size_t size, const dtrace_aggdata_t *aggdata, uint64_t normal,
2466     dt_print_aggdata_t *pd)
2467 {
2468         dtrace_actkind_t act = rec->dtrd_action;
2469         boolean_t packed = pd->dtpa_agghist || pd->dtpa_aggpack;
2470         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2471         char fmt[512];
2472         char *s;
2473
2474         if (packed && pd->dtpa_agghisthdr != agg->dtagd_varid)
2475                 pd->dtpa_agghisthdr = agg->dtagd_varid;
2476
2477         switch (act) {
2478         case DTRACEACT_STACK:
2479                 return (dt_format_stack(dtp, addr, rec->dtrd_arg,
2480                     rec->dtrd_size / rec->dtrd_arg));
2481
2482         case DTRACEACT_USTACK:
2483         case DTRACEACT_JSTACK:
2484                 return (dt_format_ustack(dtp, addr, rec->dtrd_arg));
2485
2486         case DTRACEACT_USYM:
2487         case DTRACEACT_UADDR:
2488                 return (dt_format_usym(dtp, addr, act));
2489
2490         case DTRACEACT_UMOD:
2491                 return (dt_format_umod(dtp, addr));
2492
2493         case DTRACEACT_SYM:
2494                 return (dt_format_sym(dtp, addr));
2495         case DTRACEACT_MOD:
2496                 return (dt_format_mod(dtp, addr));
2497
2498         case DTRACEAGG_QUANTIZE:
2499                 return (dt_format_quantize(dtp, addr, size, normal));
2500
2501         case DTRACEAGG_LQUANTIZE:
2502                 return (dt_format_lquantize(dtp, addr, size, normal));
2503
2504         case DTRACEAGG_LLQUANTIZE:
2505                 return (dt_format_llquantize(dtp, addr, size, normal));
2506
2507         case DTRACEAGG_AVG:
2508                 return (dt_format_average(dtp, addr, size, normal));
2509
2510         case DTRACEAGG_STDDEV:
2511                 return (dt_format_stddev(dtp, addr, size, normal));
2512
2513         default:
2514                 break;
2515         }
2516
2517         switch (size) {
2518         case sizeof (uint64_t):
2519                 snprintf(fmt, sizeof(fmt), "{:%s/%%lld}", pd->dtpa_keyname);
2520                 xo_emit(fmt, (long long)*((uint64_t *)addr) / normal);
2521                 break;
2522         case sizeof (uint32_t):
2523                 snprintf(fmt, sizeof(fmt), "{:%s/%%d}", pd->dtpa_keyname);
2524                 xo_emit(fmt, *((uint32_t *)addr) / (uint32_t)normal);
2525                 break;
2526         case sizeof (uint16_t):
2527                 snprintf(fmt, sizeof(fmt), "{:%s/%%d}", pd->dtpa_keyname);
2528                 xo_emit(fmt, *((uint16_t *)addr) / (uint32_t)normal);
2529                 break;
2530         case sizeof (uint8_t):
2531                 snprintf(fmt, sizeof(fmt), "{:%s/%%d}", pd->dtpa_keyname);
2532                 xo_emit(fmt, *((uint8_t *)addr) / (uint32_t)normal);
2533                 break;
2534         default:
2535                 s = dt_format_bytes_get(dtp, addr, size);
2536                 if (s == NULL)
2537                         return (-1);
2538
2539                 xo_emit("{:value/%s}", s);
2540                 dt_free(dtp, s);
2541                 break;
2542         }
2543
2544         return (0);
2545 }
2546
2547 static int
2548 dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
2549     caddr_t addr, size_t size, const dtrace_aggdata_t *aggdata,
2550     uint64_t normal, dt_print_aggdata_t *pd)
2551 {
2552         int err, width;
2553         dtrace_actkind_t act = rec->dtrd_action;
2554         boolean_t packed = pd->dtpa_agghist || pd->dtpa_aggpack;
2555         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2556
2557         static struct {
2558                 size_t size;
2559                 int width;
2560                 int packedwidth;
2561         } *fmt, fmttab[] = {
2562                 { sizeof (uint8_t),     3,      3 },
2563                 { sizeof (uint16_t),    5,      5 },
2564                 { sizeof (uint32_t),    8,      8 },
2565                 { sizeof (uint64_t),    16,     16 },
2566                 { 0,                    -50,    16 }
2567         };
2568
2569         if (packed && pd->dtpa_agghisthdr != agg->dtagd_varid) {
2570                 dtrace_recdesc_t *r;
2571
2572                 width = 0;
2573
2574                 /*
2575                  * To print our quantization header for either an agghist or
2576                  * aggpack aggregation, we need to iterate through all of our
2577                  * of our records to determine their width.
2578                  */
2579                 for (r = rec; !DTRACEACT_ISAGG(r->dtrd_action); r++) {
2580                         for (fmt = fmttab; fmt->size &&
2581                             fmt->size != r->dtrd_size; fmt++)
2582                                 continue;
2583
2584                         width += fmt->packedwidth + 1;
2585                 }
2586
2587                 if (pd->dtpa_agghist) {
2588                         if (dt_print_quanthdr(dtp, fp, width) < 0)
2589                                 return (-1);
2590                 } else {
2591                         if (dt_print_quanthdr_packed(dtp, fp,
2592                             width, aggdata, r->dtrd_action) < 0)
2593                                 return (-1);
2594                 }
2595
2596                 pd->dtpa_agghisthdr = agg->dtagd_varid;
2597         }
2598
2599         if (pd->dtpa_agghist && DTRACEACT_ISAGG(act)) {
2600                 char positives = aggdata->dtada_flags & DTRACE_A_HASPOSITIVES;
2601                 char negatives = aggdata->dtada_flags & DTRACE_A_HASNEGATIVES;
2602                 int64_t val;
2603
2604                 assert(act == DTRACEAGG_SUM || act == DTRACEAGG_COUNT);
2605                 val = (long long)*((uint64_t *)addr);
2606
2607                 if (dt_printf(dtp, fp, " ") < 0)
2608                         return (-1);
2609
2610                 return (dt_print_quantline(dtp, fp, val, normal,
2611                     aggdata->dtada_total, positives, negatives));
2612         }
2613
2614         if (pd->dtpa_aggpack && DTRACEACT_ISAGG(act)) {
2615                 switch (act) {
2616                 case DTRACEAGG_QUANTIZE:
2617                         return (dt_print_quantize_packed(dtp,
2618                             fp, addr, size, aggdata));
2619                 case DTRACEAGG_LQUANTIZE:
2620                         return (dt_print_lquantize_packed(dtp,
2621                             fp, addr, size, aggdata));
2622                 default:
2623                         break;
2624                 }
2625         }
2626
2627         switch (act) {
2628         case DTRACEACT_STACK:
2629                 return (dt_print_stack(dtp, fp, NULL, addr,
2630                     rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));
2631
2632         case DTRACEACT_USTACK:
2633         case DTRACEACT_JSTACK:
2634                 return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));
2635
2636         case DTRACEACT_USYM:
2637         case DTRACEACT_UADDR:
2638                 return (dt_print_usym(dtp, fp, addr, act));
2639
2640         case DTRACEACT_UMOD:
2641                 return (dt_print_umod(dtp, fp, NULL, addr));
2642
2643         case DTRACEACT_SYM:
2644                 return (dt_print_sym(dtp, fp, NULL, addr));
2645
2646         case DTRACEACT_MOD:
2647                 return (dt_print_mod(dtp, fp, NULL, addr));
2648
2649         case DTRACEAGG_QUANTIZE:
2650                 return (dt_print_quantize(dtp, fp, addr, size, normal));
2651
2652         case DTRACEAGG_LQUANTIZE:
2653                 return (dt_print_lquantize(dtp, fp, addr, size, normal));
2654
2655         case DTRACEAGG_LLQUANTIZE:
2656                 return (dt_print_llquantize(dtp, fp, addr, size, normal));
2657
2658         case DTRACEAGG_AVG:
2659                 return (dt_print_average(dtp, fp, addr, size, normal));
2660
2661         case DTRACEAGG_STDDEV:
2662                 return (dt_print_stddev(dtp, fp, addr, size, normal));
2663
2664         default:
2665                 break;
2666         }
2667
2668         for (fmt = fmttab; fmt->size && fmt->size != size; fmt++)
2669                 continue;
2670
2671         width = packed ? fmt->packedwidth : fmt->width;
2672
2673         switch (size) {
2674         case sizeof (uint64_t):
2675                 err = dt_printf(dtp, fp, " %*lld", width,
2676                     /* LINTED - alignment */
2677                     (long long)*((uint64_t *)addr) / normal);
2678                 break;
2679         case sizeof (uint32_t):
2680                 /* LINTED - alignment */
2681                 err = dt_printf(dtp, fp, " %*d", width, *((uint32_t *)addr) /
2682                     (uint32_t)normal);
2683                 break;
2684         case sizeof (uint16_t):
2685                 /* LINTED - alignment */
2686                 err = dt_printf(dtp, fp, " %*d", width, *((uint16_t *)addr) /
2687                     (uint32_t)normal);
2688                 break;
2689         case sizeof (uint8_t):
2690                 err = dt_printf(dtp, fp, " %*d", width, *((uint8_t *)addr) /
2691                     (uint32_t)normal);
2692                 break;
2693         default:
2694                 err = dt_print_bytes(dtp, fp, addr, size, width, 0, 0);
2695                 break;
2696         }
2697
2698         return (err);
2699 }
2700
2701 int
2702 dt_format_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
2703 {
2704         int i, aggact = 0;
2705         dt_print_aggdata_t *pd = arg;
2706         const dtrace_aggdata_t *aggdata = aggsdata[0];
2707         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2708         dtrace_hdl_t *dtp = pd->dtpa_dtp;
2709         dtrace_recdesc_t *rec;
2710         dtrace_actkind_t act;
2711         caddr_t addr;
2712         size_t size;
2713
2714         if (pd->dtpa_aggname == NULL)
2715                 pd->dtpa_aggname = agg->dtagd_name;
2716
2717         xo_open_instance("aggregation-data");
2718         strcpy(pd->dtpa_keyname, "value");
2719         xo_open_list("keys");
2720
2721         /*
2722          * Iterate over each record description in the key, printing the traced
2723          * data, skipping the first datum (the tuple member created by the
2724          * compiler).
2725          */
2726         for (i = 1; i < agg->dtagd_nrecs; i++) {
2727                 rec = &agg->dtagd_rec[i];
2728                 act = rec->dtrd_action;
2729                 addr = aggdata->dtada_data + rec->dtrd_offset;
2730                 size = rec->dtrd_size;
2731
2732                 if (DTRACEACT_ISAGG(act)) {
2733                         aggact = i;
2734                         break;
2735                 }
2736
2737                 xo_open_instance("keys");
2738                 if (dt_format_datum(dtp, rec, addr,
2739                     size, aggdata, 1, pd) < 0) {
2740                         xo_close_instance("keys");
2741                         xo_close_instance("aggregation-data");
2742                         return (-1);
2743                 }
2744                 xo_close_instance("keys");
2745
2746                 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
2747                     DTRACE_BUFDATA_AGGKEY) < 0) {
2748                         xo_close_instance("aggregation-data");
2749                         return (-1);
2750                 }
2751         }
2752         xo_close_list("keys");
2753
2754         assert(aggact != 0);
2755
2756         for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
2757                 uint64_t normal;
2758
2759                 aggdata = aggsdata[i];
2760                 agg = aggdata->dtada_desc;
2761                 rec = &agg->dtagd_rec[aggact];
2762                 act = rec->dtrd_action;
2763                 addr = aggdata->dtada_data + rec->dtrd_offset;
2764                 size = rec->dtrd_size;
2765
2766                 assert(DTRACEACT_ISAGG(act));
2767
2768                 switch (act) {
2769                 case DTRACEAGG_MIN:
2770                         strcpy(pd->dtpa_keyname, "min");
2771                         break;
2772                 case DTRACEAGG_MAX:
2773                         strcpy(pd->dtpa_keyname, "max");
2774                         break;
2775                 case DTRACEAGG_COUNT:
2776                         strcpy(pd->dtpa_keyname, "count");
2777                         break;
2778                 case DTRACEAGG_SUM:
2779                         strcpy(pd->dtpa_keyname, "sum");
2780                         break;
2781                 default:
2782                         strcpy(pd->dtpa_keyname, "UNKNOWN");
2783                         break;
2784                 }
2785
2786                 normal = aggdata->dtada_normal;
2787
2788                 if (dt_format_datum(dtp, rec, addr, size,
2789                     aggdata, normal, pd) < 0) {
2790                         xo_close_instance("aggregation-data");
2791                         return (-1);
2792                 }
2793
2794                 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
2795                     DTRACE_BUFDATA_AGGVAL) < 0) {
2796                         xo_close_instance("aggregation-data");
2797                         return (-1);
2798                 }
2799
2800                 if (!pd->dtpa_allunprint)
2801                         agg->dtagd_flags |= DTRACE_AGD_PRINTED;
2802         }
2803
2804         if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
2805             DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0) {
2806                 xo_close_instance("aggregation-data");
2807                 return (-1);
2808         }
2809
2810         xo_close_instance("aggregation-data");
2811         return (0);
2812 }
2813
2814 int
2815 dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
2816 {
2817         int i, aggact = 0;
2818         dt_print_aggdata_t *pd = arg;
2819         const dtrace_aggdata_t *aggdata = aggsdata[0];
2820         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2821         FILE *fp = pd->dtpa_fp;
2822         dtrace_hdl_t *dtp = pd->dtpa_dtp;
2823         dtrace_recdesc_t *rec;
2824         dtrace_actkind_t act;
2825         caddr_t addr;
2826         size_t size;
2827
2828         pd->dtpa_agghist = (aggdata->dtada_flags & DTRACE_A_TOTAL);
2829         pd->dtpa_aggpack = (aggdata->dtada_flags & DTRACE_A_MINMAXBIN);
2830
2831         /*
2832          * Iterate over each record description in the key, printing the traced
2833          * data, skipping the first datum (the tuple member created by the
2834          * compiler).
2835          */
2836         for (i = 1; i < agg->dtagd_nrecs; i++) {
2837                 rec = &agg->dtagd_rec[i];
2838                 act = rec->dtrd_action;
2839                 addr = aggdata->dtada_data + rec->dtrd_offset;
2840                 size = rec->dtrd_size;
2841
2842                 if (DTRACEACT_ISAGG(act)) {
2843                         aggact = i;
2844                         break;
2845                 }
2846
2847                 if (dt_print_datum(dtp, fp, rec, addr,
2848                     size, aggdata, 1, pd) < 0)
2849                         return (-1);
2850
2851                 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
2852                     DTRACE_BUFDATA_AGGKEY) < 0)
2853                         return (-1);
2854         }
2855
2856         assert(aggact != 0);
2857
2858         for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
2859                 uint64_t normal;
2860
2861                 aggdata = aggsdata[i];
2862                 agg = aggdata->dtada_desc;
2863                 rec = &agg->dtagd_rec[aggact];
2864                 act = rec->dtrd_action;
2865                 addr = aggdata->dtada_data + rec->dtrd_offset;
2866                 size = rec->dtrd_size;
2867
2868                 assert(DTRACEACT_ISAGG(act));
2869                 normal = aggdata->dtada_normal;
2870
2871                 if (dt_print_datum(dtp, fp, rec, addr,
2872                     size, aggdata, normal, pd) < 0)
2873                         return (-1);
2874
2875                 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
2876                     DTRACE_BUFDATA_AGGVAL) < 0)
2877                         return (-1);
2878
2879                 if (!pd->dtpa_allunprint)
2880                         agg->dtagd_flags |= DTRACE_AGD_PRINTED;
2881         }
2882
2883         if (!pd->dtpa_agghist && !pd->dtpa_aggpack) {
2884                 if (dt_printf(dtp, fp, "\n") < 0)
2885                         return (-1);
2886         }
2887
2888         if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
2889             DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)
2890                 return (-1);
2891
2892         return (0);
2893 }
2894
2895 int
2896 dt_format_agg(const dtrace_aggdata_t *aggdata, void *arg)
2897 {
2898         dt_print_aggdata_t *pd = arg;
2899         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2900         dtrace_aggvarid_t aggvarid = pd->dtpa_id;
2901
2902         if (pd->dtpa_allunprint) {
2903                 if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
2904                         return (0);
2905         } else {
2906                 /*
2907                  * If we're not printing all unprinted aggregations, then the
2908                  * aggregation variable ID denotes a specific aggregation
2909                  * variable that we should print -- skip any other aggregations
2910                  * that we encounter.
2911                  */
2912                 if (agg->dtagd_nrecs == 0)
2913                         return (0);
2914
2915                 if (aggvarid != agg->dtagd_varid)
2916                         return (0);
2917         }
2918
2919         return (dt_format_aggs(&aggdata, 1, arg));
2920 }
2921
2922 int
2923 dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)
2924 {
2925         dt_print_aggdata_t *pd = arg;
2926         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2927         dtrace_aggvarid_t aggvarid = pd->dtpa_id;
2928
2929         if (pd->dtpa_allunprint) {
2930                 if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
2931                         return (0);
2932         } else {
2933                 /*
2934                  * If we're not printing all unprinted aggregations, then the
2935                  * aggregation variable ID denotes a specific aggregation
2936                  * variable that we should print -- skip any other aggregations
2937                  * that we encounter.
2938                  */
2939                 if (agg->dtagd_nrecs == 0)
2940                         return (0);
2941
2942                 if (aggvarid != agg->dtagd_varid)
2943                         return (0);
2944         }
2945
2946         return (dt_print_aggs(&aggdata, 1, arg));
2947 }
2948
2949 int
2950 dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
2951     const char *option, const char *value)
2952 {
2953         int len, rval;
2954         char *msg;
2955         const char *errstr;
2956         dtrace_setoptdata_t optdata;
2957
2958         bzero(&optdata, sizeof (optdata));
2959         (void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);
2960
2961         if (dtrace_setopt(dtp, option, value) == 0) {
2962                 (void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);
2963                 optdata.dtsda_probe = data;
2964                 optdata.dtsda_option = option;
2965                 optdata.dtsda_handle = dtp;
2966
2967                 if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)
2968                         return (rval);
2969
2970                 return (0);
2971         }
2972
2973         errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));
2974         len = strlen(option) + strlen(value) + strlen(errstr) + 80;
2975         msg = alloca(len);
2976
2977         (void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",
2978             option, value, errstr);
2979
2980         if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)
2981                 return (0);
2982
2983         return (rval);
2984 }
2985
2986 /*
2987  * Helper functions to help maintain style(9) in dt_consume_cpu().
2988  */
2989 static int
2990 dt_oformat_agg_sorted(dtrace_hdl_t *dtp, dtrace_aggregate_f *func,
2991     dt_print_aggdata_t *pd)
2992 {
2993         int r;
2994
2995         r = dtrace_aggregate_walk_sorted(dtp, dt_format_agg, pd);
2996         if (r < 0) {
2997                 xo_close_list("aggregation-data");
2998                 xo_emit("{:aggregation-name/%s}", pd->dtpa_aggname);
2999                 xo_close_instance("output");
3000         }
3001
3002         return (r);
3003 }
3004
3005 static void
3006 dt_oformat_agg_name(dt_print_aggdata_t *pd)
3007 {
3008
3009         xo_close_list("aggregation-data");
3010         xo_emit("{:aggregation-name/%s}", pd->dtpa_aggname);
3011 }
3012
3013 static int
3014 dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu,
3015     dtrace_bufdesc_t *buf, boolean_t just_one,
3016     dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
3017 {
3018         dtrace_epid_t id;
3019         size_t offs;
3020         int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
3021         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
3022         int rval, i, n;
3023         uint64_t tracememsize = 0;
3024         dtrace_probedata_t data;
3025         uint64_t drops;
3026         size_t skip_format;
3027
3028         bzero(&data, sizeof (data));
3029         data.dtpda_handle = dtp;
3030         data.dtpda_cpu = cpu;
3031         data.dtpda_flow = dtp->dt_flow;
3032         data.dtpda_indent = dtp->dt_indent;
3033         data.dtpda_prefix = dtp->dt_prefix;
3034
3035         for (offs = buf->dtbd_oldest; offs < buf->dtbd_size; ) {
3036                 dtrace_eprobedesc_t *epd;
3037
3038                 /*
3039                  * We're guaranteed to have an ID.
3040                  */
3041                 id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
3042
3043                 if (id == DTRACE_EPIDNONE) {
3044                         /*
3045                          * This is filler to assure proper alignment of the
3046                          * next record; we simply ignore it.
3047                          */
3048                         offs += sizeof (id);
3049                         continue;
3050                 }
3051
3052                 if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
3053                     &data.dtpda_pdesc)) != 0)
3054                         return (rval);
3055
3056                 epd = data.dtpda_edesc;
3057                 data.dtpda_data = buf->dtbd_data + offs;
3058                 data.dtpda_timestamp = DTRACE_RECORD_LOAD_TIMESTAMP(
3059                     (struct dtrace_rechdr *)data.dtpda_data);
3060
3061                 if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
3062                         rval = dt_handle(dtp, &data);
3063
3064                         if (rval == DTRACE_CONSUME_NEXT)
3065                                 goto nextepid;
3066
3067                         if (rval == DTRACE_CONSUME_ERROR)
3068                                 return (-1);
3069                 }
3070
3071                 if (flow)
3072                         (void) dt_flowindent(dtp, &data, dtp->dt_last_epid,
3073                             buf, offs);
3074
3075                 if (dtp->dt_oformat)
3076                         xo_open_instance("probes");
3077                 rval = (*efunc)(&data, arg);
3078
3079                 if (flow) {
3080                         if (data.dtpda_flow == DTRACEFLOW_ENTRY)
3081                                 data.dtpda_indent += 2;
3082                 }
3083
3084                 if (rval == DTRACE_CONSUME_NEXT)
3085                         goto nextepid;
3086
3087                 if (rval == DTRACE_CONSUME_ABORT)
3088                         return (dt_set_errno(dtp, EDT_DIRABORT));
3089
3090                 if (rval != DTRACE_CONSUME_THIS)
3091                         return (dt_set_errno(dtp, EDT_BADRVAL));
3092
3093                 skip_format = 0;
3094                 if (dtp->dt_oformat)
3095                         xo_open_list("output");
3096                 for (i = 0; i < epd->dtepd_nrecs; i++) {
3097                         caddr_t addr;
3098                         dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
3099                         dtrace_actkind_t act = rec->dtrd_action;
3100
3101                         if (skip_format > 0)
3102                                 skip_format--;
3103
3104                         data.dtpda_data = buf->dtbd_data + offs +
3105                             rec->dtrd_offset;
3106                         addr = data.dtpda_data;
3107
3108                         if (act == DTRACEACT_LIBACT) {
3109                                 uint64_t arg = rec->dtrd_arg;
3110                                 dtrace_aggvarid_t id;
3111
3112                                 switch (arg) {
3113                                 case DT_ACT_CLEAR:
3114                                         /* LINTED - alignment */
3115                                         id = *((dtrace_aggvarid_t *)addr);
3116                                         (void) dtrace_aggregate_walk(dtp,
3117                                             dt_clear_agg, &id);
3118                                         continue;
3119
3120                                 case DT_ACT_DENORMALIZE:
3121                                         /* LINTED - alignment */
3122                                         id = *((dtrace_aggvarid_t *)addr);
3123                                         (void) dtrace_aggregate_walk(dtp,
3124                                             dt_denormalize_agg, &id);
3125                                         continue;
3126
3127                                 case DT_ACT_FTRUNCATE:
3128                                         if (fp == NULL)
3129                                                 continue;
3130
3131                                         (void) fflush(fp);
3132                                         (void) ftruncate(fileno(fp), 0);
3133                                         (void) fseeko(fp, 0, SEEK_SET);
3134                                         continue;
3135
3136                                 case DT_ACT_NORMALIZE:
3137                                         if (i == epd->dtepd_nrecs - 1)
3138                                                 return (dt_set_errno(dtp,
3139                                                     EDT_BADNORMAL));
3140
3141                                         if (dt_normalize(dtp,
3142                                             buf->dtbd_data + offs, rec) != 0)
3143                                                 return (-1);
3144
3145                                         i++;
3146                                         continue;
3147
3148                                 case DT_ACT_SETOPT: {
3149                                         uint64_t *opts = dtp->dt_options;
3150                                         dtrace_recdesc_t *valrec;
3151                                         uint32_t valsize;
3152                                         caddr_t val;
3153                                         int rv;
3154
3155                                         if (i == epd->dtepd_nrecs - 1) {
3156                                                 return (dt_set_errno(dtp,
3157                                                     EDT_BADSETOPT));
3158                                         }
3159
3160                                         valrec = &epd->dtepd_rec[++i];
3161                                         valsize = valrec->dtrd_size;
3162
3163                                         if (valrec->dtrd_action != act ||
3164                                             valrec->dtrd_arg != arg) {
3165                                                 return (dt_set_errno(dtp,
3166                                                     EDT_BADSETOPT));
3167                                         }
3168
3169                                         if (valsize > sizeof (uint64_t)) {
3170                                                 val = buf->dtbd_data + offs +
3171                                                     valrec->dtrd_offset;
3172                                         } else {
3173                                                 val = "1";
3174                                         }
3175
3176                                         rv = dt_setopt(dtp, &data, addr, val);
3177
3178                                         if (rv != 0)
3179                                                 return (-1);
3180
3181                                         flow = (opts[DTRACEOPT_FLOWINDENT] !=
3182                                             DTRACEOPT_UNSET);
3183                                         quiet = (opts[DTRACEOPT_QUIET] !=
3184                                             DTRACEOPT_UNSET);
3185
3186                                         continue;
3187                                 }
3188
3189                                 case DT_ACT_TRUNC:
3190                                         if (i == epd->dtepd_nrecs - 1)
3191                                                 return (dt_set_errno(dtp,
3192                                                     EDT_BADTRUNC));
3193
3194                                         if (dt_trunc(dtp,
3195                                             buf->dtbd_data + offs, rec) != 0)
3196                                                 return (-1);
3197
3198                                         i++;
3199                                         continue;
3200
3201                                 default:
3202                                         continue;
3203                                 }
3204                         }
3205
3206                         if (act == DTRACEACT_TRACEMEM_DYNSIZE &&
3207                             rec->dtrd_size == sizeof (uint64_t)) {
3208                                 /* LINTED - alignment */
3209                                 tracememsize = *((unsigned long long *)addr);
3210                                 continue;
3211                         }
3212
3213                         rval = (*rfunc)(&data, rec, arg);
3214
3215                         if (rval == DTRACE_CONSUME_NEXT)
3216                                 continue;
3217
3218                         if (rval == DTRACE_CONSUME_ABORT)
3219                                 return (dt_set_errno(dtp, EDT_DIRABORT));
3220
3221                         if (rval != DTRACE_CONSUME_THIS)
3222                                 return (dt_set_errno(dtp, EDT_BADRVAL));
3223
3224                         if (dtp->dt_oformat && rec->dtrd_size > 0)
3225                                 xo_open_instance("output");
3226                         if (act == DTRACEACT_STACK) {
3227                                 int depth = rec->dtrd_arg;
3228
3229                                 if (dtp->dt_oformat) {
3230                                         if (dt_format_stack(dtp, addr, depth,
3231                                             rec->dtrd_size / depth) < 0) {
3232                                                 xo_close_instance("output");
3233                                                 return (-1);
3234                                         }
3235                                 } else {
3236                                         if (dt_print_stack(dtp,
3237                                             fp, NULL, addr, depth,
3238                                             rec->dtrd_size / depth) < 0)
3239                                         return (-1);
3240                                 }
3241                                 goto nextrec;
3242                         }
3243
3244                         if (act == DTRACEACT_USTACK ||
3245                             act == DTRACEACT_JSTACK) {
3246                                 if (dtp->dt_oformat) {
3247                                         if (dt_format_ustack(dtp, addr,
3248                                             rec->dtrd_arg) < 0) {
3249                                                 xo_close_instance("output");
3250                                                 return (-1);
3251                                         }
3252                                 } else {
3253                                         if (dt_print_ustack(dtp, fp, NULL,
3254                                             addr, rec->dtrd_arg) < 0)
3255                                                 return (-1);
3256                                 }
3257                                 goto nextrec;
3258                         }
3259
3260                         if (act == DTRACEACT_SYM) {
3261                                 if (dtp->dt_oformat) {
3262                                         if (dt_format_sym(dtp, addr) < 0) {
3263                                                 xo_close_instance("output");
3264                                                 return (-1);
3265                                         }
3266                                 } else {
3267                                         if (dt_print_sym(dtp, fp, NULL, addr) < 0)
3268                                                 return (-1);
3269                                 }
3270                                 goto nextrec;
3271                         }
3272
3273                         if (act == DTRACEACT_MOD) {
3274                                 if (dtp->dt_oformat) {
3275                                         if (dt_format_mod(dtp, addr) < 0) {
3276                                                 xo_close_instance("output");
3277                                                 return (-1);
3278                                         }
3279                                 } else {
3280                                         if (dt_print_mod(dtp, fp, NULL, addr) < 0)
3281                                                 return (-1);
3282                                 }
3283                                 goto nextrec;
3284                         }
3285
3286                         if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {
3287                                 if (dtp->dt_oformat) {
3288                                         if (dt_format_usym(dtp, addr, act) < 0) {
3289                                                 xo_close_instance("output");
3290                                                 return (-1);
3291                                         }
3292                                 } else {
3293                                         if (dt_print_usym(dtp, fp, addr, act) < 0)
3294                                                 return (-1);
3295                                 }
3296                                 goto nextrec;
3297                         }
3298
3299                         if (act == DTRACEACT_UMOD) {
3300                                 if (dtp->dt_oformat) {
3301                                         if (dt_format_umod(dtp, addr) < 0) {
3302                                                 xo_close_instance("output");
3303                                                 return (-1);
3304                                         }
3305                                 } else {
3306                                         if (dt_print_umod(dtp, fp, NULL, addr) < 0)
3307                                                 return (-1);
3308                                 }
3309                                 goto nextrec;
3310                         }
3311
3312                         if (act == DTRACEACT_PRINTM) {
3313                                 if (dtp->dt_oformat) {
3314                                         if (dt_format_memory(dtp, addr) < 0) {
3315                                                 xo_close_instance("output");
3316                                                 return (-1);
3317                                         }
3318                                 } else {
3319                                         if (dt_print_memory(dtp, fp, addr) < 0)
3320                                                 return (-1);
3321                                 }
3322                                 goto nextrec;
3323                         }
3324
3325                         if (dtp->dt_oformat == DTRACE_OFORMAT_TEXT &&
3326                             DTRACEACT_ISPRINTFLIKE(act)) {
3327                                 void *fmtdata;
3328                                 int (*func)(dtrace_hdl_t *, FILE *, void *,
3329                                     const dtrace_probedata_t *,
3330                                     const dtrace_recdesc_t *, uint_t,
3331                                     const void *buf, size_t);
3332
3333                                 if ((fmtdata = dt_format_lookup(dtp,
3334                                     rec->dtrd_format)) == NULL)
3335                                         goto nofmt;
3336
3337                                 switch (act) {
3338                                 case DTRACEACT_PRINTF:
3339                                         func = dtrace_fprintf;
3340                                         break;
3341                                 case DTRACEACT_PRINTA:
3342                                         func = dtrace_fprinta;
3343                                         break;
3344                                 case DTRACEACT_SYSTEM:
3345                                         func = dtrace_system;
3346                                         break;
3347                                 case DTRACEACT_FREOPEN:
3348                                         func = dtrace_freopen;
3349                                         break;
3350                                 }
3351
3352                                 n = (*func)(dtp, fp, fmtdata, &data,
3353                                     rec, epd->dtepd_nrecs - i,
3354                                     (uchar_t *)buf->dtbd_data + offs,
3355                                     buf->dtbd_size - offs);
3356
3357                                 if (n < 0)
3358                                         return (-1); /* errno is set for us */
3359
3360                                 if (n > 0)
3361                                         i += n - 1;
3362                                 goto nextrec;
3363                         }
3364
3365                         /*
3366                          * We don't care about a formatted printa, system or
3367                          * freopen for oformat.
3368                          */
3369                         if (dtp->dt_oformat && act == DTRACEACT_PRINTF &&
3370                             skip_format == 0) {
3371                                 void *fmtdata;
3372                                 if ((fmtdata = dt_format_lookup(dtp,
3373                                     rec->dtrd_format)) == NULL)
3374                                         goto nofmt;
3375
3376                                 n = dtrace_sprintf(dtp, fp, fmtdata, rec,
3377                                     epd->dtepd_nrecs - i,
3378                                     (uchar_t *)buf->dtbd_data + offs,
3379                                     buf->dtbd_size - offs);
3380
3381                                 if (n < 0) {
3382                                         xo_close_instance("output");
3383                                         return (-1); /* errno is set for us */
3384                                 }
3385
3386                                 xo_emit("{:message/%s}", dtp->dt_sprintf_buf);
3387                                 skip_format += n;
3388
3389                                 /*
3390                                  * We want the "message" object to be its own
3391                                  * thing, but we still want to process the
3392                                  * current DIFEXPR in case there is a value
3393                                  * attached to it. If there is, we need to
3394                                  * re-open a new output instance, as otherwise
3395                                  * the message ends up bundled with the first
3396                                  * value.
3397                                  *
3398                                  * XXX: There is an edge case where a
3399                                  * printf("hello"); will produce a DIFO that
3400                                  * returns 0 attached to it and we have no good
3401                                  * way to determine if this 0 value is because
3402                                  * there's no real data attached to the printf
3403                                  * as an argument, or it's because the argument
3404                                  * actually returns 0.
3405                                  */
3406                                 if (skip_format == 0)
3407                                         goto nextrec;
3408
3409                                 xo_close_instance("output");
3410                                 xo_open_instance("output");
3411                         }
3412
3413                         /*
3414                          * If this is a DIF expression, and the record has a
3415                          * format set, this indicates we have a CTF type name
3416                          * associated with the data and we should try to print
3417                          * it out by type.
3418                          */
3419                         if (act == DTRACEACT_DIFEXPR) {
3420                                 const char *strdata = dt_strdata_lookup(dtp,
3421                                     rec->dtrd_format);
3422                                 if (strdata != NULL) {
3423                                         if (dtp->dt_oformat)
3424                                                 n = dtrace_format_print(dtp, fp,
3425                                                     strdata, addr,
3426                                                     rec->dtrd_size);
3427                                         else
3428                                                 n = dtrace_print(dtp, fp,
3429                                                     strdata, addr,
3430                                                     rec->dtrd_size);
3431
3432                                         /*
3433                                          * dtrace_print() will return -1 on
3434                                          * error, or return the number of bytes
3435                                          * consumed.  It will return 0 if the
3436                                          * type couldn't be determined, and we
3437                                          * should fall through to the normal
3438                                          * trace method.
3439                                          */
3440                                         if (n < 0) {
3441                                                 if (dtp->dt_oformat)
3442                                                         xo_close_instance(
3443                                                             "output");
3444                                                 return (-1);
3445                                         }
3446
3447                                         if (n > 0)
3448                                                 goto nextrec;
3449                                 }
3450                         }
3451
3452 nofmt:
3453                         if (act == DTRACEACT_PRINTA) {
3454                                 dt_print_aggdata_t pd;
3455                                 dtrace_aggvarid_t *aggvars;
3456                                 int j, naggvars = 0;
3457                                 size_t size = ((epd->dtepd_nrecs - i) *
3458                                     sizeof (dtrace_aggvarid_t));
3459
3460                                 if ((aggvars = dt_alloc(dtp, size)) == NULL) {
3461                                         if (dtp->dt_oformat)
3462                                                 xo_close_instance("output");
3463                                         return (-1);
3464                                 }
3465
3466                                 /*
3467                                  * This might be a printa() with multiple
3468                                  * aggregation variables.  We need to scan
3469                                  * forward through the records until we find
3470                                  * a record from a different statement.
3471                                  */
3472                                 for (j = i; j < epd->dtepd_nrecs; j++) {
3473                                         dtrace_recdesc_t *nrec;
3474                                         caddr_t naddr;
3475
3476                                         nrec = &epd->dtepd_rec[j];
3477
3478                                         if (nrec->dtrd_uarg != rec->dtrd_uarg)
3479                                                 break;
3480
3481                                         if (nrec->dtrd_action != act) {
3482                                                 if (dtp->dt_oformat)
3483                                                         xo_close_instance(
3484                                                             "output");
3485                                                 return (dt_set_errno(dtp,
3486                                                     EDT_BADAGG));
3487                                         }
3488
3489                                         naddr = buf->dtbd_data + offs +
3490                                             nrec->dtrd_offset;
3491
3492                                         aggvars[naggvars++] =
3493                                             /* LINTED - alignment */
3494                                             *((dtrace_aggvarid_t *)naddr);
3495                                 }
3496
3497                                 i = j - 1;
3498                                 bzero(&pd, sizeof (pd));
3499                                 pd.dtpa_dtp = dtp;
3500                                 pd.dtpa_fp = fp;
3501
3502                                 assert(naggvars >= 1);
3503
3504                                 if (dtp->dt_oformat)
3505                                         xo_open_list("aggregation-data");
3506                                 if (naggvars == 1) {
3507                                         pd.dtpa_id = aggvars[0];
3508                                         dt_free(dtp, aggvars);
3509
3510                                         if (dtp->dt_oformat) {
3511                                                 n = dt_oformat_agg_sorted(dtp,
3512                                                     dt_format_agg, &pd);
3513                                                 if (n < 0)
3514                                                         return (-1);
3515                                         } else {
3516                                                 if (dt_printf(dtp, fp, "\n") < 0 ||
3517                                                     dtrace_aggregate_walk_sorted(dtp,
3518                                                     dt_print_agg, &pd) < 0)
3519                                                         return (-1);
3520                                         }
3521
3522                                         if (dtp->dt_oformat)
3523                                                 dt_oformat_agg_name(&pd);
3524                                         goto nextrec;
3525                                 }
3526
3527                                 if (dtp->dt_oformat) {
3528                                         if (dtrace_aggregate_walk_joined(dtp,
3529                                             aggvars, naggvars,
3530                                             dt_format_aggs, &pd) < 0) {
3531                                                 dt_oformat_agg_name(&pd);
3532                                                 xo_close_instance("output");
3533                                                 dt_free(dtp, aggvars);
3534                                                 return (-1);
3535                                         }
3536                                 } else {
3537                                         if (dt_printf(dtp, fp, "\n") < 0 ||
3538                                             dtrace_aggregate_walk_joined(dtp,
3539                                             aggvars, naggvars,
3540                                             dt_print_aggs, &pd) < 0) {
3541                                                 dt_free(dtp, aggvars);
3542                                                 return (-1);
3543                                         }
3544                                 }
3545
3546                                 if (dtp->dt_oformat)
3547                                         dt_oformat_agg_name(&pd);
3548                                 dt_free(dtp, aggvars);
3549                                 goto nextrec;
3550                         }
3551
3552                         if (act == DTRACEACT_TRACEMEM) {
3553                                 if (tracememsize == 0 ||
3554                                     tracememsize > rec->dtrd_size) {
3555                                         tracememsize = rec->dtrd_size;
3556                                 }
3557
3558                                 if (dtp->dt_oformat) {
3559                                         char *s;
3560
3561                                         s = dt_format_bytes_get(dtp, addr,
3562                                             tracememsize);
3563                                         n = xo_emit("{:tracemem/%s}", s);
3564                                         dt_free(dtp, s);
3565                                 } else {
3566                                         n = dt_print_bytes(dtp, fp, addr,
3567                                             tracememsize, -33, quiet, 1);
3568                                 }
3569
3570                                 tracememsize = 0;
3571
3572                                 if (n < 0)
3573                                         return (-1);
3574
3575                                 goto nextrec;
3576                         }
3577
3578                         switch (rec->dtrd_size) {
3579                         case sizeof (uint64_t):
3580                                 if (dtp->dt_oformat) {
3581                                         xo_emit("{:value/%lld}",
3582                                             *((unsigned long long *)addr));
3583                                         n = 0;
3584                                 } else
3585                                         n = dt_printf(dtp, fp,
3586                                             quiet ? "%lld" : " %16lld",
3587                                             /* LINTED - alignment */
3588                                             *((unsigned long long *)addr));
3589                                 break;
3590                         case sizeof (uint32_t):
3591                                 if (dtp->dt_oformat) {
3592                                         xo_emit("{:value/%d}",
3593                                             *((uint32_t *)addr));
3594                                         n = 0;
3595                                 } else
3596                                         n = dt_printf(dtp, fp,
3597                                             quiet ? "%d" : " %8d",
3598                                             /* LINTED - alignment */
3599                                             *((uint32_t *)addr));
3600                                 break;
3601                         case sizeof (uint16_t):
3602                                 if (dtp->dt_oformat) {
3603                                         xo_emit("{:value/%d}",
3604                                             *((uint16_t *)addr));
3605                                         n = 0;
3606                                 } else
3607                                         n = dt_printf(dtp, fp,
3608                                             quiet ? "%d" : " %5d",
3609                                             /* LINTED - alignment */
3610                                             *((uint16_t *)addr));
3611                                 break;
3612                         case sizeof (uint8_t):
3613                                 if (dtp->dt_oformat) {
3614                                         xo_emit("{:value/%d}",
3615                                             *((uint8_t *)addr));
3616                                         n = 0;
3617                                 } else
3618                                         n = dt_printf(dtp, fp,
3619                                             quiet ? "%d" : " %3d",
3620                                             *((uint8_t *)addr));
3621                                 break;
3622                         default:
3623                                 if (dtp->dt_oformat && rec->dtrd_size > 0) {
3624                                         char *s;
3625
3626                                         s = dt_format_bytes_get(dtp, addr,
3627                                             rec->dtrd_size);
3628                                         xo_emit("{:value/%s}", s);
3629                                         dt_free(dtp, s);
3630                                         n = 0;
3631                                 } else {
3632                                         n = dt_print_bytes(dtp, fp, addr,
3633                                             rec->dtrd_size, -33, quiet, 0);
3634                                 }
3635                                 break;
3636                         }
3637
3638                         if (dtp->dt_oformat && rec->dtrd_size > 0)
3639                                 xo_close_instance("output");
3640
3641                         if (n < 0)
3642                                 return (-1); /* errno is set for us */
3643
3644 nextrec:
3645                         if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)
3646                                 return (-1); /* errno is set for us */
3647                 }
3648
3649                 /*
3650                  * Call the record callback with a NULL record to indicate
3651                  * that we're done processing this EPID.
3652                  */
3653                 rval = (*rfunc)(&data, NULL, arg);
3654 nextepid:
3655                 offs += epd->dtepd_size;
3656                 dtp->dt_last_epid = id;
3657
3658                 if (dtp->dt_oformat) {
3659                         xo_close_list("output");
3660                         xo_close_instance("probes");
3661                         xo_flush();
3662                 }
3663                 if (just_one) {
3664                         buf->dtbd_oldest = offs;
3665                         break;
3666                 }
3667         }
3668
3669         dtp->dt_flow = data.dtpda_flow;
3670         dtp->dt_indent = data.dtpda_indent;
3671         dtp->dt_prefix = data.dtpda_prefix;
3672
3673         if ((drops = buf->dtbd_drops) == 0)
3674                 return (0);
3675
3676         /*
3677          * Explicitly zero the drops to prevent us from processing them again.
3678          */
3679         buf->dtbd_drops = 0;
3680
3681         xo_open_instance("probes");
3682         dt_oformat_drop(dtp, cpu);
3683         rval = dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops);
3684         xo_close_instance("probes");
3685
3686         return (rval);
3687 }
3688
3689 /*
3690  * Reduce memory usage by shrinking the buffer if it's no more than half full.
3691  * Note, we need to preserve the alignment of the data at dtbd_oldest, which is
3692  * only 4-byte aligned.
3693  */
3694 static void
3695 dt_realloc_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf, int cursize)
3696 {
3697         uint64_t used = buf->dtbd_size - buf->dtbd_oldest;
3698         if (used < cursize / 2) {
3699                 int misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1);
3700                 char *newdata = dt_alloc(dtp, used + misalign);
3701                 if (newdata == NULL)
3702                         return;
3703                 bzero(newdata, misalign);
3704                 bcopy(buf->dtbd_data + buf->dtbd_oldest,
3705                     newdata + misalign, used);
3706                 dt_free(dtp, buf->dtbd_data);
3707                 buf->dtbd_oldest = misalign;
3708                 buf->dtbd_size = used + misalign;
3709                 buf->dtbd_data = newdata;
3710         }
3711 }
3712
3713 /*
3714  * If the ring buffer has wrapped, the data is not in order.  Rearrange it
3715  * so that it is.  Note, we need to preserve the alignment of the data at
3716  * dtbd_oldest, which is only 4-byte aligned.
3717  */
3718 static int
3719 dt_unring_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf)
3720 {
3721         int misalign;
3722         char *newdata, *ndp;
3723
3724         if (buf->dtbd_oldest == 0)
3725                 return (0);
3726
3727         misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1);
3728         newdata = ndp = dt_alloc(dtp, buf->dtbd_size + misalign);
3729
3730         if (newdata == NULL)
3731                 return (-1);
3732
3733         assert(0 == (buf->dtbd_size & (sizeof (uint64_t) - 1)));
3734
3735         bzero(ndp, misalign);
3736         ndp += misalign;
3737
3738         bcopy(buf->dtbd_data + buf->dtbd_oldest, ndp,
3739             buf->dtbd_size - buf->dtbd_oldest);
3740         ndp += buf->dtbd_size - buf->dtbd_oldest;
3741
3742         bcopy(buf->dtbd_data, ndp, buf->dtbd_oldest);
3743
3744         dt_free(dtp, buf->dtbd_data);
3745         buf->dtbd_oldest = misalign;
3746         buf->dtbd_data = newdata;
3747         buf->dtbd_size += misalign;
3748
3749         return (0);
3750 }
3751
3752 static void
3753 dt_put_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf)
3754 {
3755         dt_free(dtp, buf->dtbd_data);
3756         dt_free(dtp, buf);
3757 }
3758
3759 /*
3760  * Returns 0 on success, in which case *cbp will be filled in if we retrieved
3761  * data, or NULL if there is no data for this CPU.
3762  * Returns -1 on failure and sets dt_errno.
3763  */
3764 static int
3765 dt_get_buf(dtrace_hdl_t *dtp, int cpu, dtrace_bufdesc_t **bufp)
3766 {
3767         dtrace_optval_t size;
3768         dtrace_bufdesc_t *buf = dt_zalloc(dtp, sizeof (*buf));
3769         int error, rval;
3770
3771         if (buf == NULL)
3772                 return (-1);
3773
3774         (void) dtrace_getopt(dtp, "bufsize", &size);
3775         buf->dtbd_data = dt_alloc(dtp, size);
3776         if (buf->dtbd_data == NULL) {
3777                 dt_free(dtp, buf);
3778                 return (-1);
3779         }
3780         buf->dtbd_size = size;
3781         buf->dtbd_cpu = cpu;
3782
3783 #ifdef illumos
3784         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
3785 #else
3786         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
3787 #endif
3788                 /*
3789                  * If we failed with ENOENT, it may be because the
3790                  * CPU was unconfigured -- this is okay.  Any other
3791                  * error, however, is unexpected.
3792                  */
3793                 if (errno == ENOENT) {
3794                         *bufp = NULL;
3795                         rval = 0;
3796                 } else
3797                         rval = dt_set_errno(dtp, errno);
3798
3799                 dt_put_buf(dtp, buf);
3800                 return (rval);
3801         }
3802
3803         error = dt_unring_buf(dtp, buf);
3804         if (error != 0) {
3805                 dt_put_buf(dtp, buf);
3806                 return (error);
3807         }
3808         dt_realloc_buf(dtp, buf, size);
3809
3810         *bufp = buf;
3811         return (0);
3812 }
3813
3814 typedef struct dt_begin {
3815         dtrace_consume_probe_f *dtbgn_probefunc;
3816         dtrace_consume_rec_f *dtbgn_recfunc;
3817         void *dtbgn_arg;
3818         dtrace_handle_err_f *dtbgn_errhdlr;
3819         void *dtbgn_errarg;
3820         int dtbgn_beginonly;
3821 } dt_begin_t;
3822
3823 static int
3824 dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
3825 {
3826         dt_begin_t *begin = arg;
3827         dtrace_probedesc_t *pd = data->dtpda_pdesc;
3828
3829         int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
3830         int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
3831
3832         if (begin->dtbgn_beginonly) {
3833                 if (!(r1 && r2))
3834                         return (DTRACE_CONSUME_NEXT);
3835         } else {
3836                 if (r1 && r2)
3837                         return (DTRACE_CONSUME_NEXT);
3838         }
3839
3840         /*
3841          * We have a record that we're interested in.  Now call the underlying
3842          * probe function...
3843          */
3844         return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
3845 }
3846
3847 static int
3848 dt_consume_begin_record(const dtrace_probedata_t *data,
3849     const dtrace_recdesc_t *rec, void *arg)
3850 {
3851         dt_begin_t *begin = arg;
3852
3853         return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
3854 }
3855
3856 static int
3857 dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
3858 {
3859         dt_begin_t *begin = (dt_begin_t *)arg;
3860         dtrace_probedesc_t *pd = data->dteda_pdesc;
3861
3862         int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
3863         int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
3864
3865         if (begin->dtbgn_beginonly) {
3866                 if (!(r1 && r2))
3867                         return (DTRACE_HANDLE_OK);
3868         } else {
3869                 if (r1 && r2)
3870                         return (DTRACE_HANDLE_OK);
3871         }
3872
3873         return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
3874 }
3875
3876 static int
3877 dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp,
3878     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
3879 {
3880         /*
3881          * There's this idea that the BEGIN probe should be processed before
3882          * everything else, and that the END probe should be processed after
3883          * anything else.  In the common case, this is pretty easy to deal
3884          * with.  However, a situation may arise where the BEGIN enabling and
3885          * END enabling are on the same CPU, and some enabling in the middle
3886          * occurred on a different CPU.  To deal with this (blech!) we need to
3887          * consume the BEGIN buffer up until the end of the BEGIN probe, and
3888          * then set it aside.  We will then process every other CPU, and then
3889          * we'll return to the BEGIN CPU and process the rest of the data
3890          * (which will inevitably include the END probe, if any).  Making this
3891          * even more complicated (!) is the library's ERROR enabling.  Because
3892          * this enabling is processed before we even get into the consume call
3893          * back, any ERROR firing would result in the library's ERROR enabling
3894          * being processed twice -- once in our first pass (for BEGIN probes),
3895          * and again in our second pass (for everything but BEGIN probes).  To
3896          * deal with this, we interpose on the ERROR handler to assure that we
3897          * only process ERROR enablings induced by BEGIN enablings in the
3898          * first pass, and that we only process ERROR enablings _not_ induced
3899          * by BEGIN enablings in the second pass.
3900          */
3901
3902         dt_begin_t begin;
3903         processorid_t cpu = dtp->dt_beganon;
3904         int rval, i;
3905         static int max_ncpus;
3906         dtrace_bufdesc_t *buf;
3907
3908         dtp->dt_beganon = -1;
3909
3910         if (dt_get_buf(dtp, cpu, &buf) != 0)
3911                 return (-1);
3912         if (buf == NULL)
3913                 return (0);
3914
3915         if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
3916                 /*
3917                  * This is the simple case.  We're either not stopped, or if
3918                  * we are, we actually processed any END probes on another
3919                  * CPU.  We can simply consume this buffer and return.
3920                  */
3921                 rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
3922                     pf, rf, arg);
3923                 dt_put_buf(dtp, buf);
3924                 return (rval);
3925         }
3926
3927         begin.dtbgn_probefunc = pf;
3928         begin.dtbgn_recfunc = rf;
3929         begin.dtbgn_arg = arg;
3930         begin.dtbgn_beginonly = 1;
3931
3932         /*
3933          * We need to interpose on the ERROR handler to be sure that we
3934          * only process ERRORs induced by BEGIN.
3935          */
3936         begin.dtbgn_errhdlr = dtp->dt_errhdlr;
3937         begin.dtbgn_errarg = dtp->dt_errarg;
3938         dtp->dt_errhdlr = dt_consume_begin_error;
3939         dtp->dt_errarg = &begin;
3940
3941         rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
3942             dt_consume_begin_probe, dt_consume_begin_record, &begin);
3943
3944         dtp->dt_errhdlr = begin.dtbgn_errhdlr;
3945         dtp->dt_errarg = begin.dtbgn_errarg;
3946
3947         if (rval != 0) {
3948                 dt_put_buf(dtp, buf);
3949                 return (rval);
3950         }
3951
3952         if (max_ncpus == 0)
3953                 max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
3954
3955         for (i = 0; i < max_ncpus; i++) {
3956                 dtrace_bufdesc_t *nbuf;
3957                 if (i == cpu)
3958                         continue;
3959
3960                 if (dt_get_buf(dtp, i, &nbuf) != 0) {
3961                         dt_put_buf(dtp, buf);
3962                         return (-1);
3963                 }
3964                 if (nbuf == NULL)
3965                         continue;
3966
3967                 rval = dt_consume_cpu(dtp, fp, i, nbuf, B_FALSE,
3968                     pf, rf, arg);
3969                 dt_put_buf(dtp, nbuf);
3970                 if (rval != 0) {
3971                         dt_put_buf(dtp, buf);
3972                         return (rval);
3973                 }
3974         }
3975
3976         /*
3977          * Okay -- we're done with the other buffers.  Now we want to
3978          * reconsume the first buffer -- but this time we're looking for
3979          * everything _but_ BEGIN.  And of course, in order to only consume
3980          * those ERRORs _not_ associated with BEGIN, we need to reinstall our
3981          * ERROR interposition function...
3982          */
3983         begin.dtbgn_beginonly = 0;
3984
3985         assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
3986         assert(begin.dtbgn_errarg == dtp->dt_errarg);
3987         dtp->dt_errhdlr = dt_consume_begin_error;
3988         dtp->dt_errarg = &begin;
3989
3990         rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
3991             dt_consume_begin_probe, dt_consume_begin_record, &begin);
3992
3993         dtp->dt_errhdlr = begin.dtbgn_errhdlr;
3994         dtp->dt_errarg = begin.dtbgn_errarg;
3995
3996         return (rval);
3997 }
3998
3999 /* ARGSUSED */
4000 static uint64_t
4001 dt_buf_oldest(void *elem, void *arg)
4002 {
4003         dtrace_bufdesc_t *buf = elem;
4004         size_t offs = buf->dtbd_oldest;
4005
4006         while (offs < buf->dtbd_size) {
4007                 dtrace_rechdr_t *dtrh =
4008                     /* LINTED - alignment */
4009                     (dtrace_rechdr_t *)(buf->dtbd_data + offs);
4010                 if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
4011                         offs += sizeof (dtrace_epid_t);
4012                 } else {
4013                         return (DTRACE_RECORD_LOAD_TIMESTAMP(dtrh));
4014                 }
4015         }
4016
4017         /* There are no records left; use the time the buffer was retrieved. */
4018         return (buf->dtbd_timestamp);
4019 }
4020
4021 int
4022 dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
4023     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
4024 {
4025         dtrace_optval_t size;
4026         static int max_ncpus;
4027         int i, rval;
4028         dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
4029         hrtime_t now = gethrtime();
4030
4031         if (dtp->dt_lastswitch != 0) {
4032                 if (now - dtp->dt_lastswitch < interval)
4033                         return (0);
4034
4035                 dtp->dt_lastswitch += interval;
4036         } else {
4037                 dtp->dt_lastswitch = now;
4038         }
4039
4040         if (!dtp->dt_active)
4041                 return (dt_set_errno(dtp, EINVAL));
4042
4043         if (max_ncpus == 0)
4044                 max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
4045
4046         if (pf == NULL)
4047                 pf = (dtrace_consume_probe_f *)dt_nullprobe;
4048
4049         if (rf == NULL)
4050                 rf = (dtrace_consume_rec_f *)dt_nullrec;
4051
4052         if (dtp->dt_options[DTRACEOPT_TEMPORAL] == DTRACEOPT_UNSET) {
4053                 /*
4054                  * The output will not be in the order it was traced.  Rather,
4055                  * we will consume all of the data from each CPU's buffer in
4056                  * turn.  We apply special handling for the records from BEGIN
4057                  * and END probes so that they are consumed first and last,
4058                  * respectively.
4059                  *
4060                  * If we have just begun, we want to first process the CPU that
4061                  * executed the BEGIN probe (if any).
4062                  */
4063                 if (dtp->dt_active && dtp->dt_beganon != -1 &&
4064                     (rval = dt_consume_begin(dtp, fp, pf, rf, arg)) != 0)
4065                         return (rval);
4066
4067                 for (i = 0; i < max_ncpus; i++) {
4068                         dtrace_bufdesc_t *buf;
4069
4070                         /*
4071                          * If we have stopped, we want to process the CPU on
4072                          * which the END probe was processed only _after_ we
4073                          * have processed everything else.
4074                          */
4075                         if (dtp->dt_stopped && (i == dtp->dt_endedon))
4076                                 continue;
4077
4078                         if (dt_get_buf(dtp, i, &buf) != 0)
4079                                 return (-1);
4080                         if (buf == NULL)
4081                                 continue;
4082
4083                         dtp->dt_flow = 0;
4084                         dtp->dt_indent = 0;
4085                         dtp->dt_prefix = NULL;
4086                         rval = dt_consume_cpu(dtp, fp, i,
4087                             buf, B_FALSE, pf, rf, arg);
4088                         dt_put_buf(dtp, buf);
4089                         if (rval != 0)
4090                                 return (rval);
4091                 }
4092                 if (dtp->dt_stopped) {
4093                         dtrace_bufdesc_t *buf;
4094
4095                         if (dt_get_buf(dtp, dtp->dt_endedon, &buf) != 0)
4096                                 return (-1);
4097                         if (buf == NULL)
4098                                 return (0);
4099
4100                         rval = dt_consume_cpu(dtp, fp, dtp->dt_endedon,
4101                             buf, B_FALSE, pf, rf, arg);
4102                         dt_put_buf(dtp, buf);
4103                         return (rval);
4104                 }
4105         } else {
4106                 /*
4107                  * The output will be in the order it was traced (or for
4108                  * speculations, when it was committed).  We retrieve a buffer
4109                  * from each CPU and put it into a priority queue, which sorts
4110                  * based on the first entry in the buffer.  This is sufficient
4111                  * because entries within a buffer are already sorted.
4112                  *
4113                  * We then consume records one at a time, always consuming the
4114                  * oldest record, as determined by the priority queue.  When
4115                  * we reach the end of the time covered by these buffers,
4116                  * we need to stop and retrieve more records on the next pass.
4117                  * The kernel tells us the time covered by each buffer, in
4118                  * dtbd_timestamp.  The first buffer's timestamp tells us the
4119                  * time covered by all buffers, as subsequently retrieved
4120                  * buffers will cover to a more recent time.
4121                  */
4122
4123                 uint64_t *drops = alloca(max_ncpus * sizeof (uint64_t));
4124                 uint64_t first_timestamp = 0;
4125                 uint_t cookie = 0;
4126                 dtrace_bufdesc_t *buf;
4127
4128                 bzero(drops, max_ncpus * sizeof (uint64_t));
4129
4130                 if (dtp->dt_bufq == NULL) {
4131                         dtp->dt_bufq = dt_pq_init(dtp, max_ncpus * 2,
4132                             dt_buf_oldest, NULL);
4133                         if (dtp->dt_bufq == NULL) /* ENOMEM */
4134                                 return (-1);
4135                 }
4136
4137                 /* Retrieve data from each CPU. */
4138                 (void) dtrace_getopt(dtp, "bufsize", &size);
4139                 for (i = 0; i < max_ncpus; i++) {
4140                         dtrace_bufdesc_t *buf;
4141
4142                         if (dt_get_buf(dtp, i, &buf) != 0)
4143                                 return (-1);
4144                         if (buf != NULL) {
4145                                 if (first_timestamp == 0)
4146                                         first_timestamp = buf->dtbd_timestamp;
4147                                 assert(buf->dtbd_timestamp >= first_timestamp);
4148
4149                                 dt_pq_insert(dtp->dt_bufq, buf);
4150                                 drops[i] = buf->dtbd_drops;
4151                                 buf->dtbd_drops = 0;
4152                         }
4153                 }
4154
4155                 /* Consume records. */
4156                 for (;;) {
4157                         dtrace_bufdesc_t *buf = dt_pq_pop(dtp->dt_bufq);
4158                         uint64_t timestamp;
4159
4160                         if (buf == NULL)
4161                                 break;
4162
4163                         timestamp = dt_buf_oldest(buf, dtp);
4164                         if (timestamp == buf->dtbd_timestamp) {
4165                                 /*
4166                                  * We've reached the end of the time covered
4167                                  * by this buffer.  If this is the oldest
4168                                  * buffer, we must do another pass
4169                                  * to retrieve more data.
4170                                  */
4171                                 dt_put_buf(dtp, buf);
4172                                 if (timestamp == first_timestamp &&
4173                                     !dtp->dt_stopped)
4174                                         break;
4175                                 continue;
4176                         }
4177                         assert(timestamp >= dtp->dt_last_timestamp);
4178                         dtp->dt_last_timestamp = timestamp;
4179
4180                         if ((rval = dt_consume_cpu(dtp, fp,
4181                             buf->dtbd_cpu, buf, B_TRUE, pf, rf, arg)) != 0)
4182                                 return (rval);
4183                         dt_pq_insert(dtp->dt_bufq, buf);
4184                 }
4185
4186                 /* Consume drops. */
4187                 for (i = 0; i < max_ncpus; i++) {
4188                         if (drops[i] != 0) {
4189                                 int error;
4190                                 xo_open_instance("probes");
4191                                 dt_oformat_drop(dtp, i);
4192                                 error = dt_handle_cpudrop(dtp, i,
4193                                     DTRACEDROP_PRINCIPAL, drops[i]);
4194                                 xo_close_instance("probes");
4195                                 if (error != 0)
4196                                         return (error);
4197                         }
4198                 }
4199
4200                 /*
4201                  * Reduce memory usage by re-allocating smaller buffers
4202                  * for the "remnants".
4203                  */
4204                 while (buf = dt_pq_walk(dtp->dt_bufq, &cookie))
4205                         dt_realloc_buf(dtp, buf, buf->dtbd_size);
4206         }
4207
4208         return (0);
4209 }
4210
4211 void
4212 dtrace_oformat_probe(dtrace_hdl_t *dtp __unused, const dtrace_probedata_t *data,
4213     processorid_t cpu, dtrace_probedesc_t *pd)
4214 {
4215
4216         xo_emit("{:timestamp/%llu} {:cpu/%d} {:id/%d} {:provider/%s} "
4217                 "{:module/%s} {:function/%s} {:name/%s}",
4218             (unsigned long long)data->dtpda_timestamp, cpu, pd->dtpd_id,
4219             pd->dtpd_provider, pd->dtpd_mod, pd->dtpd_func, pd->dtpd_name);
4220 }
4221
4222 void
4223 dt_oformat_drop(dtrace_hdl_t *dtp, processorid_t cpu)
4224 {
4225         xo_emit("{:cpu/%d} {:id/%d} {:provider/%s} "
4226                 "{:module/%s} {:function/%s} {:name/%s}",
4227             cpu, -1, "dtrace", "INTERNAL", "INTERNAL", "DROP");
4228 }