]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c
MFC r238071:
[FreeBSD/stable/9.git] / cddl / contrib / opensolaris / lib / libdtrace / common / dt_consume.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25
26 /*
27  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
28  */
29
30 #include <stdlib.h>
31 #include <strings.h>
32 #include <errno.h>
33 #include <unistd.h>
34 #include <limits.h>
35 #include <assert.h>
36 #include <ctype.h>
37 #if defined(sun)
38 #include <alloca.h>
39 #endif
40 #include <dt_impl.h>
41 #if !defined(sun)
42 #include <libproc_compat.h>
43 #endif
44
45 #define DT_MASK_LO 0x00000000FFFFFFFFULL
46
/*
 * Absolute value of a long double.  This lives here, rather than coming
 * from fabsl(3M), so that libdtrace does not acquire a dependency on libm.
 */
static long double
dt_fabsl(long double x)
{
	return (x < 0 ? -x : x);
}
59
60 /*
61  * 128-bit arithmetic functions needed to support the stddev() aggregating
62  * action.
63  */
/*
 * Return 1 if the 128-bit value a is strictly greater than b, 0 otherwise.
 * Word [1] is the most significant half and is compared first; word [0]
 * only breaks ties.
 */
static int
dt_gt_128(uint64_t *a, uint64_t *b)
{
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] > b[0]);
}
69
/*
 * Return 1 if the 128-bit value a is greater than or equal to b, 0
 * otherwise.  Word [1] is the most significant half and is compared first;
 * word [0] only breaks ties.
 */
static int
dt_ge_128(uint64_t *a, uint64_t *b)
{
	if (a[1] != b[1])
		return (a[1] > b[1]);

	return (a[0] >= b[0]);
}
75
/*
 * Return 1 if the 128-bit value a is less than or equal to b, 0 otherwise.
 * Word [1] is the most significant half and is compared first; word [0]
 * only breaks ties.
 */
static int
dt_le_128(uint64_t *a, uint64_t *b)
{
	if (a[1] != b[1])
		return (a[1] < b[1]);

	return (a[0] <= b[0]);
}
81
/*
 * Shift the 128-bit value in a by b, in place.  If b is positive, shift
 * left.  If b is negative, shift right (logically; zeroes are shifted in).
 * a[0] holds the low 64 bits and a[1] the high 64 bits.
 *
 * Shifting by 128 bits or more in either direction yields zero.  Every
 * native shift performed here has a count in the range [0, 63], so none of
 * them is undefined.
 */
static void
dt_shift_128(uint64_t *a, int b)
{
	unsigned int n;

	if (b == 0)
		return;

	if (b < 0) {
		/*
		 * Negate in unsigned arithmetic so that even the most
		 * negative int does not overflow.
		 */
		n = 0U - (unsigned int)b;

		if (n >= 128) {
			a[0] = 0;
			a[1] = 0;
		} else if (n >= 64) {
			a[0] = a[1] >> (n - 64);
			a[1] = 0;
		} else {
			/*
			 * The low n bits of the high word become the top
			 * n bits of the low word.
			 */
			a[0] = (a[0] >> n) | (a[1] << (64 - n));
			a[1] >>= n;
		}
	} else {
		n = (unsigned int)b;

		if (n >= 128) {
			a[0] = 0;
			a[1] = 0;
		} else if (n >= 64) {
			a[1] = a[0] << (n - 64);
			a[0] = 0;
		} else {
			/*
			 * The top n bits of the low word become the low
			 * n bits of the high word.
			 */
			a[1] = (a[1] << n) | (a[0] >> (64 - n));
			a[0] <<= n;
		}
	}
}
118
/*
 * Return the zero-based position of the most significant set bit of the
 * 128-bit value a (that is, floor(log2(a))).  Both 0 and 1 yield 0.  The
 * caller's value is not modified; all shifting happens on a local copy.
 */
static int
dt_nbits_128(uint64_t *a)
{
	uint64_t work[2] = { a[0], a[1] };
	uint64_t none[2] = { 0, 0 };
	int count = 0;

	/*
	 * Discard the lowest bit first, then count how many further one-bit
	 * right shifts are needed before nothing is left.
	 */
	for (dt_shift_128(work, -1); dt_gt_128(work, none);
	    dt_shift_128(work, -1))
		count++;

	return (count);
}
137
/*
 * 128-bit subtraction: difference = minuend - subtrahend, wrapping modulo
 * 2^128.  All inputs are read before anything is stored, so difference may
 * point at either operand.
 */
static void
dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
{
	const uint64_t lo = minuend[0] - subtrahend[0];
	uint64_t hi = minuend[1] - subtrahend[1];

	/* Borrow from the high word if the low word wrapped. */
	if (minuend[0] < subtrahend[0])
		hi--;

	difference[0] = lo;
	difference[1] = hi;
}
150
/*
 * 128-bit addition: sum = addend1 + addend2, wrapping modulo 2^128.  All
 * inputs are read before anything is stored, so sum may point at either
 * operand.
 */
static void
dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	const uint64_t lo = addend1[0] + addend2[0];
	uint64_t hi = addend1[1] + addend2[1];

	/* The low word wrapped iff it ended up smaller than an input. */
	if (lo < addend1[0] || lo < addend2[0])
		hi++;

	sum[0] = lo;
	sum[1] = hi;
}
163
164 /*
165  * The basic idea is to break the 2 64-bit values into 4 32-bit values,
166  * use native multiplication on those, and then re-combine into the
167  * resulting 128-bit value.
168  *
169  * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
170  *     hi1 * hi2 << 64 +
171  *     hi1 * lo2 << 32 +
172  *     hi2 * lo1 << 32 +
173  *     lo1 * lo2
174  */
175 static void
176 dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
177 {
178         uint64_t hi1, hi2, lo1, lo2;
179         uint64_t tmp[2];
180
181         hi1 = factor1 >> 32;
182         hi2 = factor2 >> 32;
183
184         lo1 = factor1 & DT_MASK_LO;
185         lo2 = factor2 & DT_MASK_LO;
186
187         product[0] = lo1 * lo2;
188         product[1] = hi1 * hi2;
189
190         tmp[0] = hi1 * lo2;
191         tmp[1] = 0;
192         dt_shift_128(tmp, 32);
193         dt_add_128(product, tmp, product);
194
195         tmp[0] = hi2 * lo1;
196         tmp[1] = 0;
197         dt_shift_128(tmp, 32);
198         dt_add_128(product, tmp, product);
199 }
200
201 /*
202  * This is long-hand division.
203  *
204  * We initialize subtrahend by shifting divisor left as far as possible. We
205  * loop, comparing subtrahend to dividend:  if subtrahend is smaller, we
206  * subtract and set the appropriate bit in the result.  We then shift
207  * subtrahend right by one bit for the next comparison.
208  */
static void
dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
{
	uint64_t quot[2] = { 0, 0 };
	uint64_t rem[2];
	uint64_t chunk[2];
	uint64_t wide_divisor[2];
	uint64_t bit[2] = { 1, 0 };
	uint64_t d;
	int width = 0;

	assert(divisor != 0);

	/* Work on a copy so that quotient may alias dividend. */
	rem[0] = dividend[0];
	rem[1] = dividend[1];

	wide_divisor[0] = chunk[0] = divisor;
	wide_divisor[1] = chunk[1] = 0;

	/* Number of significant bits in the divisor. */
	for (d = divisor; d > 0; d >>= 1)
		width++;

	/*
	 * Left-justify the divisor in 128 bits, and position the quotient
	 * bit that a successful subtraction at that alignment represents.
	 */
	dt_shift_128(chunk, 128 - width);
	dt_shift_128(bit, 128 - width);

	/* Stop as soon as what is left is smaller than the divisor. */
	while (dt_ge_128(rem, wide_divisor)) {
		if (dt_ge_128(rem, chunk)) {
			dt_subtract_128(rem, chunk, rem);
			quot[0] |= bit[0];
			quot[1] |= bit[1];
		}

		dt_shift_128(chunk, -1);
		dt_shift_128(bit, -1);
	}

	quotient[0] = quot[0];
	quotient[1] = quot[1];
}
252
253 /*
254  * This is the long-hand method of calculating a square root.
255  * The algorithm is as follows:
256  *
257  * 1. Group the digits by 2 from the right.
258  * 2. Over the leftmost group, find the largest single-digit number
259  *    whose square is less than that group.
260  * 3. Subtract the result of the previous step (2 or 4, depending) and
261  *    bring down the next two-digit group.
262  * 4. For the result R we have so far, find the largest single-digit number
263  *    x such that 2 * R * 10 * x + x^2 is less than the result from step 3.
264  *    (Note that this is doubling R and performing a decimal left-shift by 1
265  *    and searching for the appropriate decimal to fill the one's place.)
266  *    The value x is the next digit in the square root.
267  * Repeat steps 3 and 4 until the desired precision is reached.  (We're
268  * dealing with integers, so the above is sufficient.)
269  *
270  * In decimal, the square root of 582,734 would be calculated as so:
271  *
272  *     __7__6__3
273  *    | 58 27 34
274  *     -49       (7^2 == 49 => 7 is the first digit in the square root)
275  *      --
276  *       9 27    (Subtract and bring down the next group.)
277  * 146   8 76    (2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in
278  *      -----     the square root)
279  *         51 34 (Subtract and bring down the next group.)
280  * 1523    45 69 (2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit in
281  *         -----  the square root)
282  *          5 65 (remainder)
283  *
284  * The above algorithm applies similarly in binary, but note that the
285  * only possible non-zero value for x in step 4 is 1, so step 4 becomes a
286  * simple decision: is 2 * R * 2 * 1 + 1^2 (aka R << 2 + 1) less than the
287  * preceding difference?
288  *
289  * In binary, the square root of 11011011 would be calculated as so:
290  *
291  *     __1__1__1__0
292  *    | 11 01 10 11
293  *      01          (0 << 2 + 1 == 1 < 11 => this bit is 1)
294  *      --
295  *      10 01 10 11
296  * 101   1 01       (1 << 2 + 1 == 101 < 1001 => next bit is 1)
297  *      -----
298  *       1 00 10 11
299  * 1101    11 01    (11 << 2 + 1 == 1101 < 10010 => next bit is 1)
300  *       -------
301  *          1 01 11
302  * 11101    1 11 01 (111 << 2 + 1 == 11101 > 10111 => last bit is 0)
303  *
304  */
static uint64_t
dt_sqrt_128(uint64_t *square)
{
	uint64_t result[2] = { 0, 0 };
	uint64_t diff[2] = { 0, 0 };
	uint64_t one[2] = { 1, 0 };
	uint64_t next_pair[2];
	uint64_t next_try[2];
	int pair_shift;

	/*
	 * pair_shift is the bit offset of the pair currently being brought
	 * down, starting with the most significant non-empty pair and
	 * ending with the pair at offset 0.  It is kept as a signed int so
	 * that negating it to request a right shift is well-defined.
	 */
	for (pair_shift = (dt_nbits_128(square) / 2) * 2; pair_shift >= 0;
	    pair_shift -= 2) {
		/*
		 * Bring down the next pair of bits.
		 */
		next_pair[0] = square[0];
		next_pair[1] = square[1];
		dt_shift_128(next_pair, -pair_shift);
		next_pair[0] &= 0x3;
		next_pair[1] = 0;

		dt_shift_128(diff, 2);
		dt_add_128(diff, next_pair, diff);

		/*
		 * next_try = R << 2 + 1
		 */
		next_try[0] = result[0];
		next_try[1] = result[1];
		dt_shift_128(next_try, 2);
		dt_add_128(next_try, one, next_try);

		/*
		 * Either way the result gains one more bit; that bit is
		 * set only if the trial value fits in the running difference.
		 */
		if (dt_le_128(next_try, diff)) {
			dt_subtract_128(diff, next_try, diff);
			dt_shift_128(result, 1);
			dt_add_128(result, one, result);
		} else {
			dt_shift_128(result, 1);
		}
	}

	/* The root of a 128-bit value always fits in 64 bits. */
	assert(result[1] == 0);

	return (result[0]);
}
355
/*
 * Compute the integer standard deviation for a stddev() aggregation.
 *
 * As used below, data[0] is the divisor for both averages (the sample
 * count), data[1] is the signed running sum, and data[2..3] is the 128-bit
 * running sum of squares.  normal is the normalization divisor.  Both
 * data[0] and normal must be non-zero: dt_divide_128() asserts on a zero
 * divisor.
 */
uint64_t
dt_stddev(uint64_t *data, uint64_t normal)
{
	uint64_t mean_of_squares[2];
	uint64_t square_of_mean[2];
	uint64_t variance[2];
	int64_t mean;

	/*
	 * The standard approximation for standard deviation is
	 * sqrt(average(x**2) - average(x)**2), i.e. the square root
	 * of the average of the squares minus the square of the average.
	 */
	dt_divide_128(&data[2], normal, mean_of_squares);
	dt_divide_128(mean_of_squares, data[0], mean_of_squares);

	mean = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];

	/* Only the magnitude matters, since the mean is about to be squared. */
	mean = (mean < 0) ? -mean : mean;

	dt_multiply_128((uint64_t)mean, (uint64_t)mean, square_of_mean);
	dt_subtract_128(mean_of_squares, square_of_mean, variance);

	return (dt_sqrt_128(variance));
}
383
384 static int
385 dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
386     dtrace_bufdesc_t *buf, size_t offs)
387 {
388         dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
389         dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
390         char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;
391         dtrace_flowkind_t flow = DTRACEFLOW_NONE;
392         const char *str = NULL;
393         static const char *e_str[2] = { " -> ", " => " };
394         static const char *r_str[2] = { " <- ", " <= " };
395         static const char *ent = "entry", *ret = "return";
396         static int entlen = 0, retlen = 0;
397         dtrace_epid_t next, id = epd->dtepd_epid;
398         int rval;
399
400         if (entlen == 0) {
401                 assert(retlen == 0);
402                 entlen = strlen(ent);
403                 retlen = strlen(ret);
404         }
405
406         /*
407          * If the name of the probe is "entry" or ends with "-entry", we
408          * treat it as an entry; if it is "return" or ends with "-return",
409          * we treat it as a return.  (This allows application-provided probes
410          * like "method-entry" or "function-entry" to participate in flow
411          * indentation -- without accidentally misinterpreting popular probe
412          * names like "carpentry", "gentry" or "Coventry".)
413          */
414         if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&
415             (sub == n || sub[-1] == '-')) {
416                 flow = DTRACEFLOW_ENTRY;
417                 str = e_str[strcmp(p, "syscall") == 0];
418         } else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&
419             (sub == n || sub[-1] == '-')) {
420                 flow = DTRACEFLOW_RETURN;
421                 str = r_str[strcmp(p, "syscall") == 0];
422         }
423
424         /*
425          * If we're going to indent this, we need to check the ID of our last
426          * call.  If we're looking at the same probe ID but a different EPID,
427          * we _don't_ want to indent.  (Yes, there are some minor holes in
428          * this scheme -- it's a heuristic.)
429          */
430         if (flow == DTRACEFLOW_ENTRY) {
431                 if ((last != DTRACE_EPIDNONE && id != last &&
432                     pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
433                         flow = DTRACEFLOW_NONE;
434         }
435
436         /*
437          * If we're going to unindent this, it's more difficult to see if
438          * we don't actually want to unindent it -- we need to look at the
439          * _next_ EPID.
440          */
441         if (flow == DTRACEFLOW_RETURN) {
442                 offs += epd->dtepd_size;
443
444                 do {
445                         if (offs >= buf->dtbd_size) {
446                                 /*
447                                  * We're at the end -- maybe.  If the oldest
448                                  * record is non-zero, we need to wrap.
449                                  */
450                                 if (buf->dtbd_oldest != 0) {
451                                         offs = 0;
452                                 } else {
453                                         goto out;
454                                 }
455                         }
456
457                         next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
458
459                         if (next == DTRACE_EPIDNONE)
460                                 offs += sizeof (id);
461                 } while (next == DTRACE_EPIDNONE);
462
463                 if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
464                         return (rval);
465
466                 if (next != id && npd->dtpd_id == pd->dtpd_id)
467                         flow = DTRACEFLOW_NONE;
468         }
469
470 out:
471         if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
472                 data->dtpda_prefix = str;
473         } else {
474                 data->dtpda_prefix = "| ";
475         }
476
477         if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
478                 data->dtpda_indent -= 2;
479
480         data->dtpda_flow = flow;
481
482         return (0);
483 }
484
485 static int
486 dt_nullprobe()
487 {
488         return (DTRACE_CONSUME_THIS);
489 }
490
491 static int
492 dt_nullrec()
493 {
494         return (DTRACE_CONSUME_NEXT);
495 }
496
497 int
498 dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
499     uint64_t normal, long double total, char positives, char negatives)
500 {
501         long double f;
502         uint_t depth, len = 40;
503
504         const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
505         const char *spaces = "                                        ";
506
507         assert(strlen(ats) == len && strlen(spaces) == len);
508         assert(!(total == 0 && (positives || negatives)));
509         assert(!(val < 0 && !negatives));
510         assert(!(val > 0 && !positives));
511         assert(!(val != 0 && total == 0));
512
513         if (!negatives) {
514                 if (positives) {
515                         f = (dt_fabsl((long double)val) * len) / total;
516                         depth = (uint_t)(f + 0.5);
517                 } else {
518                         depth = 0;
519                 }
520
521                 return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,
522                     spaces + depth, (long long)val / normal));
523         }
524
525         if (!positives) {
526                 f = (dt_fabsl((long double)val) * len) / total;
527                 depth = (uint_t)(f + 0.5);
528
529                 return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,
530                     ats + len - depth, (long long)val / normal));
531         }
532
533         /*
534          * If we're here, we have both positive and negative bucket values.
535          * To express this graphically, we're going to generate both positive
536          * and negative bars separated by a centerline.  These bars are half
537          * the size of normal quantize()/lquantize() bars, so we divide the
538          * length in half before calculating the bar length.
539          */
540         len /= 2;
541         ats = &ats[len];
542         spaces = &spaces[len];
543
544         f = (dt_fabsl((long double)val) * len) / total;
545         depth = (uint_t)(f + 0.5);
546
547         if (val <= 0) {
548                 return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,
549                     ats + len - depth, len, "", (long long)val / normal));
550         } else {
551                 return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",
552                     ats + len - depth, spaces + depth,
553                     (long long)val / normal));
554         }
555 }
556
557 int
558 dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
559     size_t size, uint64_t normal)
560 {
561         const int64_t *data = addr;
562         int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
563         long double total = 0;
564         char positives = 0, negatives = 0;
565
566         if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
567                 return (dt_set_errno(dtp, EDT_DMISMATCH));
568
569         while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
570                 first_bin++;
571
572         if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {
573                 /*
574                  * There isn't any data.  This is possible if (and only if)
575                  * negative increment values have been used.  In this case,
576                  * we'll print the buckets around 0.
577                  */
578                 first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;
579                 last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;
580         } else {
581                 if (first_bin > 0)
582                         first_bin--;
583
584                 while (last_bin > 0 && data[last_bin] == 0)
585                         last_bin--;
586
587                 if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
588                         last_bin++;
589         }
590
591         for (i = first_bin; i <= last_bin; i++) {
592                 positives |= (data[i] > 0);
593                 negatives |= (data[i] < 0);
594                 total += dt_fabsl((long double)data[i]);
595         }
596
597         if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
598             "------------- Distribution -------------", "count") < 0)
599                 return (-1);
600
601         for (i = first_bin; i <= last_bin; i++) {
602                 if (dt_printf(dtp, fp, "%16lld ",
603                     (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)
604                         return (-1);
605
606                 if (dt_print_quantline(dtp, fp, data[i], normal, total,
607                     positives, negatives) < 0)
608                         return (-1);
609         }
610
611         return (0);
612 }
613
614 int
615 dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
616     size_t size, uint64_t normal)
617 {
618         const int64_t *data = addr;
619         int i, first_bin, last_bin, base;
620         uint64_t arg;
621         long double total = 0;
622         uint16_t step, levels;
623         char positives = 0, negatives = 0;
624
625         if (size < sizeof (uint64_t))
626                 return (dt_set_errno(dtp, EDT_DMISMATCH));
627
628         arg = *data++;
629         size -= sizeof (uint64_t);
630
631         base = DTRACE_LQUANTIZE_BASE(arg);
632         step = DTRACE_LQUANTIZE_STEP(arg);
633         levels = DTRACE_LQUANTIZE_LEVELS(arg);
634
635         first_bin = 0;
636         last_bin = levels + 1;
637
638         if (size != sizeof (uint64_t) * (levels + 2))
639                 return (dt_set_errno(dtp, EDT_DMISMATCH));
640
641         while (first_bin <= levels + 1 && data[first_bin] == 0)
642                 first_bin++;
643
644         if (first_bin > levels + 1) {
645                 first_bin = 0;
646                 last_bin = 2;
647         } else {
648                 if (first_bin > 0)
649                         first_bin--;
650
651                 while (last_bin > 0 && data[last_bin] == 0)
652                         last_bin--;
653
654                 if (last_bin < levels + 1)
655                         last_bin++;
656         }
657
658         for (i = first_bin; i <= last_bin; i++) {
659                 positives |= (data[i] > 0);
660                 negatives |= (data[i] < 0);
661                 total += dt_fabsl((long double)data[i]);
662         }
663
664         if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
665             "------------- Distribution -------------", "count") < 0)
666                 return (-1);
667
668         for (i = first_bin; i <= last_bin; i++) {
669                 char c[32];
670                 int err;
671
672                 if (i == 0) {
673                         (void) snprintf(c, sizeof (c), "< %d",
674                             base / (uint32_t)normal);
675                         err = dt_printf(dtp, fp, "%16s ", c);
676                 } else if (i == levels + 1) {
677                         (void) snprintf(c, sizeof (c), ">= %d",
678                             base + (levels * step));
679                         err = dt_printf(dtp, fp, "%16s ", c);
680                 } else {
681                         err = dt_printf(dtp, fp, "%16d ",
682                             base + (i - 1) * step);
683                 }
684
685                 if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,
686                     total, positives, negatives) < 0)
687                         return (-1);
688         }
689
690         return (0);
691 }
692
693 int
694 dt_print_llquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
695     size_t size, uint64_t normal)
696 {
697         int i, first_bin, last_bin, bin = 1, order, levels;
698         uint16_t factor, low, high, nsteps;
699         const int64_t *data = addr;
700         int64_t value = 1, next, step;
701         char positives = 0, negatives = 0;
702         long double total = 0;
703         uint64_t arg;
704         char c[32];
705
706         if (size < sizeof (uint64_t))
707                 return (dt_set_errno(dtp, EDT_DMISMATCH));
708
709         arg = *data++;
710         size -= sizeof (uint64_t);
711
712         factor = DTRACE_LLQUANTIZE_FACTOR(arg);
713         low = DTRACE_LLQUANTIZE_LOW(arg);
714         high = DTRACE_LLQUANTIZE_HIGH(arg);
715         nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
716
717         /*
718          * We don't expect to be handed invalid llquantize() parameters here,
719          * but sanity check them (to a degree) nonetheless.
720          */
721         if (size > INT32_MAX || factor < 2 || low >= high ||
722             nsteps == 0 || factor > nsteps)
723                 return (dt_set_errno(dtp, EDT_DMISMATCH));
724
725         levels = (int)size / sizeof (uint64_t);
726
727         first_bin = 0;
728         last_bin = levels - 1;
729
730         while (first_bin < levels && data[first_bin] == 0)
731                 first_bin++;
732
733         if (first_bin == levels) {
734                 first_bin = 0;
735                 last_bin = 1;
736         } else {
737                 if (first_bin > 0)
738                         first_bin--;
739
740                 while (last_bin > 0 && data[last_bin] == 0)
741                         last_bin--;
742
743                 if (last_bin < levels - 1)
744                         last_bin++;
745         }
746
747         for (i = first_bin; i <= last_bin; i++) {
748                 positives |= (data[i] > 0);
749                 negatives |= (data[i] < 0);
750                 total += dt_fabsl((long double)data[i]);
751         }
752
753         if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
754             "------------- Distribution -------------", "count") < 0)
755                 return (-1);
756
757         for (order = 0; order < low; order++)
758                 value *= factor;
759
760         next = value * factor;
761         step = next > nsteps ? next / nsteps : 1;
762
763         if (first_bin == 0) {
764                 (void) snprintf(c, sizeof (c), "< %lld", (long long)value);
765
766                 if (dt_printf(dtp, fp, "%16s ", c) < 0)
767                         return (-1);
768
769                 if (dt_print_quantline(dtp, fp, data[0], normal,
770                     total, positives, negatives) < 0)
771                         return (-1);
772         }
773
774         while (order <= high) {
775                 if (bin >= first_bin && bin <= last_bin) {
776                         if (dt_printf(dtp, fp, "%16lld ", (long long)value) < 0)
777                                 return (-1);
778
779                         if (dt_print_quantline(dtp, fp, data[bin],
780                             normal, total, positives, negatives) < 0)
781                                 return (-1);
782                 }
783
784                 assert(value < next);
785                 bin++;
786
787                 if ((value += step) != next)
788                         continue;
789
790                 next = value * factor;
791                 step = next > nsteps ? next / nsteps : 1;
792                 order++;
793         }
794
795         if (last_bin < bin)
796                 return (0);
797
798         assert(last_bin == bin);
799         (void) snprintf(c, sizeof (c), ">= %lld", (long long)value);
800
801         if (dt_printf(dtp, fp, "%16s ", c) < 0)
802                 return (-1);
803
804         return (dt_print_quantline(dtp, fp, data[bin], normal,
805             total, positives, negatives));
806 }
807
808 /*ARGSUSED*/
809 static int
810 dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
811     size_t size, uint64_t normal)
812 {
813         /* LINTED - alignment */
814         int64_t *data = (int64_t *)addr;
815
816         return (dt_printf(dtp, fp, " %16lld", data[0] ?
817             (long long)(data[1] / (int64_t)normal / data[0]) : 0));
818 }
819
820 /*ARGSUSED*/
821 static int
822 dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
823     size_t size, uint64_t normal)
824 {
825         /* LINTED - alignment */
826         uint64_t *data = (uint64_t *)addr;
827
828         return (dt_printf(dtp, fp, " %16llu", data[0] ?
829             (unsigned long long) dt_stddev(data, normal) : 0));
830 }
831
832 /*ARGSUSED*/
833 int
834 dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
835     size_t nbytes, int width, int quiet, int raw)
836 {
837         /*
838          * If the byte stream is a series of printable characters, followed by
839          * a terminating byte, we print it out as a string.  Otherwise, we
840          * assume that it's something else and just print the bytes.
841          */
842         int i, j, margin = 5;
843         char *c = (char *)addr;
844
845         if (nbytes == 0)
846                 return (0);
847
848         if (raw || dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
849                 goto raw;
850
851         for (i = 0; i < nbytes; i++) {
852                 /*
853                  * We define a "printable character" to be one for which
854                  * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
855                  * or a character which is either backspace or the bell.
856                  * Backspace and the bell are regrettably special because
857                  * they fail the first two tests -- and yet they are entirely
858                  * printable.  These are the only two control characters that
859                  * have meaning for the terminal and for which isprint(3C) and
860                  * isspace(3C) return 0.
861                  */
862                 if (isprint(c[i]) || isspace(c[i]) ||
863                     c[i] == '\b' || c[i] == '\a')
864                         continue;
865
866                 if (c[i] == '\0' && i > 0) {
867                         /*
868                          * This looks like it might be a string.  Before we
869                          * assume that it is indeed a string, check the
870                          * remainder of the byte range; if it contains
871                          * additional non-nul characters, we'll assume that
872                          * it's a binary stream that just happens to look like
873                          * a string, and we'll print out the individual bytes.
874                          */
875                         for (j = i + 1; j < nbytes; j++) {
876                                 if (c[j] != '\0')
877                                         break;
878                         }
879
880                         if (j != nbytes)
881                                 break;
882
883                         if (quiet)
884                                 return (dt_printf(dtp, fp, "%s", c));
885                         else
886                                 return (dt_printf(dtp, fp, "  %-*s", width, c));
887                 }
888
889                 break;
890         }
891
892         if (i == nbytes) {
893                 /*
894                  * The byte range is all printable characters, but there is
895                  * no trailing nul byte.  We'll assume that it's a string and
896                  * print it as such.
897                  */
898                 char *s = alloca(nbytes + 1);
899                 bcopy(c, s, nbytes);
900                 s[nbytes] = '\0';
901                 return (dt_printf(dtp, fp, "  %-*s", width, s));
902         }
903
904 raw:
905         if (dt_printf(dtp, fp, "\n%*s      ", margin, "") < 0)
906                 return (-1);
907
908         for (i = 0; i < 16; i++)
909                 if (dt_printf(dtp, fp, "  %c", "0123456789abcdef"[i]) < 0)
910                         return (-1);
911
912         if (dt_printf(dtp, fp, "  0123456789abcdef\n") < 0)
913                 return (-1);
914
915
916         for (i = 0; i < nbytes; i += 16) {
917                 if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
918                         return (-1);
919
920                 for (j = i; j < i + 16 && j < nbytes; j++) {
921                         if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
922                                 return (-1);
923                 }
924
925                 while (j++ % 16) {
926                         if (dt_printf(dtp, fp, "   ") < 0)
927                                 return (-1);
928                 }
929
930                 if (dt_printf(dtp, fp, "  ") < 0)
931                         return (-1);
932
933                 for (j = i; j < i + 16 && j < nbytes; j++) {
934                         if (dt_printf(dtp, fp, "%c",
935                             c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
936                                 return (-1);
937                 }
938
939                 if (dt_printf(dtp, fp, "\n") < 0)
940                         return (-1);
941         }
942
943         return (0);
944 }
945
946 int
947 dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
948     caddr_t addr, int depth, int size)
949 {
950         dtrace_syminfo_t dts;
951         GElf_Sym sym;
952         int i, indent;
953         char c[PATH_MAX * 2];
954         uint64_t pc;
955
956         if (dt_printf(dtp, fp, "\n") < 0)
957                 return (-1);
958
959         if (format == NULL)
960                 format = "%s";
961
962         if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
963                 indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
964         else
965                 indent = _dtrace_stkindent;
966
967         for (i = 0; i < depth; i++) {
968                 switch (size) {
969                 case sizeof (uint32_t):
970                         /* LINTED - alignment */
971                         pc = *((uint32_t *)addr);
972                         break;
973
974                 case sizeof (uint64_t):
975                         /* LINTED - alignment */
976                         pc = *((uint64_t *)addr);
977                         break;
978
979                 default:
980                         return (dt_set_errno(dtp, EDT_BADSTACKPC));
981                 }
982
983                 if (pc == 0)
984                         break;
985
986                 addr += size;
987
988                 if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
989                         return (-1);
990
991                 if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
992                         if (pc > sym.st_value) {
993                                 (void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
994                                     dts.dts_object, dts.dts_name,
995                                     (u_longlong_t)(pc - sym.st_value));
996                         } else {
997                                 (void) snprintf(c, sizeof (c), "%s`%s",
998                                     dts.dts_object, dts.dts_name);
999                         }
1000                 } else {
1001                         /*
1002                          * We'll repeat the lookup, but this time we'll specify
1003                          * a NULL GElf_Sym -- indicating that we're only
1004                          * interested in the containing module.
1005                          */
1006                         if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1007                                 (void) snprintf(c, sizeof (c), "%s`0x%llx",
1008                                     dts.dts_object, (u_longlong_t)pc);
1009                         } else {
1010                                 (void) snprintf(c, sizeof (c), "0x%llx",
1011                                     (u_longlong_t)pc);
1012                         }
1013                 }
1014
1015                 if (dt_printf(dtp, fp, format, c) < 0)
1016                         return (-1);
1017
1018                 if (dt_printf(dtp, fp, "\n") < 0)
1019                         return (-1);
1020         }
1021
1022         return (0);
1023 }
1024
1025 int
1026 dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
1027     caddr_t addr, uint64_t arg)
1028 {
1029         /* LINTED - alignment */
1030         uint64_t *pc = (uint64_t *)addr;
1031         uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
1032         uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
1033         const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
1034         const char *str = strsize ? strbase : NULL;
1035         int err = 0;
1036
1037         char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
1038         struct ps_prochandle *P;
1039         GElf_Sym sym;
1040         int i, indent;
1041         pid_t pid;
1042
1043         if (depth == 0)
1044                 return (0);
1045
1046         pid = (pid_t)*pc++;
1047
1048         if (dt_printf(dtp, fp, "\n") < 0)
1049                 return (-1);
1050
1051         if (format == NULL)
1052                 format = "%s";
1053
1054         if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
1055                 indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
1056         else
1057                 indent = _dtrace_stkindent;
1058
1059         /*
1060          * Ultimately, we need to add an entry point in the library vector for
1061          * determining <symbol, offset> from <pid, address>.  For now, if
1062          * this is a vector open, we just print the raw address or string.
1063          */
1064         if (dtp->dt_vector == NULL)
1065                 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1066         else
1067                 P = NULL;
1068
1069         if (P != NULL)
1070                 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1071
1072         for (i = 0; i < depth && pc[i] != 0; i++) {
1073                 const prmap_t *map;
1074
1075                 if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1076                         break;
1077
1078                 if (P != NULL && Plookup_by_addr(P, pc[i],
1079                     name, sizeof (name), &sym) == 0) {
1080                         (void) Pobjname(P, pc[i], objname, sizeof (objname));
1081
1082                         if (pc[i] > sym.st_value) {
1083                                 (void) snprintf(c, sizeof (c),
1084                                     "%s`%s+0x%llx", dt_basename(objname), name,
1085                                     (u_longlong_t)(pc[i] - sym.st_value));
1086                         } else {
1087                                 (void) snprintf(c, sizeof (c),
1088                                     "%s`%s", dt_basename(objname), name);
1089                         }
1090                 } else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
1091                     (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
1092                     (map->pr_mflags & MA_WRITE)))) {
1093                         /*
1094                          * If the current string pointer in the string table
1095                          * does not point to an empty string _and_ the program
1096                          * counter falls in a writable region, we'll use the
1097                          * string from the string table instead of the raw
1098                          * address.  This last condition is necessary because
1099                          * some (broken) ustack helpers will return a string
1100                          * even for a program counter that they can't
1101                          * identify.  If we have a string for a program
1102                          * counter that falls in a segment that isn't
1103                          * writable, we assume that we have fallen into this
1104                          * case and we refuse to use the string.
1105                          */
1106                         (void) snprintf(c, sizeof (c), "%s", str);
1107                 } else {
1108                         if (P != NULL && Pobjname(P, pc[i], objname,
1109                             sizeof (objname)) != 0) {
1110                                 (void) snprintf(c, sizeof (c), "%s`0x%llx",
1111                                     dt_basename(objname), (u_longlong_t)pc[i]);
1112                         } else {
1113                                 (void) snprintf(c, sizeof (c), "0x%llx",
1114                                     (u_longlong_t)pc[i]);
1115                         }
1116                 }
1117
1118                 if ((err = dt_printf(dtp, fp, format, c)) < 0)
1119                         break;
1120
1121                 if ((err = dt_printf(dtp, fp, "\n")) < 0)
1122                         break;
1123
1124                 if (str != NULL && str[0] == '@') {
1125                         /*
1126                          * If the first character of the string is an "at" sign,
1127                          * then the string is inferred to be an annotation --
1128                          * and it is printed out beneath the frame and offset
1129                          * with brackets.
1130                          */
1131                         if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1132                                 break;
1133
1134                         (void) snprintf(c, sizeof (c), "  [ %s ]", &str[1]);
1135
1136                         if ((err = dt_printf(dtp, fp, format, c)) < 0)
1137                                 break;
1138
1139                         if ((err = dt_printf(dtp, fp, "\n")) < 0)
1140                                 break;
1141                 }
1142
1143                 if (str != NULL) {
1144                         str += strlen(str) + 1;
1145                         if (str - strbase >= strsize)
1146                                 str = NULL;
1147                 }
1148         }
1149
1150         if (P != NULL) {
1151                 dt_proc_unlock(dtp, P);
1152                 dt_proc_release(dtp, P);
1153         }
1154
1155         return (err);
1156 }
1157
1158 static int
1159 dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)
1160 {
1161         /* LINTED - alignment */
1162         uint64_t pid = ((uint64_t *)addr)[0];
1163         /* LINTED - alignment */
1164         uint64_t pc = ((uint64_t *)addr)[1];
1165         const char *format = "  %-50s";
1166         char *s;
1167         int n, len = 256;
1168
1169         if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1170                 struct ps_prochandle *P;
1171
1172                 if ((P = dt_proc_grab(dtp, pid,
1173                     PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
1174                         GElf_Sym sym;
1175
1176                         dt_proc_lock(dtp, P);
1177
1178                         if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
1179                                 pc = sym.st_value;
1180
1181                         dt_proc_unlock(dtp, P);
1182                         dt_proc_release(dtp, P);
1183                 }
1184         }
1185
1186         do {
1187                 n = len;
1188                 s = alloca(n);
1189         } while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
1190
1191         return (dt_printf(dtp, fp, format, s));
1192 }
1193
1194 int
1195 dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1196 {
1197         /* LINTED - alignment */
1198         uint64_t pid = ((uint64_t *)addr)[0];
1199         /* LINTED - alignment */
1200         uint64_t pc = ((uint64_t *)addr)[1];
1201         int err = 0;
1202
1203         char objname[PATH_MAX], c[PATH_MAX * 2];
1204         struct ps_prochandle *P;
1205
1206         if (format == NULL)
1207                 format = "  %-50s";
1208
1209         /*
1210          * See the comment in dt_print_ustack() for the rationale for
1211          * printing raw addresses in the vectored case.
1212          */
1213         if (dtp->dt_vector == NULL)
1214                 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1215         else
1216                 P = NULL;
1217
1218         if (P != NULL)
1219                 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1220
1221         if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {
1222                 (void) snprintf(c, sizeof (c), "%s", dt_basename(objname));
1223         } else {
1224                 (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1225         }
1226
1227         err = dt_printf(dtp, fp, format, c);
1228
1229         if (P != NULL) {
1230                 dt_proc_unlock(dtp, P);
1231                 dt_proc_release(dtp, P);
1232         }
1233
1234         return (err);
1235 }
1236
1237 int
1238 dt_print_memory(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
1239 {
1240         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1241         size_t nbytes = *((uintptr_t *) addr);
1242
1243         return (dt_print_bytes(dtp, fp, addr + sizeof(uintptr_t),
1244             nbytes, 50, quiet, 1));
1245 }
1246
1247 typedef struct dt_type_cbdata {
1248         dtrace_hdl_t            *dtp;
1249         dtrace_typeinfo_t       dtt;
1250         caddr_t                 addr;
1251         caddr_t                 addrend;
1252         const char              *name;
1253         int                     f_type;
1254         int                     indent;
1255         int                     type_width;
1256         int                     name_width;
1257         FILE                    *fp;
1258 } dt_type_cbdata_t;
1259
1260 static int      dt_print_type_data(dt_type_cbdata_t *, ctf_id_t);
1261
1262 static int
1263 dt_print_type_member(const char *name, ctf_id_t type, ulong_t off, void *arg)
1264 {
1265         dt_type_cbdata_t cbdata;
1266         dt_type_cbdata_t *cbdatap = arg;
1267         ssize_t ssz;
1268
1269         if ((ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type)) <= 0)
1270                 return (0);
1271
1272         off /= 8;
1273
1274         cbdata = *cbdatap;
1275         cbdata.name = name;
1276         cbdata.addr += off;
1277         cbdata.addrend = cbdata.addr + ssz;
1278
1279         return (dt_print_type_data(&cbdata, type));
1280 }
1281
1282 static int
1283 dt_print_type_width(const char *name, ctf_id_t type, ulong_t off, void *arg)
1284 {
1285         char buf[DT_TYPE_NAMELEN];
1286         char *p;
1287         dt_type_cbdata_t *cbdatap = arg;
1288         size_t sz = strlen(name);
1289
1290         ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf));
1291
1292         if ((p = strchr(buf, '[')) != NULL)
1293                 p[-1] = '\0';
1294         else
1295                 p = "";
1296
1297         sz += strlen(p);
1298
1299         if (sz > cbdatap->name_width)
1300                 cbdatap->name_width = sz;
1301
1302         sz = strlen(buf);
1303
1304         if (sz > cbdatap->type_width)
1305                 cbdatap->type_width = sz;
1306
1307         return (0);
1308 }
1309
1310 static int
1311 dt_print_type_data(dt_type_cbdata_t *cbdatap, ctf_id_t type)
1312 {
1313         caddr_t addr = cbdatap->addr;
1314         caddr_t addrend = cbdatap->addrend;
1315         char buf[DT_TYPE_NAMELEN];
1316         char *p;
1317         int cnt = 0;
1318         uint_t kind = ctf_type_kind(cbdatap->dtt.dtt_ctfp, type);
1319         ssize_t ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type);
1320
1321         ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf));
1322
1323         if ((p = strchr(buf, '[')) != NULL)
1324                 p[-1] = '\0';
1325         else
1326                 p = "";
1327
1328         if (cbdatap->f_type) {
1329                 int type_width = roundup(cbdatap->type_width + 1, 4);
1330                 int name_width = roundup(cbdatap->name_width + 1, 4);
1331
1332                 name_width -= strlen(cbdatap->name);
1333
1334                 dt_printf(cbdatap->dtp, cbdatap->fp, "%*s%-*s%s%-*s     = ",cbdatap->indent * 4,"",type_width,buf,cbdatap->name,name_width,p);
1335         }
1336
1337         while (addr < addrend) {
1338                 dt_type_cbdata_t cbdata;
1339                 ctf_arinfo_t arinfo;
1340                 ctf_encoding_t cte;
1341                 uintptr_t *up;
1342                 void *vp = addr;
1343                 cbdata = *cbdatap;
1344                 cbdata.name = "";
1345                 cbdata.addr = addr;
1346                 cbdata.addrend = addr + ssz;
1347                 cbdata.f_type = 0;
1348                 cbdata.indent++;
1349                 cbdata.type_width = 0;
1350                 cbdata.name_width = 0;
1351
1352                 if (cnt > 0)
1353                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s", cbdatap->indent * 4,"");
1354
1355                 switch (kind) {
1356                 case CTF_K_INTEGER:
1357                         if (ctf_type_encoding(cbdatap->dtt.dtt_ctfp, type, &cte) != 0)
1358                                 return (-1);
1359                         if ((cte.cte_format & CTF_INT_SIGNED) != 0)
1360                                 switch (cte.cte_bits) {
1361                                 case 8:
1362                                         if (isprint(*((char *) vp)))
1363                                                 dt_printf(cbdatap->dtp, cbdatap->fp, "'%c', ", *((char *) vp));
1364                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((char *) vp), *((char *) vp));
1365                                         break;
1366                                 case 16:
1367                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%hd (0x%hx);\n", *((short *) vp), *((u_short *) vp));
1368                                         break;
1369                                 case 32:
1370                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((int *) vp), *((u_int *) vp));
1371                                         break;
1372                                 case 64:
1373                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%jd (0x%jx);\n", *((long long *) vp), *((unsigned long long *) vp));
1374                                         break;
1375                                 default:
1376                                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1377                                         break;
1378                                 }
1379                         else
1380                                 switch (cte.cte_bits) {
1381                                 case 8:
1382                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((uint8_t *) vp) & 0xff, *((uint8_t *) vp) & 0xff);
1383                                         break;
1384                                 case 16:
1385                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%hu (0x%hx);\n", *((u_short *) vp), *((u_short *) vp));
1386                                         break;
1387                                 case 32:
1388                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((u_int *) vp), *((u_int *) vp));
1389                                         break;
1390                                 case 64:
1391                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%ju (0x%jx);\n", *((unsigned long long *) vp), *((unsigned long long *) vp));
1392                                         break;
1393                                 default:
1394                                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1395                                         break;
1396                                 }
1397                         break;
1398                 case CTF_K_FLOAT:
1399                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FLOAT: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1400                         break;
1401                 case CTF_K_POINTER:
1402                         dt_printf(cbdatap->dtp, cbdatap->fp, "%p;\n", *((void **) addr));
1403                         break;
1404                 case CTF_K_ARRAY:
1405                         if (ctf_array_info(cbdatap->dtt.dtt_ctfp, type, &arinfo) != 0)
1406                                 return (-1);
1407                         dt_printf(cbdatap->dtp, cbdatap->fp, "{\n%*s",cbdata.indent * 4,"");
1408                         dt_print_type_data(&cbdata, arinfo.ctr_contents);
1409                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1410                         break;
1411                 case CTF_K_FUNCTION:
1412                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FUNCTION:\n");
1413                         break;
1414                 case CTF_K_STRUCT:
1415                         cbdata.f_type = 1;
1416                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1417                             dt_print_type_width, &cbdata) != 0)
1418                                 return (-1);
1419                         dt_printf(cbdatap->dtp, cbdatap->fp, "{\n");
1420                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1421                             dt_print_type_member, &cbdata) != 0)
1422                                 return (-1);
1423                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1424                         break;
1425                 case CTF_K_UNION:
1426                         cbdata.f_type = 1;
1427                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1428                             dt_print_type_width, &cbdata) != 0)
1429                                 return (-1);
1430                         dt_printf(cbdatap->dtp, cbdatap->fp, "{\n");
1431                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1432                             dt_print_type_member, &cbdata) != 0)
1433                                 return (-1);
1434                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1435                         break;
1436                 case CTF_K_ENUM:
1437                         dt_printf(cbdatap->dtp, cbdatap->fp, "%s;\n", ctf_enum_name(cbdatap->dtt.dtt_ctfp, type, *((int *) vp)));
1438                         break;
1439                 case CTF_K_TYPEDEF:
1440                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1441                         break;
1442                 case CTF_K_VOLATILE:
1443                         if (cbdatap->f_type)
1444                                 dt_printf(cbdatap->dtp, cbdatap->fp, "volatile ");
1445                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1446                         break;
1447                 case CTF_K_CONST:
1448                         if (cbdatap->f_type)
1449                                 dt_printf(cbdatap->dtp, cbdatap->fp, "const ");
1450                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1451                         break;
1452                 case CTF_K_RESTRICT:
1453                         if (cbdatap->f_type)
1454                                 dt_printf(cbdatap->dtp, cbdatap->fp, "restrict ");
1455                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1456                         break;
1457                 default:
1458                         break;
1459                 }
1460
1461                 addr += ssz;
1462                 cnt++;
1463         }
1464
1465         return (0);
1466 }
1467
1468 static int
1469 dt_print_type(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
1470 {
1471         caddr_t addrend;
1472         char *p;
1473         dtrace_typeinfo_t dtt;
1474         dt_type_cbdata_t cbdata;
1475         int num = 0;
1476         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1477         ssize_t ssz;
1478
1479         if (!quiet)
1480                 dt_printf(dtp, fp, "\n");
1481
1482         /* Get the total number of bytes of data buffered. */
1483         size_t nbytes = *((uintptr_t *) addr);
1484         addr += sizeof(uintptr_t);
1485
1486         /*
1487          * Get the size of the type so that we can check that it matches
1488          * the CTF data we look up and so that we can figure out how many
1489          * type elements are buffered.
1490          */
1491         size_t typs = *((uintptr_t *) addr);
1492         addr += sizeof(uintptr_t);
1493
1494         /*
1495          * Point to the type string in the buffer. Get it's string
1496          * length and round it up to become the offset to the start
1497          * of the buffered type data which we would like to be aligned
1498          * for easy access.
1499          */
1500         char *strp = (char *) addr;
1501         int offset = roundup(strlen(strp) + 1, sizeof(uintptr_t));
1502
1503         /*
1504          * The type string might have a format such as 'int [20]'.
1505          * Check if there is an array dimension present.
1506          */
1507         if ((p = strchr(strp, '[')) != NULL) {
1508                 /* Strip off the array dimension. */
1509                 *p++ = '\0';
1510
1511                 for (; *p != '\0' && *p != ']'; p++)
1512                         num = num * 10 + *p - '0';
1513         } else
1514                 /* No array dimension, so default. */
1515                 num = 1;
1516
1517         /* Lookup the CTF type from the type string. */
1518         if (dtrace_lookup_by_type(dtp,  DTRACE_OBJ_EVERY, strp, &dtt) < 0)
1519                 return (-1);
1520
1521         /* Offset the buffer address to the start of the data... */
1522         addr += offset;
1523
1524         ssz = ctf_type_size(dtt.dtt_ctfp, dtt.dtt_type);
1525
1526         if (typs != ssz) {
1527                 printf("Expected type size from buffer (%lu) to match type size looked up now (%ld)\n", (u_long) typs, (long) ssz);
1528                 return (-1);
1529         }
1530
1531         cbdata.dtp = dtp;
1532         cbdata.dtt = dtt;
1533         cbdata.name = "";
1534         cbdata.addr = addr;
1535         cbdata.addrend = addr + nbytes;
1536         cbdata.indent = 1;
1537         cbdata.f_type = 1;
1538         cbdata.type_width = 0;
1539         cbdata.name_width = 0;
1540         cbdata.fp = fp;
1541
1542         return (dt_print_type_data(&cbdata, dtt.dtt_type));
1543 }
1544
1545 static int
1546 dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1547 {
1548         /* LINTED - alignment */
1549         uint64_t pc = *((uint64_t *)addr);
1550         dtrace_syminfo_t dts;
1551         GElf_Sym sym;
1552         char c[PATH_MAX * 2];
1553
1554         if (format == NULL)
1555                 format = "  %-50s";
1556
1557         if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1558                 (void) snprintf(c, sizeof (c), "%s`%s",
1559                     dts.dts_object, dts.dts_name);
1560         } else {
1561                 /*
1562                  * We'll repeat the lookup, but this time we'll specify a
1563                  * NULL GElf_Sym -- indicating that we're only interested in
1564                  * the containing module.
1565                  */
1566                 if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1567                         (void) snprintf(c, sizeof (c), "%s`0x%llx",
1568                             dts.dts_object, (u_longlong_t)pc);
1569                 } else {
1570                         (void) snprintf(c, sizeof (c), "0x%llx",
1571                             (u_longlong_t)pc);
1572                 }
1573         }
1574
1575         if (dt_printf(dtp, fp, format, c) < 0)
1576                 return (-1);
1577
1578         return (0);
1579 }
1580
1581 int
1582 dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1583 {
1584         /* LINTED - alignment */
1585         uint64_t pc = *((uint64_t *)addr);
1586         dtrace_syminfo_t dts;
1587         char c[PATH_MAX * 2];
1588
1589         if (format == NULL)
1590                 format = "  %-50s";
1591
1592         if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1593                 (void) snprintf(c, sizeof (c), "%s", dts.dts_object);
1594         } else {
1595                 (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1596         }
1597
1598         if (dt_printf(dtp, fp, format, c) < 0)
1599                 return (-1);
1600
1601         return (0);
1602 }
1603
1604 typedef struct dt_normal {
1605         dtrace_aggvarid_t dtnd_id;
1606         uint64_t dtnd_normal;
1607 } dt_normal_t;
1608
1609 static int
1610 dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1611 {
1612         dt_normal_t *normal = arg;
1613         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1614         dtrace_aggvarid_t id = normal->dtnd_id;
1615
1616         if (agg->dtagd_nrecs == 0)
1617                 return (DTRACE_AGGWALK_NEXT);
1618
1619         if (agg->dtagd_varid != id)
1620                 return (DTRACE_AGGWALK_NEXT);
1621
1622         ((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
1623         return (DTRACE_AGGWALK_NORMALIZE);
1624 }
1625
1626 static int
1627 dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1628 {
1629         dt_normal_t normal;
1630         caddr_t addr;
1631
1632         /*
1633          * We (should) have two records:  the aggregation ID followed by the
1634          * normalization value.
1635          */
1636         addr = base + rec->dtrd_offset;
1637
1638         if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1639                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1640
1641         /* LINTED - alignment */
1642         normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
1643         rec++;
1644
1645         if (rec->dtrd_action != DTRACEACT_LIBACT)
1646                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1647
1648         if (rec->dtrd_arg != DT_ACT_NORMALIZE)
1649                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1650
1651         addr = base + rec->dtrd_offset;
1652
1653         switch (rec->dtrd_size) {
1654         case sizeof (uint64_t):
1655                 /* LINTED - alignment */
1656                 normal.dtnd_normal = *((uint64_t *)addr);
1657                 break;
1658         case sizeof (uint32_t):
1659                 /* LINTED - alignment */
1660                 normal.dtnd_normal = *((uint32_t *)addr);
1661                 break;
1662         case sizeof (uint16_t):
1663                 /* LINTED - alignment */
1664                 normal.dtnd_normal = *((uint16_t *)addr);
1665                 break;
1666         case sizeof (uint8_t):
1667                 normal.dtnd_normal = *((uint8_t *)addr);
1668                 break;
1669         default:
1670                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1671         }
1672
1673         (void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
1674
1675         return (0);
1676 }
1677
1678 static int
1679 dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1680 {
1681         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1682         dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1683
1684         if (agg->dtagd_nrecs == 0)
1685                 return (DTRACE_AGGWALK_NEXT);
1686
1687         if (agg->dtagd_varid != id)
1688                 return (DTRACE_AGGWALK_NEXT);
1689
1690         return (DTRACE_AGGWALK_DENORMALIZE);
1691 }
1692
1693 static int
1694 dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)
1695 {
1696         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1697         dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1698
1699         if (agg->dtagd_nrecs == 0)
1700                 return (DTRACE_AGGWALK_NEXT);
1701
1702         if (agg->dtagd_varid != id)
1703                 return (DTRACE_AGGWALK_NEXT);
1704
1705         return (DTRACE_AGGWALK_CLEAR);
1706 }
1707
1708 typedef struct dt_trunc {
1709         dtrace_aggvarid_t dttd_id;
1710         uint64_t dttd_remaining;
1711 } dt_trunc_t;
1712
1713 static int
1714 dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)
1715 {
1716         dt_trunc_t *trunc = arg;
1717         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1718         dtrace_aggvarid_t id = trunc->dttd_id;
1719
1720         if (agg->dtagd_nrecs == 0)
1721                 return (DTRACE_AGGWALK_NEXT);
1722
1723         if (agg->dtagd_varid != id)
1724                 return (DTRACE_AGGWALK_NEXT);
1725
1726         if (trunc->dttd_remaining == 0)
1727                 return (DTRACE_AGGWALK_REMOVE);
1728
1729         trunc->dttd_remaining--;
1730         return (DTRACE_AGGWALK_NEXT);
1731 }
1732
1733 static int
1734 dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1735 {
1736         dt_trunc_t trunc;
1737         caddr_t addr;
1738         int64_t remaining;
1739         int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);
1740
1741         /*
1742          * We (should) have two records:  the aggregation ID followed by the
1743          * number of aggregation entries after which the aggregation is to be
1744          * truncated.
1745          */
1746         addr = base + rec->dtrd_offset;
1747
1748         if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1749                 return (dt_set_errno(dtp, EDT_BADTRUNC));
1750
1751         /* LINTED - alignment */
1752         trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
1753         rec++;
1754
1755         if (rec->dtrd_action != DTRACEACT_LIBACT)
1756                 return (dt_set_errno(dtp, EDT_BADTRUNC));
1757
1758         if (rec->dtrd_arg != DT_ACT_TRUNC)
1759                 return (dt_set_errno(dtp, EDT_BADTRUNC));
1760
1761         addr = base + rec->dtrd_offset;
1762
1763         switch (rec->dtrd_size) {
1764         case sizeof (uint64_t):
1765                 /* LINTED - alignment */
1766                 remaining = *((int64_t *)addr);
1767                 break;
1768         case sizeof (uint32_t):
1769                 /* LINTED - alignment */
1770                 remaining = *((int32_t *)addr);
1771                 break;
1772         case sizeof (uint16_t):
1773                 /* LINTED - alignment */
1774                 remaining = *((int16_t *)addr);
1775                 break;
1776         case sizeof (uint8_t):
1777                 remaining = *((int8_t *)addr);
1778                 break;
1779         default:
1780                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1781         }
1782
1783         if (remaining < 0) {
1784                 func = dtrace_aggregate_walk_valsorted;
1785                 remaining = -remaining;
1786         } else {
1787                 func = dtrace_aggregate_walk_valrevsorted;
1788         }
1789
1790         assert(remaining >= 0);
1791         trunc.dttd_remaining = remaining;
1792
1793         (void) func(dtp, dt_trunc_agg, &trunc);
1794
1795         return (0);
1796 }
1797
1798 static int
1799 dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
1800     caddr_t addr, size_t size, uint64_t normal)
1801 {
1802         int err;
1803         dtrace_actkind_t act = rec->dtrd_action;
1804
1805         switch (act) {
1806         case DTRACEACT_STACK:
1807                 return (dt_print_stack(dtp, fp, NULL, addr,
1808                     rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));
1809
1810         case DTRACEACT_USTACK:
1811         case DTRACEACT_JSTACK:
1812                 return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));
1813
1814         case DTRACEACT_USYM:
1815         case DTRACEACT_UADDR:
1816                 return (dt_print_usym(dtp, fp, addr, act));
1817
1818         case DTRACEACT_UMOD:
1819                 return (dt_print_umod(dtp, fp, NULL, addr));
1820
1821         case DTRACEACT_SYM:
1822                 return (dt_print_sym(dtp, fp, NULL, addr));
1823
1824         case DTRACEACT_MOD:
1825                 return (dt_print_mod(dtp, fp, NULL, addr));
1826
1827         case DTRACEAGG_QUANTIZE:
1828                 return (dt_print_quantize(dtp, fp, addr, size, normal));
1829
1830         case DTRACEAGG_LQUANTIZE:
1831                 return (dt_print_lquantize(dtp, fp, addr, size, normal));
1832
1833         case DTRACEAGG_LLQUANTIZE:
1834                 return (dt_print_llquantize(dtp, fp, addr, size, normal));
1835
1836         case DTRACEAGG_AVG:
1837                 return (dt_print_average(dtp, fp, addr, size, normal));
1838
1839         case DTRACEAGG_STDDEV:
1840                 return (dt_print_stddev(dtp, fp, addr, size, normal));
1841
1842         default:
1843                 break;
1844         }
1845
1846         switch (size) {
1847         case sizeof (uint64_t):
1848                 err = dt_printf(dtp, fp, " %16lld",
1849                     /* LINTED - alignment */
1850                     (long long)*((uint64_t *)addr) / normal);
1851                 break;
1852         case sizeof (uint32_t):
1853                 /* LINTED - alignment */
1854                 err = dt_printf(dtp, fp, " %8d", *((uint32_t *)addr) /
1855                     (uint32_t)normal);
1856                 break;
1857         case sizeof (uint16_t):
1858                 /* LINTED - alignment */
1859                 err = dt_printf(dtp, fp, " %5d", *((uint16_t *)addr) /
1860                     (uint32_t)normal);
1861                 break;
1862         case sizeof (uint8_t):
1863                 err = dt_printf(dtp, fp, " %3d", *((uint8_t *)addr) /
1864                     (uint32_t)normal);
1865                 break;
1866         default:
1867                 err = dt_print_bytes(dtp, fp, addr, size, 50, 0, 0);
1868                 break;
1869         }
1870
1871         return (err);
1872 }
1873
1874 int
1875 dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
1876 {
1877         int i, aggact = 0;
1878         dt_print_aggdata_t *pd = arg;
1879         const dtrace_aggdata_t *aggdata = aggsdata[0];
1880         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1881         FILE *fp = pd->dtpa_fp;
1882         dtrace_hdl_t *dtp = pd->dtpa_dtp;
1883         dtrace_recdesc_t *rec;
1884         dtrace_actkind_t act;
1885         caddr_t addr;
1886         size_t size;
1887
1888         /*
1889          * Iterate over each record description in the key, printing the traced
1890          * data, skipping the first datum (the tuple member created by the
1891          * compiler).
1892          */
1893         for (i = 1; i < agg->dtagd_nrecs; i++) {
1894                 rec = &agg->dtagd_rec[i];
1895                 act = rec->dtrd_action;
1896                 addr = aggdata->dtada_data + rec->dtrd_offset;
1897                 size = rec->dtrd_size;
1898
1899                 if (DTRACEACT_ISAGG(act)) {
1900                         aggact = i;
1901                         break;
1902                 }
1903
1904                 if (dt_print_datum(dtp, fp, rec, addr, size, 1) < 0)
1905                         return (-1);
1906
1907                 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1908                     DTRACE_BUFDATA_AGGKEY) < 0)
1909                         return (-1);
1910         }
1911
1912         assert(aggact != 0);
1913
1914         for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
1915                 uint64_t normal;
1916
1917                 aggdata = aggsdata[i];
1918                 agg = aggdata->dtada_desc;
1919                 rec = &agg->dtagd_rec[aggact];
1920                 act = rec->dtrd_action;
1921                 addr = aggdata->dtada_data + rec->dtrd_offset;
1922                 size = rec->dtrd_size;
1923
1924                 assert(DTRACEACT_ISAGG(act));
1925                 normal = aggdata->dtada_normal;
1926
1927                 if (dt_print_datum(dtp, fp, rec, addr, size, normal) < 0)
1928                         return (-1);
1929
1930                 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1931                     DTRACE_BUFDATA_AGGVAL) < 0)
1932                         return (-1);
1933
1934                 if (!pd->dtpa_allunprint)
1935                         agg->dtagd_flags |= DTRACE_AGD_PRINTED;
1936         }
1937
1938         if (dt_printf(dtp, fp, "\n") < 0)
1939                 return (-1);
1940
1941         if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
1942             DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)
1943                 return (-1);
1944
1945         return (0);
1946 }
1947
1948 int
1949 dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)
1950 {
1951         dt_print_aggdata_t *pd = arg;
1952         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1953         dtrace_aggvarid_t aggvarid = pd->dtpa_id;
1954
1955         if (pd->dtpa_allunprint) {
1956                 if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
1957                         return (0);
1958         } else {
1959                 /*
1960                  * If we're not printing all unprinted aggregations, then the
1961                  * aggregation variable ID denotes a specific aggregation
1962                  * variable that we should print -- skip any other aggregations
1963                  * that we encounter.
1964                  */
1965                 if (agg->dtagd_nrecs == 0)
1966                         return (0);
1967
1968                 if (aggvarid != agg->dtagd_varid)
1969                         return (0);
1970         }
1971
1972         return (dt_print_aggs(&aggdata, 1, arg));
1973 }
1974
1975 int
1976 dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
1977     const char *option, const char *value)
1978 {
1979         int len, rval;
1980         char *msg;
1981         const char *errstr;
1982         dtrace_setoptdata_t optdata;
1983
1984         bzero(&optdata, sizeof (optdata));
1985         (void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);
1986
1987         if (dtrace_setopt(dtp, option, value) == 0) {
1988                 (void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);
1989                 optdata.dtsda_probe = data;
1990                 optdata.dtsda_option = option;
1991                 optdata.dtsda_handle = dtp;
1992
1993                 if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)
1994                         return (rval);
1995
1996                 return (0);
1997         }
1998
1999         errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));
2000         len = strlen(option) + strlen(value) + strlen(errstr) + 80;
2001         msg = alloca(len);
2002
2003         (void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",
2004             option, value, errstr);
2005
2006         if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)
2007                 return (0);
2008
2009         return (rval);
2010 }
2011
2012 static int
2013 dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf,
2014     dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
2015 {
2016         dtrace_epid_t id;
2017         size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size;
2018         int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
2019         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
2020         int rval, i, n;
2021         dtrace_epid_t last = DTRACE_EPIDNONE;
2022         dtrace_probedata_t data;
2023         uint64_t drops;
2024         caddr_t addr;
2025
2026         bzero(&data, sizeof (data));
2027         data.dtpda_handle = dtp;
2028         data.dtpda_cpu = cpu;
2029
2030 again:
2031         for (offs = start; offs < end; ) {
2032                 dtrace_eprobedesc_t *epd;
2033
2034                 /*
2035                  * We're guaranteed to have an ID.
2036                  */
2037                 id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
2038
2039                 if (id == DTRACE_EPIDNONE) {
2040                         /*
2041                          * This is filler to assure proper alignment of the
2042                          * next record; we simply ignore it.
2043                          */
2044                         offs += sizeof (id);
2045                         continue;
2046                 }
2047
2048                 if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
2049                     &data.dtpda_pdesc)) != 0)
2050                         return (rval);
2051
2052                 epd = data.dtpda_edesc;
2053                 data.dtpda_data = buf->dtbd_data + offs;
2054
2055                 if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
2056                         rval = dt_handle(dtp, &data);
2057
2058                         if (rval == DTRACE_CONSUME_NEXT)
2059                                 goto nextepid;
2060
2061                         if (rval == DTRACE_CONSUME_ERROR)
2062                                 return (-1);
2063                 }
2064
2065                 if (flow)
2066                         (void) dt_flowindent(dtp, &data, last, buf, offs);
2067
2068                 rval = (*efunc)(&data, arg);
2069
2070                 if (flow) {
2071                         if (data.dtpda_flow == DTRACEFLOW_ENTRY)
2072                                 data.dtpda_indent += 2;
2073                 }
2074
2075                 if (rval == DTRACE_CONSUME_NEXT)
2076                         goto nextepid;
2077
2078                 if (rval == DTRACE_CONSUME_ABORT)
2079                         return (dt_set_errno(dtp, EDT_DIRABORT));
2080
2081                 if (rval != DTRACE_CONSUME_THIS)
2082                         return (dt_set_errno(dtp, EDT_BADRVAL));
2083
2084                 for (i = 0; i < epd->dtepd_nrecs; i++) {
2085                         dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
2086                         dtrace_actkind_t act = rec->dtrd_action;
2087
2088                         data.dtpda_data = buf->dtbd_data + offs +
2089                             rec->dtrd_offset;
2090                         addr = data.dtpda_data;
2091
2092                         if (act == DTRACEACT_LIBACT) {
2093                                 uint64_t arg = rec->dtrd_arg;
2094                                 dtrace_aggvarid_t id;
2095
2096                                 switch (arg) {
2097                                 case DT_ACT_CLEAR:
2098                                         /* LINTED - alignment */
2099                                         id = *((dtrace_aggvarid_t *)addr);
2100                                         (void) dtrace_aggregate_walk(dtp,
2101                                             dt_clear_agg, &id);
2102                                         continue;
2103
2104                                 case DT_ACT_DENORMALIZE:
2105                                         /* LINTED - alignment */
2106                                         id = *((dtrace_aggvarid_t *)addr);
2107                                         (void) dtrace_aggregate_walk(dtp,
2108                                             dt_denormalize_agg, &id);
2109                                         continue;
2110
2111                                 case DT_ACT_FTRUNCATE:
2112                                         if (fp == NULL)
2113                                                 continue;
2114
2115                                         (void) fflush(fp);
2116                                         (void) ftruncate(fileno(fp), 0);
2117                                         (void) fseeko(fp, 0, SEEK_SET);
2118                                         continue;
2119
2120                                 case DT_ACT_NORMALIZE:
2121                                         if (i == epd->dtepd_nrecs - 1)
2122                                                 return (dt_set_errno(dtp,
2123                                                     EDT_BADNORMAL));
2124
2125                                         if (dt_normalize(dtp,
2126                                             buf->dtbd_data + offs, rec) != 0)
2127                                                 return (-1);
2128
2129                                         i++;
2130                                         continue;
2131
2132                                 case DT_ACT_SETOPT: {
2133                                         uint64_t *opts = dtp->dt_options;
2134                                         dtrace_recdesc_t *valrec;
2135                                         uint32_t valsize;
2136                                         caddr_t val;
2137                                         int rv;
2138
2139                                         if (i == epd->dtepd_nrecs - 1) {
2140                                                 return (dt_set_errno(dtp,
2141                                                     EDT_BADSETOPT));
2142                                         }
2143
2144                                         valrec = &epd->dtepd_rec[++i];
2145                                         valsize = valrec->dtrd_size;
2146
2147                                         if (valrec->dtrd_action != act ||
2148                                             valrec->dtrd_arg != arg) {
2149                                                 return (dt_set_errno(dtp,
2150                                                     EDT_BADSETOPT));
2151                                         }
2152
2153                                         if (valsize > sizeof (uint64_t)) {
2154                                                 val = buf->dtbd_data + offs +
2155                                                     valrec->dtrd_offset;
2156                                         } else {
2157                                                 val = "1";
2158                                         }
2159
2160                                         rv = dt_setopt(dtp, &data, addr, val);
2161
2162                                         if (rv != 0)
2163                                                 return (-1);
2164
2165                                         flow = (opts[DTRACEOPT_FLOWINDENT] !=
2166                                             DTRACEOPT_UNSET);
2167                                         quiet = (opts[DTRACEOPT_QUIET] !=
2168                                             DTRACEOPT_UNSET);
2169
2170                                         continue;
2171                                 }
2172
2173                                 case DT_ACT_TRUNC:
2174                                         if (i == epd->dtepd_nrecs - 1)
2175                                                 return (dt_set_errno(dtp,
2176                                                     EDT_BADTRUNC));
2177
2178                                         if (dt_trunc(dtp,
2179                                             buf->dtbd_data + offs, rec) != 0)
2180                                                 return (-1);
2181
2182                                         i++;
2183                                         continue;
2184
2185                                 default:
2186                                         continue;
2187                                 }
2188                         }
2189
2190                         rval = (*rfunc)(&data, rec, arg);
2191
2192                         if (rval == DTRACE_CONSUME_NEXT)
2193                                 continue;
2194
2195                         if (rval == DTRACE_CONSUME_ABORT)
2196                                 return (dt_set_errno(dtp, EDT_DIRABORT));
2197
2198                         if (rval != DTRACE_CONSUME_THIS)
2199                                 return (dt_set_errno(dtp, EDT_BADRVAL));
2200
2201                         if (act == DTRACEACT_STACK) {
2202                                 int depth = rec->dtrd_arg;
2203
2204                                 if (dt_print_stack(dtp, fp, NULL, addr, depth,
2205                                     rec->dtrd_size / depth) < 0)
2206                                         return (-1);
2207                                 goto nextrec;
2208                         }
2209
2210                         if (act == DTRACEACT_USTACK ||
2211                             act == DTRACEACT_JSTACK) {
2212                                 if (dt_print_ustack(dtp, fp, NULL,
2213                                     addr, rec->dtrd_arg) < 0)
2214                                         return (-1);
2215                                 goto nextrec;
2216                         }
2217
2218                         if (act == DTRACEACT_SYM) {
2219                                 if (dt_print_sym(dtp, fp, NULL, addr) < 0)
2220                                         return (-1);
2221                                 goto nextrec;
2222                         }
2223
2224                         if (act == DTRACEACT_MOD) {
2225                                 if (dt_print_mod(dtp, fp, NULL, addr) < 0)
2226                                         return (-1);
2227                                 goto nextrec;
2228                         }
2229
2230                         if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {
2231                                 if (dt_print_usym(dtp, fp, addr, act) < 0)
2232                                         return (-1);
2233                                 goto nextrec;
2234                         }
2235
2236                         if (act == DTRACEACT_UMOD) {
2237                                 if (dt_print_umod(dtp, fp, NULL, addr) < 0)
2238                                         return (-1);
2239                                 goto nextrec;
2240                         }
2241
2242                         if (act == DTRACEACT_PRINTM) {
2243                                 if (dt_print_memory(dtp, fp, addr) < 0)
2244                                         return (-1);
2245                                 goto nextrec;
2246                         }
2247
2248                         if (act == DTRACEACT_PRINTT) {
2249                                 if (dt_print_type(dtp, fp, addr) < 0)
2250                                         return (-1);
2251                                 goto nextrec;
2252                         }
2253
2254                         if (DTRACEACT_ISPRINTFLIKE(act)) {
2255                                 void *fmtdata;
2256                                 int (*func)(dtrace_hdl_t *, FILE *, void *,
2257                                     const dtrace_probedata_t *,
2258                                     const dtrace_recdesc_t *, uint_t,
2259                                     const void *buf, size_t);
2260
2261                                 if ((fmtdata = dt_format_lookup(dtp,
2262                                     rec->dtrd_format)) == NULL)
2263                                         goto nofmt;
2264
2265                                 switch (act) {
2266                                 case DTRACEACT_PRINTF:
2267                                         func = dtrace_fprintf;
2268                                         break;
2269                                 case DTRACEACT_PRINTA:
2270                                         func = dtrace_fprinta;
2271                                         break;
2272                                 case DTRACEACT_SYSTEM:
2273                                         func = dtrace_system;
2274                                         break;
2275                                 case DTRACEACT_FREOPEN:
2276                                         func = dtrace_freopen;
2277                                         break;
2278                                 }
2279
2280                                 n = (*func)(dtp, fp, fmtdata, &data,
2281                                     rec, epd->dtepd_nrecs - i,
2282                                     (uchar_t *)buf->dtbd_data + offs,
2283                                     buf->dtbd_size - offs);
2284
2285                                 if (n < 0)
2286                                         return (-1); /* errno is set for us */
2287
2288                                 if (n > 0)
2289                                         i += n - 1;
2290                                 goto nextrec;
2291                         }
2292
2293 nofmt:
2294                         if (act == DTRACEACT_PRINTA) {
2295                                 dt_print_aggdata_t pd;
2296                                 dtrace_aggvarid_t *aggvars;
2297                                 int j, naggvars = 0;
2298                                 size_t size = ((epd->dtepd_nrecs - i) *
2299                                     sizeof (dtrace_aggvarid_t));
2300
2301                                 if ((aggvars = dt_alloc(dtp, size)) == NULL)
2302                                         return (-1);
2303
2304                                 /*
2305                                  * This might be a printa() with multiple
2306                                  * aggregation variables.  We need to scan
2307                                  * forward through the records until we find
2308                                  * a record from a different statement.
2309                                  */
2310                                 for (j = i; j < epd->dtepd_nrecs; j++) {
2311                                         dtrace_recdesc_t *nrec;
2312                                         caddr_t naddr;
2313
2314                                         nrec = &epd->dtepd_rec[j];
2315
2316                                         if (nrec->dtrd_uarg != rec->dtrd_uarg)
2317                                                 break;
2318
2319                                         if (nrec->dtrd_action != act) {
2320                                                 return (dt_set_errno(dtp,
2321                                                     EDT_BADAGG));
2322                                         }
2323
2324                                         naddr = buf->dtbd_data + offs +
2325                                             nrec->dtrd_offset;
2326
2327                                         aggvars[naggvars++] =
2328                                             /* LINTED - alignment */
2329                                             *((dtrace_aggvarid_t *)naddr);
2330                                 }
2331
2332                                 i = j - 1;
2333                                 bzero(&pd, sizeof (pd));
2334                                 pd.dtpa_dtp = dtp;
2335                                 pd.dtpa_fp = fp;
2336
2337                                 assert(naggvars >= 1);
2338
2339                                 if (naggvars == 1) {
2340                                         pd.dtpa_id = aggvars[0];
2341                                         dt_free(dtp, aggvars);
2342
2343                                         if (dt_printf(dtp, fp, "\n") < 0 ||
2344                                             dtrace_aggregate_walk_sorted(dtp,
2345                                             dt_print_agg, &pd) < 0)
2346                                                 return (-1);
2347                                         goto nextrec;
2348                                 }
2349
2350                                 if (dt_printf(dtp, fp, "\n") < 0 ||
2351                                     dtrace_aggregate_walk_joined(dtp, aggvars,
2352                                     naggvars, dt_print_aggs, &pd) < 0) {
2353                                         dt_free(dtp, aggvars);
2354                                         return (-1);
2355                                 }
2356
2357                                 dt_free(dtp, aggvars);
2358                                 goto nextrec;
2359                         }
2360
2361                         switch (rec->dtrd_size) {
2362                         case sizeof (uint64_t):
2363                                 n = dt_printf(dtp, fp,
2364                                     quiet ? "%lld" : " %16lld",
2365                                     /* LINTED - alignment */
2366                                     *((unsigned long long *)addr));
2367                                 break;
2368                         case sizeof (uint32_t):
2369                                 n = dt_printf(dtp, fp, quiet ? "%d" : " %8d",
2370                                     /* LINTED - alignment */
2371                                     *((uint32_t *)addr));
2372                                 break;
2373                         case sizeof (uint16_t):
2374                                 n = dt_printf(dtp, fp, quiet ? "%d" : " %5d",
2375                                     /* LINTED - alignment */
2376                                     *((uint16_t *)addr));
2377                                 break;
2378                         case sizeof (uint8_t):
2379                                 n = dt_printf(dtp, fp, quiet ? "%d" : " %3d",
2380                                     *((uint8_t *)addr));
2381                                 break;
2382                         default:
2383                                 n = dt_print_bytes(dtp, fp, addr,
2384                                     rec->dtrd_size, 33, quiet, 0);
2385                                 break;
2386                         }
2387
2388                         if (n < 0)
2389                                 return (-1); /* errno is set for us */
2390
2391 nextrec:
2392                         if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)
2393                                 return (-1); /* errno is set for us */
2394                 }
2395
2396                 /*
2397                  * Call the record callback with a NULL record to indicate
2398                  * that we're done processing this EPID.
2399                  */
2400                 rval = (*rfunc)(&data, NULL, arg);
2401 nextepid:
2402                 offs += epd->dtepd_size;
2403                 last = id;
2404         }
2405
2406         if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) {
2407                 end = buf->dtbd_oldest;
2408                 start = 0;
2409                 goto again;
2410         }
2411
2412         if ((drops = buf->dtbd_drops) == 0)
2413                 return (0);
2414
2415         /*
2416          * Explicitly zero the drops to prevent us from processing them again.
2417          */
2418         buf->dtbd_drops = 0;
2419
2420         return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
2421 }
2422
2423 typedef struct dt_begin {
2424         dtrace_consume_probe_f *dtbgn_probefunc;
2425         dtrace_consume_rec_f *dtbgn_recfunc;
2426         void *dtbgn_arg;
2427         dtrace_handle_err_f *dtbgn_errhdlr;
2428         void *dtbgn_errarg;
2429         int dtbgn_beginonly;
2430 } dt_begin_t;
2431
2432 static int
2433 dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
2434 {
2435         dt_begin_t *begin = (dt_begin_t *)arg;
2436         dtrace_probedesc_t *pd = data->dtpda_pdesc;
2437
2438         int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2439         int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2440
2441         if (begin->dtbgn_beginonly) {
2442                 if (!(r1 && r2))
2443                         return (DTRACE_CONSUME_NEXT);
2444         } else {
2445                 if (r1 && r2)
2446                         return (DTRACE_CONSUME_NEXT);
2447         }
2448
2449         /*
2450          * We have a record that we're interested in.  Now call the underlying
2451          * probe function...
2452          */
2453         return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
2454 }
2455
2456 static int
2457 dt_consume_begin_record(const dtrace_probedata_t *data,
2458     const dtrace_recdesc_t *rec, void *arg)
2459 {
2460         dt_begin_t *begin = (dt_begin_t *)arg;
2461
2462         return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
2463 }
2464
2465 static int
2466 dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
2467 {
2468         dt_begin_t *begin = (dt_begin_t *)arg;
2469         dtrace_probedesc_t *pd = data->dteda_pdesc;
2470
2471         int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2472         int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2473
2474         if (begin->dtbgn_beginonly) {
2475                 if (!(r1 && r2))
2476                         return (DTRACE_HANDLE_OK);
2477         } else {
2478                 if (r1 && r2)
2479                         return (DTRACE_HANDLE_OK);
2480         }
2481
2482         return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
2483 }
2484
2485 static int
2486 dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
2487     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2488 {
2489         /*
2490          * There's this idea that the BEGIN probe should be processed before
2491          * everything else, and that the END probe should be processed after
2492          * anything else.  In the common case, this is pretty easy to deal
2493          * with.  However, a situation may arise where the BEGIN enabling and
2494          * END enabling are on the same CPU, and some enabling in the middle
2495          * occurred on a different CPU.  To deal with this (blech!) we need to
2496          * consume the BEGIN buffer up until the end of the BEGIN probe, and
2497          * then set it aside.  We will then process every other CPU, and then
2498          * we'll return to the BEGIN CPU and process the rest of the data
2499          * (which will inevitably include the END probe, if any).  Making this
2500          * even more complicated (!) is the library's ERROR enabling.  Because
2501          * this enabling is processed before we even get into the consume call
2502          * back, any ERROR firing would result in the library's ERROR enabling
2503          * being processed twice -- once in our first pass (for BEGIN probes),
2504          * and again in our second pass (for everything but BEGIN probes).  To
2505          * deal with this, we interpose on the ERROR handler to assure that we
2506          * only process ERROR enablings induced by BEGIN enablings in the
2507          * first pass, and that we only process ERROR enablings _not_ induced
2508          * by BEGIN enablings in the second pass.
2509          */
2510         dt_begin_t begin;
2511         processorid_t cpu = dtp->dt_beganon;
2512         dtrace_bufdesc_t nbuf;
2513 #if !defined(sun)
2514         dtrace_bufdesc_t *pbuf;
2515 #endif
2516         int rval, i;
2517         static int max_ncpus;
2518         dtrace_optval_t size;
2519
2520         dtp->dt_beganon = -1;
2521
2522 #if defined(sun)
2523         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2524 #else
2525         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2526 #endif
2527                 /*
2528                  * We really don't expect this to fail, but it is at least
2529                  * technically possible for this to fail with ENOENT.  In this
2530                  * case, we just drive on...
2531                  */
2532                 if (errno == ENOENT)
2533                         return (0);
2534
2535                 return (dt_set_errno(dtp, errno));
2536         }
2537
2538         if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
2539                 /*
2540                  * This is the simple case.  We're either not stopped, or if
2541                  * we are, we actually processed any END probes on another
2542                  * CPU.  We can simply consume this buffer and return.
2543                  */
2544                 return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg));
2545         }
2546
2547         begin.dtbgn_probefunc = pf;
2548         begin.dtbgn_recfunc = rf;
2549         begin.dtbgn_arg = arg;
2550         begin.dtbgn_beginonly = 1;
2551
2552         /*
2553          * We need to interpose on the ERROR handler to be sure that we
2554          * only process ERRORs induced by BEGIN.
2555          */
2556         begin.dtbgn_errhdlr = dtp->dt_errhdlr;
2557         begin.dtbgn_errarg = dtp->dt_errarg;
2558         dtp->dt_errhdlr = dt_consume_begin_error;
2559         dtp->dt_errarg = &begin;
2560
2561         rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2562             dt_consume_begin_record, &begin);
2563
2564         dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2565         dtp->dt_errarg = begin.dtbgn_errarg;
2566
2567         if (rval != 0)
2568                 return (rval);
2569
2570         /*
2571          * Now allocate a new buffer.  We'll use this to deal with every other
2572          * CPU.
2573          */
2574         bzero(&nbuf, sizeof (dtrace_bufdesc_t));
2575         (void) dtrace_getopt(dtp, "bufsize", &size);
2576         if ((nbuf.dtbd_data = malloc(size)) == NULL)
2577                 return (dt_set_errno(dtp, EDT_NOMEM));
2578
2579         if (max_ncpus == 0)
2580                 max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2581
2582         for (i = 0; i < max_ncpus; i++) {
2583                 nbuf.dtbd_cpu = i;
2584
2585                 if (i == cpu)
2586                         continue;
2587
2588 #if defined(sun)
2589                 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) {
2590 #else
2591                 pbuf = &nbuf;
2592                 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &pbuf) == -1) {
2593 #endif
2594                         /*
2595                          * If we failed with ENOENT, it may be because the
2596                          * CPU was unconfigured -- this is okay.  Any other
2597                          * error, however, is unexpected.
2598                          */
2599                         if (errno == ENOENT)
2600                                 continue;
2601
2602                         free(nbuf.dtbd_data);
2603
2604                         return (dt_set_errno(dtp, errno));
2605                 }
2606
2607                 if ((rval = dt_consume_cpu(dtp, fp,
2608                     i, &nbuf, pf, rf, arg)) != 0) {
2609                         free(nbuf.dtbd_data);
2610                         return (rval);
2611                 }
2612         }
2613
2614         free(nbuf.dtbd_data);
2615
2616         /*
2617          * Okay -- we're done with the other buffers.  Now we want to
2618          * reconsume the first buffer -- but this time we're looking for
2619          * everything _but_ BEGIN.  And of course, in order to only consume
2620          * those ERRORs _not_ associated with BEGIN, we need to reinstall our
2621          * ERROR interposition function...
2622          */
2623         begin.dtbgn_beginonly = 0;
2624
2625         assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
2626         assert(begin.dtbgn_errarg == dtp->dt_errarg);
2627         dtp->dt_errhdlr = dt_consume_begin_error;
2628         dtp->dt_errarg = &begin;
2629
2630         rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2631             dt_consume_begin_record, &begin);
2632
2633         dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2634         dtp->dt_errarg = begin.dtbgn_errarg;
2635
2636         return (rval);
2637 }
2638
2639 int
2640 dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
2641     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2642 {
2643         dtrace_bufdesc_t *buf = &dtp->dt_buf;
2644         dtrace_optval_t size;
2645         static int max_ncpus;
2646         int i, rval;
2647         dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
2648         hrtime_t now = gethrtime();
2649
2650         if (dtp->dt_lastswitch != 0) {
2651                 if (now - dtp->dt_lastswitch < interval)
2652                         return (0);
2653
2654                 dtp->dt_lastswitch += interval;
2655         } else {
2656                 dtp->dt_lastswitch = now;
2657         }
2658
2659         if (!dtp->dt_active)
2660                 return (dt_set_errno(dtp, EINVAL));
2661
2662         if (max_ncpus == 0)
2663                 max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2664
2665         if (pf == NULL)
2666                 pf = (dtrace_consume_probe_f *)dt_nullprobe;
2667
2668         if (rf == NULL)
2669                 rf = (dtrace_consume_rec_f *)dt_nullrec;
2670
2671         if (buf->dtbd_data == NULL) {
2672                 (void) dtrace_getopt(dtp, "bufsize", &size);
2673                 if ((buf->dtbd_data = malloc(size)) == NULL)
2674                         return (dt_set_errno(dtp, EDT_NOMEM));
2675
2676                 buf->dtbd_size = size;
2677         }
2678
2679         /*
2680          * If we have just begun, we want to first process the CPU that
2681          * executed the BEGIN probe (if any).
2682          */
2683         if (dtp->dt_active && dtp->dt_beganon != -1) {
2684                 buf->dtbd_cpu = dtp->dt_beganon;
2685                 if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0)
2686                         return (rval);
2687         }
2688
2689         for (i = 0; i < max_ncpus; i++) {
2690                 buf->dtbd_cpu = i;
2691
2692                 /*
2693                  * If we have stopped, we want to process the CPU on which the
2694                  * END probe was processed only _after_ we have processed
2695                  * everything else.
2696                  */
2697                 if (dtp->dt_stopped && (i == dtp->dt_endedon))
2698                         continue;
2699
2700 #if defined(sun)
2701                 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2702 #else
2703                 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2704 #endif
2705                         /*
2706                          * If we failed with ENOENT, it may be because the
2707                          * CPU was unconfigured -- this is okay.  Any other
2708                          * error, however, is unexpected.
2709                          */
2710                         if (errno == ENOENT)
2711                                 continue;
2712
2713                         return (dt_set_errno(dtp, errno));
2714                 }
2715
2716                 if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0)
2717                         return (rval);
2718         }
2719
2720         if (!dtp->dt_stopped)
2721                 return (0);
2722
2723         buf->dtbd_cpu = dtp->dt_endedon;
2724
2725 #if defined(sun)
2726         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2727 #else
2728         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2729 #endif
2730                 /*
2731                  * This _really_ shouldn't fail, but it is strictly speaking
2732                  * possible for this to return ENOENT if the CPU that called
2733                  * the END enabling somehow managed to become unconfigured.
2734                  * It's unclear how the user can possibly expect anything
2735                  * rational to happen in this case -- the state has been thrown
2736                  * out along with the unconfigured CPU -- so we'll just drive
2737                  * on...
2738                  */
2739                 if (errno == ENOENT)
2740                         return (0);
2741
2742                 return (dt_set_errno(dtp, errno));
2743         }
2744
2745         return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg));
2746 }