cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25
  26 /*
  27  * Copyright (c) 2011, Joyent, Inc. All rights reserved.
  28  */
  29
  30 #include <stdlib.h>
  31 #include <strings.h>
  32 #include <errno.h>
  33 #include <unistd.h>
  34 #include <limits.h>
  35 #include <assert.h>
  36 #include <ctype.h>
  37 #if defined(sun)
  38 #include <alloca.h>
  39 #endif
  40 #include <dt_impl.h>
  41 #if !defined(sun)
  42 #include <libproc_compat.h>
  43 #endif
  44
  45 #define DT_MASK_LO 0x00000000FFFFFFFFULL
  46
  47 /*
  48  * We declare this here because (1) we need it and (2) we want to avoid a
  49  * dependency on libm in libdtrace.
  50  */
  51 static long double
  52 dt_fabsl(long double x)
  53 {
  54         if (x < 0)
  55                 return (-x);
  56
  57         return (x);
  58 }
  59
  60 /*
  61  * 128-bit arithmetic functions needed to support the stddev() aggregating
  62  * action.
  63  */
  64 static int
  65 dt_gt_128(uint64_t *a, uint64_t *b)
  66 {
  67         return (a[1] > b[1] || (a[1] == b[1] && a[0] > b[0]));
  68 }
  69
  70 static int
  71 dt_ge_128(uint64_t *a, uint64_t *b)
  72 {
  73         return (a[1] > b[1] || (a[1] == b[1] && a[0] >= b[0]));
  74 }
  75
  76 static int
  77 dt_le_128(uint64_t *a, uint64_t *b)
  78 {
  79         return (a[1] < b[1] || (a[1] == b[1] && a[0] <= b[0]));
  80 }
  81
  82 /*
  83  * Shift the 128-bit value in a by b. If b is positive, shift left.
  84  * If b is negative, shift right.
  85  */
  86 static void
  87 dt_shift_128(uint64_t *a, int b)
  88 {
  89         uint64_t mask;
  90
  91         if (b == 0)
  92                 return;
  93
  94         if (b < 0) {
  95                 b = -b;
  96                 if (b >= 64) {
  97                         a[0] = a[1] >> (b - 64);
  98                         a[1] = 0;
  99                 } else {
 100                         a[0] >>= b;
 101                         mask = 1LL << (64 - b);
 102                         mask -= 1;
 103                         a[0] |= ((a[1] & mask) << (64 - b));
 104                         a[1] >>= b;
 105                 }
 106         } else {
 107                 if (b >= 64) {
 108                         a[1] = a[0] << (b - 64);
 109                         a[0] = 0;
 110                 } else {
 111                         a[1] <<= b;
 112                         mask = a[0] >> (64 - b);
 113                         a[1] |= mask;
 114                         a[0] <<= b;
 115                 }
 116         }
 117 }
 118
 119 static int
 120 dt_nbits_128(uint64_t *a)
 121 {
 122         int nbits = 0;
 123         uint64_t tmp[2];
 124         uint64_t zero[2] = { 0, 0 };
 125
 126         tmp[0] = a[0];
 127         tmp[1] = a[1];
 128
 129         dt_shift_128(tmp, -1);
 130         while (dt_gt_128(tmp, zero)) {
 131                 dt_shift_128(tmp, -1);
 132                 nbits++;
 133         }
 134
 135         return (nbits);
 136 }
 137
 138 static void
 139 dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
 140 {
 141         uint64_t result[2];
 142
 143         result[0] = minuend[0] - subtrahend[0];
 144         result[1] = minuend[1] - subtrahend[1] -
 145             (minuend[0] < subtrahend[0] ? 1 : 0);
 146
 147         difference[0] = result[0];
 148         difference[1] = result[1];
 149 }
 150
 151 static void
 152 dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
 153 {
 154         uint64_t result[2];
 155
 156         result[0] = addend1[0] + addend2[0];
 157         result[1] = addend1[1] + addend2[1] +
 158             (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);
 159
 160         sum[0] = result[0];
 161         sum[1] = result[1];
 162 }
 163
 164 /*
 165  * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 166  * use native multiplication on those, and then re-combine into the
 167  * resulting 128-bit value.
 168  *
 169  * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 170  *     hi1 * hi2 << 64 +
 171  *     hi1 * lo2 << 32 +
 172  *     hi2 * lo1 << 32 +
 173  *     lo1 * lo2
 174  */
 175 static void
 176 dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
 177 {
 178         uint64_t hi1, hi2, lo1, lo2;
 179         uint64_t tmp[2];
 180
 181         hi1 = factor1 >> 32;
 182         hi2 = factor2 >> 32;
 183
 184         lo1 = factor1 & DT_MASK_LO;
 185         lo2 = factor2 & DT_MASK_LO;
 186
 187         product[0] = lo1 * lo2;
 188         product[1] = hi1 * hi2;
 189
 190         tmp[0] = hi1 * lo2;
 191         tmp[1] = 0;
 192         dt_shift_128(tmp, 32);
 193         dt_add_128(product, tmp, product);
 194
 195         tmp[0] = hi2 * lo1;
 196         tmp[1] = 0;
 197         dt_shift_128(tmp, 32);
 198         dt_add_128(product, tmp, product);
 199 }
 200
 201 /*
 202  * This is long-hand division.
 203  *
 204  * We initialize subtrahend by shifting divisor left as far as possible. We
 205  * loop, comparing subtrahend to dividend:  if subtrahend is smaller, we
 206  * subtract and set the appropriate bit in the result.  We then shift
 207  * subtrahend right by one bit for the next comparison.
 208  */
 209 static void
 210 dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
 211 {
 212         uint64_t result[2] = { 0, 0 };
 213         uint64_t remainder[2];
 214         uint64_t subtrahend[2];
 215         uint64_t divisor_128[2];
 216         uint64_t mask[2] = { 1, 0 };
 217         int log = 0;
 218
 219         assert(divisor != 0);
 220
 221         divisor_128[0] = divisor;
 222         divisor_128[1] = 0;
 223
 224         remainder[0] = dividend[0];
 225         remainder[1] = dividend[1];
 226
 227         subtrahend[0] = divisor;
 228         subtrahend[1] = 0;
 229
 230         while (divisor > 0) {
 231                 log++;
 232                 divisor >>= 1;
 233         }
 234
 235         dt_shift_128(subtrahend, 128 - log);
 236         dt_shift_128(mask, 128 - log);
 237
 238         while (dt_ge_128(remainder, divisor_128)) {
 239                 if (dt_ge_128(remainder, subtrahend)) {
 240                         dt_subtract_128(remainder, subtrahend, remainder);
 241                         result[0] |= mask[0];
 242                         result[1] |= mask[1];
 243                 }
 244
 245                 dt_shift_128(subtrahend, -1);
 246                 dt_shift_128(mask, -1);
 247         }
 248
 249         quotient[0] = result[0];
 250         quotient[1] = result[1];
 251 }
 252
 253 /*
 254  * This is the long-hand method of calculating a square root.
 255  * The algorithm is as follows:
 256  *
 257  * 1. Group the digits by 2 from the right.
 258  * 2. Over the leftmost group, find the largest single-digit number
 259  *    whose square is less than that group.
 260  * 3. Subtract the result of the previous step (2 or 4, depending) and
 261  *    bring down the next two-digit group.
 262  * 4. For the result R we have so far, find the largest single-digit number
 263  *    x such that 2 * R * 10 * x + x^2 is less than the result from step 3.
 264  *    (Note that this is doubling R and performing a decimal left-shift by 1
 265  *    and searching for the appropriate decimal to fill the one's place.)
 266  *    The value x is the next digit in the square root.
 267  * Repeat steps 3 and 4 until the desired precision is reached.  (We're
 268  * dealing with integers, so the above is sufficient.)
 269  *
 270  * In decimal, the square root of 582,734 would be calculated as so:
 271  *
 272  *     __7__6__3
 273  *    | 58 27 34
 274  *     -49       (7^2 == 49 => 7 is the first digit in the square root)
 275  *      --
 276  *       9 27    (Subtract and bring down the next group.)
 277  * 146   8 76    (2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in
 278  *      -----     the square root)
 279  *         51 34 (Subtract and bring down the next group.)
 280  * 1523    45 69 (2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit in
 281  *         -----  the square root)
 282  *          5 65 (remainder)
 283  *
 284  * The above algorithm applies similarly in binary, but note that the
 285  * only possible non-zero value for x in step 4 is 1, so step 4 becomes a
 286  * simple decision: is 2 * R * 2 * 1 + 1^2 (aka R << 2 + 1) less than the
 287  * preceding difference?
 288  *
 289  * In binary, the square root of 11011011 would be calculated as so:
 290  *
 291  *     __1__1__1__0
 292  *    | 11 01 10 11
 293  *      01          (0 << 2 + 1 == 1 < 11 => this bit is 1)
 294  *      --
 295  *      10 01 10 11
 296  * 101   1 01       (1 << 2 + 1 == 101 < 1001 => next bit is 1)
 297  *      -----
 298  *       1 00 10 11
 299  * 1101    11 01    (11 << 2 + 1 == 1101 < 10010 => next bit is 1)
 300  *       -------
 301  *          1 01 11
 302  * 11101    1 11 01 (111 << 2 + 1 == 11101 > 10111 => last bit is 0)
 303  *
 304  */
 305 static uint64_t
 306 dt_sqrt_128(uint64_t *square)
 307 {
 308         uint64_t result[2] = { 0, 0 };
 309         uint64_t diff[2] = { 0, 0 };
 310         uint64_t one[2] = { 1, 0 };
 311         uint64_t next_pair[2];
 312         uint64_t next_try[2];
 313         uint64_t bit_pairs, pair_shift;
 314         int i;
 315
 316         bit_pairs = dt_nbits_128(square) / 2;
 317         pair_shift = bit_pairs * 2;
 318
 319         for (i = 0; i <= bit_pairs; i++) {
 320                 /*
 321                  * Bring down the next pair of bits.
 322                  */
 323                 next_pair[0] = square[0];
 324                 next_pair[1] = square[1];
 325                 dt_shift_128(next_pair, -pair_shift);
 326                 next_pair[0] &= 0x3;
 327                 next_pair[1] = 0;
 328
 329                 dt_shift_128(diff, 2);
 330                 dt_add_128(diff, next_pair, diff);
 331
 332                 /*
 333                  * next_try = R << 2 + 1
 334                  */
 335                 next_try[0] = result[0];
 336                 next_try[1] = result[1];
 337                 dt_shift_128(next_try, 2);
 338                 dt_add_128(next_try, one, next_try);
 339
 340                 if (dt_le_128(next_try, diff)) {
 341                         dt_subtract_128(diff, next_try, diff);
 342                         dt_shift_128(result, 1);
 343                         dt_add_128(result, one, result);
 344                 } else {
 345                         dt_shift_128(result, 1);
 346                 }
 347
 348                 pair_shift -= 2;
 349         }
 350
 351         assert(result[1] == 0);
 352
 353         return (result[0]);
 354 }
 355
 356 uint64_t
 357 dt_stddev(uint64_t *data, uint64_t normal)
 358 {
 359         uint64_t avg_of_squares[2];
 360         uint64_t square_of_avg[2];
 361         int64_t norm_avg;
 362         uint64_t diff[2];
 363
 364         /*
 365          * The standard approximation for standard deviation is
 366          * sqrt(average(x**2) - average(x)**2), i.e. the square root
 367          * of the average of the squares minus the square of the average.
 368          */
 369         dt_divide_128(data + 2, normal, avg_of_squares);
 370         dt_divide_128(avg_of_squares, data[0], avg_of_squares);
 371
 372         norm_avg = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];
 373
 374         if (norm_avg < 0)
 375                 norm_avg = -norm_avg;
 376
 377         dt_multiply_128((uint64_t)norm_avg, (uint64_t)norm_avg, square_of_avg);
 378
 379         dt_subtract_128(avg_of_squares, square_of_avg, diff);
 380
 381         return (dt_sqrt_128(diff));
 382 }
 383
 384 static int
 385 dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
 386     dtrace_bufdesc_t *buf, size_t offs)
 387 {
 388         dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
 389         dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
 390         char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;
 391         dtrace_flowkind_t flow = DTRACEFLOW_NONE;
 392         const char *str = NULL;
 393         static const char *e_str[2] = { " -> ", " => " };
 394         static const char *r_str[2] = { " <- ", " <= " };
 395         static const char *ent = "entry", *ret = "return";
 396         static int entlen = 0, retlen = 0;
 397         dtrace_epid_t next, id = epd->dtepd_epid;
 398         int rval;
 399
 400         if (entlen == 0) {
 401                 assert(retlen == 0);
 402                 entlen = strlen(ent);
 403                 retlen = strlen(ret);
 404         }
 405
 406         /*
 407          * If the name of the probe is "entry" or ends with "-entry", we
 408          * treat it as an entry; if it is "return" or ends with "-return",
 409          * we treat it as a return.  (This allows application-provided probes
 410          * like "method-entry" or "function-entry" to participate in flow
 411          * indentation -- without accidentally misinterpreting popular probe
 412          * names like "carpentry", "gentry" or "Coventry".)
 413          */
 414         if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&
 415             (sub == n || sub[-1] == '-')) {
 416                 flow = DTRACEFLOW_ENTRY;
 417                 str = e_str[strcmp(p, "syscall") == 0];
 418         } else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&
 419             (sub == n || sub[-1] == '-')) {
 420                 flow = DTRACEFLOW_RETURN;
 421                 str = r_str[strcmp(p, "syscall") == 0];
 422         }
 423
 424         /*
 425          * If we're going to indent this, we need to check the ID of our last
 426          * call.  If we're looking at the same probe ID but a different EPID,
 427          * we _don't_ want to indent.  (Yes, there are some minor holes in
 428          * this scheme -- it's a heuristic.)
 429          */
 430         if (flow == DTRACEFLOW_ENTRY) {
 431                 if ((last != DTRACE_EPIDNONE && id != last &&
 432                     pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
 433                         flow = DTRACEFLOW_NONE;
 434         }
 435
 436         /*
 437          * If we're going to unindent this, it's more difficult to see if
 438          * we don't actually want to unindent it -- we need to look at the
 439          * _next_ EPID.
 440          */
 441         if (flow == DTRACEFLOW_RETURN) {
 442                 offs += epd->dtepd_size;
 443
 444                 do {
 445                         if (offs >= buf->dtbd_size) {
 446                                 /*
 447                                  * We're at the end -- maybe.  If the oldest
 448                                  * record is non-zero, we need to wrap.
 449                                  */
 450                                 if (buf->dtbd_oldest != 0) {
 451                                         offs = 0;
 452                                 } else {
 453                                         goto out;
 454                                 }
 455                         }
 456
 457                         next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
 458
 459                         if (next == DTRACE_EPIDNONE)
 460                                 offs += sizeof (id);
 461                 } while (next == DTRACE_EPIDNONE);
 462
 463                 if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
 464                         return (rval);
 465
 466                 if (next != id && npd->dtpd_id == pd->dtpd_id)
 467                         flow = DTRACEFLOW_NONE;
 468         }
 469
 470 out:
 471         if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
 472                 data->dtpda_prefix = str;
 473         } else {
 474                 data->dtpda_prefix = "| ";
 475         }
 476
 477         if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
 478                 data->dtpda_indent -= 2;
 479
 480         data->dtpda_flow = flow;
 481
 482         return (0);
 483 }
 484
 485 static int
 486 dt_nullprobe()
 487 {
 488         return (DTRACE_CONSUME_THIS);
 489 }
 490
 491 static int
 492 dt_nullrec()
 493 {
 494         return (DTRACE_CONSUME_NEXT);
 495 }
 496
 497 int
 498 dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
 499     uint64_t normal, long double total, char positives, char negatives)
 500 {
 501         long double f;
 502         uint_t depth, len = 40;
 503
 504         const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
 505         const char *spaces = "                                        ";
 506
 507         assert(strlen(ats) == len && strlen(spaces) == len);
 508         assert(!(total == 0 && (positives || negatives)));
 509         assert(!(val < 0 && !negatives));
 510         assert(!(val > 0 && !positives));
 511         assert(!(val != 0 && total == 0));
 512
 513         if (!negatives) {
 514                 if (positives) {
 515                         f = (dt_fabsl((long double)val) * len) / total;
 516                         depth = (uint_t)(f + 0.5);
 517                 } else {
 518                         depth = 0;
 519                 }
 520
 521                 return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,
 522                     spaces + depth, (long long)val / normal));
 523         }
 524
 525         if (!positives) {
 526                 f = (dt_fabsl((long double)val) * len) / total;
 527                 depth = (uint_t)(f + 0.5);
 528
 529                 return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,
 530                     ats + len - depth, (long long)val / normal));
 531         }
 532
 533         /*
 534          * If we're here, we have both positive and negative bucket values.
 535          * To express this graphically, we're going to generate both positive
 536          * and negative bars separated by a centerline.  These bars are half
 537          * the size of normal quantize()/lquantize() bars, so we divide the
 538          * length in half before calculating the bar length.
 539          */
 540         len /= 2;
 541         ats = &ats[len];
 542         spaces = &spaces[len];
 543
 544         f = (dt_fabsl((long double)val) * len) / total;
 545         depth = (uint_t)(f + 0.5);
 546
 547         if (val <= 0) {
 548                 return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,
 549                     ats + len - depth, len, "", (long long)val / normal));
 550         } else {
 551                 return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",
 552                     ats + len - depth, spaces + depth,
 553                     (long long)val / normal));
 554         }
 555 }
 556
 557 int
 558 dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
 559     size_t size, uint64_t normal)
 560 {
 561         const int64_t *data = addr;
 562         int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
 563         long double total = 0;
 564         char positives = 0, negatives = 0;
 565
 566         if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
 567                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 568
 569         while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
 570                 first_bin++;
 571
 572         if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {
 573                 /*
 574                  * There isn't any data.  This is possible if (and only if)
 575                  * negative increment values have been used.  In this case,
 576                  * we'll print the buckets around 0.
 577                  */
 578                 first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;
 579                 last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;
 580         } else {
 581                 if (first_bin > 0)
 582                         first_bin--;
 583
 584                 while (last_bin > 0 && data[last_bin] == 0)
 585                         last_bin--;
 586
 587                 if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
 588                         last_bin++;
 589         }
 590
 591         for (i = first_bin; i <= last_bin; i++) {
 592                 positives |= (data[i] > 0);
 593                 negatives |= (data[i] < 0);
 594                 total += dt_fabsl((long double)data[i]);
 595         }
 596
 597         if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
 598             "------------- Distribution -------------", "count") < 0)
 599                 return (-1);
 600
 601         for (i = first_bin; i <= last_bin; i++) {
 602                 if (dt_printf(dtp, fp, "%16lld ",
 603                     (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)
 604                         return (-1);
 605
 606                 if (dt_print_quantline(dtp, fp, data[i], normal, total,
 607                     positives, negatives) < 0)
 608                         return (-1);
 609         }
 610
 611         return (0);
 612 }
 613
 614 int
 615 dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
 616     size_t size, uint64_t normal)
 617 {
 618         const int64_t *data = addr;
 619         int i, first_bin, last_bin, base;
 620         uint64_t arg;
 621         long double total = 0;
 622         uint16_t step, levels;
 623         char positives = 0, negatives = 0;
 624
 625         if (size < sizeof (uint64_t))
 626                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 627
 628         arg = *data++;
 629         size -= sizeof (uint64_t);
 630
 631         base = DTRACE_LQUANTIZE_BASE(arg);
 632         step = DTRACE_LQUANTIZE_STEP(arg);
 633         levels = DTRACE_LQUANTIZE_LEVELS(arg);
 634
 635         first_bin = 0;
 636         last_bin = levels + 1;
 637
 638         if (size != sizeof (uint64_t) * (levels + 2))
 639                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 640
 641         while (first_bin <= levels + 1 && data[first_bin] == 0)
 642                 first_bin++;
 643
 644         if (first_bin > levels + 1) {
 645                 first_bin = 0;
 646                 last_bin = 2;
 647         } else {
 648                 if (first_bin > 0)
 649                         first_bin--;
 650
 651                 while (last_bin > 0 && data[last_bin] == 0)
 652                         last_bin--;
 653
 654                 if (last_bin < levels + 1)
 655                         last_bin++;
 656         }
 657
 658         for (i = first_bin; i <= last_bin; i++) {
 659                 positives |= (data[i] > 0);
 660                 negatives |= (data[i] < 0);
 661                 total += dt_fabsl((long double)data[i]);
 662         }
 663
 664         if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
 665             "------------- Distribution -------------", "count") < 0)
 666                 return (-1);
 667
 668         for (i = first_bin; i <= last_bin; i++) {
 669                 char c[32];
 670                 int err;
 671
 672                 if (i == 0) {
 673                         (void) snprintf(c, sizeof (c), "< %d",
 674                             base / (uint32_t)normal);
 675                         err = dt_printf(dtp, fp, "%16s ", c);
 676                 } else if (i == levels + 1) {
 677                         (void) snprintf(c, sizeof (c), ">= %d",
 678                             base + (levels * step));
 679                         err = dt_printf(dtp, fp, "%16s ", c);
 680                 } else {
 681                         err = dt_printf(dtp, fp, "%16d ",
 682                             base + (i - 1) * step);
 683                 }
 684
 685                 if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,
 686                     total, positives, negatives) < 0)
 687                         return (-1);
 688         }
 689
 690         return (0);
 691 }
 692
 693 int
 694 dt_print_llquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
 695     size_t size, uint64_t normal)
 696 {
 697         int i, first_bin, last_bin, bin = 1, order, levels;
 698         uint16_t factor, low, high, nsteps;
 699         const int64_t *data = addr;
 700         int64_t value = 1, next, step;
 701         char positives = 0, negatives = 0;
 702         long double total = 0;
 703         uint64_t arg;
 704         char c[32];
 705
 706         if (size < sizeof (uint64_t))
 707                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 708
 709         arg = *data++;
 710         size -= sizeof (uint64_t);
 711
 712         factor = DTRACE_LLQUANTIZE_FACTOR(arg);
 713         low = DTRACE_LLQUANTIZE_LOW(arg);
 714         high = DTRACE_LLQUANTIZE_HIGH(arg);
 715         nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
 716
 717         /*
 718          * We don't expect to be handed invalid llquantize() parameters here,
 719          * but sanity check them (to a degree) nonetheless.
 720          */
 721         if (size > INT32_MAX || factor < 2 || low >= high ||
 722             nsteps == 0 || factor > nsteps)
 723                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 724
 725         levels = (int)size / sizeof (uint64_t);
 726
 727         first_bin = 0;
 728         last_bin = levels - 1;
 729
 730         while (first_bin < levels && data[first_bin] == 0)
 731                 first_bin++;
 732
 733         if (first_bin == levels) {
 734                 first_bin = 0;
 735                 last_bin = 1;
 736         } else {
 737                 if (first_bin > 0)
 738                         first_bin--;
 739
 740                 while (last_bin > 0 && data[last_bin] == 0)
 741                         last_bin--;
 742
 743                 if (last_bin < levels - 1)
 744                         last_bin++;
 745         }
 746
 747         for (i = first_bin; i <= last_bin; i++) {
 748                 positives |= (data[i] > 0);
 749                 negatives |= (data[i] < 0);
 750                 total += dt_fabsl((long double)data[i]);
 751         }
 752
 753         if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
 754             "------------- Distribution -------------", "count") < 0)
 755                 return (-1);
 756
 757         for (order = 0; order < low; order++)
 758                 value *= factor;
 759
 760         next = value * factor;
 761         step = next > nsteps ? next / nsteps : 1;
 762
 763         if (first_bin == 0) {
 764                 (void) snprintf(c, sizeof (c), "< %lld", (long long)value);
 765
 766                 if (dt_printf(dtp, fp, "%16s ", c) < 0)
 767                         return (-1);
 768
 769                 if (dt_print_quantline(dtp, fp, data[0], normal,
 770                     total, positives, negatives) < 0)
 771                         return (-1);
 772         }
 773
 774         while (order <= high) {
 775                 if (bin >= first_bin && bin <= last_bin) {
 776                         if (dt_printf(dtp, fp, "%16lld ", (long long)value) < 0)
 777                                 return (-1);
 778
 779                         if (dt_print_quantline(dtp, fp, data[bin],
 780                             normal, total, positives, negatives) < 0)
 781                                 return (-1);
 782                 }
 783
 784                 assert(value < next);
 785                 bin++;
 786
 787                 if ((value += step) != next)
 788                         continue;
 789
 790                 next = value * factor;
 791                 step = next > nsteps ? next / nsteps : 1;
 792                 order++;
 793         }
 794
 795         if (last_bin < bin)
 796                 return (0);
 797
 798         assert(last_bin == bin);
 799         (void) snprintf(c, sizeof (c), ">= %lld", value);
 800
 801         if (dt_printf(dtp, fp, "%16s ", c) < 0)
 802                 return (-1);
 803
 804         return (dt_print_quantline(dtp, fp, data[bin], normal,
 805             total, positives, negatives));
 806 }
 807
 808 /*ARGSUSED*/
 809 static int
 810 dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
 811     size_t size, uint64_t normal)
 812 {
 813         /* LINTED - alignment */
 814         int64_t *data = (int64_t *)addr;
 815
 816         return (dt_printf(dtp, fp, " %16lld", data[0] ?
 817             (long long)(data[1] / (int64_t)normal / data[0]) : 0));
 818 }
 819
 820 /*ARGSUSED*/
 821 static int
 822 dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
 823     size_t size, uint64_t normal)
 824 {
 825         /* LINTED - alignment */
 826         uint64_t *data = (uint64_t *)addr;
 827
 828         return (dt_printf(dtp, fp, " %16llu", data[0] ?
 829             (unsigned long long) dt_stddev(data, normal) : 0));
 830 }
 831
 832 /*ARGSUSED*/
 833 int
 834 dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
 835     size_t nbytes, int width, int quiet, int raw)
 836 {
 837         /*
 838          * If the byte stream is a series of printable characters, followed by
 839          * a terminating byte, we print it out as a string.  Otherwise, we
 840          * assume that it's something else and just print the bytes.
 841          */
 842         int i, j, margin = 5;
 843         char *c = (char *)addr;
 844
 845         if (nbytes == 0)
 846                 return (0);
 847
 848         if (raw || dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
 849                 goto raw;
 850
 851         for (i = 0; i < nbytes; i++) {
 852                 /*
 853                  * We define a "printable character" to be one for which
 854                  * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
 855                  * or a character which is either backspace or the bell.
 856                  * Backspace and the bell are regrettably special because
 857                  * they fail the first two tests -- and yet they are entirely
 858                  * printable.  These are the only two control characters that
 859                  * have meaning for the terminal and for which isprint(3C) and
 860                  * isspace(3C) return 0.
 861                  */
 862                 if (isprint(c[i]) || isspace(c[i]) ||
 863                     c[i] == '\b' || c[i] == '\a')
 864                         continue;
 865
 866                 if (c[i] == '\0' && i > 0) {
 867                         /*
 868                          * This looks like it might be a string.  Before we
 869                          * assume that it is indeed a string, check the
 870                          * remainder of the byte range; if it contains
 871                          * additional non-nul characters, we'll assume that
 872                          * it's a binary stream that just happens to look like
 873                          * a string, and we'll print out the individual bytes.
 874                          */
 875                         for (j = i + 1; j < nbytes; j++) {
 876                                 if (c[j] != '\0')
 877                                         break;
 878                         }
 879
 880                         if (j != nbytes)
 881                                 break;
 882
 883                         if (quiet)
 884                                 return (dt_printf(dtp, fp, "%s", c));
 885                         else
 886                                 return (dt_printf(dtp, fp, "  %-*s", width, c));
 887                 }
 888
 889                 break;
 890         }
 891
 892         if (i == nbytes) {
 893                 /*
 894                  * The byte range is all printable characters, but there is
 895                  * no trailing nul byte.  We'll assume that it's a string and
 896                  * print it as such.
 897                  */
 898                 char *s = alloca(nbytes + 1);
 899                 bcopy(c, s, nbytes);
 900                 s[nbytes] = '\0';
 901                 return (dt_printf(dtp, fp, "  %-*s", width, s));
 902         }
 903
 904 raw:
 905         if (dt_printf(dtp, fp, "\n%*s      ", margin, "") < 0)
 906                 return (-1);
 907
 908         for (i = 0; i < 16; i++)
 909                 if (dt_printf(dtp, fp, "  %c", "0123456789abcdef"[i]) < 0)
 910                         return (-1);
 911
 912         if (dt_printf(dtp, fp, "  0123456789abcdef\n") < 0)
 913                 return (-1);
 914
 915
 916         for (i = 0; i < nbytes; i += 16) {
 917                 if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
 918                         return (-1);
 919
 920                 for (j = i; j < i + 16 && j < nbytes; j++) {
 921                         if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
 922                                 return (-1);
 923                 }
 924
 925                 while (j++ % 16) {
 926                         if (dt_printf(dtp, fp, "   ") < 0)
 927                                 return (-1);
 928                 }
 929
 930                 if (dt_printf(dtp, fp, "  ") < 0)
 931                         return (-1);
 932
 933                 for (j = i; j < i + 16 && j < nbytes; j++) {
 934                         if (dt_printf(dtp, fp, "%c",
 935                             c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
 936                                 return (-1);
 937                 }
 938
 939                 if (dt_printf(dtp, fp, "\n") < 0)
 940                         return (-1);
 941         }
 942
 943         return (0);
 944 }
 945
 946 int
 947 dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
 948     caddr_t addr, int depth, int size)
 949 {
 950         dtrace_syminfo_t dts;
 951         GElf_Sym sym;
 952         int i, indent;
 953         char c[PATH_MAX * 2];
 954         uint64_t pc;
 955
 956         if (dt_printf(dtp, fp, "\n") < 0)
 957                 return (-1);
 958
 959         if (format == NULL)
 960                 format = "%s";
 961
 962         if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
 963                 indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
 964         else
 965                 indent = _dtrace_stkindent;
 966
 967         for (i = 0; i < depth; i++) {
 968                 switch (size) {
 969                 case sizeof (uint32_t):
 970                         /* LINTED - alignment */
 971                         pc = *((uint32_t *)addr);
 972                         break;
 973
 974                 case sizeof (uint64_t):
 975                         /* LINTED - alignment */
 976                         pc = *((uint64_t *)addr);
 977                         break;
 978
 979                 default:
 980                         return (dt_set_errno(dtp, EDT_BADSTACKPC));
 981                 }
 982
 983                 if (pc == 0)
 984                         break;
 985
 986                 addr += size;
 987
 988                 if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
 989                         return (-1);
 990
 991                 if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
 992                         if (pc > sym.st_value) {
 993                                 (void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
 994                                     dts.dts_object, dts.dts_name,
 995                                     pc - sym.st_value);
 996                         } else {
 997                                 (void) snprintf(c, sizeof (c), "%s`%s",
 998                                     dts.dts_object, dts.dts_name);
 999                         }
1000                 } else {
1001                         /*
1002                          * We'll repeat the lookup, but this time we'll specify
1003                          * a NULL GElf_Sym -- indicating that we're only
1004                          * interested in the containing module.
1005                          */
1006                         if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1007                                 (void) snprintf(c, sizeof (c), "%s`0x%llx",
1008                                     dts.dts_object, pc);
1009                         } else {
1010                                 (void) snprintf(c, sizeof (c), "0x%llx", pc);
1011                         }
1012                 }
1013
1014                 if (dt_printf(dtp, fp, format, c) < 0)
1015                         return (-1);
1016
1017                 if (dt_printf(dtp, fp, "\n") < 0)
1018                         return (-1);
1019         }
1020
1021         return (0);
1022 }
1023
/*
 * Print a user-level stack trace.
 *
 * The record at `addr' is read as an array of uint64_t values: the first is
 * the process ID, followed by the program counters.  `arg' encodes the frame
 * count (DTRACE_USTACK_NFRAMES) and the size of an optional string table
 * (DTRACE_USTACK_STRSIZE); when that size is non-zero the table starts at
 * addr + (depth + 1) * sizeof (uint64_t) and holds one nul-terminated string
 * per frame.
 *
 * Each frame is printed on its own line through `format' (default "%s"),
 * indented by the stackindent option.  Printing stops at the first zero
 * program counter or after `depth' frames.
 *
 * Returns 0 immediately when depth is zero, -1 if the leading newline cannot
 * be printed, and otherwise the value of the last dt_printf() call made in
 * the loop (negative on failure).
 */
int
dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
    caddr_t addr, uint64_t arg)
{
        /* LINTED - alignment */
        uint64_t *pc = (uint64_t *)addr;
        uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
        uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
        const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
        const char *str = strsize ? strbase : NULL;
        int err = 0;

        char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
        struct ps_prochandle *P;
        GElf_Sym sym;
        int i, indent;
        pid_t pid;

        if (depth == 0)
                return (0);

        /* The first slot is the pid; pc now points at the first frame. */
        pid = (pid_t)*pc++;

        if (dt_printf(dtp, fp, "\n") < 0)
                return (-1);

        if (format == NULL)
                format = "%s";

        if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
                indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
        else
                indent = _dtrace_stkindent;

        /*
         * Ultimately, we need to add an entry point in the library vector for
         * determining <symbol, offset> from <pid, address>.  For now, if
         * this is a vector open, we just print the raw address or string.
         */
        if (dtp->dt_vector == NULL)
                P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
        else
                P = NULL;

        if (P != NULL)
                dt_proc_lock(dtp, P); /* lock handle while we perform lookups */

        /*
         * Errors inside the loop break out rather than return so that the
         * process handle is always unlocked and released below.
         */
        for (i = 0; i < depth && pc[i] != 0; i++) {
                const prmap_t *map;

                if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
                        break;

                if (P != NULL && Plookup_by_addr(P, pc[i],
                    name, sizeof (name), &sym) == 0) {
                        /* Symbol found: print object`symbol[+offset]. */
                        (void) Pobjname(P, pc[i], objname, sizeof (objname));

                        if (pc[i] > sym.st_value) {
                                (void) snprintf(c, sizeof (c),
                                    "%s`%s+0x%llx", dt_basename(objname), name,
                                    (u_longlong_t)(pc[i] - sym.st_value));
                        } else {
                                (void) snprintf(c, sizeof (c),
                                    "%s`%s", dt_basename(objname), name);
                        }
                } else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
                    (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
                    (map->pr_mflags & MA_WRITE)))) {
                        /*
                         * If the current string pointer in the string table
                         * does not point to an empty string _and_ the program
                         * counter falls in a writable region, we'll use the
                         * string from the string table instead of the raw
                         * address.  This last condition is necessary because
                         * some (broken) ustack helpers will return a string
                         * even for a program counter that they can't
                         * identify.  If we have a string for a program
                         * counter that falls in a segment that isn't
                         * writable, we assume that we have fallen into this
                         * case and we refuse to use the string.
                         */
                        (void) snprintf(c, sizeof (c), "%s", str);
                } else {
                        /* Fall back to object`address, or a bare address. */
                        if (P != NULL && Pobjname(P, pc[i], objname,
                            sizeof (objname)) != 0) {
                                (void) snprintf(c, sizeof (c), "%s`0x%llx",
                                    dt_basename(objname), (u_longlong_t)pc[i]);
                        } else {
                                (void) snprintf(c, sizeof (c), "0x%llx",
                                    (u_longlong_t)pc[i]);
                        }
                }

                if ((err = dt_printf(dtp, fp, format, c)) < 0)
                        break;

                if ((err = dt_printf(dtp, fp, "\n")) < 0)
                        break;

                if (str != NULL && str[0] == '@') {
                        /*
                         * If the first character of the string is an "at" sign,
                         * then the string is inferred to be an annotation --
                         * and it is printed out beneath the frame and offset
                         * with brackets.
                         */
                        if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
                                break;

                        (void) snprintf(c, sizeof (c), "  [ %s ]", &str[1]);

                        if ((err = dt_printf(dtp, fp, format, c)) < 0)
                                break;

                        if ((err = dt_printf(dtp, fp, "\n")) < 0)
                                break;
                }

                /*
                 * Advance to the next frame's string; once we have walked
                 * strsize bytes past the table base, stop consulting it.
                 */
                if (str != NULL) {
                        str += strlen(str) + 1;
                        if (str - strbase >= strsize)
                                str = NULL;
                }
        }

        if (P != NULL) {
                dt_proc_unlock(dtp, P);
                dt_proc_release(dtp, P);
        }

        return (err);
}
1156
1157 static int
1158 dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)
1159 {
1160         /* LINTED - alignment */
1161         uint64_t pid = ((uint64_t *)addr)[0];
1162         /* LINTED - alignment */
1163         uint64_t pc = ((uint64_t *)addr)[1];
1164         const char *format = "  %-50s";
1165         char *s;
1166         int n, len = 256;
1167
1168         if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1169                 struct ps_prochandle *P;
1170
1171                 if ((P = dt_proc_grab(dtp, pid,
1172                     PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
1173                         GElf_Sym sym;
1174
1175                         dt_proc_lock(dtp, P);
1176
1177                         if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
1178                                 pc = sym.st_value;
1179
1180                         dt_proc_unlock(dtp, P);
1181                         dt_proc_release(dtp, P);
1182                 }
1183         }
1184
1185         do {
1186                 n = len;
1187                 s = alloca(n);
1188         } while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
1189
1190         return (dt_printf(dtp, fp, format, s));
1191 }
1192
1193 int
1194 dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1195 {
1196         /* LINTED - alignment */
1197         uint64_t pid = ((uint64_t *)addr)[0];
1198         /* LINTED - alignment */
1199         uint64_t pc = ((uint64_t *)addr)[1];
1200         int err = 0;
1201
1202         char objname[PATH_MAX], c[PATH_MAX * 2];
1203         struct ps_prochandle *P;
1204
1205         if (format == NULL)
1206                 format = "  %-50s";
1207
1208         /*
1209          * See the comment in dt_print_ustack() for the rationale for
1210          * printing raw addresses in the vectored case.
1211          */
1212         if (dtp->dt_vector == NULL)
1213                 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1214         else
1215                 P = NULL;
1216
1217         if (P != NULL)
1218                 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1219
1220         if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {
1221                 (void) snprintf(c, sizeof (c), "%s", dt_basename(objname));
1222         } else {
1223                 (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1224         }
1225
1226         err = dt_printf(dtp, fp, format, c);
1227
1228         if (P != NULL) {
1229                 dt_proc_unlock(dtp, P);
1230                 dt_proc_release(dtp, P);
1231         }
1232
1233         return (err);
1234 }
1235
1236 int
1237 dt_print_memory(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
1238 {
1239         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1240         size_t nbytes = *((uintptr_t *) addr);
1241
1242         return (dt_print_bytes(dtp, fp, addr + sizeof(uintptr_t),
1243             nbytes, 50, quiet, 1));
1244 }
1245
/*
 * State shared by the dt_print_type*() routines while a buffered value is
 * printed according to its CTF type.  A copy is made for every nested
 * member or element so that each level can adjust its own address range,
 * name and indentation.
 */
typedef struct dt_type_cbdata {
        dtrace_hdl_t            *dtp;           /* handle used for dt_printf() */
        dtrace_typeinfo_t       dtt;            /* CTF container and type being printed */
        caddr_t                 addr;           /* start of the data to print */
        caddr_t                 addrend;        /* first byte past the data to print */
        const char              *name;          /* member name ("" when there is none) */
        int                     f_type;         /* non-zero: print the type/name prefix */
        int                     indent;         /* nesting level; 4 columns per level */
        int                     type_width;     /* longest sibling type name seen */
        int                     name_width;     /* longest sibling member name seen */
        FILE                    *fp;            /* output stream */
} dt_type_cbdata_t;

/* Forward declaration: the member callback and the printer are mutually recursive. */
static int      dt_print_type_data(dt_type_cbdata_t *, ctf_id_t);
1260
1261 static int
1262 dt_print_type_member(const char *name, ctf_id_t type, ulong_t off, void *arg)
1263 {
1264         dt_type_cbdata_t cbdata;
1265         dt_type_cbdata_t *cbdatap = arg;
1266         ssize_t ssz;
1267
1268         if ((ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type)) <= 0)
1269                 return (0);
1270
1271         off /= 8;
1272
1273         cbdata = *cbdatap;
1274         cbdata.name = name;
1275         cbdata.addr += off;
1276         cbdata.addrend = cbdata.addr + ssz;
1277
1278         return (dt_print_type_data(&cbdata, type));
1279 }
1280
1281 static int
1282 dt_print_type_width(const char *name, ctf_id_t type, ulong_t off, void *arg)
1283 {
1284         char buf[DT_TYPE_NAMELEN];
1285         char *p;
1286         dt_type_cbdata_t *cbdatap = arg;
1287         size_t sz = strlen(name);
1288
1289         ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf));
1290
1291         if ((p = strchr(buf, '[')) != NULL)
1292                 p[-1] = '\0';
1293         else
1294                 p = "";
1295
1296         sz += strlen(p);
1297
1298         if (sz > cbdatap->name_width)
1299                 cbdatap->name_width = sz;
1300
1301         sz = strlen(buf);
1302
1303         if (sz > cbdatap->type_width)
1304                 cbdatap->type_width = sz;
1305
1306         return (0);
1307 }
1308
1309 static int
1310 dt_print_type_data(dt_type_cbdata_t *cbdatap, ctf_id_t type)
1311 {
1312         caddr_t addr = cbdatap->addr;
1313         caddr_t addrend = cbdatap->addrend;
1314         char buf[DT_TYPE_NAMELEN];
1315         char *p;
1316         int cnt = 0;
1317         uint_t kind = ctf_type_kind(cbdatap->dtt.dtt_ctfp, type);
1318         ssize_t ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type);
1319
1320         ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf));
1321
1322         if ((p = strchr(buf, '[')) != NULL)
1323                 p[-1] = '\0';
1324         else
1325                 p = "";
1326
1327         if (cbdatap->f_type) {
1328                 int type_width = roundup(cbdatap->type_width + 1, 4);
1329                 int name_width = roundup(cbdatap->name_width + 1, 4);
1330
1331                 name_width -= strlen(cbdatap->name);
1332
1333                 dt_printf(cbdatap->dtp, cbdatap->fp, "%*s%-*s%s%-*s     = ",cbdatap->indent * 4,"",type_width,buf,cbdatap->name,name_width,p);
1334         }
1335
1336         while (addr < addrend) {
1337                 dt_type_cbdata_t cbdata;
1338                 ctf_arinfo_t arinfo;
1339                 ctf_encoding_t cte;
1340                 uintptr_t *up;
1341                 void *vp = addr;
1342                 cbdata = *cbdatap;
1343                 cbdata.name = "";
1344                 cbdata.addr = addr;
1345                 cbdata.addrend = addr + ssz;
1346                 cbdata.f_type = 0;
1347                 cbdata.indent++;
1348                 cbdata.type_width = 0;
1349                 cbdata.name_width = 0;
1350
1351                 if (cnt > 0)
1352                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s", cbdatap->indent * 4,"");
1353
1354                 switch (kind) {
1355                 case CTF_K_INTEGER:
1356                         if (ctf_type_encoding(cbdatap->dtt.dtt_ctfp, type, &cte) != 0)
1357                                 return (-1);
1358                         if ((cte.cte_format & CTF_INT_SIGNED) != 0)
1359                                 switch (cte.cte_bits) {
1360                                 case 8:
1361                                         if (isprint(*((char *) vp)))
1362                                                 dt_printf(cbdatap->dtp, cbdatap->fp, "'%c', ", *((char *) vp));
1363                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((char *) vp), *((char *) vp));
1364                                         break;
1365                                 case 16:
1366                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%hd (0x%hx);\n", *((short *) vp), *((u_short *) vp));
1367                                         break;
1368                                 case 32:
1369                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((int *) vp), *((u_int *) vp));
1370                                         break;
1371                                 case 64:
1372                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%jd (0x%jx);\n", *((long long *) vp), *((unsigned long long *) vp));
1373                                         break;
1374                                 default:
1375                                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1376                                         break;
1377                                 }
1378                         else
1379                                 switch (cte.cte_bits) {
1380                                 case 8:
1381                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((uint8_t *) vp) & 0xff, *((uint8_t *) vp) & 0xff);
1382                                         break;
1383                                 case 16:
1384                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%hu (0x%hx);\n", *((u_short *) vp), *((u_short *) vp));
1385                                         break;
1386                                 case 32:
1387                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((u_int *) vp), *((u_int *) vp));
1388                                         break;
1389                                 case 64:
1390                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%ju (0x%jx);\n", *((unsigned long long *) vp), *((unsigned long long *) vp));
1391                                         break;
1392                                 default:
1393                                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1394                                         break;
1395                                 }
1396                         break;
1397                 case CTF_K_FLOAT:
1398                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FLOAT: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1399                         break;
1400                 case CTF_K_POINTER:
1401                         dt_printf(cbdatap->dtp, cbdatap->fp, "%p;\n", *((void **) addr));
1402                         break;
1403                 case CTF_K_ARRAY:
1404                         if (ctf_array_info(cbdatap->dtt.dtt_ctfp, type, &arinfo) != 0)
1405                                 return (-1);
1406                         dt_printf(cbdatap->dtp, cbdatap->fp, "{\n%*s",cbdata.indent * 4,"");
1407                         dt_print_type_data(&cbdata, arinfo.ctr_contents);
1408                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1409                         break;
1410                 case CTF_K_FUNCTION:
1411                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FUNCTION:\n");
1412                         break;
1413                 case CTF_K_STRUCT:
1414                         cbdata.f_type = 1;
1415                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1416                             dt_print_type_width, &cbdata) != 0)
1417                                 return (-1);
1418                         dt_printf(cbdatap->dtp, cbdatap->fp, "{\n");
1419                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1420                             dt_print_type_member, &cbdata) != 0)
1421                                 return (-1);
1422                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1423                         break;
1424                 case CTF_K_UNION:
1425                         cbdata.f_type = 1;
1426                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1427                             dt_print_type_width, &cbdata) != 0)
1428                                 return (-1);
1429                         dt_printf(cbdatap->dtp, cbdatap->fp, "{\n");
1430                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1431                             dt_print_type_member, &cbdata) != 0)
1432                                 return (-1);
1433                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1434                         break;
1435                 case CTF_K_ENUM:
1436                         dt_printf(cbdatap->dtp, cbdatap->fp, "%s;\n", ctf_enum_name(cbdatap->dtt.dtt_ctfp, type, *((int *) vp)));
1437                         break;
1438                 case CTF_K_TYPEDEF:
1439                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1440                         break;
1441                 case CTF_K_VOLATILE:
1442                         if (cbdatap->f_type)
1443                                 dt_printf(cbdatap->dtp, cbdatap->fp, "volatile ");
1444                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1445                         break;
1446                 case CTF_K_CONST:
1447                         if (cbdatap->f_type)
1448                                 dt_printf(cbdatap->dtp, cbdatap->fp, "const ");
1449                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1450                         break;
1451                 case CTF_K_RESTRICT:
1452                         if (cbdatap->f_type)
1453                                 dt_printf(cbdatap->dtp, cbdatap->fp, "restrict ");
1454                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1455                         break;
1456                 default:
1457                         break;
1458                 }
1459
1460                 addr += ssz;
1461                 cnt++;
1462         }
1463
1464         return (0);
1465 }
1466
1467 static int
1468 dt_print_type(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
1469 {
1470         caddr_t addrend;
1471         char *p;
1472         dtrace_typeinfo_t dtt;
1473         dt_type_cbdata_t cbdata;
1474         int num = 0;
1475         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1476         ssize_t ssz;
1477
1478         if (!quiet)
1479                 dt_printf(dtp, fp, "\n");
1480
1481         /* Get the total number of bytes of data buffered. */
1482         size_t nbytes = *((uintptr_t *) addr);
1483         addr += sizeof(uintptr_t);
1484
1485         /*
1486          * Get the size of the type so that we can check that it matches
1487          * the CTF data we look up and so that we can figure out how many
1488          * type elements are buffered.
1489          */
1490         size_t typs = *((uintptr_t *) addr);
1491         addr += sizeof(uintptr_t);
1492
1493         /*
1494          * Point to the type string in the buffer. Get it's string
1495          * length and round it up to become the offset to the start
1496          * of the buffered type data which we would like to be aligned
1497          * for easy access.
1498          */
1499         char *strp = (char *) addr;
1500         int offset = roundup(strlen(strp) + 1, sizeof(uintptr_t));
1501
1502         /*
1503          * The type string might have a format such as 'int [20]'.
1504          * Check if there is an array dimension present.
1505          */
1506         if ((p = strchr(strp, '[')) != NULL) {
1507                 /* Strip off the array dimension. */
1508                 *p++ = '\0';
1509
1510                 for (; *p != '\0' && *p != ']'; p++)
1511                         num = num * 10 + *p - '0';
1512         } else
1513                 /* No array dimension, so default. */
1514                 num = 1;
1515
1516         /* Lookup the CTF type from the type string. */
1517         if (dtrace_lookup_by_type(dtp,  DTRACE_OBJ_EVERY, strp, &dtt) < 0)
1518                 return (-1);
1519
1520         /* Offset the buffer address to the start of the data... */
1521         addr += offset;
1522
1523         ssz = ctf_type_size(dtt.dtt_ctfp, dtt.dtt_type);
1524
1525         if (typs != ssz) {
1526                 printf("Expected type size from buffer (%lu) to match type size looked up now (%ld)\n", (u_long) typs, (long) ssz);
1527                 return (-1);
1528         }
1529
1530         cbdata.dtp = dtp;
1531         cbdata.dtt = dtt;
1532         cbdata.name = "";
1533         cbdata.addr = addr;
1534         cbdata.addrend = addr + nbytes;
1535         cbdata.indent = 1;
1536         cbdata.f_type = 1;
1537         cbdata.type_width = 0;
1538         cbdata.name_width = 0;
1539         cbdata.fp = fp;
1540
1541         return (dt_print_type_data(&cbdata, dtt.dtt_type));
1542 }
1543
1544 static int
1545 dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1546 {
1547         /* LINTED - alignment */
1548         uint64_t pc = *((uint64_t *)addr);
1549         dtrace_syminfo_t dts;
1550         GElf_Sym sym;
1551         char c[PATH_MAX * 2];
1552
1553         if (format == NULL)
1554                 format = "  %-50s";
1555
1556         if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1557                 (void) snprintf(c, sizeof (c), "%s`%s",
1558                     dts.dts_object, dts.dts_name);
1559         } else {
1560                 /*
1561                  * We'll repeat the lookup, but this time we'll specify a
1562                  * NULL GElf_Sym -- indicating that we're only interested in
1563                  * the containing module.
1564                  */
1565                 if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1566                         (void) snprintf(c, sizeof (c), "%s`0x%llx",
1567                             dts.dts_object, (u_longlong_t)pc);
1568                 } else {
1569                         (void) snprintf(c, sizeof (c), "0x%llx",
1570                             (u_longlong_t)pc);
1571                 }
1572         }
1573
1574         if (dt_printf(dtp, fp, format, c) < 0)
1575                 return (-1);
1576
1577         return (0);
1578 }
1579
1580 int
1581 dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1582 {
1583         /* LINTED - alignment */
1584         uint64_t pc = *((uint64_t *)addr);
1585         dtrace_syminfo_t dts;
1586         char c[PATH_MAX * 2];
1587
1588         if (format == NULL)
1589                 format = "  %-50s";
1590
1591         if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1592                 (void) snprintf(c, sizeof (c), "%s", dts.dts_object);
1593         } else {
1594                 (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1595         }
1596
1597         if (dt_printf(dtp, fp, format, c) < 0)
1598                 return (-1);
1599
1600         return (0);
1601 }
1602
/*
 * Argument handed to dt_normalize_agg() during an aggregation walk: which
 * aggregation variable to normalize, and the value to normalize it by.
 */
typedef struct dt_normal {
        dtrace_aggvarid_t dtnd_id;      /* aggregation variable to match */
        uint64_t dtnd_normal;           /* normalization value to record */
} dt_normal_t;
1607
1608 static int
1609 dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1610 {
1611         dt_normal_t *normal = arg;
1612         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1613         dtrace_aggvarid_t id = normal->dtnd_id;
1614
1615         if (agg->dtagd_nrecs == 0)
1616                 return (DTRACE_AGGWALK_NEXT);
1617
1618         if (agg->dtagd_varid != id)
1619                 return (DTRACE_AGGWALK_NEXT);
1620
1621         ((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
1622         return (DTRACE_AGGWALK_NORMALIZE);
1623 }
1624
1625 static int
1626 dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1627 {
1628         dt_normal_t normal;
1629         caddr_t addr;
1630
1631         /*
1632          * We (should) have two records:  the aggregation ID followed by the
1633          * normalization value.
1634          */
1635         addr = base + rec->dtrd_offset;
1636
1637         if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1638                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1639
1640         /* LINTED - alignment */
1641         normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
1642         rec++;
1643
1644         if (rec->dtrd_action != DTRACEACT_LIBACT)
1645                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1646
1647         if (rec->dtrd_arg != DT_ACT_NORMALIZE)
1648                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1649
1650         addr = base + rec->dtrd_offset;
1651
1652         switch (rec->dtrd_size) {
1653         case sizeof (uint64_t):
1654                 /* LINTED - alignment */
1655                 normal.dtnd_normal = *((uint64_t *)addr);
1656                 break;
1657         case sizeof (uint32_t):
1658                 /* LINTED - alignment */
1659                 normal.dtnd_normal = *((uint32_t *)addr);
1660                 break;
1661         case sizeof (uint16_t):
1662                 /* LINTED - alignment */
1663                 normal.dtnd_normal = *((uint16_t *)addr);
1664                 break;
1665         case sizeof (uint8_t):
1666                 normal.dtnd_normal = *((uint8_t *)addr);
1667                 break;
1668         default:
1669                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1670         }
1671
1672         (void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
1673
1674         return (0);
1675 }
1676
1677 static int
1678 dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1679 {
1680         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1681         dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1682
1683         if (agg->dtagd_nrecs == 0)
1684                 return (DTRACE_AGGWALK_NEXT);
1685
1686         if (agg->dtagd_varid != id)
1687                 return (DTRACE_AGGWALK_NEXT);
1688
1689         return (DTRACE_AGGWALK_DENORMALIZE);
1690 }
1691
1692 static int
1693 dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)
1694 {
1695         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1696         dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1697
1698         if (agg->dtagd_nrecs == 0)
1699                 return (DTRACE_AGGWALK_NEXT);
1700
1701         if (agg->dtagd_varid != id)
1702                 return (DTRACE_AGGWALK_NEXT);
1703
1704         return (DTRACE_AGGWALK_CLEAR);
1705 }
1706
1707 typedef struct dt_trunc {
1708         dtrace_aggvarid_t dttd_id;
1709         uint64_t dttd_remaining;
1710 } dt_trunc_t;
1711
1712 static int
1713 dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)
1714 {
1715         dt_trunc_t *trunc = arg;
1716         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1717         dtrace_aggvarid_t id = trunc->dttd_id;
1718
1719         if (agg->dtagd_nrecs == 0)
1720                 return (DTRACE_AGGWALK_NEXT);
1721
1722         if (agg->dtagd_varid != id)
1723                 return (DTRACE_AGGWALK_NEXT);
1724
1725         if (trunc->dttd_remaining == 0)
1726                 return (DTRACE_AGGWALK_REMOVE);
1727
1728         trunc->dttd_remaining--;
1729         return (DTRACE_AGGWALK_NEXT);
1730 }
1731
1732 static int
1733 dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1734 {
1735         dt_trunc_t trunc;
1736         caddr_t addr;
1737         int64_t remaining;
1738         int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);
1739
1740         /*
1741          * We (should) have two records:  the aggregation ID followed by the
1742          * number of aggregation entries after which the aggregation is to be
1743          * truncated.
1744          */
1745         addr = base + rec->dtrd_offset;
1746
1747         if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1748                 return (dt_set_errno(dtp, EDT_BADTRUNC));
1749
1750         /* LINTED - alignment */
1751         trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
1752         rec++;
1753
1754         if (rec->dtrd_action != DTRACEACT_LIBACT)
1755                 return (dt_set_errno(dtp, EDT_BADTRUNC));
1756
1757         if (rec->dtrd_arg != DT_ACT_TRUNC)
1758                 return (dt_set_errno(dtp, EDT_BADTRUNC));
1759
1760         addr = base + rec->dtrd_offset;
1761
1762         switch (rec->dtrd_size) {
1763         case sizeof (uint64_t):
1764                 /* LINTED - alignment */
1765                 remaining = *((int64_t *)addr);
1766                 break;
1767         case sizeof (uint32_t):
1768                 /* LINTED - alignment */
1769                 remaining = *((int32_t *)addr);
1770                 break;
1771         case sizeof (uint16_t):
1772                 /* LINTED - alignment */
1773                 remaining = *((int16_t *)addr);
1774                 break;
1775         case sizeof (uint8_t):
1776                 remaining = *((int8_t *)addr);
1777                 break;
1778         default:
1779                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1780         }
1781
1782         if (remaining < 0) {
1783                 func = dtrace_aggregate_walk_valsorted;
1784                 remaining = -remaining;
1785         } else {
1786                 func = dtrace_aggregate_walk_valrevsorted;
1787         }
1788
1789         assert(remaining >= 0);
1790         trunc.dttd_remaining = remaining;
1791
1792         (void) func(dtp, dt_trunc_agg, &trunc);
1793
1794         return (0);
1795 }
1796
1797 static int
1798 dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
1799     caddr_t addr, size_t size, uint64_t normal)
1800 {
1801         int err;
1802         dtrace_actkind_t act = rec->dtrd_action;
1803
1804         switch (act) {
1805         case DTRACEACT_STACK:
1806                 return (dt_print_stack(dtp, fp, NULL, addr,
1807                     rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));
1808
1809         case DTRACEACT_USTACK:
1810         case DTRACEACT_JSTACK:
1811                 return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));
1812
1813         case DTRACEACT_USYM:
1814         case DTRACEACT_UADDR:
1815                 return (dt_print_usym(dtp, fp, addr, act));
1816
1817         case DTRACEACT_UMOD:
1818                 return (dt_print_umod(dtp, fp, NULL, addr));
1819
1820         case DTRACEACT_SYM:
1821                 return (dt_print_sym(dtp, fp, NULL, addr));
1822
1823         case DTRACEACT_MOD:
1824                 return (dt_print_mod(dtp, fp, NULL, addr));
1825
1826         case DTRACEAGG_QUANTIZE:
1827                 return (dt_print_quantize(dtp, fp, addr, size, normal));
1828
1829         case DTRACEAGG_LQUANTIZE:
1830                 return (dt_print_lquantize(dtp, fp, addr, size, normal));
1831
1832         case DTRACEAGG_LLQUANTIZE:
1833                 return (dt_print_llquantize(dtp, fp, addr, size, normal));
1834
1835         case DTRACEAGG_AVG:
1836                 return (dt_print_average(dtp, fp, addr, size, normal));
1837
1838         case DTRACEAGG_STDDEV:
1839                 return (dt_print_stddev(dtp, fp, addr, size, normal));
1840
1841         default:
1842                 break;
1843         }
1844
1845         switch (size) {
1846         case sizeof (uint64_t):
1847                 err = dt_printf(dtp, fp, " %16lld",
1848                     /* LINTED - alignment */
1849                     (long long)*((uint64_t *)addr) / normal);
1850                 break;
1851         case sizeof (uint32_t):
1852                 /* LINTED - alignment */
1853                 err = dt_printf(dtp, fp, " %8d", *((uint32_t *)addr) /
1854                     (uint32_t)normal);
1855                 break;
1856         case sizeof (uint16_t):
1857                 /* LINTED - alignment */
1858                 err = dt_printf(dtp, fp, " %5d", *((uint16_t *)addr) /
1859                     (uint32_t)normal);
1860                 break;
1861         case sizeof (uint8_t):
1862                 err = dt_printf(dtp, fp, " %3d", *((uint8_t *)addr) /
1863                     (uint32_t)normal);
1864                 break;
1865         default:
1866                 err = dt_print_bytes(dtp, fp, addr, size, 50, 0, 0);
1867                 break;
1868         }
1869
1870         return (err);
1871 }
1872
1873 int
1874 dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
1875 {
1876         int i, aggact = 0;
1877         dt_print_aggdata_t *pd = arg;
1878         const dtrace_aggdata_t *aggdata = aggsdata[0];
1879         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1880         FILE *fp = pd->dtpa_fp;
1881         dtrace_hdl_t *dtp = pd->dtpa_dtp;
1882         dtrace_recdesc_t *rec;
1883         dtrace_actkind_t act;
1884         caddr_t addr;
1885         size_t size;
1886
1887         /*
1888          * Iterate over each record description in the key, printing the traced
1889          * data, skipping the first datum (the tuple member created by the
1890          * compiler).
1891          */
1892         for (i = 1; i < agg->dtagd_nrecs; i++) {
1893                 rec = &agg->dtagd_rec[i];
1894                 act = rec->dtrd_action;
1895                 addr = aggdata->dtada_data + rec->dtrd_offset;
1896                 size = rec->dtrd_size;
1897
1898                 if (DTRACEACT_ISAGG(act)) {
1899                         aggact = i;
1900                         break;
1901                 }
1902
1903                 if (dt_print_datum(dtp, fp, rec, addr, size, 1) < 0)
1904                         return (-1);
1905
1906                 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1907                     DTRACE_BUFDATA_AGGKEY) < 0)
1908                         return (-1);
1909         }
1910
1911         assert(aggact != 0);
1912
1913         for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
1914                 uint64_t normal;
1915
1916                 aggdata = aggsdata[i];
1917                 agg = aggdata->dtada_desc;
1918                 rec = &agg->dtagd_rec[aggact];
1919                 act = rec->dtrd_action;
1920                 addr = aggdata->dtada_data + rec->dtrd_offset;
1921                 size = rec->dtrd_size;
1922
1923                 assert(DTRACEACT_ISAGG(act));
1924                 normal = aggdata->dtada_normal;
1925
1926                 if (dt_print_datum(dtp, fp, rec, addr, size, normal) < 0)
1927                         return (-1);
1928
1929                 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
1930                     DTRACE_BUFDATA_AGGVAL) < 0)
1931                         return (-1);
1932
1933                 if (!pd->dtpa_allunprint)
1934                         agg->dtagd_flags |= DTRACE_AGD_PRINTED;
1935         }
1936
1937         if (dt_printf(dtp, fp, "\n") < 0)
1938                 return (-1);
1939
1940         if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
1941             DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)
1942                 return (-1);
1943
1944         return (0);
1945 }
1946
1947 int
1948 dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)
1949 {
1950         dt_print_aggdata_t *pd = arg;
1951         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1952         dtrace_aggvarid_t aggvarid = pd->dtpa_id;
1953
1954         if (pd->dtpa_allunprint) {
1955                 if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
1956                         return (0);
1957         } else {
1958                 /*
1959                  * If we're not printing all unprinted aggregations, then the
1960                  * aggregation variable ID denotes a specific aggregation
1961                  * variable that we should print -- skip any other aggregations
1962                  * that we encounter.
1963                  */
1964                 if (agg->dtagd_nrecs == 0)
1965                         return (0);
1966
1967                 if (aggvarid != agg->dtagd_varid)
1968                         return (0);
1969         }
1970
1971         return (dt_print_aggs(&aggdata, 1, arg));
1972 }
1973
1974 int
1975 dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
1976     const char *option, const char *value)
1977 {
1978         int len, rval;
1979         char *msg;
1980         const char *errstr;
1981         dtrace_setoptdata_t optdata;
1982
1983         bzero(&optdata, sizeof (optdata));
1984         (void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);
1985
1986         if (dtrace_setopt(dtp, option, value) == 0) {
1987                 (void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);
1988                 optdata.dtsda_probe = data;
1989                 optdata.dtsda_option = option;
1990                 optdata.dtsda_handle = dtp;
1991
1992                 if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)
1993                         return (rval);
1994
1995                 return (0);
1996         }
1997
1998         errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));
1999         len = strlen(option) + strlen(value) + strlen(errstr) + 80;
2000         msg = alloca(len);
2001
2002         (void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",
2003             option, value, errstr);
2004
2005         if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)
2006                 return (0);
2007
2008         return (rval);
2009 }
2010
2011 static int
2012 dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu, dtrace_bufdesc_t *buf,
2013     dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
2014 {
2015         dtrace_epid_t id;
2016         size_t offs, start = buf->dtbd_oldest, end = buf->dtbd_size;
2017         int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
2018         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
2019         int rval, i, n;
2020         dtrace_epid_t last = DTRACE_EPIDNONE;
2021         dtrace_probedata_t data;
2022         uint64_t drops;
2023         caddr_t addr;
2024
2025         bzero(&data, sizeof (data));
2026         data.dtpda_handle = dtp;
2027         data.dtpda_cpu = cpu;
2028
2029 again:
2030         for (offs = start; offs < end; ) {
2031                 dtrace_eprobedesc_t *epd;
2032
2033                 /*
2034                  * We're guaranteed to have an ID.
2035                  */
2036                 id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
2037
2038                 if (id == DTRACE_EPIDNONE) {
2039                         /*
2040                          * This is filler to assure proper alignment of the
2041                          * next record; we simply ignore it.
2042                          */
2043                         offs += sizeof (id);
2044                         continue;
2045                 }
2046
2047                 if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
2048                     &data.dtpda_pdesc)) != 0)
2049                         return (rval);
2050
2051                 epd = data.dtpda_edesc;
2052                 data.dtpda_data = buf->dtbd_data + offs;
2053
2054                 if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
2055                         rval = dt_handle(dtp, &data);
2056
2057                         if (rval == DTRACE_CONSUME_NEXT)
2058                                 goto nextepid;
2059
2060                         if (rval == DTRACE_CONSUME_ERROR)
2061                                 return (-1);
2062                 }
2063
2064                 if (flow)
2065                         (void) dt_flowindent(dtp, &data, last, buf, offs);
2066
2067                 rval = (*efunc)(&data, arg);
2068
2069                 if (flow) {
2070                         if (data.dtpda_flow == DTRACEFLOW_ENTRY)
2071                                 data.dtpda_indent += 2;
2072                 }
2073
2074                 if (rval == DTRACE_CONSUME_NEXT)
2075                         goto nextepid;
2076
2077                 if (rval == DTRACE_CONSUME_ABORT)
2078                         return (dt_set_errno(dtp, EDT_DIRABORT));
2079
2080                 if (rval != DTRACE_CONSUME_THIS)
2081                         return (dt_set_errno(dtp, EDT_BADRVAL));
2082
2083                 for (i = 0; i < epd->dtepd_nrecs; i++) {
2084                         dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
2085                         dtrace_actkind_t act = rec->dtrd_action;
2086
2087                         data.dtpda_data = buf->dtbd_data + offs +
2088                             rec->dtrd_offset;
2089                         addr = data.dtpda_data;
2090
2091                         if (act == DTRACEACT_LIBACT) {
2092                                 uint64_t arg = rec->dtrd_arg;
2093                                 dtrace_aggvarid_t id;
2094
2095                                 switch (arg) {
2096                                 case DT_ACT_CLEAR:
2097                                         /* LINTED - alignment */
2098                                         id = *((dtrace_aggvarid_t *)addr);
2099                                         (void) dtrace_aggregate_walk(dtp,
2100                                             dt_clear_agg, &id);
2101                                         continue;
2102
2103                                 case DT_ACT_DENORMALIZE:
2104                                         /* LINTED - alignment */
2105                                         id = *((dtrace_aggvarid_t *)addr);
2106                                         (void) dtrace_aggregate_walk(dtp,
2107                                             dt_denormalize_agg, &id);
2108                                         continue;
2109
2110                                 case DT_ACT_FTRUNCATE:
2111                                         if (fp == NULL)
2112                                                 continue;
2113
2114                                         (void) fflush(fp);
2115                                         (void) ftruncate(fileno(fp), 0);
2116                                         (void) fseeko(fp, 0, SEEK_SET);
2117                                         continue;
2118
2119                                 case DT_ACT_NORMALIZE:
2120                                         if (i == epd->dtepd_nrecs - 1)
2121                                                 return (dt_set_errno(dtp,
2122                                                     EDT_BADNORMAL));
2123
2124                                         if (dt_normalize(dtp,
2125                                             buf->dtbd_data + offs, rec) != 0)
2126                                                 return (-1);
2127
2128                                         i++;
2129                                         continue;
2130
2131                                 case DT_ACT_SETOPT: {
2132                                         uint64_t *opts = dtp->dt_options;
2133                                         dtrace_recdesc_t *valrec;
2134                                         uint32_t valsize;
2135                                         caddr_t val;
2136                                         int rv;
2137
2138                                         if (i == epd->dtepd_nrecs - 1) {
2139                                                 return (dt_set_errno(dtp,
2140                                                     EDT_BADSETOPT));
2141                                         }
2142
2143                                         valrec = &epd->dtepd_rec[++i];
2144                                         valsize = valrec->dtrd_size;
2145
2146                                         if (valrec->dtrd_action != act ||
2147                                             valrec->dtrd_arg != arg) {
2148                                                 return (dt_set_errno(dtp,
2149                                                     EDT_BADSETOPT));
2150                                         }
2151
2152                                         if (valsize > sizeof (uint64_t)) {
2153                                                 val = buf->dtbd_data + offs +
2154                                                     valrec->dtrd_offset;
2155                                         } else {
2156                                                 val = "1";
2157                                         }
2158
2159                                         rv = dt_setopt(dtp, &data, addr, val);
2160
2161                                         if (rv != 0)
2162                                                 return (-1);
2163
2164                                         flow = (opts[DTRACEOPT_FLOWINDENT] !=
2165                                             DTRACEOPT_UNSET);
2166                                         quiet = (opts[DTRACEOPT_QUIET] !=
2167                                             DTRACEOPT_UNSET);
2168
2169                                         continue;
2170                                 }
2171
2172                                 case DT_ACT_TRUNC:
2173                                         if (i == epd->dtepd_nrecs - 1)
2174                                                 return (dt_set_errno(dtp,
2175                                                     EDT_BADTRUNC));
2176
2177                                         if (dt_trunc(dtp,
2178                                             buf->dtbd_data + offs, rec) != 0)
2179                                                 return (-1);
2180
2181                                         i++;
2182                                         continue;
2183
2184                                 default:
2185                                         continue;
2186                                 }
2187                         }
2188
2189                         rval = (*rfunc)(&data, rec, arg);
2190
2191                         if (rval == DTRACE_CONSUME_NEXT)
2192                                 continue;
2193
2194                         if (rval == DTRACE_CONSUME_ABORT)
2195                                 return (dt_set_errno(dtp, EDT_DIRABORT));
2196
2197                         if (rval != DTRACE_CONSUME_THIS)
2198                                 return (dt_set_errno(dtp, EDT_BADRVAL));
2199
2200                         if (act == DTRACEACT_STACK) {
2201                                 int depth = rec->dtrd_arg;
2202
2203                                 if (dt_print_stack(dtp, fp, NULL, addr, depth,
2204                                     rec->dtrd_size / depth) < 0)
2205                                         return (-1);
2206                                 goto nextrec;
2207                         }
2208
2209                         if (act == DTRACEACT_USTACK ||
2210                             act == DTRACEACT_JSTACK) {
2211                                 if (dt_print_ustack(dtp, fp, NULL,
2212                                     addr, rec->dtrd_arg) < 0)
2213                                         return (-1);
2214                                 goto nextrec;
2215                         }
2216
2217                         if (act == DTRACEACT_SYM) {
2218                                 if (dt_print_sym(dtp, fp, NULL, addr) < 0)
2219                                         return (-1);
2220                                 goto nextrec;
2221                         }
2222
2223                         if (act == DTRACEACT_MOD) {
2224                                 if (dt_print_mod(dtp, fp, NULL, addr) < 0)
2225                                         return (-1);
2226                                 goto nextrec;
2227                         }
2228
2229                         if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {
2230                                 if (dt_print_usym(dtp, fp, addr, act) < 0)
2231                                         return (-1);
2232                                 goto nextrec;
2233                         }
2234
2235                         if (act == DTRACEACT_UMOD) {
2236                                 if (dt_print_umod(dtp, fp, NULL, addr) < 0)
2237                                         return (-1);
2238                                 goto nextrec;
2239                         }
2240
2241                         if (act == DTRACEACT_PRINTM) {
2242                                 if (dt_print_memory(dtp, fp, addr) < 0)
2243                                         return (-1);
2244                                 goto nextrec;
2245                         }
2246
2247                         if (act == DTRACEACT_PRINTT) {
2248                                 if (dt_print_type(dtp, fp, addr) < 0)
2249                                         return (-1);
2250                                 goto nextrec;
2251                         }
2252
2253                         if (DTRACEACT_ISPRINTFLIKE(act)) {
2254                                 void *fmtdata;
2255                                 int (*func)(dtrace_hdl_t *, FILE *, void *,
2256                                     const dtrace_probedata_t *,
2257                                     const dtrace_recdesc_t *, uint_t,
2258                                     const void *buf, size_t);
2259
2260                                 if ((fmtdata = dt_format_lookup(dtp,
2261                                     rec->dtrd_format)) == NULL)
2262                                         goto nofmt;
2263
2264                                 switch (act) {
2265                                 case DTRACEACT_PRINTF:
2266                                         func = dtrace_fprintf;
2267                                         break;
2268                                 case DTRACEACT_PRINTA:
2269                                         func = dtrace_fprinta;
2270                                         break;
2271                                 case DTRACEACT_SYSTEM:
2272                                         func = dtrace_system;
2273                                         break;
2274                                 case DTRACEACT_FREOPEN:
2275                                         func = dtrace_freopen;
2276                                         break;
2277                                 }
2278
2279                                 n = (*func)(dtp, fp, fmtdata, &data,
2280                                     rec, epd->dtepd_nrecs - i,
2281                                     (uchar_t *)buf->dtbd_data + offs,
2282                                     buf->dtbd_size - offs);
2283
2284                                 if (n < 0)
2285                                         return (-1); /* errno is set for us */
2286
2287                                 if (n > 0)
2288                                         i += n - 1;
2289                                 goto nextrec;
2290                         }
2291
2292 nofmt:
2293                         if (act == DTRACEACT_PRINTA) {
2294                                 dt_print_aggdata_t pd;
2295                                 dtrace_aggvarid_t *aggvars;
2296                                 int j, naggvars = 0;
2297                                 size_t size = ((epd->dtepd_nrecs - i) *
2298                                     sizeof (dtrace_aggvarid_t));
2299
2300                                 if ((aggvars = dt_alloc(dtp, size)) == NULL)
2301                                         return (-1);
2302
2303                                 /*
2304                                  * This might be a printa() with multiple
2305                                  * aggregation variables.  We need to scan
2306                                  * forward through the records until we find
2307                                  * a record from a different statement.
2308                                  */
2309                                 for (j = i; j < epd->dtepd_nrecs; j++) {
2310                                         dtrace_recdesc_t *nrec;
2311                                         caddr_t naddr;
2312
2313                                         nrec = &epd->dtepd_rec[j];
2314
2315                                         if (nrec->dtrd_uarg != rec->dtrd_uarg)
2316                                                 break;
2317
2318                                         if (nrec->dtrd_action != act) {
2319                                                 return (dt_set_errno(dtp,
2320                                                     EDT_BADAGG));
2321                                         }
2322
2323                                         naddr = buf->dtbd_data + offs +
2324                                             nrec->dtrd_offset;
2325
2326                                         aggvars[naggvars++] =
2327                                             /* LINTED - alignment */
2328                                             *((dtrace_aggvarid_t *)naddr);
2329                                 }
2330
2331                                 i = j - 1;
2332                                 bzero(&pd, sizeof (pd));
2333                                 pd.dtpa_dtp = dtp;
2334                                 pd.dtpa_fp = fp;
2335
2336                                 assert(naggvars >= 1);
2337
2338                                 if (naggvars == 1) {
2339                                         pd.dtpa_id = aggvars[0];
2340                                         dt_free(dtp, aggvars);
2341
2342                                         if (dt_printf(dtp, fp, "\n") < 0 ||
2343                                             dtrace_aggregate_walk_sorted(dtp,
2344                                             dt_print_agg, &pd) < 0)
2345                                                 return (-1);
2346                                         goto nextrec;
2347                                 }
2348
2349                                 if (dt_printf(dtp, fp, "\n") < 0 ||
2350                                     dtrace_aggregate_walk_joined(dtp, aggvars,
2351                                     naggvars, dt_print_aggs, &pd) < 0) {
2352                                         dt_free(dtp, aggvars);
2353                                         return (-1);
2354                                 }
2355
2356                                 dt_free(dtp, aggvars);
2357                                 goto nextrec;
2358                         }
2359
2360                         switch (rec->dtrd_size) {
2361                         case sizeof (uint64_t):
2362                                 n = dt_printf(dtp, fp,
2363                                     quiet ? "%lld" : " %16lld",
2364                                     /* LINTED - alignment */
2365                                     *((unsigned long long *)addr));
2366                                 break;
2367                         case sizeof (uint32_t):
2368                                 n = dt_printf(dtp, fp, quiet ? "%d" : " %8d",
2369                                     /* LINTED - alignment */
2370                                     *((uint32_t *)addr));
2371                                 break;
2372                         case sizeof (uint16_t):
2373                                 n = dt_printf(dtp, fp, quiet ? "%d" : " %5d",
2374                                     /* LINTED - alignment */
2375                                     *((uint16_t *)addr));
2376                                 break;
2377                         case sizeof (uint8_t):
2378                                 n = dt_printf(dtp, fp, quiet ? "%d" : " %3d",
2379                                     *((uint8_t *)addr));
2380                                 break;
2381                         default:
2382                                 n = dt_print_bytes(dtp, fp, addr,
2383                                     rec->dtrd_size, 33, quiet, 0);
2384                                 break;
2385                         }
2386
2387                         if (n < 0)
2388                                 return (-1); /* errno is set for us */
2389
2390 nextrec:
2391                         if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)
2392                                 return (-1); /* errno is set for us */
2393                 }
2394
2395                 /*
2396                  * Call the record callback with a NULL record to indicate
2397                  * that we're done processing this EPID.
2398                  */
2399                 rval = (*rfunc)(&data, NULL, arg);
2400 nextepid:
2401                 offs += epd->dtepd_size;
2402                 last = id;
2403         }
2404
2405         if (buf->dtbd_oldest != 0 && start == buf->dtbd_oldest) {
2406                 end = buf->dtbd_oldest;
2407                 start = 0;
2408                 goto again;
2409         }
2410
2411         if ((drops = buf->dtbd_drops) == 0)
2412                 return (0);
2413
2414         /*
2415          * Explicitly zero the drops to prevent us from processing them again.
2416          */
2417         buf->dtbd_drops = 0;
2418
2419         return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
2420 }
2421
/*
 * State shared with the dt_consume_begin_probe(), dt_consume_begin_record()
 * and dt_consume_begin_error() interposition callbacks.  dt_consume_begin()
 * fills one of these in and passes its address as the callback argument.
 */
typedef struct dt_begin {
        /* caller's probe callback, invoked for probes that pass the filter */
        dtrace_consume_probe_f *dtbgn_probefunc;
        /* caller's record callback, invoked for every record */
        dtrace_consume_rec_f *dtbgn_recfunc;
        /* opaque argument forwarded to the probe and record callbacks */
        void *dtbgn_arg;
        /* error handler that was installed on the handle before interposing */
        dtrace_handle_err_f *dtbgn_errhdlr;
        /* opaque argument forwarded to dtbgn_errhdlr */
        void *dtbgn_errarg;
        /* non-zero: pass only dtrace:::BEGIN; zero: pass everything else */
        int dtbgn_beginonly;
} dt_begin_t;
2430
2431 static int
2432 dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
2433 {
2434         dt_begin_t *begin = (dt_begin_t *)arg;
2435         dtrace_probedesc_t *pd = data->dtpda_pdesc;
2436
2437         int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2438         int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2439
2440         if (begin->dtbgn_beginonly) {
2441                 if (!(r1 && r2))
2442                         return (DTRACE_CONSUME_NEXT);
2443         } else {
2444                 if (r1 && r2)
2445                         return (DTRACE_CONSUME_NEXT);
2446         }
2447
2448         /*
2449          * We have a record that we're interested in.  Now call the underlying
2450          * probe function...
2451          */
2452         return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
2453 }
2454
2455 static int
2456 dt_consume_begin_record(const dtrace_probedata_t *data,
2457     const dtrace_recdesc_t *rec, void *arg)
2458 {
2459         dt_begin_t *begin = (dt_begin_t *)arg;
2460
2461         return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
2462 }
2463
2464 static int
2465 dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
2466 {
2467         dt_begin_t *begin = (dt_begin_t *)arg;
2468         dtrace_probedesc_t *pd = data->dteda_pdesc;
2469
2470         int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
2471         int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
2472
2473         if (begin->dtbgn_beginonly) {
2474                 if (!(r1 && r2))
2475                         return (DTRACE_HANDLE_OK);
2476         } else {
2477                 if (r1 && r2)
2478                         return (DTRACE_HANDLE_OK);
2479         }
2480
2481         return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
2482 }
2483
2484 static int
2485 dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp, dtrace_bufdesc_t *buf,
2486     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2487 {
2488         /*
2489          * There's this idea that the BEGIN probe should be processed before
2490          * everything else, and that the END probe should be processed after
2491          * anything else.  In the common case, this is pretty easy to deal
2492          * with.  However, a situation may arise where the BEGIN enabling and
2493          * END enabling are on the same CPU, and some enabling in the middle
2494          * occurred on a different CPU.  To deal with this (blech!) we need to
2495          * consume the BEGIN buffer up until the end of the BEGIN probe, and
2496          * then set it aside.  We will then process every other CPU, and then
2497          * we'll return to the BEGIN CPU and process the rest of the data
2498          * (which will inevitably include the END probe, if any).  Making this
2499          * even more complicated (!) is the library's ERROR enabling.  Because
2500          * this enabling is processed before we even get into the consume call
2501          * back, any ERROR firing would result in the library's ERROR enabling
2502          * being processed twice -- once in our first pass (for BEGIN probes),
2503          * and again in our second pass (for everything but BEGIN probes).  To
2504          * deal with this, we interpose on the ERROR handler to assure that we
2505          * only process ERROR enablings induced by BEGIN enablings in the
2506          * first pass, and that we only process ERROR enablings _not_ induced
2507          * by BEGIN enablings in the second pass.
2508          */
2509         dt_begin_t begin;
2510         processorid_t cpu = dtp->dt_beganon;
2511         dtrace_bufdesc_t nbuf;
2512 #if !defined(sun)
2513         dtrace_bufdesc_t *pbuf;
2514 #endif
2515         int rval, i;
2516         static int max_ncpus;
2517         dtrace_optval_t size;
2518
2519         dtp->dt_beganon = -1;
2520
2521 #if defined(sun)
2522         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2523 #else
2524         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2525 #endif
2526                 /*
2527                  * We really don't expect this to fail, but it is at least
2528                  * technically possible for this to fail with ENOENT.  In this
2529                  * case, we just drive on...
2530                  */
2531                 if (errno == ENOENT)
2532                         return (0);
2533
2534                 return (dt_set_errno(dtp, errno));
2535         }
2536
2537         if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
2538                 /*
2539                  * This is the simple case.  We're either not stopped, or if
2540                  * we are, we actually processed any END probes on another
2541                  * CPU.  We can simply consume this buffer and return.
2542                  */
2543                 return (dt_consume_cpu(dtp, fp, cpu, buf, pf, rf, arg));
2544         }
2545
2546         begin.dtbgn_probefunc = pf;
2547         begin.dtbgn_recfunc = rf;
2548         begin.dtbgn_arg = arg;
2549         begin.dtbgn_beginonly = 1;
2550
2551         /*
2552          * We need to interpose on the ERROR handler to be sure that we
2553          * only process ERRORs induced by BEGIN.
2554          */
2555         begin.dtbgn_errhdlr = dtp->dt_errhdlr;
2556         begin.dtbgn_errarg = dtp->dt_errarg;
2557         dtp->dt_errhdlr = dt_consume_begin_error;
2558         dtp->dt_errarg = &begin;
2559
2560         rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2561             dt_consume_begin_record, &begin);
2562
2563         dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2564         dtp->dt_errarg = begin.dtbgn_errarg;
2565
2566         if (rval != 0)
2567                 return (rval);
2568
2569         /*
2570          * Now allocate a new buffer.  We'll use this to deal with every other
2571          * CPU.
2572          */
2573         bzero(&nbuf, sizeof (dtrace_bufdesc_t));
2574         (void) dtrace_getopt(dtp, "bufsize", &size);
2575         if ((nbuf.dtbd_data = malloc(size)) == NULL)
2576                 return (dt_set_errno(dtp, EDT_NOMEM));
2577
2578         if (max_ncpus == 0)
2579                 max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2580
2581         for (i = 0; i < max_ncpus; i++) {
2582                 nbuf.dtbd_cpu = i;
2583
2584                 if (i == cpu)
2585                         continue;
2586
2587 #if defined(sun)
2588                 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &nbuf) == -1) {
2589 #else
2590                 pbuf = &nbuf;
2591                 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &pbuf) == -1) {
2592 #endif
2593                         /*
2594                          * If we failed with ENOENT, it may be because the
2595                          * CPU was unconfigured -- this is okay.  Any other
2596                          * error, however, is unexpected.
2597                          */
2598                         if (errno == ENOENT)
2599                                 continue;
2600
2601                         free(nbuf.dtbd_data);
2602
2603                         return (dt_set_errno(dtp, errno));
2604                 }
2605
2606                 if ((rval = dt_consume_cpu(dtp, fp,
2607                     i, &nbuf, pf, rf, arg)) != 0) {
2608                         free(nbuf.dtbd_data);
2609                         return (rval);
2610                 }
2611         }
2612
2613         free(nbuf.dtbd_data);
2614
2615         /*
2616          * Okay -- we're done with the other buffers.  Now we want to
2617          * reconsume the first buffer -- but this time we're looking for
2618          * everything _but_ BEGIN.  And of course, in order to only consume
2619          * those ERRORs _not_ associated with BEGIN, we need to reinstall our
2620          * ERROR interposition function...
2621          */
2622         begin.dtbgn_beginonly = 0;
2623
2624         assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
2625         assert(begin.dtbgn_errarg == dtp->dt_errarg);
2626         dtp->dt_errhdlr = dt_consume_begin_error;
2627         dtp->dt_errarg = &begin;
2628
2629         rval = dt_consume_cpu(dtp, fp, cpu, buf, dt_consume_begin_probe,
2630             dt_consume_begin_record, &begin);
2631
2632         dtp->dt_errhdlr = begin.dtbgn_errhdlr;
2633         dtp->dt_errarg = begin.dtbgn_errarg;
2634
2635         return (rval);
2636 }
2637
2638 int
2639 dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
2640     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
2641 {
2642         dtrace_bufdesc_t *buf = &dtp->dt_buf;
2643         dtrace_optval_t size;
2644         static int max_ncpus;
2645         int i, rval;
2646         dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
2647         hrtime_t now = gethrtime();
2648
2649         if (dtp->dt_lastswitch != 0) {
2650                 if (now - dtp->dt_lastswitch < interval)
2651                         return (0);
2652
2653                 dtp->dt_lastswitch += interval;
2654         } else {
2655                 dtp->dt_lastswitch = now;
2656         }
2657
2658         if (!dtp->dt_active)
2659                 return (dt_set_errno(dtp, EINVAL));
2660
2661         if (max_ncpus == 0)
2662                 max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
2663
2664         if (pf == NULL)
2665                 pf = (dtrace_consume_probe_f *)dt_nullprobe;
2666
2667         if (rf == NULL)
2668                 rf = (dtrace_consume_rec_f *)dt_nullrec;
2669
2670         if (buf->dtbd_data == NULL) {
2671                 (void) dtrace_getopt(dtp, "bufsize", &size);
2672                 if ((buf->dtbd_data = malloc(size)) == NULL)
2673                         return (dt_set_errno(dtp, EDT_NOMEM));
2674
2675                 buf->dtbd_size = size;
2676         }
2677
2678         /*
2679          * If we have just begun, we want to first process the CPU that
2680          * executed the BEGIN probe (if any).
2681          */
2682         if (dtp->dt_active && dtp->dt_beganon != -1) {
2683                 buf->dtbd_cpu = dtp->dt_beganon;
2684                 if ((rval = dt_consume_begin(dtp, fp, buf, pf, rf, arg)) != 0)
2685                         return (rval);
2686         }
2687
2688         for (i = 0; i < max_ncpus; i++) {
2689                 buf->dtbd_cpu = i;
2690
2691                 /*
2692                  * If we have stopped, we want to process the CPU on which the
2693                  * END probe was processed only _after_ we have processed
2694                  * everything else.
2695                  */
2696                 if (dtp->dt_stopped && (i == dtp->dt_endedon))
2697                         continue;
2698
2699 #if defined(sun)
2700                 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2701 #else
2702                 if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2703 #endif
2704                         /*
2705                          * If we failed with ENOENT, it may be because the
2706                          * CPU was unconfigured -- this is okay.  Any other
2707                          * error, however, is unexpected.
2708                          */
2709                         if (errno == ENOENT)
2710                                 continue;
2711
2712                         return (dt_set_errno(dtp, errno));
2713                 }
2714
2715                 if ((rval = dt_consume_cpu(dtp, fp, i, buf, pf, rf, arg)) != 0)
2716                         return (rval);
2717         }
2718
2719         if (!dtp->dt_stopped)
2720                 return (0);
2721
2722         buf->dtbd_cpu = dtp->dt_endedon;
2723
2724 #if defined(sun)
2725         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2726 #else
2727         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2728 #endif
2729                 /*
2730                  * This _really_ shouldn't fail, but it is strictly speaking
2731                  * possible for this to return ENOENT if the CPU that called
2732                  * the END enabling somehow managed to become unconfigured.
2733                  * It's unclear how the user can possibly expect anything
2734                  * rational to happen in this case -- the state has been thrown
2735                  * out along with the unconfigured CPU -- so we'll just drive
2736                  * on...
2737                  */
2738                 if (errno == ENOENT)
2739                         return (0);
2740
2741                 return (dt_set_errno(dtp, errno));
2742         }
2743
2744         return (dt_consume_cpu(dtp, fp, dtp->dt_endedon, buf, pf, rf, arg));
2745 }