cddl/contrib/opensolaris/lib/libdtrace/common/dt_consume.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25
  26 /*
  27  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
  28  * Copyright (c) 2012 by Delphix. All rights reserved.
  29  */
  30
  31 #include <stdlib.h>
  32 #include <strings.h>
  33 #include <errno.h>
  34 #include <unistd.h>
  35 #include <limits.h>
  36 #include <assert.h>
  37 #include <ctype.h>
  38 #ifdef illumos
  39 #include <alloca.h>
  40 #endif
  41 #include <dt_impl.h>
  42 #include <dt_pq.h>
  43 #ifndef illumos
  44 #include <libproc_compat.h>
  45 #endif
  46
  47 #define DT_MASK_LO 0x00000000FFFFFFFFULL
  48
  49 /*
  50  * We declare this here because (1) we need it and (2) we want to avoid a
  51  * dependency on libm in libdtrace.
  52  */
  53 static long double
  54 dt_fabsl(long double x)
  55 {
  56         if (x < 0)
  57                 return (-x);
  58
  59         return (x);
  60 }
  61
  62 static int
  63 dt_ndigits(long long val)
  64 {
  65         int rval = 1;
  66         long long cmp = 10;
  67
  68         if (val < 0) {
  69                 val = val == INT64_MIN ? INT64_MAX : -val;
  70                 rval++;
  71         }
  72
  73         while (val > cmp && cmp > 0) {
  74                 rval++;
  75                 cmp *= 10;
  76         }
  77
  78         return (rval < 4 ? 4 : rval);
  79 }
  80
  81 /*
  82  * 128-bit arithmetic functions needed to support the stddev() aggregating
  83  * action.
  84  */
  85 static int
  86 dt_gt_128(uint64_t *a, uint64_t *b)
  87 {
  88         return (a[1] > b[1] || (a[1] == b[1] && a[0] > b[0]));
  89 }
  90
  91 static int
  92 dt_ge_128(uint64_t *a, uint64_t *b)
  93 {
  94         return (a[1] > b[1] || (a[1] == b[1] && a[0] >= b[0]));
  95 }
  96
  97 static int
  98 dt_le_128(uint64_t *a, uint64_t *b)
  99 {
 100         return (a[1] < b[1] || (a[1] == b[1] && a[0] <= b[0]));
 101 }
 102
 103 /*
 104  * Shift the 128-bit value in a by b. If b is positive, shift left.
 105  * If b is negative, shift right.
 106  */
 107 static void
 108 dt_shift_128(uint64_t *a, int b)
 109 {
 110         uint64_t mask;
 111
 112         if (b == 0)
 113                 return;
 114
 115         if (b < 0) {
 116                 b = -b;
 117                 if (b >= 64) {
 118                         a[0] = a[1] >> (b - 64);
 119                         a[1] = 0;
 120                 } else {
 121                         a[0] >>= b;
 122                         mask = 1LL << (64 - b);
 123                         mask -= 1;
 124                         a[0] |= ((a[1] & mask) << (64 - b));
 125                         a[1] >>= b;
 126                 }
 127         } else {
 128                 if (b >= 64) {
 129                         a[1] = a[0] << (b - 64);
 130                         a[0] = 0;
 131                 } else {
 132                         a[1] <<= b;
 133                         mask = a[0] >> (64 - b);
 134                         a[1] |= mask;
 135                         a[0] <<= b;
 136                 }
 137         }
 138 }
 139
 140 static int
 141 dt_nbits_128(uint64_t *a)
 142 {
 143         int nbits = 0;
 144         uint64_t tmp[2];
 145         uint64_t zero[2] = { 0, 0 };
 146
 147         tmp[0] = a[0];
 148         tmp[1] = a[1];
 149
 150         dt_shift_128(tmp, -1);
 151         while (dt_gt_128(tmp, zero)) {
 152                 dt_shift_128(tmp, -1);
 153                 nbits++;
 154         }
 155
 156         return (nbits);
 157 }
 158
 159 static void
 160 dt_subtract_128(uint64_t *minuend, uint64_t *subtrahend, uint64_t *difference)
 161 {
 162         uint64_t result[2];
 163
 164         result[0] = minuend[0] - subtrahend[0];
 165         result[1] = minuend[1] - subtrahend[1] -
 166             (minuend[0] < subtrahend[0] ? 1 : 0);
 167
 168         difference[0] = result[0];
 169         difference[1] = result[1];
 170 }
 171
 172 static void
 173 dt_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
 174 {
 175         uint64_t result[2];
 176
 177         result[0] = addend1[0] + addend2[0];
 178         result[1] = addend1[1] + addend2[1] +
 179             (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);
 180
 181         sum[0] = result[0];
 182         sum[1] = result[1];
 183 }
 184
 185 /*
 186  * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 187  * use native multiplication on those, and then re-combine into the
 188  * resulting 128-bit value.
 189  *
 190  * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 191  *     hi1 * hi2 << 64 +
 192  *     hi1 * lo2 << 32 +
 193  *     hi2 * lo1 << 32 +
 194  *     lo1 * lo2
 195  */
 196 static void
 197 dt_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
 198 {
 199         uint64_t hi1, hi2, lo1, lo2;
 200         uint64_t tmp[2];
 201
 202         hi1 = factor1 >> 32;
 203         hi2 = factor2 >> 32;
 204
 205         lo1 = factor1 & DT_MASK_LO;
 206         lo2 = factor2 & DT_MASK_LO;
 207
 208         product[0] = lo1 * lo2;
 209         product[1] = hi1 * hi2;
 210
 211         tmp[0] = hi1 * lo2;
 212         tmp[1] = 0;
 213         dt_shift_128(tmp, 32);
 214         dt_add_128(product, tmp, product);
 215
 216         tmp[0] = hi2 * lo1;
 217         tmp[1] = 0;
 218         dt_shift_128(tmp, 32);
 219         dt_add_128(product, tmp, product);
 220 }
 221
 222 /*
 223  * This is long-hand division.
 224  *
 225  * We initialize subtrahend by shifting divisor left as far as possible. We
 226  * loop, comparing subtrahend to dividend:  if subtrahend is smaller, we
 227  * subtract and set the appropriate bit in the result.  We then shift
 228  * subtrahend right by one bit for the next comparison.
 229  */
 230 static void
 231 dt_divide_128(uint64_t *dividend, uint64_t divisor, uint64_t *quotient)
 232 {
 233         uint64_t result[2] = { 0, 0 };
 234         uint64_t remainder[2];
 235         uint64_t subtrahend[2];
 236         uint64_t divisor_128[2];
 237         uint64_t mask[2] = { 1, 0 };
 238         int log = 0;
 239
 240         assert(divisor != 0);
 241
 242         divisor_128[0] = divisor;
 243         divisor_128[1] = 0;
 244
 245         remainder[0] = dividend[0];
 246         remainder[1] = dividend[1];
 247
 248         subtrahend[0] = divisor;
 249         subtrahend[1] = 0;
 250
 251         while (divisor > 0) {
 252                 log++;
 253                 divisor >>= 1;
 254         }
 255
 256         dt_shift_128(subtrahend, 128 - log);
 257         dt_shift_128(mask, 128 - log);
 258
 259         while (dt_ge_128(remainder, divisor_128)) {
 260                 if (dt_ge_128(remainder, subtrahend)) {
 261                         dt_subtract_128(remainder, subtrahend, remainder);
 262                         result[0] |= mask[0];
 263                         result[1] |= mask[1];
 264                 }
 265
 266                 dt_shift_128(subtrahend, -1);
 267                 dt_shift_128(mask, -1);
 268         }
 269
 270         quotient[0] = result[0];
 271         quotient[1] = result[1];
 272 }
 273
 274 /*
 275  * This is the long-hand method of calculating a square root.
 276  * The algorithm is as follows:
 277  *
 * 1. Group the digits by 2 from the right.
 * 2. Over the leftmost group, find the largest single-digit number
 *    whose square is less than or equal to that group.
 * 3. Subtract the value found in the previous step (step 2 or step 4,
 *    whichever was performed last) and bring down the next two-digit group.
 * 4. For the result R we have so far, find the largest single-digit number
 *    x such that 2 * R * 10 * x + x^2 is less than or equal to the result
 *    from step 3.
 *    (Note that this is doubling R and performing a decimal left-shift by 1
 *    and searching for the appropriate decimal to fill the one's place.)
 *    The value x is the next digit in the square root.
 288  * Repeat steps 3 and 4 until the desired precision is reached.  (We're
 289  * dealing with integers, so the above is sufficient.)
 290  *
 291  * In decimal, the square root of 582,734 would be calculated as so:
 292  *
 293  *     __7__6__3
 294  *    | 58 27 34
 295  *     -49       (7^2 == 49 => 7 is the first digit in the square root)
 296  *      --
 297  *       9 27    (Subtract and bring down the next group.)
 298  * 146   8 76    (2 * 7 * 10 * 6 + 6^2 == 876 => 6 is the next digit in
 299  *      -----     the square root)
 300  *         51 34 (Subtract and bring down the next group.)
 301  * 1523    45 69 (2 * 76 * 10 * 3 + 3^2 == 4569 => 3 is the next digit in
 302  *         -----  the square root)
 303  *          5 65 (remainder)
 304  *
 305  * The above algorithm applies similarly in binary, but note that the
 306  * only possible non-zero value for x in step 4 is 1, so step 4 becomes a
 307  * simple decision: is 2 * R * 2 * 1 + 1^2 (aka R << 2 + 1) less than the
 308  * preceding difference?
 309  *
 310  * In binary, the square root of 11011011 would be calculated as so:
 311  *
 312  *     __1__1__1__0
 313  *    | 11 01 10 11
 314  *      01          (0 << 2 + 1 == 1 < 11 => this bit is 1)
 315  *      --
 316  *      10 01 10 11
 317  * 101   1 01       (1 << 2 + 1 == 101 < 1001 => next bit is 1)
 318  *      -----
 319  *       1 00 10 11
 320  * 1101    11 01    (11 << 2 + 1 == 1101 < 10010 => next bit is 1)
 321  *       -------
 322  *          1 01 11
 323  * 11101    1 11 01 (111 << 2 + 1 == 11101 > 10111 => last bit is 0)
 324  *
 325  */
 326 static uint64_t
 327 dt_sqrt_128(uint64_t *square)
 328 {
 329         uint64_t result[2] = { 0, 0 };
 330         uint64_t diff[2] = { 0, 0 };
 331         uint64_t one[2] = { 1, 0 };
 332         uint64_t next_pair[2];
 333         uint64_t next_try[2];
 334         uint64_t bit_pairs, pair_shift;
 335         int i;
 336
 337         bit_pairs = dt_nbits_128(square) / 2;
 338         pair_shift = bit_pairs * 2;
 339
 340         for (i = 0; i <= bit_pairs; i++) {
 341                 /*
 342                  * Bring down the next pair of bits.
 343                  */
 344                 next_pair[0] = square[0];
 345                 next_pair[1] = square[1];
 346                 dt_shift_128(next_pair, -pair_shift);
 347                 next_pair[0] &= 0x3;
 348                 next_pair[1] = 0;
 349
 350                 dt_shift_128(diff, 2);
 351                 dt_add_128(diff, next_pair, diff);
 352
 353                 /*
 354                  * next_try = R << 2 + 1
 355                  */
 356                 next_try[0] = result[0];
 357                 next_try[1] = result[1];
 358                 dt_shift_128(next_try, 2);
 359                 dt_add_128(next_try, one, next_try);
 360
 361                 if (dt_le_128(next_try, diff)) {
 362                         dt_subtract_128(diff, next_try, diff);
 363                         dt_shift_128(result, 1);
 364                         dt_add_128(result, one, result);
 365                 } else {
 366                         dt_shift_128(result, 1);
 367                 }
 368
 369                 pair_shift -= 2;
 370         }
 371
 372         assert(result[1] == 0);
 373
 374         return (result[0]);
 375 }
 376
 377 uint64_t
 378 dt_stddev(uint64_t *data, uint64_t normal)
 379 {
 380         uint64_t avg_of_squares[2];
 381         uint64_t square_of_avg[2];
 382         int64_t norm_avg;
 383         uint64_t diff[2];
 384
 385         if (data[0] == 0)
 386                 return (0);
 387
 388         /*
 389          * The standard approximation for standard deviation is
 390          * sqrt(average(x**2) - average(x)**2), i.e. the square root
 391          * of the average of the squares minus the square of the average.
 392          * When normalizing, we should divide the sum of x**2 by normal**2.
 393          */
 394         dt_divide_128(data + 2, normal, avg_of_squares);
 395         dt_divide_128(avg_of_squares, normal, avg_of_squares);
 396         dt_divide_128(avg_of_squares, data[0], avg_of_squares);
 397
 398         norm_avg = (int64_t)data[1] / (int64_t)normal / (int64_t)data[0];
 399
 400         if (norm_avg < 0)
 401                 norm_avg = -norm_avg;
 402
 403         dt_multiply_128((uint64_t)norm_avg, (uint64_t)norm_avg, square_of_avg);
 404
 405         dt_subtract_128(avg_of_squares, square_of_avg, diff);
 406
 407         return (dt_sqrt_128(diff));
 408 }
 409
 410 static int
 411 dt_flowindent(dtrace_hdl_t *dtp, dtrace_probedata_t *data, dtrace_epid_t last,
 412     dtrace_bufdesc_t *buf, size_t offs)
 413 {
 414         dtrace_probedesc_t *pd = data->dtpda_pdesc, *npd;
 415         dtrace_eprobedesc_t *epd = data->dtpda_edesc, *nepd;
 416         char *p = pd->dtpd_provider, *n = pd->dtpd_name, *sub;
 417         dtrace_flowkind_t flow = DTRACEFLOW_NONE;
 418         const char *str = NULL;
 419         static const char *e_str[2] = { " -> ", " => " };
 420         static const char *r_str[2] = { " <- ", " <= " };
 421         static const char *ent = "entry", *ret = "return";
 422         static int entlen = 0, retlen = 0;
 423         dtrace_epid_t next, id = epd->dtepd_epid;
 424         int rval;
 425
 426         if (entlen == 0) {
 427                 assert(retlen == 0);
 428                 entlen = strlen(ent);
 429                 retlen = strlen(ret);
 430         }
 431
 432         /*
 433          * If the name of the probe is "entry" or ends with "-entry", we
 434          * treat it as an entry; if it is "return" or ends with "-return",
 435          * we treat it as a return.  (This allows application-provided probes
 436          * like "method-entry" or "function-entry" to participate in flow
 437          * indentation -- without accidentally misinterpreting popular probe
 438          * names like "carpentry", "gentry" or "Coventry".)
 439          */
 440         if ((sub = strstr(n, ent)) != NULL && sub[entlen] == '\0' &&
 441             (sub == n || sub[-1] == '-')) {
 442                 flow = DTRACEFLOW_ENTRY;
 443                 str = e_str[strcmp(p, "syscall") == 0];
 444         } else if ((sub = strstr(n, ret)) != NULL && sub[retlen] == '\0' &&
 445             (sub == n || sub[-1] == '-')) {
 446                 flow = DTRACEFLOW_RETURN;
 447                 str = r_str[strcmp(p, "syscall") == 0];
 448         }
 449
 450         /*
 451          * If we're going to indent this, we need to check the ID of our last
 452          * call.  If we're looking at the same probe ID but a different EPID,
 453          * we _don't_ want to indent.  (Yes, there are some minor holes in
 454          * this scheme -- it's a heuristic.)
 455          */
 456         if (flow == DTRACEFLOW_ENTRY) {
 457                 if ((last != DTRACE_EPIDNONE && id != last &&
 458                     pd->dtpd_id == dtp->dt_pdesc[last]->dtpd_id))
 459                         flow = DTRACEFLOW_NONE;
 460         }
 461
 462         /*
 463          * If we're going to unindent this, it's more difficult to see if
 464          * we don't actually want to unindent it -- we need to look at the
 465          * _next_ EPID.
 466          */
 467         if (flow == DTRACEFLOW_RETURN) {
 468                 offs += epd->dtepd_size;
 469
 470                 do {
 471                         if (offs >= buf->dtbd_size)
 472                                 goto out;
 473
 474                         next = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
 475
 476                         if (next == DTRACE_EPIDNONE)
 477                                 offs += sizeof (id);
 478                 } while (next == DTRACE_EPIDNONE);
 479
 480                 if ((rval = dt_epid_lookup(dtp, next, &nepd, &npd)) != 0)
 481                         return (rval);
 482
 483                 if (next != id && npd->dtpd_id == pd->dtpd_id)
 484                         flow = DTRACEFLOW_NONE;
 485         }
 486
 487 out:
 488         if (flow == DTRACEFLOW_ENTRY || flow == DTRACEFLOW_RETURN) {
 489                 data->dtpda_prefix = str;
 490         } else {
 491                 data->dtpda_prefix = "| ";
 492         }
 493
 494         if (flow == DTRACEFLOW_RETURN && data->dtpda_indent > 0)
 495                 data->dtpda_indent -= 2;
 496
 497         data->dtpda_flow = flow;
 498
 499         return (0);
 500 }
 501
 502 static int
 503 dt_nullprobe()
 504 {
 505         return (DTRACE_CONSUME_THIS);
 506 }
 507
 508 static int
 509 dt_nullrec()
 510 {
 511         return (DTRACE_CONSUME_NEXT);
 512 }
 513
 514 static void
 515 dt_quantize_total(dtrace_hdl_t *dtp, int64_t datum, long double *total)
 516 {
 517         long double val = dt_fabsl((long double)datum);
 518
 519         if (dtp->dt_options[DTRACEOPT_AGGZOOM] == DTRACEOPT_UNSET) {
 520                 *total += val;
 521                 return;
 522         }
 523
 524         /*
 525          * If we're zooming in on an aggregation, we want the height of the
 526          * highest value to be approximately 95% of total bar height -- so we
 527          * adjust up by the reciprocal of DTRACE_AGGZOOM_MAX when comparing to
 528          * our highest value.
 529          */
 530         val *= 1 / DTRACE_AGGZOOM_MAX;
 531
 532         if (*total < val)
 533                 *total = val;
 534 }
 535
 536 static int
 537 dt_print_quanthdr(dtrace_hdl_t *dtp, FILE *fp, int width)
 538 {
 539         return (dt_printf(dtp, fp, "\n%*s %41s %-9s\n",
 540             width ? width : 16, width ? "key" : "value",
 541             "------------- Distribution -------------", "count"));
 542 }
 543
 544 static int
 545 dt_print_quanthdr_packed(dtrace_hdl_t *dtp, FILE *fp, int width,
 546     const dtrace_aggdata_t *aggdata, dtrace_actkind_t action)
 547 {
 548         int min = aggdata->dtada_minbin, max = aggdata->dtada_maxbin;
 549         int minwidth, maxwidth, i;
 550
 551         assert(action == DTRACEAGG_QUANTIZE || action == DTRACEAGG_LQUANTIZE);
 552
 553         if (action == DTRACEAGG_QUANTIZE) {
 554                 if (min != 0 && min != DTRACE_QUANTIZE_ZEROBUCKET)
 555                         min--;
 556
 557                 if (max < DTRACE_QUANTIZE_NBUCKETS - 1)
 558                         max++;
 559
 560                 minwidth = dt_ndigits(DTRACE_QUANTIZE_BUCKETVAL(min));
 561                 maxwidth = dt_ndigits(DTRACE_QUANTIZE_BUCKETVAL(max));
 562         } else {
 563                 maxwidth = 8;
 564                 minwidth = maxwidth - 1;
 565                 max++;
 566         }
 567
 568         if (dt_printf(dtp, fp, "\n%*s %*s .",
 569             width, width > 0 ? "key" : "", minwidth, "min") < 0)
 570                 return (-1);
 571
 572         for (i = min; i <= max; i++) {
 573                 if (dt_printf(dtp, fp, "-") < 0)
 574                         return (-1);
 575         }
 576
 577         return (dt_printf(dtp, fp, ". %*s | count\n", -maxwidth, "max"));
 578 }
 579
 580 /*
 581  * We use a subset of the Unicode Block Elements (U+2588 through U+258F,
 582  * inclusive) to represent aggregations via UTF-8 -- which are expressed via
 583  * 3-byte UTF-8 sequences.
 584  */
 585 #define DTRACE_AGGUTF8_FULL     0x2588
 586 #define DTRACE_AGGUTF8_BASE     0x258f
 587 #define DTRACE_AGGUTF8_LEVELS   8
 588
 589 #define DTRACE_AGGUTF8_BYTE0(val)       (0xe0 | ((val) >> 12))
 590 #define DTRACE_AGGUTF8_BYTE1(val)       (0x80 | (((val) >> 6) & 0x3f))
 591 #define DTRACE_AGGUTF8_BYTE2(val)       (0x80 | ((val) & 0x3f))
 592
 593 static int
 594 dt_print_quantline_utf8(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
 595     uint64_t normal, long double total)
 596 {
 597         uint_t len = 40, i, whole, partial;
 598         long double f = (dt_fabsl((long double)val) * len) / total;
 599         const char *spaces = "                                        ";
 600
 601         whole = (uint_t)f;
 602         partial = (uint_t)((f - (long double)(uint_t)f) *
 603             (long double)DTRACE_AGGUTF8_LEVELS);
 604
 605         if (dt_printf(dtp, fp, "|") < 0)
 606                 return (-1);
 607
 608         for (i = 0; i < whole; i++) {
 609                 if (dt_printf(dtp, fp, "%c%c%c",
 610                     DTRACE_AGGUTF8_BYTE0(DTRACE_AGGUTF8_FULL),
 611                     DTRACE_AGGUTF8_BYTE1(DTRACE_AGGUTF8_FULL),
 612                     DTRACE_AGGUTF8_BYTE2(DTRACE_AGGUTF8_FULL)) < 0)
 613                         return (-1);
 614         }
 615
 616         if (partial != 0) {
 617                 partial = DTRACE_AGGUTF8_BASE - (partial - 1);
 618
 619                 if (dt_printf(dtp, fp, "%c%c%c",
 620                     DTRACE_AGGUTF8_BYTE0(partial),
 621                     DTRACE_AGGUTF8_BYTE1(partial),
 622                     DTRACE_AGGUTF8_BYTE2(partial)) < 0)
 623                         return (-1);
 624
 625                 i++;
 626         }
 627
 628         return (dt_printf(dtp, fp, "%s %-9lld\n", spaces + i,
 629             (long long)val / normal));
 630 }
 631
 632 static int
 633 dt_print_quantline(dtrace_hdl_t *dtp, FILE *fp, int64_t val,
 634     uint64_t normal, long double total, char positives, char negatives)
 635 {
 636         long double f;
 637         uint_t depth, len = 40;
 638
 639         const char *ats = "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@";
 640         const char *spaces = "                                        ";
 641
 642         assert(strlen(ats) == len && strlen(spaces) == len);
 643         assert(!(total == 0 && (positives || negatives)));
 644         assert(!(val < 0 && !negatives));
 645         assert(!(val > 0 && !positives));
 646         assert(!(val != 0 && total == 0));
 647
 648         if (!negatives) {
 649                 if (positives) {
 650                         if (dtp->dt_encoding == DT_ENCODING_UTF8) {
 651                                 return (dt_print_quantline_utf8(dtp, fp, val,
 652                                     normal, total));
 653                         }
 654
 655                         f = (dt_fabsl((long double)val) * len) / total;
 656                         depth = (uint_t)(f + 0.5);
 657                 } else {
 658                         depth = 0;
 659                 }
 660
 661                 return (dt_printf(dtp, fp, "|%s%s %-9lld\n", ats + len - depth,
 662                     spaces + depth, (long long)val / normal));
 663         }
 664
 665         if (!positives) {
 666                 f = (dt_fabsl((long double)val) * len) / total;
 667                 depth = (uint_t)(f + 0.5);
 668
 669                 return (dt_printf(dtp, fp, "%s%s| %-9lld\n", spaces + depth,
 670                     ats + len - depth, (long long)val / normal));
 671         }
 672
 673         /*
 674          * If we're here, we have both positive and negative bucket values.
 675          * To express this graphically, we're going to generate both positive
 676          * and negative bars separated by a centerline.  These bars are half
 677          * the size of normal quantize()/lquantize() bars, so we divide the
 678          * length in half before calculating the bar length.
 679          */
 680         len /= 2;
 681         ats = &ats[len];
 682         spaces = &spaces[len];
 683
 684         f = (dt_fabsl((long double)val) * len) / total;
 685         depth = (uint_t)(f + 0.5);
 686
 687         if (val <= 0) {
 688                 return (dt_printf(dtp, fp, "%s%s|%*s %-9lld\n", spaces + depth,
 689                     ats + len - depth, len, "", (long long)val / normal));
 690         } else {
 691                 return (dt_printf(dtp, fp, "%20s|%s%s %-9lld\n", "",
 692                     ats + len - depth, spaces + depth,
 693                     (long long)val / normal));
 694         }
 695 }
 696
 697 /*
 698  * As with UTF-8 printing of aggregations, we use a subset of the Unicode
 699  * Block Elements (U+2581 through U+2588, inclusive) to represent our packed
 700  * aggregation.
 701  */
 702 #define DTRACE_AGGPACK_BASE     0x2581
 703 #define DTRACE_AGGPACK_LEVELS   8
 704
 705 static int
 706 dt_print_packed(dtrace_hdl_t *dtp, FILE *fp,
 707     long double datum, long double total)
 708 {
 709         static boolean_t utf8_checked = B_FALSE;
 710         static boolean_t utf8;
 711         char *ascii = "__xxxxXX";
 712         char *neg = "vvvvVV";
 713         unsigned int len;
 714         long double val;
 715
 716         if (!utf8_checked) {
 717                 char *term;
 718
 719                 /*
 720                  * We want to determine if we can reasonably emit UTF-8 for our
 721                  * packed aggregation.  To do this, we will check for terminals
 722                  * that are known to be primitive to emit UTF-8 on these.
 723                  */
 724                 utf8_checked = B_TRUE;
 725
 726                 if (dtp->dt_encoding == DT_ENCODING_ASCII) {
 727                         utf8 = B_FALSE;
 728                 } else if (dtp->dt_encoding == DT_ENCODING_UTF8) {
 729                         utf8 = B_TRUE;
 730                 } else if ((term = getenv("TERM")) != NULL &&
 731                     (strcmp(term, "sun") == 0 ||
 732                     strcmp(term, "sun-color") == 0 ||
 733                     strcmp(term, "dumb") == 0)) {
 734                         utf8 = B_FALSE;
 735                 } else {
 736                         utf8 = B_TRUE;
 737                 }
 738         }
 739
 740         if (datum == 0)
 741                 return (dt_printf(dtp, fp, " "));
 742
 743         if (datum < 0) {
 744                 len = strlen(neg);
 745                 val = dt_fabsl(datum * (len - 1)) / total;
 746                 return (dt_printf(dtp, fp, "%c", neg[(uint_t)(val + 0.5)]));
 747         }
 748
 749         if (utf8) {
 750                 int block = DTRACE_AGGPACK_BASE + (unsigned int)(((datum *
 751                     (DTRACE_AGGPACK_LEVELS - 1)) / total) + 0.5);
 752
 753                 return (dt_printf(dtp, fp, "%c%c%c",
 754                     DTRACE_AGGUTF8_BYTE0(block),
 755                     DTRACE_AGGUTF8_BYTE1(block),
 756                     DTRACE_AGGUTF8_BYTE2(block)));
 757         }
 758
 759         len = strlen(ascii);
 760         val = (datum * (len - 1)) / total;
 761         return (dt_printf(dtp, fp, "%c", ascii[(uint_t)(val + 0.5)]));
 762 }
 763
 764 int
 765 dt_print_quantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
 766     size_t size, uint64_t normal)
 767 {
 768         const int64_t *data = addr;
 769         int i, first_bin = 0, last_bin = DTRACE_QUANTIZE_NBUCKETS - 1;
 770         long double total = 0;
 771         char positives = 0, negatives = 0;
 772
 773         if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
 774                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 775
 776         while (first_bin < DTRACE_QUANTIZE_NBUCKETS - 1 && data[first_bin] == 0)
 777                 first_bin++;
 778
 779         if (first_bin == DTRACE_QUANTIZE_NBUCKETS - 1) {
 780                 /*
 781                  * There isn't any data.  This is possible if the aggregation
 782                  * has been clear()'d or if negative increment values have been
 783                  * used.  Regardless, we'll print the buckets around 0.
 784                  */
 785                 first_bin = DTRACE_QUANTIZE_ZEROBUCKET - 1;
 786                 last_bin = DTRACE_QUANTIZE_ZEROBUCKET + 1;
 787         } else {
 788                 if (first_bin > 0)
 789                         first_bin--;
 790
 791                 while (last_bin > 0 && data[last_bin] == 0)
 792                         last_bin--;
 793
 794                 if (last_bin < DTRACE_QUANTIZE_NBUCKETS - 1)
 795                         last_bin++;
 796         }
 797
 798         for (i = first_bin; i <= last_bin; i++) {
 799                 positives |= (data[i] > 0);
 800                 negatives |= (data[i] < 0);
 801                 dt_quantize_total(dtp, data[i], &total);
 802         }
 803
 804         if (dt_print_quanthdr(dtp, fp, 0) < 0)
 805                 return (-1);
 806
 807         for (i = first_bin; i <= last_bin; i++) {
 808                 if (dt_printf(dtp, fp, "%16lld ",
 809                     (long long)DTRACE_QUANTIZE_BUCKETVAL(i)) < 0)
 810                         return (-1);
 811
 812                 if (dt_print_quantline(dtp, fp, data[i], normal, total,
 813                     positives, negatives) < 0)
 814                         return (-1);
 815         }
 816
 817         return (0);
 818 }
 819
 820 int
 821 dt_print_quantize_packed(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
 822     size_t size, const dtrace_aggdata_t *aggdata)
 823 {
 824         const int64_t *data = addr;
 825         long double total = 0, count = 0;
 826         int min = aggdata->dtada_minbin, max = aggdata->dtada_maxbin, i;
 827         int64_t minval, maxval;
 828
 829         if (size != DTRACE_QUANTIZE_NBUCKETS * sizeof (uint64_t))
 830                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 831
 832         if (min != 0 && min != DTRACE_QUANTIZE_ZEROBUCKET)
 833                 min--;
 834
 835         if (max < DTRACE_QUANTIZE_NBUCKETS - 1)
 836                 max++;
 837
 838         minval = DTRACE_QUANTIZE_BUCKETVAL(min);
 839         maxval = DTRACE_QUANTIZE_BUCKETVAL(max);
 840
 841         if (dt_printf(dtp, fp, " %*lld :", dt_ndigits(minval),
 842             (long long)minval) < 0)
 843                 return (-1);
 844
 845         for (i = min; i <= max; i++) {
 846                 dt_quantize_total(dtp, data[i], &total);
 847                 count += data[i];
 848         }
 849
 850         for (i = min; i <= max; i++) {
 851                 if (dt_print_packed(dtp, fp, data[i], total) < 0)
 852                         return (-1);
 853         }
 854
 855         if (dt_printf(dtp, fp, ": %*lld | %lld\n",
 856             -dt_ndigits(maxval), (long long)maxval, (long long)count) < 0)
 857                 return (-1);
 858
 859         return (0);
 860 }
 861
 862 int
 863 dt_print_lquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
 864     size_t size, uint64_t normal)
 865 {
 866         const int64_t *data = addr;
 867         int i, first_bin, last_bin, base;
 868         uint64_t arg;
 869         long double total = 0;
 870         uint16_t step, levels;
 871         char positives = 0, negatives = 0;
 872
 873         if (size < sizeof (uint64_t))
 874                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 875
 876         arg = *data++;
 877         size -= sizeof (uint64_t);
 878
 879         base = DTRACE_LQUANTIZE_BASE(arg);
 880         step = DTRACE_LQUANTIZE_STEP(arg);
 881         levels = DTRACE_LQUANTIZE_LEVELS(arg);
 882
 883         first_bin = 0;
 884         last_bin = levels + 1;
 885
 886         if (size != sizeof (uint64_t) * (levels + 2))
 887                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 888
 889         while (first_bin <= levels + 1 && data[first_bin] == 0)
 890                 first_bin++;
 891
 892         if (first_bin > levels + 1) {
 893                 first_bin = 0;
 894                 last_bin = 2;
 895         } else {
 896                 if (first_bin > 0)
 897                         first_bin--;
 898
 899                 while (last_bin > 0 && data[last_bin] == 0)
 900                         last_bin--;
 901
 902                 if (last_bin < levels + 1)
 903                         last_bin++;
 904         }
 905
 906         for (i = first_bin; i <= last_bin; i++) {
 907                 positives |= (data[i] > 0);
 908                 negatives |= (data[i] < 0);
 909                 dt_quantize_total(dtp, data[i], &total);
 910         }
 911
 912         if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
 913             "------------- Distribution -------------", "count") < 0)
 914                 return (-1);
 915
 916         for (i = first_bin; i <= last_bin; i++) {
 917                 char c[32];
 918                 int err;
 919
 920                 if (i == 0) {
 921                         (void) snprintf(c, sizeof (c), "< %d", base);
 922                         err = dt_printf(dtp, fp, "%16s ", c);
 923                 } else if (i == levels + 1) {
 924                         (void) snprintf(c, sizeof (c), ">= %d",
 925                             base + (levels * step));
 926                         err = dt_printf(dtp, fp, "%16s ", c);
 927                 } else {
 928                         err = dt_printf(dtp, fp, "%16d ",
 929                             base + (i - 1) * step);
 930                 }
 931
 932                 if (err < 0 || dt_print_quantline(dtp, fp, data[i], normal,
 933                     total, positives, negatives) < 0)
 934                         return (-1);
 935         }
 936
 937         return (0);
 938 }
 939
 940 /*ARGSUSED*/
 941 int
 942 dt_print_lquantize_packed(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
 943     size_t size, const dtrace_aggdata_t *aggdata)
 944 {
 945         const int64_t *data = addr;
 946         long double total = 0, count = 0;
 947         int min, max, base, err;
 948         uint64_t arg;
 949         uint16_t step, levels;
 950         char c[32];
 951         unsigned int i;
 952
 953         if (size < sizeof (uint64_t))
 954                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 955
 956         arg = *data++;
 957         size -= sizeof (uint64_t);
 958
 959         base = DTRACE_LQUANTIZE_BASE(arg);
 960         step = DTRACE_LQUANTIZE_STEP(arg);
 961         levels = DTRACE_LQUANTIZE_LEVELS(arg);
 962
 963         if (size != sizeof (uint64_t) * (levels + 2))
 964                 return (dt_set_errno(dtp, EDT_DMISMATCH));
 965
 966         min = 0;
 967         max = levels + 1;
 968
 969         if (min == 0) {
 970                 (void) snprintf(c, sizeof (c), "< %d", base);
 971                 err = dt_printf(dtp, fp, "%8s :", c);
 972         } else {
 973                 err = dt_printf(dtp, fp, "%8d :", base + (min - 1) * step);
 974         }
 975
 976         if (err < 0)
 977                 return (-1);
 978
 979         for (i = min; i <= max; i++) {
 980                 dt_quantize_total(dtp, data[i], &total);
 981                 count += data[i];
 982         }
 983
 984         for (i = min; i <= max; i++) {
 985                 if (dt_print_packed(dtp, fp, data[i], total) < 0)
 986                         return (-1);
 987         }
 988
 989         (void) snprintf(c, sizeof (c), ">= %d", base + (levels * step));
 990         return (dt_printf(dtp, fp, ": %-8s | %lld\n", c, (long long)count));
 991 }
 992
 993 int
 994 dt_print_llquantize(dtrace_hdl_t *dtp, FILE *fp, const void *addr,
 995     size_t size, uint64_t normal)
 996 {
 997         int i, first_bin, last_bin, bin = 1, order, levels;
 998         uint16_t factor, low, high, nsteps;
 999         const int64_t *data = addr;
1000         int64_t value = 1, next, step;
1001         char positives = 0, negatives = 0;
1002         long double total = 0;
1003         uint64_t arg;
1004         char c[32];
1005
1006         if (size < sizeof (uint64_t))
1007                 return (dt_set_errno(dtp, EDT_DMISMATCH));
1008
1009         arg = *data++;
1010         size -= sizeof (uint64_t);
1011
1012         factor = DTRACE_LLQUANTIZE_FACTOR(arg);
1013         low = DTRACE_LLQUANTIZE_LOW(arg);
1014         high = DTRACE_LLQUANTIZE_HIGH(arg);
1015         nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
1016
1017         /*
1018          * We don't expect to be handed invalid llquantize() parameters here,
1019          * but sanity check them (to a degree) nonetheless.
1020          */
1021         if (size > INT32_MAX || factor < 2 || low >= high ||
1022             nsteps == 0 || factor > nsteps)
1023                 return (dt_set_errno(dtp, EDT_DMISMATCH));
1024
1025         levels = (int)size / sizeof (uint64_t);
1026
1027         first_bin = 0;
1028         last_bin = levels - 1;
1029
1030         while (first_bin < levels && data[first_bin] == 0)
1031                 first_bin++;
1032
1033         if (first_bin == levels) {
1034                 first_bin = 0;
1035                 last_bin = 1;
1036         } else {
1037                 if (first_bin > 0)
1038                         first_bin--;
1039
1040                 while (last_bin > 0 && data[last_bin] == 0)
1041                         last_bin--;
1042
1043                 if (last_bin < levels - 1)
1044                         last_bin++;
1045         }
1046
1047         for (i = first_bin; i <= last_bin; i++) {
1048                 positives |= (data[i] > 0);
1049                 negatives |= (data[i] < 0);
1050                 dt_quantize_total(dtp, data[i], &total);
1051         }
1052
1053         if (dt_printf(dtp, fp, "\n%16s %41s %-9s\n", "value",
1054             "------------- Distribution -------------", "count") < 0)
1055                 return (-1);
1056
1057         for (order = 0; order < low; order++)
1058                 value *= factor;
1059
1060         next = value * factor;
1061         step = next > nsteps ? next / nsteps : 1;
1062
1063         if (first_bin == 0) {
1064                 (void) snprintf(c, sizeof (c), "< %lld", (long long)value);
1065
1066                 if (dt_printf(dtp, fp, "%16s ", c) < 0)
1067                         return (-1);
1068
1069                 if (dt_print_quantline(dtp, fp, data[0], normal,
1070                     total, positives, negatives) < 0)
1071                         return (-1);
1072         }
1073
1074         while (order <= high) {
1075                 if (bin >= first_bin && bin <= last_bin) {
1076                         if (dt_printf(dtp, fp, "%16lld ", (long long)value) < 0)
1077                                 return (-1);
1078
1079                         if (dt_print_quantline(dtp, fp, data[bin],
1080                             normal, total, positives, negatives) < 0)
1081                                 return (-1);
1082                 }
1083
1084                 assert(value < next);
1085                 bin++;
1086
1087                 if ((value += step) != next)
1088                         continue;
1089
1090                 next = value * factor;
1091                 step = next > nsteps ? next / nsteps : 1;
1092                 order++;
1093         }
1094
1095         if (last_bin < bin)
1096                 return (0);
1097
1098         assert(last_bin == bin);
1099         (void) snprintf(c, sizeof (c), ">= %lld", (long long)value);
1100
1101         if (dt_printf(dtp, fp, "%16s ", c) < 0)
1102                 return (-1);
1103
1104         return (dt_print_quantline(dtp, fp, data[bin], normal,
1105             total, positives, negatives));
1106 }
1107
1108 /*ARGSUSED*/
1109 static int
1110 dt_print_average(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
1111     size_t size, uint64_t normal)
1112 {
1113         /* LINTED - alignment */
1114         int64_t *data = (int64_t *)addr;
1115
1116         return (dt_printf(dtp, fp, " %16lld", data[0] ?
1117             (long long)(data[1] / (int64_t)normal / data[0]) : 0));
1118 }
1119
1120 /*ARGSUSED*/
1121 static int
1122 dt_print_stddev(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
1123     size_t size, uint64_t normal)
1124 {
1125         /* LINTED - alignment */
1126         uint64_t *data = (uint64_t *)addr;
1127
1128         return (dt_printf(dtp, fp, " %16llu", data[0] ?
1129             (unsigned long long) dt_stddev(data, normal) : 0));
1130 }
1131
1132 /*ARGSUSED*/
1133 static int
1134 dt_print_bytes(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr,
1135     size_t nbytes, int width, int quiet, int forceraw)
1136 {
1137         /*
1138          * If the byte stream is a series of printable characters, followed by
1139          * a terminating byte, we print it out as a string.  Otherwise, we
1140          * assume that it's something else and just print the bytes.
1141          */
1142         int i, j, margin = 5;
1143         char *c = (char *)addr;
1144
1145         if (nbytes == 0)
1146                 return (0);
1147
1148         if (forceraw)
1149                 goto raw;
1150
1151         if (dtp->dt_options[DTRACEOPT_RAWBYTES] != DTRACEOPT_UNSET)
1152                 goto raw;
1153
1154         for (i = 0; i < nbytes; i++) {
1155                 /*
1156                  * We define a "printable character" to be one for which
1157                  * isprint(3C) returns non-zero, isspace(3C) returns non-zero,
1158                  * or a character which is either backspace or the bell.
1159                  * Backspace and the bell are regrettably special because
1160                  * they fail the first two tests -- and yet they are entirely
1161                  * printable.  These are the only two control characters that
1162                  * have meaning for the terminal and for which isprint(3C) and
1163                  * isspace(3C) return 0.
1164                  */
1165                 if (isprint(c[i]) || isspace(c[i]) ||
1166                     c[i] == '\b' || c[i] == '\a')
1167                         continue;
1168
1169                 if (c[i] == '\0' && i > 0) {
1170                         /*
1171                          * This looks like it might be a string.  Before we
1172                          * assume that it is indeed a string, check the
1173                          * remainder of the byte range; if it contains
1174                          * additional non-nul characters, we'll assume that
1175                          * it's a binary stream that just happens to look like
1176                          * a string, and we'll print out the individual bytes.
1177                          */
1178                         for (j = i + 1; j < nbytes; j++) {
1179                                 if (c[j] != '\0')
1180                                         break;
1181                         }
1182
1183                         if (j != nbytes)
1184                                 break;
1185
1186                         if (quiet) {
1187                                 return (dt_printf(dtp, fp, "%s", c));
1188                         } else {
1189                                 return (dt_printf(dtp, fp, " %s%*s",
1190                                     width < 0 ? " " : "", width, c));
1191                         }
1192                 }
1193
1194                 break;
1195         }
1196
1197         if (i == nbytes) {
1198                 /*
1199                  * The byte range is all printable characters, but there is
1200                  * no trailing nul byte.  We'll assume that it's a string and
1201                  * print it as such.
1202                  */
1203                 char *s = alloca(nbytes + 1);
1204                 bcopy(c, s, nbytes);
1205                 s[nbytes] = '\0';
1206                 return (dt_printf(dtp, fp, "  %-*s", width, s));
1207         }
1208
1209 raw:
1210         if (dt_printf(dtp, fp, "\n%*s      ", margin, "") < 0)
1211                 return (-1);
1212
1213         for (i = 0; i < 16; i++)
1214                 if (dt_printf(dtp, fp, "  %c", "0123456789abcdef"[i]) < 0)
1215                         return (-1);
1216
1217         if (dt_printf(dtp, fp, "  0123456789abcdef\n") < 0)
1218                 return (-1);
1219
1220
1221         for (i = 0; i < nbytes; i += 16) {
1222                 if (dt_printf(dtp, fp, "%*s%5x:", margin, "", i) < 0)
1223                         return (-1);
1224
1225                 for (j = i; j < i + 16 && j < nbytes; j++) {
1226                         if (dt_printf(dtp, fp, " %02x", (uchar_t)c[j]) < 0)
1227                                 return (-1);
1228                 }
1229
1230                 while (j++ % 16) {
1231                         if (dt_printf(dtp, fp, "   ") < 0)
1232                                 return (-1);
1233                 }
1234
1235                 if (dt_printf(dtp, fp, "  ") < 0)
1236                         return (-1);
1237
1238                 for (j = i; j < i + 16 && j < nbytes; j++) {
1239                         if (dt_printf(dtp, fp, "%c",
1240                             c[j] < ' ' || c[j] > '~' ? '.' : c[j]) < 0)
1241                                 return (-1);
1242                 }
1243
1244                 if (dt_printf(dtp, fp, "\n") < 0)
1245                         return (-1);
1246         }
1247
1248         return (0);
1249 }
1250
1251 int
1252 dt_print_stack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
1253     caddr_t addr, int depth, int size)
1254 {
1255         dtrace_syminfo_t dts;
1256         GElf_Sym sym;
1257         int i, indent;
1258         char c[PATH_MAX * 2];
1259         uint64_t pc;
1260
1261         if (dt_printf(dtp, fp, "\n") < 0)
1262                 return (-1);
1263
1264         if (format == NULL)
1265                 format = "%s";
1266
1267         if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
1268                 indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
1269         else
1270                 indent = _dtrace_stkindent;
1271
1272         for (i = 0; i < depth; i++) {
1273                 switch (size) {
1274                 case sizeof (uint32_t):
1275                         /* LINTED - alignment */
1276                         pc = *((uint32_t *)addr);
1277                         break;
1278
1279                 case sizeof (uint64_t):
1280                         /* LINTED - alignment */
1281                         pc = *((uint64_t *)addr);
1282                         break;
1283
1284                 default:
1285                         return (dt_set_errno(dtp, EDT_BADSTACKPC));
1286                 }
1287
1288                 if (pc == 0)
1289                         break;
1290
1291                 addr += size;
1292
1293                 if (dt_printf(dtp, fp, "%*s", indent, "") < 0)
1294                         return (-1);
1295
1296                 if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1297                         if (pc > sym.st_value) {
1298                                 (void) snprintf(c, sizeof (c), "%s`%s+0x%llx",
1299                                     dts.dts_object, dts.dts_name,
1300                                     (u_longlong_t)(pc - sym.st_value));
1301                         } else {
1302                                 (void) snprintf(c, sizeof (c), "%s`%s",
1303                                     dts.dts_object, dts.dts_name);
1304                         }
1305                 } else {
1306                         /*
1307                          * We'll repeat the lookup, but this time we'll specify
1308                          * a NULL GElf_Sym -- indicating that we're only
1309                          * interested in the containing module.
1310                          */
1311                         if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1312                                 (void) snprintf(c, sizeof (c), "%s`0x%llx",
1313                                     dts.dts_object, (u_longlong_t)pc);
1314                         } else {
1315                                 (void) snprintf(c, sizeof (c), "0x%llx",
1316                                     (u_longlong_t)pc);
1317                         }
1318                 }
1319
1320                 if (dt_printf(dtp, fp, format, c) < 0)
1321                         return (-1);
1322
1323                 if (dt_printf(dtp, fp, "\n") < 0)
1324                         return (-1);
1325         }
1326
1327         return (0);
1328 }
1329
1330 int
1331 dt_print_ustack(dtrace_hdl_t *dtp, FILE *fp, const char *format,
1332     caddr_t addr, uint64_t arg)
1333 {
1334         /* LINTED - alignment */
1335         uint64_t *pc = (uint64_t *)addr;
1336         uint32_t depth = DTRACE_USTACK_NFRAMES(arg);
1337         uint32_t strsize = DTRACE_USTACK_STRSIZE(arg);
1338         const char *strbase = addr + (depth + 1) * sizeof (uint64_t);
1339         const char *str = strsize ? strbase : NULL;
1340         int err = 0;
1341
1342         char name[PATH_MAX], objname[PATH_MAX], c[PATH_MAX * 2];
1343         struct ps_prochandle *P;
1344         GElf_Sym sym;
1345         int i, indent;
1346         pid_t pid;
1347
1348         if (depth == 0)
1349                 return (0);
1350
1351         pid = (pid_t)*pc++;
1352
1353         if (dt_printf(dtp, fp, "\n") < 0)
1354                 return (-1);
1355
1356         if (format == NULL)
1357                 format = "%s";
1358
1359         if (dtp->dt_options[DTRACEOPT_STACKINDENT] != DTRACEOPT_UNSET)
1360                 indent = (int)dtp->dt_options[DTRACEOPT_STACKINDENT];
1361         else
1362                 indent = _dtrace_stkindent;
1363
1364         /*
1365          * Ultimately, we need to add an entry point in the library vector for
1366          * determining <symbol, offset> from <pid, address>.  For now, if
1367          * this is a vector open, we just print the raw address or string.
1368          */
1369         if (dtp->dt_vector == NULL)
1370                 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1371         else
1372                 P = NULL;
1373
1374         if (P != NULL)
1375                 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1376
1377         for (i = 0; i < depth && pc[i] != 0; i++) {
1378                 const prmap_t *map;
1379
1380                 if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1381                         break;
1382
1383                 if (P != NULL && Plookup_by_addr(P, pc[i],
1384                     name, sizeof (name), &sym) == 0) {
1385                         (void) Pobjname(P, pc[i], objname, sizeof (objname));
1386
1387                         if (pc[i] > sym.st_value) {
1388                                 (void) snprintf(c, sizeof (c),
1389                                     "%s`%s+0x%llx", dt_basename(objname), name,
1390                                     (u_longlong_t)(pc[i] - sym.st_value));
1391                         } else {
1392                                 (void) snprintf(c, sizeof (c),
1393                                     "%s`%s", dt_basename(objname), name);
1394                         }
1395                 } else if (str != NULL && str[0] != '\0' && str[0] != '@' &&
1396                     (P != NULL && ((map = Paddr_to_map(P, pc[i])) == NULL ||
1397                     (map->pr_mflags & MA_WRITE)))) {
1398                         /*
1399                          * If the current string pointer in the string table
1400                          * does not point to an empty string _and_ the program
1401                          * counter falls in a writable region, we'll use the
1402                          * string from the string table instead of the raw
1403                          * address.  This last condition is necessary because
1404                          * some (broken) ustack helpers will return a string
1405                          * even for a program counter that they can't
1406                          * identify.  If we have a string for a program
1407                          * counter that falls in a segment that isn't
1408                          * writable, we assume that we have fallen into this
1409                          * case and we refuse to use the string.
1410                          */
1411                         (void) snprintf(c, sizeof (c), "%s", str);
1412                 } else {
1413                         if (P != NULL && Pobjname(P, pc[i], objname,
1414                             sizeof (objname)) != 0) {
1415                                 (void) snprintf(c, sizeof (c), "%s`0x%llx",
1416                                     dt_basename(objname), (u_longlong_t)pc[i]);
1417                         } else {
1418                                 (void) snprintf(c, sizeof (c), "0x%llx",
1419                                     (u_longlong_t)pc[i]);
1420                         }
1421                 }
1422
1423                 if ((err = dt_printf(dtp, fp, format, c)) < 0)
1424                         break;
1425
1426                 if ((err = dt_printf(dtp, fp, "\n")) < 0)
1427                         break;
1428
1429                 if (str != NULL && str[0] == '@') {
1430                         /*
1431                          * If the first character of the string is an "at" sign,
1432                          * then the string is inferred to be an annotation --
1433                          * and it is printed out beneath the frame and offset
1434                          * with brackets.
1435                          */
1436                         if ((err = dt_printf(dtp, fp, "%*s", indent, "")) < 0)
1437                                 break;
1438
1439                         (void) snprintf(c, sizeof (c), "  [ %s ]", &str[1]);
1440
1441                         if ((err = dt_printf(dtp, fp, format, c)) < 0)
1442                                 break;
1443
1444                         if ((err = dt_printf(dtp, fp, "\n")) < 0)
1445                                 break;
1446                 }
1447
1448                 if (str != NULL) {
1449                         str += strlen(str) + 1;
1450                         if (str - strbase >= strsize)
1451                                 str = NULL;
1452                 }
1453         }
1454
1455         if (P != NULL) {
1456                 dt_proc_unlock(dtp, P);
1457                 dt_proc_release(dtp, P);
1458         }
1459
1460         return (err);
1461 }
1462
1463 static int
1464 dt_print_usym(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr, dtrace_actkind_t act)
1465 {
1466         /* LINTED - alignment */
1467         uint64_t pid = ((uint64_t *)addr)[0];
1468         /* LINTED - alignment */
1469         uint64_t pc = ((uint64_t *)addr)[1];
1470         const char *format = "  %-50s";
1471         char *s;
1472         int n, len = 256;
1473
1474         if (act == DTRACEACT_USYM && dtp->dt_vector == NULL) {
1475                 struct ps_prochandle *P;
1476
1477                 if ((P = dt_proc_grab(dtp, pid,
1478                     PGRAB_RDONLY | PGRAB_FORCE, 0)) != NULL) {
1479                         GElf_Sym sym;
1480
1481                         dt_proc_lock(dtp, P);
1482
1483                         if (Plookup_by_addr(P, pc, NULL, 0, &sym) == 0)
1484                                 pc = sym.st_value;
1485
1486                         dt_proc_unlock(dtp, P);
1487                         dt_proc_release(dtp, P);
1488                 }
1489         }
1490
1491         do {
1492                 n = len;
1493                 s = alloca(n);
1494         } while ((len = dtrace_uaddr2str(dtp, pid, pc, s, n)) > n);
1495
1496         return (dt_printf(dtp, fp, format, s));
1497 }
1498
1499 int
1500 dt_print_umod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1501 {
1502         /* LINTED - alignment */
1503         uint64_t pid = ((uint64_t *)addr)[0];
1504         /* LINTED - alignment */
1505         uint64_t pc = ((uint64_t *)addr)[1];
1506         int err = 0;
1507
1508         char objname[PATH_MAX], c[PATH_MAX * 2];
1509         struct ps_prochandle *P;
1510
1511         if (format == NULL)
1512                 format = "  %-50s";
1513
1514         /*
1515          * See the comment in dt_print_ustack() for the rationale for
1516          * printing raw addresses in the vectored case.
1517          */
1518         if (dtp->dt_vector == NULL)
1519                 P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0);
1520         else
1521                 P = NULL;
1522
1523         if (P != NULL)
1524                 dt_proc_lock(dtp, P); /* lock handle while we perform lookups */
1525
1526         if (P != NULL && Pobjname(P, pc, objname, sizeof (objname)) != 0) {
1527                 (void) snprintf(c, sizeof (c), "%s", dt_basename(objname));
1528         } else {
1529                 (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1530         }
1531
1532         err = dt_printf(dtp, fp, format, c);
1533
1534         if (P != NULL) {
1535                 dt_proc_unlock(dtp, P);
1536                 dt_proc_release(dtp, P);
1537         }
1538
1539         return (err);
1540 }
1541
1542 int
1543 dt_print_memory(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
1544 {
1545         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1546         size_t nbytes = *((uintptr_t *) addr);
1547
1548         return (dt_print_bytes(dtp, fp, addr + sizeof(uintptr_t),
1549             nbytes, 50, quiet, 1));
1550 }
1551
/*
 * State handed to the type-printing routines (dt_print_type_member(),
 * dt_print_type_width() and dt_print_type_data()).  A copy is made and
 * adjusted for each nested member or element that is printed.
 */
1552 typedef struct dt_type_cbdata {
1553         dtrace_hdl_t            *dtp;           /* handle passed to dt_printf() */
1554         dtrace_typeinfo_t       dtt;            /* type info; dtt_ctfp is used for ctf_*() calls */
1555         caddr_t                 addr;           /* start of the data to print */
1556         caddr_t                 addrend;        /* end of the data (printing loops while addr < addrend) */
1557         const char              *name;          /* member name printed with the value ("" for elements) */
1558         int                     f_type;         /* non-zero: print the type and name before the value */
1559         int                     indent;         /* nesting level; output is indented by indent * 4 */
1560         int                     type_width;     /* longest type name seen by dt_print_type_width() */
1561         int                     name_width;     /* longest member name plus array suffix seen there */
1562         FILE                    *fp;            /* stream passed to dt_printf() */
1563 } dt_type_cbdata_t;
1564
1565 static int      dt_print_type_data(dt_type_cbdata_t *, ctf_id_t);
1566
1567 static int
1568 dt_print_type_member(const char *name, ctf_id_t type, ulong_t off, void *arg)
1569 {
1570         dt_type_cbdata_t cbdata;
1571         dt_type_cbdata_t *cbdatap = arg;
1572         ssize_t ssz;
1573
1574         if ((ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type)) <= 0)
1575                 return (0);
1576
1577         off /= 8;
1578
1579         cbdata = *cbdatap;
1580         cbdata.name = name;
1581         cbdata.addr += off;
1582         cbdata.addrend = cbdata.addr + ssz;
1583
1584         return (dt_print_type_data(&cbdata, type));
1585 }
1586
1587 static int
1588 dt_print_type_width(const char *name, ctf_id_t type, ulong_t off, void *arg)
1589 {
1590         char buf[DT_TYPE_NAMELEN];
1591         char *p;
1592         dt_type_cbdata_t *cbdatap = arg;
1593         size_t sz = strlen(name);
1594
1595         ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf));
1596
1597         if ((p = strchr(buf, '[')) != NULL)
1598                 p[-1] = '\0';
1599         else
1600                 p = "";
1601
1602         sz += strlen(p);
1603
1604         if (sz > cbdatap->name_width)
1605                 cbdatap->name_width = sz;
1606
1607         sz = strlen(buf);
1608
1609         if (sz > cbdatap->type_width)
1610                 cbdatap->type_width = sz;
1611
1612         return (0);
1613 }
1614
1615 static int
1616 dt_print_type_data(dt_type_cbdata_t *cbdatap, ctf_id_t type)
1617 {
1618         caddr_t addr = cbdatap->addr;
1619         caddr_t addrend = cbdatap->addrend;
1620         char buf[DT_TYPE_NAMELEN];
1621         char *p;
1622         int cnt = 0;
1623         uint_t kind = ctf_type_kind(cbdatap->dtt.dtt_ctfp, type);
1624         ssize_t ssz = ctf_type_size(cbdatap->dtt.dtt_ctfp, type);
1625
1626         ctf_type_name(cbdatap->dtt.dtt_ctfp, type, buf, sizeof (buf));
1627
1628         if ((p = strchr(buf, '[')) != NULL)
1629                 p[-1] = '\0';
1630         else
1631                 p = "";
1632
1633         if (cbdatap->f_type) {
1634                 int type_width = roundup(cbdatap->type_width + 1, 4);
1635                 int name_width = roundup(cbdatap->name_width + 1, 4);
1636
1637                 name_width -= strlen(cbdatap->name);
1638
1639                 dt_printf(cbdatap->dtp, cbdatap->fp, "%*s%-*s%s%-*s     = ",cbdatap->indent * 4,"",type_width,buf,cbdatap->name,name_width,p);
1640         }
1641
1642         while (addr < addrend) {
1643                 dt_type_cbdata_t cbdata;
1644                 ctf_arinfo_t arinfo;
1645                 ctf_encoding_t cte;
1646                 uintptr_t *up;
1647                 void *vp = addr;
1648                 cbdata = *cbdatap;
1649                 cbdata.name = "";
1650                 cbdata.addr = addr;
1651                 cbdata.addrend = addr + ssz;
1652                 cbdata.f_type = 0;
1653                 cbdata.indent++;
1654                 cbdata.type_width = 0;
1655                 cbdata.name_width = 0;
1656
1657                 if (cnt > 0)
1658                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s", cbdatap->indent * 4,"");
1659
1660                 switch (kind) {
1661                 case CTF_K_INTEGER:
1662                         if (ctf_type_encoding(cbdatap->dtt.dtt_ctfp, type, &cte) != 0)
1663                                 return (-1);
1664                         if ((cte.cte_format & CTF_INT_SIGNED) != 0)
1665                                 switch (cte.cte_bits) {
1666                                 case 8:
1667                                         if (isprint(*((char *) vp)))
1668                                                 dt_printf(cbdatap->dtp, cbdatap->fp, "'%c', ", *((char *) vp));
1669                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((char *) vp), *((char *) vp));
1670                                         break;
1671                                 case 16:
1672                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%hd (0x%hx);\n", *((short *) vp), *((u_short *) vp));
1673                                         break;
1674                                 case 32:
1675                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%d (0x%x);\n", *((int *) vp), *((u_int *) vp));
1676                                         break;
1677                                 case 64:
1678                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%jd (0x%jx);\n", *((long long *) vp), *((unsigned long long *) vp));
1679                                         break;
1680                                 default:
1681                                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1682                                         break;
1683                                 }
1684                         else
1685                                 switch (cte.cte_bits) {
1686                                 case 8:
1687                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((uint8_t *) vp) & 0xff, *((uint8_t *) vp) & 0xff);
1688                                         break;
1689                                 case 16:
1690                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%hu (0x%hx);\n", *((u_short *) vp), *((u_short *) vp));
1691                                         break;
1692                                 case 32:
1693                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%u (0x%x);\n", *((u_int *) vp), *((u_int *) vp));
1694                                         break;
1695                                 case 64:
1696                                         dt_printf(cbdatap->dtp, cbdatap->fp, "%ju (0x%jx);\n", *((unsigned long long *) vp), *((unsigned long long *) vp));
1697                                         break;
1698                                 default:
1699                                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_INTEGER: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1700                                         break;
1701                                 }
1702                         break;
1703                 case CTF_K_FLOAT:
1704                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FLOAT: format %x offset %u bits %u\n",cte.cte_format,cte.cte_offset,cte.cte_bits);
1705                         break;
1706                 case CTF_K_POINTER:
1707                         dt_printf(cbdatap->dtp, cbdatap->fp, "%p;\n", *((void **) addr));
1708                         break;
1709                 case CTF_K_ARRAY:
1710                         if (ctf_array_info(cbdatap->dtt.dtt_ctfp, type, &arinfo) != 0)
1711                                 return (-1);
1712                         dt_printf(cbdatap->dtp, cbdatap->fp, "{\n%*s",cbdata.indent * 4,"");
1713                         dt_print_type_data(&cbdata, arinfo.ctr_contents);
1714                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1715                         break;
1716                 case CTF_K_FUNCTION:
1717                         dt_printf(cbdatap->dtp, cbdatap->fp, "CTF_K_FUNCTION:\n");
1718                         break;
1719                 case CTF_K_STRUCT:
1720                         cbdata.f_type = 1;
1721                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1722                             dt_print_type_width, &cbdata) != 0)
1723                                 return (-1);
1724                         dt_printf(cbdatap->dtp, cbdatap->fp, "{\n");
1725                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1726                             dt_print_type_member, &cbdata) != 0)
1727                                 return (-1);
1728                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1729                         break;
1730                 case CTF_K_UNION:
1731                         cbdata.f_type = 1;
1732                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1733                             dt_print_type_width, &cbdata) != 0)
1734                                 return (-1);
1735                         dt_printf(cbdatap->dtp, cbdatap->fp, "{\n");
1736                         if (ctf_member_iter(cbdatap->dtt.dtt_ctfp, type,
1737                             dt_print_type_member, &cbdata) != 0)
1738                                 return (-1);
1739                         dt_printf(cbdatap->dtp, cbdatap->fp, "%*s};\n",cbdatap->indent * 4,"");
1740                         break;
1741                 case CTF_K_ENUM:
1742                         dt_printf(cbdatap->dtp, cbdatap->fp, "%s;\n", ctf_enum_name(cbdatap->dtt.dtt_ctfp, type, *((int *) vp)));
1743                         break;
1744                 case CTF_K_TYPEDEF:
1745                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1746                         break;
1747                 case CTF_K_VOLATILE:
1748                         if (cbdatap->f_type)
1749                                 dt_printf(cbdatap->dtp, cbdatap->fp, "volatile ");
1750                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1751                         break;
1752                 case CTF_K_CONST:
1753                         if (cbdatap->f_type)
1754                                 dt_printf(cbdatap->dtp, cbdatap->fp, "const ");
1755                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1756                         break;
1757                 case CTF_K_RESTRICT:
1758                         if (cbdatap->f_type)
1759                                 dt_printf(cbdatap->dtp, cbdatap->fp, "restrict ");
1760                         dt_print_type_data(&cbdata, ctf_type_reference(cbdatap->dtt.dtt_ctfp,type));
1761                         break;
1762                 default:
1763                         break;
1764                 }
1765
1766                 addr += ssz;
1767                 cnt++;
1768         }
1769
1770         return (0);
1771 }
1772
1773 static int
1774 dt_print_type(dtrace_hdl_t *dtp, FILE *fp, caddr_t addr)
1775 {
1776         caddr_t addrend;
1777         char *p;
1778         dtrace_typeinfo_t dtt;
1779         dt_type_cbdata_t cbdata;
1780         int num = 0;
1781         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
1782         ssize_t ssz;
1783
1784         if (!quiet)
1785                 dt_printf(dtp, fp, "\n");
1786
1787         /* Get the total number of bytes of data buffered. */
1788         size_t nbytes = *((uintptr_t *) addr);
1789         addr += sizeof(uintptr_t);
1790
1791         /*
1792          * Get the size of the type so that we can check that it matches
1793          * the CTF data we look up and so that we can figure out how many
1794          * type elements are buffered.
1795          */
1796         size_t typs = *((uintptr_t *) addr);
1797         addr += sizeof(uintptr_t);
1798
1799         /*
1800          * Point to the type string in the buffer. Get it's string
1801          * length and round it up to become the offset to the start
1802          * of the buffered type data which we would like to be aligned
1803          * for easy access.
1804          */
1805         char *strp = (char *) addr;
1806         int offset = roundup(strlen(strp) + 1, sizeof(uintptr_t));
1807
1808         /*
1809          * The type string might have a format such as 'int [20]'.
1810          * Check if there is an array dimension present.
1811          */
1812         if ((p = strchr(strp, '[')) != NULL) {
1813                 /* Strip off the array dimension. */
1814                 *p++ = '\0';
1815
1816                 for (; *p != '\0' && *p != ']'; p++)
1817                         num = num * 10 + *p - '0';
1818         } else
1819                 /* No array dimension, so default. */
1820                 num = 1;
1821
1822         /* Lookup the CTF type from the type string. */
1823         if (dtrace_lookup_by_type(dtp,  DTRACE_OBJ_EVERY, strp, &dtt) < 0)
1824                 return (-1);
1825
1826         /* Offset the buffer address to the start of the data... */
1827         addr += offset;
1828
1829         ssz = ctf_type_size(dtt.dtt_ctfp, dtt.dtt_type);
1830
1831         if (typs != ssz) {
1832                 printf("Expected type size from buffer (%lu) to match type size looked up now (%ld)\n", (u_long) typs, (long) ssz);
1833                 return (-1);
1834         }
1835
1836         cbdata.dtp = dtp;
1837         cbdata.dtt = dtt;
1838         cbdata.name = "";
1839         cbdata.addr = addr;
1840         cbdata.addrend = addr + nbytes;
1841         cbdata.indent = 1;
1842         cbdata.f_type = 1;
1843         cbdata.type_width = 0;
1844         cbdata.name_width = 0;
1845         cbdata.fp = fp;
1846
1847         return (dt_print_type_data(&cbdata, dtt.dtt_type));
1848 }
1849
1850 static int
1851 dt_print_sym(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1852 {
1853         /* LINTED - alignment */
1854         uint64_t pc = *((uint64_t *)addr);
1855         dtrace_syminfo_t dts;
1856         GElf_Sym sym;
1857         char c[PATH_MAX * 2];
1858
1859         if (format == NULL)
1860                 format = "  %-50s";
1861
1862         if (dtrace_lookup_by_addr(dtp, pc, &sym, &dts) == 0) {
1863                 (void) snprintf(c, sizeof (c), "%s`%s",
1864                     dts.dts_object, dts.dts_name);
1865         } else {
1866                 /*
1867                  * We'll repeat the lookup, but this time we'll specify a
1868                  * NULL GElf_Sym -- indicating that we're only interested in
1869                  * the containing module.
1870                  */
1871                 if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1872                         (void) snprintf(c, sizeof (c), "%s`0x%llx",
1873                             dts.dts_object, (u_longlong_t)pc);
1874                 } else {
1875                         (void) snprintf(c, sizeof (c), "0x%llx",
1876                             (u_longlong_t)pc);
1877                 }
1878         }
1879
1880         if (dt_printf(dtp, fp, format, c) < 0)
1881                 return (-1);
1882
1883         return (0);
1884 }
1885
1886 int
1887 dt_print_mod(dtrace_hdl_t *dtp, FILE *fp, const char *format, caddr_t addr)
1888 {
1889         /* LINTED - alignment */
1890         uint64_t pc = *((uint64_t *)addr);
1891         dtrace_syminfo_t dts;
1892         char c[PATH_MAX * 2];
1893
1894         if (format == NULL)
1895                 format = "  %-50s";
1896
1897         if (dtrace_lookup_by_addr(dtp, pc, NULL, &dts) == 0) {
1898                 (void) snprintf(c, sizeof (c), "%s", dts.dts_object);
1899         } else {
1900                 (void) snprintf(c, sizeof (c), "0x%llx", (u_longlong_t)pc);
1901         }
1902
1903         if (dt_printf(dtp, fp, format, c) < 0)
1904                 return (-1);
1905
1906         return (0);
1907 }
1908
1909 typedef struct dt_normal {
1910         dtrace_aggvarid_t dtnd_id;
1911         uint64_t dtnd_normal;
1912 } dt_normal_t;
1913
1914 static int
1915 dt_normalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1916 {
1917         dt_normal_t *normal = arg;
1918         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1919         dtrace_aggvarid_t id = normal->dtnd_id;
1920
1921         if (agg->dtagd_nrecs == 0)
1922                 return (DTRACE_AGGWALK_NEXT);
1923
1924         if (agg->dtagd_varid != id)
1925                 return (DTRACE_AGGWALK_NEXT);
1926
1927         ((dtrace_aggdata_t *)aggdata)->dtada_normal = normal->dtnd_normal;
1928         return (DTRACE_AGGWALK_NORMALIZE);
1929 }
1930
1931 static int
1932 dt_normalize(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
1933 {
1934         dt_normal_t normal;
1935         caddr_t addr;
1936
1937         /*
1938          * We (should) have two records:  the aggregation ID followed by the
1939          * normalization value.
1940          */
1941         addr = base + rec->dtrd_offset;
1942
1943         if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
1944                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1945
1946         /* LINTED - alignment */
1947         normal.dtnd_id = *((dtrace_aggvarid_t *)addr);
1948         rec++;
1949
1950         if (rec->dtrd_action != DTRACEACT_LIBACT)
1951                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1952
1953         if (rec->dtrd_arg != DT_ACT_NORMALIZE)
1954                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1955
1956         addr = base + rec->dtrd_offset;
1957
1958         switch (rec->dtrd_size) {
1959         case sizeof (uint64_t):
1960                 /* LINTED - alignment */
1961                 normal.dtnd_normal = *((uint64_t *)addr);
1962                 break;
1963         case sizeof (uint32_t):
1964                 /* LINTED - alignment */
1965                 normal.dtnd_normal = *((uint32_t *)addr);
1966                 break;
1967         case sizeof (uint16_t):
1968                 /* LINTED - alignment */
1969                 normal.dtnd_normal = *((uint16_t *)addr);
1970                 break;
1971         case sizeof (uint8_t):
1972                 normal.dtnd_normal = *((uint8_t *)addr);
1973                 break;
1974         default:
1975                 return (dt_set_errno(dtp, EDT_BADNORMAL));
1976         }
1977
1978         (void) dtrace_aggregate_walk(dtp, dt_normalize_agg, &normal);
1979
1980         return (0);
1981 }
1982
1983 static int
1984 dt_denormalize_agg(const dtrace_aggdata_t *aggdata, void *arg)
1985 {
1986         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1987         dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
1988
1989         if (agg->dtagd_nrecs == 0)
1990                 return (DTRACE_AGGWALK_NEXT);
1991
1992         if (agg->dtagd_varid != id)
1993                 return (DTRACE_AGGWALK_NEXT);
1994
1995         return (DTRACE_AGGWALK_DENORMALIZE);
1996 }
1997
1998 static int
1999 dt_clear_agg(const dtrace_aggdata_t *aggdata, void *arg)
2000 {
2001         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2002         dtrace_aggvarid_t id = *((dtrace_aggvarid_t *)arg);
2003
2004         if (agg->dtagd_nrecs == 0)
2005                 return (DTRACE_AGGWALK_NEXT);
2006
2007         if (agg->dtagd_varid != id)
2008                 return (DTRACE_AGGWALK_NEXT);
2009
2010         return (DTRACE_AGGWALK_CLEAR);
2011 }
2012
2013 typedef struct dt_trunc {
2014         dtrace_aggvarid_t dttd_id;
2015         uint64_t dttd_remaining;
2016 } dt_trunc_t;
2017
2018 static int
2019 dt_trunc_agg(const dtrace_aggdata_t *aggdata, void *arg)
2020 {
2021         dt_trunc_t *trunc = arg;
2022         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2023         dtrace_aggvarid_t id = trunc->dttd_id;
2024
2025         if (agg->dtagd_nrecs == 0)
2026                 return (DTRACE_AGGWALK_NEXT);
2027
2028         if (agg->dtagd_varid != id)
2029                 return (DTRACE_AGGWALK_NEXT);
2030
2031         if (trunc->dttd_remaining == 0)
2032                 return (DTRACE_AGGWALK_REMOVE);
2033
2034         trunc->dttd_remaining--;
2035         return (DTRACE_AGGWALK_NEXT);
2036 }
2037
2038 static int
2039 dt_trunc(dtrace_hdl_t *dtp, caddr_t base, dtrace_recdesc_t *rec)
2040 {
2041         dt_trunc_t trunc;
2042         caddr_t addr;
2043         int64_t remaining;
2044         int (*func)(dtrace_hdl_t *, dtrace_aggregate_f *, void *);
2045
2046         /*
2047          * We (should) have two records:  the aggregation ID followed by the
2048          * number of aggregation entries after which the aggregation is to be
2049          * truncated.
2050          */
2051         addr = base + rec->dtrd_offset;
2052
2053         if (rec->dtrd_size != sizeof (dtrace_aggvarid_t))
2054                 return (dt_set_errno(dtp, EDT_BADTRUNC));
2055
2056         /* LINTED - alignment */
2057         trunc.dttd_id = *((dtrace_aggvarid_t *)addr);
2058         rec++;
2059
2060         if (rec->dtrd_action != DTRACEACT_LIBACT)
2061                 return (dt_set_errno(dtp, EDT_BADTRUNC));
2062
2063         if (rec->dtrd_arg != DT_ACT_TRUNC)
2064                 return (dt_set_errno(dtp, EDT_BADTRUNC));
2065
2066         addr = base + rec->dtrd_offset;
2067
2068         switch (rec->dtrd_size) {
2069         case sizeof (uint64_t):
2070                 /* LINTED - alignment */
2071                 remaining = *((int64_t *)addr);
2072                 break;
2073         case sizeof (uint32_t):
2074                 /* LINTED - alignment */
2075                 remaining = *((int32_t *)addr);
2076                 break;
2077         case sizeof (uint16_t):
2078                 /* LINTED - alignment */
2079                 remaining = *((int16_t *)addr);
2080                 break;
2081         case sizeof (uint8_t):
2082                 remaining = *((int8_t *)addr);
2083                 break;
2084         default:
2085                 return (dt_set_errno(dtp, EDT_BADNORMAL));
2086         }
2087
2088         if (remaining < 0) {
2089                 func = dtrace_aggregate_walk_valsorted;
2090                 remaining = -remaining;
2091         } else {
2092                 func = dtrace_aggregate_walk_valrevsorted;
2093         }
2094
2095         assert(remaining >= 0);
2096         trunc.dttd_remaining = remaining;
2097
2098         (void) func(dtp, dt_trunc_agg, &trunc);
2099
2100         return (0);
2101 }
2102
2103 static int
2104 dt_print_datum(dtrace_hdl_t *dtp, FILE *fp, dtrace_recdesc_t *rec,
2105     caddr_t addr, size_t size, const dtrace_aggdata_t *aggdata,
2106     uint64_t normal, dt_print_aggdata_t *pd)
2107 {
2108         int err, width;
2109         dtrace_actkind_t act = rec->dtrd_action;
2110         boolean_t packed = pd->dtpa_agghist || pd->dtpa_aggpack;
2111         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2112
2113         static struct {
2114                 size_t size;
2115                 int width;
2116                 int packedwidth;
2117         } *fmt, fmttab[] = {
2118                 { sizeof (uint8_t),     3,      3 },
2119                 { sizeof (uint16_t),    5,      5 },
2120                 { sizeof (uint32_t),    8,      8 },
2121                 { sizeof (uint64_t),    16,     16 },
2122                 { 0,                    -50,    16 }
2123         };
2124
2125         if (packed && pd->dtpa_agghisthdr != agg->dtagd_varid) {
2126                 dtrace_recdesc_t *r;
2127
2128                 width = 0;
2129
2130                 /*
2131                  * To print our quantization header for either an agghist or
2132                  * aggpack aggregation, we need to iterate through all of our
2133                  * of our records to determine their width.
2134                  */
2135                 for (r = rec; !DTRACEACT_ISAGG(r->dtrd_action); r++) {
2136                         for (fmt = fmttab; fmt->size &&
2137                             fmt->size != r->dtrd_size; fmt++)
2138                                 continue;
2139
2140                         width += fmt->packedwidth + 1;
2141                 }
2142
2143                 if (pd->dtpa_agghist) {
2144                         if (dt_print_quanthdr(dtp, fp, width) < 0)
2145                                 return (-1);
2146                 } else {
2147                         if (dt_print_quanthdr_packed(dtp, fp,
2148                             width, aggdata, r->dtrd_action) < 0)
2149                                 return (-1);
2150                 }
2151
2152                 pd->dtpa_agghisthdr = agg->dtagd_varid;
2153         }
2154
2155         if (pd->dtpa_agghist && DTRACEACT_ISAGG(act)) {
2156                 char positives = aggdata->dtada_flags & DTRACE_A_HASPOSITIVES;
2157                 char negatives = aggdata->dtada_flags & DTRACE_A_HASNEGATIVES;
2158                 int64_t val;
2159
2160                 assert(act == DTRACEAGG_SUM || act == DTRACEAGG_COUNT);
2161                 val = (long long)*((uint64_t *)addr);
2162
2163                 if (dt_printf(dtp, fp, " ") < 0)
2164                         return (-1);
2165
2166                 return (dt_print_quantline(dtp, fp, val, normal,
2167                     aggdata->dtada_total, positives, negatives));
2168         }
2169
2170         if (pd->dtpa_aggpack && DTRACEACT_ISAGG(act)) {
2171                 switch (act) {
2172                 case DTRACEAGG_QUANTIZE:
2173                         return (dt_print_quantize_packed(dtp,
2174                             fp, addr, size, aggdata));
2175                 case DTRACEAGG_LQUANTIZE:
2176                         return (dt_print_lquantize_packed(dtp,
2177                             fp, addr, size, aggdata));
2178                 default:
2179                         break;
2180                 }
2181         }
2182
2183         switch (act) {
2184         case DTRACEACT_STACK:
2185                 return (dt_print_stack(dtp, fp, NULL, addr,
2186                     rec->dtrd_arg, rec->dtrd_size / rec->dtrd_arg));
2187
2188         case DTRACEACT_USTACK:
2189         case DTRACEACT_JSTACK:
2190                 return (dt_print_ustack(dtp, fp, NULL, addr, rec->dtrd_arg));
2191
2192         case DTRACEACT_USYM:
2193         case DTRACEACT_UADDR:
2194                 return (dt_print_usym(dtp, fp, addr, act));
2195
2196         case DTRACEACT_UMOD:
2197                 return (dt_print_umod(dtp, fp, NULL, addr));
2198
2199         case DTRACEACT_SYM:
2200                 return (dt_print_sym(dtp, fp, NULL, addr));
2201
2202         case DTRACEACT_MOD:
2203                 return (dt_print_mod(dtp, fp, NULL, addr));
2204
2205         case DTRACEAGG_QUANTIZE:
2206                 return (dt_print_quantize(dtp, fp, addr, size, normal));
2207
2208         case DTRACEAGG_LQUANTIZE:
2209                 return (dt_print_lquantize(dtp, fp, addr, size, normal));
2210
2211         case DTRACEAGG_LLQUANTIZE:
2212                 return (dt_print_llquantize(dtp, fp, addr, size, normal));
2213
2214         case DTRACEAGG_AVG:
2215                 return (dt_print_average(dtp, fp, addr, size, normal));
2216
2217         case DTRACEAGG_STDDEV:
2218                 return (dt_print_stddev(dtp, fp, addr, size, normal));
2219
2220         default:
2221                 break;
2222         }
2223
2224         for (fmt = fmttab; fmt->size && fmt->size != size; fmt++)
2225                 continue;
2226
2227         width = packed ? fmt->packedwidth : fmt->width;
2228
2229         switch (size) {
2230         case sizeof (uint64_t):
2231                 err = dt_printf(dtp, fp, " %*lld", width,
2232                     /* LINTED - alignment */
2233                     (long long)*((uint64_t *)addr) / normal);
2234                 break;
2235         case sizeof (uint32_t):
2236                 /* LINTED - alignment */
2237                 err = dt_printf(dtp, fp, " %*d", width, *((uint32_t *)addr) /
2238                     (uint32_t)normal);
2239                 break;
2240         case sizeof (uint16_t):
2241                 /* LINTED - alignment */
2242                 err = dt_printf(dtp, fp, " %*d", width, *((uint16_t *)addr) /
2243                     (uint32_t)normal);
2244                 break;
2245         case sizeof (uint8_t):
2246                 err = dt_printf(dtp, fp, " %*d", width, *((uint8_t *)addr) /
2247                     (uint32_t)normal);
2248                 break;
2249         default:
2250                 err = dt_print_bytes(dtp, fp, addr, size, width, 0, 0);
2251                 break;
2252         }
2253
2254         return (err);
2255 }
2256
2257 int
2258 dt_print_aggs(const dtrace_aggdata_t **aggsdata, int naggvars, void *arg)
2259 {
2260         int i, aggact = 0;
2261         dt_print_aggdata_t *pd = arg;
2262         const dtrace_aggdata_t *aggdata = aggsdata[0];
2263         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2264         FILE *fp = pd->dtpa_fp;
2265         dtrace_hdl_t *dtp = pd->dtpa_dtp;
2266         dtrace_recdesc_t *rec;
2267         dtrace_actkind_t act;
2268         caddr_t addr;
2269         size_t size;
2270
2271         pd->dtpa_agghist = (aggdata->dtada_flags & DTRACE_A_TOTAL);
2272         pd->dtpa_aggpack = (aggdata->dtada_flags & DTRACE_A_MINMAXBIN);
2273
2274         /*
2275          * Iterate over each record description in the key, printing the traced
2276          * data, skipping the first datum (the tuple member created by the
2277          * compiler).
2278          */
2279         for (i = 1; i < agg->dtagd_nrecs; i++) {
2280                 rec = &agg->dtagd_rec[i];
2281                 act = rec->dtrd_action;
2282                 addr = aggdata->dtada_data + rec->dtrd_offset;
2283                 size = rec->dtrd_size;
2284
2285                 if (DTRACEACT_ISAGG(act)) {
2286                         aggact = i;
2287                         break;
2288                 }
2289
2290                 if (dt_print_datum(dtp, fp, rec, addr,
2291                     size, aggdata, 1, pd) < 0)
2292                         return (-1);
2293
2294                 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
2295                     DTRACE_BUFDATA_AGGKEY) < 0)
2296                         return (-1);
2297         }
2298
2299         assert(aggact != 0);
2300
2301         for (i = (naggvars == 1 ? 0 : 1); i < naggvars; i++) {
2302                 uint64_t normal;
2303
2304                 aggdata = aggsdata[i];
2305                 agg = aggdata->dtada_desc;
2306                 rec = &agg->dtagd_rec[aggact];
2307                 act = rec->dtrd_action;
2308                 addr = aggdata->dtada_data + rec->dtrd_offset;
2309                 size = rec->dtrd_size;
2310
2311                 assert(DTRACEACT_ISAGG(act));
2312                 normal = aggdata->dtada_normal;
2313
2314                 if (dt_print_datum(dtp, fp, rec, addr,
2315                     size, aggdata, normal, pd) < 0)
2316                         return (-1);
2317
2318                 if (dt_buffered_flush(dtp, NULL, rec, aggdata,
2319                     DTRACE_BUFDATA_AGGVAL) < 0)
2320                         return (-1);
2321
2322                 if (!pd->dtpa_allunprint)
2323                         agg->dtagd_flags |= DTRACE_AGD_PRINTED;
2324         }
2325
2326         if (!pd->dtpa_agghist && !pd->dtpa_aggpack) {
2327                 if (dt_printf(dtp, fp, "\n") < 0)
2328                         return (-1);
2329         }
2330
2331         if (dt_buffered_flush(dtp, NULL, NULL, aggdata,
2332             DTRACE_BUFDATA_AGGFORMAT | DTRACE_BUFDATA_AGGLAST) < 0)
2333                 return (-1);
2334
2335         return (0);
2336 }
2337
2338 int
2339 dt_print_agg(const dtrace_aggdata_t *aggdata, void *arg)
2340 {
2341         dt_print_aggdata_t *pd = arg;
2342         dtrace_aggdesc_t *agg = aggdata->dtada_desc;
2343         dtrace_aggvarid_t aggvarid = pd->dtpa_id;
2344
2345         if (pd->dtpa_allunprint) {
2346                 if (agg->dtagd_flags & DTRACE_AGD_PRINTED)
2347                         return (0);
2348         } else {
2349                 /*
2350                  * If we're not printing all unprinted aggregations, then the
2351                  * aggregation variable ID denotes a specific aggregation
2352                  * variable that we should print -- skip any other aggregations
2353                  * that we encounter.
2354                  */
2355                 if (agg->dtagd_nrecs == 0)
2356                         return (0);
2357
2358                 if (aggvarid != agg->dtagd_varid)
2359                         return (0);
2360         }
2361
2362         return (dt_print_aggs(&aggdata, 1, arg));
2363 }
2364
2365 int
2366 dt_setopt(dtrace_hdl_t *dtp, const dtrace_probedata_t *data,
2367     const char *option, const char *value)
2368 {
2369         int len, rval;
2370         char *msg;
2371         const char *errstr;
2372         dtrace_setoptdata_t optdata;
2373
2374         bzero(&optdata, sizeof (optdata));
2375         (void) dtrace_getopt(dtp, option, &optdata.dtsda_oldval);
2376
2377         if (dtrace_setopt(dtp, option, value) == 0) {
2378                 (void) dtrace_getopt(dtp, option, &optdata.dtsda_newval);
2379                 optdata.dtsda_probe = data;
2380                 optdata.dtsda_option = option;
2381                 optdata.dtsda_handle = dtp;
2382
2383                 if ((rval = dt_handle_setopt(dtp, &optdata)) != 0)
2384                         return (rval);
2385
2386                 return (0);
2387         }
2388
2389         errstr = dtrace_errmsg(dtp, dtrace_errno(dtp));
2390         len = strlen(option) + strlen(value) + strlen(errstr) + 80;
2391         msg = alloca(len);
2392
2393         (void) snprintf(msg, len, "couldn't set option \"%s\" to \"%s\": %s\n",
2394             option, value, errstr);
2395
2396         if ((rval = dt_handle_liberr(dtp, data, msg)) == 0)
2397                 return (0);
2398
2399         return (rval);
2400 }
2401
2402 static int
2403 dt_consume_cpu(dtrace_hdl_t *dtp, FILE *fp, int cpu,
2404     dtrace_bufdesc_t *buf, boolean_t just_one,
2405     dtrace_consume_probe_f *efunc, dtrace_consume_rec_f *rfunc, void *arg)
2406 {
2407         dtrace_epid_t id;
2408         size_t offs;
2409         int flow = (dtp->dt_options[DTRACEOPT_FLOWINDENT] != DTRACEOPT_UNSET);
2410         int quiet = (dtp->dt_options[DTRACEOPT_QUIET] != DTRACEOPT_UNSET);
2411         int rval, i, n;
2412         uint64_t tracememsize = 0;
2413         dtrace_probedata_t data;
2414         uint64_t drops;
2415
2416         bzero(&data, sizeof (data));
2417         data.dtpda_handle = dtp;
2418         data.dtpda_cpu = cpu;
2419         data.dtpda_flow = dtp->dt_flow;
2420         data.dtpda_indent = dtp->dt_indent;
2421         data.dtpda_prefix = dtp->dt_prefix;
2422
2423         for (offs = buf->dtbd_oldest; offs < buf->dtbd_size; ) {
2424                 dtrace_eprobedesc_t *epd;
2425
2426                 /*
2427                  * We're guaranteed to have an ID.
2428                  */
2429                 id = *(uint32_t *)((uintptr_t)buf->dtbd_data + offs);
2430
2431                 if (id == DTRACE_EPIDNONE) {
2432                         /*
2433                          * This is filler to assure proper alignment of the
2434                          * next record; we simply ignore it.
2435                          */
2436                         offs += sizeof (id);
2437                         continue;
2438                 }
2439
2440                 if ((rval = dt_epid_lookup(dtp, id, &data.dtpda_edesc,
2441                     &data.dtpda_pdesc)) != 0)
2442                         return (rval);
2443
2444                 epd = data.dtpda_edesc;
2445                 data.dtpda_data = buf->dtbd_data + offs;
2446
2447                 if (data.dtpda_edesc->dtepd_uarg != DT_ECB_DEFAULT) {
2448                         rval = dt_handle(dtp, &data);
2449
2450                         if (rval == DTRACE_CONSUME_NEXT)
2451                                 goto nextepid;
2452
2453                         if (rval == DTRACE_CONSUME_ERROR)
2454                                 return (-1);
2455                 }
2456
2457                 if (flow)
2458                         (void) dt_flowindent(dtp, &data, dtp->dt_last_epid,
2459                             buf, offs);
2460
2461                 rval = (*efunc)(&data, arg);
2462
2463                 if (flow) {
2464                         if (data.dtpda_flow == DTRACEFLOW_ENTRY)
2465                                 data.dtpda_indent += 2;
2466                 }
2467
2468                 if (rval == DTRACE_CONSUME_NEXT)
2469                         goto nextepid;
2470
2471                 if (rval == DTRACE_CONSUME_ABORT)
2472                         return (dt_set_errno(dtp, EDT_DIRABORT));
2473
2474                 if (rval != DTRACE_CONSUME_THIS)
2475                         return (dt_set_errno(dtp, EDT_BADRVAL));
2476
2477                 for (i = 0; i < epd->dtepd_nrecs; i++) {
2478                         caddr_t addr;
2479                         dtrace_recdesc_t *rec = &epd->dtepd_rec[i];
2480                         dtrace_actkind_t act = rec->dtrd_action;
2481
2482                         data.dtpda_data = buf->dtbd_data + offs +
2483                             rec->dtrd_offset;
2484                         addr = data.dtpda_data;
2485
2486                         if (act == DTRACEACT_LIBACT) {
2487                                 uint64_t arg = rec->dtrd_arg;
2488                                 dtrace_aggvarid_t id;
2489
2490                                 switch (arg) {
2491                                 case DT_ACT_CLEAR:
2492                                         /* LINTED - alignment */
2493                                         id = *((dtrace_aggvarid_t *)addr);
2494                                         (void) dtrace_aggregate_walk(dtp,
2495                                             dt_clear_agg, &id);
2496                                         continue;
2497
2498                                 case DT_ACT_DENORMALIZE:
2499                                         /* LINTED - alignment */
2500                                         id = *((dtrace_aggvarid_t *)addr);
2501                                         (void) dtrace_aggregate_walk(dtp,
2502                                             dt_denormalize_agg, &id);
2503                                         continue;
2504
2505                                 case DT_ACT_FTRUNCATE:
2506                                         if (fp == NULL)
2507                                                 continue;
2508
2509                                         (void) fflush(fp);
2510                                         (void) ftruncate(fileno(fp), 0);
2511                                         (void) fseeko(fp, 0, SEEK_SET);
2512                                         continue;
2513
2514                                 case DT_ACT_NORMALIZE:
2515                                         if (i == epd->dtepd_nrecs - 1)
2516                                                 return (dt_set_errno(dtp,
2517                                                     EDT_BADNORMAL));
2518
2519                                         if (dt_normalize(dtp,
2520                                             buf->dtbd_data + offs, rec) != 0)
2521                                                 return (-1);
2522
2523                                         i++;
2524                                         continue;
2525
2526                                 case DT_ACT_SETOPT: {
2527                                         uint64_t *opts = dtp->dt_options;
2528                                         dtrace_recdesc_t *valrec;
2529                                         uint32_t valsize;
2530                                         caddr_t val;
2531                                         int rv;
2532
2533                                         if (i == epd->dtepd_nrecs - 1) {
2534                                                 return (dt_set_errno(dtp,
2535                                                     EDT_BADSETOPT));
2536                                         }
2537
2538                                         valrec = &epd->dtepd_rec[++i];
2539                                         valsize = valrec->dtrd_size;
2540
2541                                         if (valrec->dtrd_action != act ||
2542                                             valrec->dtrd_arg != arg) {
2543                                                 return (dt_set_errno(dtp,
2544                                                     EDT_BADSETOPT));
2545                                         }
2546
2547                                         if (valsize > sizeof (uint64_t)) {
2548                                                 val = buf->dtbd_data + offs +
2549                                                     valrec->dtrd_offset;
2550                                         } else {
2551                                                 val = "1";
2552                                         }
2553
2554                                         rv = dt_setopt(dtp, &data, addr, val);
2555
2556                                         if (rv != 0)
2557                                                 return (-1);
2558
2559                                         flow = (opts[DTRACEOPT_FLOWINDENT] !=
2560                                             DTRACEOPT_UNSET);
2561                                         quiet = (opts[DTRACEOPT_QUIET] !=
2562                                             DTRACEOPT_UNSET);
2563
2564                                         continue;
2565                                 }
2566
2567                                 case DT_ACT_TRUNC:
2568                                         if (i == epd->dtepd_nrecs - 1)
2569                                                 return (dt_set_errno(dtp,
2570                                                     EDT_BADTRUNC));
2571
2572                                         if (dt_trunc(dtp,
2573                                             buf->dtbd_data + offs, rec) != 0)
2574                                                 return (-1);
2575
2576                                         i++;
2577                                         continue;
2578
2579                                 default:
2580                                         continue;
2581                                 }
2582                         }
2583
2584                         if (act == DTRACEACT_TRACEMEM_DYNSIZE &&
2585                             rec->dtrd_size == sizeof (uint64_t)) {
2586                                 /* LINTED - alignment */
2587                                 tracememsize = *((unsigned long long *)addr);
2588                                 continue;
2589                         }
2590
2591                         rval = (*rfunc)(&data, rec, arg);
2592
2593                         if (rval == DTRACE_CONSUME_NEXT)
2594                                 continue;
2595
2596                         if (rval == DTRACE_CONSUME_ABORT)
2597                                 return (dt_set_errno(dtp, EDT_DIRABORT));
2598
2599                         if (rval != DTRACE_CONSUME_THIS)
2600                                 return (dt_set_errno(dtp, EDT_BADRVAL));
2601
2602                         if (act == DTRACEACT_STACK) {
2603                                 int depth = rec->dtrd_arg;
2604
2605                                 if (dt_print_stack(dtp, fp, NULL, addr, depth,
2606                                     rec->dtrd_size / depth) < 0)
2607                                         return (-1);
2608                                 goto nextrec;
2609                         }
2610
2611                         if (act == DTRACEACT_USTACK ||
2612                             act == DTRACEACT_JSTACK) {
2613                                 if (dt_print_ustack(dtp, fp, NULL,
2614                                     addr, rec->dtrd_arg) < 0)
2615                                         return (-1);
2616                                 goto nextrec;
2617                         }
2618
2619                         if (act == DTRACEACT_SYM) {
2620                                 if (dt_print_sym(dtp, fp, NULL, addr) < 0)
2621                                         return (-1);
2622                                 goto nextrec;
2623                         }
2624
2625                         if (act == DTRACEACT_MOD) {
2626                                 if (dt_print_mod(dtp, fp, NULL, addr) < 0)
2627                                         return (-1);
2628                                 goto nextrec;
2629                         }
2630
2631                         if (act == DTRACEACT_USYM || act == DTRACEACT_UADDR) {
2632                                 if (dt_print_usym(dtp, fp, addr, act) < 0)
2633                                         return (-1);
2634                                 goto nextrec;
2635                         }
2636
2637                         if (act == DTRACEACT_UMOD) {
2638                                 if (dt_print_umod(dtp, fp, NULL, addr) < 0)
2639                                         return (-1);
2640                                 goto nextrec;
2641                         }
2642
2643                         if (act == DTRACEACT_PRINTM) {
2644                                 if (dt_print_memory(dtp, fp, addr) < 0)
2645                                         return (-1);
2646                                 goto nextrec;
2647                         }
2648
2649                         if (act == DTRACEACT_PRINTT) {
2650                                 if (dt_print_type(dtp, fp, addr) < 0)
2651                                         return (-1);
2652                                 goto nextrec;
2653                         }
2654
2655                         if (DTRACEACT_ISPRINTFLIKE(act)) {
2656                                 void *fmtdata;
2657                                 int (*func)(dtrace_hdl_t *, FILE *, void *,
2658                                     const dtrace_probedata_t *,
2659                                     const dtrace_recdesc_t *, uint_t,
2660                                     const void *buf, size_t);
2661
2662                                 if ((fmtdata = dt_format_lookup(dtp,
2663                                     rec->dtrd_format)) == NULL)
2664                                         goto nofmt;
2665
2666                                 switch (act) {
2667                                 case DTRACEACT_PRINTF:
2668                                         func = dtrace_fprintf;
2669                                         break;
2670                                 case DTRACEACT_PRINTA:
2671                                         func = dtrace_fprinta;
2672                                         break;
2673                                 case DTRACEACT_SYSTEM:
2674                                         func = dtrace_system;
2675                                         break;
2676                                 case DTRACEACT_FREOPEN:
2677                                         func = dtrace_freopen;
2678                                         break;
2679                                 }
2680
2681                                 n = (*func)(dtp, fp, fmtdata, &data,
2682                                     rec, epd->dtepd_nrecs - i,
2683                                     (uchar_t *)buf->dtbd_data + offs,
2684                                     buf->dtbd_size - offs);
2685
2686                                 if (n < 0)
2687                                         return (-1); /* errno is set for us */
2688
2689                                 if (n > 0)
2690                                         i += n - 1;
2691                                 goto nextrec;
2692                         }
2693
2694                         /*
2695                          * If this is a DIF expression, and the record has a
2696                          * format set, this indicates we have a CTF type name
2697                          * associated with the data and we should try to print
2698                          * it out by type.
2699                          */
2700                         if (act == DTRACEACT_DIFEXPR) {
2701                                 const char *strdata = dt_strdata_lookup(dtp,
2702                                     rec->dtrd_format);
2703                                 if (strdata != NULL) {
2704                                         n = dtrace_print(dtp, fp, strdata,
2705                                             addr, rec->dtrd_size);
2706
2707                                         /*
2708                                          * dtrace_print() will return -1 on
2709                                          * error, or return the number of bytes
2710                                          * consumed.  It will return 0 if the
2711                                          * type couldn't be determined, and we
2712                                          * should fall through to the normal
2713                                          * trace method.
2714                                          */
2715                                         if (n < 0)
2716                                                 return (-1);
2717
2718                                         if (n > 0)
2719                                                 goto nextrec;
2720                                 }
2721                         }
2722
2723 nofmt:
2724                         if (act == DTRACEACT_PRINTA) {
2725                                 dt_print_aggdata_t pd;
2726                                 dtrace_aggvarid_t *aggvars;
2727                                 int j, naggvars = 0;
2728                                 size_t size = ((epd->dtepd_nrecs - i) *
2729                                     sizeof (dtrace_aggvarid_t));
2730
2731                                 if ((aggvars = dt_alloc(dtp, size)) == NULL)
2732                                         return (-1);
2733
2734                                 /*
2735                                  * This might be a printa() with multiple
2736                                  * aggregation variables.  We need to scan
2737                                  * forward through the records until we find
2738                                  * a record from a different statement.
2739                                  */
2740                                 for (j = i; j < epd->dtepd_nrecs; j++) {
2741                                         dtrace_recdesc_t *nrec;
2742                                         caddr_t naddr;
2743
2744                                         nrec = &epd->dtepd_rec[j];
2745
2746                                         if (nrec->dtrd_uarg != rec->dtrd_uarg)
2747                                                 break;
2748
2749                                         if (nrec->dtrd_action != act) {
2750                                                 return (dt_set_errno(dtp,
2751                                                     EDT_BADAGG));
2752                                         }
2753
2754                                         naddr = buf->dtbd_data + offs +
2755                                             nrec->dtrd_offset;
2756
2757                                         aggvars[naggvars++] =
2758                                             /* LINTED - alignment */
2759                                             *((dtrace_aggvarid_t *)naddr);
2760                                 }
2761
2762                                 i = j - 1;
2763                                 bzero(&pd, sizeof (pd));
2764                                 pd.dtpa_dtp = dtp;
2765                                 pd.dtpa_fp = fp;
2766
2767                                 assert(naggvars >= 1);
2768
2769                                 if (naggvars == 1) {
2770                                         pd.dtpa_id = aggvars[0];
2771                                         dt_free(dtp, aggvars);
2772
2773                                         if (dt_printf(dtp, fp, "\n") < 0 ||
2774                                             dtrace_aggregate_walk_sorted(dtp,
2775                                             dt_print_agg, &pd) < 0)
2776                                                 return (-1);
2777                                         goto nextrec;
2778                                 }
2779
2780                                 if (dt_printf(dtp, fp, "\n") < 0 ||
2781                                     dtrace_aggregate_walk_joined(dtp, aggvars,
2782                                     naggvars, dt_print_aggs, &pd) < 0) {
2783                                         dt_free(dtp, aggvars);
2784                                         return (-1);
2785                                 }
2786
2787                                 dt_free(dtp, aggvars);
2788                                 goto nextrec;
2789                         }
2790
2791                         if (act == DTRACEACT_TRACEMEM) {
2792                                 if (tracememsize == 0 ||
2793                                     tracememsize > rec->dtrd_size) {
2794                                         tracememsize = rec->dtrd_size;
2795                                 }
2796
2797                                 n = dt_print_bytes(dtp, fp, addr,
2798                                     tracememsize, -33, quiet, 1);
2799
2800                                 tracememsize = 0;
2801
2802                                 if (n < 0)
2803                                         return (-1);
2804
2805                                 goto nextrec;
2806                         }
2807
2808                         switch (rec->dtrd_size) {
2809                         case sizeof (uint64_t):
2810                                 n = dt_printf(dtp, fp,
2811                                     quiet ? "%lld" : " %16lld",
2812                                     /* LINTED - alignment */
2813                                     *((unsigned long long *)addr));
2814                                 break;
2815                         case sizeof (uint32_t):
2816                                 n = dt_printf(dtp, fp, quiet ? "%d" : " %8d",
2817                                     /* LINTED - alignment */
2818                                     *((uint32_t *)addr));
2819                                 break;
2820                         case sizeof (uint16_t):
2821                                 n = dt_printf(dtp, fp, quiet ? "%d" : " %5d",
2822                                     /* LINTED - alignment */
2823                                     *((uint16_t *)addr));
2824                                 break;
2825                         case sizeof (uint8_t):
2826                                 n = dt_printf(dtp, fp, quiet ? "%d" : " %3d",
2827                                     *((uint8_t *)addr));
2828                                 break;
2829                         default:
2830                                 n = dt_print_bytes(dtp, fp, addr,
2831                                     rec->dtrd_size, -33, quiet, 0);
2832                                 break;
2833                         }
2834
2835                         if (n < 0)
2836                                 return (-1); /* errno is set for us */
2837
2838 nextrec:
2839                         if (dt_buffered_flush(dtp, &data, rec, NULL, 0) < 0)
2840                                 return (-1); /* errno is set for us */
2841                 }
2842
2843                 /*
2844                  * Call the record callback with a NULL record to indicate
2845                  * that we're done processing this EPID.
2846                  */
2847                 rval = (*rfunc)(&data, NULL, arg);
2848 nextepid:
2849                 offs += epd->dtepd_size;
2850                 dtp->dt_last_epid = id;
2851                 if (just_one) {
2852                         buf->dtbd_oldest = offs;
2853                         break;
2854                 }
2855         }
2856
2857         dtp->dt_flow = data.dtpda_flow;
2858         dtp->dt_indent = data.dtpda_indent;
2859         dtp->dt_prefix = data.dtpda_prefix;
2860
2861         if ((drops = buf->dtbd_drops) == 0)
2862                 return (0);
2863
2864         /*
2865          * Explicitly zero the drops to prevent us from processing them again.
2866          */
2867         buf->dtbd_drops = 0;
2868
2869         return (dt_handle_cpudrop(dtp, cpu, DTRACEDROP_PRINCIPAL, drops));
2870 }
2871
2872 /*
2873  * Reduce memory usage by shrinking the buffer if it's no more than half full.
2874  * Note, we need to preserve the alignment of the data at dtbd_oldest, which is
2875  * only 4-byte aligned.
2876  */
2877 static void
2878 dt_realloc_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf, int cursize)
2879 {
2880         uint64_t used = buf->dtbd_size - buf->dtbd_oldest;
2881         if (used < cursize / 2) {
2882                 int misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1);
2883                 char *newdata = dt_alloc(dtp, used + misalign);
2884                 if (newdata == NULL)
2885                         return;
2886                 bzero(newdata, misalign);
2887                 bcopy(buf->dtbd_data + buf->dtbd_oldest,
2888                     newdata + misalign, used);
2889                 dt_free(dtp, buf->dtbd_data);
2890                 buf->dtbd_oldest = misalign;
2891                 buf->dtbd_size = used + misalign;
2892                 buf->dtbd_data = newdata;
2893         }
2894 }
2895
2896 /*
2897  * If the ring buffer has wrapped, the data is not in order.  Rearrange it
2898  * so that it is.  Note, we need to preserve the alignment of the data at
2899  * dtbd_oldest, which is only 4-byte aligned.
2900  */
2901 static int
2902 dt_unring_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf)
2903 {
2904         int misalign;
2905         char *newdata, *ndp;
2906
2907         if (buf->dtbd_oldest == 0)
2908                 return (0);
2909
2910         misalign = buf->dtbd_oldest & (sizeof (uint64_t) - 1);
2911         newdata = ndp = dt_alloc(dtp, buf->dtbd_size + misalign);
2912
2913         if (newdata == NULL)
2914                 return (-1);
2915
2916         assert(0 == (buf->dtbd_size & (sizeof (uint64_t) - 1)));
2917
2918         bzero(ndp, misalign);
2919         ndp += misalign;
2920
2921         bcopy(buf->dtbd_data + buf->dtbd_oldest, ndp,
2922             buf->dtbd_size - buf->dtbd_oldest);
2923         ndp += buf->dtbd_size - buf->dtbd_oldest;
2924
2925         bcopy(buf->dtbd_data, ndp, buf->dtbd_oldest);
2926
2927         dt_free(dtp, buf->dtbd_data);
2928         buf->dtbd_oldest = 0;
2929         buf->dtbd_data = newdata;
2930         buf->dtbd_size += misalign;
2931
2932         return (0);
2933 }
2934
2935 static void
2936 dt_put_buf(dtrace_hdl_t *dtp, dtrace_bufdesc_t *buf)
2937 {
2938         dt_free(dtp, buf->dtbd_data);
2939         dt_free(dtp, buf);
2940 }
2941
2942 /*
2943  * Returns 0 on success, in which case *cbp will be filled in if we retrieved
2944  * data, or NULL if there is no data for this CPU.
2945  * Returns -1 on failure and sets dt_errno.
2946  */
2947 static int
2948 dt_get_buf(dtrace_hdl_t *dtp, int cpu, dtrace_bufdesc_t **bufp)
2949 {
2950         dtrace_optval_t size;
2951         dtrace_bufdesc_t *buf = dt_zalloc(dtp, sizeof (*buf));
2952         int error, rval;
2953
2954         if (buf == NULL)
2955                 return (-1);
2956
2957         (void) dtrace_getopt(dtp, "bufsize", &size);
2958         buf->dtbd_data = dt_alloc(dtp, size);
2959         if (buf->dtbd_data == NULL) {
2960                 dt_free(dtp, buf);
2961                 return (-1);
2962         }
2963         buf->dtbd_size = size;
2964         buf->dtbd_cpu = cpu;
2965
2966 #ifdef illumos
2967         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, buf) == -1) {
2968 #else
2969         if (dt_ioctl(dtp, DTRACEIOC_BUFSNAP, &buf) == -1) {
2970 #endif
2971                 /*
2972                  * If we failed with ENOENT, it may be because the
2973                  * CPU was unconfigured -- this is okay.  Any other
2974                  * error, however, is unexpected.
2975                  */
2976                 if (errno == ENOENT) {
2977                         *bufp = NULL;
2978                         rval = 0;
2979                 } else
2980                         rval = dt_set_errno(dtp, errno);
2981
2982                 dt_put_buf(dtp, buf);
2983                 return (rval);
2984         }
2985
2986         error = dt_unring_buf(dtp, buf);
2987         if (error != 0) {
2988                 dt_put_buf(dtp, buf);
2989                 return (error);
2990         }
2991         dt_realloc_buf(dtp, buf, size);
2992
2993         *bufp = buf;
2994         return (0);
2995 }
2996
2997 typedef struct dt_begin {
2998         dtrace_consume_probe_f *dtbgn_probefunc;
2999         dtrace_consume_rec_f *dtbgn_recfunc;
3000         void *dtbgn_arg;
3001         dtrace_handle_err_f *dtbgn_errhdlr;
3002         void *dtbgn_errarg;
3003         int dtbgn_beginonly;
3004 } dt_begin_t;
3005
3006 static int
3007 dt_consume_begin_probe(const dtrace_probedata_t *data, void *arg)
3008 {
3009         dt_begin_t *begin = arg;
3010         dtrace_probedesc_t *pd = data->dtpda_pdesc;
3011
3012         int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
3013         int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
3014
3015         if (begin->dtbgn_beginonly) {
3016                 if (!(r1 && r2))
3017                         return (DTRACE_CONSUME_NEXT);
3018         } else {
3019                 if (r1 && r2)
3020                         return (DTRACE_CONSUME_NEXT);
3021         }
3022
3023         /*
3024          * We have a record that we're interested in.  Now call the underlying
3025          * probe function...
3026          */
3027         return (begin->dtbgn_probefunc(data, begin->dtbgn_arg));
3028 }
3029
3030 static int
3031 dt_consume_begin_record(const dtrace_probedata_t *data,
3032     const dtrace_recdesc_t *rec, void *arg)
3033 {
3034         dt_begin_t *begin = arg;
3035
3036         return (begin->dtbgn_recfunc(data, rec, begin->dtbgn_arg));
3037 }
3038
3039 static int
3040 dt_consume_begin_error(const dtrace_errdata_t *data, void *arg)
3041 {
3042         dt_begin_t *begin = (dt_begin_t *)arg;
3043         dtrace_probedesc_t *pd = data->dteda_pdesc;
3044
3045         int r1 = (strcmp(pd->dtpd_provider, "dtrace") == 0);
3046         int r2 = (strcmp(pd->dtpd_name, "BEGIN") == 0);
3047
3048         if (begin->dtbgn_beginonly) {
3049                 if (!(r1 && r2))
3050                         return (DTRACE_HANDLE_OK);
3051         } else {
3052                 if (r1 && r2)
3053                         return (DTRACE_HANDLE_OK);
3054         }
3055
3056         return (begin->dtbgn_errhdlr(data, begin->dtbgn_errarg));
3057 }
3058
3059 static int
3060 dt_consume_begin(dtrace_hdl_t *dtp, FILE *fp,
3061     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
3062 {
3063         /*
3064          * There's this idea that the BEGIN probe should be processed before
3065          * everything else, and that the END probe should be processed after
3066          * anything else.  In the common case, this is pretty easy to deal
3067          * with.  However, a situation may arise where the BEGIN enabling and
3068          * END enabling are on the same CPU, and some enabling in the middle
3069          * occurred on a different CPU.  To deal with this (blech!) we need to
3070          * consume the BEGIN buffer up until the end of the BEGIN probe, and
3071          * then set it aside.  We will then process every other CPU, and then
3072          * we'll return to the BEGIN CPU and process the rest of the data
3073          * (which will inevitably include the END probe, if any).  Making this
3074          * even more complicated (!) is the library's ERROR enabling.  Because
3075          * this enabling is processed before we even get into the consume call
3076          * back, any ERROR firing would result in the library's ERROR enabling
3077          * being processed twice -- once in our first pass (for BEGIN probes),
3078          * and again in our second pass (for everything but BEGIN probes).  To
3079          * deal with this, we interpose on the ERROR handler to assure that we
3080          * only process ERROR enablings induced by BEGIN enablings in the
3081          * first pass, and that we only process ERROR enablings _not_ induced
3082          * by BEGIN enablings in the second pass.
3083          */
3084
3085         dt_begin_t begin;
3086         processorid_t cpu = dtp->dt_beganon;
3087         int rval, i;
3088         static int max_ncpus;
3089         dtrace_bufdesc_t *buf;
3090
3091         dtp->dt_beganon = -1;
3092
3093         if (dt_get_buf(dtp, cpu, &buf) != 0)
3094                 return (-1);
3095         if (buf == NULL)
3096                 return (0);
3097
3098         if (!dtp->dt_stopped || buf->dtbd_cpu != dtp->dt_endedon) {
3099                 /*
3100                  * This is the simple case.  We're either not stopped, or if
3101                  * we are, we actually processed any END probes on another
3102                  * CPU.  We can simply consume this buffer and return.
3103                  */
3104                 rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
3105                     pf, rf, arg);
3106                 dt_put_buf(dtp, buf);
3107                 return (rval);
3108         }
3109
3110         begin.dtbgn_probefunc = pf;
3111         begin.dtbgn_recfunc = rf;
3112         begin.dtbgn_arg = arg;
3113         begin.dtbgn_beginonly = 1;
3114
3115         /*
3116          * We need to interpose on the ERROR handler to be sure that we
3117          * only process ERRORs induced by BEGIN.
3118          */
3119         begin.dtbgn_errhdlr = dtp->dt_errhdlr;
3120         begin.dtbgn_errarg = dtp->dt_errarg;
3121         dtp->dt_errhdlr = dt_consume_begin_error;
3122         dtp->dt_errarg = &begin;
3123
3124         rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
3125             dt_consume_begin_probe, dt_consume_begin_record, &begin);
3126
3127         dtp->dt_errhdlr = begin.dtbgn_errhdlr;
3128         dtp->dt_errarg = begin.dtbgn_errarg;
3129
3130         if (rval != 0) {
3131                 dt_put_buf(dtp, buf);
3132                 return (rval);
3133         }
3134
3135         if (max_ncpus == 0)
3136                 max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
3137
3138         for (i = 0; i < max_ncpus; i++) {
3139                 dtrace_bufdesc_t *nbuf;
3140                 if (i == cpu)
3141                         continue;
3142
3143                 if (dt_get_buf(dtp, i, &nbuf) != 0) {
3144                         dt_put_buf(dtp, buf);
3145                         return (-1);
3146                 }
3147                 if (nbuf == NULL)
3148                         continue;
3149
3150                 rval = dt_consume_cpu(dtp, fp, i, nbuf, B_FALSE,
3151                     pf, rf, arg);
3152                 dt_put_buf(dtp, nbuf);
3153                 if (rval != 0) {
3154                         dt_put_buf(dtp, buf);
3155                         return (rval);
3156                 }
3157         }
3158
3159         /*
3160          * Okay -- we're done with the other buffers.  Now we want to
3161          * reconsume the first buffer -- but this time we're looking for
3162          * everything _but_ BEGIN.  And of course, in order to only consume
3163          * those ERRORs _not_ associated with BEGIN, we need to reinstall our
3164          * ERROR interposition function...
3165          */
3166         begin.dtbgn_beginonly = 0;
3167
3168         assert(begin.dtbgn_errhdlr == dtp->dt_errhdlr);
3169         assert(begin.dtbgn_errarg == dtp->dt_errarg);
3170         dtp->dt_errhdlr = dt_consume_begin_error;
3171         dtp->dt_errarg = &begin;
3172
3173         rval = dt_consume_cpu(dtp, fp, cpu, buf, B_FALSE,
3174             dt_consume_begin_probe, dt_consume_begin_record, &begin);
3175
3176         dtp->dt_errhdlr = begin.dtbgn_errhdlr;
3177         dtp->dt_errarg = begin.dtbgn_errarg;
3178
3179         return (rval);
3180 }
3181
3182 /* ARGSUSED */
3183 static uint64_t
3184 dt_buf_oldest(void *elem, void *arg)
3185 {
3186         dtrace_bufdesc_t *buf = elem;
3187         size_t offs = buf->dtbd_oldest;
3188
3189         while (offs < buf->dtbd_size) {
3190                 dtrace_rechdr_t *dtrh =
3191                     /* LINTED - alignment */
3192                     (dtrace_rechdr_t *)(buf->dtbd_data + offs);
3193                 if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
3194                         offs += sizeof (dtrace_epid_t);
3195                 } else {
3196                         return (DTRACE_RECORD_LOAD_TIMESTAMP(dtrh));
3197                 }
3198         }
3199
3200         /* There are no records left; use the time the buffer was retrieved. */
3201         return (buf->dtbd_timestamp);
3202 }
3203
3204 int
3205 dtrace_consume(dtrace_hdl_t *dtp, FILE *fp,
3206     dtrace_consume_probe_f *pf, dtrace_consume_rec_f *rf, void *arg)
3207 {
3208         dtrace_optval_t size;
3209         static int max_ncpus;
3210         int i, rval;
3211         dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_SWITCHRATE];
3212         hrtime_t now = gethrtime();
3213
3214         if (dtp->dt_lastswitch != 0) {
3215                 if (now - dtp->dt_lastswitch < interval)
3216                         return (0);
3217
3218                 dtp->dt_lastswitch += interval;
3219         } else {
3220                 dtp->dt_lastswitch = now;
3221         }
3222
3223         if (!dtp->dt_active)
3224                 return (dt_set_errno(dtp, EINVAL));
3225
3226         if (max_ncpus == 0)
3227                 max_ncpus = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
3228
3229         if (pf == NULL)
3230                 pf = (dtrace_consume_probe_f *)dt_nullprobe;
3231
3232         if (rf == NULL)
3233                 rf = (dtrace_consume_rec_f *)dt_nullrec;
3234
3235         if (dtp->dt_options[DTRACEOPT_TEMPORAL] == DTRACEOPT_UNSET) {
3236                 /*
3237                  * The output will not be in the order it was traced.  Rather,
3238                  * we will consume all of the data from each CPU's buffer in
3239                  * turn.  We apply special handling for the records from BEGIN
3240                  * and END probes so that they are consumed first and last,
3241                  * respectively.
3242                  *
3243                  * If we have just begun, we want to first process the CPU that
3244                  * executed the BEGIN probe (if any).
3245                  */
3246                 if (dtp->dt_active && dtp->dt_beganon != -1 &&
3247                     (rval = dt_consume_begin(dtp, fp, pf, rf, arg)) != 0)
3248                         return (rval);
3249
3250                 for (i = 0; i < max_ncpus; i++) {
3251                         dtrace_bufdesc_t *buf;
3252
3253                         /*
3254                          * If we have stopped, we want to process the CPU on
3255                          * which the END probe was processed only _after_ we
3256                          * have processed everything else.
3257                          */
3258                         if (dtp->dt_stopped && (i == dtp->dt_endedon))
3259                                 continue;
3260
3261                         if (dt_get_buf(dtp, i, &buf) != 0)
3262                                 return (-1);
3263                         if (buf == NULL)
3264                                 continue;
3265
3266                         dtp->dt_flow = 0;
3267                         dtp->dt_indent = 0;
3268                         dtp->dt_prefix = NULL;
3269                         rval = dt_consume_cpu(dtp, fp, i,
3270                             buf, B_FALSE, pf, rf, arg);
3271                         dt_put_buf(dtp, buf);
3272                         if (rval != 0)
3273                                 return (rval);
3274                 }
3275                 if (dtp->dt_stopped) {
3276                         dtrace_bufdesc_t *buf;
3277
3278                         if (dt_get_buf(dtp, dtp->dt_endedon, &buf) != 0)
3279                                 return (-1);
3280                         if (buf == NULL)
3281                                 return (0);
3282
3283                         rval = dt_consume_cpu(dtp, fp, dtp->dt_endedon,
3284                             buf, B_FALSE, pf, rf, arg);
3285                         dt_put_buf(dtp, buf);
3286                         return (rval);
3287                 }
3288         } else {
3289                 /*
3290                  * The output will be in the order it was traced (or for
3291                  * speculations, when it was committed).  We retrieve a buffer
3292                  * from each CPU and put it into a priority queue, which sorts
3293                  * based on the first entry in the buffer.  This is sufficient
3294                  * because entries within a buffer are already sorted.
3295                  *
3296                  * We then consume records one at a time, always consuming the
3297                  * oldest record, as determined by the priority queue.  When
3298                  * we reach the end of the time covered by these buffers,
3299                  * we need to stop and retrieve more records on the next pass.
3300                  * The kernel tells us the time covered by each buffer, in
3301                  * dtbd_timestamp.  The first buffer's timestamp tells us the
3302                  * time covered by all buffers, as subsequently retrieved
3303                  * buffers will cover to a more recent time.
3304                  */
3305
3306                 uint64_t *drops = alloca(max_ncpus * sizeof (uint64_t));
3307                 uint64_t first_timestamp = 0;
3308                 uint_t cookie = 0;
3309                 dtrace_bufdesc_t *buf;
3310
3311                 bzero(drops, max_ncpus * sizeof (uint64_t));
3312
3313                 if (dtp->dt_bufq == NULL) {
3314                         dtp->dt_bufq = dt_pq_init(dtp, max_ncpus * 2,
3315                             dt_buf_oldest, NULL);
3316                         if (dtp->dt_bufq == NULL) /* ENOMEM */
3317                                 return (-1);
3318                 }
3319
3320                 /* Retrieve data from each CPU. */
3321                 (void) dtrace_getopt(dtp, "bufsize", &size);
3322                 for (i = 0; i < max_ncpus; i++) {
3323                         dtrace_bufdesc_t *buf;
3324
3325                         if (dt_get_buf(dtp, i, &buf) != 0)
3326                                 return (-1);
3327                         if (buf != NULL) {
3328                                 if (first_timestamp == 0)
3329                                         first_timestamp = buf->dtbd_timestamp;
3330                                 assert(buf->dtbd_timestamp >= first_timestamp);
3331
3332                                 dt_pq_insert(dtp->dt_bufq, buf);
3333                                 drops[i] = buf->dtbd_drops;
3334                                 buf->dtbd_drops = 0;
3335                         }
3336                 }
3337
3338                 /* Consume records. */
3339                 for (;;) {
3340                         dtrace_bufdesc_t *buf = dt_pq_pop(dtp->dt_bufq);
3341                         uint64_t timestamp;
3342
3343                         if (buf == NULL)
3344                                 break;
3345
3346                         timestamp = dt_buf_oldest(buf, dtp);
3347                         assert(timestamp >= dtp->dt_last_timestamp);
3348                         dtp->dt_last_timestamp = timestamp;
3349
3350                         if (timestamp == buf->dtbd_timestamp) {
3351                                 /*
3352                                  * We've reached the end of the time covered
3353                                  * by this buffer.  If this is the oldest
3354                                  * buffer, we must do another pass
3355                                  * to retrieve more data.
3356                                  */
3357                                 dt_put_buf(dtp, buf);
3358                                 if (timestamp == first_timestamp &&
3359                                     !dtp->dt_stopped)
3360                                         break;
3361                                 continue;
3362                         }
3363
3364                         if ((rval = dt_consume_cpu(dtp, fp,
3365                             buf->dtbd_cpu, buf, B_TRUE, pf, rf, arg)) != 0)
3366                                 return (rval);
3367                         dt_pq_insert(dtp->dt_bufq, buf);
3368                 }
3369
3370                 /* Consume drops. */
3371                 for (i = 0; i < max_ncpus; i++) {
3372                         if (drops[i] != 0) {
3373                                 int error = dt_handle_cpudrop(dtp, i,
3374                                     DTRACEDROP_PRINCIPAL, drops[i]);
3375                                 if (error != 0)
3376                                         return (error);
3377                         }
3378                 }
3379
3380                 /*
3381                  * Reduce memory usage by re-allocating smaller buffers
3382                  * for the "remnants".
3383                  */
3384                 while (buf = dt_pq_walk(dtp->dt_bufq, &cookie))
3385                         dt_realloc_buf(dtp, buf, buf->dtbd_size);
3386         }
3387
3388         return (0);
3389 }