]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - cddl/contrib/opensolaris/lib/libdtrace/common/dt_aggregate.c
dtrace: Add the 'oformat' libdtrace option
[FreeBSD/FreeBSD.git] / cddl / contrib / opensolaris / lib / libdtrace / common / dt_aggregate.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26
27 /*
28  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
29  * Copyright (c) 2012 by Delphix. All rights reserved.
30  */
31
32 #include <stdlib.h>
33 #include <strings.h>
34 #include <errno.h>
35 #include <unistd.h>
36 #include <dt_impl.h>
37 #include <assert.h>
38 #include <dt_oformat.h>
39 #ifdef illumos
40 #include <alloca.h>
41 #else
42 #include <sys/sysctl.h>
43 #include <libproc_compat.h>
44 #endif
45 #include <limits.h>
46
/*
 * Bucket count given to the aggregation hash table when
 * dt_aggregate_snap_cpu() first allocates it.
 */
#define DTRACE_AHASHSIZE        32779           /* big 'ol prime */

/*
 * qsort(3C) comparison callbacks receive only the two elements being
 * compared, so any setting that influences the ordering has to live at file
 * scope.  dt_qsort_lock is the global static lock that protects these
 * settings.
 *
 *   dt_revsort  non-zero inverts the results handed out by DT_LESSTHAN and
 *               DT_GREATERTHAN
 *   dt_keysort  NOTE(review): not referenced in the visible part of this
 *               file; presumably selects key-ordered sorting -- confirm
 *   dt_keypos   index of the key that dt_aggregate_keycmp() compares first
 */
static pthread_mutex_t dt_qsort_lock = PTHREAD_MUTEX_INITIALIZER;

static int dt_revsort;
static int dt_keysort;
static int dt_keypos;

/* Comparator results for "less" and "greater", honouring dt_revsort. */
#define DT_LESSTHAN     (dt_revsort != 0 ? 1 : -1)
#define DT_GREATERTHAN  (dt_revsort != 0 ? -1 : 1)
62
/*
 * Element-wise accumulation: treats both buffers as arrays of
 * size / sizeof (int64_t) 64-bit values and adds each value of 'new' into
 * the corresponding slot of 'existing'.  'new' is left unmodified.
 */
static void
dt_aggregate_count(int64_t *existing, int64_t *new, size_t size)
{
        size_t remaining = size / sizeof (int64_t);
        int64_t *dst = existing;
        int64_t *src = new;

        while (remaining-- != 0)
                *dst++ += *src++;
}
71
72 static int
73 dt_aggregate_countcmp(int64_t *lhs, int64_t *rhs)
74 {
75         int64_t lvar = *lhs;
76         int64_t rvar = *rhs;
77
78         if (lvar < rvar)
79                 return (DT_LESSTHAN);
80
81         if (lvar > rvar)
82                 return (DT_GREATERTHAN);
83
84         return (0);
85 }
86
/*
 * Keeps the smaller of the stored value and the incoming one: *existing is
 * overwritten only when *new is strictly less.  'size' is not consulted.
 */
/*ARGSUSED*/
static void
dt_aggregate_min(int64_t *existing, int64_t *new, size_t size)
{
        int64_t candidate = *new;

        if (candidate < *existing)
                *existing = candidate;
}
94
/*
 * Keeps the larger of the stored value and the incoming one: *existing is
 * overwritten only when *new is strictly greater.  'size' is not consulted.
 */
/*ARGSUSED*/
static void
dt_aggregate_max(int64_t *existing, int64_t *new, size_t size)
{
        int64_t candidate = *new;

        if (candidate > *existing)
                *existing = candidate;
}
102
103 static int
104 dt_aggregate_averagecmp(int64_t *lhs, int64_t *rhs)
105 {
106         int64_t lavg = lhs[0] ? (lhs[1] / lhs[0]) : 0;
107         int64_t ravg = rhs[0] ? (rhs[1] / rhs[0]) : 0;
108
109         if (lavg < ravg)
110                 return (DT_LESSTHAN);
111
112         if (lavg > ravg)
113                 return (DT_GREATERTHAN);
114
115         return (0);
116 }
117
118 static int
119 dt_aggregate_stddevcmp(int64_t *lhs, int64_t *rhs)
120 {
121         uint64_t lsd = dt_stddev((uint64_t *)lhs, 1);
122         uint64_t rsd = dt_stddev((uint64_t *)rhs, 1);
123
124         if (lsd < rsd)
125                 return (DT_LESSTHAN);
126
127         if (lsd > rsd)
128                 return (DT_GREATERTHAN);
129
130         return (0);
131 }
132
/*
 * Merges one linear quantization into another.  Word 0 of 'existing' is the
 * encoded lquantize() argument, from which the number of levels is decoded;
 * it is left untouched.  The levels + 2 words that follow it are summed
 * element-wise with the words at the same positions in 'new'.  'size' is
 * not consulted.
 */
/*ARGSUSED*/
static void
dt_aggregate_lquantize(int64_t *existing, int64_t *new, size_t size)
{
        int64_t arg = existing[0];
        uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
        int64_t *dst = &existing[1];
        int64_t *src = &new[1];
        int nbuckets = levels + 2;
        int b;

        for (b = 0; b < nbuckets; b++)
                dst[b] += src[b];
}
144
/*
 * Computes a weighted total for a linear quantization.  Word 0 carries the
 * encoded base, step and level count; the buckets follow.  The first bucket
 * is weighted by (base - 1), each of the 'levels' buckets after it by the
 * running base (advanced by 'step' per bucket), and the final bucket by one
 * more than the base reached after the last level.
 */
static long double
dt_aggregate_lquantizedsum(int64_t *lquanta)
{
        int64_t arg = lquanta[0];
        int64_t *bucket = &lquanta[1];
        int32_t base = DTRACE_LQUANTIZE_BASE(arg);
        uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
        uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
        uint16_t i;
        long double total;

        total = (long double)bucket[0] * (long double)(base - 1);

        for (i = 0; i < levels; i++) {
                total += (long double)bucket[i + 1] * (long double)base;
                base += step;
        }

        return (total + (long double)bucket[levels + 1] *
            (long double)(base + 1));
}
160
/*
 * Returns the count held in the bucket of a linear quantization whose
 * weight is zero, using the same weighting as dt_aggregate_lquantizedsum():
 * the first bucket weighs (base - 1), each level bucket weighs the running
 * base, and the last bucket weighs (final base + 1).  Returns 0 when no
 * bucket has a weight of zero.
 */
static int64_t
dt_aggregate_lquantizedzero(int64_t *lquanta)
{
        int64_t arg = lquanta[0];
        int64_t *bucket = &lquanta[1];
        int32_t base = DTRACE_LQUANTIZE_BASE(arg);
        uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
        uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
        uint16_t i;

        /* The leading bucket is the zero bucket when base is exactly 1. */
        if (base - 1 == 0)
                return (bucket[0]);

        for (i = 0; i < levels; i++) {
                if (base == 0)
                        return (bucket[i + 1]);

                base += step;
        }

        /* The trailing bucket is the zero bucket when base ended at -1. */
        if (base + 1 == 0)
                return (bucket[levels + 1]);

        return (0);
}
184
185 static int
186 dt_aggregate_lquantizedcmp(int64_t *lhs, int64_t *rhs)
187 {
188         long double lsum = dt_aggregate_lquantizedsum(lhs);
189         long double rsum = dt_aggregate_lquantizedsum(rhs);
190         int64_t lzero, rzero;
191
192         if (lsum < rsum)
193                 return (DT_LESSTHAN);
194
195         if (lsum > rsum)
196                 return (DT_GREATERTHAN);
197
198         /*
199          * If they're both equal, then we will compare based on the weights at
200          * zero.  If the weights at zero are equal (or if zero is not within
201          * the range of the linear quantization), then this will be judged a
202          * tie and will be resolved based on the key comparison.
203          */
204         lzero = dt_aggregate_lquantizedzero(lhs);
205         rzero = dt_aggregate_lquantizedzero(rhs);
206
207         if (lzero < rzero)
208                 return (DT_LESSTHAN);
209
210         if (lzero > rzero)
211                 return (DT_GREATERTHAN);
212
213         return (0);
214 }
215
/*
 * Merges one log/linear quantization into another.  Word 0 is skipped (it
 * is left exactly as found in 'existing'); every following 64-bit word, up
 * to size / sizeof (int64_t) words in total, has the matching word of 'new'
 * added to it.
 */
static void
dt_aggregate_llquantize(int64_t *existing, int64_t *new, size_t size)
{
        size_t nwords = size / sizeof (int64_t);
        size_t w;

        for (w = 1; w < nwords; w++)
                existing[w] += new[w];
}
224
/*
 * Computes a weighted total for a log/linear quantization.  Word 0 carries
 * the encoded factor, low and high orders and steps-per-order; the buckets
 * follow.
 *
 * 'value' starts at factor^low.  The first bucket is weighted by
 * (value - 1).  After that, buckets are consumed one at a time, each
 * weighted by the current 'value', which advances by 'step'.  Whenever
 * 'value' reaches 'next' (the start of the following order of magnitude),
 * 'next' and 'step' are recomputed and the order is incremented; the walk
 * stops once the order exceeds 'high'.  The bucket after the walk is
 * weighted by the final 'value'.
 */
static long double
dt_aggregate_llquantizedsum(int64_t *llquanta)
{
        int64_t arg = llquanta[0];
        int64_t *bucket = &llquanta[1];
        uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
        uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
        uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
        uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
        int64_t value = 1;
        int64_t next, step;
        long double total;
        int order;
        int bin = 0;

        assert(nsteps >= factor);
        assert(nsteps % factor == 0);

        for (order = 0; order < low; order++)
                value *= factor;

        total = (long double)bucket[bin++] * (long double)(value - 1);

        next = value * factor;
        step = next > nsteps ? next / nsteps : 1;

        while (order <= high) {
                assert(value < next);
                total += (long double)bucket[bin++] * (long double)(value);

                value += step;

                if (value == next) {
                        /* Crossed into the next order of magnitude. */
                        next = value * factor;
                        step = next > nsteps ? next / nsteps : 1;
                        order++;
                }
        }

        return (total + (long double)bucket[bin] * (long double)value);
}
262
263 static int
264 dt_aggregate_llquantizedcmp(int64_t *lhs, int64_t *rhs)
265 {
266         long double lsum = dt_aggregate_llquantizedsum(lhs);
267         long double rsum = dt_aggregate_llquantizedsum(rhs);
268         int64_t lzero, rzero;
269
270         if (lsum < rsum)
271                 return (DT_LESSTHAN);
272
273         if (lsum > rsum)
274                 return (DT_GREATERTHAN);
275
276         /*
277          * If they're both equal, then we will compare based on the weights at
278          * zero.  If the weights at zero are equal, then this will be judged a
279          * tie and will be resolved based on the key comparison.
280          */
281         lzero = lhs[1];
282         rzero = rhs[1];
283
284         if (lzero < rzero)
285                 return (DT_LESSTHAN);
286
287         if (lzero > rzero)
288                 return (DT_GREATERTHAN);
289
290         return (0);
291 }
292
293 static int
294 dt_aggregate_quantizedcmp(int64_t *lhs, int64_t *rhs)
295 {
296         int nbuckets = DTRACE_QUANTIZE_NBUCKETS;
297         long double ltotal = 0, rtotal = 0;
298         int64_t lzero, rzero;
299         uint_t i;
300
301         for (i = 0; i < nbuckets; i++) {
302                 int64_t bucketval = DTRACE_QUANTIZE_BUCKETVAL(i);
303
304                 if (bucketval == 0) {
305                         lzero = lhs[i];
306                         rzero = rhs[i];
307                 }
308
309                 ltotal += (long double)bucketval * (long double)lhs[i];
310                 rtotal += (long double)bucketval * (long double)rhs[i];
311         }
312
313         if (ltotal < rtotal)
314                 return (DT_LESSTHAN);
315
316         if (ltotal > rtotal)
317                 return (DT_GREATERTHAN);
318
319         /*
320          * If they're both equal, then we will compare based on the weights at
321          * zero.  If the weights at zero are equal, then this will be judged a
322          * tie and will be resolved based on the key comparison.
323          */
324         if (lzero < rzero)
325                 return (DT_LESSTHAN);
326
327         if (lzero > rzero)
328                 return (DT_GREATERTHAN);
329
330         return (0);
331 }
332
333 static void
334 dt_aggregate_usym(dtrace_hdl_t *dtp, uint64_t *data)
335 {
336         uint64_t pid = data[0];
337         uint64_t *pc = &data[1];
338         struct ps_prochandle *P;
339         GElf_Sym sym;
340
341         if (dtp->dt_vector != NULL)
342                 return;
343
344         if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL)
345                 return;
346
347         dt_proc_lock(dtp, P);
348
349         if (Plookup_by_addr(P, *pc, NULL, 0, &sym) == 0)
350                 *pc = sym.st_value;
351
352         dt_proc_unlock(dtp, P);
353         dt_proc_release(dtp, P);
354 }
355
356 static void
357 dt_aggregate_umod(dtrace_hdl_t *dtp, uint64_t *data)
358 {
359         uint64_t pid = data[0];
360         uint64_t *pc = &data[1];
361         struct ps_prochandle *P;
362         const prmap_t *map;
363
364         if (dtp->dt_vector != NULL)
365                 return;
366
367         if ((P = dt_proc_grab(dtp, pid, PGRAB_RDONLY | PGRAB_FORCE, 0)) == NULL)
368                 return;
369
370         dt_proc_lock(dtp, P);
371
372         if ((map = Paddr_to_map(P, *pc)) != NULL)
373                 *pc = map->pr_vaddr;
374
375         dt_proc_unlock(dtp, P);
376         dt_proc_release(dtp, P);
377 }
378
379 static void
380 dt_aggregate_sym(dtrace_hdl_t *dtp, uint64_t *data)
381 {
382         GElf_Sym sym;
383         uint64_t *pc = data;
384
385         if (dtrace_lookup_by_addr(dtp, *pc, &sym, NULL) == 0)
386                 *pc = sym.st_value;
387 }
388
389 static void
390 dt_aggregate_mod(dtrace_hdl_t *dtp, uint64_t *data)
391 {
392         uint64_t *pc = data;
393         dt_module_t *dmp;
394
395         if (dtp->dt_vector != NULL) {
396                 /*
397                  * We don't have a way of just getting the module for a
398                  * vectored open, and it doesn't seem to be worth defining
399                  * one.  This means that use of mod() won't get true
400                  * aggregation in the postmortem case (some modules may
401                  * appear more than once in aggregation output).  It seems
402                  * unlikely that anyone will ever notice or care...
403                  */
404                 return;
405         }
406
407         for (dmp = dt_list_next(&dtp->dt_modlist); dmp != NULL;
408             dmp = dt_list_next(dmp)) {
409                 if (*pc - dmp->dm_text_va < dmp->dm_text_size) {
410                         *pc = dmp->dm_text_va;
411                         return;
412                 }
413         }
414 }
415
416 static dtrace_aggvarid_t
417 dt_aggregate_aggvarid(dt_ahashent_t *ent)
418 {
419         dtrace_aggdesc_t *agg = ent->dtahe_data.dtada_desc;
420         caddr_t data = ent->dtahe_data.dtada_data;
421         dtrace_recdesc_t *rec = agg->dtagd_rec;
422
423         /*
424          * First, we'll check the variable ID in the aggdesc.  If it's valid,
425          * we'll return it.  If not, we'll use the compiler-generated ID
426          * present as the first record.
427          */
428         if (agg->dtagd_varid != DTRACE_AGGVARIDNONE)
429                 return (agg->dtagd_varid);
430
431         agg->dtagd_varid = *((dtrace_aggvarid_t *)(uintptr_t)(data +
432             rec->dtrd_offset));
433
434         return (agg->dtagd_varid);
435 }
436
437
438 static int
439 dt_aggregate_snap_cpu(dtrace_hdl_t *dtp, processorid_t cpu)
440 {
441         dtrace_epid_t id;
442         uint64_t hashval;
443         size_t offs, roffs, size, ndx;
444         int i, j, rval;
445         caddr_t addr, data;
446         dtrace_recdesc_t *rec;
447         dt_aggregate_t *agp = &dtp->dt_aggregate;
448         dtrace_aggdesc_t *agg;
449         dt_ahash_t *hash = &agp->dtat_hash;
450         dt_ahashent_t *h;
451         dtrace_bufdesc_t b = agp->dtat_buf, *buf = &b;
452         dtrace_aggdata_t *aggdata;
453         int flags = agp->dtat_flags;
454
455         buf->dtbd_cpu = cpu;
456
457 #ifdef illumos
458         if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, buf) == -1) {
459 #else
460         if (dt_ioctl(dtp, DTRACEIOC_AGGSNAP, &buf) == -1) {
461 #endif
462                 if (errno == ENOENT) {
463                         /*
464                          * If that failed with ENOENT, it may be because the
465                          * CPU was unconfigured.  This is okay; we'll just
466                          * do nothing but return success.
467                          */
468                         return (0);
469                 }
470
471                 return (dt_set_errno(dtp, errno));
472         }
473
474         if (buf->dtbd_drops != 0) {
475                 xo_open_instance("probes");
476                 dt_oformat_drop(dtp, cpu);
477                 if (dt_handle_cpudrop(dtp, cpu,
478                     DTRACEDROP_AGGREGATION, buf->dtbd_drops) == -1) {
479                         xo_close_instance("probes");
480                         return (-1);
481                 }
482                 xo_close_instance("probes");
483         }
484
485         if (buf->dtbd_size == 0)
486                 return (0);
487
488         if (hash->dtah_hash == NULL) {
489                 size_t size;
490
491                 hash->dtah_size = DTRACE_AHASHSIZE;
492                 size = hash->dtah_size * sizeof (dt_ahashent_t *);
493
494                 if ((hash->dtah_hash = malloc(size)) == NULL)
495                         return (dt_set_errno(dtp, EDT_NOMEM));
496
497                 bzero(hash->dtah_hash, size);
498         }
499
500         for (offs = 0; offs < buf->dtbd_size; ) {
501                 /*
502                  * We're guaranteed to have an ID.
503                  */
504                 id = *((dtrace_epid_t *)((uintptr_t)buf->dtbd_data +
505                     (uintptr_t)offs));
506
507                 if (id == DTRACE_AGGIDNONE) {
508                         /*
509                          * This is filler to assure proper alignment of the
510                          * next record; we simply ignore it.
511                          */
512                         offs += sizeof (id);
513                         continue;
514                 }
515
516                 if ((rval = dt_aggid_lookup(dtp, id, &agg)) != 0)
517                         return (rval);
518
519                 addr = buf->dtbd_data + offs;
520                 size = agg->dtagd_size;
521                 hashval = 0;
522
523                 for (j = 0; j < agg->dtagd_nrecs - 1; j++) {
524                         rec = &agg->dtagd_rec[j];
525                         roffs = rec->dtrd_offset;
526
527                         switch (rec->dtrd_action) {
528                         case DTRACEACT_USYM:
529                                 dt_aggregate_usym(dtp,
530                                     /* LINTED - alignment */
531                                     (uint64_t *)&addr[roffs]);
532                                 break;
533
534                         case DTRACEACT_UMOD:
535                                 dt_aggregate_umod(dtp,
536                                     /* LINTED - alignment */
537                                     (uint64_t *)&addr[roffs]);
538                                 break;
539
540                         case DTRACEACT_SYM:
541                                 /* LINTED - alignment */
542                                 dt_aggregate_sym(dtp, (uint64_t *)&addr[roffs]);
543                                 break;
544
545                         case DTRACEACT_MOD:
546                                 /* LINTED - alignment */
547                                 dt_aggregate_mod(dtp, (uint64_t *)&addr[roffs]);
548                                 break;
549
550                         default:
551                                 break;
552                         }
553
554                         for (i = 0; i < rec->dtrd_size; i++)
555                                 hashval += addr[roffs + i];
556                 }
557
558                 ndx = hashval % hash->dtah_size;
559
560                 for (h = hash->dtah_hash[ndx]; h != NULL; h = h->dtahe_next) {
561                         if (h->dtahe_hashval != hashval)
562                                 continue;
563
564                         if (h->dtahe_size != size)
565                                 continue;
566
567                         aggdata = &h->dtahe_data;
568                         data = aggdata->dtada_data;
569
570                         for (j = 0; j < agg->dtagd_nrecs - 1; j++) {
571                                 rec = &agg->dtagd_rec[j];
572                                 roffs = rec->dtrd_offset;
573
574                                 for (i = 0; i < rec->dtrd_size; i++)
575                                         if (addr[roffs + i] != data[roffs + i])
576                                                 goto hashnext;
577                         }
578
579                         /*
580                          * We found it.  Now we need to apply the aggregating
581                          * action on the data here.
582                          */
583                         rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];
584                         roffs = rec->dtrd_offset;
585                         /* LINTED - alignment */
586                         h->dtahe_aggregate((int64_t *)&data[roffs],
587                             /* LINTED - alignment */
588                             (int64_t *)&addr[roffs], rec->dtrd_size);
589
590                         /*
591                          * If we're keeping per CPU data, apply the aggregating
592                          * action there as well.
593                          */
594                         if (aggdata->dtada_percpu != NULL) {
595                                 data = aggdata->dtada_percpu[cpu];
596
597                                 /* LINTED - alignment */
598                                 h->dtahe_aggregate((int64_t *)data,
599                                     /* LINTED - alignment */
600                                     (int64_t *)&addr[roffs], rec->dtrd_size);
601                         }
602
603                         goto bufnext;
604 hashnext:
605                         continue;
606                 }
607
608                 /*
609                  * If we're here, we couldn't find an entry for this record.
610                  */
611                 if ((h = malloc(sizeof (dt_ahashent_t))) == NULL)
612                         return (dt_set_errno(dtp, EDT_NOMEM));
613                 bzero(h, sizeof (dt_ahashent_t));
614                 aggdata = &h->dtahe_data;
615
616                 if ((aggdata->dtada_data = malloc(size)) == NULL) {
617                         free(h);
618                         return (dt_set_errno(dtp, EDT_NOMEM));
619                 }
620
621                 bcopy(addr, aggdata->dtada_data, size);
622                 aggdata->dtada_size = size;
623                 aggdata->dtada_desc = agg;
624                 aggdata->dtada_handle = dtp;
625                 (void) dt_epid_lookup(dtp, agg->dtagd_epid,
626                     &aggdata->dtada_edesc, &aggdata->dtada_pdesc);
627                 aggdata->dtada_normal = 1;
628
629                 h->dtahe_hashval = hashval;
630                 h->dtahe_size = size;
631                 (void) dt_aggregate_aggvarid(h);
632
633                 rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];
634
635                 if (flags & DTRACE_A_PERCPU) {
636                         int max_cpus = agp->dtat_maxcpu;
637                         caddr_t *percpu = malloc(max_cpus * sizeof (caddr_t));
638
639                         if (percpu == NULL) {
640                                 free(aggdata->dtada_data);
641                                 free(h);
642                                 return (dt_set_errno(dtp, EDT_NOMEM));
643                         }
644
645                         for (j = 0; j < max_cpus; j++) {
646                                 percpu[j] = malloc(rec->dtrd_size);
647
648                                 if (percpu[j] == NULL) {
649                                         while (--j >= 0)
650                                                 free(percpu[j]);
651
652                                         free(aggdata->dtada_data);
653                                         free(h);
654                                         return (dt_set_errno(dtp, EDT_NOMEM));
655                                 }
656
657                                 if (j == cpu) {
658                                         bcopy(&addr[rec->dtrd_offset],
659                                             percpu[j], rec->dtrd_size);
660                                 } else {
661                                         bzero(percpu[j], rec->dtrd_size);
662                                 }
663                         }
664
665                         aggdata->dtada_percpu = percpu;
666                 }
667
668                 switch (rec->dtrd_action) {
669                 case DTRACEAGG_MIN:
670                         h->dtahe_aggregate = dt_aggregate_min;
671                         break;
672
673                 case DTRACEAGG_MAX:
674                         h->dtahe_aggregate = dt_aggregate_max;
675                         break;
676
677                 case DTRACEAGG_LQUANTIZE:
678                         h->dtahe_aggregate = dt_aggregate_lquantize;
679                         break;
680
681                 case DTRACEAGG_LLQUANTIZE:
682                         h->dtahe_aggregate = dt_aggregate_llquantize;
683                         break;
684
685                 case DTRACEAGG_COUNT:
686                 case DTRACEAGG_SUM:
687                 case DTRACEAGG_AVG:
688                 case DTRACEAGG_STDDEV:
689                 case DTRACEAGG_QUANTIZE:
690                         h->dtahe_aggregate = dt_aggregate_count;
691                         break;
692
693                 default:
694                         return (dt_set_errno(dtp, EDT_BADAGG));
695                 }
696
697                 if (hash->dtah_hash[ndx] != NULL)
698                         hash->dtah_hash[ndx]->dtahe_prev = h;
699
700                 h->dtahe_next = hash->dtah_hash[ndx];
701                 hash->dtah_hash[ndx] = h;
702
703                 if (hash->dtah_all != NULL)
704                         hash->dtah_all->dtahe_prevall = h;
705
706                 h->dtahe_nextall = hash->dtah_all;
707                 hash->dtah_all = h;
708 bufnext:
709                 offs += agg->dtagd_size;
710         }
711
712         return (0);
713 }
714
715 int
716 dtrace_aggregate_snap(dtrace_hdl_t *dtp)
717 {
718         int i, rval;
719         dt_aggregate_t *agp = &dtp->dt_aggregate;
720         hrtime_t now = gethrtime();
721         dtrace_optval_t interval = dtp->dt_options[DTRACEOPT_AGGRATE];
722
723         if (dtp->dt_lastagg != 0) {
724                 if (now - dtp->dt_lastagg < interval)
725                         return (0);
726
727                 dtp->dt_lastagg += interval;
728         } else {
729                 dtp->dt_lastagg = now;
730         }
731
732         if (!dtp->dt_active)
733                 return (dt_set_errno(dtp, EINVAL));
734
735         if (agp->dtat_buf.dtbd_size == 0)
736                 return (0);
737
738         for (i = 0; i < agp->dtat_ncpus; i++) {
739                 if ((rval = dt_aggregate_snap_cpu(dtp, agp->dtat_cpus[i])))
740                         return (rval);
741         }
742
743         return (0);
744 }
745
746 static int
747 dt_aggregate_hashcmp(const void *lhs, const void *rhs)
748 {
749         dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);
750         dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);
751         dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc;
752         dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc;
753
754         if (lagg->dtagd_nrecs < ragg->dtagd_nrecs)
755                 return (DT_LESSTHAN);
756
757         if (lagg->dtagd_nrecs > ragg->dtagd_nrecs)
758                 return (DT_GREATERTHAN);
759
760         return (0);
761 }
762
763 static int
764 dt_aggregate_varcmp(const void *lhs, const void *rhs)
765 {
766         dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);
767         dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);
768         dtrace_aggvarid_t lid, rid;
769
770         lid = dt_aggregate_aggvarid(lh);
771         rid = dt_aggregate_aggvarid(rh);
772
773         if (lid < rid)
774                 return (DT_LESSTHAN);
775
776         if (lid > rid)
777                 return (DT_GREATERTHAN);
778
779         return (0);
780 }
781
782 static int
783 dt_aggregate_keycmp(const void *lhs, const void *rhs)
784 {
785         dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);
786         dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);
787         dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc;
788         dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc;
789         dtrace_recdesc_t *lrec, *rrec;
790         char *ldata, *rdata;
791         int rval, i, j, keypos, nrecs;
792
793         if ((rval = dt_aggregate_hashcmp(lhs, rhs)) != 0)
794                 return (rval);
795
796         nrecs = lagg->dtagd_nrecs - 1;
797         assert(nrecs == ragg->dtagd_nrecs - 1);
798
799         keypos = dt_keypos + 1 >= nrecs ? 0 : dt_keypos;
800
801         for (i = 1; i < nrecs; i++) {
802                 uint64_t lval, rval;
803                 int ndx = i + keypos;
804
805                 if (ndx >= nrecs)
806                         ndx = ndx - nrecs + 1;
807
808                 lrec = &lagg->dtagd_rec[ndx];
809                 rrec = &ragg->dtagd_rec[ndx];
810
811                 ldata = lh->dtahe_data.dtada_data + lrec->dtrd_offset;
812                 rdata = rh->dtahe_data.dtada_data + rrec->dtrd_offset;
813
814                 if (lrec->dtrd_size < rrec->dtrd_size)
815                         return (DT_LESSTHAN);
816
817                 if (lrec->dtrd_size > rrec->dtrd_size)
818                         return (DT_GREATERTHAN);
819
820                 switch (lrec->dtrd_size) {
821                 case sizeof (uint64_t):
822                         /* LINTED - alignment */
823                         lval = *((uint64_t *)ldata);
824                         /* LINTED - alignment */
825                         rval = *((uint64_t *)rdata);
826                         break;
827
828                 case sizeof (uint32_t):
829                         /* LINTED - alignment */
830                         lval = *((uint32_t *)ldata);
831                         /* LINTED - alignment */
832                         rval = *((uint32_t *)rdata);
833                         break;
834
835                 case sizeof (uint16_t):
836                         /* LINTED - alignment */
837                         lval = *((uint16_t *)ldata);
838                         /* LINTED - alignment */
839                         rval = *((uint16_t *)rdata);
840                         break;
841
842                 case sizeof (uint8_t):
843                         lval = *((uint8_t *)ldata);
844                         rval = *((uint8_t *)rdata);
845                         break;
846
847                 default:
848                         switch (lrec->dtrd_action) {
849                         case DTRACEACT_UMOD:
850                         case DTRACEACT_UADDR:
851                         case DTRACEACT_USYM:
852                                 for (j = 0; j < 2; j++) {
853                                         /* LINTED - alignment */
854                                         lval = ((uint64_t *)ldata)[j];
855                                         /* LINTED - alignment */
856                                         rval = ((uint64_t *)rdata)[j];
857
858                                         if (lval < rval)
859                                                 return (DT_LESSTHAN);
860
861                                         if (lval > rval)
862                                                 return (DT_GREATERTHAN);
863                                 }
864
865                                 break;
866
867                         default:
868                                 for (j = 0; j < lrec->dtrd_size; j++) {
869                                         lval = ((uint8_t *)ldata)[j];
870                                         rval = ((uint8_t *)rdata)[j];
871
872                                         if (lval < rval)
873                                                 return (DT_LESSTHAN);
874
875                                         if (lval > rval)
876                                                 return (DT_GREATERTHAN);
877                                 }
878                         }
879
880                         continue;
881                 }
882
883                 if (lval < rval)
884                         return (DT_LESSTHAN);
885
886                 if (lval > rval)
887                         return (DT_GREATERTHAN);
888         }
889
890         return (0);
891 }
892
893 static int
894 dt_aggregate_valcmp(const void *lhs, const void *rhs)
895 {
896         dt_ahashent_t *lh = *((dt_ahashent_t **)lhs);
897         dt_ahashent_t *rh = *((dt_ahashent_t **)rhs);
898         dtrace_aggdesc_t *lagg = lh->dtahe_data.dtada_desc;
899         dtrace_aggdesc_t *ragg = rh->dtahe_data.dtada_desc;
900         caddr_t ldata = lh->dtahe_data.dtada_data;
901         caddr_t rdata = rh->dtahe_data.dtada_data;
902         dtrace_recdesc_t *lrec, *rrec;
903         int64_t *laddr, *raddr;
904         int rval;
905
906         assert(lagg->dtagd_nrecs == ragg->dtagd_nrecs);
907
908         lrec = &lagg->dtagd_rec[lagg->dtagd_nrecs - 1];
909         rrec = &ragg->dtagd_rec[ragg->dtagd_nrecs - 1];
910
911         assert(lrec->dtrd_action == rrec->dtrd_action);
912
913         laddr = (int64_t *)(uintptr_t)(ldata + lrec->dtrd_offset);
914         raddr = (int64_t *)(uintptr_t)(rdata + rrec->dtrd_offset);
915
916         switch (lrec->dtrd_action) {
917         case DTRACEAGG_AVG:
918                 rval = dt_aggregate_averagecmp(laddr, raddr);
919                 break;
920
921         case DTRACEAGG_STDDEV:
922                 rval = dt_aggregate_stddevcmp(laddr, raddr);
923                 break;
924
925         case DTRACEAGG_QUANTIZE:
926                 rval = dt_aggregate_quantizedcmp(laddr, raddr);
927                 break;
928
929         case DTRACEAGG_LQUANTIZE:
930                 rval = dt_aggregate_lquantizedcmp(laddr, raddr);
931                 break;
932
933         case DTRACEAGG_LLQUANTIZE:
934                 rval = dt_aggregate_llquantizedcmp(laddr, raddr);
935                 break;
936
937         case DTRACEAGG_COUNT:
938         case DTRACEAGG_SUM:
939         case DTRACEAGG_MIN:
940         case DTRACEAGG_MAX:
941                 rval = dt_aggregate_countcmp(laddr, raddr);
942                 break;
943
944         default:
945                 assert(0);
946         }
947
948         return (rval);
949 }
950
/*
 * Comparator that orders two aggregation elements by value, falling back to
 * the keys when the values compare equal.
 */
static int
dt_aggregate_valkeycmp(const void *lhs, const void *rhs)
{
	int order = dt_aggregate_valcmp(lhs, rhs);

	/*
	 * Equal values:  the key layout of the two elements is already known
	 * to match, so the keys themselves serve as the tie-breaker.
	 */
	if (order == 0)
		order = dt_aggregate_keycmp(lhs, rhs);

	return (order);
}
966
/*
 * Comparator that orders primarily by key (dt_aggregate_keycmp()) and uses
 * the aggregation variable (dt_aggregate_varcmp()) to break ties.
 */
static int
dt_aggregate_keyvarcmp(const void *lhs, const void *rhs)
{
	int order = dt_aggregate_keycmp(lhs, rhs);

	if (order == 0)
		order = dt_aggregate_varcmp(lhs, rhs);

	return (order);
}
977
/*
 * Comparator that orders primarily by aggregation variable
 * (dt_aggregate_varcmp()) and uses the key (dt_aggregate_keycmp()) to break
 * ties.
 */
static int
dt_aggregate_varkeycmp(const void *lhs, const void *rhs)
{
	int order = dt_aggregate_varcmp(lhs, rhs);

	if (order == 0)
		order = dt_aggregate_keycmp(lhs, rhs);

	return (order);
}
988
/*
 * Comparator that orders by value and then key (dt_aggregate_valkeycmp()),
 * using the aggregation variable (dt_aggregate_varcmp()) as the final
 * tie-breaker.
 */
static int
dt_aggregate_valvarcmp(const void *lhs, const void *rhs)
{
	int order = dt_aggregate_valkeycmp(lhs, rhs);

	if (order == 0)
		order = dt_aggregate_varcmp(lhs, rhs);

	return (order);
}
999
/*
 * Comparator that orders primarily by aggregation variable
 * (dt_aggregate_varcmp()); elements of the same variable are then ordered by
 * value and key (dt_aggregate_valkeycmp()).
 */
static int
dt_aggregate_varvalcmp(const void *lhs, const void *rhs)
{
	int order = dt_aggregate_varcmp(lhs, rhs);

	if (order == 0)
		order = dt_aggregate_valkeycmp(lhs, rhs);

	return (order);
}
1010
/*
 * Reverse of dt_aggregate_keyvarcmp():  the operands are swapped before
 * being handed to the forward comparator.
 */
static int
dt_aggregate_keyvarrevcmp(const void *lhs, const void *rhs)
{
	int order;

	order = dt_aggregate_keyvarcmp(rhs, lhs);

	return (order);
}
1016
/*
 * Reverse of dt_aggregate_varkeycmp():  the operands are swapped before
 * being handed to the forward comparator.
 */
static int
dt_aggregate_varkeyrevcmp(const void *lhs, const void *rhs)
{
	int order;

	order = dt_aggregate_varkeycmp(rhs, lhs);

	return (order);
}
1022
/*
 * Reverse of dt_aggregate_valvarcmp():  the operands are swapped before
 * being handed to the forward comparator.
 */
static int
dt_aggregate_valvarrevcmp(const void *lhs, const void *rhs)
{
	int order;

	order = dt_aggregate_valvarcmp(rhs, lhs);

	return (order);
}
1028
/*
 * Reverse of dt_aggregate_varvalcmp():  the operands are swapped before
 * being handed to the forward comparator.
 */
static int
dt_aggregate_varvalrevcmp(const void *lhs, const void *rhs)
{
	int order;

	order = dt_aggregate_varvalcmp(rhs, lhs);

	return (order);
}
1034
1035 static int
1036 dt_aggregate_bundlecmp(const void *lhs, const void *rhs)
1037 {
1038         dt_ahashent_t **lh = *((dt_ahashent_t ***)lhs);
1039         dt_ahashent_t **rh = *((dt_ahashent_t ***)rhs);
1040         int i, rval;
1041
1042         if (dt_keysort) {
1043                 /*
1044                  * If we're sorting on keys, we need to scan until we find the
1045                  * last entry -- that's the representative key.  (The order of
1046                  * the bundle is values followed by key to accommodate the
1047                  * default behavior of sorting by value.)  If the keys are
1048                  * equal, we'll fall into the value comparison loop, below.
1049                  */
1050                 for (i = 0; lh[i + 1] != NULL; i++)
1051                         continue;
1052
1053                 assert(i != 0);
1054                 assert(rh[i + 1] == NULL);
1055
1056                 if ((rval = dt_aggregate_keycmp(&lh[i], &rh[i])) != 0)
1057                         return (rval);
1058         }
1059
1060         for (i = 0; ; i++) {
1061                 if (lh[i + 1] == NULL) {
1062                         /*
1063                          * All of the values are equal; if we're sorting on
1064                          * keys, then we're only here because the keys were
1065                          * found to be equal and these records are therefore
1066                          * equal.  If we're not sorting on keys, we'll use the
1067                          * key comparison from the representative key as the
1068                          * tie-breaker.
1069                          */
1070                         if (dt_keysort)
1071                                 return (0);
1072
1073                         assert(i != 0);
1074                         assert(rh[i + 1] == NULL);
1075                         return (dt_aggregate_keycmp(&lh[i], &rh[i]));
1076                 } else {
1077                         if ((rval = dt_aggregate_valcmp(&lh[i], &rh[i])) != 0)
1078                                 return (rval);
1079                 }
1080         }
1081 }
1082
1083 int
1084 dt_aggregate_go(dtrace_hdl_t *dtp)
1085 {
1086         dt_aggregate_t *agp = &dtp->dt_aggregate;
1087         dtrace_optval_t size, cpu;
1088         dtrace_bufdesc_t *buf = &agp->dtat_buf;
1089         int rval, i;
1090
1091         assert(agp->dtat_maxcpu == 0);
1092         assert(agp->dtat_ncpu == 0);
1093         assert(agp->dtat_cpus == NULL);
1094
1095         agp->dtat_maxcpu = dt_sysconf(dtp, _SC_CPUID_MAX) + 1;
1096         agp->dtat_ncpu = dt_sysconf(dtp, _SC_NPROCESSORS_MAX);
1097         agp->dtat_cpus = malloc(agp->dtat_ncpu * sizeof (processorid_t));
1098
1099         if (agp->dtat_cpus == NULL)
1100                 return (dt_set_errno(dtp, EDT_NOMEM));
1101
1102         /*
1103          * Use the aggregation buffer size as reloaded from the kernel.
1104          */
1105         size = dtp->dt_options[DTRACEOPT_AGGSIZE];
1106
1107         rval = dtrace_getopt(dtp, "aggsize", &size);
1108         assert(rval == 0);
1109
1110         if (size == 0 || size == DTRACEOPT_UNSET)
1111                 return (0);
1112
1113         buf = &agp->dtat_buf;
1114         buf->dtbd_size = size;
1115
1116         if ((buf->dtbd_data = malloc(buf->dtbd_size)) == NULL)
1117                 return (dt_set_errno(dtp, EDT_NOMEM));
1118
1119         /*
1120          * Now query for the CPUs enabled.
1121          */
1122         rval = dtrace_getopt(dtp, "cpu", &cpu);
1123         assert(rval == 0 && cpu != DTRACEOPT_UNSET);
1124
1125         if (cpu != DTRACE_CPUALL) {
1126                 assert(cpu < agp->dtat_ncpu);
1127                 agp->dtat_cpus[agp->dtat_ncpus++] = (processorid_t)cpu;
1128
1129                 return (0);
1130         }
1131
1132         agp->dtat_ncpus = 0;
1133         for (i = 0; i < agp->dtat_maxcpu; i++) {
1134                 if (dt_status(dtp, i) == -1)
1135                         continue;
1136
1137                 agp->dtat_cpus[agp->dtat_ncpus++] = i;
1138         }
1139
1140         return (0);
1141 }
1142
1143 static int
1144 dt_aggwalk_rval(dtrace_hdl_t *dtp, dt_ahashent_t *h, int rval)
1145 {
1146         dt_aggregate_t *agp = &dtp->dt_aggregate;
1147         dtrace_aggdata_t *data;
1148         dtrace_aggdesc_t *aggdesc;
1149         dtrace_recdesc_t *rec;
1150         int i;
1151
1152         switch (rval) {
1153         case DTRACE_AGGWALK_NEXT:
1154                 break;
1155
1156         case DTRACE_AGGWALK_CLEAR: {
1157                 uint32_t size, offs = 0;
1158
1159                 aggdesc = h->dtahe_data.dtada_desc;
1160                 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1161                 size = rec->dtrd_size;
1162                 data = &h->dtahe_data;
1163
1164                 if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) {
1165                         offs = sizeof (uint64_t);
1166                         size -= sizeof (uint64_t);
1167                 }
1168
1169                 bzero(&data->dtada_data[rec->dtrd_offset] + offs, size);
1170
1171                 if (data->dtada_percpu == NULL)
1172                         break;
1173
1174                 for (i = 0; i < dtp->dt_aggregate.dtat_maxcpu; i++)
1175                         bzero(data->dtada_percpu[i] + offs, size);
1176                 break;
1177         }
1178
1179         case DTRACE_AGGWALK_ERROR:
1180                 /*
1181                  * We assume that errno is already set in this case.
1182                  */
1183                 return (dt_set_errno(dtp, errno));
1184
1185         case DTRACE_AGGWALK_ABORT:
1186                 return (dt_set_errno(dtp, EDT_DIRABORT));
1187
1188         case DTRACE_AGGWALK_DENORMALIZE:
1189                 h->dtahe_data.dtada_normal = 1;
1190                 return (0);
1191
1192         case DTRACE_AGGWALK_NORMALIZE:
1193                 if (h->dtahe_data.dtada_normal == 0) {
1194                         h->dtahe_data.dtada_normal = 1;
1195                         return (dt_set_errno(dtp, EDT_BADRVAL));
1196                 }
1197
1198                 return (0);
1199
1200         case DTRACE_AGGWALK_REMOVE: {
1201                 dtrace_aggdata_t *aggdata = &h->dtahe_data;
1202                 int max_cpus = agp->dtat_maxcpu;
1203
1204                 /*
1205                  * First, remove this hash entry from its hash chain.
1206                  */
1207                 if (h->dtahe_prev != NULL) {
1208                         h->dtahe_prev->dtahe_next = h->dtahe_next;
1209                 } else {
1210                         dt_ahash_t *hash = &agp->dtat_hash;
1211                         size_t ndx = h->dtahe_hashval % hash->dtah_size;
1212
1213                         assert(hash->dtah_hash[ndx] == h);
1214                         hash->dtah_hash[ndx] = h->dtahe_next;
1215                 }
1216
1217                 if (h->dtahe_next != NULL)
1218                         h->dtahe_next->dtahe_prev = h->dtahe_prev;
1219
1220                 /*
1221                  * Now remove it from the list of all hash entries.
1222                  */
1223                 if (h->dtahe_prevall != NULL) {
1224                         h->dtahe_prevall->dtahe_nextall = h->dtahe_nextall;
1225                 } else {
1226                         dt_ahash_t *hash = &agp->dtat_hash;
1227
1228                         assert(hash->dtah_all == h);
1229                         hash->dtah_all = h->dtahe_nextall;
1230                 }
1231
1232                 if (h->dtahe_nextall != NULL)
1233                         h->dtahe_nextall->dtahe_prevall = h->dtahe_prevall;
1234
1235                 /*
1236                  * We're unlinked.  We can safely destroy the data.
1237                  */
1238                 if (aggdata->dtada_percpu != NULL) {
1239                         for (i = 0; i < max_cpus; i++)
1240                                 free(aggdata->dtada_percpu[i]);
1241                         free(aggdata->dtada_percpu);
1242                 }
1243
1244                 free(aggdata->dtada_data);
1245                 free(h);
1246
1247                 return (0);
1248         }
1249
1250         default:
1251                 return (dt_set_errno(dtp, EDT_BADRVAL));
1252         }
1253
1254         return (0);
1255 }
1256
1257 void
1258 dt_aggregate_qsort(dtrace_hdl_t *dtp, void *base, size_t nel, size_t width,
1259     int (*compar)(const void *, const void *))
1260 {
1261         int rev = dt_revsort, key = dt_keysort, keypos = dt_keypos;
1262         dtrace_optval_t keyposopt = dtp->dt_options[DTRACEOPT_AGGSORTKEYPOS];
1263
1264         dt_revsort = (dtp->dt_options[DTRACEOPT_AGGSORTREV] != DTRACEOPT_UNSET);
1265         dt_keysort = (dtp->dt_options[DTRACEOPT_AGGSORTKEY] != DTRACEOPT_UNSET);
1266
1267         if (keyposopt != DTRACEOPT_UNSET && keyposopt <= INT_MAX) {
1268                 dt_keypos = (int)keyposopt;
1269         } else {
1270                 dt_keypos = 0;
1271         }
1272
1273         if (compar == NULL) {
1274                 if (!dt_keysort) {
1275                         compar = dt_aggregate_varvalcmp;
1276                 } else {
1277                         compar = dt_aggregate_varkeycmp;
1278                 }
1279         }
1280
1281         qsort(base, nel, width, compar);
1282
1283         dt_revsort = rev;
1284         dt_keysort = key;
1285         dt_keypos = keypos;
1286 }
1287
1288 int
1289 dtrace_aggregate_walk(dtrace_hdl_t *dtp, dtrace_aggregate_f *func, void *arg)
1290 {
1291         dt_ahashent_t *h, *next;
1292         dt_ahash_t *hash = &dtp->dt_aggregate.dtat_hash;
1293
1294         for (h = hash->dtah_all; h != NULL; h = next) {
1295                 /*
1296                  * dt_aggwalk_rval() can potentially remove the current hash
1297                  * entry; we need to load the next hash entry before calling
1298                  * into it.
1299                  */
1300                 next = h->dtahe_nextall;
1301
1302                 if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1)
1303                         return (-1);
1304         }
1305
1306         return (0);
1307 }
1308
1309 static int
1310 dt_aggregate_total(dtrace_hdl_t *dtp, boolean_t clear)
1311 {
1312         dt_ahashent_t *h;
1313         dtrace_aggdata_t **total;
1314         dtrace_aggid_t max = DTRACE_AGGVARIDNONE, id;
1315         dt_aggregate_t *agp = &dtp->dt_aggregate;
1316         dt_ahash_t *hash = &agp->dtat_hash;
1317         uint32_t tflags;
1318
1319         tflags = DTRACE_A_TOTAL | DTRACE_A_HASNEGATIVES | DTRACE_A_HASPOSITIVES;
1320
1321         /*
1322          * If we need to deliver per-aggregation totals, we're going to take
1323          * three passes over the aggregate:  one to clear everything out and
1324          * determine our maximum aggregation ID, one to actually total
1325          * everything up, and a final pass to assign the totals to the
1326          * individual elements.
1327          */
1328         for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1329                 dtrace_aggdata_t *aggdata = &h->dtahe_data;
1330
1331                 if ((id = dt_aggregate_aggvarid(h)) > max)
1332                         max = id;
1333
1334                 aggdata->dtada_total = 0;
1335                 aggdata->dtada_flags &= ~tflags;
1336         }
1337
1338         if (clear || max == DTRACE_AGGVARIDNONE)
1339                 return (0);
1340
1341         total = dt_zalloc(dtp, (max + 1) * sizeof (dtrace_aggdata_t *));
1342
1343         if (total == NULL)
1344                 return (-1);
1345
1346         for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1347                 dtrace_aggdata_t *aggdata = &h->dtahe_data;
1348                 dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1349                 dtrace_recdesc_t *rec;
1350                 caddr_t data;
1351                 int64_t val, *addr;
1352
1353                 rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];
1354                 data = aggdata->dtada_data;
1355                 addr = (int64_t *)(uintptr_t)(data + rec->dtrd_offset);
1356
1357                 switch (rec->dtrd_action) {
1358                 case DTRACEAGG_STDDEV:
1359                         val = dt_stddev((uint64_t *)addr, 1);
1360                         break;
1361
1362                 case DTRACEAGG_SUM:
1363                 case DTRACEAGG_COUNT:
1364                         val = *addr;
1365                         break;
1366
1367                 case DTRACEAGG_AVG:
1368                         val = addr[0] ? (addr[1] / addr[0]) : 0;
1369                         break;
1370
1371                 default:
1372                         continue;
1373                 }
1374
1375                 if (total[agg->dtagd_varid] == NULL) {
1376                         total[agg->dtagd_varid] = aggdata;
1377                         aggdata->dtada_flags |= DTRACE_A_TOTAL;
1378                 } else {
1379                         aggdata = total[agg->dtagd_varid];
1380                 }
1381
1382                 if (val > 0)
1383                         aggdata->dtada_flags |= DTRACE_A_HASPOSITIVES;
1384
1385                 if (val < 0) {
1386                         aggdata->dtada_flags |= DTRACE_A_HASNEGATIVES;
1387                         val = -val;
1388                 }
1389
1390                 if (dtp->dt_options[DTRACEOPT_AGGZOOM] != DTRACEOPT_UNSET) {
1391                         val = (int64_t)((long double)val *
1392                             (1 / DTRACE_AGGZOOM_MAX));
1393
1394                         if (val > aggdata->dtada_total)
1395                                 aggdata->dtada_total = val;
1396                 } else {
1397                         aggdata->dtada_total += val;
1398                 }
1399         }
1400
1401         /*
1402          * And now one final pass to set everyone's total.
1403          */
1404         for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1405                 dtrace_aggdata_t *aggdata = &h->dtahe_data, *t;
1406                 dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1407
1408                 if ((t = total[agg->dtagd_varid]) == NULL || aggdata == t)
1409                         continue;
1410
1411                 aggdata->dtada_total = t->dtada_total;
1412                 aggdata->dtada_flags |= (t->dtada_flags & tflags);
1413         }
1414
1415         dt_free(dtp, total);
1416
1417         return (0);
1418 }
1419
1420 static int
1421 dt_aggregate_minmaxbin(dtrace_hdl_t *dtp, boolean_t clear)
1422 {
1423         dt_ahashent_t *h;
1424         dtrace_aggdata_t **minmax;
1425         dtrace_aggid_t max = DTRACE_AGGVARIDNONE, id;
1426         dt_aggregate_t *agp = &dtp->dt_aggregate;
1427         dt_ahash_t *hash = &agp->dtat_hash;
1428
1429         for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1430                 dtrace_aggdata_t *aggdata = &h->dtahe_data;
1431
1432                 if ((id = dt_aggregate_aggvarid(h)) > max)
1433                         max = id;
1434
1435                 aggdata->dtada_minbin = 0;
1436                 aggdata->dtada_maxbin = 0;
1437                 aggdata->dtada_flags &= ~DTRACE_A_MINMAXBIN;
1438         }
1439
1440         if (clear || max == DTRACE_AGGVARIDNONE)
1441                 return (0);
1442
1443         minmax = dt_zalloc(dtp, (max + 1) * sizeof (dtrace_aggdata_t *));
1444
1445         if (minmax == NULL)
1446                 return (-1);
1447
1448         for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1449                 dtrace_aggdata_t *aggdata = &h->dtahe_data;
1450                 dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1451                 dtrace_recdesc_t *rec;
1452                 caddr_t data;
1453                 int64_t *addr;
1454                 int minbin = -1, maxbin = -1, i;
1455                 int start = 0, size;
1456
1457                 rec = &agg->dtagd_rec[agg->dtagd_nrecs - 1];
1458                 size = rec->dtrd_size / sizeof (int64_t);
1459                 data = aggdata->dtada_data;
1460                 addr = (int64_t *)(uintptr_t)(data + rec->dtrd_offset);
1461
1462                 switch (rec->dtrd_action) {
1463                 case DTRACEAGG_LQUANTIZE:
1464                         /*
1465                          * For lquantize(), we always display the entire range
1466                          * of the aggregation when aggpack is set.
1467                          */
1468                         start = 1;
1469                         minbin = start;
1470                         maxbin = size - 1 - start;
1471                         break;
1472
1473                 case DTRACEAGG_QUANTIZE:
1474                         for (i = start; i < size; i++) {
1475                                 if (!addr[i])
1476                                         continue;
1477
1478                                 if (minbin == -1)
1479                                         minbin = i - start;
1480
1481                                 maxbin = i - start;
1482                         }
1483
1484                         if (minbin == -1) {
1485                                 /*
1486                                  * If we have no data (e.g., due to a clear()
1487                                  * or negative increments), we'll use the
1488                                  * zero bucket as both our min and max.
1489                                  */
1490                                 minbin = maxbin = DTRACE_QUANTIZE_ZEROBUCKET;
1491                         }
1492
1493                         break;
1494
1495                 default:
1496                         continue;
1497                 }
1498
1499                 if (minmax[agg->dtagd_varid] == NULL) {
1500                         minmax[agg->dtagd_varid] = aggdata;
1501                         aggdata->dtada_flags |= DTRACE_A_MINMAXBIN;
1502                         aggdata->dtada_minbin = minbin;
1503                         aggdata->dtada_maxbin = maxbin;
1504                         continue;
1505                 }
1506
1507                 if (minbin < minmax[agg->dtagd_varid]->dtada_minbin)
1508                         minmax[agg->dtagd_varid]->dtada_minbin = minbin;
1509
1510                 if (maxbin > minmax[agg->dtagd_varid]->dtada_maxbin)
1511                         minmax[agg->dtagd_varid]->dtada_maxbin = maxbin;
1512         }
1513
1514         /*
1515          * And now one final pass to set everyone's minbin and maxbin.
1516          */
1517         for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1518                 dtrace_aggdata_t *aggdata = &h->dtahe_data, *mm;
1519                 dtrace_aggdesc_t *agg = aggdata->dtada_desc;
1520
1521                 if ((mm = minmax[agg->dtagd_varid]) == NULL || aggdata == mm)
1522                         continue;
1523
1524                 aggdata->dtada_minbin = mm->dtada_minbin;
1525                 aggdata->dtada_maxbin = mm->dtada_maxbin;
1526                 aggdata->dtada_flags |= DTRACE_A_MINMAXBIN;
1527         }
1528
1529         dt_free(dtp, minmax);
1530
1531         return (0);
1532 }
1533
1534 static int
1535 dt_aggregate_walk_sorted(dtrace_hdl_t *dtp,
1536     dtrace_aggregate_f *func, void *arg,
1537     int (*sfunc)(const void *, const void *))
1538 {
1539         dt_aggregate_t *agp = &dtp->dt_aggregate;
1540         dt_ahashent_t *h, **sorted;
1541         dt_ahash_t *hash = &agp->dtat_hash;
1542         size_t i, nentries = 0;
1543         int rval = -1;
1544
1545         agp->dtat_flags &= ~(DTRACE_A_TOTAL | DTRACE_A_MINMAXBIN);
1546
1547         if (dtp->dt_options[DTRACEOPT_AGGHIST] != DTRACEOPT_UNSET) {
1548                 agp->dtat_flags |= DTRACE_A_TOTAL;
1549
1550                 if (dt_aggregate_total(dtp, B_FALSE) != 0)
1551                         return (-1);
1552         }
1553
1554         if (dtp->dt_options[DTRACEOPT_AGGPACK] != DTRACEOPT_UNSET) {
1555                 agp->dtat_flags |= DTRACE_A_MINMAXBIN;
1556
1557                 if (dt_aggregate_minmaxbin(dtp, B_FALSE) != 0)
1558                         return (-1);
1559         }
1560
1561         for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall)
1562                 nentries++;
1563
1564         sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *));
1565
1566         if (sorted == NULL)
1567                 goto out;
1568
1569         for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall)
1570                 sorted[i++] = h;
1571
1572         (void) pthread_mutex_lock(&dt_qsort_lock);
1573
1574         if (sfunc == NULL) {
1575                 dt_aggregate_qsort(dtp, sorted, nentries,
1576                     sizeof (dt_ahashent_t *), NULL);
1577         } else {
1578                 /*
1579                  * If we've been explicitly passed a sorting function,
1580                  * we'll use that -- ignoring the values of the "aggsortrev",
1581                  * "aggsortkey" and "aggsortkeypos" options.
1582                  */
1583                 qsort(sorted, nentries, sizeof (dt_ahashent_t *), sfunc);
1584         }
1585
1586         (void) pthread_mutex_unlock(&dt_qsort_lock);
1587
1588         for (i = 0; i < nentries; i++) {
1589                 h = sorted[i];
1590
1591                 if (dt_aggwalk_rval(dtp, h, func(&h->dtahe_data, arg)) == -1)
1592                         goto out;
1593         }
1594
1595         rval = 0;
1596 out:
1597         if (agp->dtat_flags & DTRACE_A_TOTAL)
1598                 (void) dt_aggregate_total(dtp, B_TRUE);
1599
1600         if (agp->dtat_flags & DTRACE_A_MINMAXBIN)
1601                 (void) dt_aggregate_minmaxbin(dtp, B_TRUE);
1602
1603         dt_free(dtp, sorted);
1604         return (rval);
1605 }
1606
1607 int
1608 dtrace_aggregate_walk_sorted(dtrace_hdl_t *dtp,
1609     dtrace_aggregate_f *func, void *arg)
1610 {
1611         return (dt_aggregate_walk_sorted(dtp, func, arg, NULL));
1612 }
1613
1614 int
1615 dtrace_aggregate_walk_keysorted(dtrace_hdl_t *dtp,
1616     dtrace_aggregate_f *func, void *arg)
1617 {
1618         return (dt_aggregate_walk_sorted(dtp, func,
1619             arg, dt_aggregate_varkeycmp));
1620 }
1621
1622 int
1623 dtrace_aggregate_walk_valsorted(dtrace_hdl_t *dtp,
1624     dtrace_aggregate_f *func, void *arg)
1625 {
1626         return (dt_aggregate_walk_sorted(dtp, func,
1627             arg, dt_aggregate_varvalcmp));
1628 }
1629
1630 int
1631 dtrace_aggregate_walk_keyvarsorted(dtrace_hdl_t *dtp,
1632     dtrace_aggregate_f *func, void *arg)
1633 {
1634         return (dt_aggregate_walk_sorted(dtp, func,
1635             arg, dt_aggregate_keyvarcmp));
1636 }
1637
1638 int
1639 dtrace_aggregate_walk_valvarsorted(dtrace_hdl_t *dtp,
1640     dtrace_aggregate_f *func, void *arg)
1641 {
1642         return (dt_aggregate_walk_sorted(dtp, func,
1643             arg, dt_aggregate_valvarcmp));
1644 }
1645
1646 int
1647 dtrace_aggregate_walk_keyrevsorted(dtrace_hdl_t *dtp,
1648     dtrace_aggregate_f *func, void *arg)
1649 {
1650         return (dt_aggregate_walk_sorted(dtp, func,
1651             arg, dt_aggregate_varkeyrevcmp));
1652 }
1653
1654 int
1655 dtrace_aggregate_walk_valrevsorted(dtrace_hdl_t *dtp,
1656     dtrace_aggregate_f *func, void *arg)
1657 {
1658         return (dt_aggregate_walk_sorted(dtp, func,
1659             arg, dt_aggregate_varvalrevcmp));
1660 }
1661
1662 int
1663 dtrace_aggregate_walk_keyvarrevsorted(dtrace_hdl_t *dtp,
1664     dtrace_aggregate_f *func, void *arg)
1665 {
1666         return (dt_aggregate_walk_sorted(dtp, func,
1667             arg, dt_aggregate_keyvarrevcmp));
1668 }
1669
1670 int
1671 dtrace_aggregate_walk_valvarrevsorted(dtrace_hdl_t *dtp,
1672     dtrace_aggregate_f *func, void *arg)
1673 {
1674         return (dt_aggregate_walk_sorted(dtp, func,
1675             arg, dt_aggregate_valvarrevcmp));
1676 }
1677
1678 int
1679 dtrace_aggregate_walk_joined(dtrace_hdl_t *dtp, dtrace_aggvarid_t *aggvars,
1680     int naggvars, dtrace_aggregate_walk_joined_f *func, void *arg)
1681 {
1682         dt_aggregate_t *agp = &dtp->dt_aggregate;
1683         dt_ahashent_t *h, **sorted = NULL, ***bundle, **nbundle;
1684         const dtrace_aggdata_t **data;
1685         dt_ahashent_t *zaggdata = NULL;
1686         dt_ahash_t *hash = &agp->dtat_hash;
1687         size_t nentries = 0, nbundles = 0, start, zsize = 0, bundlesize;
1688         dtrace_aggvarid_t max = 0, aggvar;
1689         int rval = -1, *map, *remap = NULL;
1690         int i, j;
1691         dtrace_optval_t sortpos = dtp->dt_options[DTRACEOPT_AGGSORTPOS];
1692
1693         /*
1694          * If the sorting position is greater than the number of aggregation
1695          * variable IDs, we silently set it to 0.
1696          */
1697         if (sortpos == DTRACEOPT_UNSET || sortpos >= naggvars)
1698                 sortpos = 0;
1699
1700         /*
1701          * First we need to translate the specified aggregation variable IDs
1702          * into a linear map that will allow us to translate an aggregation
1703          * variable ID into its position in the specified aggvars.
1704          */
1705         for (i = 0; i < naggvars; i++) {
1706                 if (aggvars[i] == DTRACE_AGGVARIDNONE || aggvars[i] < 0)
1707                         return (dt_set_errno(dtp, EDT_BADAGGVAR));
1708
1709                 if (aggvars[i] > max)
1710                         max = aggvars[i];
1711         }
1712
1713         if ((map = dt_zalloc(dtp, (max + 1) * sizeof (int))) == NULL)
1714                 return (-1);
1715
1716         zaggdata = dt_zalloc(dtp, naggvars * sizeof (dt_ahashent_t));
1717
1718         if (zaggdata == NULL)
1719                 goto out;
1720
1721         for (i = 0; i < naggvars; i++) {
1722                 int ndx = i + sortpos;
1723
1724                 if (ndx >= naggvars)
1725                         ndx -= naggvars;
1726
1727                 aggvar = aggvars[ndx];
1728                 assert(aggvar <= max);
1729
1730                 if (map[aggvar]) {
1731                         /*
1732                          * We have an aggregation variable that is present
1733                          * more than once in the array of aggregation
1734                          * variables.  While it's unclear why one might want
1735                          * to do this, it's legal.  To support this construct,
1736                          * we will allocate a remap that will indicate the
1737                          * position from which this aggregation variable
1738                          * should be pulled.  (That is, where the remap will
1739                          * map from one position to another.)
1740                          */
1741                         if (remap == NULL) {
1742                                 remap = dt_zalloc(dtp, naggvars * sizeof (int));
1743
1744                                 if (remap == NULL)
1745                                         goto out;
1746                         }
1747
1748                         /*
1749                          * Given that the variable is already present, assert
1750                          * that following through the mapping and adjusting
1751                          * for the sort position yields the same aggregation
1752                          * variable ID.
1753                          */
1754                         assert(aggvars[(map[aggvar] - 1 + sortpos) %
1755                             naggvars] == aggvars[ndx]);
1756
1757                         remap[i] = map[aggvar];
1758                         continue;
1759                 }
1760
1761                 map[aggvar] = i + 1;
1762         }
1763
1764         /*
1765          * We need to take two passes over the data to size our allocation, so
1766          * we'll use the first pass to also fill in the zero-filled data to be
1767          * used to properly format a zero-valued aggregation.
1768          */
1769         for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
1770                 dtrace_aggvarid_t id;
1771                 int ndx;
1772
1773                 if ((id = dt_aggregate_aggvarid(h)) > max || !(ndx = map[id]))
1774                         continue;
1775
1776                 if (zaggdata[ndx - 1].dtahe_size == 0) {
1777                         zaggdata[ndx - 1].dtahe_size = h->dtahe_size;
1778                         zaggdata[ndx - 1].dtahe_data = h->dtahe_data;
1779                 }
1780
1781                 nentries++;
1782         }
1783
1784         if (nentries == 0) {
1785                 /*
1786                  * We couldn't find any entries; there is nothing else to do.
1787                  */
1788                 rval = 0;
1789                 goto out;
1790         }
1791
1792         /*
1793          * Before we sort the data, we're going to look for any holes in our
1794          * zero-filled data.  This will occur if an aggregation variable that
1795          * we are being asked to print has not yet been assigned the result of
1796          * any aggregating action for _any_ tuple.  The issue becomes that we
1797          * would like a zero value to be printed for all columns for this
1798          * aggregation, but without any record description, we don't know the
1799          * aggregating action that corresponds to the aggregation variable.  To
1800          * try to find a match, we're simply going to lookup aggregation IDs
1801          * (which are guaranteed to be contiguous and to start from 1), looking
1802          * for the specified aggregation variable ID.  If we find a match,
1803          * we'll use that.  If we iterate over all aggregation IDs and don't
1804          * find a match, then we must be an anonymous enabling.  (Anonymous
1805          * enablings can't currently derive either aggregation variable IDs or
1806          * aggregation variable names given only an aggregation ID.)  In this
1807          * obscure case (anonymous enabling, multiple aggregation printa() with
1808          * some aggregations not represented for any tuple), our defined
1809          * behavior is that the zero will be printed in the format of the first
1810          * aggregation variable that contains any non-zero value.
1811          */
1812         for (i = 0; i < naggvars; i++) {
1813                 if (zaggdata[i].dtahe_size == 0) {
1814                         dtrace_aggvarid_t aggvar;
1815
1816                         aggvar = aggvars[(i - sortpos + naggvars) % naggvars];
1817                         assert(zaggdata[i].dtahe_data.dtada_data == NULL);
1818
1819                         for (j = DTRACE_AGGIDNONE + 1; ; j++) {
1820                                 dtrace_aggdesc_t *agg;
1821                                 dtrace_aggdata_t *aggdata;
1822
1823                                 if (dt_aggid_lookup(dtp, j, &agg) != 0)
1824                                         break;
1825
1826                                 if (agg->dtagd_varid != aggvar)
1827                                         continue;
1828
1829                                 /*
1830                                  * We have our description -- now we need to
1831                                  * cons up the zaggdata entry for it.
1832                                  */
1833                                 aggdata = &zaggdata[i].dtahe_data;
1834                                 aggdata->dtada_size = agg->dtagd_size;
1835                                 aggdata->dtada_desc = agg;
1836                                 aggdata->dtada_handle = dtp;
1837                                 (void) dt_epid_lookup(dtp, agg->dtagd_epid,
1838                                     &aggdata->dtada_edesc,
1839                                     &aggdata->dtada_pdesc);
1840                                 aggdata->dtada_normal = 1;
1841                                 zaggdata[i].dtahe_hashval = 0;
1842                                 zaggdata[i].dtahe_size = agg->dtagd_size;
1843                                 break;
1844                         }
1845
1846                         if (zaggdata[i].dtahe_size == 0) {
1847                                 caddr_t data;
1848
1849                                 /*
1850                                  * We couldn't find this aggregation, meaning
1851                                  * that we have never seen it before for any
1852                                  * tuple _and_ this is an anonymous enabling.
1853                                  * That is, we're in the obscure case outlined
1854                                  * above.  In this case, our defined behavior
1855                                  * is to format the data in the format of the
1856                                  * first non-zero aggregation -- of which, of
1857                                  * course, we know there to be at least one
1858                                  * (or nentries would have been zero).
1859                                  */
1860                                 for (j = 0; j < naggvars; j++) {
1861                                         if (zaggdata[j].dtahe_size != 0)
1862                                                 break;
1863                                 }
1864
1865                                 assert(j < naggvars);
1866                                 zaggdata[i] = zaggdata[j];
1867
1868                                 data = zaggdata[i].dtahe_data.dtada_data;
1869                                 assert(data != NULL);
1870                         }
1871                 }
1872         }
1873
1874         /*
1875          * Now we need to allocate our zero-filled data for use for
1876          * aggregations that don't have a value corresponding to a given key.
1877          */
1878         for (i = 0; i < naggvars; i++) {
1879                 dtrace_aggdata_t *aggdata = &zaggdata[i].dtahe_data;
1880                 dtrace_aggdesc_t *aggdesc = aggdata->dtada_desc;
1881                 dtrace_recdesc_t *rec;
1882                 uint64_t larg;
1883                 caddr_t zdata;
1884
1885                 zsize = zaggdata[i].dtahe_size;
1886                 assert(zsize != 0);
1887
1888                 if ((zdata = dt_zalloc(dtp, zsize)) == NULL) {
1889                         /*
1890                          * If we failed to allocate some zero-filled data, we
1891                          * need to zero out the remaining dtada_data pointers
1892                          * to prevent the wrong data from being freed below.
1893                          */
1894                         for (j = i; j < naggvars; j++)
1895                                 zaggdata[j].dtahe_data.dtada_data = NULL;
1896                         goto out;
1897                 }
1898
1899                 aggvar = aggvars[(i - sortpos + naggvars) % naggvars];
1900
1901                 /*
1902                  * First, the easy bit.  To maintain compatibility with
1903                  * consumers that pull the compiler-generated ID out of the
1904                  * data, we put that ID at the top of the zero-filled data.
1905                  */
1906                 rec = &aggdesc->dtagd_rec[0];
1907                 /* LINTED - alignment */
1908                 *((dtrace_aggvarid_t *)(zdata + rec->dtrd_offset)) = aggvar;
1909
1910                 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
1911
1912                 /*
1913                  * Now for the more complicated part.  If (and only if) this
1914                  * is an lquantize() aggregating action, zero-filled data is
1915                  * not equivalent to an empty record:  we must also get the
1916                  * parameters for the lquantize().
1917                  */
1918                 if (rec->dtrd_action == DTRACEAGG_LQUANTIZE) {
1919                         if (aggdata->dtada_data != NULL) {
1920                                 /*
1921                                  * The easier case here is if we actually have
1922                                  * some prototype data -- in which case we
1923                                  * manually dig it out of the aggregation
1924                                  * record.
1925                                  */
1926                                 /* LINTED - alignment */
1927                                 larg = *((uint64_t *)(aggdata->dtada_data +
1928                                     rec->dtrd_offset));
1929                         } else {
1930                                 /*
1931                                  * We don't have any prototype data.  As a
1932                                  * result, we know that we _do_ have the
1933                                  * compiler-generated information.  (If this
1934                                  * were an anonymous enabling, all of our
1935                                  * zero-filled data would have prototype data
1936                                  * -- either directly or indirectly.) So as
1937                                  * gross as it is, we'll grovel around in the
1938                                  * compiler-generated information to find the
1939                                  * lquantize() parameters.
1940                                  */
1941                                 dtrace_stmtdesc_t *sdp;
1942                                 dt_ident_t *aid;
1943                                 dt_idsig_t *isp;
1944
1945                                 sdp = (dtrace_stmtdesc_t *)(uintptr_t)
1946                                     aggdesc->dtagd_rec[0].dtrd_uarg;
1947                                 aid = sdp->dtsd_aggdata;
1948                                 isp = (dt_idsig_t *)aid->di_data;
1949                                 assert(isp->dis_auxinfo != 0);
1950                                 larg = isp->dis_auxinfo;
1951                         }
1952
1953                         /* LINTED - alignment */
1954                         *((uint64_t *)(zdata + rec->dtrd_offset)) = larg;
1955                 }
1956
1957                 aggdata->dtada_data = zdata;
1958         }
1959
1960         /*
1961          * Now that we've dealt with setting up our zero-filled data, we can
1962          * allocate our sorted array, and take another pass over the data to
1963          * fill it.
1964          */
1965         sorted = dt_alloc(dtp, nentries * sizeof (dt_ahashent_t *));
1966
1967         if (sorted == NULL)
1968                 goto out;
1969
1970         for (h = hash->dtah_all, i = 0; h != NULL; h = h->dtahe_nextall) {
1971                 dtrace_aggvarid_t id;
1972
1973                 if ((id = dt_aggregate_aggvarid(h)) > max || !map[id])
1974                         continue;
1975
1976                 sorted[i++] = h;
1977         }
1978
1979         assert(i == nentries);
1980
1981         /*
1982          * We've loaded our array; now we need to sort by value to allow us
1983          * to create bundles of like value.  We're going to acquire the
1984          * dt_qsort_lock here, and hold it across all of our subsequent
1985          * comparison and sorting.
1986          */
1987         (void) pthread_mutex_lock(&dt_qsort_lock);
1988
1989         qsort(sorted, nentries, sizeof (dt_ahashent_t *),
1990             dt_aggregate_keyvarcmp);
1991
1992         /*
1993          * Now we need to go through and create bundles.  Because the number
1994          * of bundles is bounded by the size of the sorted array, we're going
1995          * to reuse the underlying storage.  And note that "bundle" is an
1996          * array of pointers to arrays of pointers to dt_ahashent_t -- making
1997          * its type (regrettably) "dt_ahashent_t ***".  (Regrettable because
1998          * '*' -- like '_' and 'X' -- should never appear in triplicate in
1999          * an ideal world.)
2000          */
2001         bundle = (dt_ahashent_t ***)sorted;
2002
2003         for (i = 1, start = 0; i <= nentries; i++) {
2004                 if (i < nentries &&
2005                     dt_aggregate_keycmp(&sorted[i], &sorted[i - 1]) == 0)
2006                         continue;
2007
2008                 /*
2009                  * We have a bundle boundary.  Everything from start to
2010                  * (i - 1) belongs in one bundle.
2011                  */
2012                 assert(i - start <= naggvars);
2013                 bundlesize = (naggvars + 2) * sizeof (dt_ahashent_t *);
2014
2015                 if ((nbundle = dt_zalloc(dtp, bundlesize)) == NULL) {
2016                         (void) pthread_mutex_unlock(&dt_qsort_lock);
2017                         goto out;
2018                 }
2019
2020                 for (j = start; j < i; j++) {
2021                         dtrace_aggvarid_t id = dt_aggregate_aggvarid(sorted[j]);
2022
2023                         assert(id <= max);
2024                         assert(map[id] != 0);
2025                         assert(map[id] - 1 < naggvars);
2026                         assert(nbundle[map[id] - 1] == NULL);
2027                         nbundle[map[id] - 1] = sorted[j];
2028
2029                         if (nbundle[naggvars] == NULL)
2030                                 nbundle[naggvars] = sorted[j];
2031                 }
2032
2033                 for (j = 0; j < naggvars; j++) {
2034                         if (nbundle[j] != NULL)
2035                                 continue;
2036
2037                         /*
2038                          * Before we assume that this aggregation variable
2039                          * isn't present (and fall back to using the
2040                          * zero-filled data allocated earlier), check the
2041                          * remap.  If we have a remapping, we'll drop it in
2042                          * here.  Note that we might be remapping an
2043                          * aggregation variable that isn't present for this
2044                          * key; in this case, the aggregation data that we
2045                          * copy will point to the zeroed data.
2046                          */
2047                         if (remap != NULL && remap[j]) {
2048                                 assert(remap[j] - 1 < j);
2049                                 assert(nbundle[remap[j] - 1] != NULL);
2050                                 nbundle[j] = nbundle[remap[j] - 1];
2051                         } else {
2052                                 nbundle[j] = &zaggdata[j];
2053                         }
2054                 }
2055
2056                 bundle[nbundles++] = nbundle;
2057                 start = i;
2058         }
2059
2060         /*
2061          * Now we need to re-sort based on the first value.
2062          */
2063         dt_aggregate_qsort(dtp, bundle, nbundles, sizeof (dt_ahashent_t **),
2064             dt_aggregate_bundlecmp);
2065
2066         (void) pthread_mutex_unlock(&dt_qsort_lock);
2067
2068         /*
2069          * We're done!  Now we just need to go back over the sorted bundles,
2070          * calling the function.
2071          */
2072         data = alloca((naggvars + 1) * sizeof (dtrace_aggdata_t *));
2073
2074         for (i = 0; i < nbundles; i++) {
2075                 for (j = 0; j < naggvars; j++)
2076                         data[j + 1] = NULL;
2077
2078                 for (j = 0; j < naggvars; j++) {
2079                         int ndx = j - sortpos;
2080
2081                         if (ndx < 0)
2082                                 ndx += naggvars;
2083
2084                         assert(bundle[i][ndx] != NULL);
2085                         data[j + 1] = &bundle[i][ndx]->dtahe_data;
2086                 }
2087
2088                 for (j = 0; j < naggvars; j++)
2089                         assert(data[j + 1] != NULL);
2090
2091                 /*
2092                  * The representative key is the last element in the bundle.
2093                  * Assert that we have one, and then set it to be the first
2094                  * element of data.
2095                  */
2096                 assert(bundle[i][j] != NULL);
2097                 data[0] = &bundle[i][j]->dtahe_data;
2098
2099                 if ((rval = func(data, naggvars + 1, arg)) == -1)
2100                         goto out;
2101         }
2102
2103         rval = 0;
2104 out:
2105         for (i = 0; i < nbundles; i++)
2106                 dt_free(dtp, bundle[i]);
2107
2108         if (zaggdata != NULL) {
2109                 for (i = 0; i < naggvars; i++)
2110                         dt_free(dtp, zaggdata[i].dtahe_data.dtada_data);
2111         }
2112
2113         dt_free(dtp, zaggdata);
2114         dt_free(dtp, sorted);
2115         dt_free(dtp, remap);
2116         dt_free(dtp, map);
2117
2118         return (rval);
2119 }
2120
2121 int
2122 dtrace_aggregate_print(dtrace_hdl_t *dtp, FILE *fp,
2123     dtrace_aggregate_walk_f *func)
2124 {
2125         dt_print_aggdata_t pd;
2126
2127         bzero(&pd, sizeof (pd));
2128
2129         pd.dtpa_dtp = dtp;
2130         pd.dtpa_fp = fp;
2131         pd.dtpa_allunprint = 1;
2132
2133         if (func == NULL)
2134                 func = dtrace_aggregate_walk_sorted;
2135
2136         if (dtp->dt_oformat) {
2137                 if ((*func)(dtp, dt_format_agg, &pd) == -1)
2138                         return (dt_set_errno(dtp, dtp->dt_errno));
2139         } else {
2140                 if ((*func)(dtp, dt_print_agg, &pd) == -1)
2141                         return (dt_set_errno(dtp, dtp->dt_errno));
2142         }
2143
2144         return (0);
2145 }
2146
2147 void
2148 dtrace_aggregate_clear(dtrace_hdl_t *dtp)
2149 {
2150         dt_aggregate_t *agp = &dtp->dt_aggregate;
2151         dt_ahash_t *hash = &agp->dtat_hash;
2152         dt_ahashent_t *h;
2153         dtrace_aggdata_t *data;
2154         dtrace_aggdesc_t *aggdesc;
2155         dtrace_recdesc_t *rec;
2156         int i, max_cpus = agp->dtat_maxcpu;
2157
2158         for (h = hash->dtah_all; h != NULL; h = h->dtahe_nextall) {
2159                 aggdesc = h->dtahe_data.dtada_desc;
2160                 rec = &aggdesc->dtagd_rec[aggdesc->dtagd_nrecs - 1];
2161                 data = &h->dtahe_data;
2162
2163                 bzero(&data->dtada_data[rec->dtrd_offset], rec->dtrd_size);
2164
2165                 if (data->dtada_percpu == NULL)
2166                         continue;
2167
2168                 for (i = 0; i < max_cpus; i++)
2169                         bzero(data->dtada_percpu[i], rec->dtrd_size);
2170         }
2171 }
2172
2173 void
2174 dt_aggregate_destroy(dtrace_hdl_t *dtp)
2175 {
2176         dt_aggregate_t *agp = &dtp->dt_aggregate;
2177         dt_ahash_t *hash = &agp->dtat_hash;
2178         dt_ahashent_t *h, *next;
2179         dtrace_aggdata_t *aggdata;
2180         int i, max_cpus = agp->dtat_maxcpu;
2181
2182         if (hash->dtah_hash == NULL) {
2183                 assert(hash->dtah_all == NULL);
2184         } else {
2185                 free(hash->dtah_hash);
2186
2187                 for (h = hash->dtah_all; h != NULL; h = next) {
2188                         next = h->dtahe_nextall;
2189
2190                         aggdata = &h->dtahe_data;
2191
2192                         if (aggdata->dtada_percpu != NULL) {
2193                                 for (i = 0; i < max_cpus; i++)
2194                                         free(aggdata->dtada_percpu[i]);
2195                                 free(aggdata->dtada_percpu);
2196                         }
2197
2198                         free(aggdata->dtada_data);
2199                         free(h);
2200                 }
2201
2202                 hash->dtah_hash = NULL;
2203                 hash->dtah_all = NULL;
2204                 hash->dtah_size = 0;
2205         }
2206
2207         free(agp->dtat_buf.dtbd_data);
2208         free(agp->dtat_cpus);
2209 }