]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/subr_stats.c
sys/*/conf: do not use "../../conf/" when including std.*
[FreeBSD/FreeBSD.git] / sys / kern / subr_stats.c
1 /*-
2  * Copyright (c) 2014-2018 Netflix, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 /*
28  * Author: Lawrence Stewart <lstewart@netflix.com>
29  */
30
31 #include <sys/param.h>
32 #include <sys/arb.h>
33 #include <sys/ctype.h>
34 #include <sys/errno.h>
35 #include <sys/hash.h>
36 #include <sys/limits.h>
37 #include <sys/malloc.h>
38 #include <sys/qmath.h>
39 #include <sys/sbuf.h>
40 #if defined(DIAGNOSTIC)
41 #include <sys/tree.h>
42 #endif
43 #include <sys/stats.h> /* Must come after qmath.h and arb.h */
44 #include <sys/stddef.h>
45 #include <sys/stdint.h>
46 #include <sys/time.h>
47
48 #ifdef _KERNEL
49 #include <sys/kernel.h>
50 #include <sys/lock.h>
51 #include <sys/rwlock.h>
52 #include <sys/sysctl.h>
53 #include <sys/systm.h>
54 #else /* ! _KERNEL */
55 #include <pthread.h>
56 #include <stdbool.h>
57 #include <stdio.h>
58 #include <stdlib.h>
59 #include <string.h>
60 #endif /* _KERNEL */
61
/*
 * Per-VOI state record. Its size is what vsd_dtype2size[] reports for
 * VSD_DTYPE_VOISTATE.
 */
struct voistatdata_voistate {
        /* Previous VOI value for diff calculation. */
        struct voistatdata_numeric prev;
};
66
#define VS_VSDVALID     0x0001  /* Stat's voistatdata updated at least once. */
/*
 * Descriptor for a single stat attached to a VOI. data_off and dsz locate
 * and size the stat's data within the blob. errs and flags are bitfields
 * that together occupy 16 bits, split at VS_EBITS.
 */
struct voistat {
        int8_t          stype;          /* Type of stat e.g. VS_STYPE_SUM. */
        enum vsd_dtype  dtype : 8;      /* Data type of this stat's data. */
        uint16_t        data_off;       /* Blob offset for this stat's data. */
        uint16_t        dsz;            /* Size of stat's data. */
#define VS_EBITS 8
        uint16_t        errs : VS_EBITS;/* Non-wrapping error count. */
        uint16_t        flags : 16 - VS_EBITS;
};
/*
 * The voistat error count is capped to avoid wrapping: errs is only
 * incremented while it is below the largest value a VS_EBITS-wide field can
 * hold, i.e. (1 << VS_EBITS) - 1.
 */
#define VS_INCERRS(vs) do {                                             \
        if ((vs)->errs < (1U << VS_EBITS) - 1)                          \
                (vs)->errs++;                                           \
} while (0)
82
83 /*
84  * Ideas for flags:
85  *   - Global or entity specific (global would imply use of counter(9)?)
86  *   - Whether to reset stats on read or not
87  *   - Signal an overflow?
88  *   - Compressed voistat array
89  */
#define VOI_REQSTATE    0x0001  /* VOI requires VS_STYPE_VOISTATE. */
/*
 * Descriptor for one variable of interest (VOI) in a blob's vois[] array.
 * The struct must stay exactly 8 bytes: NVOIS() shifts by 3 instead of
 * dividing, and a static assert in this file enforces the size.
 */
struct voi {
        int16_t         id;             /* VOI id. */
        enum vsd_dtype  dtype : 8;      /* Data type of the VOI itself. */
        int8_t          voistatmaxid;   /* Largest allocated voistat index. */
        uint16_t        stats_off;      /* Blob offset for this VOIs stats. */
        uint16_t        flags;
};
98
99 /*
100  * Memory for the entire blob is allocated as a slab and then offsets are
101  * maintained to carve up the slab into sections holding different data types.
102  *
103  * Ideas for flags:
104  * - Compressed voi array (trade off memory usage vs search time)
105  * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/Mib)
106  */
/*
 * Version 1 stats blob header. The fields up to and including cursz form the
 * consumer-visible prefix; the static assert below checks that this prefix
 * ends exactly where struct statsblob's opaque member begins.
 */
struct statsblobv1 {
        uint8_t         abi;
        uint8_t         endian;
        uint16_t        flags;
        uint16_t        maxsz;
        uint16_t        cursz;
        /* Fields from here down are opaque to consumers. */
        uint32_t        tplhash;        /* Base template hash ID. */
        uint16_t        stats_off;      /* voistat array blob offset. */
        uint16_t        statsdata_off;  /* voistatdata array blob offset. */
        sbintime_t      created;        /* Blob creation time. */
        sbintime_t      lastrst;        /* Time of last reset. */
        struct voi      vois[];         /* Array indexed by [voi_id]. */
} __aligned(sizeof(void *));
_Static_assert(offsetof(struct statsblobv1, cursz) +
    SIZEOF_MEMBER(struct statsblobv1, cursz) ==
    offsetof(struct statsblob, opaque),
    "statsblobv1 ABI mismatch");
125
/* A v1 template: a metadata blob paired with a v1 stats blob. */
struct statsblobv1_tpl {
        struct metablob         *mb;
        struct statsblobv1      *sb;
};
130
/*
 * Context passed to iterator callbacks (see stats_v1_blob_itercb_t): the
 * caller's own context pointer plus the iteration flags and the slot indices
 * of the voi/voistat being visited.
 */
struct sb_iter_ctx {
        void            *usrctx;        /* Caller supplied context. */
        uint32_t        flags;          /* Flags for current iteration. */
        int16_t         vslot;          /* struct voi slot index. */
        int8_t          vsslot;         /* struct voistat slot index. */
};
138
/*
 * NOTE(review): presumably the user context for the blob-to-string iterator
 * callback (output sbuf, optional template, output format and flags) --
 * confirm against the callback, which is outside this excerpt.
 */
struct sb_tostrcb_ctx {
        struct sbuf             *buf;
        struct statsblob_tpl    *tpl;
        enum sb_str_fmt fmt;
        uint32_t                flags;
};
145
/*
 * NOTE(review): presumably the user context for the blob visit iterator:
 * the consumer's visit callback and the opaque pointer to hand back to it --
 * confirm against the visit code, which is outside this excerpt.
 */
struct sb_visitcb_ctx {
        stats_blob_visitcb_t    cb;
        void                    *usrctx;
};
150
/*
 * Stats blob iterator callback. Receives the blob, the current voi and
 * voistat, and the iteration context (struct sb_iter_ctx). The meaning of
 * the int return value is defined by stats_v1_blob_iter(), which is outside
 * this excerpt.
 */
typedef int (*stats_v1_blob_itercb_t)(struct statsblobv1 *sb, struct voi *v,
    struct voistat *vs, struct sb_iter_ctx *ctx);
154
/*
 * Template list locking and memory helpers. The same macro names are
 * provided for kernel builds (rwlock(9), malloc(9) with M_STATS) and for
 * userland builds (pthread rwlock, libc free()).
 */
#ifdef _KERNEL
static struct rwlock tpllistlock;
RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock");
#define TPL_LIST_RLOCK() rw_rlock(&tpllistlock)
#define TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock)
#define TPL_LIST_WLOCK() rw_wlock(&tpllistlock)
#define TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock)
#define TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED)
#define TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED)
#define TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED)
MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory");
#define stats_free(ptr) free((ptr), M_STATS)
#else /* ! _KERNEL */
static void stats_constructor(void);
static void stats_destructor(void);
static pthread_rwlock_t tpllistlock;
/* pthread rwlocks have a single unlock call for both read and write holds. */
#define TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock)
#define TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock)
#define TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK()
#define TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock)
#define TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK()
/* Lock ownership assertions are no-ops in userland. */
#define TPL_LIST_LOCK_ASSERT() do { } while (0)
#define TPL_LIST_RLOCK_ASSERT() do { } while (0)
#define TPL_LIST_WLOCK_ASSERT() do { } while (0)
#ifdef NDEBUG
/* With NDEBUG, assertions compile away and panic() only prints. */
#define KASSERT(cond, msg) do {} while (0)
#define stats_abort() do {} while (0)
#else /* ! NDEBUG */
/* msg is a parenthesised printf-style argument list, as for kernel KASSERT. */
#define KASSERT(cond, msg) do { \
        if (!(cond)) { \
                panic msg; \
        } \
} while (0)
#define stats_abort() abort()
#endif /* NDEBUG */
#define stats_free(ptr) free(ptr)
/* Userland stand-in for panic(9): print to stderr, then stats_abort(). */
#define panic(fmt, ...) do { \
        fprintf(stderr, (fmt), ##__VA_ARGS__); \
        stats_abort(); \
} while (0)
#endif /* _KERNEL */
196
/* 65535 is the largest value the uint16_t maxsz/cursz blob fields can hold. */
#define SB_V1_MAXSZ 65535

/* Obtain a blob offset pointer: byte address (sb + off) as a void pointer. */
#define BLOB_OFFSET(sb, off) ((void *)(((uint8_t *)(sb)) + (off)))

/*
 * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a
 * power of 2 size, we can shift instead of divide. The shift amount must be
 * updated if sizeof(struct voi) ever changes, which the assert should catch.
 *
 * The count is derived from the bytes between the end of the blob header and
 * stats_off, i.e. it assumes the voistat array starts immediately after
 * vois[].
 */
#define NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \
    sizeof(struct statsblobv1)) >> 3))
_Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch");
210
/* Try to restrict names to alphanumerics and underscores to simplify JSON compat. */
/* Short name for each stat type, indexed by VS_STYPE_* value. */
const char *vs_stype2name[VS_NUM_STYPES] = {
        [VS_STYPE_VOISTATE] = "VOISTATE",
        [VS_STYPE_SUM] = "SUM",
        [VS_STYPE_MAX] = "MAX",
        [VS_STYPE_MIN] = "MIN",
        [VS_STYPE_HIST] = "HIST",
        [VS_STYPE_TDGST] = "TDGST",
};
220
/* Human-readable description of each stat type, indexed by VS_STYPE_* value. */
const char *vs_stype2desc[VS_NUM_STYPES] = {
        [VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)",
        [VS_STYPE_SUM] = "Simple arithmetic accumulator",
        [VS_STYPE_MAX] = "Maximum observed VOI value",
        [VS_STYPE_MIN] = "Minimum observed VOI value",
        [VS_STYPE_HIST] = "Histogram of observed VOI values",
        [VS_STYPE_TDGST] = "t-digest of observed VOI values",
};
229
/* Short name for each data type, indexed by VSD_DTYPE_* value. */
const char *vsd_dtype2name[VSD_NUM_DTYPES] = {
        [VSD_DTYPE_VOISTATE] = "VOISTATE",
        [VSD_DTYPE_INT_S32] = "INT_S32",
        [VSD_DTYPE_INT_U32] = "INT_U32",
        [VSD_DTYPE_INT_S64] = "INT_S64",
        [VSD_DTYPE_INT_U64] = "INT_U64",
        [VSD_DTYPE_INT_SLONG] = "INT_SLONG",
        [VSD_DTYPE_INT_ULONG] = "INT_ULONG",
        [VSD_DTYPE_Q_S32] = "Q_S32",
        [VSD_DTYPE_Q_U32] = "Q_U32",
        [VSD_DTYPE_Q_S64] = "Q_S64",
        [VSD_DTYPE_Q_U64] = "Q_U64",
        [VSD_DTYPE_CRHIST32] = "CRHIST32",
        [VSD_DTYPE_DRHIST32] = "DRHIST32",
        [VSD_DTYPE_DVHIST32] = "DVHIST32",
        [VSD_DTYPE_CRHIST64] = "CRHIST64",
        [VSD_DTYPE_DRHIST64] = "DRHIST64",
        [VSD_DTYPE_DVHIST64] = "DVHIST64",
        [VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32",
        [VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64",
};
251
/*
 * sizeof() of the struct backing each data type, indexed by VSD_DTYPE_*
 * value. Signed and unsigned variants of a type share one backing struct.
 */
const size_t vsd_dtype2size[VSD_NUM_DTYPES] = {
        [VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate),
        [VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32),
        [VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32),
        [VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64),
        [VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64),
        [VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong),
        [VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong),
        [VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32),
        [VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32),
        [VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64),
        [VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64),
        [VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32),
        [VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32),
        [VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32),
        [VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64),
        [VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64),
        [VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64),
        [VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32),
        [VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64),
};
273
/*
 * Per data type flag, indexed by VSD_DTYPE_* value: true for VOISTATE,
 * histogram and t-digest cluster types, false for the plain integer and Q
 * number types.
 */
static const bool vsd_compoundtype[VSD_NUM_DTYPES] = {
        [VSD_DTYPE_VOISTATE] = true,
        [VSD_DTYPE_INT_S32] = false,
        [VSD_DTYPE_INT_U32] = false,
        [VSD_DTYPE_INT_S64] = false,
        [VSD_DTYPE_INT_U64] = false,
        [VSD_DTYPE_INT_SLONG] = false,
        [VSD_DTYPE_INT_ULONG] = false,
        [VSD_DTYPE_Q_S32] = false,
        [VSD_DTYPE_Q_U32] = false,
        [VSD_DTYPE_Q_S64] = false,
        [VSD_DTYPE_Q_U64] = false,
        [VSD_DTYPE_CRHIST32] = true,
        [VSD_DTYPE_DRHIST32] = true,
        [VSD_DTYPE_DVHIST32] = true,
        [VSD_DTYPE_CRHIST64] = true,
        [VSD_DTYPE_DRHIST64] = true,
        [VSD_DTYPE_DVHIST64] = true,
        [VSD_DTYPE_TDGSTCLUST32] = true,
        [VSD_DTYPE_TDGSTCLUST64] = true,
};
295
/*
 * Minimum ([LIM_MIN]) and maximum ([LIM_MAX]) representable value for each
 * numeric data type, indexed by VSD_DTYPE_* value. Only the types up to and
 * including VSD_DTYPE_Q_U64 have entries; the VOISTATE slot is zero filled.
 */
const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = {
        [LIM_MIN] = {
                [VSD_DTYPE_VOISTATE] = {0},
                [VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}},
                [VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}},
                [VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}},
                [VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}},
                [VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}},
                [VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}},
                [VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}},
                [VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = 0}},
                [VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}},
                [VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}},
        },
        [LIM_MAX] = {
                [VSD_DTYPE_VOISTATE] = {0},
                [VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}},
                [VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}},
                [VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}},
                [VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}},
                [VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}},
                [VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}},
                [VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}},
                [VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}},
                [VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}},
                [VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}},
        }
};
324
/*
 * tpllistlock protects tpllist and ntpl.
 * NOTE(review): ntpl is presumably the number of entries in tpllist --
 * confirm against the registration code, which is outside this excerpt.
 */
static uint32_t ntpl;
static struct statsblob_tpl **tpllist;

/* Forward declarations for helpers that are used before their definitions. */
static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz,
    int flags);
//static void stats_v1_blob_finalise(struct statsblobv1 *sb);
static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
    uint32_t flags);
static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
    int newvoistatbytes, int newvoistatdatabytes);
static void stats_v1_blob_iter(struct statsblobv1 *sb,
    stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags);
static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype,
    struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt);
340
341 static inline int
342 ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2)
343 {
344
345         KASSERT(Q_PRECEQ(c1->mu, c2->mu),
346             ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
347             Q_RELPREC(c1->mu, c2->mu)));
348
349        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
350 }
351 ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp);
352
353 static inline int
354 ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2)
355 {
356
357         KASSERT(Q_PRECEQ(c1->mu, c2->mu),
358             ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
359             Q_RELPREC(c1->mu, c2->mu)));
360
361        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
362 }
363 ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp);
364
/*
 * DIAGNOSTIC builds additionally generate red-black tree code over the same
 * centroid structs, using the same comparison functions.
 * NOTE(review): presumably used to cross-check the ARB trees -- confirm
 * against the t-digest code, which is outside this excerpt.
 */
#ifdef DIAGNOSTIC
RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp);
RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp);
#endif
369
370 static inline sbintime_t
371 stats_sbinuptime(void)
372 {
373         sbintime_t sbt;
374 #ifdef _KERNEL
375
376         sbt = sbinuptime();
377 #else /* ! _KERNEL */
378         struct timespec tp;
379
380         clock_gettime(CLOCK_MONOTONIC_FAST, &tp);
381         sbt = tstosbt(tp);
382 #endif /* _KERNEL */
383
384         return (sbt);
385 }
386
387 static inline void *
388 stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags)
389 {
390
391 #ifdef _KERNEL
392         /* Default to M_NOWAIT if neither M_NOWAIT or M_WAITOK are set. */
393         if (!(flags & (M_WAITOK | M_NOWAIT)))
394                 flags |= M_NOWAIT;
395         ptr = realloc(ptr, newsz, M_STATS, flags);
396 #else /* ! _KERNEL */
397         ptr = realloc(ptr, newsz);
398         if ((flags & M_ZERO) && ptr != NULL) {
399                 if (oldsz == 0)
400                         memset(ptr, '\0', newsz);
401                 else if (newsz > oldsz)
402                         memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz);
403         }
404 #endif /* _KERNEL */
405
406         return (ptr);
407 }
408
/*
 * Duplicate the NUL-terminated string s.
 *
 * Kernel builds allocate from M_STATS using flags, adding M_NOWAIT when
 * neither M_WAITOK nor M_NOWAIT was given, and return NULL if the allocation
 * fails. Userland builds ignore flags and defer to strdup(3).
 */
static inline char *
stats_strdup(const char *s, int flags)
{
#ifdef _KERNEL
        char *dup;
        size_t nbytes;

        if ((flags & (M_WAITOK | M_NOWAIT)) == 0)
                flags |= M_NOWAIT;

        /* Copy the terminating NUL along with the string. */
        nbytes = strlen(s) + 1;
        dup = malloc(nbytes, M_STATS, flags);
        if (dup != NULL)
                bcopy(s, dup, nbytes);

        return (dup);
#else
        (void)flags;

        return (strdup(s));
#endif
}
431
432 static inline void
433 stats_tpl_update_hash(struct statsblob_tpl *tpl)
434 {
435
436         TPL_LIST_WLOCK_ASSERT();
437         tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0);
438         for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) {
439                 if (tpl->mb->voi_meta[voi_id].name != NULL)
440                         tpl->mb->tplhash = hash32_str(
441                             tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash);
442         }
443         tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz,
444             tpl->mb->tplhash);
445 }
446
/*
 * Integer exponentiation by squaring: returns base raised to exp, with the
 * usual uint64_t wrap-around on overflow. Anything to the power 0 is 1.
 */
static inline uint64_t
stats_pow_u64(uint64_t base, uint64_t exp)
{
        uint64_t acc;

        /* Consume exp one bit at a time, squaring base for each bit. */
        for (acc = 1; exp != 0; exp >>= 1) {
                if ((exp & 1) != 0)
                        acc *= base;
                base *= base;
        }

        return (acc);
}
461
462 static inline int
463 stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt,
464     struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub)
465 {
466         uint64_t step = 0;
467         int error = 0;
468
469         switch (info->scheme) {
470         case BKT_LIN:
471                 step = info->lin.stepinc;
472                 break;
473         case BKT_EXP:
474                 step = stats_pow_u64(info->exp.stepbase,
475                     info->exp.stepexp + curbkt);
476                 break;
477         case BKT_LINEXP:
478                 {
479                 uint64_t curstepexp = 1;
480
481                 switch (info->voi_dtype) {
482                 case VSD_DTYPE_INT_S32:
483                         while ((int32_t)stats_pow_u64(info->linexp.stepbase,
484                             curstepexp) <= bkt_lb->int32.s32)
485                                 curstepexp++;
486                         break;
487                 case VSD_DTYPE_INT_U32:
488                         while ((uint32_t)stats_pow_u64(info->linexp.stepbase,
489                             curstepexp) <= bkt_lb->int32.u32)
490                                 curstepexp++;
491                         break;
492                 case VSD_DTYPE_INT_S64:
493                         while ((int64_t)stats_pow_u64(info->linexp.stepbase,
494                             curstepexp) <= bkt_lb->int64.s64)
495                                 curstepexp++;
496                         break;
497                 case VSD_DTYPE_INT_U64:
498                         while ((uint64_t)stats_pow_u64(info->linexp.stepbase,
499                             curstepexp) <= bkt_lb->int64.u64)
500                                 curstepexp++;
501                         break;
502                 case VSD_DTYPE_INT_SLONG:
503                         while ((long)stats_pow_u64(info->linexp.stepbase,
504                             curstepexp) <= bkt_lb->intlong.slong)
505                                 curstepexp++;
506                         break;
507                 case VSD_DTYPE_INT_ULONG:
508                         while ((unsigned long)stats_pow_u64(info->linexp.stepbase,
509                             curstepexp) <= bkt_lb->intlong.ulong)
510                                 curstepexp++;
511                         break;
512                 case VSD_DTYPE_Q_S32:
513                         while ((s32q_t)stats_pow_u64(info->linexp.stepbase,
514                             curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32))
515                         break;
516                 case VSD_DTYPE_Q_U32:
517                         while ((u32q_t)stats_pow_u64(info->linexp.stepbase,
518                             curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32))
519                         break;
520                 case VSD_DTYPE_Q_S64:
521                         while ((s64q_t)stats_pow_u64(info->linexp.stepbase,
522                             curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64))
523                                 curstepexp++;
524                         break;
525                 case VSD_DTYPE_Q_U64:
526                         while ((u64q_t)stats_pow_u64(info->linexp.stepbase,
527                             curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64))
528                                 curstepexp++;
529                         break;
530                 default:
531                         break;
532                 }
533
534                 step = stats_pow_u64(info->linexp.stepbase, curstepexp) /
535                     info->linexp.linstepdiv;
536                 if (step == 0)
537                         step = 1;
538                 break;
539                 }
540         default:
541                 break;
542         }
543
544         if (info->scheme == BKT_USR) {
545                 *bkt_lb = info->usr.bkts[curbkt].lb;
546                 *bkt_ub = info->usr.bkts[curbkt].ub;
547         } else if (step != 0) {
548                 switch (info->voi_dtype) {
549                 case VSD_DTYPE_INT_S32:
550                         bkt_ub->int32.s32 += (int32_t)step;
551                         break;
552                 case VSD_DTYPE_INT_U32:
553                         bkt_ub->int32.u32 += (uint32_t)step;
554                         break;
555                 case VSD_DTYPE_INT_S64:
556                         bkt_ub->int64.s64 += (int64_t)step;
557                         break;
558                 case VSD_DTYPE_INT_U64:
559                         bkt_ub->int64.u64 += (uint64_t)step;
560                         break;
561                 case VSD_DTYPE_INT_SLONG:
562                         bkt_ub->intlong.slong += (long)step;
563                         break;
564                 case VSD_DTYPE_INT_ULONG:
565                         bkt_ub->intlong.ulong += (unsigned long)step;
566                         break;
567                 case VSD_DTYPE_Q_S32:
568                         error = Q_QADDI(&bkt_ub->q32.sq32, step);
569                         break;
570                 case VSD_DTYPE_Q_U32:
571                         error = Q_QADDI(&bkt_ub->q32.uq32, step);
572                         break;
573                 case VSD_DTYPE_Q_S64:
574                         error = Q_QADDI(&bkt_ub->q64.sq64, step);
575                         break;
576                 case VSD_DTYPE_Q_U64:
577                         error = Q_QADDI(&bkt_ub->q64.uq64, step);
578                         break;
579                 default:
580                         break;
581                 }
582         } else { /* info->scheme != BKT_USR && step == 0 */
583                 return (EINVAL);
584         }
585
586         return (error);
587 }
588
589 static uint32_t
590 stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info)
591 {
592         struct voistatdata_numeric bkt_lb, bkt_ub;
593         uint32_t nbkts;
594         int done;
595
596         if (info->scheme == BKT_USR) {
597                 /* XXXLAS: Setting info->{lb,ub} from macro is tricky. */
598                 info->lb = info->usr.bkts[0].lb;
599                 info->ub = info->usr.bkts[info->usr.nbkts - 1].lb;
600         }
601
602         nbkts = 0;
603         done = 0;
604         bkt_ub = info->lb;
605
606         do {
607                 bkt_lb = bkt_ub;
608                 if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub))
609                         return (0);
610
611                 if (info->scheme == BKT_USR)
612                         done = (nbkts == info->usr.nbkts);
613                 else {
614                         switch (info->voi_dtype) {
615                         case VSD_DTYPE_INT_S32:
616                                 done = (bkt_ub.int32.s32 > info->ub.int32.s32);
617                                 break;
618                         case VSD_DTYPE_INT_U32:
619                                 done = (bkt_ub.int32.u32 > info->ub.int32.u32);
620                                 break;
621                         case VSD_DTYPE_INT_S64:
622                                 done = (bkt_ub.int64.s64 > info->ub.int64.s64);
623                                 break;
624                         case VSD_DTYPE_INT_U64:
625                                 done = (bkt_ub.int64.u64 > info->ub.int64.u64);
626                                 break;
627                         case VSD_DTYPE_INT_SLONG:
628                                 done = (bkt_ub.intlong.slong >
629                                     info->ub.intlong.slong);
630                                 break;
631                         case VSD_DTYPE_INT_ULONG:
632                                 done = (bkt_ub.intlong.ulong >
633                                     info->ub.intlong.ulong);
634                                 break;
635                         case VSD_DTYPE_Q_S32:
636                                 done = Q_QGTQ(bkt_ub.q32.sq32,
637                                     info->ub.q32.sq32);
638                                 break;
639                         case VSD_DTYPE_Q_U32:
640                                 done = Q_QGTQ(bkt_ub.q32.uq32,
641                                     info->ub.q32.uq32);
642                                 break;
643                         case VSD_DTYPE_Q_S64:
644                                 done = Q_QGTQ(bkt_ub.q64.sq64,
645                                     info->ub.q64.sq64);
646                                 break;
647                         case VSD_DTYPE_Q_U64:
648                                 done = Q_QGTQ(bkt_ub.q64.uq64,
649                                     info->ub.q64.uq64);
650                                 break;
651                         default:
652                                 return (0);
653                         }
654                 }
655         } while (!done);
656
657         if (info->flags & VSD_HIST_LBOUND_INF)
658                 nbkts++;
659         if (info->flags & VSD_HIST_UBOUND_INF)
660                 nbkts++;
661
662         return (nbkts);
663 }
664
665 int
666 stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
667     struct vss_hist_hlpr_info *info)
668 {
669         struct voistatdata_hist *hist;
670         struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub,
671             *ubinfbktlb, *ubinfbktub;
672         uint32_t bkt, nbkts, nloop;
673
674         if (vss == NULL || info == NULL || (info->flags &
675         (VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype ==
676         VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64)))
677                 return (EINVAL);
678
679         info->voi_dtype = voi_dtype;
680
681         if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0)
682                 return (EINVAL);
683
684         switch (info->hist_dtype) {
685         case VSD_DTYPE_CRHIST32:
686                 vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts);
687                 break;
688         case VSD_DTYPE_DRHIST32:
689                 vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts);
690                 break;
691         case VSD_DTYPE_DVHIST32:
692                 vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts);
693                 break;
694         case VSD_DTYPE_CRHIST64:
695                 vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts);
696                 break;
697         case VSD_DTYPE_DRHIST64:
698                 vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts);
699                 break;
700         case VSD_DTYPE_DVHIST64:
701                 vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts);
702                 break;
703         default:
704                 return (EINVAL);
705         }
706
707         vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
708         if (vss->iv == NULL)
709                 return (ENOMEM);
710
711         hist = (struct voistatdata_hist *)vss->iv;
712         bkt_ub = info->lb;
713
714         for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0;
715             bkt < nbkts;
716             bkt++, nloop++) {
717                 bkt_lb = bkt_ub;
718                 if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub))
719                         return (EINVAL);
720
721                 switch (info->hist_dtype) {
722                 case VSD_DTYPE_CRHIST32:
723                         VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb;
724                         break;
725                 case VSD_DTYPE_DRHIST32:
726                         VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb;
727                         VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub;
728                         break;
729                 case VSD_DTYPE_DVHIST32:
730                         VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb;
731                         break;
732                 case VSD_DTYPE_CRHIST64:
733                         VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb;
734                         break;
735                 case VSD_DTYPE_DRHIST64:
736                         VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb;
737                         VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub;
738                         break;
739                 case VSD_DTYPE_DVHIST64:
740                         VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb;
741                         break;
742                 default:
743                         return (EINVAL);
744                 }
745         }
746
747         lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL;
748
749         switch (info->hist_dtype) {
750         case VSD_DTYPE_CRHIST32:
751                 lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb;
752                 ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb;
753                 break;
754         case VSD_DTYPE_DRHIST32:
755                 lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb;
756                 lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub;
757                 ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb;
758                 ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub;
759                 break;
760         case VSD_DTYPE_CRHIST64:
761                 lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb;
762                 ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb;
763                 break;
764         case VSD_DTYPE_DRHIST64:
765                 lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb;
766                 lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub;
767                 ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb;
768                 ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub;
769                 break;
770         case VSD_DTYPE_DVHIST32:
771         case VSD_DTYPE_DVHIST64:
772                 break;
773         default:
774                 return (EINVAL);
775         }
776
777         if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) {
778                 *lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype];
779                 /*
780                  * Assignment from numeric_limit array for Q types assigns max
781                  * possible integral/fractional value for underlying data type,
782                  * but we must set control bits for this specific histogram per
783                  * the user's choice of fractional bits, which we extract from
784                  * info->lb.
785                  */
786                 if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
787                     info->voi_dtype == VSD_DTYPE_Q_U32) {
788                         /* Signedness doesn't matter for setting control bits. */
789                         Q_SCVAL(lbinfbktlb->q32.sq32,
790                             Q_GCVAL(info->lb.q32.sq32));
791                 } else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
792                     info->voi_dtype == VSD_DTYPE_Q_U64) {
793                         /* Signedness doesn't matter for setting control bits. */
794                         Q_SCVAL(lbinfbktlb->q64.sq64,
795                             Q_GCVAL(info->lb.q64.sq64));
796                 }
797                 if (lbinfbktub)
798                         *lbinfbktub = info->lb;
799         }
800         if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) {
801                 *ubinfbktlb = bkt_lb;
802                 if (ubinfbktub) {
803                         *ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype];
804                         if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
805                             info->voi_dtype == VSD_DTYPE_Q_U32) {
806                                 Q_SCVAL(ubinfbktub->q32.sq32,
807                                     Q_GCVAL(info->lb.q32.sq32));
808                         } else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
809                             info->voi_dtype == VSD_DTYPE_Q_U64) {
810                                 Q_SCVAL(ubinfbktub->q64.sq64,
811                                     Q_GCVAL(info->lb.q64.sq64));
812                         }
813                 }
814         }
815
816         return (0);
817 }
818
819 int
820 stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
821     struct vss_tdgst_hlpr_info *info)
822 {
823         struct voistatdata_tdgst *tdgst;
824         struct ctdth32 *ctd32tree;
825         struct ctdth64 *ctd64tree;
826         struct voistatdata_tdgstctd32 *ctd32;
827         struct voistatdata_tdgstctd64 *ctd64;
828
829         info->voi_dtype = voi_dtype;
830
831         switch (info->tdgst_dtype) {
832         case VSD_DTYPE_TDGSTCLUST32:
833                 vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds);
834                 break;
835         case VSD_DTYPE_TDGSTCLUST64:
836                 vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds);
837                 break;
838         default:
839                 return (EINVAL);
840         }
841
842         vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
843         if (vss->iv == NULL)
844                 return (ENOMEM);
845
846         tdgst = (struct voistatdata_tdgst *)vss->iv;
847
848         switch (info->tdgst_dtype) {
849         case VSD_DTYPE_TDGSTCLUST32:
850                 ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
851                 ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) {
852                         Q_INI(&ctd32->mu, 0, 0, info->prec);
853                 }
854                 break;
855         case VSD_DTYPE_TDGSTCLUST64:
856                 ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
857                 ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) {
858                         Q_INI(&ctd64->mu, 0, 0, info->prec);
859                 }
860                 break;
861         default:
862                 return (EINVAL);
863         }
864
865         return (0);
866 }
867
868 int
869 stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
870     struct vss_numeric_hlpr_info *info)
871 {
872         struct voistatdata_numeric iv;
873
874         switch (vss->stype) {
875         case VS_STYPE_SUM:
876                 iv = stats_ctor_vsd_numeric(0);
877                 break;
878         case VS_STYPE_MIN:
879                 iv = numeric_limits[LIM_MAX][voi_dtype];
880                 break;
881         case VS_STYPE_MAX:
882                 iv = numeric_limits[LIM_MIN][voi_dtype];
883                 break;
884         default:
885                 return (EINVAL);
886         }
887
888         vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0);
889         if (vss->iv == NULL)
890                 return (ENOMEM);
891
892         vss->vs_dtype = voi_dtype;
893         vss->vsdsz = vsd_dtype2size[voi_dtype];
894         switch (voi_dtype) {
895         case VSD_DTYPE_INT_S32:
896                 *((int32_t *)vss->iv) = iv.int32.s32;
897                 break;
898         case VSD_DTYPE_INT_U32:
899                 *((uint32_t *)vss->iv) = iv.int32.u32;
900                 break;
901         case VSD_DTYPE_INT_S64:
902                 *((int64_t *)vss->iv) = iv.int64.s64;
903                 break;
904         case VSD_DTYPE_INT_U64:
905                 *((uint64_t *)vss->iv) = iv.int64.u64;
906                 break;
907         case VSD_DTYPE_INT_SLONG:
908                 *((long *)vss->iv) = iv.intlong.slong;
909                 break;
910         case VSD_DTYPE_INT_ULONG:
911                 *((unsigned long *)vss->iv) = iv.intlong.ulong;
912                 break;
913         case VSD_DTYPE_Q_S32:
914                 *((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32,
915                     Q_CTRLINI(info->prec));
916                 break;
917         case VSD_DTYPE_Q_U32:
918                 *((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32,
919                     Q_CTRLINI(info->prec));
920                 break;
921         case VSD_DTYPE_Q_S64:
922                 *((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64,
923                     Q_CTRLINI(info->prec));
924                 break;
925         case VSD_DTYPE_Q_U64:
926                 *((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64,
927                     Q_CTRLINI(info->prec));
928                 break;
929         default:
930                 break;
931         }
932
933         return (0);
934 }
935
936 int
937 stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
938     struct voistatspec *vss)
939 {
940         int i, ret;
941
942         for (i = nvss - 1; i >= 0; i--) {
943                 if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i],
944                     vss[i].hlprinfo)) != 0)
945                         return (ret);
946         }
947
948         return (0);
949 }
950
951 void
952 stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss)
953 {
954         int i;
955
956         for (i = nvss - 1; i >= 0; i--) {
957                 if (vss[i].hlpr) {
958                         stats_free((void *)vss[i].iv);
959                         vss[i].iv = NULL;
960                 }
961         }
962 }
963
964 int
965 stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl)
966 {
967         int error;
968
969         error = 0;
970
971         TPL_LIST_WLOCK();
972         if (tpl_id < 0 || tpl_id >= (int)ntpl) {
973                 error = ENOENT;
974         } else {
975                 *tpl = tpllist[tpl_id];
976                 /* XXXLAS: Acquire refcount on tpl. */
977         }
978         TPL_LIST_WUNLOCK();
979
980         return (error);
981 }
982
983 int
984 stats_tpl_fetch_allocid(const char *name, uint32_t hash)
985 {
986         int i, tpl_id;
987
988         tpl_id = -ESRCH;
989
990         TPL_LIST_RLOCK();
991         for (i = ntpl - 1; i >= 0; i--) {
992                 if (name != NULL) {
993                         if (strlen(name) == strlen(tpllist[i]->mb->tplname) &&
994                             strncmp(name, tpllist[i]->mb->tplname,
995                             TPL_MAX_NAME_LEN) == 0 && (!hash || hash ==
996                             tpllist[i]->mb->tplhash)) {
997                                 tpl_id = i;
998                                 break;
999                         }
1000                 } else if (hash == tpllist[i]->mb->tplhash) {
1001                         tpl_id = i;
1002                         break;
1003                 }
1004         }
1005         TPL_LIST_RUNLOCK();
1006
1007         return (tpl_id);
1008 }
1009
1010 int
1011 stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len)
1012 {
1013         int error;
1014
1015         error = 0;
1016
1017         TPL_LIST_RLOCK();
1018         if (tpl_id < ntpl) {
1019                 if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname))
1020                         strlcpy(buf, tpllist[tpl_id]->mb->tplname, len);
1021                 else
1022                         error = EOVERFLOW;
1023         } else
1024                 error = ENOENT;
1025         TPL_LIST_RUNLOCK();
1026
1027         return (error);
1028 }
1029
1030 int
1031 stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates,
1032     void *seed_bytes, size_t seed_len)
1033 {
1034         uint32_t cum_pct, rnd_pct;
1035         int i;
1036
1037         cum_pct = 0;
1038
1039         /*
1040          * Choose a pseudorandom or seeded number in range [0,100] and use
1041          * it to make a sampling decision and template selection where required.
1042          * If no seed is supplied, a PRNG is used to generate a pseudorandom
1043          * number so that every selection is independent. If a seed is supplied,
1044          * the caller desires random selection across different seeds, but
1045          * deterministic selection given the same seed. This is achieved by
1046          * hashing the seed and using the hash as the random number source.
1047          *
1048          * XXXLAS: Characterise hash function output distribution.
1049          */
1050         if (seed_bytes == NULL)
1051                 rnd_pct = random() / (INT32_MAX / 100);
1052         else
1053                 rnd_pct = hash32_buf(seed_bytes, seed_len, 0) /
1054                     (UINT32_MAX / 100U);
1055
1056         /*
1057          * We map the randomly selected percentage on to the interval [0,100]
1058          * consisting of the cumulatively summed template sampling percentages.
1059          * The difference between the cumulative sum of all template sampling
1060          * percentages and 100 is treated as a NULL assignment i.e. no stats
1061          * template will be assigned, and -1 returned instead.
1062          */
1063         for (i = 0; i < nrates; i++) {
1064                 cum_pct += rates[i].tpl_sample_pct;
1065
1066                 KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__,
1067                     cum_pct));
1068                 if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0)
1069                         continue;
1070
1071                 return (rates[i].tpl_slot_id);
1072         }
1073
1074         return (-1);
1075 }
1076
1077 int
1078 stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
1079     struct statsblobv1 *src, uint32_t flags)
1080 {
1081         int error, tmperror;
1082
1083         error = tmperror = 0;
1084
1085         if (src == NULL || dst == NULL ||
1086             src->cursz < sizeof(struct statsblob) ||
1087             ((flags & SB_CLONE_ALLOCDST) &&
1088             (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) {
1089                 error = EINVAL;
1090         } else if (flags & SB_CLONE_ALLOCDST) {
1091                 *dst = stats_realloc(NULL, 0, src->cursz, 0);
1092                 if (*dst)
1093                         (*dst)->maxsz = dstmaxsz = src->cursz;
1094                 else
1095                         error = ENOMEM;
1096         } else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) {
1097                 error = EINVAL;
1098         }
1099
1100         if (!error) {
1101                 size_t postcurszlen;
1102
1103                 /*
1104                  * Clone src into dst except for the maxsz field. If dst is too
1105                  * small to hold all of src, only copy src's header and return
1106                  * EOVERFLOW.
1107                  */
1108 #ifdef _KERNEL
1109                 if (flags & SB_CLONE_USRDSTNOFAULT)
1110                         error = copyout_nofault(src, *dst,
1111                             offsetof(struct statsblob, maxsz));
1112                 else if (flags & SB_CLONE_USRDST)
1113                         error = copyout(src, *dst,
1114                             offsetof(struct statsblob, maxsz));
1115                 else
1116 #endif
1117                         memcpy(*dst, src, offsetof(struct statsblob, maxsz));
1118 #ifdef _KERNEL
1119                 if (error != 0)
1120                         goto out;
1121 #endif
1122
1123
1124                 if (dstmaxsz >= src->cursz) {
1125                         postcurszlen = src->cursz -
1126                             offsetof(struct statsblob, cursz);
1127                 } else {
1128                         error = EOVERFLOW;
1129                         postcurszlen = sizeof(struct statsblob) -
1130                             offsetof(struct statsblob, cursz);
1131                 }
1132 #ifdef _KERNEL
1133                 if (flags & SB_CLONE_USRDSTNOFAULT)
1134                         tmperror = copyout_nofault(&(src->cursz), &((*dst)->cursz),
1135                             postcurszlen);
1136                 else if (flags & SB_CLONE_USRDST)
1137                         tmperror = copyout(&(src->cursz), &((*dst)->cursz),
1138                             postcurszlen);
1139                 else
1140 #endif
1141                         memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen);
1142
1143                 error = error ? error : tmperror;
1144         }
1145 #ifdef _KERNEL
1146 out:
1147 #endif
1148
1149         return (error);
1150 }
1151
1152 int
1153 stats_v1_tpl_alloc(const char *name, uint32_t flags __unused)
1154 {
1155         struct statsblobv1_tpl *tpl, **newtpllist;
1156         struct statsblobv1 *tpl_sb;
1157         struct metablob *tpl_mb;
1158         int tpl_id;
1159
1160         if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN)
1161                 return (-EINVAL);
1162
1163         if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0)
1164                 return (-EEXIST);
1165
1166         tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO);
1167         tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO);
1168         tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO);
1169
1170         if (tpl_mb != NULL && name != NULL)
1171                 tpl_mb->tplname = stats_strdup(name, 0);
1172
1173         if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL ||
1174             tpl_mb->tplname == NULL) {
1175                 stats_free(tpl);
1176                 stats_free(tpl_sb);
1177                 if (tpl_mb != NULL) {
1178                         stats_free(tpl_mb->tplname);
1179                         stats_free(tpl_mb);
1180                 }
1181                 return (-ENOMEM);
1182         }
1183
1184         tpl->mb = tpl_mb;
1185         tpl->sb = tpl_sb;
1186
1187         tpl_sb->abi = STATS_ABI_V1;
1188         tpl_sb->endian =
1189 #if BYTE_ORDER == LITTLE_ENDIAN
1190             SB_LE;
1191 #elif BYTE_ORDER == BIG_ENDIAN
1192             SB_BE;
1193 #else
1194             SB_UE;
1195 #endif
1196         tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1);
1197         tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1);
1198
1199         TPL_LIST_WLOCK();
1200         newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *),
1201             (ntpl + 1) * sizeof(void *), 0);
1202         if (newtpllist != NULL) {
1203                 tpl_id = ntpl++;
1204                 tpllist = (struct statsblob_tpl **)newtpllist;
1205                 tpllist[tpl_id] = (struct statsblob_tpl *)tpl;
1206                 stats_tpl_update_hash(tpllist[tpl_id]);
1207         } else {
1208                 stats_free(tpl);
1209                 stats_free(tpl_sb);
1210                 if (tpl_mb != NULL) {
1211                         stats_free(tpl_mb->tplname);
1212                         stats_free(tpl_mb);
1213                 }
1214                 tpl_id = -ENOMEM;
1215         }
1216         TPL_LIST_WUNLOCK();
1217
1218         return (tpl_id);
1219 }
1220
1221 int
1222 stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
1223     enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
1224     uint32_t flags)
1225 {
1226         struct voi *voi;
1227         struct voistat *tmpstat;
1228         struct statsblobv1 *tpl_sb;
1229         struct metablob *tpl_mb;
1230         int error, i, newstatdataidx, newvoibytes, newvoistatbytes,
1231             newvoistatdatabytes, newvoistatmaxid;
1232         uint32_t nbytes;
1233
1234         if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES ||
1235             nvss == 0 || vss == NULL)
1236                 return (EINVAL);
1237
1238         error = nbytes = newvoibytes = newvoistatbytes =
1239             newvoistatdatabytes = 0;
1240         newvoistatmaxid = -1;
1241
1242         /* Calculate the number of bytes required for the new voistats. */
1243         for (i = nvss - 1; i >= 0; i--) {
1244                 if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES ||
1245                     vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES ||
1246                     vss[i].iv == NULL || vss[i].vsdsz == 0)
1247                         return (EINVAL);
1248                 if ((int)vss[i].stype > newvoistatmaxid)
1249                         newvoistatmaxid = vss[i].stype;
1250                 newvoistatdatabytes += vss[i].vsdsz;
1251         }
1252
1253         if (flags & SB_VOI_RELUPDATE) {
1254                 /* XXXLAS: VOI state bytes may need to vary based on stat types. */
1255                 newvoistatdatabytes += sizeof(struct voistatdata_voistate);
1256         }
1257         nbytes += newvoistatdatabytes;
1258
1259         TPL_LIST_WLOCK();
1260         if (tpl_id < ntpl) {
1261                 tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb;
1262                 tpl_mb = tpllist[tpl_id]->mb;
1263
1264                 if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) {
1265                         /* Adding a new VOI and associated stats. */
1266                         if (voi_id >= NVOIS(tpl_sb)) {
1267                                 /* We need to grow the tpl_sb->vois array. */
1268                                 newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) *
1269                                     sizeof(struct voi);
1270                                 nbytes += newvoibytes;
1271                         }
1272                         newvoistatbytes =
1273                             (newvoistatmaxid + 1) * sizeof(struct voistat);
1274                 } else {
1275                         /* Adding stats to an existing VOI. */
1276                         if (newvoistatmaxid >
1277                             tpl_sb->vois[voi_id].voistatmaxid) {
1278                                 newvoistatbytes = (newvoistatmaxid -
1279                                     tpl_sb->vois[voi_id].voistatmaxid) *
1280                                     sizeof(struct voistat);
1281                         }
1282                         /* XXXLAS: KPI does not yet support expanding VOIs. */
1283                         error = EOPNOTSUPP;
1284                 }
1285                 nbytes += newvoistatbytes;
1286
1287                 if (!error && newvoibytes > 0) {
1288                         struct voi_meta *voi_meta = tpl_mb->voi_meta;
1289
1290                         voi_meta = stats_realloc(voi_meta, voi_meta == NULL ?
1291                             0 : NVOIS(tpl_sb) * sizeof(struct voi_meta),
1292                             (1 + voi_id) * sizeof(struct voi_meta),
1293                             M_ZERO);
1294
1295                         if (voi_meta == NULL)
1296                                 error = ENOMEM;
1297                         else
1298                                 tpl_mb->voi_meta = voi_meta;
1299                 }
1300
1301                 if (!error) {
1302                         /* NB: Resizing can change where tpl_sb points. */
1303                         error = stats_v1_blob_expand(&tpl_sb, newvoibytes,
1304                             newvoistatbytes, newvoistatdatabytes);
1305                 }
1306
1307                 if (!error) {
1308                         tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name,
1309                             0);
1310                         if (tpl_mb->voi_meta[voi_id].name == NULL)
1311                                 error = ENOMEM;
1312                 }
1313
1314                 if (!error) {
1315                         /* Update the template list with the resized pointer. */
1316                         tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb;
1317
1318                         /* Update the template. */
1319                         voi = &tpl_sb->vois[voi_id];
1320
1321                         if (voi->id < 0) {
1322                                 /* VOI is new and needs to be initialised. */
1323                                 voi->id = voi_id;
1324                                 voi->dtype = voi_dtype;
1325                                 voi->stats_off = tpl_sb->stats_off;
1326                                 if (flags & SB_VOI_RELUPDATE)
1327                                         voi->flags |= VOI_REQSTATE;
1328                         } else {
1329                                 /*
1330                                  * XXXLAS: When this else block is written, the
1331                                  * "KPI does not yet support expanding VOIs"
1332                                  * error earlier in this function can be
1333                                  * removed. What is required here is to shuffle
1334                                  * the voistat array such that the new stats for
1335                                  * the voi are contiguous, which will displace
1336                                  * stats for other vois that reside after the
1337                                  * voi being updated. The other vois then need
1338                                  * to have their stats_off adjusted post
1339                                  * shuffle.
1340                                  */
1341                         }
1342
1343                         voi->voistatmaxid = newvoistatmaxid;
1344                         newstatdataidx = 0;
1345
1346                         if (voi->flags & VOI_REQSTATE) {
1347                                 /* Initialise the voistate stat in slot 0. */
1348                                 tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off);
1349                                 tmpstat->stype = VS_STYPE_VOISTATE;
1350                                 tmpstat->flags = 0;
1351                                 tmpstat->dtype = VSD_DTYPE_VOISTATE;
1352                                 newstatdataidx = tmpstat->dsz =
1353                                     sizeof(struct voistatdata_numeric);
1354                                 tmpstat->data_off = tpl_sb->statsdata_off;
1355                         }
1356
1357                         for (i = 0; (uint32_t)i < nvss; i++) {
1358                                 tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off +
1359                                     (vss[i].stype * sizeof(struct voistat)));
1360                                 KASSERT(tmpstat->stype < 0, ("voistat %p "
1361                                     "already initialised", tmpstat));
1362                                 tmpstat->stype = vss[i].stype;
1363                                 tmpstat->flags = vss[i].flags;
1364                                 tmpstat->dtype = vss[i].vs_dtype;
1365                                 tmpstat->dsz = vss[i].vsdsz;
1366                                 tmpstat->data_off = tpl_sb->statsdata_off +
1367                                     newstatdataidx;
1368                                 memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off),
1369                                     vss[i].iv, vss[i].vsdsz);
1370                                 newstatdataidx += vss[i].vsdsz;
1371                         }
1372
1373                         /* Update the template version hash. */
1374                         stats_tpl_update_hash(tpllist[tpl_id]);
1375                         /* XXXLAS: Confirm tpl name/hash pair remains unique. */
1376                 }
1377         } else
1378                 error = EINVAL;
1379         TPL_LIST_WUNLOCK();
1380
1381         return (error);
1382 }
1383
1384 struct statsblobv1 *
1385 stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused)
1386 {
1387         struct statsblobv1 *sb;
1388         int error;
1389
1390         sb = NULL;
1391
1392         TPL_LIST_RLOCK();
1393         if (tpl_id < ntpl) {
1394                 sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0);
1395                 if (sb != NULL) {
1396                         sb->maxsz = tpllist[tpl_id]->sb->maxsz;
1397                         error = stats_v1_blob_init_locked(sb, tpl_id, 0);
1398                 } else
1399                         error = ENOMEM;
1400
1401                 if (error) {
1402                         stats_free(sb);
1403                         sb = NULL;
1404                 }
1405         }
1406         TPL_LIST_RUNLOCK();
1407
1408         return (sb);
1409 }
1410
/*
 * Release a stats blob by handing it to stats_free().
 */
void
stats_v1_blob_destroy(struct statsblobv1 *sb)
{

	stats_free(sb);
}
1417
1418 int
1419 stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
1420     enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
1421     size_t *retvsdsz)
1422 {
1423         struct voi *v;
1424         struct voistat *vs;
1425
1426         if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 ||
1427             voi_id >= NVOIS(sb))
1428                 return (EINVAL);
1429
1430         v = &sb->vois[voi_id];
1431         if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid)
1432                 return (EINVAL);
1433
1434         vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat)));
1435         *retvsd = BLOB_OFFSET(sb, vs->data_off);
1436         if (retdtype != NULL)
1437                 *retdtype = vs->dtype;
1438         if (retvsdsz != NULL)
1439                 *retvsdsz = vs->dsz;
1440
1441         return (0);
1442 }
1443
1444 int
1445 stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags)
1446 {
1447         int error;
1448
1449         error = 0;
1450
1451         TPL_LIST_RLOCK();
1452         if (sb == NULL || tpl_id >= ntpl) {
1453                 error = EINVAL;
1454         } else {
1455                 error = stats_v1_blob_init_locked(sb, tpl_id, flags);
1456         }
1457         TPL_LIST_RUNLOCK();
1458
1459         return (error);
1460 }
1461
1462 static inline int
1463 stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
1464     uint32_t flags __unused)
1465 {
1466         int error;
1467
1468         TPL_LIST_RLOCK_ASSERT();
1469         error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW;
1470         KASSERT(!error,
1471             ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz));
1472
1473         if (!error) {
1474                 memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz);
1475                 sb->created = sb->lastrst = stats_sbinuptime();
1476                 sb->tplhash = tpllist[tpl_id]->mb->tplhash;
1477         }
1478
1479         return (error);
1480 }
1481
1482 static int
1483 stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
1484     int newvoistatbytes, int newvoistatdatabytes)
1485 {
1486         struct statsblobv1 *sb;
1487         struct voi *tmpvoi;
1488         struct voistat *tmpvoistat, *voistat_array;
1489         int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats;
1490
1491         KASSERT(newvoibytes % sizeof(struct voi) == 0,
1492             ("Bad newvoibytes %d", newvoibytes));
1493         KASSERT(newvoistatbytes % sizeof(struct voistat) == 0,
1494             ("Bad newvoistatbytes %d", newvoistatbytes));
1495
1496         error = ((newvoibytes % sizeof(struct voi) == 0) &&
1497             (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL;
1498         sb = *sbpp;
1499         nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes;
1500
1501         /*
1502          * XXXLAS: Required until we gain support for flags which alter the
1503          * units of size/offset fields in key structs.
1504          */
1505         if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ))
1506                 error = EFBIG;
1507
1508         if (!error && (sb->cursz + nbytes > sb->maxsz)) {
1509                 /* Need to expand our blob. */
1510                 sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO);
1511                 if (sb != NULL) {
1512                         sb->maxsz = sb->cursz + nbytes;
1513                         *sbpp = sb;
1514                 } else
1515                     error = ENOMEM;
1516         }
1517
1518         if (!error) {
1519                 /*
1520                  * Shuffle memory within the expanded blob working from the end
1521                  * backwards, leaving gaps for the new voistat and voistatdata
1522                  * structs at the beginning of their respective blob regions,
1523                  * and for the new voi structs at the end of their blob region.
1524                  */
1525                 memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes),
1526                     BLOB_OFFSET(sb, sb->statsdata_off),
1527                     sb->cursz - sb->statsdata_off);
1528                 memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes +
1529                     newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off),
1530                     sb->statsdata_off - sb->stats_off);
1531
1532                 /* First index of new voi/voistat structs to be initialised. */
1533                 idxnewvois = NVOIS(sb);
1534                 idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1;
1535
1536                 /* Update housekeeping variables and offsets. */
1537                 sb->cursz += nbytes;
1538                 sb->stats_off += newvoibytes;
1539                 sb->statsdata_off += newvoibytes + newvoistatbytes;
1540
1541                 /* XXXLAS: Zeroing not strictly needed but aids debugging. */
1542                 memset(&sb->vois[idxnewvois], '\0', newvoibytes);
1543                 memset(BLOB_OFFSET(sb, sb->stats_off), '\0',
1544                     newvoistatbytes);
1545                 memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0',
1546                     newvoistatdatabytes);
1547
1548                 /* Initialise new voi array members and update offsets. */
1549                 for (i = 0; i < NVOIS(sb); i++) {
1550                         tmpvoi = &sb->vois[i];
1551                         if (i >= idxnewvois) {
1552                                 tmpvoi->id = tmpvoi->voistatmaxid = -1;
1553                         } else if (tmpvoi->id > -1) {
1554                                 tmpvoi->stats_off += newvoibytes +
1555                                     newvoistatbytes;
1556                         }
1557                 }
1558
1559                 /* Initialise new voistat array members and update offsets. */
1560                 nvoistats = (sb->statsdata_off - sb->stats_off) /
1561                     sizeof(struct voistat);
1562                 voistat_array = BLOB_OFFSET(sb, sb->stats_off);
1563                 for (i = 0; i < nvoistats; i++) {
1564                         tmpvoistat = &voistat_array[i];
1565                         if (i <= idxnewvoistats) {
1566                                 tmpvoistat->stype = -1;
1567                         } else if (tmpvoistat->stype > -1) {
1568                                 tmpvoistat->data_off += nbytes;
1569                         }
1570                 }
1571         }
1572
1573         return (error);
1574 }
1575
1576 static void
1577 stats_v1_blob_finalise(struct statsblobv1 *sb __unused)
1578 {
1579
1580         /* XXXLAS: Fill this in. */
1581 }
1582
1583 static void
1584 stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb,
1585     void *usrctx, uint32_t flags)
1586 {
1587         struct voi *v;
1588         struct voistat *vs;
1589         struct sb_iter_ctx ctx;
1590         int i, j, firstvoi;
1591
1592         ctx.usrctx = usrctx;
1593         ctx.flags = SB_IT_FIRST_CB;
1594         firstvoi = 1;
1595
1596         for (i = 0; i < NVOIS(sb); i++) {
1597                 v = &sb->vois[i];
1598                 ctx.vslot = i;
1599                 ctx.vsslot = -1;
1600                 ctx.flags |= SB_IT_FIRST_VOISTAT;
1601
1602                 if (firstvoi)
1603                         ctx.flags |= SB_IT_FIRST_VOI;
1604                 else if (i == (NVOIS(sb) - 1))
1605                         ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB;
1606
1607                 if (v->id < 0 && (flags & SB_IT_NULLVOI)) {
1608                         if (icb(sb, v, NULL, &ctx))
1609                                 return;
1610                         firstvoi = 0;
1611                         ctx.flags &= ~SB_IT_FIRST_CB;
1612                 }
1613
1614                 /* If NULL voi, v->voistatmaxid == -1 */
1615                 for (j = 0; j <= v->voistatmaxid; j++) {
1616                         vs = &((struct voistat *)BLOB_OFFSET(sb,
1617                             v->stats_off))[j];
1618                         if (vs->stype < 0 &&
1619                             !(flags & SB_IT_NULLVOISTAT))
1620                                 continue;
1621
1622                         if (j == v->voistatmaxid) {
1623                                 ctx.flags |= SB_IT_LAST_VOISTAT;
1624                                 if (i == (NVOIS(sb) - 1))
1625                                         ctx.flags |=
1626                                             SB_IT_LAST_CB;
1627                         } else
1628                                 ctx.flags &= ~SB_IT_LAST_CB;
1629
1630                         ctx.vsslot = j;
1631                         if (icb(sb, v, vs, &ctx))
1632                                 return;
1633
1634                         ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT |
1635                             SB_IT_LAST_VOISTAT);
1636                 }
1637                 ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI);
1638         }
1639 }
1640
1641 static inline void
1642 stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused,
1643     const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype,
1644     size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
1645 {
1646         const struct ctdth32 *ctd32tree;
1647         const struct ctdth64 *ctd64tree;
1648         const struct voistatdata_tdgstctd32 *ctd32;
1649         const struct voistatdata_tdgstctd64 *ctd64;
1650         const char *fmtstr;
1651         uint64_t smplcnt, compcnt;
1652         int is32bit, qmaxstrlen;
1653         uint16_t maxctds, curctds;
1654
1655         switch (tdgst_dtype) {
1656         case VSD_DTYPE_TDGSTCLUST32:
1657                 smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt;
1658                 compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt;
1659                 maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
1660                 curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
1661                 ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree;
1662                 ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) :
1663                     ARB_CMIN(ctdth32, ctd32tree));
1664                 qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10);
1665                 is32bit = 1;
1666                 ctd64tree = NULL;
1667                 ctd64 = NULL;
1668                 break;
1669         case VSD_DTYPE_TDGSTCLUST64:
1670                 smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt;
1671                 compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt;
1672                 maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
1673                 curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
1674                 ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree;
1675                 ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) :
1676                     ARB_CMIN(ctdth64, ctd64tree));
1677                 qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10);
1678                 is32bit = 0;
1679                 ctd32tree = NULL;
1680                 ctd32 = NULL;
1681                 break;
1682         default:
1683                 return;
1684         }
1685
1686         switch (fmt) {
1687         case SB_STRFMT_FREEFORM:
1688                 fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu";
1689                 break;
1690         case SB_STRFMT_JSON:
1691         default:
1692                 fmtstr =
1693                     "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu,"
1694                     "\"nctds\":%hu,\"ctds\":[";
1695                 break;
1696         }
1697         sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt,
1698             maxctds, curctds);
1699
1700         while ((is32bit ? NULL != ctd32 : NULL != ctd64)) {
1701                 char qstr[qmaxstrlen];
1702
1703                 switch (fmt) {
1704                 case SB_STRFMT_FREEFORM:
1705                         fmtstr = "\n\t\t\t\t";
1706                         break;
1707                 case SB_STRFMT_JSON:
1708                 default:
1709                         fmtstr = "{";
1710                         break;
1711                 }
1712                 sbuf_cat(buf, fmtstr);
1713
1714                 if (objdump) {
1715                         switch (fmt) {
1716                         case SB_STRFMT_FREEFORM:
1717                                 fmtstr = "ctd[%hu].";
1718                                 break;
1719                         case SB_STRFMT_JSON:
1720                         default:
1721                                 fmtstr = "\"ctd\":%hu,";
1722                                 break;
1723                         }
1724                         sbuf_printf(buf, fmtstr, is32bit ?
1725                             ARB_SELFIDX(ctd32tree, ctd32) :
1726                             ARB_SELFIDX(ctd64tree, ctd64));
1727                 }
1728
1729                 switch (fmt) {
1730                 case SB_STRFMT_FREEFORM:
1731                         fmtstr = "{mu=";
1732                         break;
1733                 case SB_STRFMT_JSON:
1734                 default:
1735                         fmtstr = "\"mu\":";
1736                         break;
1737                 }
1738                 sbuf_cat(buf, fmtstr);
1739                 Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr,
1740                     sizeof(qstr));
1741                 sbuf_cat(buf, qstr);
1742
1743                 switch (fmt) {
1744                 case SB_STRFMT_FREEFORM:
1745                         fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
1746                         break;
1747                 case SB_STRFMT_JSON:
1748                 default:
1749                         fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
1750                         break;
1751                 }
1752                 sbuf_printf(buf, fmtstr,
1753                     is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt);
1754
1755                 if (is32bit)
1756                         ctd32 = (objdump ? ARB_CNODE(ctd32tree,
1757                             ARB_SELFIDX(ctd32tree, ctd32) + 1) :
1758                             ARB_CNEXT(ctdth32, ctd32tree, ctd32));
1759                 else
1760                         ctd64 = (objdump ? ARB_CNODE(ctd64tree,
1761                             ARB_SELFIDX(ctd64tree, ctd64) + 1) :
1762                             ARB_CNEXT(ctdth64, ctd64tree, ctd64));
1763
1764                 if (fmt == SB_STRFMT_JSON &&
1765                     (is32bit ? NULL != ctd32 : NULL != ctd64))
1766                         sbuf_putc(buf, ',');
1767         }
1768         if (fmt == SB_STRFMT_JSON)
1769                 sbuf_cat(buf, "]");
1770 }
1771
1772 static inline void
1773 stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype,
1774     const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype,
1775     size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
1776 {
1777         const struct voistatdata_numeric *bkt_lb, *bkt_ub;
1778         const char *fmtstr;
1779         int is32bit;
1780         uint16_t i, nbkts;
1781
1782         switch (hist_dtype) {
1783         case VSD_DTYPE_CRHIST32:
1784                 nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz);
1785                 is32bit = 1;
1786                 break;
1787         case VSD_DTYPE_DRHIST32:
1788                 nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz);
1789                 is32bit = 1;
1790                 break;
1791         case VSD_DTYPE_DVHIST32:
1792                 nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz);
1793                 is32bit = 1;
1794                 break;
1795         case VSD_DTYPE_CRHIST64:
1796                 nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz);
1797                 is32bit = 0;
1798                 break;
1799         case VSD_DTYPE_DRHIST64:
1800                 nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz);
1801                 is32bit = 0;
1802                 break;
1803         case VSD_DTYPE_DVHIST64:
1804                 nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz);
1805                 is32bit = 0;
1806                 break;
1807         default:
1808                 return;
1809         }
1810
1811         switch (fmt) {
1812         case SB_STRFMT_FREEFORM:
1813                 fmtstr = "nbkts=%hu, ";
1814                 break;
1815         case SB_STRFMT_JSON:
1816         default:
1817                 fmtstr = "\"nbkts\":%hu,";
1818                 break;
1819         }
1820         sbuf_printf(buf, fmtstr, nbkts);
1821
1822         switch (fmt) {
1823                 case SB_STRFMT_FREEFORM:
1824                         fmtstr = (is32bit ? "oob=%u" : "oob=%ju");
1825                         break;
1826                 case SB_STRFMT_JSON:
1827                 default:
1828                         fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" :
1829                             "\"oob\":%ju,\"bkts\":[");
1830                         break;
1831         }
1832         sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist,
1833             hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist,
1834             hist_dtype, oob));
1835
1836         for (i = 0; i < nbkts; i++) {
1837                 switch (hist_dtype) {
1838                 case VSD_DTYPE_CRHIST32:
1839                 case VSD_DTYPE_CRHIST64:
1840                         bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype,
1841                             bkts[i].lb);
1842                         if (i < nbkts - 1)
1843                                 bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist,
1844                                     hist_dtype, bkts[i + 1].lb);
1845                         else
1846                                 bkt_ub = &numeric_limits[LIM_MAX][voi_dtype];
1847                         break;
1848                 case VSD_DTYPE_DRHIST32:
1849                 case VSD_DTYPE_DRHIST64:
1850                         bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
1851                             bkts[i].lb);
1852                         bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
1853                             bkts[i].ub);
1854                         break;
1855                 case VSD_DTYPE_DVHIST32:
1856                 case VSD_DTYPE_DVHIST64:
1857                         bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist,
1858                             hist_dtype, bkts[i].val);
1859                         break;
1860                 default:
1861                         break;
1862                 }
1863
1864                 switch (fmt) {
1865                 case SB_STRFMT_FREEFORM:
1866                         fmtstr = "\n\t\t\t\t";
1867                         break;
1868                 case SB_STRFMT_JSON:
1869                 default:
1870                         fmtstr = "{";
1871                         break;
1872                 }
1873                 sbuf_cat(buf, fmtstr);
1874
1875                 if (objdump) {
1876                         switch (fmt) {
1877                         case SB_STRFMT_FREEFORM:
1878                                 fmtstr = "bkt[%hu].";
1879                                 break;
1880                         case SB_STRFMT_JSON:
1881                         default:
1882                                 fmtstr = "\"bkt\":%hu,";
1883                                 break;
1884                         }
1885                         sbuf_printf(buf, fmtstr, i);
1886                 }
1887
1888                 switch (fmt) {
1889                 case SB_STRFMT_FREEFORM:
1890                         fmtstr = "{lb=";
1891                         break;
1892                 case SB_STRFMT_JSON:
1893                 default:
1894                         fmtstr = "\"lb\":";
1895                         break;
1896                 }
1897                 sbuf_cat(buf, fmtstr);
1898                 stats_voistatdata_tostr((const struct voistatdata *)bkt_lb,
1899                     voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
1900                     fmt, buf, objdump);
1901
1902                 switch (fmt) {
1903                 case SB_STRFMT_FREEFORM:
1904                         fmtstr = ",ub=";
1905                         break;
1906                 case SB_STRFMT_JSON:
1907                 default:
1908                         fmtstr = ",\"ub\":";
1909                         break;
1910                 }
1911                 sbuf_cat(buf, fmtstr);
1912                 stats_voistatdata_tostr((const struct voistatdata *)bkt_ub,
1913                     voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
1914                     fmt, buf, objdump);
1915
1916                 switch (fmt) {
1917                 case SB_STRFMT_FREEFORM:
1918                         fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
1919                         break;
1920                 case SB_STRFMT_JSON:
1921                 default:
1922                         fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
1923                         break;
1924                 }
1925                 sbuf_printf(buf, fmtstr, is32bit ?
1926                     VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) :
1927                     (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype,
1928                     bkts[i].cnt));
1929
1930                 if (fmt == SB_STRFMT_JSON && i < nbkts - 1)
1931                         sbuf_putc(buf, ',');
1932         }
1933         if (fmt == SB_STRFMT_JSON)
1934                 sbuf_cat(buf, "]");
1935 }
1936
1937 int
1938 stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype,
1939     enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt,
1940     struct sbuf *buf, int objdump)
1941 {
1942         const char *fmtstr;
1943
1944         if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES ||
1945             vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS)
1946                 return (EINVAL);
1947
1948         switch (vsd_dtype) {
1949         case VSD_DTYPE_VOISTATE:
1950                 switch (fmt) {
1951                 case SB_STRFMT_FREEFORM:
1952                         fmtstr = "prev=";
1953                         break;
1954                 case SB_STRFMT_JSON:
1955                 default:
1956                         fmtstr = "\"prev\":";
1957                         break;
1958                 }
1959                 sbuf_cat(buf, fmtstr);
1960                 /*
1961                  * Render prev by passing it as *vsd and voi_dtype as vsd_dtype.
1962                  */
1963                 stats_voistatdata_tostr(
1964                     (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev,
1965                     voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump);
1966                 break;
1967         case VSD_DTYPE_INT_S32:
1968                 sbuf_printf(buf, "%d", vsd->int32.s32);
1969                 break;
1970         case VSD_DTYPE_INT_U32:
1971                 sbuf_printf(buf, "%u", vsd->int32.u32);
1972                 break;
1973         case VSD_DTYPE_INT_S64:
1974                 sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64);
1975                 break;
1976         case VSD_DTYPE_INT_U64:
1977                 sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64);
1978                 break;
1979         case VSD_DTYPE_INT_SLONG:
1980                 sbuf_printf(buf, "%ld", vsd->intlong.slong);
1981                 break;
1982         case VSD_DTYPE_INT_ULONG:
1983                 sbuf_printf(buf, "%lu", vsd->intlong.ulong);
1984                 break;
1985         case VSD_DTYPE_Q_S32:
1986                 {
1987                 char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)];
1988                 Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr));
1989                 sbuf_cat(buf, qstr);
1990                 }
1991                 break;
1992         case VSD_DTYPE_Q_U32:
1993                 {
1994                 char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)];
1995                 Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr));
1996                 sbuf_cat(buf, qstr);
1997                 }
1998                 break;
1999         case VSD_DTYPE_Q_S64:
2000                 {
2001                 char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)];
2002                 Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr));
2003                 sbuf_cat(buf, qstr);
2004                 }
2005                 break;
2006         case VSD_DTYPE_Q_U64:
2007                 {
2008                 char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)];
2009                 Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr));
2010                 sbuf_cat(buf, qstr);
2011                 }
2012                 break;
2013         case VSD_DTYPE_CRHIST32:
2014         case VSD_DTYPE_DRHIST32:
2015         case VSD_DTYPE_DVHIST32:
2016         case VSD_DTYPE_CRHIST64:
2017         case VSD_DTYPE_DRHIST64:
2018         case VSD_DTYPE_DVHIST64:
2019                 stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd),
2020                     vsd_dtype, vsd_sz, fmt, buf, objdump);
2021                 break;
2022         case VSD_DTYPE_TDGSTCLUST32:
2023         case VSD_DTYPE_TDGSTCLUST64:
2024                 stats_voistatdata_tdgst_tostr(voi_dtype,
2025                     CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf,
2026                     objdump);
2027                 break;
2028         default:
2029                 break;
2030         }
2031
2032         return (sbuf_error(buf));
2033 }
2034
2035 static void
2036 stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v,
2037     struct voistat *vs, struct sb_iter_ctx *ctx)
2038 {
2039         struct sb_tostrcb_ctx *sctx;
2040         struct metablob *tpl_mb;
2041         struct sbuf *buf;
2042         void *vsd;
2043         uint8_t dump;
2044
2045         sctx = ctx->usrctx;
2046         buf = sctx->buf;
2047         tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2048         dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2049
2050         if (ctx->flags & SB_IT_FIRST_CB) {
2051                 sbuf_printf(buf, "struct statsblobv1@%p", sb);
2052                 if (dump) {
2053                         sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, "
2054                             "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, "
2055                             "stats_off=%hu, statsdata_off=%hu",
2056                             sb->abi, sb->endian, sb->maxsz, sb->cursz,
2057                             sb->created, sb->lastrst, sb->flags, sb->stats_off,
2058                             sb->statsdata_off);
2059                 }
2060                 sbuf_printf(buf, ", tplhash=%u", sb->tplhash);
2061         }
2062
2063         if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2064                 sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id);
2065                 if (v->id < 0)
2066                         return;
2067                 sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? "" :
2068                     tpl_mb->voi_meta[v->id].name);
2069                 if (dump)
2070                     sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, "
2071                     "voistatmaxid=%hhd, stats_off=%hu", v->flags,
2072                     vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off);
2073         }
2074
2075         if (!dump && vs->stype <= 0)
2076                 return;
2077
2078         sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot);
2079         if (vs->stype < 0) {
2080                 sbuf_printf(buf, "%hhd", vs->stype);
2081                 return;
2082         } else
2083                 sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype],
2084                     vs->errs);
2085         vsd = BLOB_OFFSET(sb, vs->data_off);
2086         if (dump)
2087                 sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, "
2088                     "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype],
2089                     vs->dsz, vs->data_off);
2090
2091         sbuf_cat(buf, "\n\t\t\tvoistatdata: ");
2092         stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2093             sctx->fmt, buf, dump);
2094 }
2095
2096 static void
2097 stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2098     struct sb_iter_ctx *ctx)
2099 {
2100         struct sb_tostrcb_ctx *sctx;
2101         struct metablob *tpl_mb;
2102         struct sbuf *buf;
2103         const char *fmtstr;
2104         void *vsd;
2105         uint8_t dump;
2106
2107         sctx = ctx->usrctx;
2108         buf = sctx->buf;
2109         tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2110         dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2111
2112         if (ctx->flags & SB_IT_FIRST_CB) {
2113                 sbuf_putc(buf, '{');
2114                 if (dump) {
2115                         sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu,"
2116                             "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd,"
2117                             "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu,"
2118                             "\"statsdata_off\":%hu,", sb->abi,
2119                             sb->endian, sb->maxsz, sb->cursz, sb->created,
2120                             sb->lastrst, sb->flags, sb->stats_off,
2121                             sb->statsdata_off);
2122                 }
2123
2124                 if (tpl_mb == NULL)
2125                         fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{";
2126                 else
2127                         fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{";
2128
2129                 sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null",
2130                     sb->tplhash);
2131         }
2132
2133         if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2134                 if (dump) {
2135                         sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot,
2136                             v->id);
2137                         if (v->id < 0) {
2138                                 sbuf_cat(buf, "},");
2139                                 return;
2140                         }
2141                         
2142                         if (tpl_mb == NULL)
2143                                 fmtstr = ",\"name\":%s,\"flags\":%hu,"
2144                                     "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
2145                                     "\"stats_off\":%hu,";
2146                         else
2147                                 fmtstr = ",\"name\":\"%s\",\"flags\":%hu,"
2148                                     "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
2149                                     "\"stats_off\":%hu,";
2150
2151                         sbuf_printf(buf, fmtstr, tpl_mb ?
2152                             tpl_mb->voi_meta[v->id].name : "null", v->flags,
2153                             vsd_dtype2name[v->dtype], v->voistatmaxid,
2154                             v->stats_off);
2155                 } else {
2156                         if (tpl_mb == NULL) {
2157                                 sbuf_printf(buf, "\"[%hd]\":{", v->id);
2158                         } else {
2159                                 sbuf_printf(buf, "\"%s\":{",
2160                                     tpl_mb->voi_meta[v->id].name);
2161                         }
2162                 }
2163                 sbuf_cat(buf, "\"stats\":{");
2164         }
2165
2166         vsd = BLOB_OFFSET(sb, vs->data_off);
2167         if (dump) {
2168                 sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot);
2169                 if (vs->stype < 0) {
2170                         sbuf_cat(buf, "{\"stype\":-1},");
2171                         return;
2172                 }
2173                 sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu,"
2174                     "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{",
2175                     vs_stype2name[vs->stype], vs->errs, vs->flags,
2176                     vsd_dtype2name[vs->dtype], vs->data_off);
2177         } else if (vs->stype > 0) {
2178                 if (tpl_mb == NULL)
2179                         sbuf_printf(buf, "\"[%hhd]\":", vs->stype);
2180                 else
2181                         sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]);
2182         } else
2183                 return;
2184
2185         if ((vs->flags & VS_VSDVALID) || dump) {
2186                 if (!dump)
2187                         sbuf_printf(buf, "{\"errs\":%hu,", vs->errs);
2188                 /* Simple non-compound VSD types need a key. */
2189                 if (!vsd_compoundtype[vs->dtype])
2190                         sbuf_cat(buf, "\"val\":");
2191                 stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2192                     sctx->fmt, buf, dump);
2193                 sbuf_cat(buf, dump ? "}}" : "}");
2194         } else
2195                 sbuf_cat(buf, dump ? "null}" : "null");
2196
2197         if (ctx->flags & SB_IT_LAST_VOISTAT)
2198                 sbuf_cat(buf, "}}");
2199
2200         if (ctx->flags & SB_IT_LAST_CB)
2201                 sbuf_cat(buf, "}}");
2202         else
2203                 sbuf_putc(buf, ',');
2204 }
2205
2206 static int
2207 stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2208     struct sb_iter_ctx *ctx)
2209 {
2210         struct sb_tostrcb_ctx *sctx;
2211
2212         sctx = ctx->usrctx;
2213
2214         switch (sctx->fmt) {
2215         case SB_STRFMT_FREEFORM:
2216                 stats_v1_itercb_tostr_freeform(sb, v, vs, ctx);
2217                 break;
2218         case SB_STRFMT_JSON:
2219                 stats_v1_itercb_tostr_json(sb, v, vs, ctx);
2220                 break;
2221         default:
2222                 break;
2223         }
2224
2225         return (sbuf_error(sctx->buf));
2226 }
2227
2228 int
2229 stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
2230     enum sb_str_fmt fmt, uint32_t flags)
2231 {
2232         struct sb_tostrcb_ctx sctx;
2233         uint32_t iflags;
2234
2235         if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL ||
2236             fmt >= SB_STRFMT_NUM_FMTS)
2237                 return (EINVAL);
2238
2239         sctx.buf = buf;
2240         sctx.fmt = fmt;
2241         sctx.flags = flags;
2242
2243         if (flags & SB_TOSTR_META) {
2244                 if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash),
2245                     &sctx.tpl))
2246                         return (EINVAL);
2247         } else
2248                 sctx.tpl = NULL;
2249
2250         iflags = 0;
2251         if (flags & SB_TOSTR_OBJDUMP)
2252                 iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT);
2253         stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags);
2254
2255         return (sbuf_error(buf));
2256 }
2257
2258 static int
2259 stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v,
2260     struct voistat *vs, struct sb_iter_ctx *ctx)
2261 {
2262         struct sb_visitcb_ctx *vctx;
2263         struct sb_visit sbv;
2264
2265         vctx = ctx->usrctx;
2266
2267         sbv.tplhash = sb->tplhash;
2268         sbv.voi_id = v->id;
2269         sbv.voi_dtype = v->dtype;
2270         sbv.vs_stype = vs->stype;
2271         sbv.vs_dtype = vs->dtype;
2272         sbv.vs_dsz = vs->dsz;
2273         sbv.vs_data = BLOB_OFFSET(sb, vs->data_off);
2274         sbv.vs_errs = vs->errs;
2275         sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB |
2276             SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
2277             SB_IT_LAST_VOISTAT);
2278
2279         return (vctx->cb(&sbv, vctx->usrctx));
2280 }
2281
2282 int
2283 stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
2284     void *usrctx)
2285 {
2286         struct sb_visitcb_ctx vctx;
2287
2288         if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL)
2289                 return (EINVAL);
2290
2291         vctx.cb = func;
2292         vctx.usrctx = usrctx;
2293
2294         stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0);
2295
2296         return (0);
2297 }
2298
2299 static int
2300 stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused,
2301     struct voistat *vs, struct sb_iter_ctx *ctx __unused)
2302 {
2303         void *vsd;
2304
2305         if (vs->stype == VS_STYPE_VOISTATE)
2306                 return (0);
2307
2308         vsd = BLOB_OFFSET(sb, vs->data_off);
2309
2310         /* Perform the stat type's default reset action. */
2311         switch (vs->stype) {
2312         case VS_STYPE_SUM:
2313                 switch (vs->dtype) {
2314                 case VSD_DTYPE_Q_S32:
2315                         Q_SIFVAL(VSD(q32, vsd)->sq32, 0);
2316                         break;
2317                 case VSD_DTYPE_Q_U32:
2318                         Q_SIFVAL(VSD(q32, vsd)->uq32, 0);
2319                         break;
2320                 case VSD_DTYPE_Q_S64:
2321                         Q_SIFVAL(VSD(q64, vsd)->sq64, 0);
2322                         break;
2323                 case VSD_DTYPE_Q_U64:
2324                         Q_SIFVAL(VSD(q64, vsd)->uq64, 0);
2325                         break;
2326                 default:
2327                         bzero(vsd, vs->dsz);
2328                         break;
2329                 }
2330                 break;
2331         case VS_STYPE_MAX:
2332                 switch (vs->dtype) {
2333                 case VSD_DTYPE_Q_S32:
2334                         Q_SIFVAL(VSD(q32, vsd)->sq32,
2335                             Q_IFMINVAL(VSD(q32, vsd)->sq32));
2336                         break;
2337                 case VSD_DTYPE_Q_U32:
2338                         Q_SIFVAL(VSD(q32, vsd)->uq32,
2339                             Q_IFMINVAL(VSD(q32, vsd)->uq32));
2340                         break;
2341                 case VSD_DTYPE_Q_S64:
2342                         Q_SIFVAL(VSD(q64, vsd)->sq64,
2343                             Q_IFMINVAL(VSD(q64, vsd)->sq64));
2344                         break;
2345                 case VSD_DTYPE_Q_U64:
2346                         Q_SIFVAL(VSD(q64, vsd)->uq64,
2347                             Q_IFMINVAL(VSD(q64, vsd)->uq64));
2348                         break;
2349                 default:
2350                         memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype],
2351                             vs->dsz);
2352                         break;
2353                 }
2354                 break;
2355         case VS_STYPE_MIN:
2356                 switch (vs->dtype) {
2357                 case VSD_DTYPE_Q_S32:
2358                         Q_SIFVAL(VSD(q32, vsd)->sq32,
2359                             Q_IFMAXVAL(VSD(q32, vsd)->sq32));
2360                         break;
2361                 case VSD_DTYPE_Q_U32:
2362                         Q_SIFVAL(VSD(q32, vsd)->uq32,
2363                             Q_IFMAXVAL(VSD(q32, vsd)->uq32));
2364                         break;
2365                 case VSD_DTYPE_Q_S64:
2366                         Q_SIFVAL(VSD(q64, vsd)->sq64,
2367                             Q_IFMAXVAL(VSD(q64, vsd)->sq64));
2368                         break;
2369                 case VSD_DTYPE_Q_U64:
2370                         Q_SIFVAL(VSD(q64, vsd)->uq64,
2371                             Q_IFMAXVAL(VSD(q64, vsd)->uq64));
2372                         break;
2373                 default:
2374                         memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype],
2375                             vs->dsz);
2376                         break;
2377                 }
2378                 break;
2379         case VS_STYPE_HIST:
2380                 {
2381                 /* Reset bucket counts. */
2382                 struct voistatdata_hist *hist;
2383                 int i, is32bit;
2384                 uint16_t nbkts;
2385
2386                 hist = VSD(hist, vsd);
2387                 switch (vs->dtype) {
2388                 case VSD_DTYPE_CRHIST32:
2389                         nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
2390                         is32bit = 1;
2391                         break;
2392                 case VSD_DTYPE_DRHIST32:
2393                         nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
2394                         is32bit = 1;
2395                         break;
2396                 case VSD_DTYPE_DVHIST32:
2397                         nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
2398                         is32bit = 1;
2399                         break;
2400                 case VSD_DTYPE_CRHIST64:
2401                         nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
2402                         is32bit = 0;
2403                         break;
2404                 case VSD_DTYPE_DRHIST64:
2405                         nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
2406                         is32bit = 0;
2407                         break;
2408                 case VSD_DTYPE_DVHIST64:
2409                         nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
2410                         is32bit = 0;
2411                         break;
2412                 default:
2413                         return (0);
2414                 }
2415
2416                 bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob),
2417                     is32bit ? sizeof(uint32_t) : sizeof(uint64_t));
2418                 for (i = nbkts - 1; i >= 0; i--) {
2419                         bzero(VSD_HIST_FIELDPTR(hist, vs->dtype,
2420                             bkts[i].cnt), is32bit ? sizeof(uint32_t) :
2421                             sizeof(uint64_t));
2422                 }
2423                 break;
2424                 }
2425         case VS_STYPE_TDGST:
2426                 {
2427                 /* Reset sample count centroids array/tree. */
2428                 struct voistatdata_tdgst *tdgst;
2429                 struct ctdth32 *ctd32tree;
2430                 struct ctdth64 *ctd64tree;
2431                 struct voistatdata_tdgstctd32 *ctd32;
2432                 struct voistatdata_tdgstctd64 *ctd64;
2433
2434                 tdgst = VSD(tdgst, vsd);
2435                 switch (vs->dtype) {
2436                 case VSD_DTYPE_TDGSTCLUST32:
2437                         VSD(tdgstclust32, tdgst)->smplcnt = 0;
2438                         VSD(tdgstclust32, tdgst)->compcnt = 0;
2439                         ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
2440                         ARB_INIT(ctd32, ctdlnk, ctd32tree,
2441                             ARB_MAXNODES(ctd32tree)) {
2442                                 ctd32->cnt = 0;
2443                                 Q_SIFVAL(ctd32->mu, 0);
2444                         }
2445 #ifdef DIAGNOSTIC
2446                         RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
2447 #endif
2448                 break;
2449                 case VSD_DTYPE_TDGSTCLUST64:
2450                         VSD(tdgstclust64, tdgst)->smplcnt = 0;
2451                         VSD(tdgstclust64, tdgst)->compcnt = 0;
2452                         ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
2453                         ARB_INIT(ctd64, ctdlnk, ctd64tree,
2454                             ARB_MAXNODES(ctd64tree)) {
2455                                 ctd64->cnt = 0;
2456                                 Q_SIFVAL(ctd64->mu, 0);
2457                         }
2458 #ifdef DIAGNOSTIC
2459                         RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
2460 #endif
2461                 break;
2462                 default:
2463                         return (0);
2464                 }
2465                 break;
2466                 }
2467         default:
2468                 KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
2469                 break;
2470         }
2471
2472         vs->errs = 0;
2473         vs->flags &= ~VS_VSDVALID;
2474
2475         return (0);
2476 }
2477
2478 int
2479 stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
2480     struct statsblobv1 *src, uint32_t flags)
2481 {
2482         int error;
2483
2484         if (src != NULL && src->abi == STATS_ABI_V1) {
2485                 error = stats_v1_blob_clone(dst, dstmaxsz, src, flags);
2486                 if (!error) {
2487                         if (flags & SB_CLONE_RSTSRC) {
2488                                 stats_v1_blob_iter(src,
2489                                     stats_v1_icb_reset_voistat, NULL, 0);
2490                                 src->lastrst = stats_sbinuptime();
2491                         }
2492                         stats_v1_blob_finalise(*dst);
2493                 }
2494         } else
2495                 error = EINVAL;
2496
2497         return (error);
2498 }
2499
2500 static inline int
2501 stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused,
2502     struct voistatdata *voival, struct voistat *vs, void *vsd)
2503 {
2504         int error;
2505
2506         KASSERT(vs->dtype < VSD_NUM_DTYPES,
2507             ("Unknown VSD dtype %d", vs->dtype));
2508
2509         error = 0;
2510
2511         switch (vs->dtype) {
2512         case VSD_DTYPE_INT_S32:
2513                 if (VSD(int32, vsd)->s32 < voival->int32.s32) {
2514                         VSD(int32, vsd)->s32 = voival->int32.s32;
2515                         vs->flags |= VS_VSDVALID;
2516                 }
2517                 break;
2518         case VSD_DTYPE_INT_U32:
2519                 if (VSD(int32, vsd)->u32 < voival->int32.u32) {
2520                         VSD(int32, vsd)->u32 = voival->int32.u32;
2521                         vs->flags |= VS_VSDVALID;
2522                 }
2523                 break;
2524         case VSD_DTYPE_INT_S64:
2525                 if (VSD(int64, vsd)->s64 < voival->int64.s64) {
2526                         VSD(int64, vsd)->s64 = voival->int64.s64;
2527                         vs->flags |= VS_VSDVALID;
2528                 }
2529                 break;
2530         case VSD_DTYPE_INT_U64:
2531                 if (VSD(int64, vsd)->u64 < voival->int64.u64) {
2532                         VSD(int64, vsd)->u64 = voival->int64.u64;
2533                         vs->flags |= VS_VSDVALID;
2534                 }
2535                 break;
2536         case VSD_DTYPE_INT_SLONG:
2537                 if (VSD(intlong, vsd)->slong < voival->intlong.slong) {
2538                         VSD(intlong, vsd)->slong = voival->intlong.slong;
2539                         vs->flags |= VS_VSDVALID;
2540                 }
2541                 break;
2542         case VSD_DTYPE_INT_ULONG:
2543                 if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) {
2544                         VSD(intlong, vsd)->ulong = voival->intlong.ulong;
2545                         vs->flags |= VS_VSDVALID;
2546                 }
2547                 break;
2548         case VSD_DTYPE_Q_S32:
2549                 if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
2550                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
2551                     voival->q32.sq32)))) {
2552                         vs->flags |= VS_VSDVALID;
2553                 }
2554                 break;
2555         case VSD_DTYPE_Q_U32:
2556                 if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
2557                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
2558                     voival->q32.uq32)))) {
2559                         vs->flags |= VS_VSDVALID;
2560                 }
2561                 break;
2562         case VSD_DTYPE_Q_S64:
2563                 if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
2564                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
2565                     voival->q64.sq64)))) {
2566                         vs->flags |= VS_VSDVALID;
2567                 }
2568                 break;
2569         case VSD_DTYPE_Q_U64:
2570                 if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
2571                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
2572                     voival->q64.uq64)))) {
2573                         vs->flags |= VS_VSDVALID;
2574                 }
2575                 break;
2576         default:
2577                 error = EINVAL;
2578                 break;
2579         }
2580
2581         return (error);
2582 }
2583
2584 static inline int
2585 stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused,
2586     struct voistatdata *voival, struct voistat *vs, void *vsd)
2587 {
2588         int error;
2589
2590         KASSERT(vs->dtype < VSD_NUM_DTYPES,
2591             ("Unknown VSD dtype %d", vs->dtype));
2592
2593         error = 0;
2594
2595         switch (vs->dtype) {
2596         case VSD_DTYPE_INT_S32:
2597                 if (VSD(int32, vsd)->s32 > voival->int32.s32) {
2598                         VSD(int32, vsd)->s32 = voival->int32.s32;
2599                         vs->flags |= VS_VSDVALID;
2600                 }
2601                 break;
2602         case VSD_DTYPE_INT_U32:
2603                 if (VSD(int32, vsd)->u32 > voival->int32.u32) {
2604                         VSD(int32, vsd)->u32 = voival->int32.u32;
2605                         vs->flags |= VS_VSDVALID;
2606                 }
2607                 break;
2608         case VSD_DTYPE_INT_S64:
2609                 if (VSD(int64, vsd)->s64 > voival->int64.s64) {
2610                         VSD(int64, vsd)->s64 = voival->int64.s64;
2611                         vs->flags |= VS_VSDVALID;
2612                 }
2613                 break;
2614         case VSD_DTYPE_INT_U64:
2615                 if (VSD(int64, vsd)->u64 > voival->int64.u64) {
2616                         VSD(int64, vsd)->u64 = voival->int64.u64;
2617                         vs->flags |= VS_VSDVALID;
2618                 }
2619                 break;
2620         case VSD_DTYPE_INT_SLONG:
2621                 if (VSD(intlong, vsd)->slong > voival->intlong.slong) {
2622                         VSD(intlong, vsd)->slong = voival->intlong.slong;
2623                         vs->flags |= VS_VSDVALID;
2624                 }
2625                 break;
2626         case VSD_DTYPE_INT_ULONG:
2627                 if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) {
2628                         VSD(intlong, vsd)->ulong = voival->intlong.ulong;
2629                         vs->flags |= VS_VSDVALID;
2630                 }
2631                 break;
2632         case VSD_DTYPE_Q_S32:
2633                 if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
2634                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
2635                     voival->q32.sq32)))) {
2636                         vs->flags |= VS_VSDVALID;
2637                 }
2638                 break;
2639         case VSD_DTYPE_Q_U32:
2640                 if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
2641                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
2642                     voival->q32.uq32)))) {
2643                         vs->flags |= VS_VSDVALID;
2644                 }
2645                 break;
2646         case VSD_DTYPE_Q_S64:
2647                 if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
2648                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
2649                     voival->q64.sq64)))) {
2650                         vs->flags |= VS_VSDVALID;
2651                 }
2652                 break;
2653         case VSD_DTYPE_Q_U64:
2654                 if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
2655                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
2656                     voival->q64.uq64)))) {
2657                         vs->flags |= VS_VSDVALID;
2658                 }
2659                 break;
2660         default:
2661                 error = EINVAL;
2662                 break;
2663         }
2664
2665         return (error);
2666 }
2667
2668 static inline int
2669 stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused,
2670     struct voistatdata *voival, struct voistat *vs, void *vsd)
2671 {
2672         int error;
2673
2674         KASSERT(vs->dtype < VSD_NUM_DTYPES,
2675             ("Unknown VSD dtype %d", vs->dtype));
2676
2677         error = 0;
2678
2679         switch (vs->dtype) {
2680         case VSD_DTYPE_INT_S32:
2681                 VSD(int32, vsd)->s32 += voival->int32.s32;
2682                 break;
2683         case VSD_DTYPE_INT_U32:
2684                 VSD(int32, vsd)->u32 += voival->int32.u32;
2685                 break;
2686         case VSD_DTYPE_INT_S64:
2687                 VSD(int64, vsd)->s64 += voival->int64.s64;
2688                 break;
2689         case VSD_DTYPE_INT_U64:
2690                 VSD(int64, vsd)->u64 += voival->int64.u64;
2691                 break;
2692         case VSD_DTYPE_INT_SLONG:
2693                 VSD(intlong, vsd)->slong += voival->intlong.slong;
2694                 break;
2695         case VSD_DTYPE_INT_ULONG:
2696                 VSD(intlong, vsd)->ulong += voival->intlong.ulong;
2697                 break;
2698         case VSD_DTYPE_Q_S32:
2699                 error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32);
2700                 break;
2701         case VSD_DTYPE_Q_U32:
2702                 error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32);
2703                 break;
2704         case VSD_DTYPE_Q_S64:
2705                 error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64);
2706                 break;
2707         case VSD_DTYPE_Q_U64:
2708                 error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64);
2709                 break;
2710         default:
2711                 error = EINVAL;
2712                 break;
2713         }
2714
2715         if (!error)
2716                 vs->flags |= VS_VSDVALID;
2717
2718         return (error);
2719 }
2720
2721 static inline int
2722 stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival,
2723     struct voistat *vs, struct voistatdata_hist *hist)
2724 {
2725         struct voistatdata_numeric *bkt_lb, *bkt_ub;
2726         uint64_t *oob64, *cnt64;
2727         uint32_t *oob32, *cnt32;
2728         int error, i, found, is32bit, has_ub, eq_only;
2729
2730         error = 0;
2731
2732         switch (vs->dtype) {
2733         case VSD_DTYPE_CRHIST32:
2734                 i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
2735                 is32bit = 1;
2736                 has_ub = eq_only = 0;
2737                 oob32 = &VSD(crhist32, hist)->oob;
2738                 break;
2739         case VSD_DTYPE_DRHIST32:
2740                 i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
2741                 is32bit = has_ub = 1;
2742                 eq_only = 0;
2743                 oob32 = &VSD(drhist32, hist)->oob;
2744                 break;
2745         case VSD_DTYPE_DVHIST32:
2746                 i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
2747                 is32bit = eq_only = 1;
2748                 has_ub = 0;
2749                 oob32 = &VSD(dvhist32, hist)->oob;
2750                 break;
2751         case VSD_DTYPE_CRHIST64:
2752                 i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
2753                 is32bit = has_ub = eq_only = 0;
2754                 oob64 = &VSD(crhist64, hist)->oob;
2755                 break;
2756         case VSD_DTYPE_DRHIST64:
2757                 i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
2758                 is32bit = eq_only = 0;
2759                 has_ub = 1;
2760                 oob64 = &VSD(drhist64, hist)->oob;
2761                 break;
2762         case VSD_DTYPE_DVHIST64:
2763                 i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
2764                 is32bit = has_ub = 0;
2765                 eq_only = 1;
2766                 oob64 = &VSD(dvhist64, hist)->oob;
2767                 break;
2768         default:
2769                 return (EINVAL);
2770         }
2771         i--; /* Adjust for 0-based array index. */
2772
2773         /* XXXLAS: Should probably use a better bucket search algorithm. ARB? */
2774         for (found = 0; i >= 0 && !found; i--) {
2775                 switch (vs->dtype) {
2776                 case VSD_DTYPE_CRHIST32:
2777                         bkt_lb = &VSD(crhist32, hist)->bkts[i].lb;
2778                         cnt32 = &VSD(crhist32, hist)->bkts[i].cnt;
2779                         break;
2780                 case VSD_DTYPE_DRHIST32:
2781                         bkt_lb = &VSD(drhist32, hist)->bkts[i].lb;
2782                         bkt_ub = &VSD(drhist32, hist)->bkts[i].ub;
2783                         cnt32 = &VSD(drhist32, hist)->bkts[i].cnt;
2784                         break;
2785                 case VSD_DTYPE_DVHIST32:
2786                         bkt_lb = &VSD(dvhist32, hist)->bkts[i].val;
2787                         cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt;
2788                         break;
2789                 case VSD_DTYPE_CRHIST64:
2790                         bkt_lb = &VSD(crhist64, hist)->bkts[i].lb;
2791                         cnt64 = &VSD(crhist64, hist)->bkts[i].cnt;
2792                         break;
2793                 case VSD_DTYPE_DRHIST64:
2794                         bkt_lb = &VSD(drhist64, hist)->bkts[i].lb;
2795                         bkt_ub = &VSD(drhist64, hist)->bkts[i].ub;
2796                         cnt64 = &VSD(drhist64, hist)->bkts[i].cnt;
2797                         break;
2798                 case VSD_DTYPE_DVHIST64:
2799                         bkt_lb = &VSD(dvhist64, hist)->bkts[i].val;
2800                         cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt;
2801                         break;
2802                 default:
2803                         return (EINVAL);
2804                 }
2805
2806                 switch (voi_dtype) {
2807                 case VSD_DTYPE_INT_S32:
2808                         if (voival->int32.s32 >= bkt_lb->int32.s32) {
2809                                 if ((eq_only && voival->int32.s32 ==
2810                                     bkt_lb->int32.s32) ||
2811                                     (!eq_only && (!has_ub ||
2812                                     voival->int32.s32 < bkt_ub->int32.s32)))
2813                                         found = 1;
2814                         }
2815                         break;
2816                 case VSD_DTYPE_INT_U32:
2817                         if (voival->int32.u32 >= bkt_lb->int32.u32) {
2818                                 if ((eq_only && voival->int32.u32 ==
2819                                     bkt_lb->int32.u32) ||
2820                                     (!eq_only && (!has_ub ||
2821                                     voival->int32.u32 < bkt_ub->int32.u32)))
2822                                         found = 1;
2823                         }
2824                         break;
2825                 case VSD_DTYPE_INT_S64:
2826                         if (voival->int64.s64 >= bkt_lb->int64.s64)
2827                                 if ((eq_only && voival->int64.s64 ==
2828                                     bkt_lb->int64.s64) ||
2829                                     (!eq_only && (!has_ub ||
2830                                     voival->int64.s64 < bkt_ub->int64.s64)))
2831                                         found = 1;
2832                         break;
2833                 case VSD_DTYPE_INT_U64:
2834                         if (voival->int64.u64 >= bkt_lb->int64.u64)
2835                                 if ((eq_only && voival->int64.u64 ==
2836                                     bkt_lb->int64.u64) ||
2837                                     (!eq_only && (!has_ub ||
2838                                     voival->int64.u64 < bkt_ub->int64.u64)))
2839                                         found = 1;
2840                         break;
2841                 case VSD_DTYPE_INT_SLONG:
2842                         if (voival->intlong.slong >= bkt_lb->intlong.slong)
2843                                 if ((eq_only && voival->intlong.slong ==
2844                                     bkt_lb->intlong.slong) ||
2845                                     (!eq_only && (!has_ub ||
2846                                     voival->intlong.slong <
2847                                     bkt_ub->intlong.slong)))
2848                                         found = 1;
2849                         break;
2850                 case VSD_DTYPE_INT_ULONG:
2851                         if (voival->intlong.ulong >= bkt_lb->intlong.ulong)
2852                                 if ((eq_only && voival->intlong.ulong ==
2853                                     bkt_lb->intlong.ulong) ||
2854                                     (!eq_only && (!has_ub ||
2855                                     voival->intlong.ulong <
2856                                     bkt_ub->intlong.ulong)))
2857                                         found = 1;
2858                         break;
2859                 case VSD_DTYPE_Q_S32:
2860                         if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32))
2861                                 if ((eq_only && Q_QEQ(voival->q32.sq32,
2862                                     bkt_lb->q32.sq32)) ||
2863                                     (!eq_only && (!has_ub ||
2864                                     Q_QLTQ(voival->q32.sq32,
2865                                     bkt_ub->q32.sq32))))
2866                                         found = 1;
2867                         break;
2868                 case VSD_DTYPE_Q_U32:
2869                         if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32))
2870                                 if ((eq_only && Q_QEQ(voival->q32.uq32,
2871                                     bkt_lb->q32.uq32)) ||
2872                                     (!eq_only && (!has_ub ||
2873                                     Q_QLTQ(voival->q32.uq32,
2874                                     bkt_ub->q32.uq32))))
2875                                         found = 1;
2876                         break;
2877                 case VSD_DTYPE_Q_S64:
2878                         if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64))
2879                                 if ((eq_only && Q_QEQ(voival->q64.sq64,
2880                                     bkt_lb->q64.sq64)) ||
2881                                     (!eq_only && (!has_ub ||
2882                                     Q_QLTQ(voival->q64.sq64,
2883                                     bkt_ub->q64.sq64))))
2884                                         found = 1;
2885                         break;
2886                 case VSD_DTYPE_Q_U64:
2887                         if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64))
2888                                 if ((eq_only && Q_QEQ(voival->q64.uq64,
2889                                     bkt_lb->q64.uq64)) ||
2890                                     (!eq_only && (!has_ub ||
2891                                     Q_QLTQ(voival->q64.uq64,
2892                                     bkt_ub->q64.uq64))))
2893                                         found = 1;
2894                         break;
2895                 default:
2896                         break;
2897                 }
2898         }
2899
2900         if (found) {
2901                 if (is32bit)
2902                         *cnt32 += 1;
2903                 else
2904                         *cnt64 += 1;
2905         } else {
2906                 if (is32bit)
2907                         *oob32 += 1;
2908                 else
2909                         *oob64 += 1;
2910         }
2911
2912         vs->flags |= VS_VSDVALID;
2913         return (error);
2914 }
2915
2916 static inline int
2917 stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype,
2918     struct voistatdata_tdgst *tdgst, int attempt)
2919 {
2920         struct ctdth32 *ctd32tree;
2921         struct ctdth64 *ctd64tree;
2922         struct voistatdata_tdgstctd32 *ctd32;
2923         struct voistatdata_tdgstctd64 *ctd64;
2924         uint64_t ebits, idxmask;
2925         uint32_t bitsperidx, nebits;
2926         int error, idx, is32bit, maxctds, remctds, tmperr;
2927
2928         error = 0;
2929
2930         switch (vs_dtype) {
2931         case VSD_DTYPE_TDGSTCLUST32:
2932                 ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
2933                 if (!ARB_FULL(ctd32tree))
2934                         return (0);
2935                 VSD(tdgstclust32, tdgst)->compcnt++;
2936                 maxctds = remctds = ARB_MAXNODES(ctd32tree);
2937                 ARB_RESET_TREE(ctd32tree, ctdth32, maxctds);
2938                 VSD(tdgstclust32, tdgst)->smplcnt = 0;
2939                 is32bit = 1;
2940                 ctd64tree = NULL;
2941                 ctd64 = NULL;
2942 #ifdef DIAGNOSTIC
2943                 RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
2944 #endif
2945                 break;
2946         case VSD_DTYPE_TDGSTCLUST64:
2947                 ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
2948                 if (!ARB_FULL(ctd64tree))
2949                         return (0);
2950                 VSD(tdgstclust64, tdgst)->compcnt++;
2951                 maxctds = remctds = ARB_MAXNODES(ctd64tree);
2952                 ARB_RESET_TREE(ctd64tree, ctdth64, maxctds);
2953                 VSD(tdgstclust64, tdgst)->smplcnt = 0;
2954                 is32bit = 0;
2955                 ctd32tree = NULL;
2956                 ctd32 = NULL;
2957 #ifdef DIAGNOSTIC
2958                 RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
2959 #endif
2960                 break;
2961         default:
2962                 return (EINVAL);
2963         }
2964
2965         /*
2966          * Rebuild the t-digest ARB by pseudorandomly selecting centroids and
2967          * re-inserting the mu/cnt of each as a value and corresponding weight.
2968          */
2969
2970         /*
2971          * XXXCEM: random(9) is currently rand(3), not random(3).  rand(3)
2972          * RAND_MAX happens to be approximately 31 bits (range [0,
2973          * 0x7ffffffd]), so the math kinda works out.  When/if this portion of
2974          * the code is compiled in userspace, it gets the random(3) behavior,
2975          * which has expected range [0, 0x7fffffff].
2976          */
2977 #define bitsperrand 31
2978         ebits = 0;
2979         nebits = 0;
2980         bitsperidx = fls(maxctds);
2981         KASSERT(bitsperidx <= sizeof(ebits) << 3,
2982             ("%s: bitsperidx=%d, ebits=%d",
2983             __func__, bitsperidx, (int)(sizeof(ebits) << 3)));
2984         idxmask = (UINT64_C(1) << bitsperidx) - 1;
2985
2986         /* Initialise the free list with randomised centroid indices. */
2987         for (; remctds > 0; remctds--) {
2988                 while (nebits < bitsperidx) {
2989                         ebits |= ((uint64_t)random()) << nebits;
2990                         nebits += bitsperrand;
2991                         if (nebits > (sizeof(ebits) << 3))
2992                                 nebits = sizeof(ebits) << 3;
2993                 }
2994                 idx = ebits & idxmask;
2995                 nebits -= bitsperidx;
2996                 ebits >>= bitsperidx;
2997
2998                 /*
2999                  * Select the next centroid to put on the ARB free list. We
3000                  * start with the centroid at our randomly selected array index,
3001                  * and work our way forwards until finding one (the latter
3002                  * aspect reduces re-insertion randomness, but is good enough).
3003                  */
3004                 do {
3005                         if (idx >= maxctds)
3006                                 idx %= maxctds;
3007
3008                         if (is32bit)
3009                                 ctd32 = ARB_NODE(ctd32tree, idx);
3010                         else
3011                                 ctd64 = ARB_NODE(ctd64tree, idx);
3012                 } while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) :
3013                     ARB_ISFREE(ctd64, ctdlnk)) && ++idx);
3014
3015                 /* Put the centroid on the ARB free list. */
3016                 if (is32bit)
3017                         ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk);
3018                 else
3019                         ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk);
3020         }
3021
3022         /*
3023          * The free list now contains the randomised indices of every centroid.
3024          * Walk the free list from start to end, re-inserting each centroid's
3025          * mu/cnt. The tdgst_add() call may or may not consume the free centroid
3026          * we re-insert values from during each loop iteration, so we must latch
3027          * the index of the next free list centroid before the re-insertion
3028          * call. The previous loop above should have left the centroid pointer
3029          * pointing to the element at the head of the free list.
3030          */
3031         KASSERT((is32bit ?
3032             ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) :
3033             ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)),
3034             ("%s: t-digest ARB@%p free list bug", __func__,
3035             (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
3036         remctds = maxctds;
3037         while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) {
3038                 tmperr = 0;
3039                 if (is32bit) {
3040                         s64q_t x;
3041
3042                         idx = ARB_NEXTFREEIDX(ctd32, ctdlnk);
3043                         /* Cloning a s32q_t into a s64q_t should never fail. */
3044                         tmperr = Q_QCLONEQ(&x, ctd32->mu);
3045                         tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add(
3046                             vs_dtype, tdgst, x, ctd32->cnt, attempt);
3047                         ctd32 = ARB_NODE(ctd32tree, idx);
3048                         KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk),
3049                             ("%s: t-digest ARB@%p free list bug", __func__,
3050                             ctd32tree));
3051                 } else {
3052                         idx = ARB_NEXTFREEIDX(ctd64, ctdlnk);
3053                         tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst,
3054                             ctd64->mu, ctd64->cnt, attempt);
3055                         ctd64 = ARB_NODE(ctd64tree, idx);
3056                         KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk),
3057                             ("%s: t-digest ARB@%p free list bug", __func__,
3058                             ctd64tree));
3059                 }
3060                 /*
3061                  * This process should not produce errors, bugs notwithstanding.
3062                  * Just in case, latch any errors and attempt all re-insertions.
3063                  */
3064                 error = tmperr ? tmperr : error;
3065                 remctds--;
3066         }
3067
3068         KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__,
3069             (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
3070
3071         return (error);
3072 }
3073
/*
 * Add the value 'x' with the given 'weight' to the t-digest 'tdgst', either
 * by merging it into an existing centroid or by inserting a new centroid.
 *
 * vs_dtype selects the 32 bit (VSD_DTYPE_TDGSTCLUST32) or 64 bit
 * (VSD_DTYPE_TDGSTCLUST64) digest layout; any other value returns EINVAL.
 * 'attempt' scales the per-centroid size limit 'k' computed below and is
 * incremented each time the digest has to be compressed; once it reaches 5
 * the function gives up with EAGAIN.
 *
 * Returns 0 on success, EOVERFLOW if adding 'weight' would overflow the
 * digest's sample count, EAGAIN if no free centroid is available, EALREADY
 * if the ARB insert/reinsert returns non-NULL, or an error passed up from the
 * Q math macros or stats_v1_vsd_tdgst_compress().
 */
3074 static inline int
3075 stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
3076     s64q_t x, uint64_t weight, int attempt)
3077 {
3078 #ifdef DIAGNOSTIC
3079         char qstr[Q_MAXSTRLEN(x, 10)];
3080 #endif
3081         struct ctdth32 *ctd32tree;
3082         struct ctdth64 *ctd64tree;
3083         void *closest, *cur, *lb, *ub;
3084         struct voistatdata_tdgstctd32 *ctd32;
3085         struct voistatdata_tdgstctd64 *ctd64;
3086         uint64_t cnt, smplcnt, sum, tmpsum;
3087         s64q_t k, minz, q, z;
3088         int error, is32bit, n;
3089
3090         error = 0;
             /* z and minz are set up using x's number of fractional bits. */
3091         minz = Q_INI(&z, 0, 0, Q_NFBITS(x));
3092
             /*
              * Refuse weights that would overflow the digest's sample counter,
              * then latch the current sample count and the centroid tree that
              * matches the digest width. The pointers for the unused width are
              * set to NULL.
              */
3093         switch (vs_dtype) {
3094         case VSD_DTYPE_TDGSTCLUST32:
3095                 if ((UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt)
3096                         error = EOVERFLOW;
3097                 smplcnt = VSD(tdgstclust32, tdgst)->smplcnt;
3098                 ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
3099                 is32bit = 1;
3100                 ctd64tree = NULL;
3101                 ctd64 = NULL;
3102                 break;
3103         case VSD_DTYPE_TDGSTCLUST64:
3104                 if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt)
3105                         error = EOVERFLOW;
3106                 smplcnt = VSD(tdgstclust64, tdgst)->smplcnt;
3107                 ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
3108                 is32bit = 0;
3109                 ctd32tree = NULL;
3110                 ctd32 = NULL;
3111                 break;
3112         default:
3113                 error = EINVAL;
3114                 break;
3115         }
3116
3117         if (error)
3118                 return (error);
3119
3120         /*
3121          * Inspired by Ted Dunning's AVLTreeDigest.java
3122          */
3123         do {
3124 #if defined(DIAGNOSTIC)
3125                 KASSERT(attempt < 5,
3126                     ("%s: Too many attempts", __func__));
3127 #endif
3128                 if (attempt >= 5)
3129                         return (EAGAIN);
3130
                     /*
                      * Reset the search state for this pass. minz is set to
                      * Q_IFMAXVAL(minz), presumably the largest value it can
                      * hold, so that the first centroid examined becomes the
                      * closest seen so far.
                      */
3131                 Q_SIFVAL(minz, Q_IFMAXVAL(minz));
3132                 closest = ub = NULL;
3133                 sum = tmpsum = 0;
3134
3135                 if (is32bit)
3136                         lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree));
3137                 else
3138                         lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree));
3139
3140                 if (lb == NULL) /* Empty tree. */
3141                         lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) :
3142                             (void *)ARB_ROOT(ctd64tree));
3143
3144                 /*
3145                  * Find the set of centroids with minimum distance to x and
3146                  * compute the sum of counts for all centroids with mean less
3147                  * than the first centroid in the set.
3148                  */
3149                 for (; cur != NULL;
3150                     cur = (is32bit ?
3151                     (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3152                     (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3153                         if (is32bit) {
3154                                 cnt = ctd32->cnt;
3155                                 KASSERT(Q_PRECEQ(ctd32->mu, x),
3156                                     ("%s: Q_RELPREC(mu,x)=%d", __func__,
3157                                     Q_RELPREC(ctd32->mu, x)));
3158                                 /* Ok to assign as both have same precision. */
3159                                 z = ctd32->mu;
3160                         } else {
3161                                 cnt = ctd64->cnt;
3162                                 KASSERT(Q_PRECEQ(ctd64->mu, x),
3163                                     ("%s: Q_RELPREC(mu,x)=%d", __func__,
3164                                     Q_RELPREC(ctd64->mu, x)));
3165                                 /* Ok to assign as both have same precision. */
3166                                 z = ctd64->mu;
3167                         }
3168
                             /* z = |mu - x|, the distance from this centroid to x. */
3169                         error = Q_QSUBQ(&z, x);
3170 #if defined(DIAGNOSTIC)
3171                         KASSERT(!error, ("%s: unexpected error %d", __func__,
3172                             error));
3173 #endif
3174                         if (error)
3175                                 return (error);
3176
3177                         z = Q_QABS(z);
                             /*
                              * A strictly smaller distance restarts the candidate
                              * range at cur and records the count that precedes it.
                              * An equal distance leaves the range open. A greater
                              * distance ends the range: ub marks the first centroid
                              * past it.
                              */
3178                         if (Q_QLTQ(z, minz)) {
3179                                 minz = z;
3180                                 lb = cur;
3181                                 sum = tmpsum;
3182                                 tmpsum += cnt;
3183                         } else if (Q_QGTQ(z, minz)) {
3184                                 ub = cur;
3185                                 break;
3186                         }
3187                 }
3188
                     /* Rewind the typed cursor to the first candidate (lb). */
3189                 cur = (is32bit ?
3190                     (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) :
3191                     (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb));
3192
                     /*
                      * Walk the candidates from lb up to (but excluding) ub. For
                      * each one compute the size limit k and, if the centroid can
                      * absorb 'weight' without exceeding it, consider it as the
                      * merge target.
                      */
3193                 for (n = 0; cur != ub; cur = (is32bit ?
3194                     (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3195                     (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3196                         if (is32bit)
3197                                 cnt = ctd32->cnt;
3198                         else
3199                                 cnt = ctd64->cnt;
3200
3201                         q = Q_CTRLINI(16);
3202                         if (smplcnt == 1)
3203                                 error = Q_QFRACI(&q, 1, 2);
3204                         else
3205                                 /* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */
3206                                 error = Q_QFRACI(&q, (sum << 1) + cnt - 1,
3207                                     (smplcnt - 1) << 1);
3208                         k = q;
                             /*
                              * NB: the results of the following Q math calls are
                              * OR'd into 'error', so a non-zero value here may be a
                              * combination of error codes rather than a single one.
                              */
3209                         /* k = q x 4 x smplcnt x attempt */
3210                         error |= Q_QMULI(&k, 4 * smplcnt * attempt);
3211                         /* k = k x (1 - q) */
3212                         error |= Q_QSUBI(&q, 1);
3213                         q = Q_QABS(q);
3214                         error |= Q_QMULQ(&k, q);
3215 #if defined(DIAGNOSTIC)
3216 #if !defined(_KERNEL)
                             /*
                              * Userland-only sanity check: recompute q and k in
                              * floating point and compare with the Q-type results.
                              */
3217                         double q_dbl, k_dbl, q2d, k2d;
3218                         q2d = Q_Q2D(q);
3219                         k2d = Q_Q2D(k);
3220                         q_dbl = smplcnt == 1 ? 0.5 :
3221                             (sum + ((cnt - 1)  / 2.0)) / (double)(smplcnt - 1);
3222                         k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt;
3223                         /*
3224                          * If the difference between q and q_dbl is greater than
3225                          * the fractional precision of q, something is off.
3226                          * NB: q is holding the value of 1 - q
3227                          */
3228                         q_dbl = 1.0 - q_dbl;
3229                         KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) <
3230                             (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))),
3231                             ("Q-type q bad precision"));
3232                         KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) <
3233                             1.0 + (0.01 * smplcnt),
3234                             ("Q-type k bad precision"));
3235 #endif /* !_KERNEL */
3236                         KASSERT(!error, ("%s: unexpected error %d", __func__,
3237                             error));
3238 #endif /* DIAGNOSTIC */
3239                         if (error)
3240                                 return (error);
3241                         if ((is32bit && ((ctd32->cnt + weight) <=
3242                             (uint64_t)Q_GIVAL(k))) ||
3243                             (!is32bit && ((ctd64->cnt + weight) <=
3244                             (uint64_t)Q_GIVAL(k)))) {
3245                                 n++;
                                     /*
                                      * The n-th eligible centroid replaces the
                                      * current pick with a probability of roughly
                                      * 1/n, so one of the eligible centroids ends
                                      * up selected at random.
                                      */
3246                                 /* random() produces 31 bits. */
3247                                 if (random() < (INT32_MAX / n))
3248                                         closest = cur;
3249                         }
3250                         sum += cnt;
3251                 }
             /*
              * If no centroid could absorb the sample and the tree has no room
              * for a new one, compress the digest and search again with a
              * higher attempt number.
              */
3252         } while (closest == NULL &&
3253             (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) &&
3254             (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst,
3255             attempt++)) == 0);
3256
3257         if (error)
3258                 return (error);
3259
3260         if (closest != NULL) {
3261                 /* Merge with an existing centroid. */
3262                 if (is32bit) {
3263                         ctd32 = (struct voistatdata_tdgstctd32 *)closest;
3264                         error = Q_QSUBQ(&x, ctd32->mu);
3265                         /*
3266                          * The following calculation "x / (cnt + weight)"
3267                          * computes the amount by which to adjust the centroid's
3268                          * mu value in order to merge in the VOI sample.
3269                          *
3270                          * It can underflow (Q_QDIVI() returns ERANGE) when the
3271                          * user centroids' fractional precision (which is
3272                          * inherited by 'x') is too low to represent the result.
3273                          *
3274                          * A sophisticated approach to dealing with this issue
3275                          * would minimise accumulation of error by tracking
3276                          * underflow per centroid and making an adjustment when
3277                          * a LSB's worth of underflow has accumulated.
3278                          *
3279                          * A simpler approach is to let the result underflow
3280                          * i.e. merge the VOI sample into the centroid without
3281                          * adjusting the centroid's mu, and rely on the user to
3282                          * specify their t-digest with sufficient centroid
3283                          * fractional precision such that the accumulation of
3284                          * error from multiple underflows is of no material
3285                          * consequence to the centroid's final value of mu.
3286                          *
3287                          * For the moment, the latter approach is employed by
3288                          * simply ignoring ERANGE here.
3289                          *
3290                          * XXXLAS: Per-centroid underflow tracking is likely too
3291                          * onerous, but it probably makes sense to accumulate a
3292                          * single underflow error variable across all centroids
3293                          * and report it as part of the digest to provide
3294                          * additional visibility into the digest's fidelity.
3295                          */
3296                         error = error ? error :
3297                             Q_QDIVI(&x, ctd32->cnt + weight);
3298                         if ((error && error != ERANGE)
3299                             || (error = Q_QADDQ(&ctd32->mu, x))) {
3300 #ifdef DIAGNOSTIC
3301                                 KASSERT(!error, ("%s: unexpected error %d",
3302                                     __func__, error));
3303 #endif
3304                                 return (error);
3305                         }
3306                         ctd32->cnt += weight;
                             /* mu was adjusted above, so reinsert the centroid. */
3307                         error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) ==
3308                             NULL ? 0 : EALREADY;
3309 #ifdef DIAGNOSTIC
3310                         RB_REINSERT(rbctdth32,
3311                             &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3312 #endif
3313                 } else {
3314                         ctd64 = (struct voistatdata_tdgstctd64 *)closest;
3315                         error = Q_QSUBQ(&x, ctd64->mu);
3316                         error = error ? error :
3317                             Q_QDIVI(&x, ctd64->cnt + weight);
3318                         /* Refer to is32bit ERANGE discussion above. */
3319                         if ((error && error != ERANGE)
3320                             || (error = Q_QADDQ(&ctd64->mu, x))) {
3321                                 KASSERT(!error, ("%s: unexpected error %d",
3322                                     __func__, error));
3323                                 return (error);
3324                         }
3325                         ctd64->cnt += weight;
                             /* mu was adjusted above, so reinsert the centroid. */
3326                         error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) ==
3327                             NULL ? 0 : EALREADY;
3328 #ifdef DIAGNOSTIC
3329                         RB_REINSERT(rbctdth64,
3330                             &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64);
3331 #endif
3332                 }
3333         } else {
3334                 /*
3335                  * Add a new centroid. If digest compression is working
3336                  * correctly, there should always be at least one free.
3337                  */
3338                 if (is32bit) {
3339                         ctd32 = ARB_GETFREE(ctd32tree, ctdlnk);
3340 #ifdef DIAGNOSTIC
3341                         KASSERT(ctd32 != NULL,
3342                             ("%s: t-digest@%p has no free centroids",
3343                             __func__, tdgst));
3344 #endif
3345                         if (ctd32 == NULL)
3346                                 return (EAGAIN);
3347                         if ((error = Q_QCPYVALQ(&ctd32->mu, x)))
3348                                 return (error);
3349                         ctd32->cnt = weight;
3350                         error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ?
3351                             0 : EALREADY;
3352 #ifdef DIAGNOSTIC
3353                         RB_INSERT(rbctdth32,
3354                             &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3355 #endif
3356                 } else {
3357                         ctd64 = ARB_GETFREE(ctd64tree, ctdlnk);
3358 #ifdef DIAGNOSTIC
3359                         KASSERT(ctd64 != NULL,
3360                             ("%s: t-digest@%p has no free centroids",
3361                             __func__, tdgst));
3362 #endif
3363                         if (ctd64 == NULL) /* Should not happen. */
3364                                 return (EAGAIN);
3365                         /* Direct assignment ok as both have same type/prec. */
3366                         ctd64->mu = x;
3367                         ctd64->cnt = weight;
3368                         error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ?
3369                             0 : EALREADY;
3370 #ifdef DIAGNOSTIC
3371                         RB_INSERT(rbctdth64, &VSD(tdgstclust64,
3372                             tdgst)->rbctdtree, ctd64);
3373 #endif
3374                 }
3375         }
3376
             /*
              * Account for the merged/inserted weight in the digest's sample
              * count. NB: this point is also reached when 'error' was set to
              * EALREADY by the insert/reinsert above.
              */
3377         if (is32bit)
3378                 VSD(tdgstclust32, tdgst)->smplcnt += weight;
3379         else {
3380                 VSD(tdgstclust64, tdgst)->smplcnt += weight;
3381
3382 #ifdef DIAGNOSTIC
                     /*
                      * 64 bit digests only: walk the ARB in step with the shadow
                      * RB tree and panic, after printing both nodes, if the two
                      * trees disagree on node identity, min/max or linkage.
                      */
3383                 struct rbctdth64 *rbctdtree =
3384                     &VSD(tdgstclust64, tdgst)->rbctdtree;
3385                 struct voistatdata_tdgstctd64 *rbctd64;
3386                 int i = 0;
3387                 ARB_FOREACH(ctd64, ctdth64, ctd64tree) {
3388                         rbctd64 = (i == 0 ? RB_MIN(rbctdth64, rbctdtree) :
3389                             RB_NEXT(rbctdth64, rbctdtree, rbctd64));
3390
3391                         if (i >= ARB_CURNODES(ctd64tree)
3392                             || ctd64 != rbctd64
3393                             || ARB_MIN(ctdth64, ctd64tree) !=
3394                                RB_MIN(rbctdth64, rbctdtree)
3395                             || ARB_MAX(ctdth64, ctd64tree) !=
3396                                RB_MAX(rbctdth64, rbctdtree)
3397                             || ARB_LEFTIDX(ctd64, ctdlnk) !=
3398                                ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk))
3399                             || ARB_RIGHTIDX(ctd64, ctdlnk) !=
3400                                ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk))
3401                             || ARB_PARENTIDX(ctd64, ctdlnk) !=
3402                                ARB_SELFIDX(ctd64tree,
3403                                RB_PARENT(rbctd64, rblnk))) {
3404                                 Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr));
3405                                 printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3406                                     "mu=%s\n",
3407                                     (int)ARB_SELFIDX(ctd64tree, ctd64),
3408                                     ARB_PARENTIDX(ctd64, ctdlnk),
3409                                     ARB_LEFTIDX(ctd64, ctdlnk),
3410                                     ARB_RIGHTIDX(ctd64, ctdlnk),
3411                                     ARB_COLOR(ctd64, ctdlnk),
3412                                     qstr);
3413
3414                                 Q_TOSTR(rbctd64->mu, -1, 10, qstr,
3415                                     sizeof(qstr));
3416                                 struct voistatdata_tdgstctd64 *parent;
3417                                 parent = RB_PARENT(rbctd64, rblnk);
3418                                 int rb_color =
3419                                         parent == NULL ? 0 :
3420                                         RB_LEFT(parent, rblnk) == rbctd64 ?
3421                                         (_RB_BITSUP(parent, rblnk) & _RB_L) != 0 :
3422                                         (_RB_BITSUP(parent, rblnk) & _RB_R) != 0;
3423                                 printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3424                                     "mu=%s\n",
3425                                     (int)ARB_SELFIDX(ctd64tree, rbctd64),
3426                                     (int)ARB_SELFIDX(ctd64tree,
3427                                       RB_PARENT(rbctd64, rblnk)),
3428                                     (int)ARB_SELFIDX(ctd64tree,
3429                                       RB_LEFT(rbctd64, rblnk)),
3430                                     (int)ARB_SELFIDX(ctd64tree,
3431                                       RB_RIGHT(rbctd64, rblnk)),
3432                                     rb_color,
3433                                     qstr);
3434
3435                                 panic("RB@%p and ARB@%p trees differ\n",
3436                                     rbctdtree, ctd64tree);
3437                         }
3438                         i++;
3439                 }
3440 #endif /* DIAGNOSTIC */
3441         }
3442
3443         return (error);
3444 }
3445
3446 static inline int
3447 stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival,
3448     struct voistat *vs, struct voistatdata_tdgst *tdgst)
3449 {
3450         s64q_t x;
3451         int error;
3452
3453         error = 0;
3454
3455         switch (vs->dtype) {
3456         case VSD_DTYPE_TDGSTCLUST32:
3457                 /* Use same precision as the user's centroids. */
3458                 Q_INI(&x, 0, 0, Q_NFBITS(
3459                     ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu));
3460                 break;
3461         case VSD_DTYPE_TDGSTCLUST64:
3462                 /* Use same precision as the user's centroids. */
3463                 Q_INI(&x, 0, 0, Q_NFBITS(
3464                     ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu));
3465                 break;
3466         default:
3467                 KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 ||
3468                     vs->dtype == VSD_DTYPE_TDGSTCLUST64,
3469                     ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>",
3470                     __func__, vs->dtype));
3471                 return (EINVAL);
3472         }
3473
3474         /*
3475          * XXXLAS: Should have both a signed and unsigned 'x' variable to avoid
3476          * returning EOVERFLOW if the voival would have fit in a u64q_t.
3477          */
3478         switch (voi_dtype) {
3479         case VSD_DTYPE_INT_S32:
3480                 error = Q_QCPYVALI(&x, voival->int32.s32);
3481                 break;
3482         case VSD_DTYPE_INT_U32:
3483                 error = Q_QCPYVALI(&x, voival->int32.u32);
3484                 break;
3485         case VSD_DTYPE_INT_S64:
3486                 error = Q_QCPYVALI(&x, voival->int64.s64);
3487                 break;
3488         case VSD_DTYPE_INT_U64:
3489                 error = Q_QCPYVALI(&x, voival->int64.u64);
3490                 break;
3491         case VSD_DTYPE_INT_SLONG:
3492                 error = Q_QCPYVALI(&x, voival->intlong.slong);
3493                 break;
3494         case VSD_DTYPE_INT_ULONG:
3495                 error = Q_QCPYVALI(&x, voival->intlong.ulong);
3496                 break;
3497         case VSD_DTYPE_Q_S32:
3498                 error = Q_QCPYVALQ(&x, voival->q32.sq32);
3499                 break;
3500         case VSD_DTYPE_Q_U32:
3501                 error = Q_QCPYVALQ(&x, voival->q32.uq32);
3502                 break;
3503         case VSD_DTYPE_Q_S64:
3504                 error = Q_QCPYVALQ(&x, voival->q64.sq64);
3505                 break;
3506         case VSD_DTYPE_Q_U64:
3507                 error = Q_QCPYVALQ(&x, voival->q64.uq64);
3508                 break;
3509         default:
3510                 error = EINVAL;
3511                 break;
3512         }
3513
3514         if (error ||
3515             (error = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, x, 1, 1)))
3516                 return (error);
3517
3518         vs->flags |= VS_VSDVALID;
3519         return (0);
3520 }
3521
3522 int
3523 stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
3524     enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags)
3525 {
3526         struct voi *v;
3527         struct voistat *vs;
3528         void *statevsd, *vsd;
3529         int error, i, tmperr;
3530
3531         error = 0;
3532
3533         if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id >= NVOIS(sb) ||
3534             voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES || voival == NULL)
3535                 return (EINVAL);
3536         v = &sb->vois[voi_id];
3537         if (voi_dtype != v->dtype || v->id < 0 ||
3538             ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE)))
3539                 return (EINVAL);
3540
3541         vs = BLOB_OFFSET(sb, v->stats_off);
3542         if (v->flags & VOI_REQSTATE)
3543                 statevsd = BLOB_OFFSET(sb, vs->data_off);
3544         else
3545                 statevsd = NULL;
3546
3547         if (flags & SB_VOI_RELUPDATE) {
3548                 switch (voi_dtype) {
3549                 case VSD_DTYPE_INT_S32:
3550                         voival->int32.s32 +=
3551                             VSD(voistate, statevsd)->prev.int32.s32;
3552                         break;
3553                 case VSD_DTYPE_INT_U32:
3554                         voival->int32.u32 +=
3555                             VSD(voistate, statevsd)->prev.int32.u32;
3556                         break;
3557                 case VSD_DTYPE_INT_S64:
3558                         voival->int64.s64 +=
3559                             VSD(voistate, statevsd)->prev.int64.s64;
3560                         break;
3561                 case VSD_DTYPE_INT_U64:
3562                         voival->int64.u64 +=
3563                             VSD(voistate, statevsd)->prev.int64.u64;
3564                         break;
3565                 case VSD_DTYPE_INT_SLONG:
3566                         voival->intlong.slong +=
3567                             VSD(voistate, statevsd)->prev.intlong.slong;
3568                         break;
3569                 case VSD_DTYPE_INT_ULONG:
3570                         voival->intlong.ulong +=
3571                             VSD(voistate, statevsd)->prev.intlong.ulong;
3572                         break;
3573                 case VSD_DTYPE_Q_S32:
3574                         error = Q_QADDQ(&voival->q32.sq32,
3575                             VSD(voistate, statevsd)->prev.q32.sq32);
3576                         break;
3577                 case VSD_DTYPE_Q_U32:
3578                         error = Q_QADDQ(&voival->q32.uq32,
3579                             VSD(voistate, statevsd)->prev.q32.uq32);
3580                         break;
3581                 case VSD_DTYPE_Q_S64:
3582                         error = Q_QADDQ(&voival->q64.sq64,
3583                             VSD(voistate, statevsd)->prev.q64.sq64);
3584                         break;
3585                 case VSD_DTYPE_Q_U64:
3586                         error = Q_QADDQ(&voival->q64.uq64,
3587                             VSD(voistate, statevsd)->prev.q64.uq64);
3588                         break;
3589                 default:
3590                         KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3591                         break;
3592                 }
3593         }
3594
3595         if (error)
3596                 return (error);
3597
3598         for (i = v->voistatmaxid; i > 0; i--) {
3599                 vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i];
3600                 if (vs->stype < 0)
3601                         continue;
3602
3603                 vsd = BLOB_OFFSET(sb, vs->data_off);
3604
3605                 switch (vs->stype) {
3606                 case VS_STYPE_MAX:
3607                         tmperr = stats_v1_voi_update_max(voi_dtype, voival,
3608                             vs, vsd);
3609                         break;
3610                 case VS_STYPE_MIN:
3611                         tmperr = stats_v1_voi_update_min(voi_dtype, voival,
3612                             vs, vsd);
3613                         break;
3614                 case VS_STYPE_SUM:
3615                         tmperr = stats_v1_voi_update_sum(voi_dtype, voival,
3616                             vs, vsd);
3617                         break;
3618                 case VS_STYPE_HIST:
3619                         tmperr = stats_v1_voi_update_hist(voi_dtype, voival,
3620                             vs, vsd);
3621                         break;
3622                 case VS_STYPE_TDGST:
3623                         tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival,
3624                             vs, vsd);
3625                         break;
3626                 default:
3627                         KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
3628                         break;
3629                 }
3630
3631                 if (tmperr) {
3632                         error = tmperr;
3633                         VS_INCERRS(vs);
3634                 }
3635         }
3636
3637         if (statevsd) {
3638                 switch (voi_dtype) {
3639                 case VSD_DTYPE_INT_S32:
3640                         VSD(voistate, statevsd)->prev.int32.s32 =
3641                             voival->int32.s32;
3642                         break;
3643                 case VSD_DTYPE_INT_U32:
3644                         VSD(voistate, statevsd)->prev.int32.u32 =
3645                             voival->int32.u32;
3646                         break;
3647                 case VSD_DTYPE_INT_S64:
3648                         VSD(voistate, statevsd)->prev.int64.s64 =
3649                             voival->int64.s64;
3650                         break;
3651                 case VSD_DTYPE_INT_U64:
3652                         VSD(voistate, statevsd)->prev.int64.u64 =
3653                             voival->int64.u64;
3654                         break;
3655                 case VSD_DTYPE_INT_SLONG:
3656                         VSD(voistate, statevsd)->prev.intlong.slong =
3657                             voival->intlong.slong;
3658                         break;
3659                 case VSD_DTYPE_INT_ULONG:
3660                         VSD(voistate, statevsd)->prev.intlong.ulong =
3661                             voival->intlong.ulong;
3662                         break;
3663                 case VSD_DTYPE_Q_S32:
3664                         error = Q_QCPYVALQ(
3665                             &VSD(voistate, statevsd)->prev.q32.sq32,
3666                             voival->q32.sq32);
3667                         break;
3668                 case VSD_DTYPE_Q_U32:
3669                         error = Q_QCPYVALQ(
3670                             &VSD(voistate, statevsd)->prev.q32.uq32,
3671                             voival->q32.uq32);
3672                         break;
3673                 case VSD_DTYPE_Q_S64:
3674                         error = Q_QCPYVALQ(
3675                             &VSD(voistate, statevsd)->prev.q64.sq64,
3676                             voival->q64.sq64);
3677                         break;
3678                 case VSD_DTYPE_Q_U64:
3679                         error = Q_QCPYVALQ(
3680                             &VSD(voistate, statevsd)->prev.q64.uq64,
3681                             voival->q64.uq64);
3682                         break;
3683                 default:
3684                         KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3685                         break;
3686                 }
3687         }
3688
3689         return (error);
3690 }
3691
3692 #ifdef _KERNEL
3693
/*
 * SYSINIT hook for stats(9). It is registered immediately below at
 * SI_SUB_KDTRACE / SI_ORDER_FIRST and currently performs no work; arg is
 * ignored.
 */
static void
stats_init(void *arg)
{
	/* Nothing to initialise at present. */
}
SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL);
3700
3701 /*
3702  * Sysctl handler to display the list of available stats templates.
3703  */
3704 static int
3705 stats_tpl_list_available(SYSCTL_HANDLER_ARGS)
3706 {
3707         struct sbuf *s;
3708         int err, i;
3709
3710         err = 0;
3711
3712         /* We can tolerate ntpl being stale, so do not take the lock. */
3713         s = sbuf_new(NULL, NULL, /* +1 per tpl for , */
3714             ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN);
3715         if (s == NULL)
3716                 return (ENOMEM);
3717
3718         TPL_LIST_RLOCK();
3719         for (i = 0; i < ntpl; i++) {
3720                 err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "",
3721                     tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash);
3722                 if (err) {
3723                         /* Sbuf overflow condition. */
3724                         err = EOVERFLOW;
3725                         break;
3726                 }
3727         }
3728         TPL_LIST_RUNLOCK();
3729
3730         if (!err) {
3731                 sbuf_finish(s);
3732                 err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
3733         }
3734
3735         sbuf_delete(s);
3736         return (err);
3737 }
3738
3739 /*
3740  * Called by subsystem-specific sysctls to report and/or parse the list of
3741  * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t
3742  * conformant function pointer must be passed in as arg1, which is used to
3743  * interact with the subsystem's stats template sample rates list. If arg2 > 0,
3744  * a zero-initialised allocation of arg2-sized contextual memory is
3745  * heap-allocated and passed in to all subsystem callbacks made during the
3746  * operation of stats_tpl_sample_rates().
3747  *
3748  * XXXLAS: Assumes templates are never removed, which is currently true but may
3749  * need to be reworked in future if dynamic template management becomes a
3750  * requirement e.g. to support kernel module based templates.
3751  */
3752 int
3753 stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS)
3754 {
3755         char kvpair_fmt[16], tplspec_fmt[16];
3756         char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN];
3757         char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */
3758         stats_tpl_sr_cb_t subsys_cb;
3759         void *subsys_ctx;
3760         char *buf, *new_rates_usr_str, *tpl_name_p;
3761         struct stats_tpl_sample_rate *rates;
3762         struct sbuf *s, _s;
3763         uint32_t cum_pct, pct, tpl_hash;
3764         int err, i, off, len, newlen, nrates;
3765
3766         buf = NULL;
3767         rates = NULL;
3768         err = nrates = 0;
3769         subsys_cb = (stats_tpl_sr_cb_t)arg1;
3770         KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__));
3771         if (arg2 > 0)
3772                 subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO);
3773         else
3774                 subsys_ctx = NULL;
3775
3776         /* Grab current count of subsystem rates. */
3777         err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx);
3778         if (err)
3779                 goto done;
3780
3781         /* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */
3782         len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5));
3783
3784         if (req->oldptr != NULL || req->newptr != NULL)
3785                 buf = malloc(len, M_TEMP, M_WAITOK);
3786
3787         if (req->oldptr != NULL) {
3788                 if (nrates == 0) {
3789                         /* No rates, so return an empty string via oldptr. */
3790                         err = SYSCTL_OUT(req, "", 1);
3791                         if (err)
3792                                 goto done;
3793                         goto process_new;
3794                 }
3795
3796                 s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3797
3798                 /* Grab locked count of, and ptr to, subsystem rates. */
3799                 err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates,
3800                     subsys_ctx);
3801                 if (err)
3802                         goto done;
3803                 TPL_LIST_RLOCK();
3804                 for (i = 0; i < nrates && !err; i++) {
3805                         err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "",
3806                             tpllist[rates[i].tpl_slot_id]->mb->tplname,
3807                             tpllist[rates[i].tpl_slot_id]->mb->tplhash,
3808                             rates[i].tpl_sample_pct);
3809                 }
3810                 TPL_LIST_RUNLOCK();
3811                 /* Tell subsystem that we're done with its rates list. */
3812                 err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx);
3813                 if (err)
3814                         goto done;
3815
3816                 err = sbuf_finish(s);
3817                 if (err)
3818                         goto done; /* We lost a race for buf to be too small. */
3819
3820                 /* Return the rendered string data via oldptr. */
3821                 err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s));
3822         } else {
3823                 /* Return the upper bound size for buffer sizing requests. */
3824                 err = SYSCTL_OUT(req, NULL, len);
3825         }
3826
3827 process_new:
3828         if (err || req->newptr == NULL)
3829                 goto done;
3830
3831         newlen = req->newlen - req->newidx;
3832         err = SYSCTL_IN(req, buf, newlen);
3833         if (err)
3834                 goto done;
3835
3836         /*
3837          * Initialise format strings at run time.
3838          *
3839          * Write the max template spec string length into the
3840          * template_spec=percent key-value pair parsing format string as:
3841          *     " %<width>[^=]=%u %n"
3842          *
3843          * Write the max template name string length into the tplname:tplhash
3844          * parsing format string as:
3845          *     "%<width>[^:]:%u"
3846          *
3847          * Subtract 1 for \0 appended by sscanf().
3848          */
3849         sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1);
3850         sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1);
3851
3852         /*
3853          * Parse each CSV key-value pair specifying a template and its sample
3854          * percentage. Whitespace either side of a key-value pair is ignored.
3855          * Templates can be specified by name, hash, or name and hash per the
3856          * following formats (chars in [] are optional):
3857          *    ["]<tplname>["]=<percent>
3858          *    :hash=pct
3859          *    ["]<tplname>["]:hash=<percent>
3860          */
3861         cum_pct = nrates = 0;
3862         rates = NULL;
3863         buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */
3864         new_rates_usr_str = buf;
3865         while (isspace(*new_rates_usr_str))
3866                 new_rates_usr_str++; /* Skip leading whitespace. */
3867         while (*new_rates_usr_str != '\0') {
3868                 tpl_name_p = tpl_name;
3869                 tpl_name[0] = '\0';
3870                 tpl_hash = 0;
3871                 off = 0;
3872
3873                 /*
3874                  * Parse key-value pair which must perform 2 conversions, then
3875                  * parse the template spec to extract either name, hash, or name
3876                  * and hash depending on the three possible spec formats. The
3877                  * tplspec_fmt format specifier parses name or name and hash
3878                  * template specs, while the ":%u" format specifier parses
3879                  * hash-only template specs. If parsing is successfull, ensure
3880                  * the cumulative sampling percentage does not exceed 100.
3881                  */
3882                 err = EINVAL;
3883                 if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct,
3884                     &off))
3885                         break;
3886                 if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) &&
3887                     (1 != sscanf(tpl_spec, ":%u", &tpl_hash)))
3888                         break;
3889                 if ((cum_pct += pct) > 100)
3890                         break;
3891                 err = 0;
3892
3893                 /* Strip surrounding "" from template name if present. */
3894                 len = strlen(tpl_name);
3895                 if (len > 0) {
3896                         if (tpl_name[len - 1] == '"')
3897                                 tpl_name[--len] = '\0';
3898                         if (tpl_name[0] == '"') {
3899                                 tpl_name_p++;
3900                                 len--;
3901                         }
3902                 }
3903
3904                 rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. */
3905                     (nrates + 1) * sizeof(*rates), M_WAITOK);
3906                 rates[nrates].tpl_slot_id =
3907                     stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash);
3908                 if (rates[nrates].tpl_slot_id < 0) {
3909                         err = -rates[nrates].tpl_slot_id;
3910                         break;
3911                 }
3912                 rates[nrates].tpl_sample_pct = pct;
3913                 nrates++;
3914                 new_rates_usr_str += off;
3915                 if (*new_rates_usr_str != ',')
3916                         break; /* End-of-input or malformed. */
3917                 new_rates_usr_str++; /* Move past comma to next pair. */
3918         }
3919
3920         if (!err) {
3921                 if ((new_rates_usr_str - buf) < newlen) {
3922                         /* Entire input has not been consumed. */
3923                         err = EINVAL;
3924                 } else {
3925                         /*
3926                          * Give subsystem the new rates. They'll return the
3927                          * appropriate rates pointer for us to garbage collect.
3928                          */
3929                         err = subsys_cb(TPL_SR_PUT, &rates, &nrates,
3930                             subsys_ctx);
3931                 }
3932         }
3933         stats_free(rates);
3934
3935 done:
3936         free(buf, M_TEMP);
3937         free(subsys_ctx, M_TEMP);
3938         return (err);
3939 }
3940
3941 SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
3942     "stats(9) MIB");
3943
3944 SYSCTL_PROC(_kern_stats, OID_AUTO, templates,
3945     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
3946     stats_tpl_list_available, "A",
3947     "list the name/hash of all available stats(9) templates");
3948
3949 #else /* ! _KERNEL */
3950
3951 static void __attribute__ ((constructor))
3952 stats_constructor(void)
3953 {
3954
3955         pthread_rwlock_init(&tpllistlock, NULL);
3956 }
3957
3958 static void __attribute__ ((destructor))
3959 stats_destructor(void)
3960 {
3961
3962         pthread_rwlock_destroy(&tpllistlock);
3963 }
3964
3965 #endif /* _KERNEL */