]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/subr_stats.c
bsddialog: import version 1.0
[FreeBSD/FreeBSD.git] / sys / kern / subr_stats.c
1 /*-
2  * Copyright (c) 2014-2018 Netflix, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26
27 /*
28  * Author: Lawrence Stewart <lstewart@netflix.com>
29  */
30
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/arb.h>
34 #include <sys/ctype.h>
35 #include <sys/errno.h>
36 #include <sys/hash.h>
37 #include <sys/limits.h>
38 #include <sys/malloc.h>
39 #include <sys/qmath.h>
40 #include <sys/sbuf.h>
41 #if defined(DIAGNOSTIC)
42 #include <sys/tree.h>
43 #endif
44 #include <sys/stats.h> /* Must come after qmath.h and arb.h */
45 #include <sys/stddef.h>
46 #include <sys/stdint.h>
47 #include <sys/time.h>
48
49 #ifdef _KERNEL
50 #include <sys/kernel.h>
51 #include <sys/lock.h>
52 #include <sys/rwlock.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55 #else /* ! _KERNEL */
56 #include <pthread.h>
57 #include <stdbool.h>
58 #include <stdio.h>
59 #include <stdlib.h>
60 #include <string.h>
61 #endif /* _KERNEL */
62
63 struct voistatdata_voistate {
64         /* Previous VOI value for diff calculation. */
65         struct voistatdata_numeric prev;
66 };
67
68 #define VS_VSDVALID     0x0001  /* Stat's voistatdata updated at least once. */
69 struct voistat {
70         int8_t          stype;          /* Type of stat e.g. VS_STYPE_SUM. */
71         enum vsd_dtype  dtype : 8;      /* Data type of this stat's data. */
72         uint16_t        data_off;       /* Blob offset for this stat's data. */
73         uint16_t        dsz;            /* Size of stat's data. */
74 #define VS_EBITS 8
75         uint16_t        errs : VS_EBITS;/* Non-wrapping error count. */
76         uint16_t        flags : 16 - VS_EBITS;
77 };
/*
 * Increment a voistat's error counter. The counter saturates at the largest
 * value a VS_EBITS-wide field can hold instead of wrapping back to zero.
 * Note that 'vs' is evaluated more than once.
 */
#define	VS_INCERRS(vs) do {						\
	if ((vs)->errs < (1U << VS_EBITS) - 1) {			\
		(vs)->errs++;						\
	}								\
} while (0)
83
84 /*
85  * Ideas for flags:
86  *   - Global or entity specific (global would imply use of counter(9)?)
87  *   - Whether to reset stats on read or not
88  *   - Signal an overflow?
89  *   - Compressed voistat array
90  */
91 #define VOI_REQSTATE    0x0001  /* VOI requires VS_STYPE_VOISTATE. */
92 struct voi {
93         int16_t         id;             /* VOI id. */
94         enum vsd_dtype  dtype : 8;      /* Data type of the VOI itself. */
95         int8_t          voistatmaxid;   /* Largest allocated voistat index. */
96         uint16_t        stats_off;      /* Blob offset for this VOIs stats. */
97         uint16_t        flags;
98 };
99
100 /*
101  * Memory for the entire blob is allocated as a slab and then offsets are
102  * maintained to carve up the slab into sections holding different data types.
103  *
104  * Ideas for flags:
105  * - Compressed voi array (trade off memory usage vs search time)
 * - Units of offsets (default bytes, flag for e.g. vm_page/KiB/MiB)
107  */
108 struct statsblobv1 {
109         uint8_t         abi;
110         uint8_t         endian;
111         uint16_t        flags;
112         uint16_t        maxsz;
113         uint16_t        cursz;
114         /* Fields from here down are opaque to consumers. */
115         uint32_t        tplhash;        /* Base template hash ID. */
116         uint16_t        stats_off;      /* voistat array blob offset. */
117         uint16_t        statsdata_off;  /* voistatdata array blob offset. */
118         sbintime_t      created;        /* Blob creation time. */
119         sbintime_t      lastrst;        /* Time of last reset. */
120         struct voi      vois[];         /* Array indexed by [voi_id]. */
121 } __aligned(sizeof(void *));
122 _Static_assert(offsetof(struct statsblobv1, cursz) +
123     SIZEOF_MEMBER(struct statsblobv1, cursz) ==
124     offsetof(struct statsblob, opaque),
125     "statsblobv1 ABI mismatch");
126
/* A v1 template: a metablob pointer paired with a v1 stats blob pointer. */
struct statsblobv1_tpl {
	struct metablob		*mb;
	struct statsblobv1	*sb;
};
131
/* Context passed to iterator callbacks. */
struct sb_iter_ctx {
	/* Caller supplied context. */
	void		*usrctx;
	/* Flags for current iteration. */
	uint32_t	flags;
	/* struct voi slot index. */
	int16_t		vslot;
	/* struct voistat slot index. */
	int8_t		vsslot;
};
139
140 struct sb_tostrcb_ctx {
141         struct sbuf             *buf;
142         struct statsblob_tpl    *tpl;
143         enum sb_str_fmt fmt;
144         uint32_t                flags;
145 };
146
147 struct sb_visitcb_ctx {
148         stats_blob_visitcb_t    cb;
149         void                    *usrctx;
150 };
151
/*
 * Stats blob iterator callback. Receives the blob, a VOI, a voistat and the
 * iteration context, and returns an int.
 */
typedef int (*stats_v1_blob_itercb_t)(
    struct statsblobv1 *sb,
    struct voi *v,
    struct voistat *vs,
    struct sb_iter_ctx *ctx);
155
156 #ifdef _KERNEL
157 static struct rwlock tpllistlock;
158 RW_SYSINIT(stats_tpl_list, &tpllistlock, "Stat template list lock");
159 #define TPL_LIST_RLOCK() rw_rlock(&tpllistlock)
160 #define TPL_LIST_RUNLOCK() rw_runlock(&tpllistlock)
161 #define TPL_LIST_WLOCK() rw_wlock(&tpllistlock)
162 #define TPL_LIST_WUNLOCK() rw_wunlock(&tpllistlock)
163 #define TPL_LIST_LOCK_ASSERT() rw_assert(&tpllistlock, RA_LOCKED)
164 #define TPL_LIST_RLOCK_ASSERT() rw_assert(&tpllistlock, RA_RLOCKED)
165 #define TPL_LIST_WLOCK_ASSERT() rw_assert(&tpllistlock, RA_WLOCKED)
166 MALLOC_DEFINE(M_STATS, "stats(9) related memory", "stats(9) related memory");
167 #define stats_free(ptr) free((ptr), M_STATS)
168 #else /* ! _KERNEL */
169 static void stats_constructor(void);
170 static void stats_destructor(void);
171 static pthread_rwlock_t tpllistlock;
172 #define TPL_LIST_UNLOCK() pthread_rwlock_unlock(&tpllistlock)
173 #define TPL_LIST_RLOCK() pthread_rwlock_rdlock(&tpllistlock)
174 #define TPL_LIST_RUNLOCK() TPL_LIST_UNLOCK()
175 #define TPL_LIST_WLOCK() pthread_rwlock_wrlock(&tpllistlock)
176 #define TPL_LIST_WUNLOCK() TPL_LIST_UNLOCK()
177 #define TPL_LIST_LOCK_ASSERT() do { } while (0)
178 #define TPL_LIST_RLOCK_ASSERT() do { } while (0)
179 #define TPL_LIST_WLOCK_ASSERT() do { } while (0)
180 #ifdef NDEBUG
181 #define KASSERT(cond, msg) do {} while (0)
182 #define stats_abort() do {} while (0)
183 #else /* ! NDEBUG */
184 #define KASSERT(cond, msg) do { \
185         if (!(cond)) { \
186                 panic msg; \
187         } \
188 } while (0)
189 #define stats_abort() abort()
190 #endif /* NDEBUG */
191 #define stats_free(ptr) free(ptr)
192 #define panic(fmt, ...) do { \
193         fprintf(stderr, (fmt), ##__VA_ARGS__); \
194         stats_abort(); \
195 } while (0)
196 #endif /* _KERNEL */
197
/* Size cap for v1 blobs; 65535 is the largest value a uint16_t can hold. */
#define	SB_V1_MAXSZ	65535
199
/* Obtain a void pointer to the byte 'off' bytes past the start of 'sb'. */
#define	BLOB_OFFSET(sb, off)	((void *)&((uint8_t *)(sb))[(off)])
202
203 /*
204  * Number of VOIs in the blob's vois[] array. By virtue of struct voi being a
205  * power of 2 size, we can shift instead of divide. The shift amount must be
206  * updated if sizeof(struct voi) ever changes, which the assert should catch.
207  */
208 #define NVOIS(sb) ((int32_t)((((struct statsblobv1 *)(sb))->stats_off - \
209     sizeof(struct statsblobv1)) >> 3))
210 _Static_assert(sizeof(struct voi) == 8, "statsblobv1 voi ABI mismatch");
211
212 /* Try restrict names to alphanumeric and underscore to simplify JSON compat. */
213 const char *vs_stype2name[VS_NUM_STYPES] = {
214         [VS_STYPE_VOISTATE] = "VOISTATE",
215         [VS_STYPE_SUM] = "SUM",
216         [VS_STYPE_MAX] = "MAX",
217         [VS_STYPE_MIN] = "MIN",
218         [VS_STYPE_HIST] = "HIST",
219         [VS_STYPE_TDGST] = "TDGST",
220 };
221
222 const char *vs_stype2desc[VS_NUM_STYPES] = {
223         [VS_STYPE_VOISTATE] = "VOI related state data (not a real stat)",
224         [VS_STYPE_SUM] = "Simple arithmetic accumulator",
225         [VS_STYPE_MAX] = "Maximum observed VOI value",
226         [VS_STYPE_MIN] = "Minimum observed VOI value",
227         [VS_STYPE_HIST] = "Histogram of observed VOI values",
228         [VS_STYPE_TDGST] = "t-digest of observed VOI values",
229 };
230
231 const char *vsd_dtype2name[VSD_NUM_DTYPES] = {
232         [VSD_DTYPE_VOISTATE] = "VOISTATE",
233         [VSD_DTYPE_INT_S32] = "INT_S32",
234         [VSD_DTYPE_INT_U32] = "INT_U32",
235         [VSD_DTYPE_INT_S64] = "INT_S64",
236         [VSD_DTYPE_INT_U64] = "INT_U64",
237         [VSD_DTYPE_INT_SLONG] = "INT_SLONG",
238         [VSD_DTYPE_INT_ULONG] = "INT_ULONG",
239         [VSD_DTYPE_Q_S32] = "Q_S32",
240         [VSD_DTYPE_Q_U32] = "Q_U32",
241         [VSD_DTYPE_Q_S64] = "Q_S64",
242         [VSD_DTYPE_Q_U64] = "Q_U64",
243         [VSD_DTYPE_CRHIST32] = "CRHIST32",
244         [VSD_DTYPE_DRHIST32] = "DRHIST32",
245         [VSD_DTYPE_DVHIST32] = "DVHIST32",
246         [VSD_DTYPE_CRHIST64] = "CRHIST64",
247         [VSD_DTYPE_DRHIST64] = "DRHIST64",
248         [VSD_DTYPE_DVHIST64] = "DVHIST64",
249         [VSD_DTYPE_TDGSTCLUST32] = "TDGSTCLUST32",
250         [VSD_DTYPE_TDGSTCLUST64] = "TDGSTCLUST64",
251 };
252
253 const size_t vsd_dtype2size[VSD_NUM_DTYPES] = {
254         [VSD_DTYPE_VOISTATE] = sizeof(struct voistatdata_voistate),
255         [VSD_DTYPE_INT_S32] = sizeof(struct voistatdata_int32),
256         [VSD_DTYPE_INT_U32] = sizeof(struct voistatdata_int32),
257         [VSD_DTYPE_INT_S64] = sizeof(struct voistatdata_int64),
258         [VSD_DTYPE_INT_U64] = sizeof(struct voistatdata_int64),
259         [VSD_DTYPE_INT_SLONG] = sizeof(struct voistatdata_intlong),
260         [VSD_DTYPE_INT_ULONG] = sizeof(struct voistatdata_intlong),
261         [VSD_DTYPE_Q_S32] = sizeof(struct voistatdata_q32),
262         [VSD_DTYPE_Q_U32] = sizeof(struct voistatdata_q32),
263         [VSD_DTYPE_Q_S64] = sizeof(struct voistatdata_q64),
264         [VSD_DTYPE_Q_U64] = sizeof(struct voistatdata_q64),
265         [VSD_DTYPE_CRHIST32] = sizeof(struct voistatdata_crhist32),
266         [VSD_DTYPE_DRHIST32] = sizeof(struct voistatdata_drhist32),
267         [VSD_DTYPE_DVHIST32] = sizeof(struct voistatdata_dvhist32),
268         [VSD_DTYPE_CRHIST64] = sizeof(struct voistatdata_crhist64),
269         [VSD_DTYPE_DRHIST64] = sizeof(struct voistatdata_drhist64),
270         [VSD_DTYPE_DVHIST64] = sizeof(struct voistatdata_dvhist64),
271         [VSD_DTYPE_TDGSTCLUST32] = sizeof(struct voistatdata_tdgstclust32),
272         [VSD_DTYPE_TDGSTCLUST64] = sizeof(struct voistatdata_tdgstclust64),
273 };
274
275 static const bool vsd_compoundtype[VSD_NUM_DTYPES] = {
276         [VSD_DTYPE_VOISTATE] = true,
277         [VSD_DTYPE_INT_S32] = false,
278         [VSD_DTYPE_INT_U32] = false,
279         [VSD_DTYPE_INT_S64] = false,
280         [VSD_DTYPE_INT_U64] = false,
281         [VSD_DTYPE_INT_SLONG] = false,
282         [VSD_DTYPE_INT_ULONG] = false,
283         [VSD_DTYPE_Q_S32] = false,
284         [VSD_DTYPE_Q_U32] = false,
285         [VSD_DTYPE_Q_S64] = false,
286         [VSD_DTYPE_Q_U64] = false,
287         [VSD_DTYPE_CRHIST32] = true,
288         [VSD_DTYPE_DRHIST32] = true,
289         [VSD_DTYPE_DVHIST32] = true,
290         [VSD_DTYPE_CRHIST64] = true,
291         [VSD_DTYPE_DRHIST64] = true,
292         [VSD_DTYPE_DVHIST64] = true,
293         [VSD_DTYPE_TDGSTCLUST32] = true,
294         [VSD_DTYPE_TDGSTCLUST64] = true,
295 };
296
297 const struct voistatdata_numeric numeric_limits[2][VSD_DTYPE_Q_U64 + 1] = {
298         [LIM_MIN] = {
299                 [VSD_DTYPE_VOISTATE] = {0},
300                 [VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MIN}},
301                 [VSD_DTYPE_INT_U32] = {.int32 = {.u32 = 0}},
302                 [VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MIN}},
303                 [VSD_DTYPE_INT_U64] = {.int64 = {.u64 = 0}},
304                 [VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MIN}},
305                 [VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = 0}},
306                 [VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMINVAL(INT32_MIN)}},
307                 [VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = 0}},
308                 [VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMINVAL(INT64_MIN)}},
309                 [VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = 0}},
310         },
311         [LIM_MAX] = {
312                 [VSD_DTYPE_VOISTATE] = {0},
313                 [VSD_DTYPE_INT_S32] = {.int32 = {.s32 = INT32_MAX}},
314                 [VSD_DTYPE_INT_U32] = {.int32 = {.u32 = UINT32_MAX}},
315                 [VSD_DTYPE_INT_S64] = {.int64 = {.s64 = INT64_MAX}},
316                 [VSD_DTYPE_INT_U64] = {.int64 = {.u64 = UINT64_MAX}},
317                 [VSD_DTYPE_INT_SLONG] = {.intlong = {.slong = LONG_MAX}},
318                 [VSD_DTYPE_INT_ULONG] = {.intlong = {.ulong = ULONG_MAX}},
319                 [VSD_DTYPE_Q_S32] = {.q32 = {.sq32 = Q_IFMAXVAL(INT32_MAX)}},
320                 [VSD_DTYPE_Q_U32] = {.q32 = {.uq32 = Q_IFMAXVAL(UINT32_MAX)}},
321                 [VSD_DTYPE_Q_S64] = {.q64 = {.sq64 = Q_IFMAXVAL(INT64_MAX)}},
322                 [VSD_DTYPE_Q_U64] = {.q64 = {.uq64 = Q_IFMAXVAL(UINT64_MAX)}},
323         }
324 };
325
326 /* tpllistlock protects tpllist and ntpl */
327 static uint32_t ntpl;
328 static struct statsblob_tpl **tpllist;
329
330 static inline void * stats_realloc(void *ptr, size_t oldsz, size_t newsz,
331     int flags);
332 //static void stats_v1_blob_finalise(struct statsblobv1 *sb);
333 static int stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
334     uint32_t flags);
335 static int stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
336     int newvoistatbytes, int newvoistatdatabytes);
337 static void stats_v1_blob_iter(struct statsblobv1 *sb,
338     stats_v1_blob_itercb_t icb, void *usrctx, uint32_t flags);
339 static inline int stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype,
340     struct voistatdata_tdgst *tdgst, s64q_t x, uint64_t weight, int attempt);
341
342 static inline int
343 ctd32cmp(const struct voistatdata_tdgstctd32 *c1, const struct voistatdata_tdgstctd32 *c2)
344 {
345
346         KASSERT(Q_PRECEQ(c1->mu, c2->mu),
347             ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
348             Q_RELPREC(c1->mu, c2->mu)));
349
350        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
351 }
352 ARB_GENERATE_STATIC(ctdth32, voistatdata_tdgstctd32, ctdlnk, ctd32cmp);
353
354 static inline int
355 ctd64cmp(const struct voistatdata_tdgstctd64 *c1, const struct voistatdata_tdgstctd64 *c2)
356 {
357
358         KASSERT(Q_PRECEQ(c1->mu, c2->mu),
359             ("%s: Q_RELPREC(c1->mu,c2->mu)=%d", __func__,
360             Q_RELPREC(c1->mu, c2->mu)));
361
362        return (Q_QLTQ(c1->mu, c2->mu) ? -1 : 1);
363 }
364 ARB_GENERATE_STATIC(ctdth64, voistatdata_tdgstctd64, ctdlnk, ctd64cmp);
365
#ifdef DIAGNOSTIC
/*
 * DIAGNOSTIC builds also generate RB trees over the same centroid types,
 * linked through 'rblnk' and ordered by the same comparators.
 */
RB_GENERATE_STATIC(rbctdth32, voistatdata_tdgstctd32, rblnk, ctd32cmp);
RB_GENERATE_STATIC(rbctdth64, voistatdata_tdgstctd64, rblnk, ctd64cmp);
#endif /* DIAGNOSTIC */
370
371 static inline sbintime_t
372 stats_sbinuptime(void)
373 {
374         sbintime_t sbt;
375 #ifdef _KERNEL
376
377         sbt = sbinuptime();
378 #else /* ! _KERNEL */
379         struct timespec tp;
380
381         clock_gettime(CLOCK_MONOTONIC_FAST, &tp);
382         sbt = tstosbt(tp);
383 #endif /* _KERNEL */
384
385         return (sbt);
386 }
387
388 static inline void *
389 stats_realloc(void *ptr, size_t oldsz, size_t newsz, int flags)
390 {
391
392 #ifdef _KERNEL
393         /* Default to M_NOWAIT if neither M_NOWAIT or M_WAITOK are set. */
394         if (!(flags & (M_WAITOK | M_NOWAIT)))
395                 flags |= M_NOWAIT;
396         ptr = realloc(ptr, newsz, M_STATS, flags);
397 #else /* ! _KERNEL */
398         ptr = realloc(ptr, newsz);
399         if ((flags & M_ZERO) && ptr != NULL) {
400                 if (oldsz == 0)
401                         memset(ptr, '\0', newsz);
402                 else if (newsz > oldsz)
403                         memset(BLOB_OFFSET(ptr, oldsz), '\0', newsz - oldsz);
404         }
405 #endif /* _KERNEL */
406
407         return (ptr);
408 }
409
/*
 * Duplicate the NUL-terminated string 's'.
 *
 * In the kernel the copy comes from M_STATS using 'flags', with M_NOWAIT
 * added when the caller set neither M_WAITOK nor M_NOWAIT. In userland
 * 'flags' is ignored and strdup() is used.
 *
 * Returns the copy, or NULL if the allocation failed.
 */
static inline char *
stats_strdup(const char *s, int flags)
{
#ifdef _KERNEL
	char *dup;
	size_t nbytes;

	if ((flags & (M_WAITOK | M_NOWAIT)) == 0)
		flags |= M_NOWAIT;

	/* Copy the terminating NUL as well. */
	nbytes = strlen(s) + 1;
	dup = malloc(nbytes, M_STATS, flags);
	if (dup != NULL)
		bcopy(s, dup, nbytes);

	return (dup);
#else /* ! _KERNEL */
	(void)flags;

	return (strdup(s));
#endif /* _KERNEL */
}
432
433 static inline void
434 stats_tpl_update_hash(struct statsblob_tpl *tpl)
435 {
436
437         TPL_LIST_WLOCK_ASSERT();
438         tpl->mb->tplhash = hash32_str(tpl->mb->tplname, 0);
439         for (int voi_id = 0; voi_id < NVOIS(tpl->sb); voi_id++) {
440                 if (tpl->mb->voi_meta[voi_id].name != NULL)
441                         tpl->mb->tplhash = hash32_str(
442                             tpl->mb->voi_meta[voi_id].name, tpl->mb->tplhash);
443         }
444         tpl->mb->tplhash = hash32_buf(tpl->sb, tpl->sb->cursz,
445             tpl->mb->tplhash);
446 }
447
/*
 * Raise 'base' to the power 'exp' by binary exponentiation. Arithmetic is
 * plain uint64_t, so results that do not fit wrap modulo 2^64. Anything to
 * the power 0, including 0, yields 1.
 */
static inline uint64_t
stats_pow_u64(uint64_t base, uint64_t exp)
{
	uint64_t acc;

	/* Each pass consumes the lowest bit of exp and squares base. */
	for (acc = 1; exp != 0; exp >>= 1, base *= base) {
		if ((exp & 1) != 0)
			acc *= base;
	}

	return (acc);
}
462
463 static inline int
464 stats_vss_hist_bkt_hlpr(struct vss_hist_hlpr_info *info, uint32_t curbkt,
465     struct voistatdata_numeric *bkt_lb, struct voistatdata_numeric *bkt_ub)
466 {
467         uint64_t step = 0;
468         int error = 0;
469
470         switch (info->scheme) {
471         case BKT_LIN:
472                 step = info->lin.stepinc;
473                 break;
474         case BKT_EXP:
475                 step = stats_pow_u64(info->exp.stepbase,
476                     info->exp.stepexp + curbkt);
477                 break;
478         case BKT_LINEXP:
479                 {
480                 uint64_t curstepexp = 1;
481
482                 switch (info->voi_dtype) {
483                 case VSD_DTYPE_INT_S32:
484                         while ((int32_t)stats_pow_u64(info->linexp.stepbase,
485                             curstepexp) <= bkt_lb->int32.s32)
486                                 curstepexp++;
487                         break;
488                 case VSD_DTYPE_INT_U32:
489                         while ((uint32_t)stats_pow_u64(info->linexp.stepbase,
490                             curstepexp) <= bkt_lb->int32.u32)
491                                 curstepexp++;
492                         break;
493                 case VSD_DTYPE_INT_S64:
494                         while ((int64_t)stats_pow_u64(info->linexp.stepbase,
495                             curstepexp) <= bkt_lb->int64.s64)
496                                 curstepexp++;
497                         break;
498                 case VSD_DTYPE_INT_U64:
499                         while ((uint64_t)stats_pow_u64(info->linexp.stepbase,
500                             curstepexp) <= bkt_lb->int64.u64)
501                                 curstepexp++;
502                         break;
503                 case VSD_DTYPE_INT_SLONG:
504                         while ((long)stats_pow_u64(info->linexp.stepbase,
505                             curstepexp) <= bkt_lb->intlong.slong)
506                                 curstepexp++;
507                         break;
508                 case VSD_DTYPE_INT_ULONG:
509                         while ((unsigned long)stats_pow_u64(info->linexp.stepbase,
510                             curstepexp) <= bkt_lb->intlong.ulong)
511                                 curstepexp++;
512                         break;
513                 case VSD_DTYPE_Q_S32:
514                         while ((s32q_t)stats_pow_u64(info->linexp.stepbase,
515                             curstepexp) <= Q_GIVAL(bkt_lb->q32.sq32))
516                         break;
517                 case VSD_DTYPE_Q_U32:
518                         while ((u32q_t)stats_pow_u64(info->linexp.stepbase,
519                             curstepexp) <= Q_GIVAL(bkt_lb->q32.uq32))
520                         break;
521                 case VSD_DTYPE_Q_S64:
522                         while ((s64q_t)stats_pow_u64(info->linexp.stepbase,
523                             curstepexp) <= Q_GIVAL(bkt_lb->q64.sq64))
524                                 curstepexp++;
525                         break;
526                 case VSD_DTYPE_Q_U64:
527                         while ((u64q_t)stats_pow_u64(info->linexp.stepbase,
528                             curstepexp) <= Q_GIVAL(bkt_lb->q64.uq64))
529                                 curstepexp++;
530                         break;
531                 default:
532                         break;
533                 }
534
535                 step = stats_pow_u64(info->linexp.stepbase, curstepexp) /
536                     info->linexp.linstepdiv;
537                 if (step == 0)
538                         step = 1;
539                 break;
540                 }
541         default:
542                 break;
543         }
544
545         if (info->scheme == BKT_USR) {
546                 *bkt_lb = info->usr.bkts[curbkt].lb;
547                 *bkt_ub = info->usr.bkts[curbkt].ub;
548         } else if (step != 0) {
549                 switch (info->voi_dtype) {
550                 case VSD_DTYPE_INT_S32:
551                         bkt_ub->int32.s32 += (int32_t)step;
552                         break;
553                 case VSD_DTYPE_INT_U32:
554                         bkt_ub->int32.u32 += (uint32_t)step;
555                         break;
556                 case VSD_DTYPE_INT_S64:
557                         bkt_ub->int64.s64 += (int64_t)step;
558                         break;
559                 case VSD_DTYPE_INT_U64:
560                         bkt_ub->int64.u64 += (uint64_t)step;
561                         break;
562                 case VSD_DTYPE_INT_SLONG:
563                         bkt_ub->intlong.slong += (long)step;
564                         break;
565                 case VSD_DTYPE_INT_ULONG:
566                         bkt_ub->intlong.ulong += (unsigned long)step;
567                         break;
568                 case VSD_DTYPE_Q_S32:
569                         error = Q_QADDI(&bkt_ub->q32.sq32, step);
570                         break;
571                 case VSD_DTYPE_Q_U32:
572                         error = Q_QADDI(&bkt_ub->q32.uq32, step);
573                         break;
574                 case VSD_DTYPE_Q_S64:
575                         error = Q_QADDI(&bkt_ub->q64.sq64, step);
576                         break;
577                 case VSD_DTYPE_Q_U64:
578                         error = Q_QADDI(&bkt_ub->q64.uq64, step);
579                         break;
580                 default:
581                         break;
582                 }
583         } else { /* info->scheme != BKT_USR && step == 0 */
584                 return (EINVAL);
585         }
586
587         return (error);
588 }
589
590 static uint32_t
591 stats_vss_hist_nbkts_hlpr(struct vss_hist_hlpr_info *info)
592 {
593         struct voistatdata_numeric bkt_lb, bkt_ub;
594         uint32_t nbkts;
595         int done;
596
597         if (info->scheme == BKT_USR) {
598                 /* XXXLAS: Setting info->{lb,ub} from macro is tricky. */
599                 info->lb = info->usr.bkts[0].lb;
600                 info->ub = info->usr.bkts[info->usr.nbkts - 1].lb;
601         }
602
603         nbkts = 0;
604         done = 0;
605         bkt_ub = info->lb;
606
607         do {
608                 bkt_lb = bkt_ub;
609                 if (stats_vss_hist_bkt_hlpr(info, nbkts++, &bkt_lb, &bkt_ub))
610                         return (0);
611
612                 if (info->scheme == BKT_USR)
613                         done = (nbkts == info->usr.nbkts);
614                 else {
615                         switch (info->voi_dtype) {
616                         case VSD_DTYPE_INT_S32:
617                                 done = (bkt_ub.int32.s32 > info->ub.int32.s32);
618                                 break;
619                         case VSD_DTYPE_INT_U32:
620                                 done = (bkt_ub.int32.u32 > info->ub.int32.u32);
621                                 break;
622                         case VSD_DTYPE_INT_S64:
623                                 done = (bkt_ub.int64.s64 > info->ub.int64.s64);
624                                 break;
625                         case VSD_DTYPE_INT_U64:
626                                 done = (bkt_ub.int64.u64 > info->ub.int64.u64);
627                                 break;
628                         case VSD_DTYPE_INT_SLONG:
629                                 done = (bkt_ub.intlong.slong >
630                                     info->ub.intlong.slong);
631                                 break;
632                         case VSD_DTYPE_INT_ULONG:
633                                 done = (bkt_ub.intlong.ulong >
634                                     info->ub.intlong.ulong);
635                                 break;
636                         case VSD_DTYPE_Q_S32:
637                                 done = Q_QGTQ(bkt_ub.q32.sq32,
638                                     info->ub.q32.sq32);
639                                 break;
640                         case VSD_DTYPE_Q_U32:
641                                 done = Q_QGTQ(bkt_ub.q32.uq32,
642                                     info->ub.q32.uq32);
643                                 break;
644                         case VSD_DTYPE_Q_S64:
645                                 done = Q_QGTQ(bkt_ub.q64.sq64,
646                                     info->ub.q64.sq64);
647                                 break;
648                         case VSD_DTYPE_Q_U64:
649                                 done = Q_QGTQ(bkt_ub.q64.uq64,
650                                     info->ub.q64.uq64);
651                                 break;
652                         default:
653                                 return (0);
654                         }
655                 }
656         } while (!done);
657
658         if (info->flags & VSD_HIST_LBOUND_INF)
659                 nbkts++;
660         if (info->flags & VSD_HIST_UBOUND_INF)
661                 nbkts++;
662
663         return (nbkts);
664 }
665
666 int
667 stats_vss_hist_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
668     struct vss_hist_hlpr_info *info)
669 {
670         struct voistatdata_hist *hist;
671         struct voistatdata_numeric bkt_lb, bkt_ub, *lbinfbktlb, *lbinfbktub,
672             *ubinfbktlb, *ubinfbktub;
673         uint32_t bkt, nbkts, nloop;
674
675         if (vss == NULL || info == NULL || (info->flags &
676         (VSD_HIST_LBOUND_INF|VSD_HIST_UBOUND_INF) && (info->hist_dtype ==
677         VSD_DTYPE_DVHIST32 || info->hist_dtype == VSD_DTYPE_DVHIST64)))
678                 return (EINVAL);
679
680         info->voi_dtype = voi_dtype;
681
682         if ((nbkts = stats_vss_hist_nbkts_hlpr(info)) == 0)
683                 return (EINVAL);
684
685         switch (info->hist_dtype) {
686         case VSD_DTYPE_CRHIST32:
687                 vss->vsdsz = HIST_NBKTS2VSDSZ(crhist32, nbkts);
688                 break;
689         case VSD_DTYPE_DRHIST32:
690                 vss->vsdsz = HIST_NBKTS2VSDSZ(drhist32, nbkts);
691                 break;
692         case VSD_DTYPE_DVHIST32:
693                 vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist32, nbkts);
694                 break;
695         case VSD_DTYPE_CRHIST64:
696                 vss->vsdsz = HIST_NBKTS2VSDSZ(crhist64, nbkts);
697                 break;
698         case VSD_DTYPE_DRHIST64:
699                 vss->vsdsz = HIST_NBKTS2VSDSZ(drhist64, nbkts);
700                 break;
701         case VSD_DTYPE_DVHIST64:
702                 vss->vsdsz = HIST_NBKTS2VSDSZ(dvhist64, nbkts);
703                 break;
704         default:
705                 return (EINVAL);
706         }
707
708         vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
709         if (vss->iv == NULL)
710                 return (ENOMEM);
711
712         hist = (struct voistatdata_hist *)vss->iv;
713         bkt_ub = info->lb;
714
715         for (bkt = (info->flags & VSD_HIST_LBOUND_INF), nloop = 0;
716             bkt < nbkts;
717             bkt++, nloop++) {
718                 bkt_lb = bkt_ub;
719                 if (stats_vss_hist_bkt_hlpr(info, nloop, &bkt_lb, &bkt_ub))
720                         return (EINVAL);
721
722                 switch (info->hist_dtype) {
723                 case VSD_DTYPE_CRHIST32:
724                         VSD(crhist32, hist)->bkts[bkt].lb = bkt_lb;
725                         break;
726                 case VSD_DTYPE_DRHIST32:
727                         VSD(drhist32, hist)->bkts[bkt].lb = bkt_lb;
728                         VSD(drhist32, hist)->bkts[bkt].ub = bkt_ub;
729                         break;
730                 case VSD_DTYPE_DVHIST32:
731                         VSD(dvhist32, hist)->bkts[bkt].val = bkt_lb;
732                         break;
733                 case VSD_DTYPE_CRHIST64:
734                         VSD(crhist64, hist)->bkts[bkt].lb = bkt_lb;
735                         break;
736                 case VSD_DTYPE_DRHIST64:
737                         VSD(drhist64, hist)->bkts[bkt].lb = bkt_lb;
738                         VSD(drhist64, hist)->bkts[bkt].ub = bkt_ub;
739                         break;
740                 case VSD_DTYPE_DVHIST64:
741                         VSD(dvhist64, hist)->bkts[bkt].val = bkt_lb;
742                         break;
743                 default:
744                         return (EINVAL);
745                 }
746         }
747
748         lbinfbktlb = lbinfbktub = ubinfbktlb = ubinfbktub = NULL;
749
750         switch (info->hist_dtype) {
751         case VSD_DTYPE_CRHIST32:
752                 lbinfbktlb = &VSD(crhist32, hist)->bkts[0].lb;
753                 ubinfbktlb = &VSD(crhist32, hist)->bkts[nbkts - 1].lb;
754                 break;
755         case VSD_DTYPE_DRHIST32:
756                 lbinfbktlb = &VSD(drhist32, hist)->bkts[0].lb;
757                 lbinfbktub = &VSD(drhist32, hist)->bkts[0].ub;
758                 ubinfbktlb = &VSD(drhist32, hist)->bkts[nbkts - 1].lb;
759                 ubinfbktub = &VSD(drhist32, hist)->bkts[nbkts - 1].ub;
760                 break;
761         case VSD_DTYPE_CRHIST64:
762                 lbinfbktlb = &VSD(crhist64, hist)->bkts[0].lb;
763                 ubinfbktlb = &VSD(crhist64, hist)->bkts[nbkts - 1].lb;
764                 break;
765         case VSD_DTYPE_DRHIST64:
766                 lbinfbktlb = &VSD(drhist64, hist)->bkts[0].lb;
767                 lbinfbktub = &VSD(drhist64, hist)->bkts[0].ub;
768                 ubinfbktlb = &VSD(drhist64, hist)->bkts[nbkts - 1].lb;
769                 ubinfbktub = &VSD(drhist64, hist)->bkts[nbkts - 1].ub;
770                 break;
771         case VSD_DTYPE_DVHIST32:
772         case VSD_DTYPE_DVHIST64:
773                 break;
774         default:
775                 return (EINVAL);
776         }
777
778         if ((info->flags & VSD_HIST_LBOUND_INF) && lbinfbktlb) {
779                 *lbinfbktlb = numeric_limits[LIM_MIN][info->voi_dtype];
780                 /*
781                  * Assignment from numeric_limit array for Q types assigns max
782                  * possible integral/fractional value for underlying data type,
783                  * but we must set control bits for this specific histogram per
784                  * the user's choice of fractional bits, which we extract from
785                  * info->lb.
786                  */
787                 if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
788                     info->voi_dtype == VSD_DTYPE_Q_U32) {
789                         /* Signedness doesn't matter for setting control bits. */
790                         Q_SCVAL(lbinfbktlb->q32.sq32,
791                             Q_GCVAL(info->lb.q32.sq32));
792                 } else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
793                     info->voi_dtype == VSD_DTYPE_Q_U64) {
794                         /* Signedness doesn't matter for setting control bits. */
795                         Q_SCVAL(lbinfbktlb->q64.sq64,
796                             Q_GCVAL(info->lb.q64.sq64));
797                 }
798                 if (lbinfbktub)
799                         *lbinfbktub = info->lb;
800         }
801         if ((info->flags & VSD_HIST_UBOUND_INF) && ubinfbktlb) {
802                 *ubinfbktlb = bkt_lb;
803                 if (ubinfbktub) {
804                         *ubinfbktub = numeric_limits[LIM_MAX][info->voi_dtype];
805                         if (info->voi_dtype == VSD_DTYPE_Q_S32 ||
806                             info->voi_dtype == VSD_DTYPE_Q_U32) {
807                                 Q_SCVAL(ubinfbktub->q32.sq32,
808                                     Q_GCVAL(info->lb.q32.sq32));
809                         } else if (info->voi_dtype == VSD_DTYPE_Q_S64 ||
810                             info->voi_dtype == VSD_DTYPE_Q_U64) {
811                                 Q_SCVAL(ubinfbktub->q64.sq64,
812                                     Q_GCVAL(info->lb.q64.sq64));
813                         }
814                 }
815         }
816
817         return (0);
818 }
819
820 int
821 stats_vss_tdgst_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
822     struct vss_tdgst_hlpr_info *info)
823 {
824         struct voistatdata_tdgst *tdgst;
825         struct ctdth32 *ctd32tree;
826         struct ctdth64 *ctd64tree;
827         struct voistatdata_tdgstctd32 *ctd32;
828         struct voistatdata_tdgstctd64 *ctd64;
829
830         info->voi_dtype = voi_dtype;
831
832         switch (info->tdgst_dtype) {
833         case VSD_DTYPE_TDGSTCLUST32:
834                 vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust32, info->nctds);
835                 break;
836         case VSD_DTYPE_TDGSTCLUST64:
837                 vss->vsdsz = TDGST_NCTRS2VSDSZ(tdgstclust64, info->nctds);
838                 break;
839         default:
840                 return (EINVAL);
841         }
842
843         vss->iv = stats_realloc(NULL, 0, vss->vsdsz, M_ZERO);
844         if (vss->iv == NULL)
845                 return (ENOMEM);
846
847         tdgst = (struct voistatdata_tdgst *)vss->iv;
848
849         switch (info->tdgst_dtype) {
850         case VSD_DTYPE_TDGSTCLUST32:
851                 ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
852                 ARB_INIT(ctd32, ctdlnk, ctd32tree, info->nctds) {
853                         Q_INI(&ctd32->mu, 0, 0, info->prec);
854                 }
855                 break;
856         case VSD_DTYPE_TDGSTCLUST64:
857                 ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
858                 ARB_INIT(ctd64, ctdlnk, ctd64tree, info->nctds) {
859                         Q_INI(&ctd64->mu, 0, 0, info->prec);
860                 }
861                 break;
862         default:
863                 return (EINVAL);
864         }
865
866         return (0);
867 }
868
869 int
870 stats_vss_numeric_hlpr(enum vsd_dtype voi_dtype, struct voistatspec *vss,
871     struct vss_numeric_hlpr_info *info)
872 {
873         struct voistatdata_numeric iv;
874
875         switch (vss->stype) {
876         case VS_STYPE_SUM:
877                 iv = stats_ctor_vsd_numeric(0);
878                 break;
879         case VS_STYPE_MIN:
880                 iv = numeric_limits[LIM_MAX][voi_dtype];
881                 break;
882         case VS_STYPE_MAX:
883                 iv = numeric_limits[LIM_MIN][voi_dtype];
884                 break;
885         default:
886                 return (EINVAL);
887         }
888
889         vss->iv = stats_realloc(NULL, 0, vsd_dtype2size[voi_dtype], 0);
890         if (vss->iv == NULL)
891                 return (ENOMEM);
892
893         vss->vs_dtype = voi_dtype;
894         vss->vsdsz = vsd_dtype2size[voi_dtype];
895         switch (voi_dtype) {
896         case VSD_DTYPE_INT_S32:
897                 *((int32_t *)vss->iv) = iv.int32.s32;
898                 break;
899         case VSD_DTYPE_INT_U32:
900                 *((uint32_t *)vss->iv) = iv.int32.u32;
901                 break;
902         case VSD_DTYPE_INT_S64:
903                 *((int64_t *)vss->iv) = iv.int64.s64;
904                 break;
905         case VSD_DTYPE_INT_U64:
906                 *((uint64_t *)vss->iv) = iv.int64.u64;
907                 break;
908         case VSD_DTYPE_INT_SLONG:
909                 *((long *)vss->iv) = iv.intlong.slong;
910                 break;
911         case VSD_DTYPE_INT_ULONG:
912                 *((unsigned long *)vss->iv) = iv.intlong.ulong;
913                 break;
914         case VSD_DTYPE_Q_S32:
915                 *((s32q_t *)vss->iv) = Q_SCVAL(iv.q32.sq32,
916                     Q_CTRLINI(info->prec));
917                 break;
918         case VSD_DTYPE_Q_U32:
919                 *((u32q_t *)vss->iv) = Q_SCVAL(iv.q32.uq32,
920                     Q_CTRLINI(info->prec));
921                 break;
922         case VSD_DTYPE_Q_S64:
923                 *((s64q_t *)vss->iv) = Q_SCVAL(iv.q64.sq64,
924                     Q_CTRLINI(info->prec));
925                 break;
926         case VSD_DTYPE_Q_U64:
927                 *((u64q_t *)vss->iv) = Q_SCVAL(iv.q64.uq64,
928                     Q_CTRLINI(info->prec));
929                 break;
930         default:
931                 break;
932         }
933
934         return (0);
935 }
936
937 int
938 stats_vss_hlpr_init(enum vsd_dtype voi_dtype, uint32_t nvss,
939     struct voistatspec *vss)
940 {
941         int i, ret;
942
943         for (i = nvss - 1; i >= 0; i--) {
944                 if (vss[i].hlpr && (ret = vss[i].hlpr(voi_dtype, &vss[i],
945                     vss[i].hlprinfo)) != 0)
946                         return (ret);
947         }
948
949         return (0);
950 }
951
952 void
953 stats_vss_hlpr_cleanup(uint32_t nvss, struct voistatspec *vss)
954 {
955         int i;
956
957         for (i = nvss - 1; i >= 0; i--) {
958                 if (vss[i].hlpr) {
959                         stats_free((void *)vss[i].iv);
960                         vss[i].iv = NULL;
961                 }
962         }
963 }
964
965 int
966 stats_tpl_fetch(int tpl_id, struct statsblob_tpl **tpl)
967 {
968         int error;
969
970         error = 0;
971
972         TPL_LIST_WLOCK();
973         if (tpl_id < 0 || tpl_id >= (int)ntpl) {
974                 error = ENOENT;
975         } else {
976                 *tpl = tpllist[tpl_id];
977                 /* XXXLAS: Acquire refcount on tpl. */
978         }
979         TPL_LIST_WUNLOCK();
980
981         return (error);
982 }
983
984 int
985 stats_tpl_fetch_allocid(const char *name, uint32_t hash)
986 {
987         int i, tpl_id;
988
989         tpl_id = -ESRCH;
990
991         TPL_LIST_RLOCK();
992         for (i = ntpl - 1; i >= 0; i--) {
993                 if (name != NULL) {
994                         if (strlen(name) == strlen(tpllist[i]->mb->tplname) &&
995                             strncmp(name, tpllist[i]->mb->tplname,
996                             TPL_MAX_NAME_LEN) == 0 && (!hash || hash ==
997                             tpllist[i]->mb->tplhash)) {
998                                 tpl_id = i;
999                                 break;
1000                         }
1001                 } else if (hash == tpllist[i]->mb->tplhash) {
1002                         tpl_id = i;
1003                         break;
1004                 }
1005         }
1006         TPL_LIST_RUNLOCK();
1007
1008         return (tpl_id);
1009 }
1010
1011 int
1012 stats_tpl_id2name(uint32_t tpl_id, char *buf, size_t len)
1013 {
1014         int error;
1015
1016         error = 0;
1017
1018         TPL_LIST_RLOCK();
1019         if (tpl_id < ntpl) {
1020                 if (buf != NULL && len > strlen(tpllist[tpl_id]->mb->tplname))
1021                         strlcpy(buf, tpllist[tpl_id]->mb->tplname, len);
1022                 else
1023                         error = EOVERFLOW;
1024         } else
1025                 error = ENOENT;
1026         TPL_LIST_RUNLOCK();
1027
1028         return (error);
1029 }
1030
1031 int
1032 stats_tpl_sample_rollthedice(struct stats_tpl_sample_rate *rates, int nrates,
1033     void *seed_bytes, size_t seed_len)
1034 {
1035         uint32_t cum_pct, rnd_pct;
1036         int i;
1037
1038         cum_pct = 0;
1039
1040         /*
1041          * Choose a pseudorandom or seeded number in range [0,100] and use
1042          * it to make a sampling decision and template selection where required.
1043          * If no seed is supplied, a PRNG is used to generate a pseudorandom
1044          * number so that every selection is independent. If a seed is supplied,
1045          * the caller desires random selection across different seeds, but
1046          * deterministic selection given the same seed. This is achieved by
1047          * hashing the seed and using the hash as the random number source.
1048          *
1049          * XXXLAS: Characterise hash function output distribution.
1050          */
1051         if (seed_bytes == NULL)
1052                 rnd_pct = random() / (INT32_MAX / 100);
1053         else
1054                 rnd_pct = hash32_buf(seed_bytes, seed_len, 0) /
1055                     (UINT32_MAX / 100U);
1056
1057         /*
1058          * We map the randomly selected percentage on to the interval [0,100]
1059          * consisting of the cumulatively summed template sampling percentages.
1060          * The difference between the cumulative sum of all template sampling
1061          * percentages and 100 is treated as a NULL assignment i.e. no stats
1062          * template will be assigned, and -1 returned instead.
1063          */
1064         for (i = 0; i < nrates; i++) {
1065                 cum_pct += rates[i].tpl_sample_pct;
1066
1067                 KASSERT(cum_pct <= 100, ("%s cum_pct %u > 100", __func__,
1068                     cum_pct));
1069                 if (rnd_pct > cum_pct || rates[i].tpl_sample_pct == 0)
1070                         continue;
1071
1072                 return (rates[i].tpl_slot_id);
1073         }
1074
1075         return (-1);
1076 }
1077
1078 int
1079 stats_v1_blob_clone(struct statsblobv1 **dst, size_t dstmaxsz,
1080     struct statsblobv1 *src, uint32_t flags)
1081 {
1082         int error;
1083
1084         error = 0;
1085
1086         if (src == NULL || dst == NULL ||
1087             src->cursz < sizeof(struct statsblob) ||
1088             ((flags & SB_CLONE_ALLOCDST) &&
1089             (flags & (SB_CLONE_USRDSTNOFAULT | SB_CLONE_USRDST)))) {
1090                 error = EINVAL;
1091         } else if (flags & SB_CLONE_ALLOCDST) {
1092                 *dst = stats_realloc(NULL, 0, src->cursz, 0);
1093                 if (*dst)
1094                         (*dst)->maxsz = dstmaxsz = src->cursz;
1095                 else
1096                         error = ENOMEM;
1097         } else if (*dst == NULL || dstmaxsz < sizeof(struct statsblob)) {
1098                 error = EINVAL;
1099         }
1100
1101         if (!error) {
1102                 size_t postcurszlen;
1103
1104                 /*
1105                  * Clone src into dst except for the maxsz field. If dst is too
1106                  * small to hold all of src, only copy src's header and return
1107                  * EOVERFLOW.
1108                  */
1109 #ifdef _KERNEL
1110                 if (flags & SB_CLONE_USRDSTNOFAULT)
1111                         copyout_nofault(src, *dst,
1112                             offsetof(struct statsblob, maxsz));
1113                 else if (flags & SB_CLONE_USRDST)
1114                         copyout(src, *dst, offsetof(struct statsblob, maxsz));
1115                 else
1116 #endif
1117                         memcpy(*dst, src, offsetof(struct statsblob, maxsz));
1118
1119                 if (dstmaxsz >= src->cursz) {
1120                         postcurszlen = src->cursz -
1121                             offsetof(struct statsblob, cursz);
1122                 } else {
1123                         error = EOVERFLOW;
1124                         postcurszlen = sizeof(struct statsblob) -
1125                             offsetof(struct statsblob, cursz);
1126                 }
1127 #ifdef _KERNEL
1128                 if (flags & SB_CLONE_USRDSTNOFAULT)
1129                         copyout_nofault(&(src->cursz), &((*dst)->cursz),
1130                             postcurszlen);
1131                 else if (flags & SB_CLONE_USRDST)
1132                         copyout(&(src->cursz), &((*dst)->cursz), postcurszlen);
1133                 else
1134 #endif
1135                         memcpy(&((*dst)->cursz), &(src->cursz), postcurszlen);
1136         }
1137
1138         return (error);
1139 }
1140
1141 int
1142 stats_v1_tpl_alloc(const char *name, uint32_t flags __unused)
1143 {
1144         struct statsblobv1_tpl *tpl, **newtpllist;
1145         struct statsblobv1 *tpl_sb;
1146         struct metablob *tpl_mb;
1147         int tpl_id;
1148
1149         if (name != NULL && strlen(name) > TPL_MAX_NAME_LEN)
1150                 return (-EINVAL);
1151
1152         if (name != NULL && stats_tpl_fetch_allocid(name, 0) >= 0)
1153                 return (-EEXIST);
1154
1155         tpl = stats_realloc(NULL, 0, sizeof(struct statsblobv1_tpl), M_ZERO);
1156         tpl_mb = stats_realloc(NULL, 0, sizeof(struct metablob), M_ZERO);
1157         tpl_sb = stats_realloc(NULL, 0, sizeof(struct statsblobv1), M_ZERO);
1158
1159         if (tpl_mb != NULL && name != NULL)
1160                 tpl_mb->tplname = stats_strdup(name, 0);
1161
1162         if (tpl == NULL || tpl_sb == NULL || tpl_mb == NULL ||
1163             tpl_mb->tplname == NULL) {
1164                 stats_free(tpl);
1165                 stats_free(tpl_sb);
1166                 if (tpl_mb != NULL) {
1167                         stats_free(tpl_mb->tplname);
1168                         stats_free(tpl_mb);
1169                 }
1170                 return (-ENOMEM);
1171         }
1172
1173         tpl->mb = tpl_mb;
1174         tpl->sb = tpl_sb;
1175
1176         tpl_sb->abi = STATS_ABI_V1;
1177         tpl_sb->endian =
1178 #if BYTE_ORDER == LITTLE_ENDIAN
1179             SB_LE;
1180 #elif BYTE_ORDER == BIG_ENDIAN
1181             SB_BE;
1182 #else
1183             SB_UE;
1184 #endif
1185         tpl_sb->cursz = tpl_sb->maxsz = sizeof(struct statsblobv1);
1186         tpl_sb->stats_off = tpl_sb->statsdata_off = sizeof(struct statsblobv1);
1187
1188         TPL_LIST_WLOCK();
1189         newtpllist = stats_realloc(tpllist, ntpl * sizeof(void *),
1190             (ntpl + 1) * sizeof(void *), 0);
1191         if (newtpllist != NULL) {
1192                 tpl_id = ntpl++;
1193                 tpllist = (struct statsblob_tpl **)newtpllist;
1194                 tpllist[tpl_id] = (struct statsblob_tpl *)tpl;
1195                 stats_tpl_update_hash(tpllist[tpl_id]);
1196         } else {
1197                 stats_free(tpl);
1198                 stats_free(tpl_sb);
1199                 if (tpl_mb != NULL) {
1200                         stats_free(tpl_mb->tplname);
1201                         stats_free(tpl_mb);
1202                 }
1203                 tpl_id = -ENOMEM;
1204         }
1205         TPL_LIST_WUNLOCK();
1206
1207         return (tpl_id);
1208 }
1209
1210 int
1211 stats_v1_tpl_add_voistats(uint32_t tpl_id, int32_t voi_id, const char *voi_name,
1212     enum vsd_dtype voi_dtype, uint32_t nvss, struct voistatspec *vss,
1213     uint32_t flags)
1214 {
1215         struct voi *voi;
1216         struct voistat *tmpstat;
1217         struct statsblobv1 *tpl_sb;
1218         struct metablob *tpl_mb;
1219         int error, i, newstatdataidx, newvoibytes, newvoistatbytes,
1220             newvoistatdatabytes, newvoistatmaxid;
1221         uint32_t nbytes;
1222
1223         if (voi_id < 0 || voi_dtype == 0 || voi_dtype >= VSD_NUM_DTYPES ||
1224             nvss == 0 || vss == NULL)
1225                 return (EINVAL);
1226
1227         error = nbytes = newvoibytes = newvoistatbytes =
1228             newvoistatdatabytes = 0;
1229         newvoistatmaxid = -1;
1230
1231         /* Calculate the number of bytes required for the new voistats. */
1232         for (i = nvss - 1; i >= 0; i--) {
1233                 if (vss[i].stype == 0 || vss[i].stype >= VS_NUM_STYPES ||
1234                     vss[i].vs_dtype == 0 || vss[i].vs_dtype >= VSD_NUM_DTYPES ||
1235                     vss[i].iv == NULL || vss[i].vsdsz == 0)
1236                         return (EINVAL);
1237                 if ((int)vss[i].stype > newvoistatmaxid)
1238                         newvoistatmaxid = vss[i].stype;
1239                 newvoistatdatabytes += vss[i].vsdsz;
1240         }
1241
1242         if (flags & SB_VOI_RELUPDATE) {
1243                 /* XXXLAS: VOI state bytes may need to vary based on stat types. */
1244                 newvoistatdatabytes += sizeof(struct voistatdata_voistate);
1245         }
1246         nbytes += newvoistatdatabytes;
1247
1248         TPL_LIST_WLOCK();
1249         if (tpl_id < ntpl) {
1250                 tpl_sb = (struct statsblobv1 *)tpllist[tpl_id]->sb;
1251                 tpl_mb = tpllist[tpl_id]->mb;
1252
1253                 if (voi_id >= NVOIS(tpl_sb) || tpl_sb->vois[voi_id].id == -1) {
1254                         /* Adding a new VOI and associated stats. */
1255                         if (voi_id >= NVOIS(tpl_sb)) {
1256                                 /* We need to grow the tpl_sb->vois array. */
1257                                 newvoibytes = (voi_id - (NVOIS(tpl_sb) - 1)) *
1258                                     sizeof(struct voi);
1259                                 nbytes += newvoibytes;
1260                         }
1261                         newvoistatbytes =
1262                             (newvoistatmaxid + 1) * sizeof(struct voistat);
1263                 } else {
1264                         /* Adding stats to an existing VOI. */
1265                         if (newvoistatmaxid >
1266                             tpl_sb->vois[voi_id].voistatmaxid) {
1267                                 newvoistatbytes = (newvoistatmaxid -
1268                                     tpl_sb->vois[voi_id].voistatmaxid) *
1269                                     sizeof(struct voistat);
1270                         }
1271                         /* XXXLAS: KPI does not yet support expanding VOIs. */
1272                         error = EOPNOTSUPP;
1273                 }
1274                 nbytes += newvoistatbytes;
1275
1276                 if (!error && newvoibytes > 0) {
1277                         struct voi_meta *voi_meta = tpl_mb->voi_meta;
1278
1279                         voi_meta = stats_realloc(voi_meta, voi_meta == NULL ?
1280                             0 : NVOIS(tpl_sb) * sizeof(struct voi_meta),
1281                             (1 + voi_id) * sizeof(struct voi_meta),
1282                             M_ZERO);
1283
1284                         if (voi_meta == NULL)
1285                                 error = ENOMEM;
1286                         else
1287                                 tpl_mb->voi_meta = voi_meta;
1288                 }
1289
1290                 if (!error) {
1291                         /* NB: Resizing can change where tpl_sb points. */
1292                         error = stats_v1_blob_expand(&tpl_sb, newvoibytes,
1293                             newvoistatbytes, newvoistatdatabytes);
1294                 }
1295
1296                 if (!error) {
1297                         tpl_mb->voi_meta[voi_id].name = stats_strdup(voi_name,
1298                             0);
1299                         if (tpl_mb->voi_meta[voi_id].name == NULL)
1300                                 error = ENOMEM;
1301                 }
1302
1303                 if (!error) {
1304                         /* Update the template list with the resized pointer. */
1305                         tpllist[tpl_id]->sb = (struct statsblob *)tpl_sb;
1306
1307                         /* Update the template. */
1308                         voi = &tpl_sb->vois[voi_id];
1309
1310                         if (voi->id < 0) {
1311                                 /* VOI is new and needs to be initialised. */
1312                                 voi->id = voi_id;
1313                                 voi->dtype = voi_dtype;
1314                                 voi->stats_off = tpl_sb->stats_off;
1315                                 if (flags & SB_VOI_RELUPDATE)
1316                                         voi->flags |= VOI_REQSTATE;
1317                         } else {
1318                                 /*
1319                                  * XXXLAS: When this else block is written, the
1320                                  * "KPI does not yet support expanding VOIs"
1321                                  * error earlier in this function can be
1322                                  * removed. What is required here is to shuffle
1323                                  * the voistat array such that the new stats for
1324                                  * the voi are contiguous, which will displace
1325                                  * stats for other vois that reside after the
1326                                  * voi being updated. The other vois then need
1327                                  * to have their stats_off adjusted post
1328                                  * shuffle.
1329                                  */
1330                         }
1331
1332                         voi->voistatmaxid = newvoistatmaxid;
1333                         newstatdataidx = 0;
1334
1335                         if (voi->flags & VOI_REQSTATE) {
1336                                 /* Initialise the voistate stat in slot 0. */
1337                                 tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off);
1338                                 tmpstat->stype = VS_STYPE_VOISTATE;
1339                                 tmpstat->flags = 0;
1340                                 tmpstat->dtype = VSD_DTYPE_VOISTATE;
1341                                 newstatdataidx = tmpstat->dsz =
1342                                     sizeof(struct voistatdata_numeric);
1343                                 tmpstat->data_off = tpl_sb->statsdata_off;
1344                         }
1345
1346                         for (i = 0; (uint32_t)i < nvss; i++) {
1347                                 tmpstat = BLOB_OFFSET(tpl_sb, voi->stats_off +
1348                                     (vss[i].stype * sizeof(struct voistat)));
1349                                 KASSERT(tmpstat->stype < 0, ("voistat %p "
1350                                     "already initialised", tmpstat));
1351                                 tmpstat->stype = vss[i].stype;
1352                                 tmpstat->flags = vss[i].flags;
1353                                 tmpstat->dtype = vss[i].vs_dtype;
1354                                 tmpstat->dsz = vss[i].vsdsz;
1355                                 tmpstat->data_off = tpl_sb->statsdata_off +
1356                                     newstatdataidx;
1357                                 memcpy(BLOB_OFFSET(tpl_sb, tmpstat->data_off),
1358                                     vss[i].iv, vss[i].vsdsz);
1359                                 newstatdataidx += vss[i].vsdsz;
1360                         }
1361
1362                         /* Update the template version hash. */
1363                         stats_tpl_update_hash(tpllist[tpl_id]);
1364                         /* XXXLAS: Confirm tpl name/hash pair remains unique. */
1365                 }
1366         } else
1367                 error = EINVAL;
1368         TPL_LIST_WUNLOCK();
1369
1370         return (error);
1371 }
1372
1373 struct statsblobv1 *
1374 stats_v1_blob_alloc(uint32_t tpl_id, uint32_t flags __unused)
1375 {
1376         struct statsblobv1 *sb;
1377         int error;
1378
1379         sb = NULL;
1380
1381         TPL_LIST_RLOCK();
1382         if (tpl_id < ntpl) {
1383                 sb = stats_realloc(NULL, 0, tpllist[tpl_id]->sb->maxsz, 0);
1384                 if (sb != NULL) {
1385                         sb->maxsz = tpllist[tpl_id]->sb->maxsz;
1386                         error = stats_v1_blob_init_locked(sb, tpl_id, 0);
1387                 } else
1388                         error = ENOMEM;
1389
1390                 if (error) {
1391                         stats_free(sb);
1392                         sb = NULL;
1393                 }
1394         }
1395         TPL_LIST_RUNLOCK();
1396
1397         return (sb);
1398 }
1399
/*
 * Release a stats blob by passing it to stats_free().
 */
void
stats_v1_blob_destroy(struct statsblobv1 *sb)
{
        stats_free(sb);
}
1406
1407 int
1408 stats_v1_voistat_fetch_dptr(struct statsblobv1 *sb, int32_t voi_id,
1409     enum voi_stype stype, enum vsd_dtype *retdtype, struct voistatdata **retvsd,
1410     size_t *retvsdsz)
1411 {
1412         struct voi *v;
1413         struct voistat *vs;
1414
1415         if (retvsd == NULL || sb == NULL || sb->abi != STATS_ABI_V1 ||
1416             voi_id >= NVOIS(sb))
1417                 return (EINVAL);
1418
1419         v = &sb->vois[voi_id];
1420         if ((__typeof(v->voistatmaxid))stype > v->voistatmaxid)
1421                 return (EINVAL);
1422
1423         vs = BLOB_OFFSET(sb, v->stats_off + (stype * sizeof(struct voistat)));
1424         *retvsd = BLOB_OFFSET(sb, vs->data_off);
1425         if (retdtype != NULL)
1426                 *retdtype = vs->dtype;
1427         if (retvsdsz != NULL)
1428                 *retvsdsz = vs->dsz;
1429
1430         return (0);
1431 }
1432
1433 int
1434 stats_v1_blob_init(struct statsblobv1 *sb, uint32_t tpl_id, uint32_t flags)
1435 {
1436         int error;
1437
1438         error = 0;
1439
1440         TPL_LIST_RLOCK();
1441         if (sb == NULL || tpl_id >= ntpl) {
1442                 error = EINVAL;
1443         } else {
1444                 error = stats_v1_blob_init_locked(sb, tpl_id, flags);
1445         }
1446         TPL_LIST_RUNLOCK();
1447
1448         return (error);
1449 }
1450
1451 static inline int
1452 stats_v1_blob_init_locked(struct statsblobv1 *sb, uint32_t tpl_id,
1453     uint32_t flags __unused)
1454 {
1455         int error;
1456
1457         TPL_LIST_RLOCK_ASSERT();
1458         error = (sb->maxsz >= tpllist[tpl_id]->sb->cursz) ? 0 : EOVERFLOW;
1459         KASSERT(!error,
1460             ("sb %d instead of %d bytes", sb->maxsz, tpllist[tpl_id]->sb->cursz));
1461
1462         if (!error) {
1463                 memcpy(sb, tpllist[tpl_id]->sb, tpllist[tpl_id]->sb->cursz);
1464                 sb->created = sb->lastrst = stats_sbinuptime();
1465                 sb->tplhash = tpllist[tpl_id]->mb->tplhash;
1466         }
1467
1468         return (error);
1469 }
1470
1471 static int
1472 stats_v1_blob_expand(struct statsblobv1 **sbpp, int newvoibytes,
1473     int newvoistatbytes, int newvoistatdatabytes)
1474 {
1475         struct statsblobv1 *sb;
1476         struct voi *tmpvoi;
1477         struct voistat *tmpvoistat, *voistat_array;
1478         int error, i, idxnewvois, idxnewvoistats, nbytes, nvoistats;
1479
1480         KASSERT(newvoibytes % sizeof(struct voi) == 0,
1481             ("Bad newvoibytes %d", newvoibytes));
1482         KASSERT(newvoistatbytes % sizeof(struct voistat) == 0,
1483             ("Bad newvoistatbytes %d", newvoistatbytes));
1484
1485         error = ((newvoibytes % sizeof(struct voi) == 0) &&
1486             (newvoistatbytes % sizeof(struct voistat) == 0)) ? 0 : EINVAL;
1487         sb = *sbpp;
1488         nbytes = newvoibytes + newvoistatbytes + newvoistatdatabytes;
1489
1490         /*
1491          * XXXLAS: Required until we gain support for flags which alter the
1492          * units of size/offset fields in key structs.
1493          */
1494         if (!error && ((((int)sb->cursz) + nbytes) > SB_V1_MAXSZ))
1495                 error = EFBIG;
1496
1497         if (!error && (sb->cursz + nbytes > sb->maxsz)) {
1498                 /* Need to expand our blob. */
1499                 sb = stats_realloc(sb, sb->maxsz, sb->cursz + nbytes, M_ZERO);
1500                 if (sb != NULL) {
1501                         sb->maxsz = sb->cursz + nbytes;
1502                         *sbpp = sb;
1503                 } else
1504                     error = ENOMEM;
1505         }
1506
1507         if (!error) {
1508                 /*
1509                  * Shuffle memory within the expanded blob working from the end
1510                  * backwards, leaving gaps for the new voistat and voistatdata
1511                  * structs at the beginning of their respective blob regions,
1512                  * and for the new voi structs at the end of their blob region.
1513                  */
1514                 memmove(BLOB_OFFSET(sb, sb->statsdata_off + nbytes),
1515                     BLOB_OFFSET(sb, sb->statsdata_off),
1516                     sb->cursz - sb->statsdata_off);
1517                 memmove(BLOB_OFFSET(sb, sb->stats_off + newvoibytes +
1518                     newvoistatbytes), BLOB_OFFSET(sb, sb->stats_off),
1519                     sb->statsdata_off - sb->stats_off);
1520
1521                 /* First index of new voi/voistat structs to be initialised. */
1522                 idxnewvois = NVOIS(sb);
1523                 idxnewvoistats = (newvoistatbytes / sizeof(struct voistat)) - 1;
1524
1525                 /* Update housekeeping variables and offsets. */
1526                 sb->cursz += nbytes;
1527                 sb->stats_off += newvoibytes;
1528                 sb->statsdata_off += newvoibytes + newvoistatbytes;
1529
1530                 /* XXXLAS: Zeroing not strictly needed but aids debugging. */
1531                 memset(&sb->vois[idxnewvois], '\0', newvoibytes);
1532                 memset(BLOB_OFFSET(sb, sb->stats_off), '\0',
1533                     newvoistatbytes);
1534                 memset(BLOB_OFFSET(sb, sb->statsdata_off), '\0',
1535                     newvoistatdatabytes);
1536
1537                 /* Initialise new voi array members and update offsets. */
1538                 for (i = 0; i < NVOIS(sb); i++) {
1539                         tmpvoi = &sb->vois[i];
1540                         if (i >= idxnewvois) {
1541                                 tmpvoi->id = tmpvoi->voistatmaxid = -1;
1542                         } else if (tmpvoi->id > -1) {
1543                                 tmpvoi->stats_off += newvoibytes +
1544                                     newvoistatbytes;
1545                         }
1546                 }
1547
1548                 /* Initialise new voistat array members and update offsets. */
1549                 nvoistats = (sb->statsdata_off - sb->stats_off) /
1550                     sizeof(struct voistat);
1551                 voistat_array = BLOB_OFFSET(sb, sb->stats_off);
1552                 for (i = 0; i < nvoistats; i++) {
1553                         tmpvoistat = &voistat_array[i];
1554                         if (i <= idxnewvoistats) {
1555                                 tmpvoistat->stype = -1;
1556                         } else if (tmpvoistat->stype > -1) {
1557                                 tmpvoistat->data_off += nbytes;
1558                         }
1559                 }
1560         }
1561
1562         return (error);
1563 }
1564
1565 static void
1566 stats_v1_blob_finalise(struct statsblobv1 *sb __unused)
1567 {
1568
1569         /* XXXLAS: Fill this in. */
1570 }
1571
1572 static void
1573 stats_v1_blob_iter(struct statsblobv1 *sb, stats_v1_blob_itercb_t icb,
1574     void *usrctx, uint32_t flags)
1575 {
1576         struct voi *v;
1577         struct voistat *vs;
1578         struct sb_iter_ctx ctx;
1579         int i, j, firstvoi;
1580
1581         ctx.usrctx = usrctx;
1582         ctx.flags = SB_IT_FIRST_CB;
1583         firstvoi = 1;
1584
1585         for (i = 0; i < NVOIS(sb); i++) {
1586                 v = &sb->vois[i];
1587                 ctx.vslot = i;
1588                 ctx.vsslot = -1;
1589                 ctx.flags |= SB_IT_FIRST_VOISTAT;
1590
1591                 if (firstvoi)
1592                         ctx.flags |= SB_IT_FIRST_VOI;
1593                 else if (i == (NVOIS(sb) - 1))
1594                         ctx.flags |= SB_IT_LAST_VOI | SB_IT_LAST_CB;
1595
1596                 if (v->id < 0 && (flags & SB_IT_NULLVOI)) {
1597                         if (icb(sb, v, NULL, &ctx))
1598                                 return;
1599                         firstvoi = 0;
1600                         ctx.flags &= ~SB_IT_FIRST_CB;
1601                 }
1602
1603                 /* If NULL voi, v->voistatmaxid == -1 */
1604                 for (j = 0; j <= v->voistatmaxid; j++) {
1605                         vs = &((struct voistat *)BLOB_OFFSET(sb,
1606                             v->stats_off))[j];
1607                         if (vs->stype < 0 &&
1608                             !(flags & SB_IT_NULLVOISTAT))
1609                                 continue;
1610
1611                         if (j == v->voistatmaxid) {
1612                                 ctx.flags |= SB_IT_LAST_VOISTAT;
1613                                 if (i == (NVOIS(sb) - 1))
1614                                         ctx.flags |=
1615                                             SB_IT_LAST_CB;
1616                         } else
1617                                 ctx.flags &= ~SB_IT_LAST_CB;
1618
1619                         ctx.vsslot = j;
1620                         if (icb(sb, v, vs, &ctx))
1621                                 return;
1622
1623                         ctx.flags &= ~(SB_IT_FIRST_CB | SB_IT_FIRST_VOISTAT |
1624                             SB_IT_LAST_VOISTAT);
1625                 }
1626                 ctx.flags &= ~(SB_IT_FIRST_VOI | SB_IT_LAST_VOI);
1627         }
1628 }
1629
1630 static inline void
1631 stats_voistatdata_tdgst_tostr(enum vsd_dtype voi_dtype __unused,
1632     const struct voistatdata_tdgst *tdgst, enum vsd_dtype tdgst_dtype,
1633     size_t tdgst_dsz __unused, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
1634 {
1635         const struct ctdth32 *ctd32tree;
1636         const struct ctdth64 *ctd64tree;
1637         const struct voistatdata_tdgstctd32 *ctd32;
1638         const struct voistatdata_tdgstctd64 *ctd64;
1639         const char *fmtstr;
1640         uint64_t smplcnt, compcnt;
1641         int is32bit, qmaxstrlen;
1642         uint16_t maxctds, curctds;
1643
1644         switch (tdgst_dtype) {
1645         case VSD_DTYPE_TDGSTCLUST32:
1646                 smplcnt = CONSTVSD(tdgstclust32, tdgst)->smplcnt;
1647                 compcnt = CONSTVSD(tdgstclust32, tdgst)->compcnt;
1648                 maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
1649                 curctds = ARB_CURNODES(&CONSTVSD(tdgstclust32, tdgst)->ctdtree);
1650                 ctd32tree = &CONSTVSD(tdgstclust32, tdgst)->ctdtree;
1651                 ctd32 = (objdump ? ARB_CNODE(ctd32tree, 0) :
1652                     ARB_CMIN(ctdth32, ctd32tree));
1653                 qmaxstrlen = (ctd32 == NULL) ? 1 : Q_MAXSTRLEN(ctd32->mu, 10);
1654                 is32bit = 1;
1655                 ctd64tree = NULL;
1656                 ctd64 = NULL;
1657                 break;
1658         case VSD_DTYPE_TDGSTCLUST64:
1659                 smplcnt = CONSTVSD(tdgstclust64, tdgst)->smplcnt;
1660                 compcnt = CONSTVSD(tdgstclust64, tdgst)->compcnt;
1661                 maxctds = ARB_MAXNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
1662                 curctds = ARB_CURNODES(&CONSTVSD(tdgstclust64, tdgst)->ctdtree);
1663                 ctd64tree = &CONSTVSD(tdgstclust64, tdgst)->ctdtree;
1664                 ctd64 = (objdump ? ARB_CNODE(ctd64tree, 0) :
1665                     ARB_CMIN(ctdth64, ctd64tree));
1666                 qmaxstrlen = (ctd64 == NULL) ? 1 : Q_MAXSTRLEN(ctd64->mu, 10);
1667                 is32bit = 0;
1668                 ctd32tree = NULL;
1669                 ctd32 = NULL;
1670                 break;
1671         default:
1672                 return;
1673         }
1674
1675         switch (fmt) {
1676         case SB_STRFMT_FREEFORM:
1677                 fmtstr = "smplcnt=%ju, compcnt=%ju, maxctds=%hu, nctds=%hu";
1678                 break;
1679         case SB_STRFMT_JSON:
1680         default:
1681                 fmtstr =
1682                     "\"smplcnt\":%ju,\"compcnt\":%ju,\"maxctds\":%hu,"
1683                     "\"nctds\":%hu,\"ctds\":[";
1684                 break;
1685         }
1686         sbuf_printf(buf, fmtstr, (uintmax_t)smplcnt, (uintmax_t)compcnt,
1687             maxctds, curctds);
1688
1689         while ((is32bit ? NULL != ctd32 : NULL != ctd64)) {
1690                 char qstr[qmaxstrlen];
1691
1692                 switch (fmt) {
1693                 case SB_STRFMT_FREEFORM:
1694                         fmtstr = "\n\t\t\t\t";
1695                         break;
1696                 case SB_STRFMT_JSON:
1697                 default:
1698                         fmtstr = "{";
1699                         break;
1700                 }
1701                 sbuf_cat(buf, fmtstr);
1702
1703                 if (objdump) {
1704                         switch (fmt) {
1705                         case SB_STRFMT_FREEFORM:
1706                                 fmtstr = "ctd[%hu].";
1707                                 break;
1708                         case SB_STRFMT_JSON:
1709                         default:
1710                                 fmtstr = "\"ctd\":%hu,";
1711                                 break;
1712                         }
1713                         sbuf_printf(buf, fmtstr, is32bit ?
1714                             ARB_SELFIDX(ctd32tree, ctd32) :
1715                             ARB_SELFIDX(ctd64tree, ctd64));
1716                 }
1717
1718                 switch (fmt) {
1719                 case SB_STRFMT_FREEFORM:
1720                         fmtstr = "{mu=";
1721                         break;
1722                 case SB_STRFMT_JSON:
1723                 default:
1724                         fmtstr = "\"mu\":";
1725                         break;
1726                 }
1727                 sbuf_cat(buf, fmtstr);
1728                 Q_TOSTR((is32bit ? ctd32->mu : ctd64->mu), -1, 10, qstr,
1729                     sizeof(qstr));
1730                 sbuf_cat(buf, qstr);
1731
1732                 switch (fmt) {
1733                 case SB_STRFMT_FREEFORM:
1734                         fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
1735                         break;
1736                 case SB_STRFMT_JSON:
1737                 default:
1738                         fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
1739                         break;
1740                 }
1741                 sbuf_printf(buf, fmtstr,
1742                     is32bit ? ctd32->cnt : (uintmax_t)ctd64->cnt);
1743
1744                 if (is32bit)
1745                         ctd32 = (objdump ? ARB_CNODE(ctd32tree,
1746                             ARB_SELFIDX(ctd32tree, ctd32) + 1) :
1747                             ARB_CNEXT(ctdth32, ctd32tree, ctd32));
1748                 else
1749                         ctd64 = (objdump ? ARB_CNODE(ctd64tree,
1750                             ARB_SELFIDX(ctd64tree, ctd64) + 1) :
1751                             ARB_CNEXT(ctdth64, ctd64tree, ctd64));
1752
1753                 if (fmt == SB_STRFMT_JSON &&
1754                     (is32bit ? NULL != ctd32 : NULL != ctd64))
1755                         sbuf_putc(buf, ',');
1756         }
1757         if (fmt == SB_STRFMT_JSON)
1758                 sbuf_cat(buf, "]");
1759 }
1760
1761 static inline void
1762 stats_voistatdata_hist_tostr(enum vsd_dtype voi_dtype,
1763     const struct voistatdata_hist *hist, enum vsd_dtype hist_dtype,
1764     size_t hist_dsz, enum sb_str_fmt fmt, struct sbuf *buf, int objdump)
1765 {
1766         const struct voistatdata_numeric *bkt_lb, *bkt_ub;
1767         const char *fmtstr;
1768         int is32bit;
1769         uint16_t i, nbkts;
1770
1771         switch (hist_dtype) {
1772         case VSD_DTYPE_CRHIST32:
1773                 nbkts = HIST_VSDSZ2NBKTS(crhist32, hist_dsz);
1774                 is32bit = 1;
1775                 break;
1776         case VSD_DTYPE_DRHIST32:
1777                 nbkts = HIST_VSDSZ2NBKTS(drhist32, hist_dsz);
1778                 is32bit = 1;
1779                 break;
1780         case VSD_DTYPE_DVHIST32:
1781                 nbkts = HIST_VSDSZ2NBKTS(dvhist32, hist_dsz);
1782                 is32bit = 1;
1783                 break;
1784         case VSD_DTYPE_CRHIST64:
1785                 nbkts = HIST_VSDSZ2NBKTS(crhist64, hist_dsz);
1786                 is32bit = 0;
1787                 break;
1788         case VSD_DTYPE_DRHIST64:
1789                 nbkts = HIST_VSDSZ2NBKTS(drhist64, hist_dsz);
1790                 is32bit = 0;
1791                 break;
1792         case VSD_DTYPE_DVHIST64:
1793                 nbkts = HIST_VSDSZ2NBKTS(dvhist64, hist_dsz);
1794                 is32bit = 0;
1795                 break;
1796         default:
1797                 return;
1798         }
1799
1800         switch (fmt) {
1801         case SB_STRFMT_FREEFORM:
1802                 fmtstr = "nbkts=%hu, ";
1803                 break;
1804         case SB_STRFMT_JSON:
1805         default:
1806                 fmtstr = "\"nbkts\":%hu,";
1807                 break;
1808         }
1809         sbuf_printf(buf, fmtstr, nbkts);
1810
1811         switch (fmt) {
1812                 case SB_STRFMT_FREEFORM:
1813                         fmtstr = (is32bit ? "oob=%u" : "oob=%ju");
1814                         break;
1815                 case SB_STRFMT_JSON:
1816                 default:
1817                         fmtstr = (is32bit ? "\"oob\":%u,\"bkts\":[" :
1818                             "\"oob\":%ju,\"bkts\":[");
1819                         break;
1820         }
1821         sbuf_printf(buf, fmtstr, is32bit ? VSD_CONSTHIST_FIELDVAL(hist,
1822             hist_dtype, oob) : (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist,
1823             hist_dtype, oob));
1824
1825         for (i = 0; i < nbkts; i++) {
1826                 switch (hist_dtype) {
1827                 case VSD_DTYPE_CRHIST32:
1828                 case VSD_DTYPE_CRHIST64:
1829                         bkt_lb = VSD_CONSTCRHIST_FIELDPTR(hist, hist_dtype,
1830                             bkts[i].lb);
1831                         if (i < nbkts - 1)
1832                                 bkt_ub = VSD_CONSTCRHIST_FIELDPTR(hist,
1833                                     hist_dtype, bkts[i + 1].lb);
1834                         else
1835                                 bkt_ub = &numeric_limits[LIM_MAX][voi_dtype];
1836                         break;
1837                 case VSD_DTYPE_DRHIST32:
1838                 case VSD_DTYPE_DRHIST64:
1839                         bkt_lb = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
1840                             bkts[i].lb);
1841                         bkt_ub = VSD_CONSTDRHIST_FIELDPTR(hist, hist_dtype,
1842                             bkts[i].ub);
1843                         break;
1844                 case VSD_DTYPE_DVHIST32:
1845                 case VSD_DTYPE_DVHIST64:
1846                         bkt_lb = bkt_ub = VSD_CONSTDVHIST_FIELDPTR(hist,
1847                             hist_dtype, bkts[i].val);
1848                         break;
1849                 default:
1850                         break;
1851                 }
1852
1853                 switch (fmt) {
1854                 case SB_STRFMT_FREEFORM:
1855                         fmtstr = "\n\t\t\t\t";
1856                         break;
1857                 case SB_STRFMT_JSON:
1858                 default:
1859                         fmtstr = "{";
1860                         break;
1861                 }
1862                 sbuf_cat(buf, fmtstr);
1863
1864                 if (objdump) {
1865                         switch (fmt) {
1866                         case SB_STRFMT_FREEFORM:
1867                                 fmtstr = "bkt[%hu].";
1868                                 break;
1869                         case SB_STRFMT_JSON:
1870                         default:
1871                                 fmtstr = "\"bkt\":%hu,";
1872                                 break;
1873                         }
1874                         sbuf_printf(buf, fmtstr, i);
1875                 }
1876
1877                 switch (fmt) {
1878                 case SB_STRFMT_FREEFORM:
1879                         fmtstr = "{lb=";
1880                         break;
1881                 case SB_STRFMT_JSON:
1882                 default:
1883                         fmtstr = "\"lb\":";
1884                         break;
1885                 }
1886                 sbuf_cat(buf, fmtstr);
1887                 stats_voistatdata_tostr((const struct voistatdata *)bkt_lb,
1888                     voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
1889                     fmt, buf, objdump);
1890
1891                 switch (fmt) {
1892                 case SB_STRFMT_FREEFORM:
1893                         fmtstr = ",ub=";
1894                         break;
1895                 case SB_STRFMT_JSON:
1896                 default:
1897                         fmtstr = ",\"ub\":";
1898                         break;
1899                 }
1900                 sbuf_cat(buf, fmtstr);
1901                 stats_voistatdata_tostr((const struct voistatdata *)bkt_ub,
1902                     voi_dtype, voi_dtype, sizeof(struct voistatdata_numeric),
1903                     fmt, buf, objdump);
1904
1905                 switch (fmt) {
1906                 case SB_STRFMT_FREEFORM:
1907                         fmtstr = is32bit ? ",cnt=%u}" : ",cnt=%ju}";
1908                         break;
1909                 case SB_STRFMT_JSON:
1910                 default:
1911                         fmtstr = is32bit ? ",\"cnt\":%u}" : ",\"cnt\":%ju}";
1912                         break;
1913                 }
1914                 sbuf_printf(buf, fmtstr, is32bit ?
1915                     VSD_CONSTHIST_FIELDVAL(hist, hist_dtype, bkts[i].cnt) :
1916                     (uintmax_t)VSD_CONSTHIST_FIELDVAL(hist, hist_dtype,
1917                     bkts[i].cnt));
1918
1919                 if (fmt == SB_STRFMT_JSON && i < nbkts - 1)
1920                         sbuf_putc(buf, ',');
1921         }
1922         if (fmt == SB_STRFMT_JSON)
1923                 sbuf_cat(buf, "]");
1924 }
1925
1926 int
1927 stats_voistatdata_tostr(const struct voistatdata *vsd, enum vsd_dtype voi_dtype,
1928     enum vsd_dtype vsd_dtype, size_t vsd_sz, enum sb_str_fmt fmt,
1929     struct sbuf *buf, int objdump)
1930 {
1931         const char *fmtstr;
1932
1933         if (vsd == NULL || buf == NULL || voi_dtype >= VSD_NUM_DTYPES ||
1934             vsd_dtype >= VSD_NUM_DTYPES || fmt >= SB_STRFMT_NUM_FMTS)
1935                 return (EINVAL);
1936
1937         switch (vsd_dtype) {
1938         case VSD_DTYPE_VOISTATE:
1939                 switch (fmt) {
1940                 case SB_STRFMT_FREEFORM:
1941                         fmtstr = "prev=";
1942                         break;
1943                 case SB_STRFMT_JSON:
1944                 default:
1945                         fmtstr = "\"prev\":";
1946                         break;
1947                 }
1948                 sbuf_cat(buf, fmtstr);
1949                 /*
1950                  * Render prev by passing it as *vsd and voi_dtype as vsd_dtype.
1951                  */
1952                 stats_voistatdata_tostr(
1953                     (const struct voistatdata *)&CONSTVSD(voistate, vsd)->prev,
1954                     voi_dtype, voi_dtype, vsd_sz, fmt, buf, objdump);
1955                 break;
1956         case VSD_DTYPE_INT_S32:
1957                 sbuf_printf(buf, "%d", vsd->int32.s32);
1958                 break;
1959         case VSD_DTYPE_INT_U32:
1960                 sbuf_printf(buf, "%u", vsd->int32.u32);
1961                 break;
1962         case VSD_DTYPE_INT_S64:
1963                 sbuf_printf(buf, "%jd", (intmax_t)vsd->int64.s64);
1964                 break;
1965         case VSD_DTYPE_INT_U64:
1966                 sbuf_printf(buf, "%ju", (uintmax_t)vsd->int64.u64);
1967                 break;
1968         case VSD_DTYPE_INT_SLONG:
1969                 sbuf_printf(buf, "%ld", vsd->intlong.slong);
1970                 break;
1971         case VSD_DTYPE_INT_ULONG:
1972                 sbuf_printf(buf, "%lu", vsd->intlong.ulong);
1973                 break;
1974         case VSD_DTYPE_Q_S32:
1975                 {
1976                 char qstr[Q_MAXSTRLEN(vsd->q32.sq32, 10)];
1977                 Q_TOSTR((s32q_t)vsd->q32.sq32, -1, 10, qstr, sizeof(qstr));
1978                 sbuf_cat(buf, qstr);
1979                 }
1980                 break;
1981         case VSD_DTYPE_Q_U32:
1982                 {
1983                 char qstr[Q_MAXSTRLEN(vsd->q32.uq32, 10)];
1984                 Q_TOSTR((u32q_t)vsd->q32.uq32, -1, 10, qstr, sizeof(qstr));
1985                 sbuf_cat(buf, qstr);
1986                 }
1987                 break;
1988         case VSD_DTYPE_Q_S64:
1989                 {
1990                 char qstr[Q_MAXSTRLEN(vsd->q64.sq64, 10)];
1991                 Q_TOSTR((s64q_t)vsd->q64.sq64, -1, 10, qstr, sizeof(qstr));
1992                 sbuf_cat(buf, qstr);
1993                 }
1994                 break;
1995         case VSD_DTYPE_Q_U64:
1996                 {
1997                 char qstr[Q_MAXSTRLEN(vsd->q64.uq64, 10)];
1998                 Q_TOSTR((u64q_t)vsd->q64.uq64, -1, 10, qstr, sizeof(qstr));
1999                 sbuf_cat(buf, qstr);
2000                 }
2001                 break;
2002         case VSD_DTYPE_CRHIST32:
2003         case VSD_DTYPE_DRHIST32:
2004         case VSD_DTYPE_DVHIST32:
2005         case VSD_DTYPE_CRHIST64:
2006         case VSD_DTYPE_DRHIST64:
2007         case VSD_DTYPE_DVHIST64:
2008                 stats_voistatdata_hist_tostr(voi_dtype, CONSTVSD(hist, vsd),
2009                     vsd_dtype, vsd_sz, fmt, buf, objdump);
2010                 break;
2011         case VSD_DTYPE_TDGSTCLUST32:
2012         case VSD_DTYPE_TDGSTCLUST64:
2013                 stats_voistatdata_tdgst_tostr(voi_dtype,
2014                     CONSTVSD(tdgst, vsd), vsd_dtype, vsd_sz, fmt, buf,
2015                     objdump);
2016                 break;
2017         default:
2018                 break;
2019         }
2020
2021         return (sbuf_error(buf));
2022 }
2023
2024 static void
2025 stats_v1_itercb_tostr_freeform(struct statsblobv1 *sb, struct voi *v,
2026     struct voistat *vs, struct sb_iter_ctx *ctx)
2027 {
2028         struct sb_tostrcb_ctx *sctx;
2029         struct metablob *tpl_mb;
2030         struct sbuf *buf;
2031         void *vsd;
2032         uint8_t dump;
2033
2034         sctx = ctx->usrctx;
2035         buf = sctx->buf;
2036         tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2037         dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2038
2039         if (ctx->flags & SB_IT_FIRST_CB) {
2040                 sbuf_printf(buf, "struct statsblobv1@%p", sb);
2041                 if (dump) {
2042                         sbuf_printf(buf, ", abi=%hhu, endian=%hhu, maxsz=%hu, "
2043                             "cursz=%hu, created=%jd, lastrst=%jd, flags=0x%04hx, "
2044                             "stats_off=%hu, statsdata_off=%hu",
2045                             sb->abi, sb->endian, sb->maxsz, sb->cursz,
2046                             sb->created, sb->lastrst, sb->flags, sb->stats_off,
2047                             sb->statsdata_off);
2048                 }
2049                 sbuf_printf(buf, ", tplhash=%u", sb->tplhash);
2050         }
2051
2052         if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2053                 sbuf_printf(buf, "\n\tvois[%hd]: id=%hd", ctx->vslot, v->id);
2054                 if (v->id < 0)
2055                         return;
2056                 sbuf_printf(buf, ", name=\"%s\"", (tpl_mb == NULL) ? "" :
2057                     tpl_mb->voi_meta[v->id].name);
2058                 if (dump)
2059                     sbuf_printf(buf, ", flags=0x%04hx, dtype=%s, "
2060                     "voistatmaxid=%hhd, stats_off=%hu", v->flags,
2061                     vsd_dtype2name[v->dtype], v->voistatmaxid, v->stats_off);
2062         }
2063
2064         if (!dump && vs->stype <= 0)
2065                 return;
2066
2067         sbuf_printf(buf, "\n\t\tvois[%hd]stat[%hhd]: stype=", v->id, ctx->vsslot);
2068         if (vs->stype < 0) {
2069                 sbuf_printf(buf, "%hhd", vs->stype);
2070                 return;
2071         } else
2072                 sbuf_printf(buf, "%s, errs=%hu", vs_stype2name[vs->stype],
2073                     vs->errs);
2074         vsd = BLOB_OFFSET(sb, vs->data_off);
2075         if (dump)
2076                 sbuf_printf(buf, ", flags=0x%04x, dtype=%s, dsz=%hu, "
2077                     "data_off=%hu", vs->flags, vsd_dtype2name[vs->dtype],
2078                     vs->dsz, vs->data_off);
2079
2080         sbuf_printf(buf, "\n\t\t\tvoistatdata: ");
2081         stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2082             sctx->fmt, buf, dump);
2083 }
2084
2085 static void
2086 stats_v1_itercb_tostr_json(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2087     struct sb_iter_ctx *ctx)
2088 {
2089         struct sb_tostrcb_ctx *sctx;
2090         struct metablob *tpl_mb;
2091         struct sbuf *buf;
2092         const char *fmtstr;
2093         void *vsd;
2094         uint8_t dump;
2095
2096         sctx = ctx->usrctx;
2097         buf = sctx->buf;
2098         tpl_mb = sctx->tpl ? sctx->tpl->mb : NULL;
2099         dump = ((sctx->flags & SB_TOSTR_OBJDUMP) != 0);
2100
2101         if (ctx->flags & SB_IT_FIRST_CB) {
2102                 sbuf_putc(buf, '{');
2103                 if (dump) {
2104                         sbuf_printf(buf, "\"abi\":%hhu,\"endian\":%hhu,"
2105                             "\"maxsz\":%hu,\"cursz\":%hu,\"created\":%jd,"
2106                             "\"lastrst\":%jd,\"flags\":%hu,\"stats_off\":%hu,"
2107                             "\"statsdata_off\":%hu,", sb->abi,
2108                             sb->endian, sb->maxsz, sb->cursz, sb->created,
2109                             sb->lastrst, sb->flags, sb->stats_off,
2110                             sb->statsdata_off);
2111                 }
2112
2113                 if (tpl_mb == NULL)
2114                         fmtstr = "\"tplname\":%s,\"tplhash\":%u,\"vois\":{";
2115                 else
2116                         fmtstr = "\"tplname\":\"%s\",\"tplhash\":%u,\"vois\":{";
2117
2118                 sbuf_printf(buf, fmtstr, tpl_mb ? tpl_mb->tplname : "null",
2119                     sb->tplhash);
2120         }
2121
2122         if (ctx->flags & SB_IT_FIRST_VOISTAT) {
2123                 if (dump) {
2124                         sbuf_printf(buf, "\"[%d]\":{\"id\":%d", ctx->vslot,
2125                             v->id);
2126                         if (v->id < 0) {
2127                                 sbuf_printf(buf, "},");
2128                                 return;
2129                         }
2130                         
2131                         if (tpl_mb == NULL)
2132                                 fmtstr = ",\"name\":%s,\"flags\":%hu,"
2133                                     "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
2134                                     "\"stats_off\":%hu,";
2135                         else
2136                                 fmtstr = ",\"name\":\"%s\",\"flags\":%hu,"
2137                                     "\"dtype\":\"%s\",\"voistatmaxid\":%hhd,"
2138                                     "\"stats_off\":%hu,";
2139
2140                         sbuf_printf(buf, fmtstr, tpl_mb ?
2141                             tpl_mb->voi_meta[v->id].name : "null", v->flags,
2142                             vsd_dtype2name[v->dtype], v->voistatmaxid,
2143                             v->stats_off);
2144                 } else {
2145                         if (tpl_mb == NULL) {
2146                                 sbuf_printf(buf, "\"[%hd]\":{", v->id);
2147                         } else {
2148                                 sbuf_printf(buf, "\"%s\":{",
2149                                     tpl_mb->voi_meta[v->id].name);
2150                         }
2151                 }
2152                 sbuf_cat(buf, "\"stats\":{");
2153         }
2154
2155         vsd = BLOB_OFFSET(sb, vs->data_off);
2156         if (dump) {
2157                 sbuf_printf(buf, "\"[%hhd]\":", ctx->vsslot);
2158                 if (vs->stype < 0) {
2159                         sbuf_printf(buf, "{\"stype\":-1},");
2160                         return;
2161                 }
2162                 sbuf_printf(buf, "{\"stype\":\"%s\",\"errs\":%hu,\"flags\":%hu,"
2163                     "\"dtype\":\"%s\",\"data_off\":%hu,\"voistatdata\":{",
2164                     vs_stype2name[vs->stype], vs->errs, vs->flags,
2165                     vsd_dtype2name[vs->dtype], vs->data_off);
2166         } else if (vs->stype > 0) {
2167                 if (tpl_mb == NULL)
2168                         sbuf_printf(buf, "\"[%hhd]\":", vs->stype);
2169                 else
2170                         sbuf_printf(buf, "\"%s\":", vs_stype2name[vs->stype]);
2171         } else
2172                 return;
2173
2174         if ((vs->flags & VS_VSDVALID) || dump) {
2175                 if (!dump)
2176                         sbuf_printf(buf, "{\"errs\":%hu,", vs->errs);
2177                 /* Simple non-compound VSD types need a key. */
2178                 if (!vsd_compoundtype[vs->dtype])
2179                         sbuf_cat(buf, "\"val\":");
2180                 stats_voistatdata_tostr(vsd, v->dtype, vs->dtype, vs->dsz,
2181                     sctx->fmt, buf, dump);
2182                 sbuf_cat(buf, dump ? "}}" : "}");
2183         } else
2184                 sbuf_cat(buf, dump ? "null}" : "null");
2185
2186         if (ctx->flags & SB_IT_LAST_VOISTAT)
2187                 sbuf_cat(buf, "}}");
2188
2189         if (ctx->flags & SB_IT_LAST_CB)
2190                 sbuf_cat(buf, "}}");
2191         else
2192                 sbuf_putc(buf, ',');
2193 }
2194
2195 static int
2196 stats_v1_itercb_tostr(struct statsblobv1 *sb, struct voi *v, struct voistat *vs,
2197     struct sb_iter_ctx *ctx)
2198 {
2199         struct sb_tostrcb_ctx *sctx;
2200
2201         sctx = ctx->usrctx;
2202
2203         switch (sctx->fmt) {
2204         case SB_STRFMT_FREEFORM:
2205                 stats_v1_itercb_tostr_freeform(sb, v, vs, ctx);
2206                 break;
2207         case SB_STRFMT_JSON:
2208                 stats_v1_itercb_tostr_json(sb, v, vs, ctx);
2209                 break;
2210         default:
2211                 break;
2212         }
2213
2214         return (sbuf_error(sctx->buf));
2215 }
2216
2217 int
2218 stats_v1_blob_tostr(struct statsblobv1 *sb, struct sbuf *buf,
2219     enum sb_str_fmt fmt, uint32_t flags)
2220 {
2221         struct sb_tostrcb_ctx sctx;
2222         uint32_t iflags;
2223
2224         if (sb == NULL || sb->abi != STATS_ABI_V1 || buf == NULL ||
2225             fmt >= SB_STRFMT_NUM_FMTS)
2226                 return (EINVAL);
2227
2228         sctx.buf = buf;
2229         sctx.fmt = fmt;
2230         sctx.flags = flags;
2231
2232         if (flags & SB_TOSTR_META) {
2233                 if (stats_tpl_fetch(stats_tpl_fetch_allocid(NULL, sb->tplhash),
2234                     &sctx.tpl))
2235                         return (EINVAL);
2236         } else
2237                 sctx.tpl = NULL;
2238
2239         iflags = 0;
2240         if (flags & SB_TOSTR_OBJDUMP)
2241                 iflags |= (SB_IT_NULLVOI | SB_IT_NULLVOISTAT);
2242         stats_v1_blob_iter(sb, stats_v1_itercb_tostr, &sctx, iflags);
2243
2244         return (sbuf_error(buf));
2245 }
2246
2247 static int
2248 stats_v1_itercb_visit(struct statsblobv1 *sb, struct voi *v,
2249     struct voistat *vs, struct sb_iter_ctx *ctx)
2250 {
2251         struct sb_visitcb_ctx *vctx;
2252         struct sb_visit sbv;
2253
2254         vctx = ctx->usrctx;
2255
2256         sbv.tplhash = sb->tplhash;
2257         sbv.voi_id = v->id;
2258         sbv.voi_dtype = v->dtype;
2259         sbv.vs_stype = vs->stype;
2260         sbv.vs_dtype = vs->dtype;
2261         sbv.vs_dsz = vs->dsz;
2262         sbv.vs_data = BLOB_OFFSET(sb, vs->data_off);
2263         sbv.vs_errs = vs->errs;
2264         sbv.flags = ctx->flags & (SB_IT_FIRST_CB | SB_IT_LAST_CB |
2265             SB_IT_FIRST_VOI | SB_IT_LAST_VOI | SB_IT_FIRST_VOISTAT |
2266             SB_IT_LAST_VOISTAT);
2267
2268         return (vctx->cb(&sbv, vctx->usrctx));
2269 }
2270
2271 int
2272 stats_v1_blob_visit(struct statsblobv1 *sb, stats_blob_visitcb_t func,
2273     void *usrctx)
2274 {
2275         struct sb_visitcb_ctx vctx;
2276
2277         if (sb == NULL || sb->abi != STATS_ABI_V1 || func == NULL)
2278                 return (EINVAL);
2279
2280         vctx.cb = func;
2281         vctx.usrctx = usrctx;
2282
2283         stats_v1_blob_iter(sb, stats_v1_itercb_visit, &vctx, 0);
2284
2285         return (0);
2286 }
2287
2288 static int
2289 stats_v1_icb_reset_voistat(struct statsblobv1 *sb, struct voi *v __unused,
2290     struct voistat *vs, struct sb_iter_ctx *ctx __unused)
2291 {
2292         void *vsd;
2293
2294         if (vs->stype == VS_STYPE_VOISTATE)
2295                 return (0);
2296
2297         vsd = BLOB_OFFSET(sb, vs->data_off);
2298
2299         /* Perform the stat type's default reset action. */
2300         switch (vs->stype) {
2301         case VS_STYPE_SUM:
2302                 switch (vs->dtype) {
2303                 case VSD_DTYPE_Q_S32:
2304                         Q_SIFVAL(VSD(q32, vsd)->sq32, 0);
2305                         break;
2306                 case VSD_DTYPE_Q_U32:
2307                         Q_SIFVAL(VSD(q32, vsd)->uq32, 0);
2308                         break;
2309                 case VSD_DTYPE_Q_S64:
2310                         Q_SIFVAL(VSD(q64, vsd)->sq64, 0);
2311                         break;
2312                 case VSD_DTYPE_Q_U64:
2313                         Q_SIFVAL(VSD(q64, vsd)->uq64, 0);
2314                         break;
2315                 default:
2316                         bzero(vsd, vs->dsz);
2317                         break;
2318                 }
2319                 break;
2320         case VS_STYPE_MAX:
2321                 switch (vs->dtype) {
2322                 case VSD_DTYPE_Q_S32:
2323                         Q_SIFVAL(VSD(q32, vsd)->sq32,
2324                             Q_IFMINVAL(VSD(q32, vsd)->sq32));
2325                         break;
2326                 case VSD_DTYPE_Q_U32:
2327                         Q_SIFVAL(VSD(q32, vsd)->uq32,
2328                             Q_IFMINVAL(VSD(q32, vsd)->uq32));
2329                         break;
2330                 case VSD_DTYPE_Q_S64:
2331                         Q_SIFVAL(VSD(q64, vsd)->sq64,
2332                             Q_IFMINVAL(VSD(q64, vsd)->sq64));
2333                         break;
2334                 case VSD_DTYPE_Q_U64:
2335                         Q_SIFVAL(VSD(q64, vsd)->uq64,
2336                             Q_IFMINVAL(VSD(q64, vsd)->uq64));
2337                         break;
2338                 default:
2339                         memcpy(vsd, &numeric_limits[LIM_MIN][vs->dtype],
2340                             vs->dsz);
2341                         break;
2342                 }
2343                 break;
2344         case VS_STYPE_MIN:
2345                 switch (vs->dtype) {
2346                 case VSD_DTYPE_Q_S32:
2347                         Q_SIFVAL(VSD(q32, vsd)->sq32,
2348                             Q_IFMAXVAL(VSD(q32, vsd)->sq32));
2349                         break;
2350                 case VSD_DTYPE_Q_U32:
2351                         Q_SIFVAL(VSD(q32, vsd)->uq32,
2352                             Q_IFMAXVAL(VSD(q32, vsd)->uq32));
2353                         break;
2354                 case VSD_DTYPE_Q_S64:
2355                         Q_SIFVAL(VSD(q64, vsd)->sq64,
2356                             Q_IFMAXVAL(VSD(q64, vsd)->sq64));
2357                         break;
2358                 case VSD_DTYPE_Q_U64:
2359                         Q_SIFVAL(VSD(q64, vsd)->uq64,
2360                             Q_IFMAXVAL(VSD(q64, vsd)->uq64));
2361                         break;
2362                 default:
2363                         memcpy(vsd, &numeric_limits[LIM_MAX][vs->dtype],
2364                             vs->dsz);
2365                         break;
2366                 }
2367                 break;
2368         case VS_STYPE_HIST:
2369                 {
2370                 /* Reset bucket counts. */
2371                 struct voistatdata_hist *hist;
2372                 int i, is32bit;
2373                 uint16_t nbkts;
2374
2375                 hist = VSD(hist, vsd);
2376                 switch (vs->dtype) {
2377                 case VSD_DTYPE_CRHIST32:
2378                         nbkts = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
2379                         is32bit = 1;
2380                         break;
2381                 case VSD_DTYPE_DRHIST32:
2382                         nbkts = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
2383                         is32bit = 1;
2384                         break;
2385                 case VSD_DTYPE_DVHIST32:
2386                         nbkts = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
2387                         is32bit = 1;
2388                         break;
2389                 case VSD_DTYPE_CRHIST64:
2390                         nbkts = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
2391                         is32bit = 0;
2392                         break;
2393                 case VSD_DTYPE_DRHIST64:
2394                         nbkts = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
2395                         is32bit = 0;
2396                         break;
2397                 case VSD_DTYPE_DVHIST64:
2398                         nbkts = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
2399                         is32bit = 0;
2400                         break;
2401                 default:
2402                         return (0);
2403                 }
2404
2405                 bzero(VSD_HIST_FIELDPTR(hist, vs->dtype, oob),
2406                     is32bit ? sizeof(uint32_t) : sizeof(uint64_t));
2407                 for (i = nbkts - 1; i >= 0; i--) {
2408                         bzero(VSD_HIST_FIELDPTR(hist, vs->dtype,
2409                             bkts[i].cnt), is32bit ? sizeof(uint32_t) :
2410                             sizeof(uint64_t));
2411                 }
2412                 break;
2413                 }
2414         case VS_STYPE_TDGST:
2415                 {
2416                 /* Reset sample count centroids array/tree. */
2417                 struct voistatdata_tdgst *tdgst;
2418                 struct ctdth32 *ctd32tree;
2419                 struct ctdth64 *ctd64tree;
2420                 struct voistatdata_tdgstctd32 *ctd32;
2421                 struct voistatdata_tdgstctd64 *ctd64;
2422
2423                 tdgst = VSD(tdgst, vsd);
2424                 switch (vs->dtype) {
2425                 case VSD_DTYPE_TDGSTCLUST32:
2426                         VSD(tdgstclust32, tdgst)->smplcnt = 0;
2427                         VSD(tdgstclust32, tdgst)->compcnt = 0;
2428                         ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
2429                         ARB_INIT(ctd32, ctdlnk, ctd32tree,
2430                             ARB_MAXNODES(ctd32tree)) {
2431                                 ctd32->cnt = 0;
2432                                 Q_SIFVAL(ctd32->mu, 0);
2433                         }
2434 #ifdef DIAGNOSTIC
2435                         RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
2436 #endif
2437                 break;
2438                 case VSD_DTYPE_TDGSTCLUST64:
2439                         VSD(tdgstclust64, tdgst)->smplcnt = 0;
2440                         VSD(tdgstclust64, tdgst)->compcnt = 0;
2441                         ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
2442                         ARB_INIT(ctd64, ctdlnk, ctd64tree,
2443                             ARB_MAXNODES(ctd64tree)) {
2444                                 ctd64->cnt = 0;
2445                                 Q_SIFVAL(ctd64->mu, 0);
2446                         }
2447 #ifdef DIAGNOSTIC
2448                         RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
2449 #endif
2450                 break;
2451                 default:
2452                         return (0);
2453                 }
2454                 break;
2455                 }
2456         default:
2457                 KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
2458                 break;
2459         }
2460
2461         vs->errs = 0;
2462         vs->flags &= ~VS_VSDVALID;
2463
2464         return (0);
2465 }
2466
2467 int
2468 stats_v1_blob_snapshot(struct statsblobv1 **dst, size_t dstmaxsz,
2469     struct statsblobv1 *src, uint32_t flags)
2470 {
2471         int error;
2472
2473         if (src != NULL && src->abi == STATS_ABI_V1) {
2474                 error = stats_v1_blob_clone(dst, dstmaxsz, src, flags);
2475                 if (!error) {
2476                         if (flags & SB_CLONE_RSTSRC) {
2477                                 stats_v1_blob_iter(src,
2478                                     stats_v1_icb_reset_voistat, NULL, 0);
2479                                 src->lastrst = stats_sbinuptime();
2480                         }
2481                         stats_v1_blob_finalise(*dst);
2482                 }
2483         } else
2484                 error = EINVAL;
2485
2486         return (error);
2487 }
2488
2489 static inline int
2490 stats_v1_voi_update_max(enum vsd_dtype voi_dtype __unused,
2491     struct voistatdata *voival, struct voistat *vs, void *vsd)
2492 {
2493         int error;
2494
2495         KASSERT(vs->dtype < VSD_NUM_DTYPES,
2496             ("Unknown VSD dtype %d", vs->dtype));
2497
2498         error = 0;
2499
2500         switch (vs->dtype) {
2501         case VSD_DTYPE_INT_S32:
2502                 if (VSD(int32, vsd)->s32 < voival->int32.s32) {
2503                         VSD(int32, vsd)->s32 = voival->int32.s32;
2504                         vs->flags |= VS_VSDVALID;
2505                 }
2506                 break;
2507         case VSD_DTYPE_INT_U32:
2508                 if (VSD(int32, vsd)->u32 < voival->int32.u32) {
2509                         VSD(int32, vsd)->u32 = voival->int32.u32;
2510                         vs->flags |= VS_VSDVALID;
2511                 }
2512                 break;
2513         case VSD_DTYPE_INT_S64:
2514                 if (VSD(int64, vsd)->s64 < voival->int64.s64) {
2515                         VSD(int64, vsd)->s64 = voival->int64.s64;
2516                         vs->flags |= VS_VSDVALID;
2517                 }
2518                 break;
2519         case VSD_DTYPE_INT_U64:
2520                 if (VSD(int64, vsd)->u64 < voival->int64.u64) {
2521                         VSD(int64, vsd)->u64 = voival->int64.u64;
2522                         vs->flags |= VS_VSDVALID;
2523                 }
2524                 break;
2525         case VSD_DTYPE_INT_SLONG:
2526                 if (VSD(intlong, vsd)->slong < voival->intlong.slong) {
2527                         VSD(intlong, vsd)->slong = voival->intlong.slong;
2528                         vs->flags |= VS_VSDVALID;
2529                 }
2530                 break;
2531         case VSD_DTYPE_INT_ULONG:
2532                 if (VSD(intlong, vsd)->ulong < voival->intlong.ulong) {
2533                         VSD(intlong, vsd)->ulong = voival->intlong.ulong;
2534                         vs->flags |= VS_VSDVALID;
2535                 }
2536                 break;
2537         case VSD_DTYPE_Q_S32:
2538                 if (Q_QLTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
2539                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
2540                     voival->q32.sq32)))) {
2541                         vs->flags |= VS_VSDVALID;
2542                 }
2543                 break;
2544         case VSD_DTYPE_Q_U32:
2545                 if (Q_QLTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
2546                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
2547                     voival->q32.uq32)))) {
2548                         vs->flags |= VS_VSDVALID;
2549                 }
2550                 break;
2551         case VSD_DTYPE_Q_S64:
2552                 if (Q_QLTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
2553                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
2554                     voival->q64.sq64)))) {
2555                         vs->flags |= VS_VSDVALID;
2556                 }
2557                 break;
2558         case VSD_DTYPE_Q_U64:
2559                 if (Q_QLTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
2560                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
2561                     voival->q64.uq64)))) {
2562                         vs->flags |= VS_VSDVALID;
2563                 }
2564                 break;
2565         default:
2566                 error = EINVAL;
2567                 break;
2568         }
2569
2570         return (error);
2571 }
2572
2573 static inline int
2574 stats_v1_voi_update_min(enum vsd_dtype voi_dtype __unused,
2575     struct voistatdata *voival, struct voistat *vs, void *vsd)
2576 {
2577         int error;
2578
2579         KASSERT(vs->dtype < VSD_NUM_DTYPES,
2580             ("Unknown VSD dtype %d", vs->dtype));
2581
2582         error = 0;
2583
2584         switch (vs->dtype) {
2585         case VSD_DTYPE_INT_S32:
2586                 if (VSD(int32, vsd)->s32 > voival->int32.s32) {
2587                         VSD(int32, vsd)->s32 = voival->int32.s32;
2588                         vs->flags |= VS_VSDVALID;
2589                 }
2590                 break;
2591         case VSD_DTYPE_INT_U32:
2592                 if (VSD(int32, vsd)->u32 > voival->int32.u32) {
2593                         VSD(int32, vsd)->u32 = voival->int32.u32;
2594                         vs->flags |= VS_VSDVALID;
2595                 }
2596                 break;
2597         case VSD_DTYPE_INT_S64:
2598                 if (VSD(int64, vsd)->s64 > voival->int64.s64) {
2599                         VSD(int64, vsd)->s64 = voival->int64.s64;
2600                         vs->flags |= VS_VSDVALID;
2601                 }
2602                 break;
2603         case VSD_DTYPE_INT_U64:
2604                 if (VSD(int64, vsd)->u64 > voival->int64.u64) {
2605                         VSD(int64, vsd)->u64 = voival->int64.u64;
2606                         vs->flags |= VS_VSDVALID;
2607                 }
2608                 break;
2609         case VSD_DTYPE_INT_SLONG:
2610                 if (VSD(intlong, vsd)->slong > voival->intlong.slong) {
2611                         VSD(intlong, vsd)->slong = voival->intlong.slong;
2612                         vs->flags |= VS_VSDVALID;
2613                 }
2614                 break;
2615         case VSD_DTYPE_INT_ULONG:
2616                 if (VSD(intlong, vsd)->ulong > voival->intlong.ulong) {
2617                         VSD(intlong, vsd)->ulong = voival->intlong.ulong;
2618                         vs->flags |= VS_VSDVALID;
2619                 }
2620                 break;
2621         case VSD_DTYPE_Q_S32:
2622                 if (Q_QGTQ(VSD(q32, vsd)->sq32, voival->q32.sq32) &&
2623                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->sq32,
2624                     voival->q32.sq32)))) {
2625                         vs->flags |= VS_VSDVALID;
2626                 }
2627                 break;
2628         case VSD_DTYPE_Q_U32:
2629                 if (Q_QGTQ(VSD(q32, vsd)->uq32, voival->q32.uq32) &&
2630                     (0 == (error = Q_QCPYVALQ(&VSD(q32, vsd)->uq32,
2631                     voival->q32.uq32)))) {
2632                         vs->flags |= VS_VSDVALID;
2633                 }
2634                 break;
2635         case VSD_DTYPE_Q_S64:
2636                 if (Q_QGTQ(VSD(q64, vsd)->sq64, voival->q64.sq64) &&
2637                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->sq64,
2638                     voival->q64.sq64)))) {
2639                         vs->flags |= VS_VSDVALID;
2640                 }
2641                 break;
2642         case VSD_DTYPE_Q_U64:
2643                 if (Q_QGTQ(VSD(q64, vsd)->uq64, voival->q64.uq64) &&
2644                     (0 == (error = Q_QCPYVALQ(&VSD(q64, vsd)->uq64,
2645                     voival->q64.uq64)))) {
2646                         vs->flags |= VS_VSDVALID;
2647                 }
2648                 break;
2649         default:
2650                 error = EINVAL;
2651                 break;
2652         }
2653
2654         return (error);
2655 }
2656
2657 static inline int
2658 stats_v1_voi_update_sum(enum vsd_dtype voi_dtype __unused,
2659     struct voistatdata *voival, struct voistat *vs, void *vsd)
2660 {
2661         int error;
2662
2663         KASSERT(vs->dtype < VSD_NUM_DTYPES,
2664             ("Unknown VSD dtype %d", vs->dtype));
2665
2666         error = 0;
2667
2668         switch (vs->dtype) {
2669         case VSD_DTYPE_INT_S32:
2670                 VSD(int32, vsd)->s32 += voival->int32.s32;
2671                 break;
2672         case VSD_DTYPE_INT_U32:
2673                 VSD(int32, vsd)->u32 += voival->int32.u32;
2674                 break;
2675         case VSD_DTYPE_INT_S64:
2676                 VSD(int64, vsd)->s64 += voival->int64.s64;
2677                 break;
2678         case VSD_DTYPE_INT_U64:
2679                 VSD(int64, vsd)->u64 += voival->int64.u64;
2680                 break;
2681         case VSD_DTYPE_INT_SLONG:
2682                 VSD(intlong, vsd)->slong += voival->intlong.slong;
2683                 break;
2684         case VSD_DTYPE_INT_ULONG:
2685                 VSD(intlong, vsd)->ulong += voival->intlong.ulong;
2686                 break;
2687         case VSD_DTYPE_Q_S32:
2688                 error = Q_QADDQ(&VSD(q32, vsd)->sq32, voival->q32.sq32);
2689                 break;
2690         case VSD_DTYPE_Q_U32:
2691                 error = Q_QADDQ(&VSD(q32, vsd)->uq32, voival->q32.uq32);
2692                 break;
2693         case VSD_DTYPE_Q_S64:
2694                 error = Q_QADDQ(&VSD(q64, vsd)->sq64, voival->q64.sq64);
2695                 break;
2696         case VSD_DTYPE_Q_U64:
2697                 error = Q_QADDQ(&VSD(q64, vsd)->uq64, voival->q64.uq64);
2698                 break;
2699         default:
2700                 error = EINVAL;
2701                 break;
2702         }
2703
2704         if (!error)
2705                 vs->flags |= VS_VSDVALID;
2706
2707         return (error);
2708 }
2709
2710 static inline int
2711 stats_v1_voi_update_hist(enum vsd_dtype voi_dtype, struct voistatdata *voival,
2712     struct voistat *vs, struct voistatdata_hist *hist)
2713 {
2714         struct voistatdata_numeric *bkt_lb, *bkt_ub;
2715         uint64_t *oob64, *cnt64;
2716         uint32_t *oob32, *cnt32;
2717         int error, i, found, is32bit, has_ub, eq_only;
2718
2719         error = 0;
2720
2721         switch (vs->dtype) {
2722         case VSD_DTYPE_CRHIST32:
2723                 i = HIST_VSDSZ2NBKTS(crhist32, vs->dsz);
2724                 is32bit = 1;
2725                 has_ub = eq_only = 0;
2726                 oob32 = &VSD(crhist32, hist)->oob;
2727                 break;
2728         case VSD_DTYPE_DRHIST32:
2729                 i = HIST_VSDSZ2NBKTS(drhist32, vs->dsz);
2730                 is32bit = has_ub = 1;
2731                 eq_only = 0;
2732                 oob32 = &VSD(drhist32, hist)->oob;
2733                 break;
2734         case VSD_DTYPE_DVHIST32:
2735                 i = HIST_VSDSZ2NBKTS(dvhist32, vs->dsz);
2736                 is32bit = eq_only = 1;
2737                 has_ub = 0;
2738                 oob32 = &VSD(dvhist32, hist)->oob;
2739                 break;
2740         case VSD_DTYPE_CRHIST64:
2741                 i = HIST_VSDSZ2NBKTS(crhist64, vs->dsz);
2742                 is32bit = has_ub = eq_only = 0;
2743                 oob64 = &VSD(crhist64, hist)->oob;
2744                 break;
2745         case VSD_DTYPE_DRHIST64:
2746                 i = HIST_VSDSZ2NBKTS(drhist64, vs->dsz);
2747                 is32bit = eq_only = 0;
2748                 has_ub = 1;
2749                 oob64 = &VSD(drhist64, hist)->oob;
2750                 break;
2751         case VSD_DTYPE_DVHIST64:
2752                 i = HIST_VSDSZ2NBKTS(dvhist64, vs->dsz);
2753                 is32bit = has_ub = 0;
2754                 eq_only = 1;
2755                 oob64 = &VSD(dvhist64, hist)->oob;
2756                 break;
2757         default:
2758                 return (EINVAL);
2759         }
2760         i--; /* Adjust for 0-based array index. */
2761
2762         /* XXXLAS: Should probably use a better bucket search algorithm. ARB? */
2763         for (found = 0; i >= 0 && !found; i--) {
2764                 switch (vs->dtype) {
2765                 case VSD_DTYPE_CRHIST32:
2766                         bkt_lb = &VSD(crhist32, hist)->bkts[i].lb;
2767                         cnt32 = &VSD(crhist32, hist)->bkts[i].cnt;
2768                         break;
2769                 case VSD_DTYPE_DRHIST32:
2770                         bkt_lb = &VSD(drhist32, hist)->bkts[i].lb;
2771                         bkt_ub = &VSD(drhist32, hist)->bkts[i].ub;
2772                         cnt32 = &VSD(drhist32, hist)->bkts[i].cnt;
2773                         break;
2774                 case VSD_DTYPE_DVHIST32:
2775                         bkt_lb = &VSD(dvhist32, hist)->bkts[i].val;
2776                         cnt32 = &VSD(dvhist32, hist)->bkts[i].cnt;
2777                         break;
2778                 case VSD_DTYPE_CRHIST64:
2779                         bkt_lb = &VSD(crhist64, hist)->bkts[i].lb;
2780                         cnt64 = &VSD(crhist64, hist)->bkts[i].cnt;
2781                         break;
2782                 case VSD_DTYPE_DRHIST64:
2783                         bkt_lb = &VSD(drhist64, hist)->bkts[i].lb;
2784                         bkt_ub = &VSD(drhist64, hist)->bkts[i].ub;
2785                         cnt64 = &VSD(drhist64, hist)->bkts[i].cnt;
2786                         break;
2787                 case VSD_DTYPE_DVHIST64:
2788                         bkt_lb = &VSD(dvhist64, hist)->bkts[i].val;
2789                         cnt64 = &VSD(dvhist64, hist)->bkts[i].cnt;
2790                         break;
2791                 default:
2792                         return (EINVAL);
2793                 }
2794
2795                 switch (voi_dtype) {
2796                 case VSD_DTYPE_INT_S32:
2797                         if (voival->int32.s32 >= bkt_lb->int32.s32) {
2798                                 if ((eq_only && voival->int32.s32 ==
2799                                     bkt_lb->int32.s32) ||
2800                                     (!eq_only && (!has_ub ||
2801                                     voival->int32.s32 < bkt_ub->int32.s32)))
2802                                         found = 1;
2803                         }
2804                         break;
2805                 case VSD_DTYPE_INT_U32:
2806                         if (voival->int32.u32 >= bkt_lb->int32.u32) {
2807                                 if ((eq_only && voival->int32.u32 ==
2808                                     bkt_lb->int32.u32) ||
2809                                     (!eq_only && (!has_ub ||
2810                                     voival->int32.u32 < bkt_ub->int32.u32)))
2811                                         found = 1;
2812                         }
2813                         break;
2814                 case VSD_DTYPE_INT_S64:
2815                         if (voival->int64.s64 >= bkt_lb->int64.s64)
2816                                 if ((eq_only && voival->int64.s64 ==
2817                                     bkt_lb->int64.s64) ||
2818                                     (!eq_only && (!has_ub ||
2819                                     voival->int64.s64 < bkt_ub->int64.s64)))
2820                                         found = 1;
2821                         break;
2822                 case VSD_DTYPE_INT_U64:
2823                         if (voival->int64.u64 >= bkt_lb->int64.u64)
2824                                 if ((eq_only && voival->int64.u64 ==
2825                                     bkt_lb->int64.u64) ||
2826                                     (!eq_only && (!has_ub ||
2827                                     voival->int64.u64 < bkt_ub->int64.u64)))
2828                                         found = 1;
2829                         break;
2830                 case VSD_DTYPE_INT_SLONG:
2831                         if (voival->intlong.slong >= bkt_lb->intlong.slong)
2832                                 if ((eq_only && voival->intlong.slong ==
2833                                     bkt_lb->intlong.slong) ||
2834                                     (!eq_only && (!has_ub ||
2835                                     voival->intlong.slong <
2836                                     bkt_ub->intlong.slong)))
2837                                         found = 1;
2838                         break;
2839                 case VSD_DTYPE_INT_ULONG:
2840                         if (voival->intlong.ulong >= bkt_lb->intlong.ulong)
2841                                 if ((eq_only && voival->intlong.ulong ==
2842                                     bkt_lb->intlong.ulong) ||
2843                                     (!eq_only && (!has_ub ||
2844                                     voival->intlong.ulong <
2845                                     bkt_ub->intlong.ulong)))
2846                                         found = 1;
2847                         break;
2848                 case VSD_DTYPE_Q_S32:
2849                         if (Q_QGEQ(voival->q32.sq32, bkt_lb->q32.sq32))
2850                                 if ((eq_only && Q_QEQ(voival->q32.sq32,
2851                                     bkt_lb->q32.sq32)) ||
2852                                     (!eq_only && (!has_ub ||
2853                                     Q_QLTQ(voival->q32.sq32,
2854                                     bkt_ub->q32.sq32))))
2855                                         found = 1;
2856                         break;
2857                 case VSD_DTYPE_Q_U32:
2858                         if (Q_QGEQ(voival->q32.uq32, bkt_lb->q32.uq32))
2859                                 if ((eq_only && Q_QEQ(voival->q32.uq32,
2860                                     bkt_lb->q32.uq32)) ||
2861                                     (!eq_only && (!has_ub ||
2862                                     Q_QLTQ(voival->q32.uq32,
2863                                     bkt_ub->q32.uq32))))
2864                                         found = 1;
2865                         break;
2866                 case VSD_DTYPE_Q_S64:
2867                         if (Q_QGEQ(voival->q64.sq64, bkt_lb->q64.sq64))
2868                                 if ((eq_only && Q_QEQ(voival->q64.sq64,
2869                                     bkt_lb->q64.sq64)) ||
2870                                     (!eq_only && (!has_ub ||
2871                                     Q_QLTQ(voival->q64.sq64,
2872                                     bkt_ub->q64.sq64))))
2873                                         found = 1;
2874                         break;
2875                 case VSD_DTYPE_Q_U64:
2876                         if (Q_QGEQ(voival->q64.uq64, bkt_lb->q64.uq64))
2877                                 if ((eq_only && Q_QEQ(voival->q64.uq64,
2878                                     bkt_lb->q64.uq64)) ||
2879                                     (!eq_only && (!has_ub ||
2880                                     Q_QLTQ(voival->q64.uq64,
2881                                     bkt_ub->q64.uq64))))
2882                                         found = 1;
2883                         break;
2884                 default:
2885                         break;
2886                 }
2887         }
2888
2889         if (found) {
2890                 if (is32bit)
2891                         *cnt32 += 1;
2892                 else
2893                         *cnt64 += 1;
2894         } else {
2895                 if (is32bit)
2896                         *oob32 += 1;
2897                 else
2898                         *oob64 += 1;
2899         }
2900
2901         vs->flags |= VS_VSDVALID;
2902         return (error);
2903 }
2904
2905 static inline int
2906 stats_v1_vsd_tdgst_compress(enum vsd_dtype vs_dtype,
2907     struct voistatdata_tdgst *tdgst, int attempt)
2908 {
2909         struct ctdth32 *ctd32tree;
2910         struct ctdth64 *ctd64tree;
2911         struct voistatdata_tdgstctd32 *ctd32;
2912         struct voistatdata_tdgstctd64 *ctd64;
2913         uint64_t ebits, idxmask;
2914         uint32_t bitsperidx, nebits;
2915         int error, idx, is32bit, maxctds, remctds, tmperr;
2916
2917         error = 0;
2918
2919         switch (vs_dtype) {
2920         case VSD_DTYPE_TDGSTCLUST32:
2921                 ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
2922                 if (!ARB_FULL(ctd32tree))
2923                         return (0);
2924                 VSD(tdgstclust32, tdgst)->compcnt++;
2925                 maxctds = remctds = ARB_MAXNODES(ctd32tree);
2926                 ARB_RESET_TREE(ctd32tree, ctdth32, maxctds);
2927                 VSD(tdgstclust32, tdgst)->smplcnt = 0;
2928                 is32bit = 1;
2929                 ctd64tree = NULL;
2930                 ctd64 = NULL;
2931 #ifdef DIAGNOSTIC
2932                 RB_INIT(&VSD(tdgstclust32, tdgst)->rbctdtree);
2933 #endif
2934                 break;
2935         case VSD_DTYPE_TDGSTCLUST64:
2936                 ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
2937                 if (!ARB_FULL(ctd64tree))
2938                         return (0);
2939                 VSD(tdgstclust64, tdgst)->compcnt++;
2940                 maxctds = remctds = ARB_MAXNODES(ctd64tree);
2941                 ARB_RESET_TREE(ctd64tree, ctdth64, maxctds);
2942                 VSD(tdgstclust64, tdgst)->smplcnt = 0;
2943                 is32bit = 0;
2944                 ctd32tree = NULL;
2945                 ctd32 = NULL;
2946 #ifdef DIAGNOSTIC
2947                 RB_INIT(&VSD(tdgstclust64, tdgst)->rbctdtree);
2948 #endif
2949                 break;
2950         default:
2951                 return (EINVAL);
2952         }
2953
2954         /*
2955          * Rebuild the t-digest ARB by pseudorandomly selecting centroids and
2956          * re-inserting the mu/cnt of each as a value and corresponding weight.
2957          */
2958
2959         /*
2960          * XXXCEM: random(9) is currently rand(3), not random(3).  rand(3)
2961          * RAND_MAX happens to be approximately 31 bits (range [0,
2962          * 0x7ffffffd]), so the math kinda works out.  When/if this portion of
2963          * the code is compiled in userspace, it gets the random(3) behavior,
2964          * which has expected range [0, 0x7fffffff].
2965          */
2966 #define bitsperrand 31
2967         ebits = 0;
2968         nebits = 0;
2969         bitsperidx = fls(maxctds);
2970         KASSERT(bitsperidx <= sizeof(ebits) << 3,
2971             ("%s: bitsperidx=%d, ebits=%d",
2972             __func__, bitsperidx, (int)(sizeof(ebits) << 3)));
2973         idxmask = (UINT64_C(1) << bitsperidx) - 1;
2974
2975         /* Initialise the free list with randomised centroid indices. */
2976         for (; remctds > 0; remctds--) {
2977                 while (nebits < bitsperidx) {
2978                         ebits |= ((uint64_t)random()) << nebits;
2979                         nebits += bitsperrand;
2980                         if (nebits > (sizeof(ebits) << 3))
2981                                 nebits = sizeof(ebits) << 3;
2982                 }
2983                 idx = ebits & idxmask;
2984                 nebits -= bitsperidx;
2985                 ebits >>= bitsperidx;
2986
2987                 /*
2988                  * Select the next centroid to put on the ARB free list. We
2989                  * start with the centroid at our randomly selected array index,
2990                  * and work our way forwards until finding one (the latter
2991                  * aspect reduces re-insertion randomness, but is good enough).
2992                  */
2993                 do {
2994                         if (idx >= maxctds)
2995                                 idx %= maxctds;
2996
2997                         if (is32bit)
2998                                 ctd32 = ARB_NODE(ctd32tree, idx);
2999                         else
3000                                 ctd64 = ARB_NODE(ctd64tree, idx);
3001                 } while ((is32bit ? ARB_ISFREE(ctd32, ctdlnk) :
3002                     ARB_ISFREE(ctd64, ctdlnk)) && ++idx);
3003
3004                 /* Put the centroid on the ARB free list. */
3005                 if (is32bit)
3006                         ARB_RETURNFREE(ctd32tree, ctd32, ctdlnk);
3007                 else
3008                         ARB_RETURNFREE(ctd64tree, ctd64, ctdlnk);
3009         }
3010
3011         /*
3012          * The free list now contains the randomised indices of every centroid.
3013          * Walk the free list from start to end, re-inserting each centroid's
3014          * mu/cnt. The tdgst_add() call may or may not consume the free centroid
3015          * we re-insert values from during each loop iteration, so we must latch
3016          * the index of the next free list centroid before the re-insertion
3017          * call. The previous loop above should have left the centroid pointer
3018          * pointing to the element at the head of the free list.
3019          */
3020         KASSERT((is32bit ?
3021             ARB_FREEIDX(ctd32tree) == ARB_SELFIDX(ctd32tree, ctd32) :
3022             ARB_FREEIDX(ctd64tree) == ARB_SELFIDX(ctd64tree, ctd64)),
3023             ("%s: t-digest ARB@%p free list bug", __func__,
3024             (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
3025         remctds = maxctds;
3026         while ((is32bit ? ctd32 != NULL : ctd64 != NULL)) {
3027                 tmperr = 0;
3028                 if (is32bit) {
3029                         s64q_t x;
3030
3031                         idx = ARB_NEXTFREEIDX(ctd32, ctdlnk);
3032                         /* Cloning a s32q_t into a s64q_t should never fail. */
3033                         tmperr = Q_QCLONEQ(&x, ctd32->mu);
3034                         tmperr = tmperr ? tmperr : stats_v1_vsd_tdgst_add(
3035                             vs_dtype, tdgst, x, ctd32->cnt, attempt);
3036                         ctd32 = ARB_NODE(ctd32tree, idx);
3037                         KASSERT(ctd32 == NULL || ARB_ISFREE(ctd32, ctdlnk),
3038                             ("%s: t-digest ARB@%p free list bug", __func__,
3039                             ctd32tree));
3040                 } else {
3041                         idx = ARB_NEXTFREEIDX(ctd64, ctdlnk);
3042                         tmperr = stats_v1_vsd_tdgst_add(vs_dtype, tdgst,
3043                             ctd64->mu, ctd64->cnt, attempt);
3044                         ctd64 = ARB_NODE(ctd64tree, idx);
3045                         KASSERT(ctd64 == NULL || ARB_ISFREE(ctd64, ctdlnk),
3046                             ("%s: t-digest ARB@%p free list bug", __func__,
3047                             ctd64tree));
3048                 }
3049                 /*
3050                  * This process should not produce errors, bugs notwithstanding.
3051                  * Just in case, latch any errors and attempt all re-insertions.
3052                  */
3053                 error = tmperr ? tmperr : error;
3054                 remctds--;
3055         }
3056
3057         KASSERT(remctds == 0, ("%s: t-digest ARB@%p free list bug", __func__,
3058             (is32bit ? (void *)ctd32tree : (void *)ctd64tree)));
3059
3060         return (error);
3061 }
3062
/*
 * Add sample 'x' with weight 'weight' to the t-digest 'tdgst'; 'vs_dtype'
 * selects between the 32 and 64 bit centroid flavours.
 *
 * The centroids nearest to 'x' are located and, if at least one of them can
 * take on 'weight' without exceeding its computed size bound 'k', the sample
 * is merged into one such centroid picked at random. Otherwise a centroid is
 * taken from the ARB free list and inserted as a new node. If no merge
 * candidate exists and the ARB is full, the digest is compressed and the
 * search repeated with 'attempt' incremented.
 *
 * Returns 0 on success, EINVAL for an unsupported 'vs_dtype', EOVERFLOW if
 * 'weight' would overflow the digest's sample count, EAGAIN once 'attempt'
 * reaches 5 or if no free centroid could be obtained, EALREADY if
 * ARB_INSERT()/ARB_REINSERT() returned non-NULL, or the error reported by a
 * failed Q math operation or by stats_v1_vsd_tdgst_compress().
 */
3063 static inline int
3064 stats_v1_vsd_tdgst_add(enum vsd_dtype vs_dtype, struct voistatdata_tdgst *tdgst,
3065     s64q_t x, uint64_t weight, int attempt)
3066 {
3067 #ifdef DIAGNOSTIC
3068         char qstr[Q_MAXSTRLEN(x, 10)];
3069 #endif
3070         struct ctdth32 *ctd32tree;
3071         struct ctdth64 *ctd64tree;
3072         void *closest, *cur, *lb, *ub;
3073         struct voistatdata_tdgstctd32 *ctd32;
3074         struct voistatdata_tdgstctd64 *ctd64;
3075         uint64_t cnt, smplcnt, sum, tmpsum;
3076         s64q_t k, minz, q, z;
3077         int error, is32bit, n;
3078
3079         error = 0;
3080         minz = Q_INI(&z, 0, 0, Q_NFBITS(x));
3081
3082         switch (vs_dtype) {
3083         case VSD_DTYPE_TDGSTCLUST32:
3084                 if ((UINT32_MAX - weight) < VSD(tdgstclust32, tdgst)->smplcnt)
3085                         error = EOVERFLOW;
3086                 smplcnt = VSD(tdgstclust32, tdgst)->smplcnt;
3087                 ctd32tree = &VSD(tdgstclust32, tdgst)->ctdtree;
3088                 is32bit = 1;
3089                 ctd64tree = NULL;
3090                 ctd64 = NULL;
3091                 break;
3092         case VSD_DTYPE_TDGSTCLUST64:
3093                 if ((UINT64_MAX - weight) < VSD(tdgstclust64, tdgst)->smplcnt)
3094                         error = EOVERFLOW;
3095                 smplcnt = VSD(tdgstclust64, tdgst)->smplcnt;
3096                 ctd64tree = &VSD(tdgstclust64, tdgst)->ctdtree;
3097                 is32bit = 0;
3098                 ctd32tree = NULL;
3099                 ctd32 = NULL;
3100                 break;
3101         default:
3102                 error = EINVAL;
3103                 break;
3104         }
3105
3106         if (error)
3107                 return (error);
3108
3109         /*
3110          * Inspired by Ted Dunning's AVLTreeDigest.java
3111          */
3112         do {
3113 #if defined(DIAGNOSTIC)
3114                 KASSERT(attempt < 5,
3115                     ("%s: Too many attempts", __func__));
3116 #endif
3117                 if (attempt >= 5)
3118                         return (EAGAIN);
3119
3120                 Q_SIFVAL(minz, Q_IFMAXVAL(minz));
3121                 closest = ub = NULL;
3122                 sum = tmpsum = 0;
3123
3124                 if (is32bit)
3125                         lb = cur = (void *)(ctd32 = ARB_MIN(ctdth32, ctd32tree));
3126                 else
3127                         lb = cur = (void *)(ctd64 = ARB_MIN(ctdth64, ctd64tree));
3128
3129                 if (lb == NULL) /* Empty tree. */
3130                         lb = (is32bit ? (void *)ARB_ROOT(ctd32tree) :
3131                             (void *)ARB_ROOT(ctd64tree));
3132
3133                 /*
3134                  * Find the set of centroids with minimum distance to x and
3135                  * compute the sum of counts for all centroids with mean less
3136                  * than the first centroid in the set.
3137                  */
3138                 for (; cur != NULL;
3139                     cur = (is32bit ?
3140                     (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3141                     (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3142                         if (is32bit) {
3143                                 cnt = ctd32->cnt;
3144                                 KASSERT(Q_PRECEQ(ctd32->mu, x),
3145                                     ("%s: Q_RELPREC(mu,x)=%d", __func__,
3146                                     Q_RELPREC(ctd32->mu, x)));
3147                                 /* Ok to assign as both have same precision. */
3148                                 z = ctd32->mu;
3149                         } else {
3150                                 cnt = ctd64->cnt;
3151                                 KASSERT(Q_PRECEQ(ctd64->mu, x),
3152                                     ("%s: Q_RELPREC(mu,x)=%d", __func__,
3153                                     Q_RELPREC(ctd64->mu, x)));
3154                                 /* Ok to assign as both have same precision. */
3155                                 z = ctd64->mu;
3156                         }
3157
3158                         error = Q_QSUBQ(&z, x);
3159 #if defined(DIAGNOSTIC)
3160                         KASSERT(!error, ("%s: unexpected error %d", __func__,
3161                             error));
3162 #endif
3163                         if (error)
3164                                 return (error);
3165
3166                         z = Q_QABS(z);
3167                         if (Q_QLTQ(z, minz)) {
3168                                 minz = z;
3169                                 lb = cur;
3170                                 sum = tmpsum;
3171                                 tmpsum += cnt;
3172                         } else if (Q_QGTQ(z, minz)) {
3173                                 ub = cur;
3174                                 break;
3175                         }
3176                 }
3177
3178                 cur = (is32bit ?
3179                     (void *)(ctd32 = (struct voistatdata_tdgstctd32 *)lb) :
3180                     (void *)(ctd64 = (struct voistatdata_tdgstctd64 *)lb));
3181
                     /*
                      * Visit each of the closest centroids, i.e. [lb, ub), and
                      * choose a merge candidate among those whose count can grow
                      * by 'weight' without exceeding the size bound k.
                      */
3182                 for (n = 0; cur != ub; cur = (is32bit ?
3183                     (void *)(ctd32 = ARB_NEXT(ctdth32, ctd32tree, ctd32)) :
3184                     (void *)(ctd64 = ARB_NEXT(ctdth64, ctd64tree, ctd64)))) {
3185                         if (is32bit)
3186                                 cnt = ctd32->cnt;
3187                         else
3188                                 cnt = ctd64->cnt;
3189
3190                         q = Q_CTRLINI(16);
3191                         if (smplcnt == 1)
3192                                 error = Q_QFRACI(&q, 1, 2);
3193                         else
3194                                 /* [ sum + ((cnt - 1) / 2) ] / (smplcnt - 1) */
3195                                 error = Q_QFRACI(&q, (sum << 1) + cnt - 1,
3196                                     (smplcnt - 1) << 1);
3197                         k = q;
3198                         /* k = q x 4 x smplcnt x attempt */
3199                         error |= Q_QMULI(&k, 4 * smplcnt * attempt);
3200                         /* k = k x (1 - q) */
3201                         error |= Q_QSUBI(&q, 1);
3202                         q = Q_QABS(q);
3203                         error |= Q_QMULQ(&k, q);
3204 #if defined(DIAGNOSTIC)
3205 #if !defined(_KERNEL)
3206                         double q_dbl, k_dbl, q2d, k2d;
3207                         q2d = Q_Q2D(q);
3208                         k2d = Q_Q2D(k);
3209                         q_dbl = smplcnt == 1 ? 0.5 :
3210                             (sum + ((cnt - 1)  / 2.0)) / (double)(smplcnt - 1);
3211                         k_dbl = 4 * smplcnt * q_dbl * (1.0 - q_dbl) * attempt;
3212                         /*
3213                          * If the difference between q and q_dbl is greater than
3214                          * the fractional precision of q, something is off.
3215                          * NB: q is holding the value of 1 - q
3216                          */
3217                         q_dbl = 1.0 - q_dbl;
3218                         KASSERT((q_dbl > q2d ? q_dbl - q2d : q2d - q_dbl) <
3219                             (1.05 * ((double)1 / (double)(1ULL << Q_NFBITS(q)))),
3220                             ("Q-type q bad precision"));
3221                         KASSERT((k_dbl > k2d ? k_dbl - k2d : k2d - k_dbl) <
3222                             1.0 + (0.01 * smplcnt),
3223                             ("Q-type k bad precision"));
3224 #endif /* !_KERNEL */
3225                         KASSERT(!error, ("%s: unexpected error %d", __func__,
3226                             error));
3227 #endif /* DIAGNOSTIC */
3228                         if (error)
3229                                 return (error);
3230                         if ((is32bit && ((ctd32->cnt + weight) <=
3231                             (uint64_t)Q_GIVAL(k))) ||
3232                             (!is32bit && ((ctd64->cnt + weight) <=
3233                             (uint64_t)Q_GIVAL(k)))) {
                                     /* Keep the n'th candidate with ~1/n odds. */
3234                                 n++;
3235                                 /* random() produces 31 bits. */
3236                                 if (random() < (INT32_MAX / n))
3237                                         closest = cur;
3238                         }
3239                         sum += cnt;
3240                 }
3241         } while (closest == NULL &&
3242             (is32bit ? ARB_FULL(ctd32tree) : ARB_FULL(ctd64tree)) &&
3243             (error = stats_v1_vsd_tdgst_compress(vs_dtype, tdgst,
3244             attempt++)) == 0);
3245
3246         if (error)
3247                 return (error);
3248
3249         if (closest != NULL) {
3250                 /* Merge with an existing centroid. */
3251                 if (is32bit) {
3252                         ctd32 = (struct voistatdata_tdgstctd32 *)closest;
3253                         error = Q_QSUBQ(&x, ctd32->mu);
3254                         /*
3255                          * The following calculation "x / (cnt + weight)"
3256                          * computes the amount by which to adjust the centroid's
3257                          * mu value in order to merge in the VOI sample.
3258                          *
3259                          * It can underflow (Q_QDIVI() returns ERANGE) when the
3260                          * user centroids' fractional precision (which is
3261                          * inherited by 'x') is too low to represent the result.
3262                          *
3263                          * A sophisticated approach to dealing with this issue
3264                          * would minimise accumulation of error by tracking
3265                          * underflow per centroid and making an adjustment when
3266                          * a LSB's worth of underflow has accumulated.
3267                          *
3268                          * A simpler approach is to let the result underflow
3269                          * i.e. merge the VOI sample into the centroid without
3270                          * adjusting the centroid's mu, and rely on the user to
3271                          * specify their t-digest with sufficient centroid
3272                          * fractional precision such that the accumulation of
3273                          * error from multiple underflows is of no material
3274                          * consequence to the centroid's final value of mu.
3275                          *
3276                          * For the moment, the latter approach is employed by
3277                          * simply ignoring ERANGE here.
3278                          *
3279                          * XXXLAS: Per-centroid underflow tracking is likely too
3280                          * onerous, but it probably makes sense to accumulate a
3281                          * single underflow error variable across all centroids
3282                          * and report it as part of the digest to provide
3283                          * additional visibility into the digest's fidelity.
3284                          */
3285                         error = error ? error :
3286                             Q_QDIVI(&x, ctd32->cnt + weight);
3287                         if ((error && error != ERANGE)
3288                             || (error = Q_QADDQ(&ctd32->mu, x))) {
3289 #ifdef DIAGNOSTIC
3290                                 KASSERT(!error, ("%s: unexpected error %d",
3291                                     __func__, error));
3292 #endif
3293                                 return (error);
3294                         }
3295                         ctd32->cnt += weight;
3296                         error = ARB_REINSERT(ctdth32, ctd32tree, ctd32) ==
3297                             NULL ? 0 : EALREADY;
3298 #ifdef DIAGNOSTIC
3299                         RB_REINSERT(rbctdth32,
3300                             &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3301 #endif
3302                 } else {
3303                         ctd64 = (struct voistatdata_tdgstctd64 *)closest;
3304                         error = Q_QSUBQ(&x, ctd64->mu);
3305                         error = error ? error :
3306                             Q_QDIVI(&x, ctd64->cnt + weight);
3307                         /* Refer to is32bit ERANGE discussion above. */
3308                         if ((error && error != ERANGE)
3309                             || (error = Q_QADDQ(&ctd64->mu, x))) {
3310                                 KASSERT(!error, ("%s: unexpected error %d",
3311                                     __func__, error));
3312                                 return (error);
3313                         }
3314                         ctd64->cnt += weight;
3315                         error = ARB_REINSERT(ctdth64, ctd64tree, ctd64) ==
3316                             NULL ? 0 : EALREADY;
3317 #ifdef DIAGNOSTIC
3318                         RB_REINSERT(rbctdth64,
3319                             &VSD(tdgstclust64, tdgst)->rbctdtree, ctd64);
3320 #endif
3321                 }
3322         } else {
3323                 /*
3324                  * Add a new centroid. If digest compression is working
3325                  * correctly, there should always be at least one free.
3326                  */
3327                 if (is32bit) {
3328                         ctd32 = ARB_GETFREE(ctd32tree, ctdlnk);
3329 #ifdef DIAGNOSTIC
3330                         KASSERT(ctd32 != NULL,
3331                             ("%s: t-digest@%p has no free centroids",
3332                             __func__, tdgst));
3333 #endif
3334                         if (ctd32 == NULL)
3335                                 return (EAGAIN);
3336                         if ((error = Q_QCPYVALQ(&ctd32->mu, x)))
3337                                 return (error);
3338                         ctd32->cnt = weight;
3339                         error = ARB_INSERT(ctdth32, ctd32tree, ctd32) == NULL ?
3340                             0 : EALREADY;
3341 #ifdef DIAGNOSTIC
3342                         RB_INSERT(rbctdth32,
3343                             &VSD(tdgstclust32, tdgst)->rbctdtree, ctd32);
3344 #endif
3345                 } else {
3346                         ctd64 = ARB_GETFREE(ctd64tree, ctdlnk);
3347 #ifdef DIAGNOSTIC
3348                         KASSERT(ctd64 != NULL,
3349                             ("%s: t-digest@%p has no free centroids",
3350                             __func__, tdgst));
3351 #endif
3352                         if (ctd64 == NULL) /* Should not happen. */
3353                                 return (EAGAIN);
3354                         /* Direct assignment ok as both have same type/prec. */
3355                         ctd64->mu = x;
3356                         ctd64->cnt = weight;
3357                         error = ARB_INSERT(ctdth64, ctd64tree, ctd64) == NULL ?
3358                             0 : EALREADY;
3359 #ifdef DIAGNOSTIC
3360                         RB_INSERT(rbctdth64, &VSD(tdgstclust64,
3361                             tdgst)->rbctdtree, ctd64);
3362 #endif
3363                 }
3364         }
3365
             /*
              * Account for the sample. NB: this is reached, and the count is
              * updated, even if the (re)insertion above set error to EALREADY.
              */
3366         if (is32bit)
3367                 VSD(tdgstclust32, tdgst)->smplcnt += weight;
3368         else {
3369                 VSD(tdgstclust64, tdgst)->smplcnt += weight;
3370
                     /*
                      * 64 bit digests only: walk the ARB and the DIAGNOSTIC-only
                      * RB tree in step and panic if their structure differs.
                      */
3371 #ifdef DIAGNOSTIC
3372                 struct rbctdth64 *rbctdtree =
3373                     &VSD(tdgstclust64, tdgst)->rbctdtree;
3374                 struct voistatdata_tdgstctd64 *rbctd64;
3375                 int i = 0;
3376                 ARB_FOREACH(ctd64, ctdth64, ctd64tree) {
3377                         rbctd64 = (i == 0 ? RB_MIN(rbctdth64, rbctdtree) :
3378                             RB_NEXT(rbctdth64, rbctdtree, rbctd64));
3379
3380                         if (i >= ARB_CURNODES(ctd64tree)
3381                             || ctd64 != rbctd64
3382                             || ARB_MIN(ctdth64, ctd64tree) !=
3383                                RB_MIN(rbctdth64, rbctdtree)
3384                             || ARB_MAX(ctdth64, ctd64tree) !=
3385                                RB_MAX(rbctdth64, rbctdtree)
3386                             || ARB_LEFTIDX(ctd64, ctdlnk) !=
3387                                ARB_SELFIDX(ctd64tree, RB_LEFT(rbctd64, rblnk))
3388                             || ARB_RIGHTIDX(ctd64, ctdlnk) !=
3389                                ARB_SELFIDX(ctd64tree, RB_RIGHT(rbctd64, rblnk))
3390                             || ARB_PARENTIDX(ctd64, ctdlnk) !=
3391                                ARB_SELFIDX(ctd64tree,
3392                                RB_PARENT(rbctd64, rblnk))) {
3393                                 Q_TOSTR(ctd64->mu, -1, 10, qstr, sizeof(qstr));
3394                                 printf("ARB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3395                                     "mu=%s\n",
3396                                     (int)ARB_SELFIDX(ctd64tree, ctd64),
3397                                     ARB_PARENTIDX(ctd64, ctdlnk),
3398                                     ARB_LEFTIDX(ctd64, ctdlnk),
3399                                     ARB_RIGHTIDX(ctd64, ctdlnk),
3400                                     ARB_COLOR(ctd64, ctdlnk),
3401                                     qstr);
3402
3403                                 Q_TOSTR(rbctd64->mu, -1, 10, qstr,
3404                                     sizeof(qstr));
3405                                 struct voistatdata_tdgstctd64 *parent;
3406                                 parent = RB_PARENT(rbctd64, rblnk);
3407                                 int rb_color =
3408                                         parent == NULL ? 0 :
3409                                         RB_LEFT(parent, rblnk) == rbctd64 ?
3410                                         (_RB_BITSUP(parent, rblnk) & _RB_L) != 0 :
3411                                         (_RB_BITSUP(parent, rblnk) & _RB_R) != 0;
3412                                 printf(" RB ctd=%3d p=%3d l=%3d r=%3d c=%2d "
3413                                     "mu=%s\n",
3414                                     (int)ARB_SELFIDX(ctd64tree, rbctd64),
3415                                     (int)ARB_SELFIDX(ctd64tree,
3416                                       RB_PARENT(rbctd64, rblnk)),
3417                                     (int)ARB_SELFIDX(ctd64tree,
3418                                       RB_LEFT(rbctd64, rblnk)),
3419                                     (int)ARB_SELFIDX(ctd64tree,
3420                                       RB_RIGHT(rbctd64, rblnk)),
3421                                     rb_color,
3422                                     qstr);
3423
3424                                 panic("RB@%p and ARB@%p trees differ\n",
3425                                     rbctdtree, ctd64tree);
3426                         }
3427                         i++;
3428                 }
3429 #endif /* DIAGNOSTIC */
3430         }
3431
3432         return (error);
3433 }
3434
3435 /*
3436  * Add the VOI sample 'voival' (of type 'voi_dtype') to the t-digest 'tdgst'
3437  * behind voistat 'vs' with a weight of 1. On success 'vs' is flagged
3438  * VS_VSDVALID and 0 is returned; otherwise an errno value is returned.
3439  */
3440 static inline int
3441 stats_v1_voi_update_tdgst(enum vsd_dtype voi_dtype, struct voistatdata *voival,
3442     struct voistat *vs, struct voistatdata_tdgst *tdgst)
3443 {
3444         s64q_t smpl;
3445         int ret;
3446
3447         /* Give the sample the same precision as the user's centroids. */
3448         if (vs->dtype == VSD_DTYPE_TDGSTCLUST32) {
3449                 Q_INI(&smpl, 0, 0, Q_NFBITS(
3450                     ARB_CNODE(&VSD(tdgstclust32, tdgst)->ctdtree, 0)->mu));
3451         } else if (vs->dtype == VSD_DTYPE_TDGSTCLUST64) {
3452                 Q_INI(&smpl, 0, 0, Q_NFBITS(
3453                     ARB_CNODE(&VSD(tdgstclust64, tdgst)->ctdtree, 0)->mu));
3454         } else {
3455                 KASSERT(vs->dtype == VSD_DTYPE_TDGSTCLUST32 ||
3456                     vs->dtype == VSD_DTYPE_TDGSTCLUST64,
3457                     ("%s: vs->dtype(%d) != VSD_DTYPE_TDGSTCLUST<32|64>",
3458                     __func__, vs->dtype));
3459                 return (EINVAL);
3460         }
3461
3462         /*
3463          * Copy the sample's value into the signed 64 bit Q number.
3464          * XXXLAS: Should have both signed and unsigned sample variables to
3465          * avoid returning EOVERFLOW if the voival would have fit in a u64q_t.
3466          */
3467         switch (voi_dtype) {
3468         case VSD_DTYPE_INT_S32:
3469                 ret = Q_QCPYVALI(&smpl, voival->int32.s32);
3470                 break;
3471         case VSD_DTYPE_INT_U32:
3472                 ret = Q_QCPYVALI(&smpl, voival->int32.u32);
3473                 break;
3474         case VSD_DTYPE_INT_S64:
3475                 ret = Q_QCPYVALI(&smpl, voival->int64.s64);
3476                 break;
3477         case VSD_DTYPE_INT_U64:
3478                 ret = Q_QCPYVALI(&smpl, voival->int64.u64);
3479                 break;
3480         case VSD_DTYPE_INT_SLONG:
3481                 ret = Q_QCPYVALI(&smpl, voival->intlong.slong);
3482                 break;
3483         case VSD_DTYPE_INT_ULONG:
3484                 ret = Q_QCPYVALI(&smpl, voival->intlong.ulong);
3485                 break;
3486         case VSD_DTYPE_Q_S32:
3487                 ret = Q_QCPYVALQ(&smpl, voival->q32.sq32);
3488                 break;
3489         case VSD_DTYPE_Q_U32:
3490                 ret = Q_QCPYVALQ(&smpl, voival->q32.uq32);
3491                 break;
3492         case VSD_DTYPE_Q_S64:
3493                 ret = Q_QCPYVALQ(&smpl, voival->q64.sq64);
3494                 break;
3495         case VSD_DTYPE_Q_U64:
3496                 ret = Q_QCPYVALQ(&smpl, voival->q64.uq64);
3497                 break;
3498         default:
3499                 ret = EINVAL;
3500                 break;
3501         }
3502
3503         if (ret == 0)
3504                 ret = stats_v1_vsd_tdgst_add(vs->dtype, tdgst, smpl, 1, 1);
3505         if (ret == 0)
3506                 vs->flags |= VS_VSDVALID;
3507
3508         return (ret);
3509 }
3510
/*
 * Update every statistic attached to VOI 'voi_id' in stats blob 'sb' with the
 * new sample 'voival', whose type 'voi_dtype' must match the VOI's own.
 *
 * With SB_VOI_RELUPDATE set in 'flags' (only valid for VOIs flagged
 * VOI_REQSTATE), 'voival' is first adjusted in place by adding the previously
 * recorded value. Each voistat is then updated in turn; a failure is counted
 * against that voistat via VS_INCERRS() and latched, but does not stop the
 * remaining updates. If the VOI keeps state, the (possibly adjusted) 'voival'
 * is finally saved as the new previous value.
 *
 * Returns 0 on success, EINVAL for invalid arguments, or otherwise the most
 * recently latched error.
 */
3511 int
3512 stats_v1_voi_update(struct statsblobv1 *sb, int32_t voi_id,
3513     enum vsd_dtype voi_dtype, struct voistatdata *voival, uint32_t flags)
3514 {
3515         struct voi *v;
3516         struct voistat *vs;
3517         void *statevsd, *vsd;
3518         int error, i, tmperr;
3519
3520         error = 0;
3521
             /* A negative voi_id would index before the start of sb->vois[]. */
3522         if (sb == NULL || sb->abi != STATS_ABI_V1 || voi_id < 0 ||
3523             voi_id >= NVOIS(sb) || voi_dtype == 0 ||
                 voi_dtype >= VSD_NUM_DTYPES || voival == NULL)
3524                 return (EINVAL);
3525         v = &sb->vois[voi_id];
3526         if (voi_dtype != v->dtype || v->id < 0 ||
3527             ((flags & SB_VOI_RELUPDATE) && !(v->flags & VOI_REQSTATE)))
3528                 return (EINVAL);
3529
3530         vs = BLOB_OFFSET(sb, v->stats_off);
3531         if (v->flags & VOI_REQSTATE)
3532                 statevsd = BLOB_OFFSET(sb, vs->data_off);
3533         else
3534                 statevsd = NULL;
3535
             /* Relative update: add the previous value to the caller's delta. */
3536         if (flags & SB_VOI_RELUPDATE) {
3537                 switch (voi_dtype) {
3538                 case VSD_DTYPE_INT_S32:
3539                         voival->int32.s32 +=
3540                             VSD(voistate, statevsd)->prev.int32.s32;
3541                         break;
3542                 case VSD_DTYPE_INT_U32:
3543                         voival->int32.u32 +=
3544                             VSD(voistate, statevsd)->prev.int32.u32;
3545                         break;
3546                 case VSD_DTYPE_INT_S64:
3547                         voival->int64.s64 +=
3548                             VSD(voistate, statevsd)->prev.int64.s64;
3549                         break;
3550                 case VSD_DTYPE_INT_U64:
3551                         voival->int64.u64 +=
3552                             VSD(voistate, statevsd)->prev.int64.u64;
3553                         break;
3554                 case VSD_DTYPE_INT_SLONG:
3555                         voival->intlong.slong +=
3556                             VSD(voistate, statevsd)->prev.intlong.slong;
3557                         break;
3558                 case VSD_DTYPE_INT_ULONG:
3559                         voival->intlong.ulong +=
3560                             VSD(voistate, statevsd)->prev.intlong.ulong;
3561                         break;
3562                 case VSD_DTYPE_Q_S32:
3563                         error = Q_QADDQ(&voival->q32.sq32,
3564                             VSD(voistate, statevsd)->prev.q32.sq32);
3565                         break;
3566                 case VSD_DTYPE_Q_U32:
3567                         error = Q_QADDQ(&voival->q32.uq32,
3568                             VSD(voistate, statevsd)->prev.q32.uq32);
3569                         break;
3570                 case VSD_DTYPE_Q_S64:
3571                         error = Q_QADDQ(&voival->q64.sq64,
3572                             VSD(voistate, statevsd)->prev.q64.sq64);
3573                         break;
3574                 case VSD_DTYPE_Q_U64:
3575                         error = Q_QADDQ(&voival->q64.uq64,
3576                             VSD(voistate, statevsd)->prev.q64.uq64);
3577                         break;
3578                 default:
3579                         KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3580                         break;
3581                 }
3582         }
3583
3584         if (error)
3585                 return (error);
3586
             /*
              * Update each voistat from the highest id down to 1 (index 0 is
              * not visited here); slots with a negative stype are skipped.
              */
3587         for (i = v->voistatmaxid; i > 0; i--) {
3588                 vs = &((struct voistat *)BLOB_OFFSET(sb, v->stats_off))[i];
3589                 if (vs->stype < 0)
3590                         continue;
3591
3592                 vsd = BLOB_OFFSET(sb, vs->data_off);
3593
3594                 switch (vs->stype) {
3595                 case VS_STYPE_MAX:
3596                         tmperr = stats_v1_voi_update_max(voi_dtype, voival,
3597                             vs, vsd);
3598                         break;
3599                 case VS_STYPE_MIN:
3600                         tmperr = stats_v1_voi_update_min(voi_dtype, voival,
3601                             vs, vsd);
3602                         break;
3603                 case VS_STYPE_SUM:
3604                         tmperr = stats_v1_voi_update_sum(voi_dtype, voival,
3605                             vs, vsd);
3606                         break;
3607                 case VS_STYPE_HIST:
3608                         tmperr = stats_v1_voi_update_hist(voi_dtype, voival,
3609                             vs, vsd);
3610                         break;
3611                 case VS_STYPE_TDGST:
3612                         tmperr = stats_v1_voi_update_tdgst(voi_dtype, voival,
3613                             vs, vsd);
3614                         break;
3615                 default:
3616                         KASSERT(0, ("Unknown VOI stat type %d", vs->stype));
                             /*
                              * With KASSERT compiled out, tmperr would otherwise
                              * be read uninitialised or stale below.
                              */
                             tmperr = EINVAL;
3617                         break;
3618                 }
3619
                     /* Latch the error but keep updating the other voistats. */
3620                 if (tmperr) {
3621                         error = tmperr;
3622                         VS_INCERRS(vs);
3623                 }
3624         }
3625
3626         if (statevsd) {
                     /*
                      * Save the sample as the new previous value. A failure here
                      * is latched without discarding an error latched above.
                      */
                     tmperr = 0;
3627                 switch (voi_dtype) {
3628                 case VSD_DTYPE_INT_S32:
3629                         VSD(voistate, statevsd)->prev.int32.s32 =
3630                             voival->int32.s32;
3631                         break;
3632                 case VSD_DTYPE_INT_U32:
3633                         VSD(voistate, statevsd)->prev.int32.u32 =
3634                             voival->int32.u32;
3635                         break;
3636                 case VSD_DTYPE_INT_S64:
3637                         VSD(voistate, statevsd)->prev.int64.s64 =
3638                             voival->int64.s64;
3639                         break;
3640                 case VSD_DTYPE_INT_U64:
3641                         VSD(voistate, statevsd)->prev.int64.u64 =
3642                             voival->int64.u64;
3643                         break;
3644                 case VSD_DTYPE_INT_SLONG:
3645                         VSD(voistate, statevsd)->prev.intlong.slong =
3646                             voival->intlong.slong;
3647                         break;
3648                 case VSD_DTYPE_INT_ULONG:
3649                         VSD(voistate, statevsd)->prev.intlong.ulong =
3650                             voival->intlong.ulong;
3651                         break;
3652                 case VSD_DTYPE_Q_S32:
3653                         tmperr = Q_QCPYVALQ(
3654                             &VSD(voistate, statevsd)->prev.q32.sq32,
3655                             voival->q32.sq32);
3656                         break;
3657                 case VSD_DTYPE_Q_U32:
3658                         tmperr = Q_QCPYVALQ(
3659                             &VSD(voistate, statevsd)->prev.q32.uq32,
3660                             voival->q32.uq32);
3661                         break;
3662                 case VSD_DTYPE_Q_S64:
3663                         tmperr = Q_QCPYVALQ(
3664                             &VSD(voistate, statevsd)->prev.q64.sq64,
3665                             voival->q64.sq64);
3666                         break;
3667                 case VSD_DTYPE_Q_U64:
3668                         tmperr = Q_QCPYVALQ(
3669                             &VSD(voistate, statevsd)->prev.q64.uq64,
3670                             voival->q64.uq64);
3671                         break;
3672                 default:
3673                         KASSERT(0, ("Unknown VOI data type %d", voi_dtype));
3674                         break;
3675                 }
                     error = tmperr ? tmperr : error;
3676         }
3677
3678         return (error);
3679 }
3680
3681 #ifdef _KERNEL
3682
/*
 * SYSINIT hook for stats(9). The body is intentionally empty: nothing is set
 * up here at present, the hook only reserves a slot in the boot sequence.
 */
static void
stats_init(void *arg)
{

        (void)arg; /* Unused; SYSINIT passes NULL. */
}
SYSINIT(stats, SI_SUB_KDTRACE, SI_ORDER_FIRST, stats_init, NULL);
3689
3690 /*
3691  * Sysctl handler to display the list of available stats templates.
3692  */
3693 static int
3694 stats_tpl_list_available(SYSCTL_HANDLER_ARGS)
3695 {
3696         struct sbuf *s;
3697         int err, i;
3698
3699         err = 0;
3700
3701         /* We can tolerate ntpl being stale, so do not take the lock. */
3702         s = sbuf_new(NULL, NULL, /* +1 per tpl for , */
3703             ntpl * (STATS_TPL_MAX_STR_SPEC_LEN + 1), SBUF_FIXEDLEN);
3704         if (s == NULL)
3705                 return (ENOMEM);
3706
3707         TPL_LIST_RLOCK();
3708         for (i = 0; i < ntpl; i++) {
3709                 err = sbuf_printf(s, "%s\"%s\":%u", i ? "," : "",
3710                     tpllist[i]->mb->tplname, tpllist[i]->mb->tplhash);
3711                 if (err) {
3712                         /* Sbuf overflow condition. */
3713                         err = EOVERFLOW;
3714                         break;
3715                 }
3716         }
3717         TPL_LIST_RUNLOCK();
3718
3719         if (!err) {
3720                 sbuf_finish(s);
3721                 err = sysctl_handle_string(oidp, sbuf_data(s), 0, req);
3722         }
3723
3724         sbuf_delete(s);
3725         return (err);
3726 }
3727
3728 /*
3729  * Called by subsystem-specific sysctls to report and/or parse the list of
3730  * templates being sampled and their sampling rates. A stats_tpl_sr_cb_t
3731  * conformant function pointer must be passed in as arg1, which is used to
3732  * interact with the subsystem's stats template sample rates list. If arg2 > 0,
3733  * a zero-initialised allocation of arg2-sized contextual memory is
3734  * heap-allocated and passed in to all subsystem callbacks made during the
3735  * operation of stats_tpl_sample_rates().
3736  *
3737  * XXXLAS: Assumes templates are never removed, which is currently true but may
3738  * need to be reworked in future if dynamic template management becomes a
3739  * requirement e.g. to support kernel module based templates.
3740  */
3741 int
3742 stats_tpl_sample_rates(SYSCTL_HANDLER_ARGS)
3743 {
3744         char kvpair_fmt[16], tplspec_fmt[16];
3745         char tpl_spec[STATS_TPL_MAX_STR_SPEC_LEN];
3746         char tpl_name[TPL_MAX_NAME_LEN + 2]; /* +2 for "" */
3747         stats_tpl_sr_cb_t subsys_cb;
3748         void *subsys_ctx;
3749         char *buf, *new_rates_usr_str, *tpl_name_p;
3750         struct stats_tpl_sample_rate *rates;
3751         struct sbuf *s, _s;
3752         uint32_t cum_pct, pct, tpl_hash;
3753         int err, i, off, len, newlen, nrates;
3754
3755         buf = NULL;
3756         rates = NULL;
3757         err = nrates = 0;
3758         subsys_cb = (stats_tpl_sr_cb_t)arg1;
3759         KASSERT(subsys_cb != NULL, ("%s: subsys_cb == arg1 == NULL", __func__));
3760         if (arg2 > 0)
3761                 subsys_ctx = malloc(arg2, M_TEMP, M_WAITOK | M_ZERO);
3762         else
3763                 subsys_ctx = NULL;
3764
3765         /* Grab current count of subsystem rates. */
3766         err = subsys_cb(TPL_SR_UNLOCKED_GET, NULL, &nrates, subsys_ctx);
3767         if (err)
3768                 goto done;
3769
3770         /* +1 to ensure we can append '\0' post copyin, +5 per rate for =nnn, */
3771         len = max(req->newlen + 1, nrates * (STATS_TPL_MAX_STR_SPEC_LEN + 5));
3772
3773         if (req->oldptr != NULL || req->newptr != NULL)
3774                 buf = malloc(len, M_TEMP, M_WAITOK);
3775
3776         if (req->oldptr != NULL) {
3777                 if (nrates == 0) {
3778                         /* No rates, so return an empty string via oldptr. */
3779                         err = SYSCTL_OUT(req, "", 1);
3780                         if (err)
3781                                 goto done;
3782                         goto process_new;
3783                 }
3784
3785                 s = sbuf_new(&_s, buf, len, SBUF_FIXEDLEN | SBUF_INCLUDENUL);
3786
3787                 /* Grab locked count of, and ptr to, subsystem rates. */
3788                 err = subsys_cb(TPL_SR_RLOCKED_GET, &rates, &nrates,
3789                     subsys_ctx);
3790                 if (err)
3791                         goto done;
3792                 TPL_LIST_RLOCK();
3793                 for (i = 0; i < nrates && !err; i++) {
3794                         err = sbuf_printf(s, "%s\"%s\":%u=%u", i ? "," : "",
3795                             tpllist[rates[i].tpl_slot_id]->mb->tplname,
3796                             tpllist[rates[i].tpl_slot_id]->mb->tplhash,
3797                             rates[i].tpl_sample_pct);
3798                 }
3799                 TPL_LIST_RUNLOCK();
3800                 /* Tell subsystem that we're done with its rates list. */
3801                 err = subsys_cb(TPL_SR_RUNLOCK, &rates, &nrates, subsys_ctx);
3802                 if (err)
3803                         goto done;
3804
3805                 err = sbuf_finish(s);
3806                 if (err)
3807                         goto done; /* We lost a race for buf to be too small. */
3808
3809                 /* Return the rendered string data via oldptr. */
3810                 err = SYSCTL_OUT(req, sbuf_data(s), sbuf_len(s));
3811         } else {
3812                 /* Return the upper bound size for buffer sizing requests. */
3813                 err = SYSCTL_OUT(req, NULL, len);
3814         }
3815
3816 process_new:
3817         if (err || req->newptr == NULL)
3818                 goto done;
3819
3820         newlen = req->newlen - req->newidx;
3821         err = SYSCTL_IN(req, buf, newlen);
3822         if (err)
3823                 goto done;
3824
3825         /*
3826          * Initialise format strings at run time.
3827          *
3828          * Write the max template spec string length into the
3829          * template_spec=percent key-value pair parsing format string as:
3830          *     " %<width>[^=]=%u %n"
3831          *
3832          * Write the max template name string length into the tplname:tplhash
3833          * parsing format string as:
3834          *     "%<width>[^:]:%u"
3835          *
3836          * Subtract 1 for \0 appended by sscanf().
3837          */
3838         sprintf(kvpair_fmt, " %%%zu[^=]=%%u %%n", sizeof(tpl_spec) - 1);
3839         sprintf(tplspec_fmt, "%%%zu[^:]:%%u", sizeof(tpl_name) - 1);
3840
3841         /*
3842          * Parse each CSV key-value pair specifying a template and its sample
3843          * percentage. Whitespace either side of a key-value pair is ignored.
3844          * Templates can be specified by name, hash, or name and hash per the
3845          * following formats (chars in [] are optional):
3846          *    ["]<tplname>["]=<percent>
3847          *    :hash=pct
3848          *    ["]<tplname>["]:hash=<percent>
3849          */
3850         cum_pct = nrates = 0;
3851         rates = NULL;
3852         buf[newlen] = '\0'; /* buf is at least newlen+1 in size. */
3853         new_rates_usr_str = buf;
3854         while (isspace(*new_rates_usr_str))
3855                 new_rates_usr_str++; /* Skip leading whitespace. */
3856         while (*new_rates_usr_str != '\0') {
3857                 tpl_name_p = tpl_name;
3858                 tpl_name[0] = '\0';
3859                 tpl_hash = 0;
3860                 off = 0;
3861
3862                 /*
3863                  * Parse key-value pair which must perform 2 conversions, then
3864                  * parse the template spec to extract either name, hash, or name
3865                  * and hash depending on the three possible spec formats. The
3866                  * tplspec_fmt format specifier parses name or name and hash
3867                  * template specs, while the ":%u" format specifier parses
3868                  * hash-only template specs. If parsing is successfull, ensure
3869                  * the cumulative sampling percentage does not exceed 100.
3870                  */
3871                 err = EINVAL;
3872                 if (2 != sscanf(new_rates_usr_str, kvpair_fmt, tpl_spec, &pct,
3873                     &off))
3874                         break;
3875                 if ((1 > sscanf(tpl_spec, tplspec_fmt, tpl_name, &tpl_hash)) &&
3876                     (1 != sscanf(tpl_spec, ":%u", &tpl_hash)))
3877                         break;
3878                 if ((cum_pct += pct) > 100)
3879                         break;
3880                 err = 0;
3881
3882                 /* Strip surrounding "" from template name if present. */
3883                 len = strlen(tpl_name);
3884                 if (len > 0) {
3885                         if (tpl_name[len - 1] == '"')
3886                                 tpl_name[--len] = '\0';
3887                         if (tpl_name[0] == '"') {
3888                                 tpl_name_p++;
3889                                 len--;
3890                         }
3891                 }
3892
3893                 rates = stats_realloc(rates, 0, /* oldsz is unused in kernel. */
3894                     (nrates + 1) * sizeof(*rates), M_WAITOK);
3895                 rates[nrates].tpl_slot_id =
3896                     stats_tpl_fetch_allocid(len ? tpl_name_p : NULL, tpl_hash);
3897                 if (rates[nrates].tpl_slot_id < 0) {
3898                         err = -rates[nrates].tpl_slot_id;
3899                         break;
3900                 }
3901                 rates[nrates].tpl_sample_pct = pct;
3902                 nrates++;
3903                 new_rates_usr_str += off;
3904                 if (*new_rates_usr_str != ',')
3905                         break; /* End-of-input or malformed. */
3906                 new_rates_usr_str++; /* Move past comma to next pair. */
3907         }
3908
3909         if (!err) {
3910                 if ((new_rates_usr_str - buf) < newlen) {
3911                         /* Entire input has not been consumed. */
3912                         err = EINVAL;
3913                 } else {
3914                         /*
3915                          * Give subsystem the new rates. They'll return the
3916                          * appropriate rates pointer for us to garbage collect.
3917                          */
3918                         err = subsys_cb(TPL_SR_PUT, &rates, &nrates,
3919                             subsys_ctx);
3920                 }
3921         }
3922         stats_free(rates);
3923
3924 done:
3925         free(buf, M_TEMP);
3926         free(subsys_ctx, M_TEMP);
3927         return (err);
3928 }
3929
3930 SYSCTL_NODE(_kern, OID_AUTO, stats, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
3931     "stats(9) MIB");
3932
3933 SYSCTL_PROC(_kern_stats, OID_AUTO, templates,
3934     CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
3935     stats_tpl_list_available, "A",
3936     "list the name/hash of all available stats(9) templates");
3937
3938 #else /* ! _KERNEL */
3939
3940 static void __attribute__ ((constructor))
3941 stats_constructor(void)
3942 {
3943
3944         pthread_rwlock_init(&tpllistlock, NULL);
3945 }
3946
3947 static void __attribute__ ((destructor))
3948 stats_destructor(void)
3949 {
3950
3951         pthread_rwlock_destroy(&tpllistlock);
3952 }
3953
3954 #endif /* _KERNEL */