2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
32 * This module implements a general bitmap allocator/deallocator. The
33 * allocator eats around 2 bits per 'block'. The module does not
34 * try to interpret the meaning of a 'block' other than to return
35 * SWAPBLK_NONE on an allocation failure.
37 * A radix tree controls access to pieces of the bitmap, and includes
38 * auxiliary information at each interior node about the availability of
39 * contiguous free blocks in the subtree rooted at that node. A radix
40 * constant defines the size of the bitmaps contained in a leaf node
41 * and the number of descendants of each of the meta (interior) nodes.
42 * Each subtree is associated with a range of blocks. The root of any
43 * subtree stores a hint field that defines an upper bound on the size
44 * of the largest allocation that can begin in the associated block
45 * range. A hint is an upper bound on a potential allocation, but not
46 * necessarily a tight upper bound.
48 * The bitmap field in each node directs the search for available blocks.
49 * For a leaf node, a bit is set if the corresponding block is free. For a
50 * meta node, a bit is set if the corresponding subtree contains a free
51 * block somewhere within it. The search at a meta node considers only
52 * children of that node that represent a range that includes a free block.
54 * The hinting greatly increases code efficiency for allocations while
55 * the general radix structure optimizes both allocations and frees. The
56 * radix tree should be able to operate well no matter how much
57 * fragmentation there is and no matter how large a bitmap is used.
59 * The blist code wires all necessary memory at creation time. Neither
60 * allocations nor frees require interaction with the memory subsystem.
61 * The non-blocking nature of allocations and frees is required by swap
62 * code (vm/swap_pager.c).
64 * LAYOUT: The radix tree is laid out recursively using a linear array.
65 * Each meta node is immediately followed (laid out sequentially in
66 * memory) by BLIST_RADIX lower-level nodes. This is a recursive
67 * structure but one that can be easily scanned through a very simple
68 * 'skip' calculation. The memory allocation is only large enough to
69 * cover the number of blocks requested at creation time. Nodes that
70 * represent blocks beyond that limit, nodes that would never be read
71 * or written, are not allocated, so that the last of the
72 * BLIST_RADIX lower-level nodes of some nodes may not be allocated.
74 * NOTE: the allocator cannot currently allocate more than
75 * BLIST_RADIX blocks per call. It will panic with 'allocation too
76 * large' if you try. This is an area that could use improvement. The
77 * radix is large enough that this restriction does not affect the swap
78 * system, though. Currently only the allocation code is affected by
79 * this algorithmic unfeature. The freeing code can handle arbitrary
82 * This code can be compiled stand-alone for debugging.
85 #include <sys/cdefs.h>
88 #include <sys/param.h>
89 #include <sys/systm.h>
91 #include <sys/kernel.h>
92 #include <sys/blist.h>
93 #include <sys/malloc.h>
96 #include <sys/mutex.h>
100 #ifndef BLIST_NO_DEBUG
104 #include <sys/errno.h>
105 #include <sys/types.h>
106 #include <sys/malloc.h>
107 #include <sys/sbuf.h>
116 #define bitcount64(x) __bitcount64((uint64_t)(x))
117 #define malloc(a,b,c) calloc(a, 1)
118 #define free(a,b) free(a)
119 #define ummin(a,b) ((a) < (b) ? (a) : (b))
120 #define imin(a,b) ((a) < (b) ? (a) : (b))
121 #define KASSERT(a,b) assert(a)
123 #include <sys/blist.h>
128 * static support functions
130 static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk,
131 int *count, int maxcount);
132 static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
133 int maxcount, u_daddr_t radix);
134 static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
135 static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
137 static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
138 blist_t dest, daddr_t count);
139 static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
140 static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
143 static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix,
148 static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
151 #define BLIST_MASK (BLIST_RADIX - 1)
154 * For a subtree that can represent the state of up to 'radix' blocks, the
155 * number of leaf nodes of the subtree is L=radix/BLIST_RADIX. If 'm'
156 * is short for BLIST_RADIX, then for a tree of height h with L=m**h
157 * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h,
158 * or, equivalently, (m**(h+1)-1)/(m-1). This quantity is called 'skip'
159 * in the 'meta' functions that process subtrees. Since integer division
160 * discards remainders, we can express this computation as
161 * skip = (m * m**h) / (m - 1)
162 * skip = (m * (radix / m)) / (m - 1)
163 * skip = radix / (m - 1)
164 * so that simple integer division by a constant can safely be used for the
/*
 * radix_to_skip() - return the number of tree nodes ('skip') in a subtree
 *	that covers 'radix' blocks; per the derivation in the comment above,
 *	this is radix / (BLIST_RADIX - 1), i.e. radix / BLIST_MASK.
 * NOTE(review): this listing elides some original lines (e.g. braces).
 */
167 static inline daddr_t
168 radix_to_skip(daddr_t radix)
171 	return (radix / BLIST_MASK);
175 * Provide a mask with count bits set, starting as position n.
/*
 * bitrange() - return a mask with 'count' consecutive bits set, starting
 *	at bit position 'n'.
 * NOTE(review): if n + count == 0 the right shift would be by the full
 * word width (undefined behavior) - presumably callers never pass
 * count == 0; confirm against call sites.
 */
177 static inline u_daddr_t
178 bitrange(int n, int count)
181 	return (((u_daddr_t)-1 << n) &
182 	    ((u_daddr_t)-1 >> (BLIST_RADIX - (n + count))));
186 * Find the first bit set in a u_daddr_t.
/*
 * generic_bitpos() - portable fallback for finding the position of the
 *	lowest set bit, via binary search.  Invariant: the lowest set bit
 *	lies in [lo, hi).
 * NOTE(review): the declarations of lo/hi and the loop tail are elided
 * from this listing.
 */
189 generic_bitpos(u_daddr_t mask)
195 	while (lo + 1 < hi) {
196 		mid = (lo + hi) >> 1;
197 		if (mask & bitrange(0, mid))
/*
 * bitpos() - position of the lowest set bit in 'mask'.  Dispatches to an
 *	inline ffsll()/ffs() where the platform provides one, otherwise
 *	falls back to generic_bitpos().  The sizeof switch is resolved at
 *	compile time.
 * NOTE(review): #endif lines and the ffs() case label are elided from
 * this listing.
 */
206 bitpos(u_daddr_t mask)
209 	switch (sizeof(mask)) {
210 #ifdef HAVE_INLINE_FFSLL
211 	case sizeof(long long):
212 		return (ffsll(mask) - 1);
214 #ifdef HAVE_INLINE_FFS
216 		return (ffs(mask) - 1);
219 		return (generic_bitpos(mask));
224 * blist_create() - create a blist capable of handling up to the specified
227 * blocks - must be greater than 0
228 * flags - malloc flags
230 * The smallest blist consists of a single leaf node capable of
231 * managing BLIST_RADIX blocks.
/*
 * blist_create() - allocate and initialize a blist for 'blocks' blocks.
 *	'flags' are malloc flags.  Computes the tree radix and total node
 *	count, adds a sentinel node when blocks is a BLIST_RADIX multiple,
 *	and wires all memory up front (no allocation at alloc/free time).
 * NOTE(review): several original lines (nodes initialization, malloc
 * flags tail, error check, return) are elided from this listing.
 */
234 blist_create(daddr_t blocks, int flags)
237 	u_daddr_t nodes, radix;
239 	KASSERT(blocks > 0, ("invalid block count"));
242 	 * Calculate the radix and node count used for scanning.
	/* Each loop iteration adds the node count of one tree level. */
245 	for (radix = 1; (blocks - 1) / BLIST_RADIX / radix > 0;
246 	    radix *= BLIST_RADIX)
247 		nodes += 1 + (blocks - 1) / BLIST_RADIX / radix;
250 	 * Include a sentinel node to ensure that cross-leaf scans stay within
251 	 * the bounds of the allocation.
253 	if (blocks % BLIST_RADIX == 0)
256 	bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags |
261 	bl->bl_blocks = blocks;
262 	bl->bl_radix = radix;
264 #if defined(BLIST_DEBUG)
266 		"BLIST representing %lld blocks (%lld MB of swap)"
267 		", requiring %lldK of ram\n",
268 		(long long)bl->bl_blocks,
269 		(long long)bl->bl_blocks * 4 / 1024,
270 		(long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
272 	printf("BLIST raw radix tree contains %lld records\n",
/*
 * blist_destroy() - release a blist created by blist_create().
 * NOTE(review): the function body is elided from this listing;
 * presumably it just frees 'bl' - confirm against the full source.
 */
280 blist_destroy(blist_t bl)
287 * blist_alloc() - reserve space in the block bitmap. Return the base
288 * of a contiguous region or SWAPBLK_NONE if space could
/*
 * blist_alloc() - allocate between *count and maxcount contiguous blocks.
 *	On success *count is updated to the number actually allocated and
 *	the base block is returned; on failure SWAPBLK_NONE is returned.
 *	The search starts at the saved cursor and wraps to 0 at most once.
 * NOTE(review): declarations of blk/cursor and the loop tail (cursor
 * reset/failure break) are elided from this listing.
 */
292 blist_alloc(blist_t bl, int *count, int maxcount)
296 	KASSERT(*count <= maxcount,
297 	    ("invalid parameters %d > %d", *count, maxcount));
298 	KASSERT(*count <= BLIST_MAX_ALLOC,
299 	    ("minimum allocation too large: %d", *count));
302 	 * This loop iterates at most twice.  An allocation failure in the
303 	 * first iteration leads to a second iteration only if the cursor was
304 	 * non-zero.  When the cursor is zero, an allocation failure will
305 	 * stop further iterations.
307 	for (cursor = bl->bl_cursor;; cursor = 0) {
308 		blk = blst_meta_alloc(bl->bl_root, cursor, count, maxcount,
310 		if (blk != SWAPBLK_NONE) {
			/* Track free-space count and advance the cursor. */
311 			bl->bl_avail -= *count;
312 			bl->bl_cursor = blk + *count;
313 			if (bl->bl_cursor == bl->bl_blocks)
318 	return (SWAPBLK_NONE);
323 * blist_avail() - return the number of free blocks.
/* blist_avail() - return the cached count of free blocks in 'bl'. */
326 blist_avail(blist_t bl)
329 	return (bl->bl_avail);
333 * blist_free() - free up space in the block bitmap. Return the base
334 * of a contiguous region.
/*
 * blist_free() - free 'count' blocks starting at 'blkno'.  The range must
 *	lie within the blist; freeing already-free blocks is asserted
 *	against in blst_leaf_free().
 */
337 blist_free(blist_t bl, daddr_t blkno, daddr_t count)
340 	KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
341 	    ("freeing invalid range: blkno %jx, count %d, blocks %jd",
342 	    (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
343 	blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
344 	bl->bl_avail += count;
348 * blist_fill() - mark a region in the block bitmap as off-limits
349 * to the allocator (i.e. allocate it), ignoring any
350 * existing allocations. Return the number of blocks
351 * actually filled that were free before the call.
/*
 * blist_fill() - mark [blkno, blkno + count) allocated regardless of its
 *	current state.  Returns the number of blocks that were actually
 *	free before the call, and deducts that from bl_avail.
 * NOTE(review): the declaration of 'filled' and the return statement are
 * elided from this listing.
 */
354 blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
358 	KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
359 	    ("filling invalid range: blkno %jx, count %d, blocks %jd",
360 	    (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
361 	filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix);
362 	bl->bl_avail -= filled;
367 * blist_resize() - resize an existing radix tree to handle the
368 * specified number of blocks. This will reallocate
369 * the tree and transfer the previous bitmap to the new
370 * one. When extending the tree you can specify whether
371 * the new blocks are to be left allocated or freed.
/*
 * blist_resize() - replace *pbl with a new blist of 'count' blocks,
 *	copying the old free/allocated state across.  When growing,
 *	'freenew' selects whether the added blocks start out free.
 * NOTE(review): the 'save' assignment, destruction of the old blist, and
 * the *pbl update are elided from this listing.
 */
374 blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
376 	blist_t newbl = blist_create(count, flags);
380 	if (count > save->bl_blocks)
381 		count = save->bl_blocks;
382 	blst_copy(save->bl_root, 0, save->bl_radix, newbl, count);
385 	 * If resizing upwards, should we free the new space or not?
387 	if (freenew && count < newbl->bl_blocks) {
388 		blist_free(newbl, count, newbl->bl_blocks - count);
396 * blist_print() - dump radix tree
/*
 * blist_print() - debug dump of the radix tree, rooted at bl_root.
 *	Skips the recursion entirely when the root bitmap shows no free
 *	blocks anywhere.
 */
399 blist_print(blist_t bl)
401 	printf("BLIST avail = %jd, cursor = %08jx {\n",
402 	    (uintmax_t)bl->bl_avail, (uintmax_t)bl->bl_cursor);
404 	if (bl->bl_root->bm_bitmap != 0)
405 		blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
/*
 * Fibonacci numbers used as histogram bucket boundaries for free-range
 * ("gap") sizes in blist_stats(): bucket i counts gaps of size in
 * [fib[i], fib[i+1]).
 */
411 static const u_daddr_t fib[] = {
412 	1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
413 	4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
414 	514229, 832040, 1346269, 2178309, 3524578,
418 * Use 'gap' to describe a maximal range of unallocated blocks/bits.
/*
 * Accumulated statistics over maximal runs of free blocks ("gaps").
 * 'err' keeps the remainder of the running average so 'avg' can be
 * maintained incrementally with integer arithmetic.
 * NOTE(review): the enclosing 'struct gap_stats {' line is elided from
 * this listing.
 */
421 	daddr_t	start;			/* current gap start, or SWAPBLK_NONE */
422 	daddr_t	num;			/* number of gaps observed */
423 	daddr_t	max;			/* largest gap size */
424 	daddr_t	avg;			/* average gap size */
425 	daddr_t	err;			/* sum - num * avg */
426 	daddr_t	histo[nitems(fib)];	/* # gaps in each size range */
427 	int	max_bucket;		/* last histo elt with nonzero val */
431 * gap_stats_counting() - is the state 'counting 1 bits'?
432 * or 'skipping 0 bits'?
/*
 * gap_stats_counting() - true while inside a gap (start has been recorded
 *	and the matching end has not yet been seen).
 */
435 gap_stats_counting(const struct gap_stats *stats)
438 	return (stats->start != SWAPBLK_NONE);
442 * init_gap_stats() - initialize stats on gap sizes
/*
 * init_gap_stats() - zero all counters and mark the scanner as not
 *	currently inside a gap (start == SWAPBLK_NONE).
 */
445 init_gap_stats(struct gap_stats *stats)
448 	bzero(stats, sizeof(*stats));
449 	stats->start = SWAPBLK_NONE;
453 * update_gap_stats() - update stats on gap sizes
/*
 * update_gap_stats() - called at each free/allocated transition at block
 *	position 'posn'.  On a gap start, records it; on a gap end,
 *	computes the gap size, updates max/avg, and bumps the Fibonacci
 *	histogram bucket containing the size (linear probe of the early
 *	buckets, then binary search).
 * NOTE(review): several lines (the gap-start branch body, lo/hi/mid
 * declarations and initializations, histogram increment) are elided
 * from this listing.
 */
456 update_gap_stats(struct gap_stats *stats, daddr_t posn)
461 	if (!gap_stats_counting(stats)) {
465 	size = posn - stats->start;
466 	stats->start = SWAPBLK_NONE;
467 	if (size > stats->max)
471 	 * Find the fibonacci range that contains size,
472 	 * expecting to find it in an early range.
476 	while (hi < nitems(fib) && fib[hi] <= size) {
480 	if (hi >= nitems(fib))
	/* Binary search: narrow [lo, hi) to the bucket holding 'size'. */
482 	while (lo + 1 != hi) {
483 		mid = (lo + hi) >> 1;
484 		if (fib[mid] <= size)
490 	if (lo > stats->max_bucket)
491 		stats->max_bucket = lo;
	/* Incremental integer average: carry the remainder in 'err'. */
492 	stats->err += size - stats->avg;
494 	stats->avg += stats->err / stats->num;
495 	stats->err %= stats->num;
499 * dump_gap_stats() - print stats on gap sizes
/*
 * dump_gap_stats() - render the accumulated gap statistics into sbuf 's':
 *	totals, the average, and the per-bucket histogram.  Buckets below
 *	max_bucket print their Fibonacci range; the final bucket prints a
 *	range capped by the observed maximum.
 * NOTE(review): the declaration of 'i', else branches, and closing braces
 * are elided from this listing.
 */
502 dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
506 	sbuf_printf(s, "number of maximal free ranges: %jd\n",
507 	    (intmax_t)stats->num);
508 	sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
509 	sbuf_printf(s, "average maximal free range size: %jd\n",
510 	    (intmax_t)stats->avg);
511 	sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
512 	sbuf_printf(s, "               count  |  size range\n");
513 	sbuf_printf(s, "               -----  |  ----------\n");
514 	for (i = 0; i < stats->max_bucket; i++) {
515 		if (stats->histo[i] != 0) {
516 			sbuf_printf(s, "%20jd  |  ",
517 			    (intmax_t)stats->histo[i]);
518 			if (fib[i] != fib[i + 1] - 1)
519 				sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
520 				    (intmax_t)fib[i + 1] - 1);
522 				sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
	/* Last (max_bucket) entry: upper bound is the observed max. */
525 	sbuf_printf(s, "%20jd  |  ", (intmax_t)stats->histo[i]);
526 	if (stats->histo[i] > 1)
527 		sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
528 		    (intmax_t)stats->max);
530 		sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
534 * blist_stats() - dump radix tree stats
/*
 * blist_stats() - scan the whole tree once, feeding each free/allocated
 *	transition to update_gap_stats(), then dump the results into 's'.
 *	Entirely-empty subtrees are skipped wholesale; within a leaf or
 *	meta node, 'mask ^ (mask << 1)' yields the positions where the
 *	bitmap changes state.
 * NOTE(review): the 'digit' declaration, several loop bodies, and
 * closing braces are elided from this listing.
 */
537 blist_stats(blist_t bl, struct sbuf *s)
539 	struct gap_stats gstats;
540 	struct gap_stats *stats = &gstats;
541 	daddr_t i, nodes, radix;
542 	u_daddr_t diff, mask;
545 	init_gap_stats(stats);
547 	radix = bl->bl_radix;
548 	for (i = 0; i < bl->bl_blocks; ) {
550 		 * Check for skippable subtrees starting at i.
553 			if (bl->bl_root[nodes].bm_bitmap == 0) {
554 				if (gap_stats_counting(stats))
555 					update_gap_stats(stats, i);
563 			radix /= BLIST_RADIX;
		/* Transition positions: where adjacent bits differ. */
569 		mask = bl->bl_root[nodes].bm_bitmap;
570 		diff = mask ^ (mask << 1);
571 		if (gap_stats_counting(stats))
574 			digit = bitpos(diff);
575 			update_gap_stats(stats, i + digit);
576 			diff ^= bitrange(digit, 1);
579 		nodes += radix_to_skip(radix * BLIST_RADIX);
580 		i += radix * BLIST_RADIX;
583 		 * Find max size subtree starting at i.
586 		    ((i / BLIST_RADIX / radix) & BLIST_MASK) == 0;
587 		    radix *= BLIST_RADIX)
	/* Close out a gap that runs to the end of the blist. */
590 		update_gap_stats(stats, i);
591 	dump_gap_stats(stats, s);
594 /************************************************************************
595 * ALLOCATION SUPPORT FUNCTIONS *
596 ************************************************************************
598 * These support functions do all the actual work. They may seem
599 * rather longish, but that's because I've commented them up. The
600 * actual code is straight forward.
605 * BLST_NEXT_LEAF_ALLOC() - allocate the blocks starting with the next leaf.
607 * 'scan' is a leaf node, and its first block is at address 'start'. The
608 * next leaf node could be adjacent, or several nodes away if the least
609 * common ancestor of 'scan' and its neighbor is several levels up. Use
610 * addresses to determine how many meta-nodes lie between the leaves. If
611 * sequence of leaves starting with the next one has enough initial bits
612 * set, clear them and clear the bits in the meta nodes on the path up to
613 * the least common ancestor to mark any subtrees made completely empty.
/*
 * blst_next_leaf_alloc() - see the header comment above: try to extend a
 *	leaf allocation into the following leaf/leaves, clearing leaf bits
 *	and propagating emptiness up through intervening meta nodes.
 *	Returns the number of blocks obtained past 'start' (per the caller
 *	in blst_leaf_alloc).
 * NOTE(review): this listing elides many original lines (declarations,
 * loop bodies, closing braces); the control flow below is incomplete and
 * should be read against the full source.
 */
616 blst_next_leaf_alloc(blmeta_t *scan, daddr_t start, int count, int maxcount)
622 	start += BLIST_RADIX;
623 	for (blk = start; blk - start < maxcount; blk += BLIST_RADIX) {
624 		/* Skip meta-nodes, as long as they promise more free blocks. */
626 		while (((++scan)->bm_bitmap & 1) == 1 &&
627 		    ((blk / radix) & BLIST_MASK) == 0)
628 			radix *= BLIST_RADIX;
629 		if (~scan->bm_bitmap != 0) {
631 			 * Either there is no next leaf with any free blocks,
632 			 * or we've reached the next leaf and found that some
633 			 * of its blocks are not free.  In the first case,
634 			 * bitpos() returns zero here.
636 			avail = blk - start + bitpos(~scan->bm_bitmap);
637 			if (avail < count || avail == 0) {
639 				 * There isn't a next leaf with enough free
640 				 * blocks at its beginning to bother
645 			maxcount = imin(avail, maxcount);
646 			if (maxcount % BLIST_RADIX == 0) {
648 				 * There was no next leaf.  Back scan up to
652 					radix /= BLIST_RADIX;
654 				} while (radix != 1);
661 	 * 'scan' is the last leaf that provides blocks.  Clear from 1 to
662 	 * BLIST_RADIX bits to represent the allocation of those last blocks.
664 	if (maxcount % BLIST_RADIX != 0)
665 		scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_RADIX);
670 	/* Back up over meta-nodes, clearing bits if necessary. */
672 		for (radix = BLIST_RADIX;
673 		    (digit = ((blk / radix) & BLIST_MASK)) == 0;
674 		    radix *= BLIST_RADIX) {
675 			if ((scan--)->bm_bitmap == 0)
676 				scan->bm_bitmap ^= 1;
678 		if ((scan--)->bm_bitmap == 0)
679 			scan[-digit * radix_to_skip(radix)].bm_bitmap ^=
680 			    (u_daddr_t)1 << digit;
684 		/* Clear all the bits of this leaf. */
691 * BLST_LEAF_ALLOC() - allocate at a leaf in the radix tree (a bitmap).
693 * This function is the core of the allocator. Its execution time is
694 * proportional to log(count), plus height of the tree if the allocation
695 * crosses a leaf boundary.
/*
 * blst_leaf_alloc() - allocate [*count, maxcount] contiguous blocks from
 *	a leaf bitmap, possibly spilling into the next leaf via
 *	blst_next_leaf_alloc().  On success, updates *count and clears the
 *	allocated bits; on failure, lowers bm_bighint and returns
 *	SWAPBLK_NONE.  The mask-doubling loop locates a run of *count
 *	consecutive set bits in O(log count) steps.
 * NOTE(review): this listing elides several original lines (mask
 * declaration, count1 initialization, some branches/braces); read
 * against the full source before modifying.
 */
698 blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int *count, int maxcount)
701 	int bighint, count1, hi, lo, num_shifts;
704 	num_shifts = fls(count1);
705 	mask = ~scan->bm_bitmap;
706 	while ((mask & (mask + 1)) != 0 && num_shifts > 0) {
708 		 * If bit i is 0 in mask, then bits in [i, i + (count1 >>
709 		 * num_shifts)] are 1 in scan->bm_bitmap.  Reduce num_shifts to
710 		 * 0, while preserving this invariant.  The updates to mask
711 		 * leave fewer bits 0, but each bit that remains 0 represents a
712 		 * longer string of consecutive 1-bits in scan->bm_bitmap.  If
713 		 * more updates to mask cannot set more bits, because mask is
714 		 * partitioned with all 1 bits following all 0 bits, the loop
715 		 * terminates immediately.
718 		mask |= mask >> ((count1 >> num_shifts) + 1) / 2;
720 		bighint = count1 >> num_shifts;
723 		 * Update bighint.  There is no allocation bigger than
724 		 * count1 >> num_shifts starting in this leaf.
726 		scan->bm_bighint = bighint;
727 		return (SWAPBLK_NONE);
730 	/* Discard any candidates that appear before blk. */
731 	if ((blk & BLIST_MASK) != 0) {
732 		if ((~mask & bitrange(0, blk & BLIST_MASK)) != 0) {
733 			/* Grow bighint in case all discarded bits are set. */
734 			bighint += blk & BLIST_MASK;
735 			mask |= bitrange(0, blk & BLIST_MASK);
737 				scan->bm_bighint = bighint;
738 				return (SWAPBLK_NONE);
741 		blk -= blk & BLIST_MASK;
745 	 * The least significant set bit in mask marks the start of the first
746 	 * available range of sufficient size.  Find its position.
751 	 * Find how much space is available starting at that position.
753 	if ((mask & (mask + 1)) != 0) {
754 		/* Count the 1 bits starting at position lo. */
755 		hi = bitpos(mask & (mask + 1)) + count1;
756 		if (maxcount < hi - lo)
759 		mask = ~bitrange(lo, *count);
760 	} else if (maxcount <= BLIST_RADIX - lo) {
761 		/* All the blocks we can use are available here. */
764 		mask = ~bitrange(lo, *count);
765 		if (hi == BLIST_RADIX)
766 			scan->bm_bighint = bighint;
768 		/* Check next leaf for some of the blocks we want or need. */
769 		count1 = *count - (BLIST_RADIX - lo);
770 		maxcount -= BLIST_RADIX - lo;
771 		hi = blst_next_leaf_alloc(scan, blk, count1, maxcount);
774 			 * The next leaf cannot supply enough blocks to reach
775 			 * the minimum required allocation.  The hint cannot be
776 			 * updated, because the same allocation request could
777 			 * be satisfied later, by this leaf, if the state of
778 			 * the next leaf changes, and without any changes to
781 			return (SWAPBLK_NONE);
782 		*count = BLIST_RADIX - lo + hi;
783 		scan->bm_bighint = bighint;
786 	/* Clear the allocated bits from this leaf. */
787 	scan->bm_bitmap &= mask;
792 * blist_meta_alloc() - allocate at a meta in the radix tree.
794 * Attempt to allocate at a meta node. If we can't, we update
795 * bighint and return a failure. Updating bighint optimizes future
796 * calls that hit this node. We have to check for our collapse cases
797 * and we have a few optimizations strewn in as well.
/*
 * blst_meta_alloc() - recursive allocation at a meta node.  Descends into
 *	each nonempty child subtree (guided by bm_bitmap and the bm_bighint
 *	of the child) at or after the cursor; on success clears the child's
 *	bit if the child became empty.  On total failure, lowers this
 *	node's bm_bighint when the whole range was scanned.
 * NOTE(review): this listing elides several original lines (mask/digit
 * declarations, the leaf-level radix test, loop framing); read against
 * the full source before modifying.
 */
800 blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
801     int maxcount, u_daddr_t radix)
803 	daddr_t blk, i, r, skip;
805 	bool scan_from_start;
	/* Presumably radix == 1 selects the leaf case - confirm. */
809 		return (blst_leaf_alloc(scan, cursor, count, maxcount));
810 	blk = cursor & -(radix * BLIST_RADIX);
811 	scan_from_start = (cursor == blk);
812 	skip = radix_to_skip(radix);
813 	mask = scan->bm_bitmap;
815 	/* Discard any candidates that appear before cursor. */
816 	digit = (cursor / radix) & BLIST_MASK;
817 	mask &= (u_daddr_t)-1 << digit;
819 		return (SWAPBLK_NONE);
822 	 * If the first try is for a block that includes the cursor, pre-undo
823 	 * the digit * radix offset in the first call; otherwise, ignore the
826 	if (((mask >> digit) & 1) == 1)
827 		cursor -= digit * radix;
832 	 * Examine the nonempty subtree associated with each bit set in mask.
835 		digit = bitpos(mask);
836 		i = 1 + digit * skip;
837 		if (*count <= scan[i].bm_bighint) {
839 			 * The allocation might fit beginning in the i'th subtree.
841 			r = blst_meta_alloc(&scan[i], cursor + digit * radix,
842 			    count, maxcount, radix / BLIST_RADIX);
843 			if (r != SWAPBLK_NONE) {
				/* Child drained: clear its bit in this node. */
844 				if (scan[i].bm_bitmap == 0)
845 					scan->bm_bitmap ^= bitrange(digit, 1);
850 	} while ((mask ^= bitrange(digit, 1)) != 0);
853 	 * We couldn't allocate count in this subtree.  If the whole tree was
854 	 * scanned, and the last tree node is allocated, update bighint.
856 	if (scan_from_start && !(digit == BLIST_RADIX - 1 &&
857 	    scan[i].bm_bighint == BLIST_MAX_ALLOC))
858 		scan->bm_bighint = *count - 1;
860 	return (SWAPBLK_NONE);
864 * BLST_LEAF_FREE() - free allocated block from leaf bitmap
/*
 * blst_leaf_free() - set 'count' bits in a leaf bitmap starting at block
 *	'blk' (leaf-relative position blk & BLIST_MASK), asserting that
 *	none of them are already free.
 */
868 blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
873 	 * free some data in this bitmap
874 	 * mask=0000111111111110000
878 	mask = bitrange(blk & BLIST_MASK, count);
879 	KASSERT((scan->bm_bitmap & mask) == 0,
880 	    ("freeing free block: %jx, size %d, mask %jx",
881 	    (uintmax_t)blk, count, (uintmax_t)scan->bm_bitmap & mask));
882 	scan->bm_bitmap |= mask;
886 * BLST_META_FREE() - free allocated blocks from radix tree meta info
888 * This support routine frees a range of blocks from the bitmap.
889 * The range must be entirely enclosed by this radix node. If a
890 * meta node, we break the range down recursively to free blocks
891 * in subnodes (which means that this code can free an arbitrary
892 * range whereas the allocation code cannot allocate an arbitrary
/*
 * blst_meta_free() - recursively free [freeBlk, freeBlk + count) within
 *	this meta node's range.  Resets bm_bighint to the maximum (a loose
 *	but safe hint), sets the covered child bits, and recurses into each
 *	child that intersects the range.
 * NOTE(review): the 'digit'/'endDigit' declarations, the leaf-level radix
 * test, and some loop-body lines are elided from this listing.
 */
896 blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix)
898 	daddr_t blk, endBlk, i, skip;
902 	 * We could probably do a better job here.  We are required to make
903 	 * bighint at least as large as the biggest allocable block of data.
904 	 * If we just shoehorn it, a little extra overhead will be incurred
905 	 * on the next allocation (but only that one typically).
907 	scan->bm_bighint = BLIST_MAX_ALLOC;
910 		return (blst_leaf_free(scan, freeBlk, count));
912 	endBlk = freeBlk + count;
913 	blk = (freeBlk + radix * BLIST_RADIX) & -(radix * BLIST_RADIX);
915 	 * blk is first block past the end of the range of this meta node,
916 	 * or 0 in case of overflow.
919 		endBlk = ummin(endBlk, blk);
920 	skip = radix_to_skip(radix);
921 	blk = freeBlk & -radix;
922 	digit = (blk / radix) & BLIST_MASK;
923 	endDigit = 1 + (((endBlk - 1) / radix) & BLIST_MASK);
	/* Mark every covered child as containing free blocks. */
924 	scan->bm_bitmap |= bitrange(digit, endDigit - digit);
925 	for (i = 1 + digit * skip; blk < endBlk; i += skip) {
927 		count = ummin(blk, endBlk) - freeBlk;
928 		blst_meta_free(&scan[i], freeBlk, count, radix / BLIST_RADIX);
934 * BLST_COPY() - copy one radix tree to another
936 * Locates free space in the source tree and frees it in the destination
937 * tree. The space may not already be free in the destination.
/*
 * blst_copy() - replay the free space of a source subtree into 'dest' by
 *	calling blist_free() on the destination for every free block.  At a
 *	leaf, an all-ones bitmap frees the whole span at once; otherwise
 *	each set bit is freed individually.  All-zero subtrees are skipped
 *	(destination stays allocated).
 * NOTE(review): the leaf-level radix test, returns, and loop framing are
 * elided from this listing.
 */
940 blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
943 	daddr_t endBlk, i, skip;
950 		u_daddr_t v = scan->bm_bitmap;
952 		if (v == (u_daddr_t)-1) {
953 			blist_free(dest, blk, count);
957 			for (i = 0; i < count; ++i) {
958 				if (v & ((u_daddr_t)1 << i))
959 					blist_free(dest, blk + i, 1);
969 	if (scan->bm_bitmap == 0) {
971 		 * Source all allocated, leave dest allocated
976 	endBlk = blk + count;
977 	skip = radix_to_skip(radix);
978 	for (i = 1; blk < endBlk; i += skip) {
982 		count -= blk - endBlk;
983 		blst_copy(&scan[i], blk - radix,
984 		    radix / BLIST_RADIX, dest, count);
989 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
991 * This routine allocates all blocks in the specified range
992 * regardless of any existing allocations in that range. Returns
993 * the number of blocks allocated by the call.
/*
 * blst_leaf_fill() - unconditionally allocate 'count' blocks in a leaf
 *	starting at 'blk'.  Returns how many of them were free beforehand
 *	(counted via bitcount64 before clearing).
 */
996 blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
1001 	mask = bitrange(blk & BLIST_MASK, count);
1003 	/* Count the number of blocks that we are allocating. */
1004 	nblks = bitcount64(scan->bm_bitmap & mask);
1006 	scan->bm_bitmap &= ~mask;
1011 * BLIST_META_FILL() - allocate specific blocks at a meta node
1013 * This routine allocates the specified range of blocks,
1014 * regardless of any existing allocations in the range. The
1015 * range must be within the extent of this node. Returns the
1016 * number of blocks allocated by the call.
/*
 * blst_meta_fill() - recursively mark [allocBlk, allocBlk + count)
 *	allocated within this meta node's range, regardless of prior
 *	state.  Clears the child bit of any subtree that becomes empty.
 *	Returns the number of blocks that were free before the call.
 * NOTE(review): the 'digit' declaration, the leaf-level radix test,
 * nblks initialization, and the return are elided from this listing.
 */
1019 blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix)
1021 	daddr_t blk, endBlk, i, nblks, skip;
1025 		return (blst_leaf_fill(scan, allocBlk, count));
1027 	endBlk = allocBlk + count;
1028 	blk = (allocBlk + radix * BLIST_RADIX) & -(radix * BLIST_RADIX);
1030 	 * blk is first block past the end of the range of this meta node,
1031 	 * or 0 in case of overflow.
1034 		endBlk = ummin(endBlk, blk);
1035 	skip = radix_to_skip(radix);
1036 	blk = allocBlk & -radix;
1038 	while (blk < endBlk) {
1039 		digit = (blk / radix) & BLIST_MASK;
1040 		i = 1 + digit * skip;
1042 		count = ummin(blk, endBlk) - allocBlk;
1043 		nblks += blst_meta_fill(&scan[i], allocBlk, count,
1044 		    radix / BLIST_RADIX);
		/* Child fully allocated: clear its bit in this node. */
1045 		if (scan[i].bm_bitmap == 0)
1046 			scan->bm_bitmap &= ~((u_daddr_t)1 << digit);
/*
 * blst_radix_print() - recursive debug printer.  Leaves print their raw
 *	bitmap and hint; meta nodes print a subtree summary and recurse
 *	into each child whose bit is set in bm_bitmap, with 'tab'
 *	controlling indentation.
 * NOTE(review): the leaf-level radix test, tab adjustments, and some
 * declarations/braces are elided from this listing.
 */
1055 blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
1063 		    "%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n",
1065 		    (long long)blk, (long long)BLIST_RADIX,
1066 		    (int)(1 + (BLIST_RADIX - 1) / 4),
1067 		    (long long)scan->bm_bitmap,
1068 		    (long long)scan->bm_bighint
1074 	    "%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n",
1076 	    (long long)blk, (long long)radix * BLIST_RADIX,
1077 	    (long long)radix * BLIST_RADIX,
1078 	    (int)(1 + (BLIST_RADIX - 1) / 4),
1079 	    (long long)scan->bm_bitmap,
1080 	    (long long)scan->bm_bighint
1083 	skip = radix_to_skip(radix);
1086 	mask = scan->bm_bitmap;
1087 	/* Examine the nonempty subtree associated with each bit set in mask */
1089 		digit = bitpos(mask);
1090 		blst_radix_print(&scan[1 + digit * skip], blk + digit * radix,
1091 		    radix / BLIST_RADIX, tab);
1092 	} while ((mask ^= bitrange(digit, 1)) != 0);
1106 main(int ac, char **av)
1108 daddr_t size = BLIST_RADIX * BLIST_RADIX;
1113 for (i = 1; i < ac; ++i) {
1114 const char *ptr = av[i];
1116 size = strtoll(ptr, NULL, 0);
1120 fprintf(stderr, "Bad option: %s\n", ptr - 2);
1123 bl = blist_create(size, M_WAITOK);
1125 fprintf(stderr, "blist_create failed\n");
1128 blist_free(bl, 0, size);
1133 int count = 0, maxcount = 0;
1135 printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
1136 (long long)size, (long long)bl->bl_radix * BLIST_RADIX);
1138 if (fgets(buf, sizeof(buf), stdin) == NULL)
1142 if (sscanf(buf + 1, "%d", &count) == 1) {
1143 blist_resize(&bl, count, 1, M_WAITOK);
1151 s = sbuf_new_auto();
1154 printf("%s", sbuf_data(s));
1158 if (sscanf(buf + 1, "%d%d", &count, &maxcount) == 2) {
1159 daddr_t blk = blist_alloc(bl, &count, maxcount);
1160 printf(" R=%08llx, c=%08d\n",
1161 (long long)blk, count);
1167 if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
1168 blist_free(bl, da, count);
1174 if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
1176 (intmax_t)blist_fill(bl, da, count));
1186 "a %d %d -allocate\n"