2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
32 * This module implements a general bitmap allocator/deallocator. The
33 * allocator eats around 2 bits per 'block'. The module does not
34 * try to interpret the meaning of a 'block' other than to return
35 * SWAPBLK_NONE on an allocation failure.
37 * A radix tree controls access to pieces of the bitmap, and includes
38 * auxiliary information at each interior node about the availabilty of
39 * contiguous free blocks in the subtree rooted at that node. Two radix
40 * constants are involved: one for the size of the bitmaps contained in the
41 * leaf nodes (BLIST_BMAP_RADIX), and one for the number of descendents of
42 * each of the meta (interior) nodes (BLIST_META_RADIX). Each subtree is
43 * associated with a range of blocks. The root of any subtree stores a
44 * hint field that defines an upper bound on the size of the largest
45 * allocation that can begin in the associated block range. A hint is an
46 * upper bound on a potential allocation, but not necessarily a tight upper
49 * The bitmap field in each node directs the search for available blocks.
50 * For a leaf node, a bit is set if the corresponding block is free. For a
51 * meta node, a bit is set if the corresponding subtree contains a free
52 * block somewhere within it. The search at a meta node considers only
53 * children of that node that represent a range that includes a free block.
55 * The hinting greatly increases code efficiency for allocations while
56 * the general radix structure optimizes both allocations and frees. The
57 * radix tree should be able to operate well no matter how much
58 * fragmentation there is and no matter how large a bitmap is used.
60 * The blist code wires all necessary memory at creation time. Neither
61 * allocations nor frees require interaction with the memory subsystem.
62 * The non-blocking nature of allocations and frees is required by swap
63 * code (vm/swap_pager.c).
65 * LAYOUT: The radix tree is laid out recursively using a linear array.
66 * Each meta node is immediately followed (laid out sequentially in
67 * memory) by BLIST_META_RADIX lower level nodes. This is a recursive
68 * structure but one that can be easily scanned through a very simple
69 * 'skip' calculation. The memory allocation is only large enough to
70 * cover the number of blocks requested at creation time. Nodes that
71 * represent blocks beyond that limit, nodes that would never be read
72 * or written, are not allocated, so that the last of the
73 * BLIST_META_RADIX lower level nodes of a some nodes may not be
76 * NOTE: the allocator cannot currently allocate more than
77 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
78 * large' if you try. This is an area that could use improvement. The
79 * radix is large enough that this restriction does not effect the swap
80 * system, though. Currently only the allocation code is affected by
81 * this algorithmic unfeature. The freeing code can handle arbitrary
84 * This code can be compiled stand-alone for debugging.
87 #include <sys/cdefs.h>
88 __FBSDID("$FreeBSD$");
92 #include <sys/param.h>
93 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/blist.h>
97 #include <sys/malloc.h>
100 #include <sys/mutex.h>
104 #ifndef BLIST_NO_DEBUG
108 #include <sys/errno.h>
109 #include <sys/types.h>
110 #include <sys/malloc.h>
111 #include <sys/sbuf.h>
120 #define bitcount64(x) __bitcount64((uint64_t)(x))
121 #define malloc(a,b,c) calloc(a, 1)
122 #define free(a,b) free(a)
123 #define ummin(a,b) ((a) < (b) ? (a) : (b))
124 #define imin(a,b) ((a) < (b) ? (a) : (b))
125 #define KASSERT(a,b) assert(a)
127 #include <sys/blist.h>
132 * static support functions
134 static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk,
135 int *count, int maxcount);
136 static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
137 int maxcount, u_daddr_t radix);
138 static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
139 static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
141 static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
142 blist_t dest, daddr_t count);
143 static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
144 static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
147 static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix,
152 static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
155 _Static_assert(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0,
156 "radix divisibility error");
157 #define BLIST_BMAP_MASK (BLIST_BMAP_RADIX - 1)
158 #define BLIST_META_MASK (BLIST_META_RADIX - 1)
161 * For a subtree that can represent the state of up to 'radix' blocks, the
162 * number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX. If 'm'
163 * is short for BLIST_META_RADIX, then for a tree of height h with L=m**h
164 * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h,
165 * or, equivalently, (m**(h+1)-1)/(m-1). This quantity is called 'skip'
166 * in the 'meta' functions that process subtrees. Since integer division
167 * discards remainders, we can express this computation as
168 * skip = (m * m**h) / (m - 1)
169 * skip = (m * (radix / BLIST_BMAP_RADIX)) / (m - 1)
170 * and since m divides BLIST_BMAP_RADIX, we can simplify further to
171 * skip = (radix / (BLIST_BMAP_RADIX / m)) / (m - 1)
172 * skip = radix / ((BLIST_BMAP_RADIX / m) * (m - 1))
173 * so that simple integer division by a constant can safely be used for the
176 static inline daddr_t
177 radix_to_skip(daddr_t radix)
181 ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
185 * Provide a mask with count bits set, starting as position n.
187 static inline u_daddr_t
188 bitrange(int n, int count)
191 return (((u_daddr_t)-1 << n) &
192 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count))));
197 * Find the first bit set in a u_daddr_t.
200 generic_bitpos(u_daddr_t mask)
205 hi = BLIST_BMAP_RADIX;
206 while (lo + 1 < hi) {
207 mid = (lo + hi) >> 1;
208 if (mask & bitrange(0, mid))
217 bitpos(u_daddr_t mask)
220 switch (sizeof(mask)) {
221 #ifdef HAVE_INLINE_FFSLL
222 case sizeof(long long):
223 return (ffsll(mask) - 1);
225 #ifdef HAVE_INLINE_FFS
227 return (ffs(mask) - 1);
230 return (generic_bitpos(mask));
235 * blist_create() - create a blist capable of handling up to the specified
238 * blocks - must be greater than 0
239 * flags - malloc flags
241 * The smallest blist consists of a single leaf node capable of
242 * managing BLIST_BMAP_RADIX blocks.
245 blist_create(daddr_t blocks, int flags)
248 u_daddr_t nodes, radix;
250 KASSERT(blocks > 0, ("invalid block count"));
253 * Calculate the radix and node count used for scanning.
256 radix = BLIST_BMAP_RADIX;
257 while (radix <= blocks) {
258 nodes += 1 + (blocks - 1) / radix;
259 radix *= BLIST_META_RADIX;
262 bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags |
267 bl->bl_blocks = blocks;
268 bl->bl_radix = radix;
270 #if defined(BLIST_DEBUG)
272 "BLIST representing %lld blocks (%lld MB of swap)"
273 ", requiring %lldK of ram\n",
274 (long long)bl->bl_blocks,
275 (long long)bl->bl_blocks * 4 / 1024,
276 (long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
278 printf("BLIST raw radix tree contains %lld records\n",
286 blist_destroy(blist_t bl)
293 * blist_alloc() - reserve space in the block bitmap. Return the base
294 * of a contiguous region or SWAPBLK_NONE if space could
298 blist_alloc(blist_t bl, int *count, int maxcount)
302 KASSERT(*count <= maxcount,
303 ("invalid parameters %d > %d", *count, maxcount));
304 KASSERT(*count <= BLIST_MAX_ALLOC,
305 ("minimum allocation too large: %d", *count));
308 * This loop iterates at most twice. An allocation failure in the
309 * first iteration leads to a second iteration only if the cursor was
310 * non-zero. When the cursor is zero, an allocation failure will
311 * stop further iterations.
313 for (cursor = bl->bl_cursor;; cursor = 0) {
314 blk = blst_meta_alloc(bl->bl_root, cursor, count, maxcount,
316 if (blk != SWAPBLK_NONE) {
317 bl->bl_avail -= *count;
318 bl->bl_cursor = blk + *count;
319 if (bl->bl_cursor == bl->bl_blocks)
324 return (SWAPBLK_NONE);
329 * blist_avail() - return the number of free blocks.
332 blist_avail(blist_t bl)
335 return (bl->bl_avail);
339 * blist_free() - free up space in the block bitmap. Return the base
340 * of a contiguous region.
343 blist_free(blist_t bl, daddr_t blkno, daddr_t count)
346 KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
347 ("freeing invalid range: blkno %jx, count %d, blocks %jd",
348 (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
349 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
350 bl->bl_avail += count;
354 * blist_fill() - mark a region in the block bitmap as off-limits
355 * to the allocator (i.e. allocate it), ignoring any
356 * existing allocations. Return the number of blocks
357 * actually filled that were free before the call.
360 blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
364 KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
365 ("filling invalid range: blkno %jx, count %d, blocks %jd",
366 (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
367 filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix);
368 bl->bl_avail -= filled;
373 * blist_resize() - resize an existing radix tree to handle the
374 * specified number of blocks. This will reallocate
375 * the tree and transfer the previous bitmap to the new
376 * one. When extending the tree you can specify whether
377 * the new blocks are to left allocated or freed.
380 blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
382 blist_t newbl = blist_create(count, flags);
386 if (count > save->bl_blocks)
387 count = save->bl_blocks;
388 blst_copy(save->bl_root, 0, save->bl_radix, newbl, count);
391 * If resizing upwards, should we free the new space or not?
393 if (freenew && count < newbl->bl_blocks) {
394 blist_free(newbl, count, newbl->bl_blocks - count);
402 * blist_print() - dump radix tree
405 blist_print(blist_t bl)
407 printf("BLIST avail = %jd, cursor = %08jx {\n",
408 (uintmax_t)bl->bl_avail, (uintmax_t)bl->bl_cursor);
410 if (bl->bl_root->bm_bitmap != 0)
411 blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
417 static const u_daddr_t fib[] = {
418 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
419 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
420 514229, 832040, 1346269, 2178309, 3524578,
424 * Use 'gap' to describe a maximal range of unallocated blocks/bits.
427 daddr_t start; /* current gap start, or SWAPBLK_NONE */
428 daddr_t num; /* number of gaps observed */
429 daddr_t max; /* largest gap size */
430 daddr_t avg; /* average gap size */
431 daddr_t err; /* sum - num * avg */
432 daddr_t histo[nitems(fib)]; /* # gaps in each size range */
433 int max_bucket; /* last histo elt with nonzero val */
437 * gap_stats_counting() - is the state 'counting 1 bits'?
438 * or 'skipping 0 bits'?
441 gap_stats_counting(const struct gap_stats *stats)
444 return (stats->start != SWAPBLK_NONE);
448 * init_gap_stats() - initialize stats on gap sizes
451 init_gap_stats(struct gap_stats *stats)
454 bzero(stats, sizeof(*stats));
455 stats->start = SWAPBLK_NONE;
459 * update_gap_stats() - update stats on gap sizes
462 update_gap_stats(struct gap_stats *stats, daddr_t posn)
467 if (!gap_stats_counting(stats)) {
471 size = posn - stats->start;
472 stats->start = SWAPBLK_NONE;
473 if (size > stats->max)
477 * Find the fibonacci range that contains size,
478 * expecting to find it in an early range.
482 while (hi < nitems(fib) && fib[hi] <= size) {
486 if (hi >= nitems(fib))
488 while (lo + 1 != hi) {
489 mid = (lo + hi) >> 1;
490 if (fib[mid] <= size)
496 if (lo > stats->max_bucket)
497 stats->max_bucket = lo;
498 stats->err += size - stats->avg;
500 stats->avg += stats->err / stats->num;
501 stats->err %= stats->num;
505 * dump_gap_stats() - print stats on gap sizes
508 dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
512 sbuf_printf(s, "number of maximal free ranges: %jd\n",
513 (intmax_t)stats->num);
514 sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
515 sbuf_printf(s, "average maximal free range size: %jd\n",
516 (intmax_t)stats->avg);
517 sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
518 sbuf_printf(s, " count | size range\n");
519 sbuf_printf(s, " ----- | ----------\n");
520 for (i = 0; i < stats->max_bucket; i++) {
521 if (stats->histo[i] != 0) {
522 sbuf_printf(s, "%20jd | ",
523 (intmax_t)stats->histo[i]);
524 if (fib[i] != fib[i + 1] - 1)
525 sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
526 (intmax_t)fib[i + 1] - 1);
528 sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
531 sbuf_printf(s, "%20jd | ", (intmax_t)stats->histo[i]);
532 if (stats->histo[i] > 1)
533 sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
534 (intmax_t)stats->max);
536 sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
540 * blist_stats() - dump radix tree stats
543 blist_stats(blist_t bl, struct sbuf *s)
545 struct gap_stats gstats;
546 struct gap_stats *stats = &gstats;
547 daddr_t i, nodes, radix;
548 u_daddr_t diff, mask;
551 init_gap_stats(stats);
554 while (i < bl->bl_radix + bl->bl_blocks) {
556 * Find max size subtree starting at i.
558 radix = BLIST_BMAP_RADIX;
559 while (((i / radix) & BLIST_META_MASK) == 0)
560 radix *= BLIST_META_RADIX;
563 * Check for skippable subtrees starting at i.
565 while (radix > BLIST_BMAP_RADIX) {
566 if (bl->bl_root[nodes].bm_bitmap == 0) {
567 if (gap_stats_counting(stats))
568 update_gap_stats(stats, i);
576 radix /= BLIST_META_RADIX;
578 if (radix == BLIST_BMAP_RADIX) {
582 mask = bl->bl_root[nodes].bm_bitmap;
583 diff = mask ^ (mask << 1);
584 if (gap_stats_counting(stats))
587 digit = bitpos(diff);
588 update_gap_stats(stats, i + digit);
589 diff ^= bitrange(digit, 1);
592 nodes += radix_to_skip(radix);
595 update_gap_stats(stats, i);
596 dump_gap_stats(stats, s);
599 /************************************************************************
600 * ALLOCATION SUPPORT FUNCTIONS *
601 ************************************************************************
603 * These support functions do all the actual work. They may seem
604 * rather longish, but that's because I've commented them up. The
605 * actual code is straight forward.
610 * BLST_NEXT_LEAF_ALLOC() - allocate the blocks starting with the next leaf.
612 * 'scan' is a leaf node, and its first block is at address 'start'. The
613 * next leaf node could be adjacent, or several nodes away if the least
614 * common ancestor of 'scan' and its neighbor is several levels up. Use
615 * addresses to determine how many meta-nodes lie between the leaves. If
616 * sequence of leaves starting with the next one has enough initial bits
617 * set, clear them and clear the bits in the meta nodes on the path up to
618 * the least common ancestor to mark any subtrees made completely empty.
621 blst_next_leaf_alloc(blmeta_t *scan, daddr_t start, int count, int maxcount)
627 start += BLIST_BMAP_RADIX;
628 for (blk = start; blk - start < maxcount; blk += BLIST_BMAP_RADIX) {
629 /* Skip meta-nodes, as long as they promise more free blocks. */
630 radix = BLIST_BMAP_RADIX;
631 while (((++scan)->bm_bitmap & 1) == 1 &&
632 ((blk / radix) & BLIST_META_MASK) == 0)
633 radix *= BLIST_META_RADIX;
634 if (~scan->bm_bitmap != 0) {
636 * Either there is no next leaf with any free blocks,
637 * or we've reached the next leaf and found that some
638 * of its blocks are not free. In the first case,
639 * bitpos() returns zero here.
641 avail = blk - start + bitpos(~scan->bm_bitmap);
644 * There isn't a next leaf with enough free
645 * blocks at its beginning to complete the
646 * spanning allocation.
650 maxcount = imin(avail, maxcount);
655 * 'scan' is the last leaf that provides blocks. Clear from 1 to
656 * BLIST_BMAP_RADIX bits to represent the allocation of those last
659 if (maxcount % BLIST_BMAP_RADIX != 0)
660 scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_BMAP_RADIX);
665 /* Back up over meta-nodes, clearing bits if necessary. */
666 blk -= BLIST_BMAP_RADIX;
667 radix = BLIST_BMAP_RADIX;
668 while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0) {
669 if ((scan--)->bm_bitmap == 0)
670 scan->bm_bitmap ^= 1;
671 radix *= BLIST_META_RADIX;
673 if ((scan--)->bm_bitmap == 0)
674 scan[-digit * radix_to_skip(radix)].bm_bitmap ^=
675 (u_daddr_t)1 << digit;
679 /* Clear all the bits of this leaf. */
686 * Given a bitmask, flip all the bits from the least-significant 1-bit to the
687 * most significant bit. If the result is non-zero, then the least-significant
688 * 1-bit of the result is in the same position as the least-signification 0-bit
689 * in mask that is followed by a 1-bit.
691 static inline u_daddr_t
692 flip_hibits(u_daddr_t mask)
695 return (-mask & ~mask);
699 * BLST_LEAF_ALLOC() - allocate at a leaf in the radix tree (a bitmap).
701 * This function is the core of the allocator. Its execution time is
702 * proportional to log(count), plus height of the tree if the allocation
703 * crosses a leaf boundary.
706 blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int *count, int maxcount)
708 u_daddr_t cursor_mask, mask;
709 int count1, hi, lo, num_shifts, range1, range_ext;
713 num_shifts = fls(count1);
714 mask = scan->bm_bitmap;
715 while (flip_hibits(mask) != 0 && num_shifts > 0) {
717 * If bit i is set in mask, then bits in [i, i+range1] are set
718 * in scan->bm_bitmap. The value of range1 is equal to count1
719 * >> num_shifts. Grow range1 and reduce num_shifts to 0,
720 * while preserving these invariants. The updates to mask
721 * leave fewer bits set, but each bit that remains set
722 * represents a longer string of consecutive bits set in
723 * scan->bm_bitmap. If more updates to mask cannot clear more
724 * bits, because mask is partitioned with all 0 bits preceding
725 * all 1 bits, the loop terminates immediately.
728 range_ext = range1 + ((count1 >> num_shifts) & 1);
730 * mask is a signed quantity for the shift because when it is
731 * shifted right, the sign bit should copied; when the last
732 * block of the leaf is free, pretend, for a while, that all the
733 * blocks that follow it are also free.
735 mask &= (daddr_t)mask >> range_ext;
740 * Update bighint. There is no allocation bigger than range1
741 * starting in this leaf.
743 scan->bm_bighint = range1;
744 return (SWAPBLK_NONE);
747 /* Discard any candidates that appear before blk. */
748 if ((blk & BLIST_BMAP_MASK) != 0) {
749 cursor_mask = mask & bitrange(0, blk & BLIST_BMAP_MASK);
750 if (cursor_mask != 0) {
753 return (SWAPBLK_NONE);
756 * Bighint change for last block allocation cannot
757 * assume that any other blocks are allocated, so the
758 * bighint cannot be reduced much.
760 range1 = BLIST_MAX_ALLOC - 1;
762 blk &= ~BLIST_BMAP_MASK;
766 * The least significant set bit in mask marks the start of the first
767 * available range of sufficient size. Find its position.
772 * Find how much space is available starting at that position.
774 if (flip_hibits(mask) != 0) {
775 /* Count the 1 bits starting at position lo. */
776 hi = bitpos(flip_hibits(mask)) + count1;
777 if (maxcount < hi - lo)
780 mask = bitrange(lo, *count);
781 } else if (maxcount <= BLIST_BMAP_RADIX - lo) {
782 /* All the blocks we can use are available here. */
785 mask = bitrange(lo, *count);
787 /* Check next leaf for some of the blocks we want or need. */
788 count1 = *count - (BLIST_BMAP_RADIX - lo);
789 maxcount -= BLIST_BMAP_RADIX - lo;
790 hi = blst_next_leaf_alloc(scan, blk, count1, maxcount);
793 * The next leaf cannot supply enough blocks to reach
794 * the minimum required allocation. The hint cannot be
795 * updated, because the same allocation request could
796 * be satisfied later, by this leaf, if the state of
797 * the next leaf changes, and without any changes to
800 return (SWAPBLK_NONE);
801 *count = BLIST_BMAP_RADIX - lo + hi;
802 hi = BLIST_BMAP_RADIX;
805 if (hi == BLIST_BMAP_RADIX) {
807 * Update bighint. There is no allocation bigger than range1
808 * available in this leaf after this allocation completes.
810 scan->bm_bighint = range1;
812 /* Clear the allocated bits from this leaf. */
813 scan->bm_bitmap &= ~mask;
818 * blist_meta_alloc() - allocate at a meta in the radix tree.
820 * Attempt to allocate at a meta node. If we can't, we update
821 * bighint and return a failure. Updating bighint optimize future
822 * calls that hit this node. We have to check for our collapse cases
823 * and we have a few optimizations strewn in as well.
826 blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
827 int maxcount, u_daddr_t radix)
829 daddr_t blk, i, r, skip;
831 bool scan_from_start;
834 if (radix == BLIST_BMAP_RADIX)
835 return (blst_leaf_alloc(scan, cursor, count, maxcount));
836 blk = cursor & -radix;
837 scan_from_start = (cursor == blk);
838 radix /= BLIST_META_RADIX;
839 skip = radix_to_skip(radix);
840 mask = scan->bm_bitmap;
842 /* Discard any candidates that appear before cursor. */
843 digit = (cursor / radix) & BLIST_META_MASK;
844 mask &= (u_daddr_t)-1 << digit;
846 return (SWAPBLK_NONE);
849 * If the first try is for a block that includes the cursor, pre-undo
850 * the digit * radix offset in the first call; otherwise, ignore the
853 if (((mask >> digit) & 1) == 1)
854 cursor -= digit * radix;
859 * Examine the nonempty subtree associated with each bit set in mask.
862 digit = bitpos(mask);
863 i = 1 + digit * skip;
864 if (*count <= scan[i].bm_bighint) {
866 * The allocation might fit beginning in the i'th subtree.
868 r = blst_meta_alloc(&scan[i], cursor + digit * radix,
869 count, maxcount, radix);
870 if (r != SWAPBLK_NONE) {
871 if (scan[i].bm_bitmap == 0)
872 scan->bm_bitmap ^= bitrange(digit, 1);
877 } while ((mask ^= bitrange(digit, 1)) != 0);
880 * We couldn't allocate count in this subtree. If the whole tree was
881 * scanned, and the last tree node is allocated, update bighint.
883 if (scan_from_start && !(digit == BLIST_META_RADIX - 1 &&
884 scan[i].bm_bighint == BLIST_MAX_ALLOC))
885 scan->bm_bighint = *count - 1;
887 return (SWAPBLK_NONE);
891 * BLST_LEAF_FREE() - free allocated block from leaf bitmap
895 blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
900 * free some data in this bitmap
901 * mask=0000111111111110000
905 mask = bitrange(blk & BLIST_BMAP_MASK, count);
906 KASSERT((scan->bm_bitmap & mask) == 0,
907 ("freeing free block: %jx, size %d, mask %jx",
908 (uintmax_t)blk, count, (uintmax_t)scan->bm_bitmap & mask));
909 scan->bm_bitmap |= mask;
913 * BLST_META_FREE() - free allocated blocks from radix tree meta info
915 * This support routine frees a range of blocks from the bitmap.
916 * The range must be entirely enclosed by this radix node. If a
917 * meta node, we break the range down recursively to free blocks
918 * in subnodes (which means that this code can free an arbitrary
919 * range whereas the allocation code cannot allocate an arbitrary
923 blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix)
925 daddr_t blk, endBlk, i, skip;
929 * We could probably do a better job here. We are required to make
930 * bighint at least as large as the biggest allocable block of data.
931 * If we just shoehorn it, a little extra overhead will be incurred
932 * on the next allocation (but only that one typically).
934 scan->bm_bighint = BLIST_MAX_ALLOC;
936 if (radix == BLIST_BMAP_RADIX)
937 return (blst_leaf_free(scan, freeBlk, count));
939 endBlk = ummin(freeBlk + count, (freeBlk + radix) & -radix);
940 radix /= BLIST_META_RADIX;
941 skip = radix_to_skip(radix);
942 blk = freeBlk & -radix;
943 digit = (blk / radix) & BLIST_META_MASK;
944 endDigit = 1 + (((endBlk - 1) / radix) & BLIST_META_MASK);
945 scan->bm_bitmap |= bitrange(digit, endDigit - digit);
946 for (i = 1 + digit * skip; blk < endBlk; i += skip) {
948 count = ummin(blk, endBlk) - freeBlk;
949 blst_meta_free(&scan[i], freeBlk, count, radix);
955 * BLST_COPY() - copy one radix tree to another
957 * Locates free space in the source tree and frees it in the destination
958 * tree. The space may not already be free in the destination.
961 blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
964 daddr_t endBlk, i, skip;
970 if (radix == BLIST_BMAP_RADIX) {
971 u_daddr_t v = scan->bm_bitmap;
973 if (v == (u_daddr_t)-1) {
974 blist_free(dest, blk, count);
978 for (i = 0; i < count; ++i) {
979 if (v & ((u_daddr_t)1 << i))
980 blist_free(dest, blk + i, 1);
990 if (scan->bm_bitmap == 0) {
992 * Source all allocated, leave dest allocated
997 endBlk = blk + count;
998 radix /= BLIST_META_RADIX;
999 skip = radix_to_skip(radix);
1000 for (i = 1; blk < endBlk; i += skip) {
1004 count -= blk - endBlk;
1005 blst_copy(&scan[i], blk - radix, radix, dest, count);
1010 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
1012 * This routine allocates all blocks in the specified range
1013 * regardless of any existing allocations in that range. Returns
1014 * the number of blocks allocated by the call.
1017 blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
1022 mask = bitrange(blk & BLIST_BMAP_MASK, count);
1024 /* Count the number of blocks that we are allocating. */
1025 nblks = bitcount64(scan->bm_bitmap & mask);
1027 scan->bm_bitmap &= ~mask;
1032 * BLIST_META_FILL() - allocate specific blocks at a meta node
1034 * This routine allocates the specified range of blocks,
1035 * regardless of any existing allocations in the range. The
1036 * range must be within the extent of this node. Returns the
1037 * number of blocks allocated by the call.
1040 blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix)
1042 daddr_t blk, endBlk, i, nblks, skip;
1045 if (radix == BLIST_BMAP_RADIX)
1046 return (blst_leaf_fill(scan, allocBlk, count));
1048 endBlk = ummin(allocBlk + count, (allocBlk + radix) & -radix);
1049 radix /= BLIST_META_RADIX;
1050 skip = radix_to_skip(radix);
1051 blk = allocBlk & -radix;
1053 while (blk < endBlk) {
1054 digit = (blk / radix) & BLIST_META_MASK;
1055 i = 1 + digit * skip;
1057 count = ummin(blk, endBlk) - allocBlk;
1058 nblks += blst_meta_fill(&scan[i], allocBlk, count, radix);
1059 if (scan[i].bm_bitmap == 0)
1060 scan->bm_bitmap &= ~((u_daddr_t)1 << digit);
1069 blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
1075 if (radix == BLIST_BMAP_RADIX) {
1077 "%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n",
1079 (long long)blk, (long long)radix,
1080 1 + (BLIST_BMAP_RADIX - 1) / 4,
1081 (long long)scan->bm_bitmap,
1082 (long long)scan->bm_bighint
1088 "%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n",
1090 (long long)blk, (long long)radix,
1092 1 + (BLIST_META_RADIX - 1) / 4,
1093 (long long)scan->bm_bitmap,
1094 (long long)scan->bm_bighint
1097 radix /= BLIST_META_RADIX;
1098 skip = radix_to_skip(radix);
1101 mask = scan->bm_bitmap;
1102 /* Examine the nonempty subtree associated with each bit set in mask */
1104 digit = bitpos(mask);
1105 blst_radix_print(&scan[1 + digit * skip], blk + digit * radix,
1107 } while ((mask ^= bitrange(digit, 1)) != 0);
1121 main(int ac, char **av)
1123 int size = BLIST_META_RADIX * BLIST_BMAP_RADIX;
1128 for (i = 1; i < ac; ++i) {
1129 const char *ptr = av[i];
1131 size = strtol(ptr, NULL, 0);
1135 fprintf(stderr, "Bad option: %s\n", ptr - 2);
1138 bl = blist_create(size, M_WAITOK);
1139 blist_free(bl, 0, size);
1144 int count = 0, maxcount = 0;
1146 printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
1147 (long long)size, (long long)bl->bl_radix);
1149 if (fgets(buf, sizeof(buf), stdin) == NULL)
1153 if (sscanf(buf + 1, "%d", &count) == 1) {
1154 blist_resize(&bl, count, 1, M_WAITOK);
1162 s = sbuf_new_auto();
1165 printf("%s", sbuf_data(s));
1169 if (sscanf(buf + 1, "%d%d", &count, &maxcount) == 2) {
1170 daddr_t blk = blist_alloc(bl, &count, maxcount);
1171 printf(" R=%08llx, c=%08d\n",
1172 (long long)blk, count);
1178 if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
1179 blist_free(bl, da, count);
1185 if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
1187 (intmax_t)blist_fill(bl, da, count));
1197 "a %d %d -allocate\n"