2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
32 * This module implements a general bitmap allocator/deallocator. The
33 * allocator eats around 2 bits per 'block'. The module does not
34 * try to interpret the meaning of a 'block' other than to return
35 * SWAPBLK_NONE on an allocation failure.
37 * A radix tree controls access to pieces of the bitmap, and includes
38 * auxiliary information at each interior node about the availabilty of
39 * contiguous free blocks in the subtree rooted at that node. A radix
40 * constant defines the size of the bitmaps contained in a leaf node
41 * and the number of descendents of each of the meta (interior) nodes.
42 * Each subtree is associated with a range of blocks. The root of any
43 * subtree stores a hint field that defines an upper bound on the size
44 * of the largest allocation that can begin in the associated block
45 * range. A hint is an upper bound on a potential allocation, but not
46 * necessarily a tight upper bound.
48 * The bitmap field in each node directs the search for available blocks.
49 * For a leaf node, a bit is set if the corresponding block is free. For a
50 * meta node, a bit is set if the corresponding subtree contains a free
51 * block somewhere within it. The search at a meta node considers only
52 * children of that node that represent a range that includes a free block.
54 * The hinting greatly increases code efficiency for allocations while
55 * the general radix structure optimizes both allocations and frees. The
56 * radix tree should be able to operate well no matter how much
57 * fragmentation there is and no matter how large a bitmap is used.
59 * The blist code wires all necessary memory at creation time. Neither
60 * allocations nor frees require interaction with the memory subsystem.
61 * The non-blocking nature of allocations and frees is required by swap
62 * code (vm/swap_pager.c).
64 * LAYOUT: The radix tree is laid out recursively using a linear array.
65 * Each meta node is immediately followed (laid out sequentially in
66 * memory) by BLIST_RADIX lower-level nodes. This is a recursive
67 * structure but one that can be easily scanned through a very simple
68 * 'skip' calculation. The memory allocation is only large enough to
69 * cover the number of blocks requested at creation time. Nodes that
70 * represent blocks beyond that limit, nodes that would never be read
71 * or written, are not allocated, so that the last of the
72 * BLIST_RADIX lower-level nodes of a some nodes may not be allocated.
74 * NOTE: the allocator cannot currently allocate more than
75 * BLIST_RADIX blocks per call. It will panic with 'allocation too
76 * large' if you try. This is an area that could use improvement. The
77 * radix is large enough that this restriction does not effect the swap
78 * system, though. Currently only the allocation code is affected by
79 * this algorithmic unfeature. The freeing code can handle arbitrary
82 * This code can be compiled stand-alone for debugging.
85 #include <sys/cdefs.h>
86 __FBSDID("$FreeBSD$");
90 #include <sys/param.h>
91 #include <sys/systm.h>
93 #include <sys/kernel.h>
94 #include <sys/blist.h>
95 #include <sys/malloc.h>
98 #include <sys/mutex.h>
102 #ifndef BLIST_NO_DEBUG
106 #include <sys/errno.h>
107 #include <sys/types.h>
108 #include <sys/malloc.h>
109 #include <sys/sbuf.h>
118 #define bitcount64(x) __bitcount64((uint64_t)(x))
119 #define malloc(a,b,c) calloc(a, 1)
120 #define free(a,b) free(a)
121 #define ummin(a,b) ((a) < (b) ? (a) : (b))
122 #define imin(a,b) ((a) < (b) ? (a) : (b))
123 #define KASSERT(a,b) assert(a)
125 #include <sys/blist.h>
130 * static support functions
132 static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk,
133 int *count, int maxcount);
134 static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
135 int maxcount, u_daddr_t radix);
136 static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
137 static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
139 static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
140 blist_t dest, daddr_t count);
141 static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
142 static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
145 static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix,
150 static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
153 #define BLIST_MASK (BLIST_RADIX - 1)
156 * For a subtree that can represent the state of up to 'radix' blocks, the
157 * number of leaf nodes of the subtree is L=radix/BLIST_RADIX. If 'm'
158 * is short for BLIST_RADIX, then for a tree of height h with L=m**h
159 * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h,
160 * or, equivalently, (m**(h+1)-1)/(m-1). This quantity is called 'skip'
161 * in the 'meta' functions that process subtrees. Since integer division
162 * discards remainders, we can express this computation as
163 * skip = (m * m**h) / (m - 1)
164 * skip = (m * (radix / m)) / (m - 1)
165 * skip = radix / (m - 1)
166 * so that simple integer division by a constant can safely be used for the
169 static inline daddr_t
170 radix_to_skip(daddr_t radix)
173 return (radix / BLIST_MASK);
177 * Provide a mask with count bits set, starting as position n.
179 static inline u_daddr_t
180 bitrange(int n, int count)
183 return (((u_daddr_t)-1 << n) &
184 ((u_daddr_t)-1 >> (BLIST_RADIX - (n + count))));
188 * Find the first bit set in a u_daddr_t.
191 generic_bitpos(u_daddr_t mask)
197 while (lo + 1 < hi) {
198 mid = (lo + hi) >> 1;
199 if (mask & bitrange(0, mid))
208 bitpos(u_daddr_t mask)
211 switch (sizeof(mask)) {
212 #ifdef HAVE_INLINE_FFSLL
213 case sizeof(long long):
214 return (ffsll(mask) - 1);
216 #ifdef HAVE_INLINE_FFS
218 return (ffs(mask) - 1);
221 return (generic_bitpos(mask));
226 * blist_create() - create a blist capable of handling up to the specified
229 * blocks - must be greater than 0
230 * flags - malloc flags
232 * The smallest blist consists of a single leaf node capable of
233 * managing BLIST_RADIX blocks.
236 blist_create(daddr_t blocks, int flags)
239 u_daddr_t nodes, radix;
241 KASSERT(blocks > 0, ("invalid block count"));
244 * Calculate the radix and node count used for scanning.
247 for (radix = 1; (blocks - 1) / BLIST_RADIX / radix > 0;
248 radix *= BLIST_RADIX)
249 nodes += 1 + (blocks - 1) / BLIST_RADIX / radix;
252 * Include a sentinel node to ensure that cross-leaf scans stay within
253 * the bounds of the allocation.
255 if (blocks % BLIST_RADIX == 0)
258 bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags |
263 bl->bl_blocks = blocks;
264 bl->bl_radix = radix;
266 #if defined(BLIST_DEBUG)
268 "BLIST representing %lld blocks (%lld MB of swap)"
269 ", requiring %lldK of ram\n",
270 (long long)bl->bl_blocks,
271 (long long)bl->bl_blocks * 4 / 1024,
272 (long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
274 printf("BLIST raw radix tree contains %lld records\n",
282 blist_destroy(blist_t bl)
289 * blist_alloc() - reserve space in the block bitmap. Return the base
290 * of a contiguous region or SWAPBLK_NONE if space could
294 blist_alloc(blist_t bl, int *count, int maxcount)
298 KASSERT(*count <= maxcount,
299 ("invalid parameters %d > %d", *count, maxcount));
300 KASSERT(*count <= BLIST_MAX_ALLOC,
301 ("minimum allocation too large: %d", *count));
304 * This loop iterates at most twice. An allocation failure in the
305 * first iteration leads to a second iteration only if the cursor was
306 * non-zero. When the cursor is zero, an allocation failure will
307 * stop further iterations.
309 for (cursor = bl->bl_cursor;; cursor = 0) {
310 blk = blst_meta_alloc(bl->bl_root, cursor, count, maxcount,
312 if (blk != SWAPBLK_NONE) {
313 bl->bl_avail -= *count;
314 bl->bl_cursor = blk + *count;
315 if (bl->bl_cursor == bl->bl_blocks)
320 return (SWAPBLK_NONE);
325 * blist_avail() - return the number of free blocks.
328 blist_avail(blist_t bl)
331 return (bl->bl_avail);
335 * blist_free() - free up space in the block bitmap. Return the base
336 * of a contiguous region.
339 blist_free(blist_t bl, daddr_t blkno, daddr_t count)
342 KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
343 ("freeing invalid range: blkno %jx, count %d, blocks %jd",
344 (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
345 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
346 bl->bl_avail += count;
350 * blist_fill() - mark a region in the block bitmap as off-limits
351 * to the allocator (i.e. allocate it), ignoring any
352 * existing allocations. Return the number of blocks
353 * actually filled that were free before the call.
356 blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
360 KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
361 ("filling invalid range: blkno %jx, count %d, blocks %jd",
362 (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
363 filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix);
364 bl->bl_avail -= filled;
369 * blist_resize() - resize an existing radix tree to handle the
370 * specified number of blocks. This will reallocate
371 * the tree and transfer the previous bitmap to the new
372 * one. When extending the tree you can specify whether
373 * the new blocks are to left allocated or freed.
376 blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
378 blist_t newbl = blist_create(count, flags);
382 if (count > save->bl_blocks)
383 count = save->bl_blocks;
384 blst_copy(save->bl_root, 0, save->bl_radix, newbl, count);
387 * If resizing upwards, should we free the new space or not?
389 if (freenew && count < newbl->bl_blocks) {
390 blist_free(newbl, count, newbl->bl_blocks - count);
398 * blist_print() - dump radix tree
401 blist_print(blist_t bl)
403 printf("BLIST avail = %jd, cursor = %08jx {\n",
404 (uintmax_t)bl->bl_avail, (uintmax_t)bl->bl_cursor);
406 if (bl->bl_root->bm_bitmap != 0)
407 blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
413 static const u_daddr_t fib[] = {
414 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
415 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
416 514229, 832040, 1346269, 2178309, 3524578,
420 * Use 'gap' to describe a maximal range of unallocated blocks/bits.
423 daddr_t start; /* current gap start, or SWAPBLK_NONE */
424 daddr_t num; /* number of gaps observed */
425 daddr_t max; /* largest gap size */
426 daddr_t avg; /* average gap size */
427 daddr_t err; /* sum - num * avg */
428 daddr_t histo[nitems(fib)]; /* # gaps in each size range */
429 int max_bucket; /* last histo elt with nonzero val */
433 * gap_stats_counting() - is the state 'counting 1 bits'?
434 * or 'skipping 0 bits'?
437 gap_stats_counting(const struct gap_stats *stats)
440 return (stats->start != SWAPBLK_NONE);
444 * init_gap_stats() - initialize stats on gap sizes
447 init_gap_stats(struct gap_stats *stats)
450 bzero(stats, sizeof(*stats));
451 stats->start = SWAPBLK_NONE;
455 * update_gap_stats() - update stats on gap sizes
458 update_gap_stats(struct gap_stats *stats, daddr_t posn)
463 if (!gap_stats_counting(stats)) {
467 size = posn - stats->start;
468 stats->start = SWAPBLK_NONE;
469 if (size > stats->max)
473 * Find the fibonacci range that contains size,
474 * expecting to find it in an early range.
478 while (hi < nitems(fib) && fib[hi] <= size) {
482 if (hi >= nitems(fib))
484 while (lo + 1 != hi) {
485 mid = (lo + hi) >> 1;
486 if (fib[mid] <= size)
492 if (lo > stats->max_bucket)
493 stats->max_bucket = lo;
494 stats->err += size - stats->avg;
496 stats->avg += stats->err / stats->num;
497 stats->err %= stats->num;
501 * dump_gap_stats() - print stats on gap sizes
504 dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
508 sbuf_printf(s, "number of maximal free ranges: %jd\n",
509 (intmax_t)stats->num);
510 sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
511 sbuf_printf(s, "average maximal free range size: %jd\n",
512 (intmax_t)stats->avg);
513 sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
514 sbuf_printf(s, " count | size range\n");
515 sbuf_printf(s, " ----- | ----------\n");
516 for (i = 0; i < stats->max_bucket; i++) {
517 if (stats->histo[i] != 0) {
518 sbuf_printf(s, "%20jd | ",
519 (intmax_t)stats->histo[i]);
520 if (fib[i] != fib[i + 1] - 1)
521 sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
522 (intmax_t)fib[i + 1] - 1);
524 sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
527 sbuf_printf(s, "%20jd | ", (intmax_t)stats->histo[i]);
528 if (stats->histo[i] > 1)
529 sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
530 (intmax_t)stats->max);
532 sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
536 * blist_stats() - dump radix tree stats
539 blist_stats(blist_t bl, struct sbuf *s)
541 struct gap_stats gstats;
542 struct gap_stats *stats = &gstats;
543 daddr_t i, nodes, radix;
544 u_daddr_t diff, mask;
547 init_gap_stats(stats);
549 radix = bl->bl_radix;
550 for (i = 0; i < bl->bl_blocks; ) {
552 * Check for skippable subtrees starting at i.
555 if (bl->bl_root[nodes].bm_bitmap == 0) {
556 if (gap_stats_counting(stats))
557 update_gap_stats(stats, i);
565 radix /= BLIST_RADIX;
571 mask = bl->bl_root[nodes].bm_bitmap;
572 diff = mask ^ (mask << 1);
573 if (gap_stats_counting(stats))
576 digit = bitpos(diff);
577 update_gap_stats(stats, i + digit);
578 diff ^= bitrange(digit, 1);
581 nodes += radix_to_skip(radix * BLIST_RADIX);
582 i += radix * BLIST_RADIX;
585 * Find max size subtree starting at i.
588 ((i / BLIST_RADIX / radix) & BLIST_MASK) == 0;
589 radix *= BLIST_RADIX)
592 update_gap_stats(stats, i);
593 dump_gap_stats(stats, s);
596 /************************************************************************
597 * ALLOCATION SUPPORT FUNCTIONS *
598 ************************************************************************
600 * These support functions do all the actual work. They may seem
601 * rather longish, but that's because I've commented them up. The
602 * actual code is straight forward.
607 * BLST_NEXT_LEAF_ALLOC() - allocate the blocks starting with the next leaf.
609 * 'scan' is a leaf node, and its first block is at address 'start'. The
610 * next leaf node could be adjacent, or several nodes away if the least
611 * common ancestor of 'scan' and its neighbor is several levels up. Use
612 * addresses to determine how many meta-nodes lie between the leaves. If
613 * sequence of leaves starting with the next one has enough initial bits
614 * set, clear them and clear the bits in the meta nodes on the path up to
615 * the least common ancestor to mark any subtrees made completely empty.
618 blst_next_leaf_alloc(blmeta_t *scan, daddr_t start, int count, int maxcount)
624 start += BLIST_RADIX;
625 for (blk = start; blk - start < maxcount; blk += BLIST_RADIX) {
626 /* Skip meta-nodes, as long as they promise more free blocks. */
628 while (((++scan)->bm_bitmap & 1) == 1 &&
629 ((blk / radix) & BLIST_MASK) == 0)
630 radix *= BLIST_RADIX;
631 if (~scan->bm_bitmap != 0) {
633 * Either there is no next leaf with any free blocks,
634 * or we've reached the next leaf and found that some
635 * of its blocks are not free. In the first case,
636 * bitpos() returns zero here.
638 avail = blk - start + bitpos(~scan->bm_bitmap);
639 if (avail < count || avail == 0) {
641 * There isn't a next leaf with enough free
642 * blocks at its beginning to bother
647 maxcount = imin(avail, maxcount);
648 if (maxcount % BLIST_RADIX == 0) {
650 * There was no next leaf. Back scan up to
654 radix /= BLIST_RADIX;
656 } while (radix != 1);
663 * 'scan' is the last leaf that provides blocks. Clear from 1 to
664 * BLIST_RADIX bits to represent the allocation of those last blocks.
666 if (maxcount % BLIST_RADIX != 0)
667 scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_RADIX);
672 /* Back up over meta-nodes, clearing bits if necessary. */
674 for (radix = BLIST_RADIX;
675 (digit = ((blk / radix) & BLIST_MASK)) == 0;
676 radix *= BLIST_RADIX) {
677 if ((scan--)->bm_bitmap == 0)
678 scan->bm_bitmap ^= 1;
680 if ((scan--)->bm_bitmap == 0)
681 scan[-digit * radix_to_skip(radix)].bm_bitmap ^=
682 (u_daddr_t)1 << digit;
686 /* Clear all the bits of this leaf. */
693 * BLST_LEAF_ALLOC() - allocate at a leaf in the radix tree (a bitmap).
695 * This function is the core of the allocator. Its execution time is
696 * proportional to log(count), plus height of the tree if the allocation
697 * crosses a leaf boundary.
700 blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int *count, int maxcount)
703 int bighint, count1, hi, lo, num_shifts;
706 num_shifts = fls(count1);
707 mask = ~scan->bm_bitmap;
708 while ((mask & (mask + 1)) != 0 && num_shifts > 0) {
710 * If bit i is 0 in mask, then bits in [i, i + (count1 >>
711 * num_shifts)] are 1 in scan->bm_bitmap. Reduce num_shifts to
712 * 0, while preserving this invariant. The updates to mask
713 * leave fewer bits 0, but each bit that remains 0 represents a
714 * longer string of consecutive 1-bits in scan->bm_bitmap. If
715 * more updates to mask cannot set more bits, because mask is
716 * partitioned with all 1 bits following all 0 bits, the loop
717 * terminates immediately.
720 mask |= mask >> ((count1 >> num_shifts) + 1) / 2;
722 bighint = count1 >> num_shifts;
725 * Update bighint. There is no allocation bigger than
726 * count1 >> num_shifts starting in this leaf.
728 scan->bm_bighint = bighint;
729 return (SWAPBLK_NONE);
732 /* Discard any candidates that appear before blk. */
733 if ((blk & BLIST_MASK) != 0) {
734 if ((~mask & bitrange(0, blk & BLIST_MASK)) != 0) {
735 /* Grow bighint in case all discarded bits are set. */
736 bighint += blk & BLIST_MASK;
737 mask |= bitrange(0, blk & BLIST_MASK);
739 scan->bm_bighint = bighint;
740 return (SWAPBLK_NONE);
743 blk -= blk & BLIST_MASK;
747 * The least significant set bit in mask marks the start of the first
748 * available range of sufficient size. Find its position.
753 * Find how much space is available starting at that position.
755 if ((mask & (mask + 1)) != 0) {
756 /* Count the 1 bits starting at position lo. */
757 hi = bitpos(mask & (mask + 1)) + count1;
758 if (maxcount < hi - lo)
761 mask = ~bitrange(lo, *count);
762 } else if (maxcount <= BLIST_RADIX - lo) {
763 /* All the blocks we can use are available here. */
766 mask = ~bitrange(lo, *count);
767 if (hi == BLIST_RADIX)
768 scan->bm_bighint = bighint;
770 /* Check next leaf for some of the blocks we want or need. */
771 count1 = *count - (BLIST_RADIX - lo);
772 maxcount -= BLIST_RADIX - lo;
773 hi = blst_next_leaf_alloc(scan, blk, count1, maxcount);
776 * The next leaf cannot supply enough blocks to reach
777 * the minimum required allocation. The hint cannot be
778 * updated, because the same allocation request could
779 * be satisfied later, by this leaf, if the state of
780 * the next leaf changes, and without any changes to
783 return (SWAPBLK_NONE);
784 *count = BLIST_RADIX - lo + hi;
785 scan->bm_bighint = bighint;
788 /* Clear the allocated bits from this leaf. */
789 scan->bm_bitmap &= mask;
794 * blist_meta_alloc() - allocate at a meta in the radix tree.
796 * Attempt to allocate at a meta node. If we can't, we update
797 * bighint and return a failure. Updating bighint optimize future
798 * calls that hit this node. We have to check for our collapse cases
799 * and we have a few optimizations strewn in as well.
802 blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
803 int maxcount, u_daddr_t radix)
805 daddr_t blk, i, r, skip;
807 bool scan_from_start;
811 return (blst_leaf_alloc(scan, cursor, count, maxcount));
812 blk = cursor & -(radix * BLIST_RADIX);
813 scan_from_start = (cursor == blk);
814 skip = radix_to_skip(radix);
815 mask = scan->bm_bitmap;
817 /* Discard any candidates that appear before cursor. */
818 digit = (cursor / radix) & BLIST_MASK;
819 mask &= (u_daddr_t)-1 << digit;
821 return (SWAPBLK_NONE);
824 * If the first try is for a block that includes the cursor, pre-undo
825 * the digit * radix offset in the first call; otherwise, ignore the
828 if (((mask >> digit) & 1) == 1)
829 cursor -= digit * radix;
834 * Examine the nonempty subtree associated with each bit set in mask.
837 digit = bitpos(mask);
838 i = 1 + digit * skip;
839 if (*count <= scan[i].bm_bighint) {
841 * The allocation might fit beginning in the i'th subtree.
843 r = blst_meta_alloc(&scan[i], cursor + digit * radix,
844 count, maxcount, radix / BLIST_RADIX);
845 if (r != SWAPBLK_NONE) {
846 if (scan[i].bm_bitmap == 0)
847 scan->bm_bitmap ^= bitrange(digit, 1);
852 } while ((mask ^= bitrange(digit, 1)) != 0);
855 * We couldn't allocate count in this subtree. If the whole tree was
856 * scanned, and the last tree node is allocated, update bighint.
858 if (scan_from_start && !(digit == BLIST_RADIX - 1 &&
859 scan[i].bm_bighint == BLIST_MAX_ALLOC))
860 scan->bm_bighint = *count - 1;
862 return (SWAPBLK_NONE);
866 * BLST_LEAF_FREE() - free allocated block from leaf bitmap
870 blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
875 * free some data in this bitmap
876 * mask=0000111111111110000
880 mask = bitrange(blk & BLIST_MASK, count);
881 KASSERT((scan->bm_bitmap & mask) == 0,
882 ("freeing free block: %jx, size %d, mask %jx",
883 (uintmax_t)blk, count, (uintmax_t)scan->bm_bitmap & mask));
884 scan->bm_bitmap |= mask;
888 * BLST_META_FREE() - free allocated blocks from radix tree meta info
890 * This support routine frees a range of blocks from the bitmap.
891 * The range must be entirely enclosed by this radix node. If a
892 * meta node, we break the range down recursively to free blocks
893 * in subnodes (which means that this code can free an arbitrary
894 * range whereas the allocation code cannot allocate an arbitrary
898 blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix)
900 daddr_t blk, endBlk, i, skip;
904 * We could probably do a better job here. We are required to make
905 * bighint at least as large as the biggest allocable block of data.
906 * If we just shoehorn it, a little extra overhead will be incurred
907 * on the next allocation (but only that one typically).
909 scan->bm_bighint = BLIST_MAX_ALLOC;
912 return (blst_leaf_free(scan, freeBlk, count));
914 endBlk = freeBlk + count;
915 blk = (freeBlk + radix * BLIST_RADIX) & -(radix * BLIST_RADIX);
917 * blk is first block past the end of the range of this meta node,
918 * or 0 in case of overflow.
921 endBlk = ummin(endBlk, blk);
922 skip = radix_to_skip(radix);
923 blk = freeBlk & -radix;
924 digit = (blk / radix) & BLIST_MASK;
925 endDigit = 1 + (((endBlk - 1) / radix) & BLIST_MASK);
926 scan->bm_bitmap |= bitrange(digit, endDigit - digit);
927 for (i = 1 + digit * skip; blk < endBlk; i += skip) {
929 count = ummin(blk, endBlk) - freeBlk;
930 blst_meta_free(&scan[i], freeBlk, count, radix / BLIST_RADIX);
936 * BLST_COPY() - copy one radix tree to another
938 * Locates free space in the source tree and frees it in the destination
939 * tree. The space may not already be free in the destination.
942 blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
945 daddr_t endBlk, i, skip;
952 u_daddr_t v = scan->bm_bitmap;
954 if (v == (u_daddr_t)-1) {
955 blist_free(dest, blk, count);
959 for (i = 0; i < count; ++i) {
960 if (v & ((u_daddr_t)1 << i))
961 blist_free(dest, blk + i, 1);
971 if (scan->bm_bitmap == 0) {
973 * Source all allocated, leave dest allocated
978 endBlk = blk + count;
979 skip = radix_to_skip(radix);
980 for (i = 1; blk < endBlk; i += skip) {
984 count -= blk - endBlk;
985 blst_copy(&scan[i], blk - radix,
986 radix / BLIST_RADIX, dest, count);
991 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
993 * This routine allocates all blocks in the specified range
994 * regardless of any existing allocations in that range. Returns
995 * the number of blocks allocated by the call.
998 blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
1003 mask = bitrange(blk & BLIST_MASK, count);
1005 /* Count the number of blocks that we are allocating. */
1006 nblks = bitcount64(scan->bm_bitmap & mask);
1008 scan->bm_bitmap &= ~mask;
1013 * BLIST_META_FILL() - allocate specific blocks at a meta node
1015 * This routine allocates the specified range of blocks,
1016 * regardless of any existing allocations in the range. The
1017 * range must be within the extent of this node. Returns the
1018 * number of blocks allocated by the call.
1021 blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix)
1023 daddr_t blk, endBlk, i, nblks, skip;
1027 return (blst_leaf_fill(scan, allocBlk, count));
1029 endBlk = allocBlk + count;
1030 blk = (allocBlk + radix * BLIST_RADIX) & -(radix * BLIST_RADIX);
1032 * blk is first block past the end of the range of this meta node,
1033 * or 0 in case of overflow.
1036 endBlk = ummin(endBlk, blk);
1037 skip = radix_to_skip(radix);
1038 blk = allocBlk & -radix;
1040 while (blk < endBlk) {
1041 digit = (blk / radix) & BLIST_MASK;
1042 i = 1 + digit * skip;
1044 count = ummin(blk, endBlk) - allocBlk;
1045 nblks += blst_meta_fill(&scan[i], allocBlk, count,
1046 radix / BLIST_RADIX);
1047 if (scan[i].bm_bitmap == 0)
1048 scan->bm_bitmap &= ~((u_daddr_t)1 << digit);
1057 blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
1065 "%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n",
1067 (long long)blk, (long long)BLIST_RADIX,
1068 (int)(1 + (BLIST_RADIX - 1) / 4),
1069 (long long)scan->bm_bitmap,
1070 (long long)scan->bm_bighint
1076 "%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n",
1078 (long long)blk, (long long)radix * BLIST_RADIX,
1079 (long long)radix * BLIST_RADIX,
1080 (int)(1 + (BLIST_RADIX - 1) / 4),
1081 (long long)scan->bm_bitmap,
1082 (long long)scan->bm_bighint
1085 skip = radix_to_skip(radix);
1088 mask = scan->bm_bitmap;
1089 /* Examine the nonempty subtree associated with each bit set in mask */
1091 digit = bitpos(mask);
1092 blst_radix_print(&scan[1 + digit * skip], blk + digit * radix,
1093 radix / BLIST_RADIX, tab);
1094 } while ((mask ^= bitrange(digit, 1)) != 0);
1108 main(int ac, char **av)
1110 daddr_t size = BLIST_RADIX * BLIST_RADIX;
1115 for (i = 1; i < ac; ++i) {
1116 const char *ptr = av[i];
1118 size = strtoll(ptr, NULL, 0);
1122 fprintf(stderr, "Bad option: %s\n", ptr - 2);
1125 bl = blist_create(size, M_WAITOK);
1127 fprintf(stderr, "blist_create failed\n");
1130 blist_free(bl, 0, size);
1135 int count = 0, maxcount = 0;
1137 printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
1138 (long long)size, (long long)bl->bl_radix * BLIST_RADIX);
1140 if (fgets(buf, sizeof(buf), stdin) == NULL)
1144 if (sscanf(buf + 1, "%d", &count) == 1) {
1145 blist_resize(&bl, count, 1, M_WAITOK);
1153 s = sbuf_new_auto();
1156 printf("%s", sbuf_data(s));
1160 if (sscanf(buf + 1, "%d%d", &count, &maxcount) == 2) {
1161 daddr_t blk = blist_alloc(bl, &count, maxcount);
1162 printf(" R=%08llx, c=%08d\n",
1163 (long long)blk, count);
1169 if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
1170 blist_free(bl, da, count);
1176 if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
1178 (intmax_t)blist_fill(bl, da, count));
1188 "a %d %d -allocate\n"