2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
32 * This module implements a general bitmap allocator/deallocator. The
33 * allocator eats around 2 bits per 'block'. The module does not
34 * try to interpret the meaning of a 'block' other than to return
35 * SWAPBLK_NONE on an allocation failure.
37 * A radix tree controls access to pieces of the bitmap, and includes
38 * auxiliary information at each interior node about the availabilty of
39 * contiguous free blocks in the subtree rooted at that node. Two radix
40 * constants are involved: one for the size of the bitmaps contained in the
41 * leaf nodes (BLIST_BMAP_RADIX), and one for the number of descendents of
42 * each of the meta (interior) nodes (BLIST_META_RADIX). Each subtree is
43 * associated with a range of blocks. The root of any subtree stores a
44 * hint field that defines an upper bound on the size of the largest
45 * allocation that can begin in the associated block range. A hint is an
46 * upper bound on a potential allocation, but not necessarily a tight upper
49 * The bitmap field in each node directs the search for available blocks.
50 * For a leaf node, a bit is set if the corresponding block is free. For a
51 * meta node, a bit is set if the corresponding subtree contains a free
52 * block somewhere within it. The search at a meta node considers only
53 * children of that node that represent a range that includes a free block.
55 * The hinting greatly increases code efficiency for allocations while
56 * the general radix structure optimizes both allocations and frees. The
57 * radix tree should be able to operate well no matter how much
58 * fragmentation there is and no matter how large a bitmap is used.
60 * The blist code wires all necessary memory at creation time. Neither
61 * allocations nor frees require interaction with the memory subsystem.
62 * The non-blocking nature of allocations and frees is required by swap
63 * code (vm/swap_pager.c).
65 * LAYOUT: The radix tree is laid out recursively using a linear array.
66 * Each meta node is immediately followed (laid out sequentially in
67 * memory) by BLIST_META_RADIX lower level nodes. This is a recursive
68 * structure but one that can be easily scanned through a very simple
69 * 'skip' calculation. The memory allocation is only large enough to
70 * cover the number of blocks requested at creation time. Nodes that
71 * represent blocks beyond that limit, nodes that would never be read
72 * or written, are not allocated, so that the last of the
73 * BLIST_META_RADIX lower level nodes of a some nodes may not be
76 * NOTE: the allocator cannot currently allocate more than
77 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
78 * large' if you try. This is an area that could use improvement. The
79 * radix is large enough that this restriction does not effect the swap
80 * system, though. Currently only the allocation code is affected by
81 * this algorithmic unfeature. The freeing code can handle arbitrary
84 * This code can be compiled stand-alone for debugging.
87 #include <sys/cdefs.h>
88 __FBSDID("$FreeBSD$");
92 #include <sys/param.h>
93 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/blist.h>
97 #include <sys/malloc.h>
100 #include <sys/mutex.h>
104 #ifndef BLIST_NO_DEBUG
108 #include <sys/errno.h>
109 #include <sys/types.h>
110 #include <sys/malloc.h>
111 #include <sys/sbuf.h>
119 #define bitcount64(x) __bitcount64((uint64_t)(x))
120 #define malloc(a,b,c) calloc(a, 1)
121 #define free(a,b) free(a)
122 #define ummin(a,b) ((a) < (b) ? (a) : (b))
124 #include <sys/blist.h>
126 void panic(const char *ctl, ...);
131 * static support functions
133 static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
134 static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count,
136 static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
137 static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
139 static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
140 blist_t dest, daddr_t count);
141 static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
142 static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
145 static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix,
150 static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
153 _Static_assert(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0,
154 "radix divisibility error");
155 #define BLIST_BMAP_MASK (BLIST_BMAP_RADIX - 1)
156 #define BLIST_META_MASK (BLIST_META_RADIX - 1)
159 * For a subtree that can represent the state of up to 'radix' blocks, the
160 * number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX. If 'm'
161 * is short for BLIST_META_RADIX, then for a tree of height h with L=m**h
162 * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h,
163 * or, equivalently, (m**(h+1)-1)/(m-1). This quantity is called 'skip'
164 * in the 'meta' functions that process subtrees. Since integer division
165 * discards remainders, we can express this computation as
166 * skip = (m * m**h) / (m - 1)
167 * skip = (m * (radix / BLIST_BMAP_RADIX)) / (m - 1)
168 * and since m divides BLIST_BMAP_RADIX, we can simplify further to
169 * skip = (radix / (BLIST_BMAP_RADIX / m)) / (m - 1)
170 * skip = radix / ((BLIST_BMAP_RADIX / m) * (m - 1))
171 * so that simple integer division by a constant can safely be used for the
174 static inline daddr_t
175 radix_to_skip(daddr_t radix)
179 ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
183 * Provide a mask with count bits set, starting as position n.
185 static inline u_daddr_t
186 bitrange(int n, int count)
189 return (((u_daddr_t)-1 << n) &
190 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count))));
195 * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
196 * Assumes that the argument has only one bit set.
199 bitpos(u_daddr_t mask)
203 switch (sizeof(mask)) {
204 #ifdef HAVE_INLINE_FFSLL
205 case sizeof(long long):
206 return (ffsll(mask) - 1);
210 hi = BLIST_BMAP_RADIX;
211 while (lo + 1 < hi) {
212 mid = (lo + hi) >> 1;
213 if ((mask >> mid) != 0)
223 * blist_create() - create a blist capable of handling up to the specified
226 * blocks - must be greater than 0
227 * flags - malloc flags
229 * The smallest blist consists of a single leaf node capable of
230 * managing BLIST_BMAP_RADIX blocks.
233 blist_create(daddr_t blocks, int flags)
236 u_daddr_t nodes, radix;
239 panic("invalid block count");
242 * Calculate the radix and node count used for scanning.
245 radix = BLIST_BMAP_RADIX;
246 while (radix <= blocks) {
247 nodes += 1 + (blocks - 1) / radix;
248 radix *= BLIST_META_RADIX;
251 bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags |
256 bl->bl_blocks = blocks;
257 bl->bl_radix = radix;
259 #if defined(BLIST_DEBUG)
261 "BLIST representing %lld blocks (%lld MB of swap)"
262 ", requiring %lldK of ram\n",
263 (long long)bl->bl_blocks,
264 (long long)bl->bl_blocks * 4 / 1024,
265 (long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
267 printf("BLIST raw radix tree contains %lld records\n",
275 blist_destroy(blist_t bl)
282 * blist_alloc() - reserve space in the block bitmap. Return the base
283 * of a contiguous region or SWAPBLK_NONE if space could
287 blist_alloc(blist_t bl, daddr_t count)
291 if (count > BLIST_MAX_ALLOC)
292 panic("allocation too large");
295 * This loop iterates at most twice. An allocation failure in the
296 * first iteration leads to a second iteration only if the cursor was
297 * non-zero. When the cursor is zero, an allocation failure will
298 * stop further iterations.
300 cursor = bl->bl_cursor;
302 blk = blst_meta_alloc(bl->bl_root, cursor, count,
304 if (blk != SWAPBLK_NONE) {
305 bl->bl_avail -= count;
306 bl->bl_cursor = blk + count;
307 if (bl->bl_cursor == bl->bl_blocks)
310 } else if (cursor == 0)
311 return (SWAPBLK_NONE);
317 * blist_avail() - return the number of free blocks.
320 blist_avail(blist_t bl)
323 return (bl->bl_avail);
327 * blist_free() - free up space in the block bitmap. Return the base
328 * of a contiguous region. Panic if an inconsistancy is
332 blist_free(blist_t bl, daddr_t blkno, daddr_t count)
335 if (blkno < 0 || blkno + count > bl->bl_blocks)
336 panic("freeing invalid range");
337 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
338 bl->bl_avail += count;
342 * blist_fill() - mark a region in the block bitmap as off-limits
343 * to the allocator (i.e. allocate it), ignoring any
344 * existing allocations. Return the number of blocks
345 * actually filled that were free before the call.
348 blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
352 if (blkno < 0 || blkno + count > bl->bl_blocks)
353 panic("filling invalid range");
354 filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix);
355 bl->bl_avail -= filled;
360 * blist_resize() - resize an existing radix tree to handle the
361 * specified number of blocks. This will reallocate
362 * the tree and transfer the previous bitmap to the new
363 * one. When extending the tree you can specify whether
364 * the new blocks are to left allocated or freed.
367 blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
369 blist_t newbl = blist_create(count, flags);
373 if (count > save->bl_blocks)
374 count = save->bl_blocks;
375 blst_copy(save->bl_root, 0, save->bl_radix, newbl, count);
378 * If resizing upwards, should we free the new space or not?
380 if (freenew && count < newbl->bl_blocks) {
381 blist_free(newbl, count, newbl->bl_blocks - count);
389 * blist_print() - dump radix tree
392 blist_print(blist_t bl)
394 printf("BLIST avail = %jd, cursor = %08jx {\n",
395 (uintmax_t)bl->bl_avail, (uintmax_t)bl->bl_cursor);
397 if (bl->bl_root->bm_bitmap != 0)
398 blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
404 static const u_daddr_t fib[] = {
405 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
406 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
407 514229, 832040, 1346269, 2178309, 3524578,
411 * Use 'gap' to describe a maximal range of unallocated blocks/bits.
414 daddr_t start; /* current gap start, or SWAPBLK_NONE */
415 daddr_t num; /* number of gaps observed */
416 daddr_t max; /* largest gap size */
417 daddr_t avg; /* average gap size */
418 daddr_t err; /* sum - num * avg */
419 daddr_t histo[nitems(fib)]; /* # gaps in each size range */
420 int max_bucket; /* last histo elt with nonzero val */
424 * gap_stats_counting() - is the state 'counting 1 bits'?
425 * or 'skipping 0 bits'?
428 gap_stats_counting(const struct gap_stats *stats)
431 return (stats->start != SWAPBLK_NONE);
435 * init_gap_stats() - initialize stats on gap sizes
438 init_gap_stats(struct gap_stats *stats)
441 bzero(stats, sizeof(*stats));
442 stats->start = SWAPBLK_NONE;
446 * update_gap_stats() - update stats on gap sizes
449 update_gap_stats(struct gap_stats *stats, daddr_t posn)
454 if (!gap_stats_counting(stats)) {
458 size = posn - stats->start;
459 stats->start = SWAPBLK_NONE;
460 if (size > stats->max)
464 * Find the fibonacci range that contains size,
465 * expecting to find it in an early range.
469 while (hi < nitems(fib) && fib[hi] <= size) {
473 if (hi >= nitems(fib))
475 while (lo + 1 != hi) {
476 mid = (lo + hi) >> 1;
477 if (fib[mid] <= size)
483 if (lo > stats->max_bucket)
484 stats->max_bucket = lo;
485 stats->err += size - stats->avg;
487 stats->avg += stats->err / stats->num;
488 stats->err %= stats->num;
492 * dump_gap_stats() - print stats on gap sizes
495 dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
499 sbuf_printf(s, "number of maximal free ranges: %jd\n",
500 (intmax_t)stats->num);
501 sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
502 sbuf_printf(s, "average maximal free range size: %jd\n",
503 (intmax_t)stats->avg);
504 sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
505 sbuf_printf(s, " count | size range\n");
506 sbuf_printf(s, " ----- | ----------\n");
507 for (i = 0; i < stats->max_bucket; i++) {
508 if (stats->histo[i] != 0) {
509 sbuf_printf(s, "%20jd | ",
510 (intmax_t)stats->histo[i]);
511 if (fib[i] != fib[i + 1] - 1)
512 sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
513 (intmax_t)fib[i + 1] - 1);
515 sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
518 sbuf_printf(s, "%20jd | ", (intmax_t)stats->histo[i]);
519 if (stats->histo[i] > 1)
520 sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
521 (intmax_t)stats->max);
523 sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
527 * blist_stats() - dump radix tree stats
530 blist_stats(blist_t bl, struct sbuf *s)
532 struct gap_stats gstats;
533 struct gap_stats *stats = &gstats;
534 daddr_t i, nodes, radix;
535 u_daddr_t bit, diff, mask;
537 init_gap_stats(stats);
540 while (i < bl->bl_radix + bl->bl_blocks) {
542 * Find max size subtree starting at i.
544 radix = BLIST_BMAP_RADIX;
545 while (((i / radix) & BLIST_META_MASK) == 0)
546 radix *= BLIST_META_RADIX;
549 * Check for skippable subtrees starting at i.
551 while (radix > BLIST_BMAP_RADIX) {
552 if (bl->bl_root[nodes].bm_bitmap == 0) {
553 if (gap_stats_counting(stats))
554 update_gap_stats(stats, i);
562 radix /= BLIST_META_RADIX;
564 if (radix == BLIST_BMAP_RADIX) {
568 mask = bl->bl_root[nodes].bm_bitmap;
569 diff = mask ^ (mask << 1);
570 if (gap_stats_counting(stats))
574 update_gap_stats(stats, i + bitpos(bit));
578 nodes += radix_to_skip(radix);
581 update_gap_stats(stats, i);
582 dump_gap_stats(stats, s);
585 /************************************************************************
586 * ALLOCATION SUPPORT FUNCTIONS *
587 ************************************************************************
589 * These support functions do all the actual work. They may seem
590 * rather longish, but that's because I've commented them up. The
591 * actual code is straight forward.
596 * BLST_NEXT_LEAF_ALLOC() - allocate the first few blocks in the next leaf.
598 * 'scan' is a leaf node, associated with a block containing 'blk'.
599 * The next leaf node could be adjacent, or several nodes away if the
600 * least common ancestor of 'scan' and its neighbor is several levels
601 * up. Use 'blk' to determine how many meta-nodes lie between the
602 * leaves. If the next leaf has enough initial bits set, clear them
603 * and clear the bits in the meta nodes on the path up to the least
604 * common ancestor to mark any subtrees made completely empty.
607 blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
615 blk += BLIST_BMAP_RADIX;
616 radix = BLIST_BMAP_RADIX;
617 while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0 &&
618 (next->bm_bitmap & 1) == 1) {
620 radix *= BLIST_META_RADIX;
622 if (((next->bm_bitmap + 1) & ~((u_daddr_t)-1 << count)) != 0) {
624 * The next leaf doesn't have enough free blocks at the
625 * beginning to complete the spanning allocation.
629 /* Clear the first 'count' bits in the next leaf to allocate. */
630 next->bm_bitmap &= (u_daddr_t)-1 << count;
633 * Update bitmaps of next-ancestors, up to least common ancestor.
635 skip = radix_to_skip(radix);
636 while (radix != BLIST_BMAP_RADIX && next->bm_bitmap == 0) {
637 (--next)->bm_bitmap ^= 1;
638 radix /= BLIST_META_RADIX;
640 if (next->bm_bitmap == 0)
641 scan[-digit * skip].bm_bitmap ^= (u_daddr_t)1 << digit;
646 * BLST_LEAF_ALLOC() - allocate at a leaf in the radix tree (a bitmap).
648 * This function is the core of the allocator. Its execution time is
649 * proportional to log(count), plus height of the tree if the allocation
650 * crosses a leaf boundary.
653 blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
655 u_daddr_t cursor_mask, mask;
656 int count1, hi, lo, num_shifts, range1, range_ext;
660 num_shifts = fls(count1);
661 mask = scan->bm_bitmap;
662 while ((-mask & ~mask) != 0 && num_shifts > 0) {
664 * If bit i is set in mask, then bits in [i, i+range1] are set
665 * in scan->bm_bitmap. The value of range1 is equal to count1
666 * >> num_shifts. Grow range1 and reduce num_shifts to 0,
667 * while preserving these invariants. The updates to mask
668 * leave fewer bits set, but each bit that remains set
669 * represents a longer string of consecutive bits set in
670 * scan->bm_bitmap. If more updates to mask cannot clear more
671 * bits, because mask is partitioned with all 0 bits preceding
672 * all 1 bits, the loop terminates immediately.
675 range_ext = range1 + ((count1 >> num_shifts) & 1);
677 * mask is a signed quantity for the shift because when it is
678 * shifted right, the sign bit should copied; when the last
679 * block of the leaf is free, pretend, for a while, that all the
680 * blocks that follow it are also free.
682 mask &= (daddr_t)mask >> range_ext;
687 * Update bighint. There is no allocation bigger than range1
688 * starting in this leaf.
690 scan->bm_bighint = range1;
691 return (SWAPBLK_NONE);
694 /* Discard any candidates that appear before blk. */
695 if ((blk & BLIST_BMAP_MASK) != 0) {
696 cursor_mask = mask & bitrange(0, blk & BLIST_BMAP_MASK);
697 if (cursor_mask != 0) {
700 return (SWAPBLK_NONE);
703 * Bighint change for last block allocation cannot
704 * assume that any other blocks are allocated, so the
705 * bighint cannot be reduced much.
707 range1 = BLIST_MAX_ALLOC - 1;
709 blk &= ~BLIST_BMAP_MASK;
713 * The least significant set bit in mask marks the start of the first
714 * available range of sufficient size. Clear all the bits but that one,
715 * and then find its position.
721 if (hi > BLIST_BMAP_RADIX) {
723 * An allocation within this leaf is impossible, so a successful
724 * allocation depends on the next leaf providing some of the blocks.
726 if (blst_next_leaf_alloc(scan, blk, hi - BLIST_BMAP_RADIX) != 0)
728 * The hint cannot be updated, because the same
729 * allocation request could be satisfied later, by this
730 * leaf, if the state of the next leaf changes, and
731 * without any changes to this leaf.
733 return (SWAPBLK_NONE);
734 hi = BLIST_BMAP_RADIX;
737 /* Set the bits of mask at position 'lo' and higher. */
739 if (hi == BLIST_BMAP_RADIX) {
741 * Update bighint. There is no allocation bigger than range1
742 * available in this leaf after this allocation completes.
744 scan->bm_bighint = range1;
746 /* Clear the bits of mask at position 'hi' and higher. */
747 mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi);
749 /* Clear the allocated bits from this leaf. */
750 scan->bm_bitmap &= ~mask;
755 * blist_meta_alloc() - allocate at a meta in the radix tree.
757 * Attempt to allocate at a meta node. If we can't, we update
758 * bighint and return a failure. Updating bighint optimize future
759 * calls that hit this node. We have to check for our collapse cases
760 * and we have a few optimizations strewn in as well.
763 blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
765 daddr_t blk, i, r, skip;
767 bool scan_from_start;
770 if (radix == BLIST_BMAP_RADIX)
771 return (blst_leaf_alloc(scan, cursor, count));
772 blk = cursor & -radix;
773 scan_from_start = (cursor == blk);
774 radix /= BLIST_META_RADIX;
775 skip = radix_to_skip(radix);
776 mask = scan->bm_bitmap;
778 /* Discard any candidates that appear before cursor. */
779 digit = (cursor / radix) & BLIST_META_MASK;
780 mask &= (u_daddr_t)-1 << digit;
782 return (SWAPBLK_NONE);
785 * If the first try is for a block that includes the cursor, pre-undo
786 * the digit * radix offset in the first call; otherwise, ignore the
789 if (((mask >> digit) & 1) == 1)
790 cursor -= digit * radix;
795 * Examine the nonempty subtree associated with each bit set in mask.
800 i = 1 + digit * skip;
801 if (count <= scan[i].bm_bighint) {
803 * The allocation might fit beginning in the i'th subtree.
805 r = blst_meta_alloc(&scan[i], cursor + digit * radix,
807 if (r != SWAPBLK_NONE) {
808 if (scan[i].bm_bitmap == 0)
809 scan->bm_bitmap ^= bit;
814 } while ((mask ^= bit) != 0);
817 * We couldn't allocate count in this subtree. If the whole tree was
818 * scanned, and the last tree node is allocated, update bighint.
820 if (scan_from_start && !(digit == BLIST_META_RADIX - 1 &&
821 scan[i].bm_bighint == BLIST_MAX_ALLOC))
822 scan->bm_bighint = count - 1;
824 return (SWAPBLK_NONE);
828 * BLST_LEAF_FREE() - free allocated block from leaf bitmap
832 blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
837 * free some data in this bitmap
838 * mask=0000111111111110000
842 mask = bitrange(blk & BLIST_BMAP_MASK, count);
843 if (scan->bm_bitmap & mask)
844 panic("freeing free block");
845 scan->bm_bitmap |= mask;
849 * BLST_META_FREE() - free allocated blocks from radix tree meta info
851 * This support routine frees a range of blocks from the bitmap.
852 * The range must be entirely enclosed by this radix node. If a
853 * meta node, we break the range down recursively to free blocks
854 * in subnodes (which means that this code can free an arbitrary
855 * range whereas the allocation code cannot allocate an arbitrary
859 blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix)
861 daddr_t blk, endBlk, i, skip;
865 * We could probably do a better job here. We are required to make
866 * bighint at least as large as the biggest allocable block of data.
867 * If we just shoehorn it, a little extra overhead will be incurred
868 * on the next allocation (but only that one typically).
870 scan->bm_bighint = BLIST_MAX_ALLOC;
872 if (radix == BLIST_BMAP_RADIX)
873 return (blst_leaf_free(scan, freeBlk, count));
875 endBlk = ummin(freeBlk + count, (freeBlk + radix) & -radix);
876 radix /= BLIST_META_RADIX;
877 skip = radix_to_skip(radix);
878 blk = freeBlk & -radix;
879 digit = (blk / radix) & BLIST_META_MASK;
880 endDigit = 1 + (((endBlk - 1) / radix) & BLIST_META_MASK);
881 scan->bm_bitmap |= bitrange(digit, endDigit - digit);
882 for (i = 1 + digit * skip; blk < endBlk; i += skip) {
884 count = ummin(blk, endBlk) - freeBlk;
885 blst_meta_free(&scan[i], freeBlk, count, radix);
891 * BLST_COPY() - copy one radix tree to another
893 * Locates free space in the source tree and frees it in the destination
894 * tree. The space may not already be free in the destination.
897 blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
900 daddr_t endBlk, i, skip;
906 if (radix == BLIST_BMAP_RADIX) {
907 u_daddr_t v = scan->bm_bitmap;
909 if (v == (u_daddr_t)-1) {
910 blist_free(dest, blk, count);
914 for (i = 0; i < count; ++i) {
915 if (v & ((u_daddr_t)1 << i))
916 blist_free(dest, blk + i, 1);
926 if (scan->bm_bitmap == 0) {
928 * Source all allocated, leave dest allocated
933 endBlk = blk + count;
934 radix /= BLIST_META_RADIX;
935 skip = radix_to_skip(radix);
936 for (i = 1; blk < endBlk; i += skip) {
940 count -= blk - endBlk;
941 blst_copy(&scan[i], blk - radix, radix, dest, count);
946 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
948 * This routine allocates all blocks in the specified range
949 * regardless of any existing allocations in that range. Returns
950 * the number of blocks allocated by the call.
953 blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
958 mask = bitrange(blk & BLIST_BMAP_MASK, count);
960 /* Count the number of blocks that we are allocating. */
961 nblks = bitcount64(scan->bm_bitmap & mask);
963 scan->bm_bitmap &= ~mask;
968 * BLIST_META_FILL() - allocate specific blocks at a meta node
970 * This routine allocates the specified range of blocks,
971 * regardless of any existing allocations in the range. The
972 * range must be within the extent of this node. Returns the
973 * number of blocks allocated by the call.
976 blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix)
978 daddr_t blk, endBlk, i, nblks, skip;
981 if (radix == BLIST_BMAP_RADIX)
982 return (blst_leaf_fill(scan, allocBlk, count));
984 endBlk = ummin(allocBlk + count, (allocBlk + radix) & -radix);
985 radix /= BLIST_META_RADIX;
986 skip = radix_to_skip(radix);
987 blk = allocBlk & -radix;
989 while (blk < endBlk) {
990 digit = (blk / radix) & BLIST_META_MASK;
991 i = 1 + digit * skip;
993 count = ummin(blk, endBlk) - allocBlk;
994 nblks += blst_meta_fill(&scan[i], allocBlk, count, radix);
995 if (scan[i].bm_bitmap == 0)
996 scan->bm_bitmap &= ~((u_daddr_t)1 << digit);
1005 blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
1008 u_daddr_t bit, mask;
1011 if (radix == BLIST_BMAP_RADIX) {
1013 "%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n",
1015 (long long)blk, (long long)radix,
1016 1 + (BLIST_BMAP_RADIX - 1) / 4,
1017 (long long)scan->bm_bitmap,
1018 (long long)scan->bm_bighint
1024 "%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n",
1026 (long long)blk, (long long)radix,
1028 1 + (BLIST_META_RADIX - 1) / 4,
1029 (long long)scan->bm_bitmap,
1030 (long long)scan->bm_bighint
1033 radix /= BLIST_META_RADIX;
1034 skip = radix_to_skip(radix);
1037 mask = scan->bm_bitmap;
1038 /* Examine the nonempty subtree associated with each bit set in mask */
1041 digit = bitpos(bit);
1042 blst_radix_print(&scan[1 + digit * skip], blk + digit * radix,
1044 } while ((mask ^= bit) != 0);
1058 main(int ac, char **av)
1060 int size = BLIST_META_RADIX * BLIST_BMAP_RADIX;
1065 for (i = 1; i < ac; ++i) {
1066 const char *ptr = av[i];
1068 size = strtol(ptr, NULL, 0);
1072 fprintf(stderr, "Bad option: %s\n", ptr - 2);
1075 bl = blist_create(size, M_WAITOK);
1076 blist_free(bl, 0, size);
1081 long long count = 0;
1083 printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
1084 (long long)size, (long long)bl->bl_radix);
1086 if (fgets(buf, sizeof(buf), stdin) == NULL)
1090 if (sscanf(buf + 1, "%lld", &count) == 1) {
1091 blist_resize(&bl, count, 1, M_WAITOK);
1099 s = sbuf_new_auto();
1102 printf("%s", sbuf_data(s));
1106 if (sscanf(buf + 1, "%lld", &count) == 1) {
1107 daddr_t blk = blist_alloc(bl, count);
1108 printf(" R=%08llx\n", (long long)blk);
1114 if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
1115 blist_free(bl, da, count);
1121 if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
1123 (intmax_t)blist_fill(bl, da, count));
1149 panic(const char *ctl, ...)
1154 vfprintf(stderr, ctl, va);
1155 fprintf(stderr, "\n");