2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the University nor the names of its contributors
14 * may be used to endorse or promote products derived from this software
15 * without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
18 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
19 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
21 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
23 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
26 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
32 * This module implements a general bitmap allocator/deallocator. The
33 * allocator eats around 2 bits per 'block'. The module does not
34 * try to interpret the meaning of a 'block' other than to return
35 * SWAPBLK_NONE on an allocation failure.
37 * A radix tree controls access to pieces of the bitmap, and includes
38 * auxiliary information at each interior node about the availabilty of
39 * contiguous free blocks in the subtree rooted at that node. Two radix
40 * constants are involved: one for the size of the bitmaps contained in the
41 * leaf nodes (BLIST_BMAP_RADIX), and one for the number of descendents of
42 * each of the meta (interior) nodes (BLIST_META_RADIX). Each subtree is
43 * associated with a range of blocks. The root of any subtree stores a
44 * hint field that defines an upper bound on the size of the largest
45 * allocation that can begin in the associated block range. A hint is an
46 * upper bound on a potential allocation, but not necessarily a tight upper
49 * The bitmap field in each node directs the search for available blocks.
50 * For a leaf node, a bit is set if the corresponding block is free. For a
51 * meta node, a bit is set if the corresponding subtree contains a free
52 * block somewhere within it. The search at a meta node considers only
53 * children of that node that represent a range that includes a free block.
55 * The hinting greatly increases code efficiency for allocations while
56 * the general radix structure optimizes both allocations and frees. The
57 * radix tree should be able to operate well no matter how much
58 * fragmentation there is and no matter how large a bitmap is used.
60 * The blist code wires all necessary memory at creation time. Neither
61 * allocations nor frees require interaction with the memory subsystem.
62 * The non-blocking nature of allocations and frees is required by swap
63 * code (vm/swap_pager.c).
65 * LAYOUT: The radix tree is laid out recursively using a linear array.
66 * Each meta node is immediately followed (laid out sequentially in
67 * memory) by BLIST_META_RADIX lower level nodes. This is a recursive
68 * structure but one that can be easily scanned through a very simple
69 * 'skip' calculation. The memory allocation is only large enough to
70 * cover the number of blocks requested at creation time. Nodes that
71 * represent blocks beyond that limit, nodes that would never be read
72 * or written, are not allocated, so that the last of the
73 * BLIST_META_RADIX lower level nodes of a some nodes may not be
76 * NOTE: the allocator cannot currently allocate more than
77 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
78 * large' if you try. This is an area that could use improvement. The
79 * radix is large enough that this restriction does not effect the swap
80 * system, though. Currently only the allocation code is affected by
81 * this algorithmic unfeature. The freeing code can handle arbitrary
84 * This code can be compiled stand-alone for debugging.
87 #include <sys/cdefs.h>
88 __FBSDID("$FreeBSD$");
92 #include <sys/param.h>
93 #include <sys/systm.h>
95 #include <sys/kernel.h>
96 #include <sys/blist.h>
97 #include <sys/malloc.h>
100 #include <sys/mutex.h>
104 #ifndef BLIST_NO_DEBUG
108 #include <sys/errno.h>
109 #include <sys/types.h>
110 #include <sys/malloc.h>
111 #include <sys/sbuf.h>
120 #define bitcount64(x) __bitcount64((uint64_t)(x))
121 #define malloc(a,b,c) calloc(a, 1)
122 #define free(a,b) free(a)
123 #define ummin(a,b) ((a) < (b) ? (a) : (b))
124 #define imin(a,b) ((a) < (b) ? (a) : (b))
125 #define KASSERT(a,b) assert(a)
127 #include <sys/blist.h>
132 * static support functions
134 static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk,
135 int *count, int maxcount);
136 static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
137 int maxcount, u_daddr_t radix);
138 static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
139 static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
141 static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
142 blist_t dest, daddr_t count);
143 static daddr_t blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
144 static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
147 static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix,
152 static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
155 _Static_assert(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0,
156 "radix divisibility error");
157 #define BLIST_BMAP_MASK (BLIST_BMAP_RADIX - 1)
158 #define BLIST_META_MASK (BLIST_META_RADIX - 1)
161 * For a subtree that can represent the state of up to 'radix' blocks, the
162 * number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX. If 'm'
163 * is short for BLIST_META_RADIX, then for a tree of height h with L=m**h
164 * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h,
165 * or, equivalently, (m**(h+1)-1)/(m-1). This quantity is called 'skip'
166 * in the 'meta' functions that process subtrees. Since integer division
167 * discards remainders, we can express this computation as
168 * skip = (m * m**h) / (m - 1)
169 * skip = (m * (radix / BLIST_BMAP_RADIX)) / (m - 1)
170 * and since m divides BLIST_BMAP_RADIX, we can simplify further to
171 * skip = (radix / (BLIST_BMAP_RADIX / m)) / (m - 1)
172 * skip = radix / ((BLIST_BMAP_RADIX / m) * (m - 1))
173 * so that simple integer division by a constant can safely be used for the
176 static inline daddr_t
177 radix_to_skip(daddr_t radix)
181 ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
185 * Provide a mask with count bits set, starting as position n.
187 static inline u_daddr_t
188 bitrange(int n, int count)
191 return (((u_daddr_t)-1 << n) &
192 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count))));
196 * Find the first bit set in a u_daddr_t.
199 generic_bitpos(u_daddr_t mask)
204 hi = BLIST_BMAP_RADIX;
205 while (lo + 1 < hi) {
206 mid = (lo + hi) >> 1;
207 if (mask & bitrange(0, mid))
216 bitpos(u_daddr_t mask)
219 switch (sizeof(mask)) {
220 #ifdef HAVE_INLINE_FFSLL
221 case sizeof(long long):
222 return (ffsll(mask) - 1);
224 #ifdef HAVE_INLINE_FFS
226 return (ffs(mask) - 1);
229 return (generic_bitpos(mask));
234 * blist_create() - create a blist capable of handling up to the specified
237 * blocks - must be greater than 0
238 * flags - malloc flags
240 * The smallest blist consists of a single leaf node capable of
241 * managing BLIST_BMAP_RADIX blocks.
244 blist_create(daddr_t blocks, int flags)
247 u_daddr_t nodes, radix;
249 KASSERT(blocks > 0, ("invalid block count"));
252 * Calculate the radix and node count used for scanning.
255 radix = BLIST_BMAP_RADIX;
256 while (radix <= blocks) {
257 nodes += 1 + (blocks - 1) / radix;
258 radix *= BLIST_META_RADIX;
261 bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags |
266 bl->bl_blocks = blocks;
267 bl->bl_radix = radix;
269 #if defined(BLIST_DEBUG)
271 "BLIST representing %lld blocks (%lld MB of swap)"
272 ", requiring %lldK of ram\n",
273 (long long)bl->bl_blocks,
274 (long long)bl->bl_blocks * 4 / 1024,
275 (long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
277 printf("BLIST raw radix tree contains %lld records\n",
285 blist_destroy(blist_t bl)
292 * blist_alloc() - reserve space in the block bitmap. Return the base
293 * of a contiguous region or SWAPBLK_NONE if space could
297 blist_alloc(blist_t bl, int *count, int maxcount)
301 KASSERT(*count <= maxcount,
302 ("invalid parameters %d > %d", *count, maxcount));
303 KASSERT(*count <= BLIST_MAX_ALLOC,
304 ("minimum allocation too large: %d", *count));
307 * This loop iterates at most twice. An allocation failure in the
308 * first iteration leads to a second iteration only if the cursor was
309 * non-zero. When the cursor is zero, an allocation failure will
310 * stop further iterations.
312 for (cursor = bl->bl_cursor;; cursor = 0) {
313 blk = blst_meta_alloc(bl->bl_root, cursor, count, maxcount,
315 if (blk != SWAPBLK_NONE) {
316 bl->bl_avail -= *count;
317 bl->bl_cursor = blk + *count;
318 if (bl->bl_cursor == bl->bl_blocks)
323 return (SWAPBLK_NONE);
328 * blist_avail() - return the number of free blocks.
331 blist_avail(blist_t bl)
334 return (bl->bl_avail);
338 * blist_free() - free up space in the block bitmap. Return the base
339 * of a contiguous region.
342 blist_free(blist_t bl, daddr_t blkno, daddr_t count)
345 KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
346 ("freeing invalid range: blkno %jx, count %d, blocks %jd",
347 (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
348 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
349 bl->bl_avail += count;
353 * blist_fill() - mark a region in the block bitmap as off-limits
354 * to the allocator (i.e. allocate it), ignoring any
355 * existing allocations. Return the number of blocks
356 * actually filled that were free before the call.
359 blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
363 KASSERT(blkno >= 0 && blkno + count <= bl->bl_blocks,
364 ("filling invalid range: blkno %jx, count %d, blocks %jd",
365 (uintmax_t)blkno, (int)count, (uintmax_t)bl->bl_blocks));
366 filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix);
367 bl->bl_avail -= filled;
372 * blist_resize() - resize an existing radix tree to handle the
373 * specified number of blocks. This will reallocate
374 * the tree and transfer the previous bitmap to the new
375 * one. When extending the tree you can specify whether
376 * the new blocks are to left allocated or freed.
379 blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
381 blist_t newbl = blist_create(count, flags);
385 if (count > save->bl_blocks)
386 count = save->bl_blocks;
387 blst_copy(save->bl_root, 0, save->bl_radix, newbl, count);
390 * If resizing upwards, should we free the new space or not?
392 if (freenew && count < newbl->bl_blocks) {
393 blist_free(newbl, count, newbl->bl_blocks - count);
401 * blist_print() - dump radix tree
404 blist_print(blist_t bl)
406 printf("BLIST avail = %jd, cursor = %08jx {\n",
407 (uintmax_t)bl->bl_avail, (uintmax_t)bl->bl_cursor);
409 if (bl->bl_root->bm_bitmap != 0)
410 blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
416 static const u_daddr_t fib[] = {
417 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
418 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
419 514229, 832040, 1346269, 2178309, 3524578,
423 * Use 'gap' to describe a maximal range of unallocated blocks/bits.
426 daddr_t start; /* current gap start, or SWAPBLK_NONE */
427 daddr_t num; /* number of gaps observed */
428 daddr_t max; /* largest gap size */
429 daddr_t avg; /* average gap size */
430 daddr_t err; /* sum - num * avg */
431 daddr_t histo[nitems(fib)]; /* # gaps in each size range */
432 int max_bucket; /* last histo elt with nonzero val */
436 * gap_stats_counting() - is the state 'counting 1 bits'?
437 * or 'skipping 0 bits'?
440 gap_stats_counting(const struct gap_stats *stats)
443 return (stats->start != SWAPBLK_NONE);
447 * init_gap_stats() - initialize stats on gap sizes
450 init_gap_stats(struct gap_stats *stats)
453 bzero(stats, sizeof(*stats));
454 stats->start = SWAPBLK_NONE;
458 * update_gap_stats() - update stats on gap sizes
461 update_gap_stats(struct gap_stats *stats, daddr_t posn)
466 if (!gap_stats_counting(stats)) {
470 size = posn - stats->start;
471 stats->start = SWAPBLK_NONE;
472 if (size > stats->max)
476 * Find the fibonacci range that contains size,
477 * expecting to find it in an early range.
481 while (hi < nitems(fib) && fib[hi] <= size) {
485 if (hi >= nitems(fib))
487 while (lo + 1 != hi) {
488 mid = (lo + hi) >> 1;
489 if (fib[mid] <= size)
495 if (lo > stats->max_bucket)
496 stats->max_bucket = lo;
497 stats->err += size - stats->avg;
499 stats->avg += stats->err / stats->num;
500 stats->err %= stats->num;
504 * dump_gap_stats() - print stats on gap sizes
507 dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
511 sbuf_printf(s, "number of maximal free ranges: %jd\n",
512 (intmax_t)stats->num);
513 sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
514 sbuf_printf(s, "average maximal free range size: %jd\n",
515 (intmax_t)stats->avg);
516 sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
517 sbuf_printf(s, " count | size range\n");
518 sbuf_printf(s, " ----- | ----------\n");
519 for (i = 0; i < stats->max_bucket; i++) {
520 if (stats->histo[i] != 0) {
521 sbuf_printf(s, "%20jd | ",
522 (intmax_t)stats->histo[i]);
523 if (fib[i] != fib[i + 1] - 1)
524 sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
525 (intmax_t)fib[i + 1] - 1);
527 sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
530 sbuf_printf(s, "%20jd | ", (intmax_t)stats->histo[i]);
531 if (stats->histo[i] > 1)
532 sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
533 (intmax_t)stats->max);
535 sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
539 * blist_stats() - dump radix tree stats
542 blist_stats(blist_t bl, struct sbuf *s)
544 struct gap_stats gstats;
545 struct gap_stats *stats = &gstats;
546 daddr_t i, nodes, radix;
547 u_daddr_t diff, mask;
550 init_gap_stats(stats);
553 while (i < bl->bl_radix + bl->bl_blocks) {
555 * Find max size subtree starting at i.
557 radix = BLIST_BMAP_RADIX;
558 while (((i / radix) & BLIST_META_MASK) == 0)
559 radix *= BLIST_META_RADIX;
562 * Check for skippable subtrees starting at i.
564 while (radix > BLIST_BMAP_RADIX) {
565 if (bl->bl_root[nodes].bm_bitmap == 0) {
566 if (gap_stats_counting(stats))
567 update_gap_stats(stats, i);
575 radix /= BLIST_META_RADIX;
577 if (radix == BLIST_BMAP_RADIX) {
581 mask = bl->bl_root[nodes].bm_bitmap;
582 diff = mask ^ (mask << 1);
583 if (gap_stats_counting(stats))
586 digit = bitpos(diff);
587 update_gap_stats(stats, i + digit);
588 diff ^= bitrange(digit, 1);
591 nodes += radix_to_skip(radix);
594 update_gap_stats(stats, i);
595 dump_gap_stats(stats, s);
598 /************************************************************************
599 * ALLOCATION SUPPORT FUNCTIONS *
600 ************************************************************************
602 * These support functions do all the actual work. They may seem
603 * rather longish, but that's because I've commented them up. The
604 * actual code is straight forward.
609 * BLST_NEXT_LEAF_ALLOC() - allocate the blocks starting with the next leaf.
611 * 'scan' is a leaf node, and its first block is at address 'start'. The
612 * next leaf node could be adjacent, or several nodes away if the least
613 * common ancestor of 'scan' and its neighbor is several levels up. Use
614 * addresses to determine how many meta-nodes lie between the leaves. If
615 * sequence of leaves starting with the next one has enough initial bits
616 * set, clear them and clear the bits in the meta nodes on the path up to
617 * the least common ancestor to mark any subtrees made completely empty.
620 blst_next_leaf_alloc(blmeta_t *scan, daddr_t start, int count, int maxcount)
626 start += BLIST_BMAP_RADIX;
627 for (blk = start; blk - start < maxcount; blk += BLIST_BMAP_RADIX) {
628 /* Skip meta-nodes, as long as they promise more free blocks. */
629 radix = BLIST_BMAP_RADIX;
630 while (((++scan)->bm_bitmap & 1) == 1 &&
631 ((blk / radix) & BLIST_META_MASK) == 0)
632 radix *= BLIST_META_RADIX;
633 if (~scan->bm_bitmap != 0) {
635 * Either there is no next leaf with any free blocks,
636 * or we've reached the next leaf and found that some
637 * of its blocks are not free. In the first case,
638 * bitpos() returns zero here.
640 avail = blk - start + bitpos(~scan->bm_bitmap);
641 if (avail < count || avail == 0) {
643 * There isn't a next leaf with enough free
644 * blocks at its beginning to bother
649 maxcount = imin(avail, maxcount);
650 if (maxcount % BLIST_BMAP_RADIX == 0) {
652 * There was no next leaf. Back scan up to
656 while (radix != BLIST_BMAP_RADIX) {
657 radix /= BLIST_META_RADIX;
660 blk -= BLIST_BMAP_RADIX;
666 * 'scan' is the last leaf that provides blocks. Clear from 1 to
667 * BLIST_BMAP_RADIX bits to represent the allocation of those last
670 if (maxcount % BLIST_BMAP_RADIX != 0)
671 scan->bm_bitmap &= ~bitrange(0, maxcount % BLIST_BMAP_RADIX);
676 /* Back up over meta-nodes, clearing bits if necessary. */
677 blk -= BLIST_BMAP_RADIX;
678 radix = BLIST_BMAP_RADIX;
679 while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0) {
680 if ((scan--)->bm_bitmap == 0)
681 scan->bm_bitmap ^= 1;
682 radix *= BLIST_META_RADIX;
684 if ((scan--)->bm_bitmap == 0)
685 scan[-digit * radix_to_skip(radix)].bm_bitmap ^=
686 (u_daddr_t)1 << digit;
690 /* Clear all the bits of this leaf. */
697 * BLST_LEAF_ALLOC() - allocate at a leaf in the radix tree (a bitmap).
699 * This function is the core of the allocator. Its execution time is
700 * proportional to log(count), plus height of the tree if the allocation
701 * crosses a leaf boundary.
704 blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int *count, int maxcount)
707 int bighint, count1, hi, lo, num_shifts;
710 num_shifts = fls(count1);
711 mask = ~scan->bm_bitmap;
712 while ((mask & (mask + 1)) != 0 && num_shifts > 0) {
714 * If bit i is 0 in mask, then bits in [i, i + (count1 >>
715 * num_shifts)] are 1 in scan->bm_bitmap. Reduce num_shifts to
716 * 0, while preserving this invariant. The updates to mask
717 * leave fewer bits 0, but each bit that remains 0 represents a
718 * longer string of consecutive 1-bits in scan->bm_bitmap. If
719 * more updates to mask cannot set more bits, because mask is
720 * partitioned with all 1 bits following all 0 bits, the loop
721 * terminates immediately.
724 mask |= mask >> ((count1 >> num_shifts) + 1) / 2;
726 bighint = count1 >> num_shifts;
729 * Update bighint. There is no allocation bigger than
730 * count1 >> num_shifts starting in this leaf.
732 scan->bm_bighint = bighint;
733 return (SWAPBLK_NONE);
736 /* Discard any candidates that appear before blk. */
737 if ((blk & BLIST_BMAP_MASK) != 0) {
738 if ((~mask & bitrange(0, blk & BLIST_BMAP_MASK)) != 0) {
739 /* Grow bighint in case all discarded bits are set. */
740 bighint += blk & BLIST_BMAP_MASK;
741 mask |= bitrange(0, blk & BLIST_BMAP_MASK);
743 scan->bm_bighint = bighint;
744 return (SWAPBLK_NONE);
747 blk -= blk & BLIST_BMAP_MASK;
751 * The least significant set bit in mask marks the start of the first
752 * available range of sufficient size. Find its position.
757 * Find how much space is available starting at that position.
759 if ((mask & (mask + 1)) != 0) {
760 /* Count the 1 bits starting at position lo. */
761 hi = bitpos(mask & (mask + 1)) + count1;
762 if (maxcount < hi - lo)
765 mask = ~bitrange(lo, *count);
766 } else if (maxcount <= BLIST_BMAP_RADIX - lo) {
767 /* All the blocks we can use are available here. */
770 mask = ~bitrange(lo, *count);
771 if (hi == BLIST_BMAP_RADIX)
772 scan->bm_bighint = bighint;
774 /* Check next leaf for some of the blocks we want or need. */
775 count1 = *count - (BLIST_BMAP_RADIX - lo);
776 maxcount -= BLIST_BMAP_RADIX - lo;
777 hi = blst_next_leaf_alloc(scan, blk, count1, maxcount);
780 * The next leaf cannot supply enough blocks to reach
781 * the minimum required allocation. The hint cannot be
782 * updated, because the same allocation request could
783 * be satisfied later, by this leaf, if the state of
784 * the next leaf changes, and without any changes to
787 return (SWAPBLK_NONE);
788 *count = BLIST_BMAP_RADIX - lo + hi;
789 scan->bm_bighint = bighint;
792 /* Clear the allocated bits from this leaf. */
793 scan->bm_bitmap &= mask;
798 * blist_meta_alloc() - allocate at a meta in the radix tree.
800 * Attempt to allocate at a meta node. If we can't, we update
801 * bighint and return a failure. Updating bighint optimize future
802 * calls that hit this node. We have to check for our collapse cases
803 * and we have a few optimizations strewn in as well.
806 blst_meta_alloc(blmeta_t *scan, daddr_t cursor, int *count,
807 int maxcount, u_daddr_t radix)
809 daddr_t blk, i, r, skip;
811 bool scan_from_start;
814 if (radix == BLIST_BMAP_RADIX)
815 return (blst_leaf_alloc(scan, cursor, count, maxcount));
816 blk = cursor & -radix;
817 scan_from_start = (cursor == blk);
818 radix /= BLIST_META_RADIX;
819 skip = radix_to_skip(radix);
820 mask = scan->bm_bitmap;
822 /* Discard any candidates that appear before cursor. */
823 digit = (cursor / radix) & BLIST_META_MASK;
824 mask &= (u_daddr_t)-1 << digit;
826 return (SWAPBLK_NONE);
829 * If the first try is for a block that includes the cursor, pre-undo
830 * the digit * radix offset in the first call; otherwise, ignore the
833 if (((mask >> digit) & 1) == 1)
834 cursor -= digit * radix;
839 * Examine the nonempty subtree associated with each bit set in mask.
842 digit = bitpos(mask);
843 i = 1 + digit * skip;
844 if (*count <= scan[i].bm_bighint) {
846 * The allocation might fit beginning in the i'th subtree.
848 r = blst_meta_alloc(&scan[i], cursor + digit * radix,
849 count, maxcount, radix);
850 if (r != SWAPBLK_NONE) {
851 if (scan[i].bm_bitmap == 0)
852 scan->bm_bitmap ^= bitrange(digit, 1);
857 } while ((mask ^= bitrange(digit, 1)) != 0);
860 * We couldn't allocate count in this subtree. If the whole tree was
861 * scanned, and the last tree node is allocated, update bighint.
863 if (scan_from_start && !(digit == BLIST_META_RADIX - 1 &&
864 scan[i].bm_bighint == BLIST_MAX_ALLOC))
865 scan->bm_bighint = *count - 1;
867 return (SWAPBLK_NONE);
871 * BLST_LEAF_FREE() - free allocated block from leaf bitmap
875 blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
880 * free some data in this bitmap
881 * mask=0000111111111110000
885 mask = bitrange(blk & BLIST_BMAP_MASK, count);
886 KASSERT((scan->bm_bitmap & mask) == 0,
887 ("freeing free block: %jx, size %d, mask %jx",
888 (uintmax_t)blk, count, (uintmax_t)scan->bm_bitmap & mask));
889 scan->bm_bitmap |= mask;
893 * BLST_META_FREE() - free allocated blocks from radix tree meta info
895 * This support routine frees a range of blocks from the bitmap.
896 * The range must be entirely enclosed by this radix node. If a
897 * meta node, we break the range down recursively to free blocks
898 * in subnodes (which means that this code can free an arbitrary
899 * range whereas the allocation code cannot allocate an arbitrary
903 blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix)
905 daddr_t blk, endBlk, i, skip;
909 * We could probably do a better job here. We are required to make
910 * bighint at least as large as the biggest allocable block of data.
911 * If we just shoehorn it, a little extra overhead will be incurred
912 * on the next allocation (but only that one typically).
914 scan->bm_bighint = BLIST_MAX_ALLOC;
916 if (radix == BLIST_BMAP_RADIX)
917 return (blst_leaf_free(scan, freeBlk, count));
919 endBlk = ummin(freeBlk + count, (freeBlk + radix) & -radix);
920 radix /= BLIST_META_RADIX;
921 skip = radix_to_skip(radix);
922 blk = freeBlk & -radix;
923 digit = (blk / radix) & BLIST_META_MASK;
924 endDigit = 1 + (((endBlk - 1) / radix) & BLIST_META_MASK);
925 scan->bm_bitmap |= bitrange(digit, endDigit - digit);
926 for (i = 1 + digit * skip; blk < endBlk; i += skip) {
928 count = ummin(blk, endBlk) - freeBlk;
929 blst_meta_free(&scan[i], freeBlk, count, radix);
935 * BLST_COPY() - copy one radix tree to another
937 * Locates free space in the source tree and frees it in the destination
938 * tree. The space may not already be free in the destination.
941 blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
944 daddr_t endBlk, i, skip;
950 if (radix == BLIST_BMAP_RADIX) {
951 u_daddr_t v = scan->bm_bitmap;
953 if (v == (u_daddr_t)-1) {
954 blist_free(dest, blk, count);
958 for (i = 0; i < count; ++i) {
959 if (v & ((u_daddr_t)1 << i))
960 blist_free(dest, blk + i, 1);
970 if (scan->bm_bitmap == 0) {
972 * Source all allocated, leave dest allocated
977 endBlk = blk + count;
978 radix /= BLIST_META_RADIX;
979 skip = radix_to_skip(radix);
980 for (i = 1; blk < endBlk; i += skip) {
984 count -= blk - endBlk;
985 blst_copy(&scan[i], blk - radix, radix, dest, count);
990 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
992 * This routine allocates all blocks in the specified range
993 * regardless of any existing allocations in that range. Returns
994 * the number of blocks allocated by the call.
997 blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
1002 mask = bitrange(blk & BLIST_BMAP_MASK, count);
1004 /* Count the number of blocks that we are allocating. */
1005 nblks = bitcount64(scan->bm_bitmap & mask);
1007 scan->bm_bitmap &= ~mask;
1012 * BLIST_META_FILL() - allocate specific blocks at a meta node
1014 * This routine allocates the specified range of blocks,
1015 * regardless of any existing allocations in the range. The
1016 * range must be within the extent of this node. Returns the
1017 * number of blocks allocated by the call.
1020 blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix)
1022 daddr_t blk, endBlk, i, nblks, skip;
1025 if (radix == BLIST_BMAP_RADIX)
1026 return (blst_leaf_fill(scan, allocBlk, count));
1028 endBlk = ummin(allocBlk + count, (allocBlk + radix) & -radix);
1029 radix /= BLIST_META_RADIX;
1030 skip = radix_to_skip(radix);
1031 blk = allocBlk & -radix;
1033 while (blk < endBlk) {
1034 digit = (blk / radix) & BLIST_META_MASK;
1035 i = 1 + digit * skip;
1037 count = ummin(blk, endBlk) - allocBlk;
1038 nblks += blst_meta_fill(&scan[i], allocBlk, count, radix);
1039 if (scan[i].bm_bitmap == 0)
1040 scan->bm_bitmap &= ~((u_daddr_t)1 << digit);
1049 blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
1055 if (radix == BLIST_BMAP_RADIX) {
1057 "%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n",
1059 (long long)blk, (long long)radix,
1060 1 + (BLIST_BMAP_RADIX - 1) / 4,
1061 (long long)scan->bm_bitmap,
1062 (long long)scan->bm_bighint
1068 "%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n",
1070 (long long)blk, (long long)radix,
1072 1 + (BLIST_META_RADIX - 1) / 4,
1073 (long long)scan->bm_bitmap,
1074 (long long)scan->bm_bighint
1077 radix /= BLIST_META_RADIX;
1078 skip = radix_to_skip(radix);
1081 mask = scan->bm_bitmap;
1082 /* Examine the nonempty subtree associated with each bit set in mask */
1084 digit = bitpos(mask);
1085 blst_radix_print(&scan[1 + digit * skip], blk + digit * radix,
1087 } while ((mask ^= bitrange(digit, 1)) != 0);
1101 main(int ac, char **av)
1103 int size = BLIST_META_RADIX * BLIST_BMAP_RADIX;
1108 for (i = 1; i < ac; ++i) {
1109 const char *ptr = av[i];
1111 size = strtol(ptr, NULL, 0);
1115 fprintf(stderr, "Bad option: %s\n", ptr - 2);
1118 bl = blist_create(size, M_WAITOK);
1119 blist_free(bl, 0, size);
1124 int count = 0, maxcount = 0;
1126 printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
1127 (long long)size, (long long)bl->bl_radix);
1129 if (fgets(buf, sizeof(buf), stdin) == NULL)
1133 if (sscanf(buf + 1, "%d", &count) == 1) {
1134 blist_resize(&bl, count, 1, M_WAITOK);
1142 s = sbuf_new_auto();
1145 printf("%s", sbuf_data(s));
1149 if (sscanf(buf + 1, "%d%d", &count, &maxcount) == 2) {
1150 daddr_t blk = blist_alloc(bl, &count, maxcount);
1151 printf(" R=%08llx, c=%08d\n",
1152 (long long)blk, count);
1158 if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
1159 blist_free(bl, da, count);
1165 if (sscanf(buf + 1, "%llx %d", &da, &count) == 2) {
1167 (intmax_t)blist_fill(bl, da, count));
1177 "a %d %d -allocate\n"