/*-
 * Copyright (c) 1998 Matthew Dillon.  All Rights Reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*
 * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
 *
 *	This module implements a general bitmap allocator/deallocator.  The
 *	allocator eats around 2 bits per 'block'.  The module does not
 *	try to interpret the meaning of a 'block' other than to return
 *	SWAPBLK_NONE on an allocation failure.
 *
 *	A radix tree is used to maintain the bitmap.  Two radix constants are
 *	involved: one for the bitmaps contained in the leaf nodes (typically
 *	64), and one for the meta nodes (typically 16).  Both meta and leaf
 *	nodes have a hint field.  This field gives us a hint as to the largest
 *	free contiguous range of blocks under the node.  It may contain a
 *	value that is too high, but will never contain a value that is too
 *	low.  When the radix tree is searched, allocation failures in subtrees
 *	update the hint.
 *
 *	The radix tree also implements two collapsed states for meta nodes:
 *	the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
 *	in either of these two states, all information contained underneath
 *	the node is considered stale.  These states are used to optimize
 *	allocation and freeing operations.
 *
 *	The hinting greatly increases code efficiency for allocations while
 *	the general radix structure optimizes both allocations and frees.  The
 *	radix tree should be able to operate well no matter how much
 *	fragmentation there is and no matter how large a bitmap is used.
 *
 *	The blist code wires all necessary memory at creation time.  Neither
 *	allocations nor frees require interaction with the memory subsystem.
 *	The non-blocking features of the blist code are used in the swap code
 *	(vm/swap_pager.c).
 *
 *	LAYOUT: The radix tree is laid out recursively using a
 *	linear array.  Each meta node is immediately followed (laid out
 *	sequentially in memory) by BLIST_META_RADIX lower-level nodes.  This
 *	is a recursive structure but one that can be easily scanned through
 *	a very simple 'skip' calculation.  In order to support large radixes,
 *	portions of the tree may reside outside our memory allocation.  We
 *	handle this with an early-termination optimization (when bighint is
 *	set to -1) on the scan.  The memory allocation is only large enough
 *	to cover the number of blocks requested at creation time, even if it
 *	must be encompassed in a larger root-node radix.
 *
 *	NOTE: the allocator cannot currently allocate more than
 *	BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
 *	large' if you try.  This is an area that could use improvement.  The
 *	radix is large enough that this restriction does not affect the swap
 *	system, though.  Currently only the allocation code is affected by
 *	this algorithmic unfeature.  The freeing code can handle arbitrary
 *	ranges.
 *
 *	This code can be compiled stand-alone for debugging.
 */
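/*
 * Illustrative usage sketch (added; not part of the original source).  It
 * exercises only the public API defined below, with arbitrary sizes, and
 * is intentionally disabled.  Note that a freshly created blist starts
 * with every block allocated, so a caller frees the usable range first.
 */
#if 0
	blist_t bl;
	daddr_t blk;

	bl = blist_create(1024, M_WAITOK);	/* manage blocks [0, 1024) */
	blist_free(bl, 0, 1024);		/* mark them all free */
	blk = blist_alloc(bl, 16);		/* contiguous 16-block run */
	if (blk != SWAPBLK_NONE)
		blist_free(bl, blk, 16);	/* release the run */
	blist_destroy(bl);
#endif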
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
#include <sys/sbuf.h>
#include <sys/proc.h>
#include <sys/mutex.h>

#else

#ifndef BLIST_NO_DEBUG
#define	BLIST_DEBUG
#endif

#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/sbuf.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stdbool.h>

#define	bitcount64(x)	__bitcount64((uint64_t)(x))
#define	malloc(a,b,c)	calloc(a, 1)
#define	free(a,b)	free(a)
#define	CTASSERT(expr)

#include <sys/blist.h>

void panic(const char *ctl, ...);

#endif
/*
 * static support functions
 */
static daddr_t	blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count,
		    daddr_t cursor);
static daddr_t	blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count,
		    u_daddr_t radix);
static void	blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
static void	blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
		    u_daddr_t radix);
static void	blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
		    blist_t dest, daddr_t count);
static daddr_t	blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
static daddr_t	blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
		    u_daddr_t radix);
static daddr_t	blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t count);
#ifndef _KERNEL
static void	blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix,
		    int tab);
#endif
#ifdef _KERNEL
static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
#endif

CTASSERT(BLIST_BMAP_RADIX % BLIST_META_RADIX == 0);
#define	BLIST_META_MASK	(BLIST_META_RADIX - 1)
/*
 * For a subtree that can represent the state of up to 'radix' blocks, the
 * number of leaf nodes of the subtree is L=radix/BLIST_BMAP_RADIX.  If 'm'
 * is short for BLIST_META_RADIX, then for a tree of height h with L=m**h
 * leaf nodes, the total number of tree nodes is 1 + m + m**2 + ... + m**h,
 * or, equivalently, (m**(h+1)-1)/(m-1).  This quantity is called 'skip'
 * in the 'meta' functions that process subtrees.  Since integer division
 * discards remainders, we can express this computation as
 * skip = (m * m**h) / (m - 1)
 * skip = (m * (radix / BLIST_BMAP_RADIX)) / (m - 1)
 * and since m divides BLIST_BMAP_RADIX, we can simplify further to
 * skip = (radix / (BLIST_BMAP_RADIX / m)) / (m - 1)
 * skip = radix / ((BLIST_BMAP_RADIX / m) * (m - 1))
 * so that simple integer division by a constant can safely be used for the
 * calculation.
 */
static inline daddr_t
radix_to_skip(daddr_t radix)
{

	return (radix /
	    ((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
}
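/*
 * Worked example (added): with BLIST_BMAP_RADIX = 64 and BLIST_META_RADIX
 * = 16, a subtree with radix = 1024 is one meta node over 16 leaves
 * (h = 1), 17 nodes in all, and the formula above agrees:
 * 1024 / ((64 / 16) * 15) = 1024 / 60 = 17.
 */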
/*
 * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
 * Assumes that the argument has only one bit set.
 */
static inline int
bitpos(u_daddr_t mask)
{
	int hi, lo, mid;

	switch (sizeof(mask)) {
#ifdef HAVE_INLINE_FFSLL
	case sizeof(long long):
		return (ffsll(mask) - 1);
#endif
	default:
		lo = 0;
		hi = BLIST_BMAP_RADIX;
		while (lo + 1 < hi) {
			mid = (lo + hi) >> 1;
			if ((mask >> mid) != 0)
				lo = mid;
			else
				hi = mid;
		}
		return (lo);
	}
}
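/*
 * Example (added): bitpos((u_daddr_t)1 << 17) returns 17.  The binary
 * search keeps the invariant that the set bit lies at a position in
 * [lo, hi), testing (mask >> mid) != 0 to decide which half to keep.
 */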
/*
 * blist_create() - create a blist capable of handling up to the specified
 *		    number of blocks
 *
 *	blocks - must be greater than 0
 *	flags  - malloc flags
 *
 *	The smallest blist consists of a single leaf node capable of
 *	managing BLIST_BMAP_RADIX blocks.
 */
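/*
 * Worked sizing example (added): for blocks = 100000 the loop below grows
 * radix through 64, 1024 and 16384 to 262144, the smallest
 * BLIST_BMAP_RADIX * BLIST_META_RADIX**k value covering the request;
 * blst_radix_init() then sizes the node array for only the portion of the
 * tree that 100000 blocks actually need.
 */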
blist_t
blist_create(daddr_t blocks, int flags)
{
	blist_t bl;
	daddr_t nodes, radix;

	/*
	 * Calculate the radix field used for scanning.
	 */
	radix = BLIST_BMAP_RADIX;
	while (radix < blocks) {
		radix *= BLIST_META_RADIX;
	}
	nodes = 1 + blst_radix_init(NULL, radix, blocks);

	bl = malloc(sizeof(struct blist), M_SWAP, flags);
	if (bl == NULL)
		return (NULL);

	bl->bl_blocks = blocks;
	bl->bl_radix = radix;
	bl->bl_cursor = 0;
	bl->bl_root = malloc(nodes * sizeof(blmeta_t), M_SWAP, flags);
	if (bl->bl_root == NULL) {
		free(bl, M_SWAP);
		return (NULL);
	}
	blst_radix_init(bl->bl_root, radix, blocks);

#if defined(BLIST_DEBUG)
	printf(
		"BLIST representing %lld blocks (%lld MB of swap)"
		", requiring %lldK of ram\n",
		(long long)bl->bl_blocks,
		(long long)bl->bl_blocks * 4 / 1024,
		(long long)(nodes * sizeof(blmeta_t) + 1023) / 1024
	);
	printf("BLIST raw radix tree contains %lld records\n",
	    (long long)nodes);
#endif

	return (bl);
}
void
blist_destroy(blist_t bl)
{

	free(bl->bl_root, M_SWAP);
	free(bl, M_SWAP);
}
/*
 * blist_alloc() -	reserve space in the block bitmap.  Return the base
 *			of a contiguous region or SWAPBLK_NONE if space could
 *			not be allocated.
 */
daddr_t
blist_alloc(blist_t bl, daddr_t count)
{
	daddr_t blk;

	/*
	 * This loop iterates at most twice.  An allocation failure in the
	 * first iteration leads to a second iteration only if the cursor was
	 * non-zero.  When the cursor is zero, an allocation failure will
	 * reduce the hint, stopping further iterations.
	 */
	while (count <= bl->bl_root->bm_bighint) {
		blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count,
		    bl->bl_radix);
		if (blk != SWAPBLK_NONE) {
			bl->bl_cursor = blk + count;
			if (bl->bl_cursor == bl->bl_blocks)
				bl->bl_cursor = 0;
			return (blk);
		} else if (bl->bl_cursor != 0)
			bl->bl_cursor = 0;
	}
	return (SWAPBLK_NONE);
}
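/*
 * Example of the cursor policy above (added): with blocks [0, 128) free
 * and bl_cursor == 100, a request for 40 blocks fails in [100, 128),
 * resets the cursor to zero, and succeeds at block 0 on the second pass.
 */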
/*
 * blist_avail() -	return the number of free blocks.
 */
daddr_t
blist_avail(blist_t bl)
{

	if (bl->bl_radix == BLIST_BMAP_RADIX)
		return (bitcount64(bl->bl_root->u.bmu_bitmap));
	else
		return (bl->bl_root->u.bmu_avail);
}
/*
 * blist_free() -	free up space in the block bitmap.  Return the base
 *			of a contiguous region.  Panic if an inconsistency is
 *			found.
 */
void
blist_free(blist_t bl, daddr_t blkno, daddr_t count)
{

	blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
}
/*
 * blist_fill() -	mark a region in the block bitmap as off-limits
 *			to the allocator (i.e. allocate it), ignoring any
 *			existing allocations.  Return the number of blocks
 *			actually filled that were free before the call.
 */
daddr_t
blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
{

	return (blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix));
}
/*
 * blist_resize() -	resize an existing radix tree to handle the
 *			specified number of blocks.  This will reallocate
 *			the tree and transfer the previous bitmap to the new
 *			one.  When extending the tree you can specify whether
 *			the new blocks are to be left allocated or freed.
 */
void
blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
{
	blist_t newbl = blist_create(count, flags);
	blist_t save = *pbl;

	*pbl = newbl;
	if (count > save->bl_blocks)
		count = save->bl_blocks;
	blst_copy(save->bl_root, 0, save->bl_radix, newbl, count);

	/*
	 * If resizing upwards, should we free the new space or not?
	 */
	if (freenew && count < newbl->bl_blocks) {
		blist_free(newbl, count, newbl->bl_blocks - count);
	}
	blist_destroy(save);
}
#ifdef BLIST_DEBUG

/*
 * blist_print()    - dump radix tree
 */
void
blist_print(blist_t bl)
{

	printf("BLIST cursor = %08jx {\n", (uintmax_t)bl->bl_cursor);
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
	printf("}\n");
}

#endif
static const u_daddr_t fib[] = {
	1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597, 2584,
	4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418, 317811,
	514229, 832040, 1346269, 2178309, 3524578,
};
/*
 * Use 'gap' to describe a maximal range of unallocated blocks/bits.
 */
struct gap_stats {
	daddr_t	start;		/* current gap start, or SWAPBLK_NONE */
	daddr_t	num;		/* number of gaps observed */
	daddr_t	max;		/* largest gap size */
	daddr_t	avg;		/* average gap size */
	daddr_t	err;		/* sum - num * avg */
	daddr_t	histo[nitems(fib)]; /* # gaps in each size range */
	int	max_bucket;	/* last histo elt with nonzero val */
};
/*
 * gap_stats_counting()    - is the state 'counting 1 bits'?
 *                           or 'skipping 0 bits'?
 */
static inline bool
gap_stats_counting(const struct gap_stats *stats)
{

	return (stats->start != SWAPBLK_NONE);
}
/*
 * init_gap_stats()    - initialize stats on gap sizes
 */
static inline void
init_gap_stats(struct gap_stats *stats)
{

	bzero(stats, sizeof(*stats));
	stats->start = SWAPBLK_NONE;
}
/*
 * update_gap_stats()    - update stats on gap sizes
 */
static void
update_gap_stats(struct gap_stats *stats, daddr_t posn)
{
	daddr_t size;
	int hi, lo, mid;

	if (!gap_stats_counting(stats)) {
		stats->start = posn;
		return;
	}
	size = posn - stats->start;
	stats->start = SWAPBLK_NONE;
	if (size > stats->max)
		stats->max = size;

	/*
	 * Find the Fibonacci range that contains size,
	 * expecting to find it in an early range.
	 */
	lo = 0;
	hi = 1;
	while (hi < nitems(fib) && fib[hi] <= size) {
		lo = hi;
		hi *= 2;
	}
	if (hi >= nitems(fib))
		hi = nitems(fib);
	while (lo + 1 != hi) {
		mid = (lo + hi) >> 1;
		if (fib[mid] <= size)
			lo = mid;
		else
			hi = mid;
	}
	stats->histo[lo]++;
	if (lo > stats->max_bucket)
		stats->max_bucket = lo;
	stats->err += size - stats->avg;
	stats->num++;
	stats->avg += stats->err / stats->num;
	stats->err %= stats->num;
}
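/*
 * The err/avg updates at the end of update_gap_stats() maintain the
 * invariant err == sum - num * avg using integer arithmetic only.
 * Worked example (added): after gaps of sizes 5 and 8, num == 2,
 * avg == 6 and err == 1, since 5 + 8 == 2 * 6 + 1.
 */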
/*
 * dump_gap_stats()    - print stats on gap sizes
 */
static inline void
dump_gap_stats(const struct gap_stats *stats, struct sbuf *s)
{
	int i;

	sbuf_printf(s, "number of maximal free ranges: %jd\n",
	    (intmax_t)stats->num);
	sbuf_printf(s, "largest free range: %jd\n", (intmax_t)stats->max);
	sbuf_printf(s, "average maximal free range size: %jd\n",
	    (intmax_t)stats->avg);
	sbuf_printf(s, "number of maximal free ranges of different sizes:\n");
	sbuf_printf(s, "               count  |  size range\n");
	sbuf_printf(s, "               -----  |  ----------\n");
	for (i = 0; i < stats->max_bucket; i++) {
		if (stats->histo[i] != 0) {
			sbuf_printf(s, "%20jd  |  ",
			    (intmax_t)stats->histo[i]);
			if (fib[i] != fib[i + 1] - 1)
				sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
				    (intmax_t)fib[i + 1] - 1);
			else
				sbuf_printf(s, "%jd\n", (intmax_t)fib[i]);
		}
	}
	sbuf_printf(s, "%20jd  |  ", (intmax_t)stats->histo[i]);
	if (stats->histo[i] > 1)
		sbuf_printf(s, "%jd to %jd\n", (intmax_t)fib[i],
		    (intmax_t)stats->max);
	else
		sbuf_printf(s, "%jd\n", (intmax_t)stats->max);
}
/*
 * blist_stats()    - dump radix tree stats
 */
void
blist_stats(blist_t bl, struct sbuf *s)
{
	struct gap_stats gstats;
	struct gap_stats *stats = &gstats;
	daddr_t i, nodes, radix;
	u_daddr_t bit, diff, mask;

	init_gap_stats(stats);
	nodes = 0;
	i = bl->bl_radix;
	while (i < bl->bl_radix + bl->bl_blocks) {
		/*
		 * Find max size subtree starting at i.
		 */
		radix = BLIST_BMAP_RADIX;
		while (((i / radix) & BLIST_META_MASK) == 0)
			radix *= BLIST_META_RADIX;

		/*
		 * Check for skippable subtrees starting at i.
		 */
		while (radix > BLIST_BMAP_RADIX) {
			if (bl->bl_root[nodes].u.bmu_avail == 0) {
				if (gap_stats_counting(stats))
					update_gap_stats(stats, i);
				break;
			}
			if (bl->bl_root[nodes].u.bmu_avail == radix) {
				if (!gap_stats_counting(stats))
					update_gap_stats(stats, i);
				break;
			}

			/*
			 * Skip subtree root.
			 */
			nodes++;
			radix /= BLIST_META_RADIX;
		}
		if (radix == BLIST_BMAP_RADIX) {
			/*
			 * Scan leaf.
			 */
			mask = bl->bl_root[nodes].u.bmu_bitmap;
			diff = mask ^ (mask << 1);
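			/*
			 * Explanatory example (added): mask ^ (mask << 1)
			 * has a bit set wherever the bitmap changes value
			 * between adjacent positions, i.e. at each gap
			 * boundary.  For mask = ...0011100 (bits 2-4 free),
			 * diff = ...0100100, marking boundaries at 2 and 5.
			 */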
			if (gap_stats_counting(stats))
				diff ^= 1;
			while (diff != 0) {
				bit = diff & -diff;
				update_gap_stats(stats, i + bitpos(bit));
				diff ^= bit;
			}
		}
		nodes += radix_to_skip(radix);
		i += radix;
	}
	update_gap_stats(stats, i);
	dump_gap_stats(stats, s);
}
/************************************************************************
 *			  ALLOCATION SUPPORT FUNCTIONS			*
 ************************************************************************
 *
 *	These support functions do all the actual work.  They may seem
 *	rather longish, but that's because I've commented them up.  The
 *	actual code is straightforward.
 *
 */
/*
 * blst_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
 *
 *	This is the core of the allocator and is optimized for the
 *	BLIST_BMAP_RADIX block allocation case.  Otherwise, execution
 *	time is proportional to log2(count) + bitpos time.
 */
static daddr_t
blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count, daddr_t cursor)
{
	u_daddr_t mask;
	int count1, lo, num_shifts, range1, range_ext;

	if (count == BLIST_BMAP_RADIX) {
		/*
		 * Optimize allocation of BLIST_BMAP_RADIX bits.  If this wasn't
		 * a special case, then forming the final value of 'mask' below
		 * would require special handling to avoid an invalid left shift
		 * when count equals the number of bits in mask.
		 */
		if (~scan->u.bmu_bitmap != 0) {
			scan->bm_bighint = BLIST_BMAP_RADIX - 1;
			return (SWAPBLK_NONE);
		}
		if (cursor != blk)
			return (SWAPBLK_NONE);
		scan->u.bmu_bitmap = 0;
		scan->bm_bighint = 0;
		return (blk);
	}
	range1 = 0;
	count1 = count - 1;
	num_shifts = fls(count1);
	mask = scan->u.bmu_bitmap;
	while (mask != 0 && num_shifts > 0) {
		/*
		 * If bit i is set in mask, then bits in [i, i+range1] are set
		 * in scan->u.bmu_bitmap.  The value of range1 is equal to
		 * count1 >> num_shifts.  Grow range and reduce num_shifts to 0,
		 * while preserving these invariants.  The updates to mask leave
		 * fewer bits set, but each bit that remains set represents a
		 * longer string of consecutive bits set in scan->u.bmu_bitmap.
		 */
		num_shifts--;
		range_ext = range1 + ((count1 >> num_shifts) & 1);
		mask &= mask >> range_ext;
		range1 += range_ext;
	}
	if (mask == 0) {
		/*
		 * Update bighint.  There is no allocation bigger than range1
		 * available in this leaf.
		 */
		scan->bm_bighint = range1;
		return (SWAPBLK_NONE);
	}
	/*
	 * Discard any candidates that appear before the cursor.
	 */
	lo = cursor - blk;
	mask &= ~(u_daddr_t)0 << lo;

	if (mask == 0)
		return (SWAPBLK_NONE);

	/*
	 * The least significant set bit in mask marks the start of the first
	 * available range of sufficient size.  Clear all the bits but that
	 * one, and then find its position.
	 */
	mask &= -mask;
	lo = bitpos(mask);

	/*
	 * Set in mask exactly the bits being allocated, and clear them from
	 * the set of available bits.
	 */
	mask = (mask << count) - mask;
	scan->u.bmu_bitmap &= ~mask;
	return (blk + lo);
}
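/*
 * Worked example of the mask-folding loop above (added): for count = 3,
 * count1 = 2 and num_shifts = 2, so the loop applies mask &= mask >> 1
 * twice.  Starting from the 8-bit map 01101110, the folds leave 00100110
 * and then 00000010; the surviving bit 1 marks the run of 3 set bits at
 * positions [1, 3].
 */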
/*
 * blst_meta_alloc() -	allocate at a meta in the radix tree.
 *
 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
 *	calls that hit this node.  We have to check for our collapse cases
 *	and we have a few optimizations strewn in as well.
 */
static daddr_t
blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
{
	daddr_t blk, i, next_skip, r, skip;
	int child;
	bool scan_from_start;

	blk = cursor & -radix;
	if (radix == BLIST_BMAP_RADIX)
		return (blst_leaf_alloc(scan, blk, count, cursor));
	if (scan->u.bmu_avail < count) {
		/*
		 * The meta node's hint must be too large if the allocation
		 * exceeds the number of free blocks.  Reduce the hint, and
		 * return failure.
		 */
		scan->bm_bighint = scan->u.bmu_avail;
		return (SWAPBLK_NONE);
	}
	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;

	/*
	 * An ALL-FREE meta node requires special handling before allocating
	 * any of its blocks.
	 */
	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * Reinitialize each of the meta node's children.  An ALL-FREE
		 * meta node cannot have a terminator in any subtree.
		 */
		for (i = 1; i < skip; i += next_skip) {
			if (next_skip == 1)
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
			else
				scan[i].u.bmu_avail = radix;
			scan[i].bm_bighint = radix;
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > radix) {
		/*
		 * The allocation exceeds the number of blocks that are
		 * managed by a subtree of this meta node.
		 */
		panic("allocation too large");
	}
	scan_from_start = cursor == blk;
	child = (cursor - blk) / radix;
	blk += child * radix;
	for (i = 1 + child * next_skip; i < skip; i += next_skip) {
		if (count <= scan[i].bm_bighint) {
			/*
			 * The allocation might fit in the i'th subtree.
			 */
			r = blst_meta_alloc(&scan[i],
			    cursor > blk ? cursor : blk, count, radix);
			if (r != SWAPBLK_NONE) {
				scan->u.bmu_avail -= count;
				return (r);
			}
		} else if (scan[i].bm_bighint == (daddr_t)-1) {
			/*
			 * Terminator
			 */
			break;
		}
		blk += radix;
	}

	/*
	 * We couldn't allocate count in this subtree; update bighint.
	 */
	if (scan_from_start && scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;

	return (SWAPBLK_NONE);
}
/*
 * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
 */
static void
blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
{
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *          \_________/\__/
	 *		count   n
	 */
	int n = blk & (BLIST_BMAP_RADIX - 1);
	u_daddr_t mask;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	if (scan->u.bmu_bitmap & mask)
		panic("blst_radix_free: freeing free block");
	scan->u.bmu_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incurred on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = BLIST_BMAP_RADIX;
}
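/*
 * Worked example of the mask construction above (added): with
 * BLIST_BMAP_RADIX = 64, freeing count = 11 blocks at offset n = 4 gives
 * mask = (~0 << 4) & (~0 >> 49), i.e. bits 4 through 14 set, exactly the
 * shape drawn in the diagram in blst_leaf_free().
 */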
/*
 * BLST_META_FREE() -	free allocated blocks from radix tree meta info
 *
 *	This support routine frees a range of blocks from the bitmap.
 *	The range must be entirely enclosed by this radix node.  If a
 *	meta node, we break the range down recursively to free blocks
 *	in subnodes (which means that this code can free an arbitrary
 *	range whereas the allocation code cannot allocate an arbitrary
 *	range).
 */
static void
blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix)
{
	daddr_t blk, i, next_skip, skip, v;
	int child;

	if (scan->bm_bighint == (daddr_t)-1)
		panic("freeing invalid range");
	if (radix == BLIST_BMAP_RADIX)
		return (blst_leaf_free(scan, freeBlk, count));
	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case, with possible
		 * shortcut to ALL-FREE special case.
		 */
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix) {
			for (i = 1; i < skip; i += next_skip) {
				if (scan[i].bm_bighint == (daddr_t)-1)
					break;
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.
	 */

	if (scan->u.bmu_avail == radix)
		return;
	if (scan->u.bmu_avail > radix)
		panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
		    (long long)count, (long long)scan->u.bmu_avail,
		    (long long)radix);
	/*
	 * Break the free down into its components
	 */

	blk = freeBlk & -radix;
	radix /= BLIST_META_RADIX;

	child = (freeBlk - blk) / radix;
	blk += child * radix;
	i = 1 + child * next_skip;
	while (i < skip && blk < freeBlk + count) {
		v = blk + radix - freeBlk;
		if (v > count)
			v = count;
		blst_meta_free(&scan[i], freeBlk, v, radix);
		if (scan->bm_bighint < scan[i].bm_bighint)
			scan->bm_bighint = scan[i].bm_bighint;
		count -= v;
		freeBlk += v;
		blk += radix;
		i += next_skip;
	}
}
/*
 * BLST_COPY() -	copy one radix tree to another
 *
 *	Locates free space in the source tree and frees it in the destination
 *	tree.  The space may not already be free in the destination.
 */
static void
blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
    daddr_t count)
{
	daddr_t i, next_skip, skip;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		u_daddr_t v = scan->u.bmu_bitmap;

		if (v == (u_daddr_t)-1) {
			blist_free(dest, blk, count);
		} else if (v != 0) {
			int i;

			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
				if (v & ((u_daddr_t)1 << i))
					blist_free(dest, blk + i, 1);
			}
		}
		return;
	}
	/*
	 * Meta node
	 */

	if (scan->u.bmu_avail == 0) {
		/*
		 * Source all allocated, leave dest allocated
		 */
		return;
	}
	if (scan->u.bmu_avail == radix) {
		/*
		 * Source all free, free entire dest
		 */
		if (count < radix)
			blist_free(dest, blk, count);
		else
			blist_free(dest, blk, radix);
		return;
	}

	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;
	radix /= BLIST_META_RADIX;

	for (i = 1; count && i < skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1)
			break;

		if (count >= radix) {
			blst_copy(&scan[i], blk, radix, dest, radix);
			count -= radix;
		} else {
			if (count) {
				blst_copy(&scan[i], blk, radix, dest, count);
			}
			count = 0;
		}
		blk += radix;
	}
}
/*
 * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
 *
 *	This routine allocates all blocks in the specified range
 *	regardless of any existing allocations in that range.  Returns
 *	the number of blocks allocated by the call.
 */
static daddr_t
blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
{
	int n = blk & (BLIST_BMAP_RADIX - 1);
	daddr_t nblks;
	u_daddr_t mask;

	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	/* Count the number of blocks that we are allocating. */
	nblks = bitcount64(scan->u.bmu_bitmap & mask);

	scan->u.bmu_bitmap &= ~mask;
	return (nblks);
}
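/*
 * Example (added): filling blocks [2, 5) in a leaf where only block 3 is
 * free builds a mask with bits 2-4 set, returns nblks = 1 (the one free
 * block consumed), and clears those bits so the whole range then reads
 * as allocated.
 */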
/*
 * BLST_META_FILL() -	allocate specific blocks at a meta node
 *
 *	This routine allocates the specified range of blocks,
 *	regardless of any existing allocations in the range.  The
 *	range must be within the extent of this node.  Returns the
 *	number of blocks allocated by the call.
 */
static daddr_t
blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix)
{
	daddr_t blk, i, nblks, next_skip, skip, v;
	int child;

	if (scan->bm_bighint == (daddr_t)-1)
		panic("filling invalid range");
	if (count > radix) {
		/*
		 * The allocation exceeds the number of blocks that are
		 * managed by this node.
		 */
		panic("fill too large");
	}
	if (radix == BLIST_BMAP_RADIX)
		return (blst_leaf_fill(scan, allocBlk, count));
	if (count == radix || scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = 0;
		return (nblks);
	}
	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;
	blk = allocBlk & -radix;

	/*
	 * An ALL-FREE meta node requires special handling before allocating
	 * any of its blocks.
	 */
	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * Reinitialize each of the meta node's children.  An ALL-FREE
		 * meta node cannot have a terminator in any subtree.
		 */
		for (i = 1; i < skip; i += next_skip) {
			if (next_skip == 1)
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
			else
				scan[i].u.bmu_avail = radix;
			scan[i].bm_bighint = radix;
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	nblks = 0;
	child = (allocBlk - blk) / radix;
	blk += child * radix;
	i = 1 + child * next_skip;
	while (i < skip && blk < allocBlk + count) {
		v = blk + radix - allocBlk;
		if (v > count)
			v = count;
		nblks += blst_meta_fill(&scan[i], allocBlk, v, radix);
		count -= v;
		allocBlk += v;
		blk += radix;
		i += next_skip;
	}
	scan->u.bmu_avail -= nblks;
	return (nblks);
}
/*
 * BLST_RADIX_INIT() -	initialize radix tree
 *
 *	Initialize our meta structures and bitmaps and calculate the exact
 *	amount of space required to manage 'count' blocks - this space may
 *	be considerably less than the calculated radix due to the large
 *	RADIX values we use.
 */
static daddr_t
blst_radix_init(blmeta_t *scan, daddr_t radix, daddr_t count)
{
	daddr_t i, memindex, next_skip, skip;

	memindex = 0;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return (memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */

	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;
	radix /= BLIST_META_RADIX;

	for (i = 1; i < skip; i += next_skip) {
		if (count >= radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i +
			    blst_radix_init(((scan) ? &scan[i] : NULL), radix,
			    radix);
			count -= radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i +
			    blst_radix_init(((scan) ? &scan[i] : NULL), radix,
			    count);
			count = 0;
		} else {
			/*
			 * Add terminator and break out
			 */
			if (scan)
				scan[i].bm_bighint = (daddr_t)-1;
			break;
		}
	}
	if (memindex < i)
		memindex = i;
	return (memindex);
}
#ifdef BLIST_DEBUG

static void
blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
{
	daddr_t i, next_skip, skip;

	if (radix == BLIST_BMAP_RADIX) {
		printf(
		    "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n",
		    tab, tab, "",
		    (long long)blk, (long long)radix,
		    (long long)scan->u.bmu_bitmap,
		    (long long)scan->bm_bighint
		);
		return;
	}

	if (scan->u.bmu_avail == 0) {
		printf(
		    "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
		    tab, tab, "",
		    (long long)blk,
		    (long long)radix
		);
		return;
	}
	if (scan->u.bmu_avail == radix) {
		printf(
		    "%*.*s(%08llx,%lld) ALL FREE\n",
		    tab, tab, "",
		    (long long)blk,
		    (long long)radix
		);
		return;
	}

	printf(
	    "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
	    tab, tab, "",
	    (long long)blk, (long long)radix,
	    (long long)scan->u.bmu_avail,
	    (long long)radix,
	    (long long)scan->bm_bighint
	);

	skip = radix_to_skip(radix);
	next_skip = skip / BLIST_META_RADIX;
	radix /= BLIST_META_RADIX;
	tab += 4;

	for (i = 1; i < skip; i += next_skip) {
		if (scan[i].bm_bighint == (daddr_t)-1) {
			printf(
			    "%*.*s(%08llx,%lld): Terminator\n",
			    tab, tab, "",
			    (long long)blk, (long long)radix
			);
			break;
		}
		blst_radix_print(&scan[i], blk, radix, tab);
		blk += radix;
	}
	tab -= 4;

	printf(
	    "%*.*s}\n",
	    tab, tab, ""
	);
}

#endif
#ifdef BLIST_DEBUG

int
main(int ac, char **av)
{
	int size = 1024;
	int i;
	blist_t bl;
	struct sbuf *s;

	for (i = 1; i < ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	bl = blist_create(size, M_WAITOK);
	blist_free(bl, 0, size);

	for (;;) {
		char buf[1024];
		long long da = 0;
		long long count = 0;

		printf("%lld/%lld/%lld> ", (long long)blist_avail(bl),
		    (long long)size, (long long)bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch (buf[0]) {
		case 'r':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				blist_resize(&bl, count, 1, M_WAITOK);
			} else {
				printf("?\n");
			}
			/* FALLTHROUGH */
		case 'p':
			blist_print(bl);
			break;
		case 's':
			s = sbuf_new_auto();
			blist_stats(bl, s);
			sbuf_finish(s);
			printf("%s", sbuf_data(s));
			sbuf_delete(s);
			break;
		case 'a':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				daddr_t blk = blist_alloc(bl, count);
				printf("    R=%08llx\n", (long long)blk);
			} else {
				printf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
				blist_free(bl, da, count);
			} else {
				printf("?\n");
			}
			break;
		case 'l':
			if (sscanf(buf + 1, "%llx %lld", &da, &count) == 2) {
				printf("    n=%jd\n",
				    (intmax_t)blist_fill(bl, da, count));
			} else {
				printf("?\n");
			}
			break;
		case '?':
		case 'h':
			puts(
			    "p          -print\n"
			    "s          -stats\n"
			    "a %d       -allocate\n"
			    "f %x %d    -free\n"
			    "l %x %d    -fill\n"
			    "r %d       -resize\n"
			    "h/?        -help"
			);
			break;
		default:
			printf("?\n");
			break;
		}
	}
	return (0);
}
void
panic(const char *ctl, ...)
{
	va_list va;

	va_start(va, ctl);
	vfprintf(stderr, ctl, va);
	fprintf(stderr, "\n");
	va_end(va);
	exit(1);
}

#endif