2 * Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems. The following
33 * features are included for this purpose:
35 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 * contention and cache sloshing.
38 * + Thread-specific caching is used if there are multiple threads, which
39 * reduces the amount of locking.
41 * + Cache line sharing between arenas is avoided for internal data
44 * + Memory is managed in chunks and runs (chunks can be split into runs),
45 * rather than as individual pages. This provides a constant-time
46 * mechanism for associating allocations with particular arenas.
48 * Allocation requests are rounded up to the nearest size class, and no record
49 * of the original request size is maintained. Allocations are broken into
50 * categories according to size class. Assuming runtime defaults, 4 kB pages
51 * and a 16 byte quantum on a 32-bit system, the size classes in each category
 * are as follows:
 *
 * |=======================================|
 * | Category | Subcategory      |    Size |
 * |=======================================|
 * | Small    | Tiny             |       2 |
 * |          |                  |       4 |
 * |          |                  |       8 |
 * |          |------------------+---------|
 * |          | Quantum-spaced   |      16 |
 * |          |                  |      32 |
 * |          |                  |     ... |
 * |          |                  |     128 |
 * |          |------------------+---------|
 * |          | Cacheline-spaced |     192 |
 * |          |                  |     256 |
 * |          |                  |     ... |
 * |          |                  |     512 |
 * |          |------------------+---------|
 * |          | Sub-page         |     760 |
 * |          |                  |    1024 |
 * |          |                  |     ... |
 * |          |                  |    3840 |
 * |=======================================|
 * | Large                       |    4 kB |
 * |                             |    8 kB |
 * |                             |     ... |
 * |                             | 1020 kB |
 * |=======================================|
 * | Huge                        |    1 MB |
 * |                             |    2 MB |
 * |                             |    3 MB |
 * |                             |     ... |
 * |=======================================|
98 * A different mechanism is used for each category:
100 * Small : Each size class is segregated into its own set of runs. Each run
101 * maintains a bitmap of which regions are free/allocated.
103 * Large : Each allocation is backed by a dedicated run. Metadata are stored
104 * in the associated arena chunk header maps.
106 * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
107 * Metadata are stored in a separate red-black tree.
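 *
 * For illustration (a sketch assuming the runtime defaults above, i.e. a 16
 * byte quantum, 4 kB pages, and the default 1 MB chunk size; not normative):
 *
 *   malloc(100)             -> small, quantum-spaced class, rounded to 112 bytes
 *   malloc(5000)            -> large, rounded up to 8192 bytes (a two page run)
 *   malloc(2 * 1024 * 1024) -> huge, rounded up to a chunk multiple (2 MB)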
109 *******************************************************************************
113 * MALLOC_PRODUCTION disables assertions and statistics gathering. It also
114 * defaults the A and J runtime options to off. These settings are appropriate
115 * for production systems.
117 #ifndef MALLOC_PRODUCTION
118 #define MALLOC_PRODUCTION
121 #ifndef MALLOC_PRODUCTION
123 * MALLOC_DEBUG enables assertions and other sanity checks, and disables
126 # define MALLOC_DEBUG
128 /* MALLOC_STATS enables statistics calculation. */
129 # define MALLOC_STATS
133 * MALLOC_TINY enables support for tiny objects, which are smaller than one
139 * MALLOC_MAG enables a magazine-based thread-specific caching layer for small
140 * objects. This makes it possible to allocate/deallocate objects without any
141 * locking when the cache is in the steady state.
146 * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
147 * re-balances arena load if exponentially averaged contention exceeds a
150 #define MALLOC_BALANCE
153 * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
154 * segment (DSS). In an ideal world, this functionality would be completely
155 * unnecessary, but we are burdened by history and the lack of resource limits
156 * for anonymous mapped memory.
160 #include <sys/cdefs.h>
161 __FBSDID("$FreeBSD$");
163 #include "libc_private.h"
167 #include "spinlock.h"
168 #include "namespace.h"
169 #include <sys/mman.h>
170 #include <sys/param.h>
171 #include <sys/stddef.h>
172 #include <sys/time.h>
173 #include <sys/types.h>
174 #include <sys/sysctl.h>
176 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
178 #include <machine/cpufunc.h>
179 #include <machine/param.h>
180 #include <machine/vmparam.h>
195 #include "un-namespace.h"
211 /* Disable inlining to make debugging easier. */
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define STRERROR_BUF 64
219 * Minimum alignment of allocations is 2^QUANTUM_2POW bytes.
222 # define QUANTUM_2POW 4
223 # define SIZEOF_PTR_2POW 2
224 # define CPU_SPINWAIT __asm__ volatile("pause")
227 # define QUANTUM_2POW 4
228 # define SIZEOF_PTR_2POW 3
231 # define QUANTUM_2POW 4
232 # define SIZEOF_PTR_2POW 3
236 # define QUANTUM_2POW 4
237 # define SIZEOF_PTR_2POW 3
241 # define QUANTUM_2POW 4
242 # define SIZEOF_PTR_2POW 3
243 # define CPU_SPINWAIT __asm__ volatile("pause")
246 # define QUANTUM_2POW 3
247 # define SIZEOF_PTR_2POW 2
251 # define QUANTUM_2POW 3
252 # define SIZEOF_PTR_2POW 2
256 # define QUANTUM_2POW 4
257 # define SIZEOF_PTR_2POW 2
260 #define QUANTUM ((size_t)(1U << QUANTUM_2POW))
261 #define QUANTUM_MASK (QUANTUM - 1)
263 #define SIZEOF_PTR (1U << SIZEOF_PTR_2POW)
265 /* sizeof(int) == (1U << SIZEOF_INT_2POW). */
266 #ifndef SIZEOF_INT_2POW
267 # define SIZEOF_INT_2POW 2
270 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
271 #if (!defined(PIC) && !defined(NO_TLS))
276 /* MALLOC_MAG requires TLS. */
280 /* MALLOC_BALANCE requires TLS. */
281 # ifdef MALLOC_BALANCE
282 # undef MALLOC_BALANCE
287 * Size and alignment of memory chunks that are allocated by the OS's virtual
290 #define CHUNK_2POW_DEFAULT 20
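
/*
 * Illustrative note (a sketch, not part of the original code): because chunks
 * are chunksize bytes long and chunksize-aligned, the chunk that owns a small
 * or large allocation can be found in constant time by masking the pointer,
 * e.g.:
 *
 *	arena_chunk_t *chunk = (arena_chunk_t *)((uintptr_t)ptr & ~chunksize_mask);
 *	arena_t *arena = chunk->arena;
 *
 * This is what the CHUNK_ADDR2BASE() macro below does.
 */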
292 /* Maximum number of dirty pages per arena. */
293 #define DIRTY_MAX_DEFAULT (1U << 9)
296 * Maximum size of L1 cache line. This is used to avoid cache line aliasing.
297 * In addition, this controls the spacing of cacheline-spaced size classes.
299 #define CACHELINE_2POW 6
300 #define CACHELINE ((size_t)(1U << CACHELINE_2POW))
301 #define CACHELINE_MASK (CACHELINE - 1)
304 * Subpages are an artificially designated partitioning of pages. Their only
305 * purpose is to support subpage-spaced size classes.
307 * There must be at least 4 subpages per page, due to the way size classes are
310 #define SUBPAGE_2POW 8
311 #define SUBPAGE ((size_t)(1U << SUBPAGE_2POW))
312 #define SUBPAGE_MASK (SUBPAGE - 1)
315 /* Smallest size class to support. */
316 # define TINY_MIN_2POW 1
320 * Maximum size class that is a multiple of the quantum, but not (necessarily)
321 * a power of 2. Above this size, allocations are rounded up to the nearest
324 #define QSPACE_MAX_2POW_DEFAULT 7
327 * Maximum size class that is a multiple of the cacheline, but not (necessarily)
328 * a power of 2. Above this size, allocations are rounded up to the nearest
331 #define CSPACE_MAX_2POW_DEFAULT 9
334 * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
335 * as small as possible such that this setting is still honored, without
336 * violating other constraints. The goal is to make runs as small as possible
337 * without exceeding a per run external fragmentation threshold.
339 * We use binary fixed point math for overhead computations, where the binary
340 * point is implicitly RUN_BFP bits to the left.
342 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
343 * honored for some/all object sizes, since there is one bit of header overhead
344 * per object (plus a constant). This constraint is relaxed (ignored) for runs
345 * that are so small that the per-region overhead is greater than:
347 * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
350 /* \/ Implicit binary fixed point. */
351 #define RUN_MAX_OVRHD 0x0000003dU
352 #define RUN_MAX_OVRHD_RELAX 0x00001800U
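
/*
 * Worked example (informal; assumes the default RUN_BFP of 12): overhead
 * ratios are fractions scaled by 2^RUN_BFP, so RUN_MAX_OVRHD == 0x3d == 61
 * means a target of at most 61/4096, i.e. roughly 1.5% of a run lost to
 * header overhead.  For a one page (4096 byte) run that allows about 61
 * bytes of header before the run size must grow.
 */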
354 /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
355 #define RUN_MAX_SMALL (12 * PAGE_SIZE)
358 * Hyper-threaded CPUs may need a special instruction inside spin loops in
359 * order to yield to another virtual CPU. If no such instruction is defined
360 * above, make CPU_SPINWAIT a no-op.
363 # define CPU_SPINWAIT
367 * Adaptive spinning must eventually switch to blocking, in order to avoid the
368 * potential for priority inversion deadlock. Backing off past a certain point
369 * can actually waste time.
371 #define SPIN_LIMIT_2POW 11
374 * Conversion from spinning to blocking is expensive; we use (1U <<
375 * BLOCK_COST_2POW) to estimate how many more times costly blocking is than
376 * worst-case spinning.
378 #define BLOCK_COST_2POW 4
382 * Default magazine size, in bytes. max_rounds is calculated to make
383 * optimal use of the space, leaving just enough room for the magazine
386 # define MAG_SIZE_2POW_DEFAULT 9
389 #ifdef MALLOC_BALANCE
391 * We use an exponential moving average to track recent lock contention,
392 * where the size of the history window is N, and alpha=2/(N+1).
394 * Due to integer math rounding, very small values here can cause
395 * substantial degradation in accuracy, thus making the moving average decay
396 * faster than it would with precise calculation.
398 # define BALANCE_ALPHA_INV_2POW 9
401 * Threshold value for the exponential moving contention average at which to
402 * re-assign a thread.
404 # define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4))
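
/*
 * Illustrative numbers (a sketch, not from the original comments): with
 * BALANCE_ALPHA_INV_2POW == 9, alpha == 1/512, which by alpha == 2/(N+1)
 * corresponds to a history window of N == 1023 lock acquisitions.  Each
 * acquisition folds its spin count into the average roughly as:
 *
 *	contention = (contention * 511 + spins) / 512;
 */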
407 /******************************************************************************/
410 * Mutexes based on spinlocks. We can't use normal pthread spinlocks in all
411 * places, because they require malloc()ed memory, which causes bootstrapping
412 * issues in some cases.
418 /* Set to true once the allocator has been initialized. */
419 static bool malloc_initialized = false;
421 /* Used to avoid initialization races. */
422 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
424 /******************************************************************************/
426 * Statistics data structures.
431 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
432 struct malloc_bin_stats_s {
434 * Number of allocation requests that corresponded to the size of this
440 /* Number of magazine reloads from this bin. */
444 /* Total number of runs created for this bin's size class. */
448 * Total number of runs reused by extracting them from the runs tree for
449 * this bin's size class.
453 /* High-water mark for this bin. */
454 unsigned long highruns;
456 /* Current number of runs in this bin. */
457 unsigned long curruns;
460 typedef struct arena_stats_s arena_stats_t;
461 struct arena_stats_s {
462 /* Number of bytes currently mapped. */
466 * Total number of purge sweeps, total number of madvise calls made,
467 * and total pages purged in order to keep dirty unused memory under
474 /* Per-size-category statistics. */
475 size_t allocated_small;
476 uint64_t nmalloc_small;
477 uint64_t ndalloc_small;
479 size_t allocated_large;
480 uint64_t nmalloc_large;
481 uint64_t ndalloc_large;
483 #ifdef MALLOC_BALANCE
484 /* Number of times this arena reassigned a thread due to contention. */
489 typedef struct chunk_stats_s chunk_stats_t;
490 struct chunk_stats_s {
491 /* Number of chunks that were allocated. */
494 /* High-water mark for number of chunks allocated. */
495 unsigned long highchunks;
498 * Current number of chunks allocated. This value isn't maintained for
499 * any other purpose, so keep track of it in order to be able to set
502 unsigned long curchunks;
505 #endif /* #ifdef MALLOC_STATS */
507 /******************************************************************************/
509 * Extent data structures.
512 /* Tree of extents. */
513 typedef struct extent_node_s extent_node_t;
514 struct extent_node_s {
516 /* Linkage for the size/address-ordered tree. */
517 rb_node(extent_node_t) link_szad;
520 /* Linkage for the address-ordered tree. */
521 rb_node(extent_node_t) link_ad;
523 /* Pointer to the extent that this tree node is responsible for. */
526 /* Total region size. */
529 typedef rb_tree(extent_node_t) extent_tree_t;
531 /******************************************************************************/
533 * Arena data structures.
536 typedef struct arena_s arena_t;
537 typedef struct arena_bin_s arena_bin_t;
539 /* Each element of the chunk map corresponds to one page within the chunk. */
540 typedef struct arena_chunk_map_s arena_chunk_map_t;
541 struct arena_chunk_map_s {
543 * Linkage for run trees. There are two disjoint uses:
545 * 1) arena_t's runs_avail tree.
546 * 2) arena_run_t conceptually uses this linkage for in-use non-full
547 * runs, rather than directly embedding linkage.
549 rb_node(arena_chunk_map_t) link;
552 * Run address (or size) and various flags are stored together. The bit
553 * layout looks like (assuming 32-bit system):
555 * ???????? ???????? ????---- ---kdzla
557 * ? : Unallocated: Run address for first/last pages, unset for internal
559 * Small: Run address.
560 * Large: Run size for first page, unset for trailing pages.
568 * Following are example bit patterns for the three types of runs.
577 * ssssssss ssssssss ssss---- --------
578 * xxxxxxxx xxxxxxxx xxxx---- ----d---
579 * ssssssss ssssssss ssss---- -----z--
582 * rrrrrrrr rrrrrrrr rrrr---- -------a
583 * rrrrrrrr rrrrrrrr rrrr---- -------a
584 * rrrrrrrr rrrrrrrr rrrr---- -------a
587 * ssssssss ssssssss ssss---- ------la
588 * -------- -------- -------- ------la
589 * -------- -------- -------- ------la
592 #define CHUNK_MAP_KEY ((size_t)0x10U)
593 #define CHUNK_MAP_DIRTY ((size_t)0x08U)
594 #define CHUNK_MAP_ZEROED ((size_t)0x04U)
595 #define CHUNK_MAP_LARGE ((size_t)0x02U)
596 #define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
598 typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
599 typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
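
/*
 * Decoding sketch (illustrative only): for the map element mapelm of a run's
 * first page, the page-aligned bits carry the run size (or address) and the
 * low bits carry the flags, e.g.:
 *
 *	size_t size    = mapelm->bits & ~PAGE_MASK;
 *	bool   large   = (mapelm->bits & CHUNK_MAP_LARGE) != 0;
 *	bool   alloced = (mapelm->bits & CHUNK_MAP_ALLOCATED) != 0;
 */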
601 /* Arena chunk header. */
602 typedef struct arena_chunk_s arena_chunk_t;
603 struct arena_chunk_s {
604 /* Arena that owns the chunk. */
607 /* Linkage for the arena's chunks_dirty tree. */
608 rb_node(arena_chunk_t) link_dirty;
610 /* Number of dirty pages. */
613 /* Map of pages within chunk that keeps track of free/large/small. */
614 arena_chunk_map_t map[1]; /* Dynamically sized. */
616 typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
618 typedef struct arena_run_s arena_run_t;
622 # define ARENA_RUN_MAGIC 0x384adf93
625 /* Bin this run is associated with. */
628 /* Index of first element that might have a free region. */
629 unsigned regs_minelm;
631 /* Number of free regions in run. */
634 /* Bitmask of in-use regions (0: in use, 1: free). */
635 unsigned regs_mask[1]; /* Dynamically sized. */
640 * Current run being used to service allocations of this bin's size
646 * Tree of non-full runs. This tree is used when looking for an
647 * existing run when runcur is no longer usable. We choose the
648 * non-full run that is lowest in memory; this policy tends to keep
649 * objects packed well, and it can also help reduce the number of
650 * almost-empty chunks.
652 arena_run_tree_t runs;
654 /* Size of regions in a run for this bin's size class. */
657 /* Total size of a run for this bin's size class. */
660 /* Total number of regions in a run for this bin's size class. */
663 /* Number of elements in a run's regs_mask for this bin's size class. */
664 uint32_t regs_mask_nelms;
666 /* Offset of first region in a run for this bin's size class. */
667 uint32_t reg0_offset;
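
/*
 * Layout sketch (illustrative, not normative): region regind of a run for
 * this bin lives at
 *
 *	(uintptr_t)run + reg0_offset + regind * reg_size
 *
 * so e.g. a hypothetical bin with reg_size == 64 and reg0_offset == 64 would
 * place its regions at run+64, run+128, run+192, ...
 */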
670 /* Bin statistics. */
671 malloc_bin_stats_t stats;
678 # define ARENA_MAGIC 0x947d3d24
681 /* All operations on this arena require that lock be locked. */
682 pthread_mutex_t lock;
688 /* Tree of dirty-page-containing chunks this arena manages. */
689 arena_chunk_tree_t chunks_dirty;
692 * In order to avoid rapid chunk allocation/deallocation when an arena
693 * oscillates right on the cusp of needing a new chunk, cache the most
694 * recently freed chunk. The spare is left in the arena's chunk trees
695 * until it is deleted.
697 * There is one spare chunk per arena, rather than one spare total, in
698 * order to avoid interactions between multiple threads that could make
699 * a single spare inadequate.
701 arena_chunk_t *spare;
704 * Current count of pages within unused runs that are potentially
705 * dirty, and for which madvise(... MADV_FREE) has not been called. By
706 * tracking this, we can institute a limit on how much dirty unused
707 * memory is mapped for each arena.
712 * Size/address-ordered tree of this arena's available runs. This tree
713 * is used for first-best-fit run allocation.
715 arena_avail_tree_t runs_avail;
717 #ifdef MALLOC_BALANCE
719 * The arena load balancing machinery needs to keep track of how much
720 * lock contention there is. This value is exponentially averaged.
726 * bins is used to store rings of free regions of the following sizes,
727 * assuming a 16-byte quantum, 4kB page size, and default
749 arena_bin_t bins[1]; /* Dynamically sized. */
752 /******************************************************************************/
754 * Magazine data structures.
758 typedef struct mag_s mag_t;
760 size_t binind; /* Index of associated bin. */
762 void *rounds[1]; /* Dynamically sized. */
766 * Magazines are lazily allocated, but once created, they remain until the
767 * associated mag_rack is destroyed.
769 typedef struct bin_mags_s bin_mags_t;
775 typedef struct mag_rack_s mag_rack_t;
777 bin_mags_t bin_mags[1]; /* Dynamically sized. */
781 /******************************************************************************/
786 /* Number of CPUs. */
787 static unsigned ncpus;
789 /* Various bin-related settings. */
790 #ifdef MALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
791 # define ntbins ((unsigned)(QUANTUM_2POW - TINY_MIN_2POW))
795 static unsigned nqbins; /* Number of quantum-spaced bins. */
796 static unsigned ncbins; /* Number of cacheline-spaced bins. */
797 static unsigned nsbins; /* Number of subpage-spaced bins. */
798 static unsigned nbins;
800 # define tspace_max ((size_t)(QUANTUM >> 1))
802 #define qspace_min QUANTUM
803 static size_t qspace_max;
804 static size_t cspace_min;
805 static size_t cspace_max;
806 static size_t sspace_min;
807 static size_t sspace_max;
808 #define bin_maxclass sspace_max
810 static uint8_t const *size2bin;
812 * const_size2bin is a static constant lookup table that in the common case can
813 * be used as-is for size2bin. For dynamically linked programs, this avoids
814 * a page of memory overhead per process.
817 #define S2B_2(i) S2B_1(i) S2B_1(i)
818 #define S2B_4(i) S2B_2(i) S2B_2(i)
819 #define S2B_8(i) S2B_4(i) S2B_4(i)
820 #define S2B_16(i) S2B_8(i) S2B_8(i)
821 #define S2B_32(i) S2B_16(i) S2B_16(i)
822 #define S2B_64(i) S2B_32(i) S2B_32(i)
823 #define S2B_128(i) S2B_64(i) S2B_64(i)
824 #define S2B_256(i) S2B_128(i) S2B_128(i)
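
/*
 * Expansion sketch (assuming S2B_1(i) simply expands to "i,"): each S2B_n(i)
 * macro emits n copies of the bin index i, so e.g. S2B_4(3) expands to
 *
 *	3, 3, 3, 3,
 *
 * which maps four consecutive sizes to bin 3 in the lookup table below.
 */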
825 static const uint8_t const_size2bin[PAGE_SIZE - 255] = {
827 #if (QUANTUM_2POW == 4)
828 /* 64-bit system ************************/
839 S2B_16(S2B_QMIN + 1) /* 32 */
840 S2B_16(S2B_QMIN + 2) /* 48 */
841 S2B_16(S2B_QMIN + 3) /* 64 */
842 S2B_16(S2B_QMIN + 4) /* 80 */
843 S2B_16(S2B_QMIN + 5) /* 96 */
844 S2B_16(S2B_QMIN + 6) /* 112 */
845 S2B_16(S2B_QMIN + 7) /* 128 */
846 # define S2B_CMIN (S2B_QMIN + 8)
848 /* 32-bit system ************************/
858 S2B_8(S2B_QMIN + 1) /* 16 */
859 S2B_8(S2B_QMIN + 2) /* 24 */
860 S2B_8(S2B_QMIN + 3) /* 32 */
861 S2B_8(S2B_QMIN + 4) /* 40 */
862 S2B_8(S2B_QMIN + 5) /* 48 */
863 S2B_8(S2B_QMIN + 6) /* 56 */
864 S2B_8(S2B_QMIN + 7) /* 64 */
865 S2B_8(S2B_QMIN + 8) /* 72 */
866 S2B_8(S2B_QMIN + 9) /* 80 */
867 S2B_8(S2B_QMIN + 10) /* 88 */
868 S2B_8(S2B_QMIN + 11) /* 96 */
869 S2B_8(S2B_QMIN + 12) /* 104 */
870 S2B_8(S2B_QMIN + 13) /* 112 */
871 S2B_8(S2B_QMIN + 14) /* 120 */
872 S2B_8(S2B_QMIN + 15) /* 128 */
873 # define S2B_CMIN (S2B_QMIN + 16)
875 /****************************************/
876 S2B_64(S2B_CMIN + 0) /* 192 */
877 S2B_64(S2B_CMIN + 1) /* 256 */
878 S2B_64(S2B_CMIN + 2) /* 320 */
879 S2B_64(S2B_CMIN + 3) /* 384 */
880 S2B_64(S2B_CMIN + 4) /* 448 */
881 S2B_64(S2B_CMIN + 5) /* 512 */
882 # define S2B_SMIN (S2B_CMIN + 6)
883 S2B_256(S2B_SMIN + 0) /* 768 */
884 S2B_256(S2B_SMIN + 1) /* 1024 */
885 S2B_256(S2B_SMIN + 2) /* 1280 */
886 S2B_256(S2B_SMIN + 3) /* 1536 */
887 S2B_256(S2B_SMIN + 4) /* 1792 */
888 S2B_256(S2B_SMIN + 5) /* 2048 */
889 S2B_256(S2B_SMIN + 6) /* 2304 */
890 S2B_256(S2B_SMIN + 7) /* 2560 */
891 S2B_256(S2B_SMIN + 8) /* 2816 */
892 S2B_256(S2B_SMIN + 9) /* 3072 */
893 S2B_256(S2B_SMIN + 10) /* 3328 */
894 S2B_256(S2B_SMIN + 11) /* 3584 */
895 S2B_256(S2B_SMIN + 12) /* 3840 */
896 #if (PAGE_SHIFT == 13)
897 S2B_256(S2B_SMIN + 13) /* 4096 */
898 S2B_256(S2B_SMIN + 14) /* 4352 */
899 S2B_256(S2B_SMIN + 15) /* 4608 */
900 S2B_256(S2B_SMIN + 16) /* 4864 */
901 S2B_256(S2B_SMIN + 17) /* 5120 */
902 S2B_256(S2B_SMIN + 18) /* 5376 */
903 S2B_256(S2B_SMIN + 19) /* 5632 */
904 S2B_256(S2B_SMIN + 20) /* 5888 */
905 S2B_256(S2B_SMIN + 21) /* 6144 */
906 S2B_256(S2B_SMIN + 22) /* 6400 */
907 S2B_256(S2B_SMIN + 23) /* 6656 */
908 S2B_256(S2B_SMIN + 24) /* 6912 */
909 S2B_256(S2B_SMIN + 25) /* 7168 */
910 S2B_256(S2B_SMIN + 26) /* 7424 */
911 S2B_256(S2B_SMIN + 27) /* 7680 */
912 S2B_256(S2B_SMIN + 28) /* 7936 */
929 static size_t max_rounds;
932 /* Various chunk-related settings. */
933 static size_t chunksize;
934 static size_t chunksize_mask; /* (chunksize - 1). */
935 static size_t chunk_npages;
936 static size_t arena_chunk_header_npages;
937 static size_t arena_maxclass; /* Max size class for arenas. */
944 /* Protects chunk-related data structures. */
945 static malloc_mutex_t huge_mtx;
947 /* Tree of chunks that are stand-alone huge allocations. */
948 static extent_tree_t huge;
952 * Protects sbrk() calls. This avoids malloc races among threads, though it
953 * does not protect against races with threads that call sbrk() directly.
955 static malloc_mutex_t dss_mtx;
956 /* Base address of the DSS. */
957 static void *dss_base;
958 /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
959 static void *dss_prev;
960 /* Current upper limit on DSS addresses. */
961 static void *dss_max;
964 * Trees of chunks that were previously allocated (trees differ only in node
965 * ordering). These are used when allocating chunks, in an attempt to re-use
966 * address space. Depending on function, different tree orderings are needed,
967 * which is why there are two trees with the same contents.
969 static extent_tree_t dss_chunks_szad;
970 static extent_tree_t dss_chunks_ad;
974 /* Huge allocation statistics. */
975 static uint64_t huge_nmalloc;
976 static uint64_t huge_ndalloc;
977 static size_t huge_allocated;
980 /****************************/
982 * base (internal allocation).
986 * Current pages that are being used for internal memory allocations. These
987 * pages are carved up in cacheline-size quanta, so that there is no chance of
988 * false cache line sharing.
990 static void *base_pages;
991 static void *base_next_addr;
992 static void *base_past_addr; /* Addr immediately past base_pages. */
993 static extent_node_t *base_nodes;
994 static malloc_mutex_t base_mtx;
996 static size_t base_mapped;
1005 * Arenas that are used to service external requests. Not all elements of the
1006 * arenas array are necessarily used; arenas are created lazily as needed.
1008 static arena_t **arenas;
1009 static unsigned narenas;
1011 # ifdef MALLOC_BALANCE
1012 static unsigned narenas_2pow;
1014 static unsigned next_arena;
1017 static pthread_mutex_t arenas_lock; /* Protects arenas initialization. */
1021 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
1024 static __thread arena_t *arenas_map;
1029 * Map of thread-specific magazine racks, used for thread-specific object
1032 static __thread mag_rack_t *mag_rack;
1036 /* Chunk statistics. */
1037 static chunk_stats_t stats_chunks;
1040 /*******************************/
1042 * Runtime configuration options.
1044 const char *_malloc_options;
1046 #ifndef MALLOC_PRODUCTION
1047 static bool opt_abort = true;
1048 static bool opt_junk = true;
1050 static bool opt_abort = false;
1051 static bool opt_junk = false;
1054 static bool opt_dss = true;
1055 static bool opt_mmap = true;
1058 static bool opt_mag = true;
1059 static size_t opt_mag_size_2pow = MAG_SIZE_2POW_DEFAULT;
1061 static size_t opt_dirty_max = DIRTY_MAX_DEFAULT;
1062 #ifdef MALLOC_BALANCE
1063 static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
1065 static bool opt_print_stats = false;
1066 static size_t opt_qspace_max_2pow = QSPACE_MAX_2POW_DEFAULT;
1067 static size_t opt_cspace_max_2pow = CSPACE_MAX_2POW_DEFAULT;
1068 static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT;
1069 static bool opt_utrace = false;
1070 static bool opt_sysv = false;
1071 static bool opt_xmalloc = false;
1072 static bool opt_zero = false;
1073 static int opt_narenas_lshift = 0;
1081 #define UTRACE(a, b, c) \
1083 malloc_utrace_t ut; \
1087 utrace(&ut, sizeof(ut)); \
1090 /******************************************************************************/
1092 * Begin function prototypes for non-inline static functions.
1095 static void malloc_mutex_init(malloc_mutex_t *mutex);
1096 static bool malloc_spin_init(pthread_mutex_t *lock);
1097 static void wrtmessage(const char *p1, const char *p2, const char *p3,
1100 static void malloc_printf(const char *format, ...);
1102 static char *umax2s(uintmax_t x, char *s);
1104 static bool base_pages_alloc_dss(size_t minsize);
1106 static bool base_pages_alloc_mmap(size_t minsize);
1107 static bool base_pages_alloc(size_t minsize);
1108 static void *base_alloc(size_t size);
1109 static void *base_calloc(size_t number, size_t size);
1110 static extent_node_t *base_node_alloc(void);
1111 static void base_node_dealloc(extent_node_t *node);
1113 static void stats_print(arena_t *arena);
1115 static void *pages_map(void *addr, size_t size);
1116 static void pages_unmap(void *addr, size_t size);
1118 static void *chunk_alloc_dss(size_t size);
1119 static void *chunk_recycle_dss(size_t size, bool zero);
1121 static void *chunk_alloc_mmap(size_t size);
1122 static void *chunk_alloc(size_t size, bool zero);
1124 static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
1125 static bool chunk_dealloc_dss(void *chunk, size_t size);
1127 static void chunk_dealloc_mmap(void *chunk, size_t size);
1128 static void chunk_dealloc(void *chunk, size_t size);
1130 static arena_t *choose_arena_hard(void);
1132 static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
1133 bool large, bool zero);
1134 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
1135 static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
1136 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
1138 static void arena_purge(arena_t *arena);
1139 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
1140 static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
1141 arena_run_t *run, size_t oldsize, size_t newsize);
1142 static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
1143 arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
1144 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
1145 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
1146 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
1147 #ifdef MALLOC_BALANCE
1148 static void arena_lock_balance_hard(arena_t *arena);
1151 static void mag_load(mag_t *mag);
1153 static void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
1154 static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
1156 static size_t arena_salloc(const void *ptr);
1158 static void mag_unload(mag_t *mag);
1160 static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
1162 static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
1163 void *ptr, size_t size, size_t oldsize);
1164 static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
1165 void *ptr, size_t size, size_t oldsize);
1166 static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
1167 static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
1168 static bool arena_new(arena_t *arena);
1169 static arena_t *arenas_extend(unsigned ind);
1171 static mag_t *mag_create(arena_t *arena, size_t binind);
1172 static void mag_destroy(mag_t *mag);
1173 static mag_rack_t *mag_rack_create(arena_t *arena);
1174 static void mag_rack_destroy(mag_rack_t *rack);
1176 static void *huge_malloc(size_t size, bool zero);
1177 static void *huge_palloc(size_t alignment, size_t size);
1178 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
1179 static void huge_dalloc(void *ptr);
1180 static void malloc_print_stats(void);
1182 static void size2bin_validate(void);
1184 static bool size2bin_init(void);
1185 static bool size2bin_init_hard(void);
1186 static bool malloc_init_hard(void);
1189 * End function prototypes.
1191 /******************************************************************************/
1193 * Begin mutex. We can't use normal pthread mutexes in all places, because
1194 * they require malloc()ed memory, which causes bootstrapping issues in some
1199 malloc_mutex_init(malloc_mutex_t *mutex)
1201 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
1207 malloc_mutex_lock(malloc_mutex_t *mutex)
1211 _SPINLOCK(&mutex->lock);
1215 malloc_mutex_unlock(malloc_mutex_t *mutex)
1219 _SPINUNLOCK(&mutex->lock);
1225 /******************************************************************************/
1227 * Begin spin lock. Spin locks here are actually adaptive mutexes that block
1228 * after a period of spinning, because unbounded spinning would allow for
1229 * priority inversion.
1233 * We use an unpublished interface to initialize pthread mutexes with an
1234 * allocation callback, in order to avoid infinite recursion.
1236 int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
1237 void *(calloc_cb)(size_t, size_t));
1239 __weak_reference(_pthread_mutex_init_calloc_cb_stub,
1240 _pthread_mutex_init_calloc_cb);
1243 _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
1244 void *(calloc_cb)(size_t, size_t))
1251 malloc_spin_init(pthread_mutex_t *lock)
1254 if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
1260 static inline unsigned
1261 malloc_spin_lock(pthread_mutex_t *lock)
1266 if (_pthread_mutex_trylock(lock) != 0) {
1267 /* Exponentially back off if there are multiple CPUs. */
1270 volatile unsigned j;
1272 for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
1273 for (j = 0; j < (1U << i); j++) {
1278 if (_pthread_mutex_trylock(lock) == 0)
1284 * Spinning failed. Block until the lock becomes
1285 * available, in order to avoid indefinite priority
1288 _pthread_mutex_lock(lock);
1289 assert((ret << BLOCK_COST_2POW) != 0 || ncpus == 1);
1290 return (ret << BLOCK_COST_2POW);
1298 malloc_spin_unlock(pthread_mutex_t *lock)
1302 _pthread_mutex_unlock(lock);
1308 /******************************************************************************/
1310 * Begin Utility functions/macros.
1313 /* Return the chunk address for allocation address a. */
1314 #define CHUNK_ADDR2BASE(a) \
1315 ((void *)((uintptr_t)(a) & ~chunksize_mask))
1317 /* Return the chunk offset of address a. */
1318 #define CHUNK_ADDR2OFFSET(a) \
1319 ((size_t)((uintptr_t)(a) & chunksize_mask))
1321 /* Return the smallest chunk multiple that is >= s. */
1322 #define CHUNK_CEILING(s) \
1323 (((s) + chunksize_mask) & ~chunksize_mask)
1325 /* Return the smallest quantum multiple that is >= a. */
1326 #define QUANTUM_CEILING(a) \
1327 (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
1329 /* Return the smallest cacheline multiple that is >= s. */
1330 #define CACHELINE_CEILING(s) \
1331 (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
1333 /* Return the smallest subpage multiple that is >= s. */
1334 #define SUBPAGE_CEILING(s) \
1335 (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
1337 /* Return the smallest PAGE_SIZE multiple that is >= s. */
1338 #define PAGE_CEILING(s) \
1339 (((s) + PAGE_MASK) & ~PAGE_MASK)
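
/*
 * Usage sketch (assuming the default 16 byte quantum, 64 byte cachelines,
 * and 4 kB pages):
 *
 *	QUANTUM_CEILING(17)   == 32
 *	CACHELINE_CEILING(65) == 128
 *	PAGE_CEILING(5000)    == 8192
 *	CHUNK_CEILING(1)      == chunksize (1 MB by default)
 */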
1342 /* Compute the smallest power of 2 that is >= x. */
1343 static inline size_t
1353 #if (SIZEOF_PTR == 8)
1361 #ifdef MALLOC_BALANCE
1363 * Use a simple linear congruential pseudo-random number generator:
1365 * prn(x) = (a*x + c) % m
1367 * where the following constants ensure maximal period:
1369 * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
1370 * c == Odd number (relatively prime to 2^n).
1373 * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
1375 * This choice of m has the disadvantage that the quality of the bits is
1376 * proportional to bit position. For example, the lowest bit has a cycle of 2,
1377 * the next has a cycle of 4, etc. For this reason, we prefer to use the upper
1380 # define PRN_DEFINE(suffix, var, a, c) \
1381 static inline void \
1382 sprn_##suffix(uint32_t seed) \
1387 static inline uint32_t \
1388 prn_##suffix(uint32_t lg_range) \
1392 assert(lg_range > 0); \
1393 assert(lg_range <= 32); \
1395 x = (var * (a)) + (c); \
1397 ret = x >> (32 - lg_range); \
1401 # define SPRN(suffix, seed) sprn_##suffix(seed)
1402 # define PRN(suffix, lg_range) prn_##suffix(lg_range)
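
/*
 * Usage sketch: prn_*() returns the upper lg_range bits of the 32-bit state,
 * so PRN(balance, narenas_2pow) yields a value in [0, narenas) when narenas
 * == (1U << narenas_2pow), which is how arena assignment uses it below.
 */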
1405 #ifdef MALLOC_BALANCE
1406 /* Define the PRNG used for arena assignment. */
1407 static __thread uint32_t balance_x;
1408 PRN_DEFINE(balance, balance_x, 1297, 1301)
1412 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1415 _write(STDERR_FILENO, p1, strlen(p1));
1416 _write(STDERR_FILENO, p2, strlen(p2));
1417 _write(STDERR_FILENO, p3, strlen(p3));
1418 _write(STDERR_FILENO, p4, strlen(p4));
1421 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
1422 const char *p4) = wrtmessage;
1426 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1429 malloc_printf(const char *format, ...)
1434 va_start(ap, format);
1435 vsnprintf(buf, sizeof(buf), format, ap);
1437 _malloc_message(buf, "", "", "");
1442 * We don't want to depend on vsnprintf() for production builds, since that can
1443 * cause unnecessary bloat for static binaries. umax2s() provides minimal
1444 * integer printing functionality, so that malloc_printf() use can be limited to
1445 * MALLOC_STATS code.
1447 #define UMAX2S_BUFSIZE 21
1449 umax2s(uintmax_t x, char *s)
1453 /* Make sure UMAX2S_BUFSIZE is large enough. */
1454 assert(sizeof(uintmax_t) <= 8);
1456 i = UMAX2S_BUFSIZE - 1;
1460 s[i] = "0123456789"[x % 10];
1467 /******************************************************************************/
1471 base_pages_alloc_dss(size_t minsize)
1475 * Do special DSS allocation here, since base allocations don't need to
1478 malloc_mutex_lock(&dss_mtx);
1479 if (dss_prev != (void *)-1) {
1481 size_t csize = CHUNK_CEILING(minsize);
1484 /* Get the current end of the DSS. */
1488 * Calculate how much padding is necessary to
1489 * chunk-align the end of the DSS. Don't worry about
1490 * dss_max not being chunk-aligned though.
1492 incr = (intptr_t)chunksize
1493 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1495 if ((size_t)incr < minsize)
1498 dss_prev = sbrk(incr);
1499 if (dss_prev == dss_max) {
1501 dss_max = (void *)((intptr_t)dss_prev + incr);
1502 base_pages = dss_prev;
1503 base_next_addr = base_pages;
1504 base_past_addr = dss_max;
1506 base_mapped += incr;
1508 malloc_mutex_unlock(&dss_mtx);
1511 } while (dss_prev != (void *)-1);
1513 malloc_mutex_unlock(&dss_mtx);
1520 base_pages_alloc_mmap(size_t minsize)
1524 assert(minsize != 0);
1525 csize = PAGE_CEILING(minsize);
1526 base_pages = pages_map(NULL, csize);
1527 if (base_pages == NULL)
1529 base_next_addr = base_pages;
1530 base_past_addr = (void *)((uintptr_t)base_pages + csize);
1532 base_mapped += csize;
1539 base_pages_alloc(size_t minsize)
1543 if (opt_mmap && minsize != 0)
1546 if (base_pages_alloc_mmap(minsize) == false)
1552 if (base_pages_alloc_dss(minsize) == false)
1562 base_alloc(size_t size)
1567 /* Round size up to nearest multiple of the cacheline size. */
1568 csize = CACHELINE_CEILING(size);
1570 malloc_mutex_lock(&base_mtx);
1571 /* Make sure there's enough space for the allocation. */
1572 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1573 if (base_pages_alloc(csize)) {
1574 malloc_mutex_unlock(&base_mtx);
1579 ret = base_next_addr;
1580 base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1581 malloc_mutex_unlock(&base_mtx);
1587 base_calloc(size_t number, size_t size)
1591 ret = base_alloc(number * size);
1592 memset(ret, 0, number * size);
1597 static extent_node_t *
1598 base_node_alloc(void)
1602 malloc_mutex_lock(&base_mtx);
1603 if (base_nodes != NULL) {
1605 base_nodes = *(extent_node_t **)ret;
1606 malloc_mutex_unlock(&base_mtx);
1608 malloc_mutex_unlock(&base_mtx);
1609 ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
1616 base_node_dealloc(extent_node_t *node)
1619 malloc_mutex_lock(&base_mtx);
1620 *(extent_node_t **)node = base_nodes;
1622 malloc_mutex_unlock(&base_mtx);
1625 /******************************************************************************/
1629 stats_print(arena_t *arena)
1631 unsigned i, gap_start;
1633 malloc_printf("dirty: %zu page%s dirty, %llu sweep%s,"
1634 " %llu madvise%s, %llu page%s purged\n",
1635 arena->ndirty, arena->ndirty == 1 ? "" : "s",
1636 arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
1637 arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
1638 arena->stats.purged, arena->stats.purged == 1 ? "" : "s");
1640 malloc_printf(" allocated nmalloc ndalloc\n");
1641 malloc_printf("small: %12zu %12llu %12llu\n",
1642 arena->stats.allocated_small, arena->stats.nmalloc_small,
1643 arena->stats.ndalloc_small);
1644 malloc_printf("large: %12zu %12llu %12llu\n",
1645 arena->stats.allocated_large, arena->stats.nmalloc_large,
1646 arena->stats.ndalloc_large);
1647 malloc_printf("total: %12zu %12llu %12llu\n",
1648 arena->stats.allocated_small + arena->stats.allocated_large,
1649 arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1650 arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1651 malloc_printf("mapped: %12zu\n", arena->stats.mapped);
1654 if (__isthreaded && opt_mag) {
1655 malloc_printf("bins: bin size regs pgs mags "
1656 "newruns reruns maxruns curruns\n");
1659 malloc_printf("bins: bin size regs pgs requests "
1660 "newruns reruns maxruns curruns\n");
1664 for (i = 0, gap_start = UINT_MAX; i < nbins; i++) {
1665 if (arena->bins[i].stats.nruns == 0) {
1666 if (gap_start == UINT_MAX)
1669 if (gap_start != UINT_MAX) {
1670 if (i > gap_start + 1) {
1671 /* Gap of more than one size class. */
1672 malloc_printf("[%u..%u]\n",
1675 /* Gap of one size class. */
1676 malloc_printf("[%u]\n", gap_start);
1678 gap_start = UINT_MAX;
1681 "%13u %1s %4u %4u %3u %9llu %9llu"
1682 " %9llu %7lu %7lu\n",
1684 i < ntbins ? "T" : i < ntbins + nqbins ? "Q" :
1685 i < ntbins + nqbins + ncbins ? "C" : "S",
1686 arena->bins[i].reg_size,
1687 arena->bins[i].nregs,
1688 arena->bins[i].run_size >> PAGE_SHIFT,
1690 (__isthreaded && opt_mag) ?
1691 arena->bins[i].stats.nmags :
1693 arena->bins[i].stats.nrequests,
1694 arena->bins[i].stats.nruns,
1695 arena->bins[i].stats.reruns,
1696 arena->bins[i].stats.highruns,
1697 arena->bins[i].stats.curruns);
1700 if (gap_start != UINT_MAX) {
1701 if (i > gap_start + 1) {
1702 /* Gap of more than one size class. */
1703 malloc_printf("[%u..%u]\n", gap_start, i - 1);
1705 /* Gap of one size class. */
1706 malloc_printf("[%u]\n", gap_start);
1713 * End Utility functions/macros.
1715 /******************************************************************************/
1717 * Begin extent tree code.
1722 extent_szad_comp(extent_node_t *a, extent_node_t *b)
1725 size_t a_size = a->size;
1726 size_t b_size = b->size;
1728 ret = (a_size > b_size) - (a_size < b_size);
1730 uintptr_t a_addr = (uintptr_t)a->addr;
1731 uintptr_t b_addr = (uintptr_t)b->addr;
1733 ret = (a_addr > b_addr) - (a_addr < b_addr);
1739 /* Wrap red-black tree macros in functions. */
1740 rb_wrap(__unused static, extent_tree_szad_, extent_tree_t, extent_node_t,
1741 link_szad, extent_szad_comp)
1745 extent_ad_comp(extent_node_t *a, extent_node_t *b)
1747 uintptr_t a_addr = (uintptr_t)a->addr;
1748 uintptr_t b_addr = (uintptr_t)b->addr;
1750 return ((a_addr > b_addr) - (a_addr < b_addr));
1753 /* Wrap red-black tree macros in functions. */
1754 rb_wrap(__unused static, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
1758 * End extent tree code.
1760 /******************************************************************************/
1762 * Begin chunk management functions.
1766 pages_map(void *addr, size_t size)
1771 * We don't use MAP_FIXED here, because it can cause the *replacement*
1772 * of existing mappings, and we only want to create new mappings.
1774 ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1776 assert(ret != NULL);
1778 if (ret == MAP_FAILED)
1780 else if (addr != NULL && ret != addr) {
1782 * We succeeded in mapping memory, but not in the right place.
1784 if (munmap(ret, size) == -1) {
1785 char buf[STRERROR_BUF];
1787 strerror_r(errno, buf, sizeof(buf));
1788 _malloc_message(_getprogname(),
1789 ": (malloc) Error in munmap(): ", buf, "\n");
1796 assert(ret == NULL || (addr == NULL && ret != addr)
1797 || (addr != NULL && ret == addr));
1802 pages_unmap(void *addr, size_t size)
1805 if (munmap(addr, size) == -1) {
1806 char buf[STRERROR_BUF];
1808 strerror_r(errno, buf, sizeof(buf));
1809 _malloc_message(_getprogname(),
1810 ": (malloc) Error in munmap(): ", buf, "\n");
1818 chunk_alloc_dss(size_t size)
1822 * sbrk() uses a signed increment argument, so take care not to
1823 * interpret a huge allocation request as a negative increment.
1825 if ((intptr_t)size < 0)
1828 malloc_mutex_lock(&dss_mtx);
1829 if (dss_prev != (void *)-1) {
1833 * The loop is necessary to recover from races with other
1834 * threads that are using the DSS for something other than
1840 /* Get the current end of the DSS. */
1844 * Calculate how much padding is necessary to
1845 * chunk-align the end of the DSS.
1847 incr = (intptr_t)size
1848 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1849 if (incr == (intptr_t)size)
1852 ret = (void *)((intptr_t)dss_max + incr);
1856 dss_prev = sbrk(incr);
1857 if (dss_prev == dss_max) {
1859 dss_max = (void *)((intptr_t)dss_prev + incr);
1860 malloc_mutex_unlock(&dss_mtx);
1863 } while (dss_prev != (void *)-1);
1865 malloc_mutex_unlock(&dss_mtx);
1871 chunk_recycle_dss(size_t size, bool zero)
1873 extent_node_t *node, key;
1877 malloc_mutex_lock(&dss_mtx);
1878 node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
1880 void *ret = node->addr;
1882 /* Remove node from the tree. */
1883 extent_tree_szad_remove(&dss_chunks_szad, node);
1884 if (node->size == size) {
1885 extent_tree_ad_remove(&dss_chunks_ad, node);
1886 base_node_dealloc(node);
1889 * Insert the remainder of node's address range as a
1890 * smaller chunk. Its position within dss_chunks_ad
1893 assert(node->size > size);
1894 node->addr = (void *)((uintptr_t)node->addr + size);
1896 extent_tree_szad_insert(&dss_chunks_szad, node);
1898 malloc_mutex_unlock(&dss_mtx);
1901 memset(ret, 0, size);
1904 malloc_mutex_unlock(&dss_mtx);
1911 chunk_alloc_mmap(size_t size)
1917 * Ideally, there would be a way to specify alignment to mmap() (like
1918 * NetBSD has), but in the absence of such a feature, we have to work
1919 * hard to efficiently create aligned mappings. The reliable, but
1920 * expensive method is to create a mapping that is over-sized, then
1921 * trim the excess. However, that always results in at least one call
1924 * A more optimistic approach is to try mapping precisely the right
1925 * amount, then try to append another mapping if alignment is off. In
1926 * practice, this works out well as long as the application is not
1927 * interleaving mappings via direct mmap() calls. If we do run into a
1928 * situation where there is an interleaved mapping and we are unable to
1929 * extend an unaligned mapping, our best option is to momentarily
1930 * revert to the reliable-but-expensive method. This will tend to
1931 * leave a gap in the memory map that is too small to cause later
1932 * problems for the optimistic method.
1935 ret = pages_map(NULL, size);
1939 offset = CHUNK_ADDR2OFFSET(ret);
1941 /* Try to extend chunk boundary. */
1942 if (pages_map((void *)((uintptr_t)ret + size),
1943 chunksize - offset) == NULL) {
1945 * Extension failed. Clean up, then revert to the
1946 * reliable-but-expensive method.
1948 pages_unmap(ret, size);
1950 /* Beware size_t wrap-around. */
1951 if (size + chunksize <= size)
1954 ret = pages_map(NULL, size + chunksize);
1958 /* Clean up unneeded leading/trailing space. */
1959 offset = CHUNK_ADDR2OFFSET(ret);
1961 /* Leading space. */
1962 pages_unmap(ret, chunksize - offset);
1964 ret = (void *)((uintptr_t)ret +
1965 (chunksize - offset));
1967 /* Trailing space. */
1968 pages_unmap((void *)((uintptr_t)ret + size),
1971 /* Trailing space only. */
1972 pages_unmap((void *)((uintptr_t)ret + size),
1976 /* Clean up unneeded leading space. */
1977 pages_unmap(ret, chunksize - offset);
1978 ret = (void *)((uintptr_t)ret + (chunksize - offset));
1986 chunk_alloc(size_t size, bool zero)
1991 assert((size & chunksize_mask) == 0);
1997 ret = chunk_alloc_mmap(size);
2004 ret = chunk_recycle_dss(size, zero);
2009 ret = chunk_alloc_dss(size);
2015 /* All strategies for allocation failed. */
2020 stats_chunks.nchunks += (size / chunksize);
2021 stats_chunks.curchunks += (size / chunksize);
2023 if (stats_chunks.curchunks > stats_chunks.highchunks)
2024 stats_chunks.highchunks = stats_chunks.curchunks;
2027 assert(CHUNK_ADDR2BASE(ret) == ret);
2032 static extent_node_t *
2033 chunk_dealloc_dss_record(void *chunk, size_t size)
2035 extent_node_t *node, *prev, key;
2037 key.addr = (void *)((uintptr_t)chunk + size);
2038 node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
2039 /* Try to coalesce forward. */
2040 if (node != NULL && node->addr == key.addr) {
2042 * Coalesce chunk with the following address range. This does
2043 * not change the position within dss_chunks_ad, so only
2044 * remove/insert from/into dss_chunks_szad.
2046 extent_tree_szad_remove(&dss_chunks_szad, node);
2049 extent_tree_szad_insert(&dss_chunks_szad, node);
2052 * Coalescing forward failed, so insert a new node. Drop
2053 * dss_mtx during node allocation, since it is possible that a
2054 * new base chunk will be allocated.
2056 malloc_mutex_unlock(&dss_mtx);
2057 node = base_node_alloc();
2058 malloc_mutex_lock(&dss_mtx);
2063 extent_tree_ad_insert(&dss_chunks_ad, node);
2064 extent_tree_szad_insert(&dss_chunks_szad, node);
2067 /* Try to coalesce backward. */
2068 prev = extent_tree_ad_prev(&dss_chunks_ad, node);
2069 if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
2072 * Coalesce chunk with the previous address range. This does
2073 * not change the position within dss_chunks_ad, so only
2074 * remove/insert node from/into dss_chunks_szad.
2076 extent_tree_szad_remove(&dss_chunks_szad, prev);
2077 extent_tree_ad_remove(&dss_chunks_ad, prev);
2079 extent_tree_szad_remove(&dss_chunks_szad, node);
2080 node->addr = prev->addr;
2081 node->size += prev->size;
2082 extent_tree_szad_insert(&dss_chunks_szad, node);
2084 base_node_dealloc(prev);
2091 chunk_dealloc_dss(void *chunk, size_t size)
2094 malloc_mutex_lock(&dss_mtx);
2095 if ((uintptr_t)chunk >= (uintptr_t)dss_base
2096 && (uintptr_t)chunk < (uintptr_t)dss_max) {
2097 extent_node_t *node;
2099 /* Try to coalesce with other unused chunks. */
2100 node = chunk_dealloc_dss_record(chunk, size);
2106 /* Get the current end of the DSS. */
2110 * Try to shrink the DSS if this chunk is at the end of the
2111 * DSS. The sbrk() call here is subject to a race condition
2112 * with threads that use brk(2) or sbrk(2) directly, but the
2113 * alternative would be to leak memory for the sake of poorly
2114 * designed multi-threaded programs.
2116 if ((void *)((uintptr_t)chunk + size) == dss_max
2117 && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
2119 dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
2122 extent_tree_szad_remove(&dss_chunks_szad, node);
2123 extent_tree_ad_remove(&dss_chunks_ad, node);
2124 base_node_dealloc(node);
2126 malloc_mutex_unlock(&dss_mtx);
2128 malloc_mutex_unlock(&dss_mtx);
2129 madvise(chunk, size, MADV_FREE);
2134 malloc_mutex_unlock(&dss_mtx);
2141 chunk_dealloc_mmap(void *chunk, size_t size)
2144 pages_unmap(chunk, size);
2148 chunk_dealloc(void *chunk, size_t size)
2151 assert(chunk != NULL);
2152 assert(CHUNK_ADDR2BASE(chunk) == chunk);
2154 assert((size & chunksize_mask) == 0);
2157 stats_chunks.curchunks -= (size / chunksize);
2162 if (chunk_dealloc_dss(chunk, size) == false)
2168 chunk_dealloc_mmap(chunk, size);
2172 * End chunk management functions.
2174 /******************************************************************************/
2180 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
2181 * code if necessary).
2183 static inline arena_t *
2189 * We can only use TLS if this is a PIC library, since for the static
2190 * library version, libc's malloc is used by TLS allocation, which
2191 * introduces a bootstrapping issue.
2194 if (__isthreaded == false) {
2195 /* Avoid the overhead of TLS for single-threaded operation. */
2201 ret = choose_arena_hard();
2202 assert(ret != NULL);
2205 if (__isthreaded && narenas > 1) {
2209 * Hash _pthread_self() to one of the arenas. There is a prime
2210 * number of arenas, so this has a reasonable chance of
2211 * working. Even so, the hashing can be easily thwarted by
2212 * inconvenient _pthread_self() values. Without specific
2213 * knowledge of how _pthread_self() calculates values, we can't
2214 * easily do much better than this.
2216 ind = (unsigned long) _pthread_self() % narenas;
2219 * Optimistically assume that arenas[ind] has been initialized.
2220 * At worst, we find out that some other thread has already
2221 * done so, after acquiring the lock in preparation. Note that
2222 * this lazy locking also has the effect of lazily forcing
2223 * cache coherency; without the lock acquisition, there's no
2224 * guarantee that modification of arenas[ind] by another thread
2225 * would be seen on this CPU for an arbitrary amount of time.
2227 * In general, this approach to modifying a synchronized value
2228 * isn't a good idea, but in this case we only ever modify the
2229 * value once, so things work out well.
2234 * Avoid races with another thread that may have already
2235 * initialized arenas[ind].
2237 malloc_spin_lock(&arenas_lock);
2238 if (arenas[ind] == NULL)
2239 ret = arenas_extend((unsigned)ind);
2242 malloc_spin_unlock(&arenas_lock);
2248 assert(ret != NULL);
2254 * Choose an arena based on a per-thread value (slow-path code only, called
2255 * only by choose_arena()).
2258 choose_arena_hard(void)
2262 assert(__isthreaded);
2264 #ifdef MALLOC_BALANCE
2265 /* Seed the PRNG used for arena load balancing. */
2266 SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self()));
2270 #ifdef MALLOC_BALANCE
2273 ind = PRN(balance, narenas_2pow);
2274 if ((ret = arenas[ind]) == NULL) {
2275 malloc_spin_lock(&arenas_lock);
2276 if ((ret = arenas[ind]) == NULL)
2277 ret = arenas_extend(ind);
2278 malloc_spin_unlock(&arenas_lock);
2281 malloc_spin_lock(&arenas_lock);
2282 if ((ret = arenas[next_arena]) == NULL)
2283 ret = arenas_extend(next_arena);
2284 next_arena = (next_arena + 1) % narenas;
2285 malloc_spin_unlock(&arenas_lock);
2297 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
2299 uintptr_t a_chunk = (uintptr_t)a;
2300 uintptr_t b_chunk = (uintptr_t)b;
2305 return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
2308 /* Wrap red-black tree macros in functions. */
2309 rb_wrap(__unused static, arena_chunk_tree_dirty_, arena_chunk_tree_t,
2310 arena_chunk_t, link_dirty, arena_chunk_comp)
2313 arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2315 uintptr_t a_mapelm = (uintptr_t)a;
2316 uintptr_t b_mapelm = (uintptr_t)b;
2321 return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
2324 /* Wrap red-black tree macros in functions. */
2325 rb_wrap(__unused static, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
2326 link, arena_run_comp)
2329 arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2332 size_t a_size = a->bits & ~PAGE_MASK;
2333 size_t b_size = b->bits & ~PAGE_MASK;
2335 ret = (a_size > b_size) - (a_size < b_size);
2337 uintptr_t a_mapelm, b_mapelm;
2339 if ((a->bits & CHUNK_MAP_KEY) == 0)
2340 a_mapelm = (uintptr_t)a;
2343 * Treat keys as though they are lower than anything
2348 b_mapelm = (uintptr_t)b;
2350 ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
2356 /* Wrap red-black tree macros in functions. */
2357 rb_wrap(__unused static, arena_avail_tree_, arena_avail_tree_t,
2358 arena_chunk_map_t, link, arena_avail_comp)
2360 static inline void *
2361 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
2364 unsigned i, mask, bit, regind;
2366 assert(run->magic == ARENA_RUN_MAGIC);
2367 assert(run->regs_minelm < bin->regs_mask_nelms);
2370 * Move the first check outside the loop, so that run->regs_minelm can
2371 * be updated unconditionally, without the possibility of updating it
2374 i = run->regs_minelm;
2375 mask = run->regs_mask[i];
2377 /* Usable allocation found. */
2378 bit = ffs((int)mask) - 1;
2380 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
2381 assert(regind < bin->nregs);
2382 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2383 + (bin->reg_size * regind));
2386 mask ^= (1U << bit);
2387 run->regs_mask[i] = mask;
2392 for (i++; i < bin->regs_mask_nelms; i++) {
2393 mask = run->regs_mask[i];
2395 /* Usable allocation found. */
2396 bit = ffs((int)mask) - 1;
2398 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
2399 assert(regind < bin->nregs);
2400 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2401 + (bin->reg_size * regind));
2404 mask ^= (1U << bit);
2405 run->regs_mask[i] = mask;
2408 * Make a note that nothing before this element
2409 * contains a free region.
2411 run->regs_minelm = i; /* Low payoff: + (mask == 0); */
2422 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
2424 unsigned diff, regind, elm, bit;
2426 assert(run->magic == ARENA_RUN_MAGIC);
2429 * Avoid doing division with a variable divisor if possible. Using
2430 * actual division here can reduce allocator throughput by over 20%!
2432 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
2433 if ((size & (size - 1)) == 0) {
2435 * log2_table allows fast division of a power of two in the
2438 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
2440 static const unsigned char log2_table[] = {
2441 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
2442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
2443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
2445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
2452 regind = (diff >> log2_table[size - 1]);
2453 else if (size <= 32768)
2454 regind = diff >> (8 + log2_table[(size >> 8) - 1]);
2456 regind = diff / size;
2457 } else if (size < qspace_max) {
2459 * To divide by a number D that is not a power of two we
2460 * multiply by (2^21 / D) and then right shift by 21 positions.
2466 * (X * qsize_invs[(D >> QUANTUM_2POW) - 3]) >> SIZE_INV_SHIFT
2469 * We can omit the first three elements, because we never
2470 * divide by 0, and QUANTUM and 2*QUANTUM are both powers of
2471 * two, which are handled above.
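 *
 * Worked example (a sketch; assumes QUANTUM_2POW == 4, i.e. a 16 byte
 * quantum): for D == 48, qsize_invs[(48 >> 4) - 3] == (2^21 / 48) + 1 ==
 * 43691, so for X == 96 the product is 96 * 43691 == 4194336, and
 * 4194336 >> 21 == 2, which matches 96 / 48.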
2473 #define SIZE_INV_SHIFT 21
2474 #define QSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW)) + 1)
2475 static const unsigned qsize_invs[] = {
2477 QSIZE_INV(4), QSIZE_INV(5), QSIZE_INV(6), QSIZE_INV(7)
2478 #if (QUANTUM_2POW < 4)
2480 QSIZE_INV(8), QSIZE_INV(9), QSIZE_INV(10), QSIZE_INV(11),
2481 QSIZE_INV(12),QSIZE_INV(13), QSIZE_INV(14), QSIZE_INV(15)
2484 assert(QUANTUM * (((sizeof(qsize_invs)) / sizeof(unsigned)) + 3)
2485 >= (1U << QSPACE_MAX_2POW_DEFAULT));
2487 if (size <= (((sizeof(qsize_invs) / sizeof(unsigned)) + 2) <<
2489 regind = qsize_invs[(size >> QUANTUM_2POW) - 3] * diff;
2490 regind >>= SIZE_INV_SHIFT;
2492 regind = diff / size;
2494 } else if (size < cspace_max) {
2495 #define CSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << CACHELINE_2POW)) + 1)
2496 static const unsigned csize_invs[] = {
2498 CSIZE_INV(4), CSIZE_INV(5), CSIZE_INV(6), CSIZE_INV(7)
2500 assert(CACHELINE * (((sizeof(csize_invs)) / sizeof(unsigned)) +
2501 3) >= (1U << CSPACE_MAX_2POW_DEFAULT));
2503 if (size <= (((sizeof(csize_invs) / sizeof(unsigned)) + 2) <<
2505 regind = csize_invs[(size >> CACHELINE_2POW) - 3] *
2507 regind >>= SIZE_INV_SHIFT;
2509 regind = diff / size;
2512 #define SSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << SUBPAGE_2POW)) + 1)
2513 static const unsigned ssize_invs[] = {
2515 SSIZE_INV(4), SSIZE_INV(5), SSIZE_INV(6), SSIZE_INV(7),
2516 SSIZE_INV(8), SSIZE_INV(9), SSIZE_INV(10), SSIZE_INV(11),
2517 SSIZE_INV(12), SSIZE_INV(13), SSIZE_INV(14), SSIZE_INV(15)
2518 #if (PAGE_SHIFT == 13)
2520 SSIZE_INV(16), SSIZE_INV(17), SSIZE_INV(18), SSIZE_INV(19),
2521 SSIZE_INV(20), SSIZE_INV(21), SSIZE_INV(22), SSIZE_INV(23),
2522 SSIZE_INV(24), SSIZE_INV(25), SSIZE_INV(26), SSIZE_INV(27),
2523 SSIZE_INV(28), SSIZE_INV(29), SSIZE_INV(30), SSIZE_INV(31)
2526 assert(SUBPAGE * (((sizeof(ssize_invs)) / sizeof(unsigned)) + 3)
2529 if (size < (((sizeof(ssize_invs) / sizeof(unsigned)) + 2) <<
2531 regind = ssize_invs[(size >> SUBPAGE_2POW) - 3] * diff;
2532 regind >>= SIZE_INV_SHIFT;
2534 regind = diff / size;
2537 #undef SIZE_INV_SHIFT
2538 assert(diff == regind * size);
2539 assert(regind < bin->nregs);
2541 elm = regind >> (SIZEOF_INT_2POW + 3);
2542 if (elm < run->regs_minelm)
2543 run->regs_minelm = elm;
2544 bit = regind - (elm << (SIZEOF_INT_2POW + 3));
2545 assert((run->regs_mask[elm] & (1U << bit)) == 0);
2546 run->regs_mask[elm] |= (1U << bit);
2550 arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, bool zero)
2553 arena_chunk_t *chunk;
2554 size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
2556 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2557 old_ndirty = chunk->ndirty;
2558 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2560 total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
2562 need_pages = (size >> PAGE_SHIFT);
2563 assert(need_pages > 0);
2564 assert(need_pages <= total_pages);
2565 rem_pages = total_pages - need_pages;
2567 arena_avail_tree_remove(&arena->runs_avail, &chunk->map[run_ind]);
2569 /* Keep track of trailing unused pages for later use. */
2570 if (rem_pages > 0) {
2571 chunk->map[run_ind+need_pages].bits = (rem_pages <<
2572 PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
2574 chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
2575 PAGE_SHIFT) | (chunk->map[run_ind+total_pages-1].bits &
2577 arena_avail_tree_insert(&arena->runs_avail,
2578 &chunk->map[run_ind+need_pages]);
2581 for (i = 0; i < need_pages; i++) {
2582 /* Zero if necessary. */
2584 if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED)
2586 memset((void *)((uintptr_t)chunk + ((run_ind
2587 + i) << PAGE_SHIFT)), 0, PAGE_SIZE);
2588 /* CHUNK_MAP_ZEROED is cleared below. */
2592 /* Update dirty page accounting. */
2593 if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
2596 /* CHUNK_MAP_DIRTY is cleared below. */
2599 /* Initialize the chunk map. */
2601 chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE
2602 | CHUNK_MAP_ALLOCATED;
2604 chunk->map[run_ind + i].bits = (size_t)run
2605 | CHUNK_MAP_ALLOCATED;
2610 * Set the run size only in the first element for large runs. This is
2611 * primarily a debugging aid, since the lack of size info for trailing
2612 * pages only matters if the application tries to operate on an interior pointer.
if (large)
	chunk->map[run_ind].bits |= size;
2618 if (chunk->ndirty == 0 && old_ndirty > 0)
2619 arena_chunk_tree_dirty_remove(&arena->chunks_dirty, chunk);
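/*
 * Summary of the per-page map encoding used above: the low bits of
 * map[i].bits (those within PAGE_MASK) hold the flag bits
 * (CHUNK_MAP_KEY/DIRTY/ZEROED/LARGE/ALLOCATED), while the remaining high
 * bits hold either the run size in bytes (for free runs and the first page
 * of large runs) or, for pages belonging to small runs, the address of the
 * owning arena_run_t.
 */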
2622 static arena_chunk_t *
2623 arena_chunk_alloc(arena_t *arena)
2625 arena_chunk_t *chunk;
2628 if (arena->spare != NULL) {
2629 chunk = arena->spare;
2630 arena->spare = NULL;
2632 chunk = (arena_chunk_t *)chunk_alloc(chunksize, true);
2636 arena->stats.mapped += chunksize;
2639 chunk->arena = arena;
2642 * Claim that no pages are in use, since the header is merely overhead.
2648 * Initialize the map to contain one maximal free untouched run.
2650 for (i = 0; i < arena_chunk_header_npages; i++)
2651 chunk->map[i].bits = 0;
2652 chunk->map[i].bits = arena_maxclass | CHUNK_MAP_ZEROED;
2653 for (i++; i < chunk_npages-1; i++) {
2654 chunk->map[i].bits = CHUNK_MAP_ZEROED;
2656 chunk->map[chunk_npages-1].bits = arena_maxclass |
2660 /* Insert the run into the runs_avail tree. */
2661 arena_avail_tree_insert(&arena->runs_avail,
2662 &chunk->map[arena_chunk_header_npages]);
2668 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
2671 if (arena->spare != NULL) {
2672 if (arena->spare->ndirty > 0) {
2673 arena_chunk_tree_dirty_remove(
2674 &chunk->arena->chunks_dirty, arena->spare);
2675 arena->ndirty -= arena->spare->ndirty;
2677 chunk_dealloc((void *)arena->spare, chunksize);
2679 arena->stats.mapped -= chunksize;
2684 * Remove run from runs_avail, regardless of whether this chunk
2685 * will be cached, so that the arena does not use it. Dirty page
2686 * flushing only uses the chunks_dirty tree, so leaving this chunk in
2687 * the chunks_* trees is sufficient for that purpose.
2689 arena_avail_tree_remove(&arena->runs_avail,
2690 &chunk->map[arena_chunk_header_npages]);
2692 arena->spare = chunk;
2695 static arena_run_t *
2696 arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
2698 arena_chunk_t *chunk;
2700 arena_chunk_map_t *mapelm, key;
2702 assert(size <= arena_maxclass);
2703 assert((size & PAGE_MASK) == 0);
2705 /* Search the arena's chunks for the lowest best fit. */
2706 key.bits = size | CHUNK_MAP_KEY;
2707 mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
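/*
 * Because CHUNK_MAP_KEY elements sort below real map elements of equal
 * size (see arena_avail_comp()), this nsearch returns the smallest free
 * run that can hold 'size', breaking ties in favor of the lowest address.
 * For example, with free runs of 3, 4 and 8 pages, a 4-page request maps
 * to the 4-page run rather than the 8-page one.
 */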
2708 if (mapelm != NULL) {
2709 arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
2710 size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
2711 / sizeof(arena_chunk_map_t);
2713 run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
2715 arena_run_split(arena, run, size, large, zero);
2720 * No usable runs. Create a new chunk from which to allocate the run.
2722 chunk = arena_chunk_alloc(arena);
2725 run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2727 /* Update page map. */
2728 arena_run_split(arena, run, size, large, zero);
2733 arena_purge(arena_t *arena)
2735 arena_chunk_t *chunk;
2740 rb_foreach_begin(arena_chunk_t, link_dirty, &arena->chunks_dirty,
2742 ndirty += chunk->ndirty;
2743 } rb_foreach_end(arena_chunk_t, link_dirty, &arena->chunks_dirty, chunk)
2744 assert(ndirty == arena->ndirty);
2746 assert(arena->ndirty > opt_dirty_max);
2749 arena->stats.npurge++;
2753 * Iterate downward through chunks until enough dirty memory has been
2754 * purged. Terminate as soon as possible in order to minimize the
2755 * number of system calls, even if a chunk has only been partially purged.
2758 while (arena->ndirty > (opt_dirty_max >> 1)) {
2759 chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
2760 assert(chunk != NULL);
2762 for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
2763 assert(i >= arena_chunk_header_npages);
2765 if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
2766 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2767 /* Find adjacent dirty run(s). */
2768 for (npages = 1; i > arena_chunk_header_npages
2769 && (chunk->map[i - 1].bits &
2770 CHUNK_MAP_DIRTY); npages++) {
2772 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2774 chunk->ndirty -= npages;
2775 arena->ndirty -= npages;
2777 madvise((void *)((uintptr_t)chunk + (i <<
2778 PAGE_SHIFT)), (npages << PAGE_SHIFT),
2781 arena->stats.nmadvise++;
2782 arena->stats.purged += npages;
2784 if (arena->ndirty <= (opt_dirty_max >> 1))
2789 if (chunk->ndirty == 0) {
2790 arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
2797 arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
2799 arena_chunk_t *chunk;
2800 size_t size, run_ind, run_pages;
2802 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2803 run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
2805 assert(run_ind >= arena_chunk_header_npages);
2806 assert(run_ind < chunk_npages);
2807 if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
2808 size = chunk->map[run_ind].bits & ~PAGE_MASK;
2810 size = run->bin->run_size;
2811 run_pages = (size >> PAGE_SHIFT);
2813 /* Mark pages as unallocated in the chunk map. */
2817 for (i = 0; i < run_pages; i++) {
2818 assert((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY)
2820 chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
2823 if (chunk->ndirty == 0) {
2824 arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
2827 chunk->ndirty += run_pages;
2828 arena->ndirty += run_pages;
2832 for (i = 0; i < run_pages; i++) {
2833 chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE |
2834 CHUNK_MAP_ALLOCATED);
2837 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2839 chunk->map[run_ind+run_pages-1].bits = size |
2840 (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2842 /* Try to coalesce forward. */
2843 if (run_ind + run_pages < chunk_npages &&
2844 (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
2845 size_t nrun_size = chunk->map[run_ind+run_pages].bits &
2849 * Remove successor from runs_avail; the coalesced run is inserted later.
2852 arena_avail_tree_remove(&arena->runs_avail,
2853 &chunk->map[run_ind+run_pages]);
2856 run_pages = size >> PAGE_SHIFT;
2858 assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
2860 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2862 chunk->map[run_ind+run_pages-1].bits = size |
2863 (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2866 /* Try to coalesce backward. */
2867 if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
2868 CHUNK_MAP_ALLOCATED) == 0) {
2869 size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;
2871 run_ind -= prun_size >> PAGE_SHIFT;
2874 * Remove predecessor from runs_avail; the coalesced run is inserted later.
2877 arena_avail_tree_remove(&arena->runs_avail,
2878 &chunk->map[run_ind]);
2881 run_pages = size >> PAGE_SHIFT;
2883 assert((chunk->map[run_ind].bits & ~PAGE_MASK) ==
2885 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2887 chunk->map[run_ind+run_pages-1].bits = size |
2888 (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2891 /* Insert into runs_avail, now that coalescing is complete. */
2892 arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
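/*
 * Coalescing example (illustrative): freeing a 2-page run that sits between
 * a free 3-page successor and a free 1-page predecessor leaves a single
 * 6-page free run; only the first and last map elements of the merged run
 * carry its size, which is all the comparator and any future coalescing
 * need.
 */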
2894 /* Deallocate chunk if it is now completely unused. */
2895 if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
2896 CHUNK_MAP_ALLOCATED)) == arena_maxclass)
2897 arena_chunk_dealloc(arena, chunk);
2899 /* Enforce opt_dirty_max. */
2900 if (arena->ndirty > opt_dirty_max)
2905 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
2906 size_t oldsize, size_t newsize)
2908 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
2909 size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
2911 assert(oldsize > newsize);
2914 * Update the chunk map so that arena_run_dalloc() can treat the
2915 * leading run as separately allocated.
2917 chunk->map[pageind].bits = (oldsize - newsize) | CHUNK_MAP_LARGE |
2918 CHUNK_MAP_ALLOCATED;
2919 chunk->map[pageind+head_npages].bits = newsize | CHUNK_MAP_LARGE |
2920 CHUNK_MAP_ALLOCATED;
2922 arena_run_dalloc(arena, run, false);
2926 arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
2927 size_t oldsize, size_t newsize, bool dirty)
2929 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
2930 size_t npages = newsize >> PAGE_SHIFT;
2932 assert(oldsize > newsize);
2935 * Update the chunk map so that arena_run_dalloc() can treat the
2936 * trailing run as separately allocated.
2938 chunk->map[pageind].bits = newsize | CHUNK_MAP_LARGE |
2939 CHUNK_MAP_ALLOCATED;
2940 chunk->map[pageind+npages].bits = (oldsize - newsize) | CHUNK_MAP_LARGE
2941 | CHUNK_MAP_ALLOCATED;
2943 arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
2947 static arena_run_t *
2948 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2950 arena_chunk_map_t *mapelm;
2952 unsigned i, remainder;
2954 /* Look for a usable run. */
2955 mapelm = arena_run_tree_first(&bin->runs);
2956 if (mapelm != NULL) {
2957 /* run is guaranteed to have available space. */
2958 arena_run_tree_remove(&bin->runs, mapelm);
2959 run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
2961 bin->stats.reruns++;
2965 /* No existing runs have any space available. */
2967 /* Allocate a new run. */
2968 run = arena_run_alloc(arena, bin->run_size, false, false);
2972 /* Initialize run internals. */
2975 for (i = 0; i < bin->regs_mask_nelms - 1; i++)
2976 run->regs_mask[i] = UINT_MAX;
2977 remainder = bin->nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1);
2979 run->regs_mask[i] = UINT_MAX;
2981 /* The last element has spare bits that need to be unset. */
2982 run->regs_mask[i] = (UINT_MAX >> ((1U << (SIZEOF_INT_2POW + 3)) - remainder));
2986 run->regs_minelm = 0;
2988 run->nfree = bin->nregs;
2990 run->magic = ARENA_RUN_MAGIC;
2995 bin->stats.curruns++;
2996 if (bin->stats.curruns > bin->stats.highruns)
2997 bin->stats.highruns = bin->stats.curruns;
3002 /* bin->runcur must have space available before this function is called. */
3003 static inline void *
3004 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
3008 assert(run->magic == ARENA_RUN_MAGIC);
3009 assert(run->nfree > 0);
3011 ret = arena_run_reg_alloc(run, bin);
3012 assert(ret != NULL);
3018 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
3020 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
3023 bin->runcur = arena_bin_nonfull_run_get(arena, bin);
3024 if (bin->runcur == NULL)
3026 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
3027 assert(bin->runcur->nfree > 0);
3029 return (arena_bin_malloc_easy(arena, bin, bin->runcur));
3033 * Calculate bin->run_size such that it meets the following constraints:
3035 * *) bin->run_size >= min_run_size
3036 * *) bin->run_size <= arena_maxclass
3037 * *) bin->run_size <= RUN_MAX_SMALL
3038 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
3040 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
3041 * also calculated here, since these settings are all interdependent.
3044 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
3046 size_t try_run_size, good_run_size;
3047 unsigned good_nregs, good_mask_nelms, good_reg0_offset;
3048 unsigned try_nregs, try_mask_nelms, try_reg0_offset;
3050 assert(min_run_size >= PAGE_SIZE);
3051 assert(min_run_size <= arena_maxclass);
3052 assert(min_run_size <= RUN_MAX_SMALL);
3055 * Calculate known-valid settings before entering the run_size
3056 * expansion loop, so that the first part of the loop always copies valid settings.
3059 * The do..while loop iteratively reduces the number of regions until
3060 * the run header and the regions no longer overlap. A closed formula
3061 * would be quite messy, since there is an interdependency between the
3062 * header's mask length and the number of regions.
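/*
 * Worked example (illustrative only; the run header size is an assumed
 * value, not the real sizeof(arena_run_t)): with 4 kB pages, reg_size == 32
 * and a 40 byte header, the first pass tries (4096 - 40) / 32 + 1 == 127
 * regions, and the do..while loop settles on 126: the header then needs
 * 40 + 4 * 3 == 52 bytes (three additional 32-bit mask words beyond the one
 * counted in the header), which fits in the reg0_offset of
 * 4096 - 126 * 32 == 64 bytes.
 */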
3064 try_run_size = min_run_size;
3065 try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
3066 + 1; /* Counter-act try_nregs-- in loop. */
3069 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
3070 ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
3071 try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
3072 } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
3075 /* run_size expansion loop. */
3078 * Copy valid settings before trying more aggressive settings.
3080 good_run_size = try_run_size;
3081 good_nregs = try_nregs;
3082 good_mask_nelms = try_mask_nelms;
3083 good_reg0_offset = try_reg0_offset;
3085 /* Try more aggressive settings. */
3086 try_run_size += PAGE_SIZE;
3087 try_nregs = ((try_run_size - sizeof(arena_run_t)) /
3088 bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
3091 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
3092 ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ?
3094 try_reg0_offset = try_run_size - (try_nregs *
3096 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
3097 (try_mask_nelms - 1)) > try_reg0_offset);
3098 } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
3099 && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
3100 && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
3102 assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
3103 <= good_reg0_offset);
3104 assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
3106 /* Copy final settings. */
3107 bin->run_size = good_run_size;
3108 bin->nregs = good_nregs;
3109 bin->regs_mask_nelms = good_mask_nelms;
3110 bin->reg0_offset = good_reg0_offset;
3112 return (good_run_size);
3115 #ifdef MALLOC_BALANCE
3117 arena_lock_balance(arena_t *arena)
3119 unsigned contention;
3121 contention = malloc_spin_lock(&arena->lock);
3124 * Calculate the exponentially averaged contention for this
3125 * arena. Due to integer math always rounding down, this value
3126 * decays somewhat faster than normal.
3128 arena->contention = (((uint64_t)arena->contention
3129 * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1))
3130 + (uint64_t)contention) >> BALANCE_ALPHA_INV_2POW;
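/*
 * Equivalently, with alpha == 1/2^BALANCE_ALPHA_INV_2POW the update is
 * avg <- avg * (1 - alpha) + contention * alpha, evaluated in integer
 * arithmetic, so each lock acquisition nudges the decayed contention
 * estimate toward the most recent spin count.
 */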
3131 if (arena->contention >= opt_balance_threshold)
3132 arena_lock_balance_hard(arena);
3137 arena_lock_balance_hard(arena_t *arena)
3141 arena->contention = 0;
3143 arena->stats.nbalance++;
3145 ind = PRN(balance, narenas_2pow);
3146 if (arenas[ind] != NULL)
3147 arenas_map = arenas[ind];
3149 malloc_spin_lock(&arenas_lock);
3150 if (arenas[ind] != NULL)
3151 arenas_map = arenas[ind];
3153 arenas_map = arenas_extend(ind);
3154 malloc_spin_unlock(&arenas_lock);
3160 static inline void *
3161 mag_alloc(mag_t *mag)
if (mag->nrounds == 0)
	return (NULL);
mag->nrounds--;
return (mag->rounds[mag->nrounds]);
3172 mag_load(mag_t *mag)
3180 arena = choose_arena();
3181 bin = &arena->bins[mag->binind];
3182 #ifdef MALLOC_BALANCE
3183 arena_lock_balance(arena);
3185 malloc_spin_lock(&arena->lock);
3187 for (i = mag->nrounds; i < max_rounds; i++) {
3188 if ((run = bin->runcur) != NULL && run->nfree > 0)
3189 round = arena_bin_malloc_easy(arena, bin, run);
3191 round = arena_bin_malloc_hard(arena, bin);
3194 mag->rounds[i] = round;
3198 arena->stats.nmalloc_small += (i - mag->nrounds);
3199 arena->stats.allocated_small += (i - mag->nrounds) * bin->reg_size;
3201 malloc_spin_unlock(&arena->lock);
3205 static inline void *
3206 mag_rack_alloc(mag_rack_t *rack, size_t size, bool zero)
3209 bin_mags_t *bin_mags;
3213 binind = size2bin[size];
3214 assert(binind < nbins);
3215 bin_mags = &rack->bin_mags[binind];
3217 mag = bin_mags->curmag;
3219 /* Create an initial magazine for this size class. */
3220 assert(bin_mags->sparemag == NULL);
3221 mag = mag_create(choose_arena(), binind);
3224 bin_mags->curmag = mag;
3228 ret = mag_alloc(mag);
3230 if (bin_mags->sparemag != NULL) {
3231 if (bin_mags->sparemag->nrounds > 0) {
3232 /* Swap magazines. */
3233 bin_mags->curmag = bin_mags->sparemag;
3234 bin_mags->sparemag = mag;
3235 mag = bin_mags->curmag;
3237 /* Reload the current magazine. */
3241 /* Create a second magazine. */
3242 mag = mag_create(choose_arena(), binind);
3246 bin_mags->sparemag = bin_mags->curmag;
3247 bin_mags->curmag = mag;
3249 ret = mag_alloc(mag);
3254 if (zero == false) {
3256 memset(ret, 0xa5, size);
3258 memset(ret, 0, size);
3260 memset(ret, 0, size);
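/*
 * Descriptive note on the magazine layer: each thread owns a rack with a
 * current and a spare magazine per small size class.  Allocation pops
 * rounds off the current magazine; when it runs dry, the spare is swapped
 * in if it still holds rounds, and otherwise the empty magazine is refilled
 * from the arena in a single locked pass (mag_load()).  Deallocation is
 * symmetric via mag_unload().
 */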
3266 static inline void *
3267 arena_malloc_small(arena_t *arena, size_t size, bool zero)
3274 binind = size2bin[size];
3275 assert(binind < nbins);
3276 bin = &arena->bins[binind];
3277 size = bin->reg_size;
3279 #ifdef MALLOC_BALANCE
3280 arena_lock_balance(arena);
3282 malloc_spin_lock(&arena->lock);
3284 if ((run = bin->runcur) != NULL && run->nfree > 0)
3285 ret = arena_bin_malloc_easy(arena, bin, run);
3287 ret = arena_bin_malloc_hard(arena, bin);
3290 malloc_spin_unlock(&arena->lock);
3295 bin->stats.nrequests++;
3296 arena->stats.nmalloc_small++;
3297 arena->stats.allocated_small += size;
3299 malloc_spin_unlock(&arena->lock);
3301 if (zero == false) {
3303 memset(ret, 0xa5, size);
3305 memset(ret, 0, size);
3307 memset(ret, 0, size);
3313 arena_malloc_large(arena_t *arena, size_t size, bool zero)
3317 /* Large allocation. */
3318 size = PAGE_CEILING(size);
3319 #ifdef MALLOC_BALANCE
3320 arena_lock_balance(arena);
3322 malloc_spin_lock(&arena->lock);
3324 ret = (void *)arena_run_alloc(arena, size, true, zero);
3326 malloc_spin_unlock(&arena->lock);
3330 arena->stats.nmalloc_large++;
3331 arena->stats.allocated_large += size;
3333 malloc_spin_unlock(&arena->lock);
3335 if (zero == false) {
3337 memset(ret, 0xa5, size);
3339 memset(ret, 0, size);
3345 static inline void *
3346 arena_malloc(arena_t *arena, size_t size, bool zero)
3349 assert(arena != NULL);
3350 assert(arena->magic == ARENA_MAGIC);
3352 assert(QUANTUM_CEILING(size) <= arena_maxclass);
3354 if (size <= bin_maxclass) {
3356 if (__isthreaded && opt_mag) {
3357 mag_rack_t *rack = mag_rack;
3359 rack = mag_rack_create(arena);
3364 return (mag_rack_alloc(rack, size, zero));
3367 return (arena_malloc_small(arena, size, zero));
3369 return (arena_malloc_large(arena, size, zero));
3372 static inline void *
3373 imalloc(size_t size)
3378 if (size <= arena_maxclass)
3379 return (arena_malloc(choose_arena(), size, false));
3381 return (huge_malloc(size, false));
3384 static inline void *
3385 icalloc(size_t size)
3388 if (size <= arena_maxclass)
3389 return (arena_malloc(choose_arena(), size, true));
3391 return (huge_malloc(size, true));
3394 /* Only handles large allocations that require more than page alignment. */
3396 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
3400 arena_chunk_t *chunk;
3402 assert((size & PAGE_MASK) == 0);
3403 assert((alignment & PAGE_MASK) == 0);
3405 #ifdef MALLOC_BALANCE
3406 arena_lock_balance(arena);
3408 malloc_spin_lock(&arena->lock);
3410 ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
3412 malloc_spin_unlock(&arena->lock);
3416 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
3418 offset = (uintptr_t)ret & (alignment - 1);
3419 assert((offset & PAGE_MASK) == 0);
3420 assert(offset < alloc_size);
3422 arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
3424 size_t leadsize, trailsize;
3426 leadsize = alignment - offset;
3428 arena_run_trim_head(arena, chunk, ret, alloc_size,
3429 alloc_size - leadsize);
3430 ret = (void *)((uintptr_t)ret + leadsize);
3433 trailsize = alloc_size - leadsize - size;
3434 if (trailsize != 0) {
3435 /* Trim trailing space. */
3436 assert(trailsize < alloc_size);
3437 arena_run_trim_tail(arena, chunk, ret, size + trailsize,
3443 arena->stats.nmalloc_large++;
3444 arena->stats.allocated_large += size;
3446 malloc_spin_unlock(&arena->lock);
3449 memset(ret, 0xa5, size);
3451 memset(ret, 0, size);
3455 static inline void *
3456 ipalloc(size_t alignment, size_t size)
3462 * Round size up to the nearest multiple of alignment.
3464 * This done, we can take advantage of the fact that for each small
3465 * size class, every object is aligned at the smallest power of two
3466 * that is non-zero in the base two representation of the size. For example:
3469 * Size | Base 2 | Minimum alignment
3470 * -----+----------+------------------
3472 * 144 | 10010000 | 16
3473 * 192 | 11000000 | 64
3475 * Depending on runtime settings, it is possible that arena_malloc()
3476 * will further round up to a power of two, but that never causes
3477 * correctness issues.
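/*
 * For instance (assuming the default 16 byte quantum), a request for 90
 * bytes with 32 byte alignment rounds up to ceil_size == 96; every object
 * in the 96 byte class is 32-byte aligned because the lowest set bit of 96
 * is 32, so the ordinary small-allocation path suffices.
 */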
3479 ceil_size = (size + (alignment - 1)) & (-alignment);
3481 * (ceil_size < size) protects against the combination of maximal
3482 * alignment and size greater than maximal alignment.
3484 if (ceil_size < size) {
3485 /* size_t overflow. */
3489 if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
3490 && ceil_size <= arena_maxclass))
3491 ret = arena_malloc(choose_arena(), ceil_size, false);
3496 * We can't achieve subpage alignment, so round up alignment
3497 * permanently; it makes later calculations simpler.
3499 alignment = PAGE_CEILING(alignment);
3500 ceil_size = PAGE_CEILING(size);
3502 * (ceil_size < size) protects against very large sizes within
3503 * PAGE_SIZE of SIZE_T_MAX.
3505 * (ceil_size + alignment < ceil_size) protects against the
3506 * combination of maximal alignment and ceil_size large enough
3507 * to cause overflow. This is similar to the first overflow
3508 * check above, but it needs to be repeated due to the new
3509 * ceil_size value, which may now be *equal* to maximal
3510 * alignment, whereas before we only detected overflow if the
3511 * original size was *greater* than maximal alignment.
3513 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3514 /* size_t overflow. */
3519 * Calculate the size of the over-size run that arena_palloc()
3520 * would need to allocate in order to guarantee the alignment.
3522 if (ceil_size >= alignment)
3523 run_size = ceil_size + alignment - PAGE_SIZE;
3526 * It is possible that (alignment << 1) will cause
3527 * overflow, but it doesn't matter because we also
3528 * subtract PAGE_SIZE, which in the case of overflow
3529 * leaves us with a very large run_size. That causes
3530 * the first conditional below to fail, which means
3531 * that the bogus run_size value never gets used for
3532 * anything important.
3534 run_size = (alignment << 1) - PAGE_SIZE;
3537 if (run_size <= arena_maxclass) {
3538 ret = arena_palloc(choose_arena(), alignment, ceil_size,
3540 } else if (alignment <= chunksize)
3541 ret = huge_malloc(ceil_size, false);
3543 ret = huge_palloc(alignment, ceil_size);
3546 assert(((uintptr_t)ret & (alignment - 1)) == 0);
3550 /* Return the size of the allocation pointed to by ptr. */
3552 arena_salloc(const void *ptr)
3555 arena_chunk_t *chunk;
3556 size_t pageind, mapbits;
3558 assert(ptr != NULL);
3559 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3561 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3562 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3563 mapbits = chunk->map[pageind].bits;
3564 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3565 if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3566 arena_run_t *run = (arena_run_t *)(mapbits & ~PAGE_MASK);
3567 assert(run->magic == ARENA_RUN_MAGIC);
3568 ret = run->bin->reg_size;
3570 ret = mapbits & ~PAGE_MASK;
3577 static inline size_t
3578 isalloc(const void *ptr)
3581 arena_chunk_t *chunk;
3583 assert(ptr != NULL);
3585 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3588 assert(chunk->arena->magic == ARENA_MAGIC);
3590 ret = arena_salloc(ptr);
3592 extent_node_t *node, key;
3594 /* Chunk (huge allocation). */
3596 malloc_mutex_lock(&huge_mtx);
3598 /* Extract from tree of huge allocations. */
3599 key.addr = __DECONST(void *, ptr);
3600 node = extent_tree_ad_search(&huge, &key);
3601 assert(node != NULL);
3605 malloc_mutex_unlock(&huge_mtx);
3612 arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3613 arena_chunk_map_t *mapelm)
3619 run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
3620 assert(run->magic == ARENA_RUN_MAGIC);
3622 size = bin->reg_size;
3625 memset(ptr, 0x5a, size);
3627 arena_run_reg_dalloc(run, bin, ptr, size);
3630 if (run->nfree == bin->nregs) {
3631 /* Deallocate run. */
3632 if (run == bin->runcur)
3634 else if (bin->nregs != 1) {
3635 size_t run_pageind = (((uintptr_t)run -
3636 (uintptr_t)chunk)) >> PAGE_SHIFT;
3637 arena_chunk_map_t *run_mapelm =
3638 &chunk->map[run_pageind];
3640 * This block's conditional is necessary because if the
3641 * run only contains one region, then it never gets
3642 * inserted into the non-full runs tree.
3644 arena_run_tree_remove(&bin->runs, run_mapelm);
3649 arena_run_dalloc(arena, run, true);
3651 bin->stats.curruns--;
3653 } else if (run->nfree == 1 && run != bin->runcur) {
3655 * Make sure that bin->runcur always refers to the lowest
3656 * non-full run, if one exists.
3658 if (bin->runcur == NULL)
3660 else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
3661 /* Switch runcur. */
3662 if (bin->runcur->nfree > 0) {
3663 arena_chunk_t *runcur_chunk =
3664 CHUNK_ADDR2BASE(bin->runcur);
3665 size_t runcur_pageind =
3666 (((uintptr_t)bin->runcur -
3667 (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
3668 arena_chunk_map_t *runcur_mapelm =
3669 &runcur_chunk->map[runcur_pageind];
3671 /* Insert runcur. */
3672 arena_run_tree_insert(&bin->runs,
3677 size_t run_pageind = (((uintptr_t)run -
3678 (uintptr_t)chunk)) >> PAGE_SHIFT;
3679 arena_chunk_map_t *run_mapelm =
3680 &chunk->map[run_pageind];
3682 assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
3684 arena_run_tree_insert(&bin->runs, run_mapelm);
3688 arena->stats.allocated_small -= size;
3689 arena->stats.ndalloc_small++;
3695 mag_unload(mag_t *mag)
3697 arena_chunk_t *chunk;
3700 size_t i, ndeferred, nrounds;
3702 for (ndeferred = mag->nrounds; ndeferred > 0;) {
3703 nrounds = ndeferred;
3704 /* Lock the arena associated with the first round. */
3705 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mag->rounds[0]);
3706 arena = chunk->arena;
3707 #ifdef MALLOC_BALANCE
3708 arena_lock_balance(arena);
3710 malloc_spin_lock(&arena->lock);
3712 /* Deallocate every round that belongs to the locked arena. */
3713 for (i = ndeferred = 0; i < nrounds; i++) {
3714 round = mag->rounds[i];
3715 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(round);
3716 if (chunk->arena == arena) {
3717 size_t pageind = (((uintptr_t)round -
3718 (uintptr_t)chunk) >> PAGE_SHIFT);
3719 arena_chunk_map_t *mapelm =
3720 &chunk->map[pageind];
3721 arena_dalloc_small(arena, chunk, round, mapelm);
3724 * This round was allocated via a different
3725 * arena than the one that is currently locked.
3726 * Stash the round, so that it can be handled in a future pass.
3729 mag->rounds[ndeferred] = round;
3733 malloc_spin_unlock(&arena->lock);
3740 mag_rack_dalloc(mag_rack_t *rack, void *ptr)
3743 arena_chunk_t *chunk;
3746 bin_mags_t *bin_mags;
3748 size_t pageind, binind;
3749 arena_chunk_map_t *mapelm;
3751 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3752 arena = chunk->arena;
3753 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3754 mapelm = &chunk->map[pageind];
3755 run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
3756 assert(run->magic == ARENA_RUN_MAGIC);
3758 binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
3759 sizeof(arena_bin_t);
3760 assert(binind < nbins);
3763 memset(ptr, 0x5a, arena->bins[binind].reg_size);
3765 bin_mags = &rack->bin_mags[binind];
3766 mag = bin_mags->curmag;
3768 /* Create an initial magazine for this size class. */
3769 assert(bin_mags->sparemag == NULL);
3770 mag = mag_create(choose_arena(), binind);
3772 malloc_spin_lock(&arena->lock);
3773 arena_dalloc_small(arena, chunk, ptr, mapelm);
3774 malloc_spin_unlock(&arena->lock);
3777 bin_mags->curmag = mag;
3780 if (mag->nrounds == max_rounds) {
3781 if (bin_mags->sparemag != NULL) {
3782 if (bin_mags->sparemag->nrounds < max_rounds) {
3783 /* Swap magazines. */
3784 bin_mags->curmag = bin_mags->sparemag;
3785 bin_mags->sparemag = mag;
3786 mag = bin_mags->curmag;
3788 /* Unload the current magazine. */
3792 /* Create a second magazine. */
3793 mag = mag_create(choose_arena(), binind);
3795 mag = rack->bin_mags[binind].curmag;
3798 bin_mags->sparemag = bin_mags->curmag;
3799 bin_mags->curmag = mag;
3802 assert(mag->nrounds < max_rounds);
3804 mag->rounds[mag->nrounds] = ptr;
3810 arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
3812 /* Large allocation. */
3813 malloc_spin_lock(&arena->lock);
3815 #ifndef MALLOC_STATS
3819 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
3821 size_t size = chunk->map[pageind].bits & ~PAGE_MASK;
3826 memset(ptr, 0x5a, size);
3828 arena->stats.allocated_large -= size;
3832 arena->stats.ndalloc_large++;
3835 arena_run_dalloc(arena, (arena_run_t *)ptr, true);
3836 malloc_spin_unlock(&arena->lock);
3840 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
3843 arena_chunk_map_t *mapelm;
3845 assert(arena != NULL);
3846 assert(arena->magic == ARENA_MAGIC);
3847 assert(chunk->arena == arena);
3848 assert(ptr != NULL);
3849 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3851 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3852 mapelm = &chunk->map[pageind];
3853 assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
3854 if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
3855 /* Small allocation. */
3857 if (__isthreaded && opt_mag) {
3858 mag_rack_t *rack = mag_rack;
3860 rack = mag_rack_create(arena);
3862 malloc_spin_lock(&arena->lock);
3863 arena_dalloc_small(arena, chunk, ptr,
3865 malloc_spin_unlock(&arena->lock);
3870 mag_rack_dalloc(rack, ptr);
3873 malloc_spin_lock(&arena->lock);
3874 arena_dalloc_small(arena, chunk, ptr, mapelm);
3875 malloc_spin_unlock(&arena->lock);
3880 arena_dalloc_large(arena, chunk, ptr);
3886 arena_chunk_t *chunk;
3888 assert(ptr != NULL);
3890 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3892 arena_dalloc(chunk->arena, chunk, ptr);
3898 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3899 size_t size, size_t oldsize)
3902 assert(size < oldsize);
3905 * Shrink the run, and make trailing pages available for other allocations.
3908 #ifdef MALLOC_BALANCE
3909 arena_lock_balance(arena);
3911 malloc_spin_lock(&arena->lock);
3913 arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
3916 arena->stats.allocated_large -= oldsize - size;
3918 malloc_spin_unlock(&arena->lock);
3922 arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3923 size_t size, size_t oldsize)
3925 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
3926 size_t npages = oldsize >> PAGE_SHIFT;
3928 assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));
3930 /* Try to extend the run. */
3931 assert(size > oldsize);
3932 #ifdef MALLOC_BALANCE
3933 arena_lock_balance(arena);
3935 malloc_spin_lock(&arena->lock);
3937 if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
3938 & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
3939 ~PAGE_MASK) >= size - oldsize) {
3941 * The next run is available and sufficiently large. Split the
3942 * following run, then merge the first part with the existing allocation.
3945 arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
3946 ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
3949 chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
3950 CHUNK_MAP_ALLOCATED;
3951 chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
3952 CHUNK_MAP_ALLOCATED;
3955 arena->stats.allocated_large += size - oldsize;
3957 malloc_spin_unlock(&arena->lock);
3960 malloc_spin_unlock(&arena->lock);
3966 * Try to resize a large allocation, in order to avoid copying. This will
3967 * always fail if growing an object and the following run is already in use.
3970 arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
3974 psize = PAGE_CEILING(size);
3975 if (psize == oldsize) {
3976 /* Same size class. */
3977 if (opt_junk && size < oldsize) {
3978 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
3983 arena_chunk_t *chunk;
3986 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3987 arena = chunk->arena;
3988 assert(arena->magic == ARENA_MAGIC);
3990 if (psize < oldsize) {
3991 /* Fill before shrinking in order to avoid a race. */
3993 memset((void *)((uintptr_t)ptr + size), 0x5a,
3996 arena_ralloc_large_shrink(arena, chunk, ptr, psize,
4000 bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
4002 if (ret == false && opt_zero) {
4003 memset((void *)((uintptr_t)ptr + oldsize), 0,
4012 arena_ralloc(void *ptr, size_t size, size_t oldsize)
4017 /* Try to avoid moving the allocation. */
4018 if (size <= bin_maxclass) {
4019 if (oldsize <= bin_maxclass && size2bin[size] ==
4023 if (oldsize > bin_maxclass && oldsize <= arena_maxclass) {
4024 assert(size > bin_maxclass);
4025 if (arena_ralloc_large(ptr, size, oldsize) == false)
4031 * If we get here, then size and oldsize are different enough that we
4032 * need to move the object. In that case, fall back to allocating new
4033 * space and copying.
4035 ret = arena_malloc(choose_arena(), size, false);
4039 /* Junk/zero-filling were already done by arena_malloc(). */
4040 copysize = (size < oldsize) ? size : oldsize;
4041 memcpy(ret, ptr, copysize);
4045 if (opt_junk && size < oldsize)
4046 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
4047 else if (opt_zero && size > oldsize)
4048 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
4052 static inline void *
4053 iralloc(void *ptr, size_t size)
4057 assert(ptr != NULL);
4060 oldsize = isalloc(ptr);
4062 if (size <= arena_maxclass)
4063 return (arena_ralloc(ptr, size, oldsize));
4065 return (huge_ralloc(ptr, size, oldsize));
4069 arena_new(arena_t *arena)
4073 size_t prev_run_size;
4075 if (malloc_spin_init(&arena->lock))
4079 memset(&arena->stats, 0, sizeof(arena_stats_t));
4082 /* Initialize chunks. */
4083 arena_chunk_tree_dirty_new(&arena->chunks_dirty);
4084 arena->spare = NULL;
4088 arena_avail_tree_new(&arena->runs_avail);
4090 #ifdef MALLOC_BALANCE
4091 arena->contention = 0;
4094 /* Initialize bins. */
4095 prev_run_size = PAGE_SIZE;
4099 /* (2^n)-spaced tiny bins. */
4100 for (; i < ntbins; i++) {
4101 bin = &arena->bins[i];
4103 arena_run_tree_new(&bin->runs);
4105 bin->reg_size = (1U << (TINY_MIN_2POW + i));
4107 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4110 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4115 /* Quantum-spaced bins. */
4116 for (; i < ntbins + nqbins; i++) {
4117 bin = &arena->bins[i];
4119 arena_run_tree_new(&bin->runs);
4121 bin->reg_size = (i - ntbins + 1) << QUANTUM_2POW;
4123 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4126 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4130 /* Cacheline-spaced bins. */
4131 for (; i < ntbins + nqbins + ncbins; i++) {
4132 bin = &arena->bins[i];
4134 arena_run_tree_new(&bin->runs);
4136 bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
4139 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4142 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4146 /* Subpage-spaced bins. */
4147 for (; i < nbins; i++) {
4148 bin = &arena->bins[i];
4150 arena_run_tree_new(&bin->runs);
4152 bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
4155 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4158 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4163 arena->magic = ARENA_MAGIC;
4169 /* Create a new arena and insert it into the arenas array at index ind. */
4171 arenas_extend(unsigned ind)
4175 /* Allocate enough space for trailing bins. */
4176 ret = (arena_t *)base_alloc(sizeof(arena_t)
4177 + (sizeof(arena_bin_t) * (nbins - 1)));
4178 if (ret != NULL && arena_new(ret) == false) {
4182 /* Only reached if there is an OOM error. */
4185 * OOM here is quite inconvenient to propagate, since dealing with it
4186 * would require a check for failure in the fast path. Instead, punt
4187 * by using arenas[0]. In practice, this is an extremely unlikely failure.
4190 _malloc_message(_getprogname(),
4191 ": (malloc) Error initializing arena\n", "", "");
4200 mag_create(arena_t *arena, size_t binind)
4204 if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <=
4206 ret = arena_malloc_small(arena, sizeof(mag_t) + (sizeof(void *)
4207 * (max_rounds - 1)), false);
4209 ret = imalloc(sizeof(mag_t) + (sizeof(void *) * (max_rounds -
4214 ret->binind = binind;
4221 mag_destroy(mag_t *mag)
4224 arena_chunk_t *chunk;
4226 arena_chunk_map_t *mapelm;
4228 chunk = CHUNK_ADDR2BASE(mag);
4229 arena = chunk->arena;
4230 pageind = (((uintptr_t)mag - (uintptr_t)chunk) >> PAGE_SHIFT);
4231 mapelm = &chunk->map[pageind];
4233 assert(mag->nrounds == 0);
4234 if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <=
4236 malloc_spin_lock(&arena->lock);
4237 arena_dalloc_small(arena, chunk, mag, mapelm);
4238 malloc_spin_unlock(&arena->lock);
4244 mag_rack_create(arena_t *arena)
4247 assert(sizeof(mag_rack_t) + (sizeof(bin_mags_t) * (nbins - 1)) <=
4249 return (arena_malloc_small(arena, sizeof(mag_rack_t) +
4250 (sizeof(bin_mags_t) * (nbins - 1)), true));
4254 mag_rack_destroy(mag_rack_t *rack)
4257 arena_chunk_t *chunk;
4258 bin_mags_t *bin_mags;
4260 arena_chunk_map_t *mapelm;
4262 for (i = 0; i < nbins; i++) {
4263 bin_mags = &rack->bin_mags[i];
4264 if (bin_mags->curmag != NULL) {
4265 assert(bin_mags->curmag->binind == i);
4266 mag_unload(bin_mags->curmag);
4267 mag_destroy(bin_mags->curmag);
4269 if (bin_mags->sparemag != NULL) {
4270 assert(bin_mags->sparemag->binind == i);
4271 mag_unload(bin_mags->sparemag);
4272 mag_destroy(bin_mags->sparemag);
4276 chunk = CHUNK_ADDR2BASE(rack);
4277 arena = chunk->arena;
4278 pageind = (((uintptr_t)rack - (uintptr_t)chunk) >> PAGE_SHIFT);
4279 mapelm = &chunk->map[pageind];
4281 malloc_spin_lock(&arena->lock);
4282 arena_dalloc_small(arena, chunk, rack, mapelm);
4283 malloc_spin_unlock(&arena->lock);
4290 /******************************************************************************/
4292 * Begin general internal functions.
4296 huge_malloc(size_t size, bool zero)
4300 extent_node_t *node;
4302 /* Allocate one or more contiguous chunks for this request. */
4304 csize = CHUNK_CEILING(size);
4306 /* size is large enough to cause size_t wrap-around. */
4310 /* Allocate an extent node with which to track the chunk. */
4311 node = base_node_alloc();
4315 ret = chunk_alloc(csize, zero);
4317 base_node_dealloc(node);
4321 /* Insert node into huge. */
4325 malloc_mutex_lock(&huge_mtx);
4326 extent_tree_ad_insert(&huge, node);
4329 huge_allocated += csize;
4331 malloc_mutex_unlock(&huge_mtx);
4333 if (zero == false) {
4335 memset(ret, 0xa5, csize);
4337 memset(ret, 0, csize);
4343 /* Only handles large allocations that require more than chunk alignment. */
4345 huge_palloc(size_t alignment, size_t size)
4348 size_t alloc_size, chunk_size, offset;
4349 extent_node_t *node;
4352 * This allocation requires alignment that is even larger than chunk
4353 * alignment. This means that huge_malloc() isn't good enough.
4355 * Allocate almost twice as many chunks as are demanded by the size or
4356 * alignment, in order to assure the alignment can be achieved, then
4357 * unmap leading and trailing chunks.
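/*
 * Example (illustrative, assuming 1 MB chunks): a 1 MB request with 2 MB
 * alignment over-allocates alloc_size == 2 * 2 MB - 1 MB == 3 MB of chunks,
 * then trims whatever lies before the first 2 MB boundary and everything
 * past the first aligned megabyte, leaving exactly one aligned chunk-sized
 * region.
 */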
4359 assert(alignment >= chunksize);
4361 chunk_size = CHUNK_CEILING(size);
4363 if (size >= alignment)
4364 alloc_size = chunk_size + alignment - chunksize;
4366 alloc_size = (alignment << 1) - chunksize;
4368 /* Allocate an extent node with which to track the chunk. */
4369 node = base_node_alloc();
4373 ret = chunk_alloc(alloc_size, false);
4375 base_node_dealloc(node);
4379 offset = (uintptr_t)ret & (alignment - 1);
4380 assert((offset & chunksize_mask) == 0);
4381 assert(offset < alloc_size);
4383 /* Trim trailing space. */
4384 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
4389 /* Trim leading space. */
4390 chunk_dealloc(ret, alignment - offset);
4392 ret = (void *)((uintptr_t)ret + (alignment - offset));
4394 trailsize = alloc_size - (alignment - offset) - chunk_size;
4395 if (trailsize != 0) {
4396 /* Trim trailing space. */
4397 assert(trailsize < alloc_size);
4398 chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
4403 /* Insert node into huge. */
4405 node->size = chunk_size;
4407 malloc_mutex_lock(&huge_mtx);
4408 extent_tree_ad_insert(&huge, node);
4411 huge_allocated += chunk_size;
4413 malloc_mutex_unlock(&huge_mtx);
4416 memset(ret, 0xa5, chunk_size);
4418 memset(ret, 0, chunk_size);
4424 huge_ralloc(void *ptr, size_t size, size_t oldsize)
4429 /* Avoid moving the allocation if the size class would not change. */
4430 if (oldsize > arena_maxclass &&
4431 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
4432 if (opt_junk && size < oldsize) {
4433 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
4435 } else if (opt_zero && size > oldsize) {
4436 memset((void *)((uintptr_t)ptr + oldsize), 0, size
4443 * If we get here, then size and oldsize are different enough that we
4444 * need to use a different size class. In that case, fall back to
4445 * allocating new space and copying.
4447 ret = huge_malloc(size, false);
4451 copysize = (size < oldsize) ? size : oldsize;
4452 memcpy(ret, ptr, copysize);
4458 huge_dalloc(void *ptr)
4460 extent_node_t *node, key;
4462 malloc_mutex_lock(&huge_mtx);
4464 /* Extract from tree of huge allocations. */
4466 node = extent_tree_ad_search(&huge, &key);
4467 assert(node != NULL);
4468 assert(node->addr == ptr);
4469 extent_tree_ad_remove(&huge, node);
4473 huge_allocated -= node->size;
4476 malloc_mutex_unlock(&huge_mtx);
4480 if (opt_dss && opt_junk)
4481 memset(node->addr, 0x5a, node->size);
4483 chunk_dealloc(node->addr, node->size);
4485 base_node_dealloc(node);
4489 malloc_print_stats(void)
4492 if (opt_print_stats) {
4493 char s[UMAX2S_BUFSIZE];
4494 _malloc_message("___ Begin malloc statistics ___\n", "", "",
4496 _malloc_message("Assertions ",
4503 _malloc_message("Boolean MALLOC_OPTIONS: ",
4504 opt_abort ? "A" : "a", "", "");
4506 _malloc_message(opt_dss ? "D" : "d", "", "", "");
4509 _malloc_message(opt_mag ? "G" : "g", "", "", "");
4511 _malloc_message(opt_junk ? "J" : "j", "", "", "");
4513 _malloc_message(opt_mmap ? "M" : "m", "", "", "");
4515 _malloc_message(opt_utrace ? "PU" : "Pu",
4516 opt_sysv ? "V" : "v",
4517 opt_xmalloc ? "X" : "x",
4518 opt_zero ? "Z\n" : "z\n");
4520 _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
4521 _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
4522 #ifdef MALLOC_BALANCE
4523 _malloc_message("Arena balance threshold: ",
4524 umax2s(opt_balance_threshold, s), "\n", "");
4526 _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
4528 _malloc_message("Quantum size: ", umax2s(QUANTUM, s), "\n", "");
4529 _malloc_message("Cacheline size (assumed): ", umax2s(CACHELINE,
4532 _malloc_message("Tiny 2^n-spaced sizes: [", umax2s((1U <<
4533 TINY_MIN_2POW), s), "..", "");
4534 _malloc_message(umax2s((qspace_min >> 1), s), "]\n", "", "");
4536 _malloc_message("Quantum-spaced sizes: [", umax2s(qspace_min,
4538 _malloc_message(umax2s(qspace_max, s), "]\n", "", "");
4539 _malloc_message("Cacheline-spaced sizes: [", umax2s(cspace_min,
4541 _malloc_message(umax2s(cspace_max, s), "]\n", "", "");
4542 _malloc_message("Subpage-spaced sizes: [", umax2s(sspace_min,
4544 _malloc_message(umax2s(sspace_max, s), "]\n", "", "");
4546 _malloc_message("Rounds per magazine: ", umax2s(max_rounds, s),
4549 _malloc_message("Max dirty pages per arena: ",
4550 umax2s(opt_dirty_max, s), "\n", "");
4552 _malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
4553 _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
4557 size_t allocated, mapped;
4558 #ifdef MALLOC_BALANCE
4559 uint64_t nbalance = 0;
4564 /* Calculate and print allocated/mapped stats. */
4567 for (i = 0, allocated = 0; i < narenas; i++) {
4568 if (arenas[i] != NULL) {
4569 malloc_spin_lock(&arenas[i]->lock);
4571 arenas[i]->stats.allocated_small;
4573 arenas[i]->stats.allocated_large;
4574 #ifdef MALLOC_BALANCE
4575 nbalance += arenas[i]->stats.nbalance;
4577 malloc_spin_unlock(&arenas[i]->lock);
4582 malloc_mutex_lock(&huge_mtx);
4583 allocated += huge_allocated;
4584 mapped = stats_chunks.curchunks * chunksize;
4585 malloc_mutex_unlock(&huge_mtx);
4587 malloc_mutex_lock(&base_mtx);
4588 mapped += base_mapped;
4589 malloc_mutex_unlock(&base_mtx);
4591 malloc_printf("Allocated: %zu, mapped: %zu\n",
4594 #ifdef MALLOC_BALANCE
4595 malloc_printf("Arena balance reassignments: %llu\n",
4599 /* Print chunk stats. */
4601 chunk_stats_t chunks_stats;
4603 malloc_mutex_lock(&huge_mtx);
4604 chunks_stats = stats_chunks;
4605 malloc_mutex_unlock(&huge_mtx);
4607 malloc_printf("chunks: nchunks "
4608 "highchunks curchunks\n");
4609 malloc_printf(" %13llu%13lu%13lu\n",
4610 chunks_stats.nchunks,
4611 chunks_stats.highchunks,
4612 chunks_stats.curchunks);
4615 /* Print huge allocation stats. */
4617 "huge: nmalloc ndalloc allocated\n");
4618 malloc_printf(" %12llu %12llu %12zu\n",
4619 huge_nmalloc, huge_ndalloc, huge_allocated);
4621 /* Print stats for each arena. */
4622 for (i = 0; i < narenas; i++) {
4624 if (arena != NULL) {
4626 "\narenas[%u]:\n", i);
4627 malloc_spin_lock(&arena->lock);
4629 malloc_spin_unlock(&arena->lock);
4633 #endif /* #ifdef MALLOC_STATS */
4634 _malloc_message("--- End malloc statistics ---\n", "", "", "");
4640 size2bin_validate(void)
4642 size_t i, size, binind;
4644 assert(size2bin[0] == 0xffU);
4648 for (; i < (1U << TINY_MIN_2POW); i++) {
4649 size = pow2_ceil(1U << TINY_MIN_2POW);
4650 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4651 assert(size2bin[i] == binind);
4653 for (; i < qspace_min; i++) {
4654 size = pow2_ceil(i);
4655 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4656 assert(size2bin[i] == binind);
4659 /* Quantum-spaced. */
4660 for (; i <= qspace_max; i++) {
4661 size = QUANTUM_CEILING(i);
4662 binind = ntbins + (size >> QUANTUM_2POW) - 1;
4663 assert(size2bin[i] == binind);
4665 /* Cacheline-spaced. */
4666 for (; i <= cspace_max; i++) {
4667 size = CACHELINE_CEILING(i);
4668 binind = ntbins + nqbins + ((size - cspace_min) >>
4670 assert(size2bin[i] == binind);
4673 for (; i <= sspace_max; i++) {
4674 size = SUBPAGE_CEILING(i);
4675 binind = ntbins + nqbins + ncbins + ((size - sspace_min)
4677 assert(size2bin[i] == binind);
4686 if (opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT
4687 || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT)
4688 return (size2bin_init_hard());
4690 size2bin = const_size2bin;
4692 assert(sizeof(const_size2bin) == bin_maxclass + 1);
4693 size2bin_validate();
4699 size2bin_init_hard(void)
4701 size_t i, size, binind;
4702 uint8_t *custom_size2bin;
4704 assert(opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT
4705 || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT);
4707 custom_size2bin = (uint8_t *)base_alloc(bin_maxclass + 1);
4708 if (custom_size2bin == NULL)
4711 custom_size2bin[0] = 0xffU;
4715 for (; i < (1U << TINY_MIN_2POW); i++) {
4716 size = pow2_ceil(1U << TINY_MIN_2POW);
4717 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4718 custom_size2bin[i] = binind;
4720 for (; i < qspace_min; i++) {
4721 size = pow2_ceil(i);
4722 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4723 custom_size2bin[i] = binind;
4726 /* Quantum-spaced. */
4727 for (; i <= qspace_max; i++) {
4728 size = QUANTUM_CEILING(i);
4729 binind = ntbins + (size >> QUANTUM_2POW) - 1;
4730 custom_size2bin[i] = binind;
4732 /* Cacheline-spaced. */
4733 for (; i <= cspace_max; i++) {
4734 size = CACHELINE_CEILING(i);
4735 binind = ntbins + nqbins + ((size - cspace_min) >>
4737 custom_size2bin[i] = binind;
4740 for (; i <= sspace_max; i++) {
4741 size = SUBPAGE_CEILING(i);
4742 binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
4744 custom_size2bin[i] = binind;
4747 size2bin = custom_size2bin;
4749 size2bin_validate();
4755 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
4756 * implementation has to take pains to avoid infinite recursion during initialization.
4763 if (malloc_initialized == false)
4764 return (malloc_init_hard());
4770 malloc_init_hard(void)
4774 char buf[PATH_MAX + 1];
4777 malloc_mutex_lock(&init_lock);
4778 if (malloc_initialized) {
4780 * Another thread initialized the allocator before this one
4781 * acquired init_lock.
4783 malloc_mutex_unlock(&init_lock);
4787 /* Get number of CPUs. */
4794 len = sizeof(ncpus);
4795 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
4802 * Increase the chunk size to the largest page size that is greater
4803 * than the default chunk size and less than or equal to 4MB.
4806 size_t pagesizes[MAXPAGESIZES];
4809 nsizes = getpagesizes(pagesizes, MAXPAGESIZES);
4810 for (k = 0; k < nsizes; k++)
4811 if (pagesizes[k] <= (1LU << 22))
4812 while ((1LU << opt_chunk_2pow) < pagesizes[k])
4816 for (i = 0; i < 3; i++) {
4819 /* Get runtime configuration. */
4822 if ((linklen = readlink("/etc/malloc.conf", buf,
4823 sizeof(buf) - 1)) != -1) {
4825 * Use the contents of the "/etc/malloc.conf"
4826 * symbolic link's name.
4828 buf[linklen] = '\0';
4831 /* No configuration specified. */
4837 if (issetugid() == 0 && (opts =
4838 getenv("MALLOC_OPTIONS")) != NULL) {
4840 * Do nothing; opts is already initialized to
4841 * the value of the MALLOC_OPTIONS environment variable.
4845 /* No configuration specified. */
4851 if (_malloc_options != NULL) {
4853 * Use options that were compiled into the program.
4856 opts = _malloc_options;
4858 /* No configuration specified. */
4868 for (j = 0; opts[j] != '\0'; j++) {
4872 /* Parse repetition count, if any. */
4873 for (nreps = 0, nseen = false;; j++, nseen = true) {
4875 case '0': case '1': case '2': case '3':
4876 case '4': case '5': case '6': case '7':
4879 nreps += opts[j] - '0';
4889 for (k = 0; k < nreps; k++) {
4898 #ifdef MALLOC_BALANCE
4899 opt_balance_threshold >>= 1;
4903 #ifdef MALLOC_BALANCE
4904 if (opt_balance_threshold == 0)
4905 opt_balance_threshold = 1;
4906 else if ((opt_balance_threshold << 1)
4907 > opt_balance_threshold)
4908 opt_balance_threshold <<= 1;
4912 if (opt_cspace_max_2pow - 1 >
4913 opt_qspace_max_2pow &&
4914 opt_cspace_max_2pow >
4916 opt_cspace_max_2pow--;
4919 if (opt_cspace_max_2pow < PAGE_SHIFT
4921 opt_cspace_max_2pow++;
4934 opt_dirty_max >>= 1;
4937 if (opt_dirty_max == 0)
4939 else if ((opt_dirty_max << 1) != 0)
4940 opt_dirty_max <<= 1;
4958 * Chunks always require at least one
4959 * header page, so chunks can never be
4960 * smaller than two pages.
4962 if (opt_chunk_2pow > PAGE_SHIFT + 1)
4966 if (opt_chunk_2pow + 1 <
4967 (sizeof(size_t) << 3))
4981 opt_narenas_lshift--;
4984 opt_narenas_lshift++;
4987 opt_print_stats = false;
4990 opt_print_stats = true;
4993 if (opt_qspace_max_2pow > QUANTUM_2POW)
4994 opt_qspace_max_2pow--;
4997 if (opt_qspace_max_2pow + 1 <
4998 opt_cspace_max_2pow)
4999 opt_qspace_max_2pow++;
5003 if (opt_mag_size_2pow + 1 < (8U <<
5005 opt_mag_size_2pow++;
5009 * Make sure there's always at least
5010 * one round per magazine.
5012 if ((1U << (opt_mag_size_2pow-1)) >=
5014 opt_mag_size_2pow--;
5030 opt_xmalloc = false;
5046 _malloc_message(_getprogname(),
5047 ": (malloc) Unsupported character "
5048 "in malloc options: '", cbuf,
5057 /* Make sure that there is some method for acquiring memory. */
5058 if (opt_dss == false && opt_mmap == false)
5062 /* Take care to call atexit() only once. */
5063 if (opt_print_stats) {
5064 /* Print statistics at exit. */
5065 atexit(malloc_print_stats);
5070 * Calculate the actual number of rounds per magazine, taking into
5071 * account header overhead.
5073 max_rounds = (1LLU << (opt_mag_size_2pow - SIZEOF_PTR_2POW)) -
5074 (sizeof(mag_t) >> SIZEOF_PTR_2POW) + 1;
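/*
 * For example (all sizes assumed purely for illustration): with 8 byte
 * pointers and a 512 byte magazine, there are 512 / 8 == 64 pointer-sized
 * slots; if the mag_t header occupies three such slots (binind, nrounds and
 * the first rounds element), then max_rounds == 64 - 3 + 1 == 62, since the
 * formula adds back the rounds slot already counted inside sizeof(mag_t).
 */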
5077 /* Set variables according to the value of opt_[qc]space_max_2pow. */
5078 qspace_max = (1U << opt_qspace_max_2pow);
5079 cspace_min = CACHELINE_CEILING(qspace_max);
5080 if (cspace_min == qspace_max)
5081 cspace_min += CACHELINE;
5082 cspace_max = (1U << opt_cspace_max_2pow);
5083 sspace_min = SUBPAGE_CEILING(cspace_max);
5084 if (sspace_min == cspace_max)
5085 sspace_min += SUBPAGE;
5086 assert(sspace_min < PAGE_SIZE);
5087 sspace_max = PAGE_SIZE - SUBPAGE;
5090 assert(QUANTUM_2POW >= TINY_MIN_2POW);
5092 assert(ntbins <= QUANTUM_2POW);
5093 nqbins = qspace_max >> QUANTUM_2POW;
5094 ncbins = ((cspace_max - cspace_min) >> CACHELINE_2POW) + 1;
5095 nsbins = ((sspace_max - sspace_min) >> SUBPAGE_2POW) + 1;
5096 nbins = ntbins + nqbins + ncbins + nsbins;
5098 if (size2bin_init()) {
5099 malloc_mutex_unlock(&init_lock);
5103 /* Set variables according to the value of opt_chunk_2pow. */
5104 chunksize = (1LU << opt_chunk_2pow);
5105 chunksize_mask = chunksize - 1;
5106 chunk_npages = (chunksize >> PAGE_SHIFT);
5111 * Compute the header size such that it is large enough to
5112 * contain the page map.
5114 header_size = sizeof(arena_chunk_t) +
5115 (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
5116 arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
5117 ((header_size & PAGE_MASK) != 0);
5119 arena_maxclass = chunksize - (arena_chunk_header_npages <<
	memset(&stats_chunks, 0, sizeof(chunk_stats_t));

	/* Various sanity checks that regard configuration. */
	assert(chunksize >= PAGE_SIZE);

	/* Initialize chunks data. */
	malloc_mutex_init(&huge_mtx);
	extent_tree_ad_new(&huge);
	malloc_mutex_init(&dss_mtx);
	dss_base = sbrk(0);
	dss_prev = dss_base;
	dss_max = dss_base;
	extent_tree_szad_new(&dss_chunks_szad);
	extent_tree_ad_new(&dss_chunks_ad);
	/* Initialize base allocation data structures. */

	/*
	 * Allocate a base chunk here, since it doesn't actually have to be
	 * chunk-aligned. Doing this before allocating any other chunks allows
	 * the use of space that would otherwise be wasted.
	 */
	base_pages_alloc(0);

	malloc_mutex_init(&base_mtx);
	/*
	 * For SMP systems, create twice as many arenas as there are
	 * CPUs by default.
	 */
	opt_narenas_lshift++;
	/* Determine how many arenas to use. */
	narenas = ncpus;
	if (opt_narenas_lshift > 0) {
		if ((narenas << opt_narenas_lshift) > narenas)
			narenas <<= opt_narenas_lshift;
		/*
		 * Make sure not to exceed the limits of what base_alloc() can
		 * handle.
		 */
		if (narenas * sizeof(arena_t *) > chunksize)
			narenas = chunksize / sizeof(arena_t *);
	} else if (opt_narenas_lshift < 0) {
		if ((narenas >> -opt_narenas_lshift) < narenas)
			narenas >>= -opt_narenas_lshift;
		/* Make sure there is at least one arena. */
		if (narenas == 0)
			narenas = 1;
	}
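	/*
	 * For example (illustrative only): with 4 CPUs and
	 * opt_narenas_lshift == 1 (the SMP doubling described above), narenas
	 * becomes 4 << 1 == 8; the opt_narenas_lshift--/++ option handlers
	 * earlier in this function halve or double that count.
	 */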
#ifdef MALLOC_BALANCE
	assert(narenas != 0);
	for (narenas_2pow = 0;
	     (narenas >> (narenas_2pow + 1)) != 0;
	     narenas_2pow++);
#endif
		static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
		    23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
		    89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
		    151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
		    223, 227, 229, 233, 239, 241, 251, 257, 263};
		unsigned nprimes, parenas;

		/*
		 * Pick a prime number of hash arenas that is more than narenas
		 * so that direct hashing of pthread_self() pointers tends to
		 * spread allocations evenly among the arenas.
		 */
		assert((narenas & 1) == 0); /* narenas must be even. */
		nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
		parenas = primes[nprimes - 1]; /* In case not enough primes. */
		for (i = 1; i < nprimes; i++) {
			if (primes[i] > narenas) {
				parenas = primes[i];
				break;
			}
		}
		narenas = parenas;
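		/*
		 * Example (illustrative): if narenas is 8 at this point, the
		 * first prime in the table above that exceeds 8 is 11, so 11
		 * hash arenas end up being used.
		 */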
#  ifndef MALLOC_BALANCE
	/* Allocate and initialize arenas. */
	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
	if (arenas == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}
	/*
	 * Zero the array. In practice, this should always be pre-zeroed,
	 * since it was just mmap()ed, but let's be sure.
	 */
	memset(arenas, 0, sizeof(arena_t *) * narenas);
	/*
	 * Initialize one arena here. The rest are lazily created in
	 * choose_arena_hard().
	 */
	arenas_extend(0);
	if (arenas[0] == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

	/*
	 * Assign the initial arena to the initial thread, in order to avoid
	 * spurious creation of an extra arena if the application switches to
	 * threaded mode.
	 */
	arenas_map = arenas[0];
	/*
	 * Seed here for the initial thread, since choose_arena_hard() is only
	 * called for other threads. The seed value doesn't really matter.
	 */
#ifdef MALLOC_BALANCE
	malloc_spin_init(&arenas_lock);

	malloc_initialized = true;
	malloc_mutex_unlock(&init_lock);
	return (false);
}
/*
 * End general internal functions.
 */
/******************************************************************************/
/*
 * Begin malloc(3)-compatible functions.
 */
void *
malloc(size_t size)
{

	if (malloc_init()) {
		ret = NULL;
		goto RETURN;
	}

	if (opt_sysv == false)
		size = 1;

	ret = imalloc(size);

	if (ret == NULL) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in malloc(): out of memory\n", "",
		    "");
	}

	UTRACE(0, size, ret);
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{

	/* Make sure that alignment is a large enough power of 2. */
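	/*
	 * Aside (illustrative note, not in the original): (alignment - 1) &
	 * alignment clears to zero for powers of two, e.g. 16 gives
	 * 0x0f & 0x10 == 0, whereas a non-power such as 24 gives
	 * 0x17 & 0x18 == 0x10, so 24 is rejected by the test below.
	 */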
	if (((alignment - 1) & alignment) != 0
	    || alignment < sizeof(void *)) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in posix_memalign(): "
		    "invalid alignment\n", "", "");
	if (opt_sysv == false)
		size = 1;

	result = ipalloc(alignment, size);

	if (result == NULL) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in posix_memalign(): out of memory\n",
		    "", "");
	}

	UTRACE(0, size, result);
void *
calloc(size_t num, size_t size)
{

	if (malloc_init()) {
		num_size = 0;
		ret = NULL;
		goto RETURN;
	}

	num_size = num * size;
	if (num_size == 0) {
		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
			num_size = 1;
	/*
	 * Try to avoid division here. We know that it isn't possible to
	 * overflow during multiplication if neither operand uses any of the
	 * most significant half of the bits in a size_t.
	 */
	} else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
	    && (num_size / size != num)) {
		/* size_t overflow. */
		ret = NULL;
		goto RETURN;
	}
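	/*
	 * Worked illustration of the check above (not from the original
	 * source): on a 32-bit system, SIZE_T_MAX << 16 masks the upper half
	 * of a size_t. If neither num nor size has any of those bits set,
	 * both are less than 2^16, so num * size is less than 2^32 and cannot
	 * overflow; only when one operand is "large" is the division test
	 * needed to confirm that num_size did not wrap.
	 */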
	ret = icalloc(num_size);

	if (ret == NULL) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in calloc(): out of memory\n", "",
		    "");
	}

	UTRACE(0, num_size, ret);
void *
realloc(void *ptr, size_t size)
{

	if (opt_sysv == false)
		size = 1;

	assert(malloc_initialized);

	ret = iralloc(ptr, size);

	if (ret == NULL) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in realloc(): out of "
		    "memory\n", "", "");
	}

	ret = imalloc(size);

	if (ret == NULL) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in realloc(): out of "
		    "memory\n", "", "");
	}

	UTRACE(ptr, size, ret);
void
free(void *ptr)
{

	assert(malloc_initialized);

	idalloc(ptr);
}
/*
 * End malloc(3)-compatible functions.
 */
/******************************************************************************/
/*
 * Begin non-standard functions.
 */
size_t
malloc_usable_size(const void *ptr)
{

	assert(ptr != NULL);

	return (isalloc(ptr));
}
/*
 * End non-standard functions.
 */
/******************************************************************************/
/*
 * Begin library-private functions.
 */

/******************************************************************************/
/*
 * Begin thread cache.
 */
/*
 * We provide an unpublished interface in order to receive notifications from
 * the pthreads library whenever a thread exits. This allows us to clean up
 * thread caches.
 */
void
_malloc_thread_cleanup(void)
{

	if (mag_rack != NULL) {
		assert(mag_rack != (void *)-1);
		mag_rack_destroy(mag_rack);

		mag_rack = (void *)-1;
	}
}
/*
 * The following functions are used by threading libraries for protection of
 * malloc during fork(). These functions are only called if the program is
 * running in threaded mode, so there is no need to check whether the program
 * is threaded here.
 */

void
_malloc_prefork(void)
{
	arena_t *larenas[narenas], *tarenas[narenas];

	/* Acquire all mutexes in a safe order. */
	/*
	 * arenas_lock must be acquired after all of the arena mutexes, in
	 * order to avoid potential deadlock with arena_lock_balance[_hard]().
	 * Since arenas_lock protects the arenas array, the following code has
	 * to race with arenas_extend() callers until it succeeds in locking
	 * all arenas before locking arenas_lock.
	 */
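	/*
	 * Descriptive note (not in the original): larenas[] records which
	 * arena locks have already been acquired; arenas[] is re-read under
	 * arenas_lock, and any arenas created in the meantime are locked
	 * before arenas_lock itself is finally taken, so that arenas_lock is
	 * always the last lock acquired.
	 */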
	memset(larenas, 0, sizeof(arena_t *) * narenas);
	malloc_spin_lock(&arenas_lock);
	for (i = 0; i < narenas; i++) {
		if (arenas[i] != larenas[i]) {
			memcpy(tarenas, arenas, sizeof(arena_t *) *
			    narenas);
			malloc_spin_unlock(&arenas_lock);
			for (j = 0; j < narenas; j++) {
				if (larenas[j] != tarenas[j]) {
					larenas[j] = tarenas[j];
					malloc_spin_lock(
					    &larenas[j]->lock);
				}
			}
		}
	}
	malloc_mutex_lock(&base_mtx);

	malloc_mutex_lock(&huge_mtx);

	malloc_mutex_lock(&dss_mtx);
}
void
_malloc_postfork(void)
{
	arena_t *larenas[narenas];

	/* Release all mutexes, now that fork() has completed. */
	malloc_mutex_unlock(&dss_mtx);

	malloc_mutex_unlock(&huge_mtx);

	malloc_mutex_unlock(&base_mtx);
	memcpy(larenas, arenas, sizeof(arena_t *) * narenas);
	malloc_spin_unlock(&arenas_lock);
	for (i = 0; i < narenas; i++) {
		if (larenas[i] != NULL)
			malloc_spin_unlock(&larenas[i]->lock);
	}
}
/*
 * End library-private functions.
 */
/******************************************************************************/