/*-
 * Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *******************************************************************************
 *
 * This allocator implementation is designed to provide scalable performance
 * for multi-threaded programs on multi-processor systems.  The following
 * features are included for this purpose:
 *
 *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
 *     contention and cache sloshing.
 *
 *   + Thread-specific caching is used if there are multiple threads, which
 *     reduces the amount of locking.
 *
 *   + Cache line sharing between arenas is avoided for internal data
 *     structures.
 *
 *   + Memory is managed in chunks and runs (chunks can be split into runs),
 *     rather than as individual pages.  This provides a constant-time
 *     mechanism for associating allocations with particular arenas.
 *
 * Allocation requests are rounded up to the nearest size class, and no record
 * of the original request size is maintained.  Allocations are broken into
 * categories according to size class.  Assuming runtime defaults, 4 kB pages
 * and a 16 byte quantum on a 32-bit system, the size classes in each category
 * are as follows:
 *
 *   |=======================================|
 *   | Category | Subcategory      | Size    |
 *   |=======================================|
 *   | Small    | Tiny             |       2 |
 *   |          |                  |     ... |
 *   |          |------------------+---------|
 *   |          | Quantum-spaced   |      16 |
 *   |          |                  |     ... |
 *   |          |------------------+---------|
 *   |          | Cacheline-spaced |     192 |
 *   |          |                  |     ... |
 *   |          |------------------+---------|
 *   |          | Sub-page         |     760 |
 *   |          |                  |     ... |
 *   |=======================================|
 *   | Large                       |    4 kB |
 *   |                             |     ... |
 *   |=======================================|
 *   | Huge                        |    1 MB |
 *   |                             |     ... |
 *   |=======================================|
 *
 * A different mechanism is used for each category:
 *
 *   Small : Each size class is segregated into its own set of runs.  Each run
 *           maintains a bitmap of which regions are free/allocated.
 *
 *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 *           in the associated arena chunk header maps.
 *
 *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 *          Metadata are stored in a separate red-black tree.
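 *
 * As a hedged sketch (exposition only, not allocator code), the resulting
 * size-based dispatch looks like the following, where bin_maxclass and
 * arena_maxclass are the category boundaries defined later in this file:
 *
 *   if (size <= bin_maxclass)        -> small: size2bin[] lookup, bin run
 *   else if (size <= arena_maxclass) -> large: dedicated run in an arena
 *   else                             -> huge: whole chunks, huge tree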
 *
 *******************************************************************************
 */

/*
 * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
 * defaults the A and J runtime options to off.  These settings are appropriate
 * for production systems.
 */
/* #define	MALLOC_PRODUCTION */
#ifndef MALLOC_PRODUCTION
   /*
    * MALLOC_DEBUG enables assertions and other sanity checks, and disables
    * inline functions.
    */
#  define MALLOC_DEBUG

   /* MALLOC_STATS enables statistics calculation. */
#  define MALLOC_STATS
#endif
/*
 * MALLOC_TINY enables support for tiny objects, which are smaller than one
 * quantum.
 */
#define	MALLOC_TINY

/*
 * MALLOC_MAG enables a magazine-based thread-specific caching layer for small
 * objects.  This makes it possible to allocate/deallocate objects without any
 * locking when the cache is in the steady state.
 */
#define	MALLOC_MAG

/*
 * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
 * re-balances arena load if exponentially averaged contention exceeds a
 * certain threshold.
 */
#define	MALLOC_BALANCE

/*
 * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
 * segment (DSS).  In an ideal world, this functionality would be completely
 * unnecessary, but we are burdened by history and the lack of resource limits
 * for anonymous mapped memory.
 */
#define	MALLOC_DSS
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "libc_private.h"
#ifdef MALLOC_DEBUG
#  define _LOCK_DEBUG
#endif
#include "spinlock.h"
#include "namespace.h"
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stddef.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/uio.h>
#include <sys/ktrace.h> /* Must come after several other sys/ includes. */

#include <machine/cpufunc.h>
#include <machine/param.h>
#include <machine/vmparam.h>

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

#include "un-namespace.h"

#include "rb.h"
#ifdef MALLOC_DEBUG
   /* Disable inlining to make debugging easier. */
#  define inline
#endif

/* Size of stack-allocated buffer passed to strerror_r(). */
#define	STRERROR_BUF		64
/*
 * Minimum alignment of allocations is 2^QUANTUM_2POW bytes.
 */
#ifdef __i386__
#  define QUANTUM_2POW		4
#  define SIZEOF_PTR_2POW	2
#  define CPU_SPINWAIT		__asm__ volatile("pause")
#endif
#ifdef __ia64__
#  define QUANTUM_2POW		4
#  define SIZEOF_PTR_2POW	3
#endif
#ifdef __alpha__
#  define QUANTUM_2POW		4
#  define SIZEOF_PTR_2POW	3
#endif
#ifdef __sparc64__
#  define QUANTUM_2POW		4
#  define SIZEOF_PTR_2POW	3
#endif
#ifdef __amd64__
#  define QUANTUM_2POW		4
#  define SIZEOF_PTR_2POW	3
#  define CPU_SPINWAIT		__asm__ volatile("pause")
#endif
#ifdef __arm__
#  define QUANTUM_2POW		3
#  define SIZEOF_PTR_2POW	2
#endif
#ifdef __mips__
#  define QUANTUM_2POW		3
#  define SIZEOF_PTR_2POW	2
#endif
#ifdef __powerpc__
#  define QUANTUM_2POW		4
#  define SIZEOF_PTR_2POW	2
#endif
#define	QUANTUM		((size_t)(1U << QUANTUM_2POW))
#define	QUANTUM_MASK	(QUANTUM - 1)

#define	SIZEOF_PTR	(1U << SIZEOF_PTR_2POW)

/* sizeof(int) == (1U << SIZEOF_INT_2POW). */
#ifndef SIZEOF_INT_2POW
#  define SIZEOF_INT_2POW	2
#endif
/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
#if (!defined(PIC) && !defined(NO_TLS))
#  define NO_TLS
#endif

#ifdef NO_TLS
   /* MALLOC_MAG requires TLS. */
#  ifdef MALLOC_MAG
#    undef MALLOC_MAG
#  endif
   /* MALLOC_BALANCE requires TLS. */
#  ifdef MALLOC_BALANCE
#    undef MALLOC_BALANCE
#  endif
#endif
/*
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.
 */
#define	CHUNK_2POW_DEFAULT	20

/* Maximum number of dirty pages per arena. */
#define	DIRTY_MAX_DEFAULT	(1U << 9)
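/*
 * With 4 kB pages, this default permits up to 512 * 4 kB == 2 MB of dirty
 * unused pages per arena.
 */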
/*
 * Maximum size of L1 cache line.  This is used to avoid cache line aliasing.
 * In addition, this controls the spacing of cacheline-spaced size classes.
 */
#define	CACHELINE_2POW		6
#define	CACHELINE		((size_t)(1U << CACHELINE_2POW))
#define	CACHELINE_MASK		(CACHELINE - 1)
/*
 * Subpages are an artificially designated partitioning of pages.  Their only
 * purpose is to support subpage-spaced size classes.
 *
 * There must be at least 4 subpages per page, due to the way size classes are
 * handled.
 */
#define	SUBPAGE_2POW		8
#define	SUBPAGE			((size_t)(1U << SUBPAGE_2POW))
#define	SUBPAGE_MASK		(SUBPAGE - 1)
#ifdef MALLOC_TINY
   /* Smallest size class to support. */
#  define TINY_MIN_2POW		1
#endif
/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define	QSPACE_MAX_2POW_DEFAULT	7
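/* With the default value, qspace_max is 1 << 7 == 128 bytes. */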
/*
 * Maximum size class that is a multiple of the cacheline, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define	CSPACE_MAX_2POW_DEFAULT	9
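/* With the default value, cspace_max is 1 << 9 == 512 bytes. */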
/*
 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 * as small as possible such that this setting is still honored, without
 * violating other constraints.  The goal is to make runs as small as possible
 * without exceeding a per run external fragmentation threshold.
 *
 * We use binary fixed point math for overhead computations, where the binary
 * point is implicitly RUN_BFP bits to the left.
 *
 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 * honored for some/all object sizes, since there is one bit of header overhead
 * per object (plus a constant).  This constraint is relaxed (ignored) for runs
 * that are so small that the per-region overhead is greater than:
 *
 *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
 */
#define	RUN_BFP			12
/*                                    \/   Implicit binary fixed point. */
#define	RUN_MAX_OVRHD		0x0000003dU
#define	RUN_MAX_OVRHD_RELAX	0x00001800U
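/*
 * To make the fixed point concrete (a sketch under the definitions above, not
 * allocator code): with RUN_BFP == 12, a value v represents the fraction
 * v / 2^12, so RUN_MAX_OVRHD == 0x3d caps header overhead at 61/4096, about
 * 1.5% of the run size.  A check of that form can be written as:
 */
#if 0	/* Hypothetical helper, for exposition only. */
static bool
run_ovrhd_ok(size_t run_size, size_t hdr_size)
{

	/* Compare hdr_size/run_size against RUN_MAX_OVRHD / 2^RUN_BFP. */
	return ((hdr_size << RUN_BFP) / run_size <= RUN_MAX_OVRHD);
}
#endif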
/* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
#define	RUN_MAX_SMALL	(12 * PAGE_SIZE)
/*
 * Hyper-threaded CPUs may need a special instruction inside spin loops in
 * order to yield to another virtual CPU.  If no such instruction is defined
 * above, make CPU_SPINWAIT a no-op.
 */
#ifndef CPU_SPINWAIT
#  define CPU_SPINWAIT
#endif
/*
 * Adaptive spinning must eventually switch to blocking, in order to avoid the
 * potential for priority inversion deadlock.  Backing off past a certain point
 * can actually waste time.
 */
#define	SPIN_LIMIT_2POW		11

/*
 * Conversion from spinning to blocking is expensive; we use (1U <<
 * BLOCK_COST_2POW) to estimate how many more times costly blocking is than
 * worst-case spinning.
 */
#define	BLOCK_COST_2POW		4
#ifdef MALLOC_MAG
   /*
    * Default magazine size, in bytes.  max_rounds is calculated to make
    * optimal use of the space, leaving just enough room for the magazine
    * header.
    */
#  define MAG_SIZE_2POW_DEFAULT	9
#endif
#ifdef MALLOC_BALANCE
   /*
    * We use an exponential moving average to track recent lock contention,
    * where the size of the history window is N, and alpha=2/(N+1).
    *
    * Due to integer math rounding, very small values here can cause
    * substantial degradation in accuracy, thus making the moving average decay
    * faster than it would with precise calculation.
    */
#  define BALANCE_ALPHA_INV_2POW	9

   /*
    * Threshold value for the exponential moving contention average at which to
    * re-assign a thread.
    */
#  define BALANCE_THRESHOLD_DEFAULT	(1U << (SPIN_LIMIT_2POW-4))
#endif
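/*
 * Illustrative update for such an average (exposition only; the real update
 * lives in the arena locking code): with a power-of-two 1/alpha, the
 * exponential moving average can be maintained with shifts alone, e.g.
 *
 *	avg += (contention - avg) >> BALANCE_ALPHA_INV_2POW;
 *
 * Each sample nudges the average by 1/512 of the difference, and the integer
 * truncation in that shift is the rounding loss described above.
 */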
/******************************************************************************/

/*
 * Mutexes based on spinlocks.  We can't use normal pthread spinlocks in all
 * places, because they require malloc()ed memory, which causes bootstrapping
 * issues in some cases.
 */
typedef struct {
	spinlock_t	lock;
} malloc_mutex_t;

/* Set to true once the allocator has been initialized. */
static bool malloc_initialized = false;

/* Used to avoid initialization races. */
static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
/******************************************************************************/
/*
 * Statistics data structures.
 */

#ifdef MALLOC_STATS

typedef struct malloc_bin_stats_s malloc_bin_stats_t;
struct malloc_bin_stats_s {
	/*
	 * Number of allocation requests that corresponded to the size of this
	 * bin.
	 */
	uint64_t	nrequests;

#ifdef MALLOC_MAG
	/* Number of magazine reloads from this bin. */
	uint64_t	nmags;
#endif

	/* Total number of runs created for this bin's size class. */
	uint64_t	nruns;

	/*
	 * Total number of runs reused by extracting them from the runs tree
	 * for this bin's size class.
	 */
	uint64_t	reruns;

	/* High-water mark for this bin. */
	unsigned long	highruns;

	/* Current number of runs in this bin. */
	unsigned long	curruns;
};
typedef struct arena_stats_s arena_stats_t;
struct arena_stats_s {
	/* Number of bytes currently mapped. */
	size_t		mapped;

	/*
	 * Total number of purge sweeps, total number of madvise calls made,
	 * and total pages purged in order to keep dirty unused memory under
	 * control.
	 */
	uint64_t	npurge;
	uint64_t	nmadvise;
	uint64_t	purged;

	/* Per-size-category statistics. */
	size_t		allocated_small;
	uint64_t	nmalloc_small;
	uint64_t	ndalloc_small;

	size_t		allocated_large;
	uint64_t	nmalloc_large;
	uint64_t	ndalloc_large;

#ifdef MALLOC_BALANCE
	/* Number of times this arena reassigned a thread due to contention. */
	uint64_t	nbalance;
#endif
};

typedef struct chunk_stats_s chunk_stats_t;
struct chunk_stats_s {
	/* Number of chunks that were allocated. */
	uint64_t	nchunks;

	/* High-water mark for number of chunks allocated. */
	unsigned long	highchunks;

	/*
	 * Current number of chunks allocated.  This value isn't maintained for
	 * any other purpose, so keep track of it in order to be able to set
	 * highchunks.
	 */
	unsigned long	curchunks;
};

#endif /* #ifdef MALLOC_STATS */
/******************************************************************************/
/*
 * Extent data structures.
 */

/* Tree of extents. */
typedef struct extent_node_s extent_node_t;
struct extent_node_s {
#ifdef MALLOC_DSS
	/* Linkage for the size/address-ordered tree. */
	rb_node(extent_node_t) link_szad;
#endif

	/* Linkage for the address-ordered tree. */
	rb_node(extent_node_t) link_ad;

	/* Pointer to the extent that this tree node is responsible for. */
	void	*addr;

	/* Total region size. */
	size_t	size;
};
typedef rb_tree(extent_node_t) extent_tree_t;
/******************************************************************************/
/*
 * Arena data structures.
 */

typedef struct arena_s arena_t;
typedef struct arena_bin_s arena_bin_t;

/* Each element of the chunk map corresponds to one page within the chunk. */
typedef struct arena_chunk_map_s arena_chunk_map_t;
struct arena_chunk_map_s {
	/*
	 * Linkage for run trees.  There are two disjoint uses:
	 *
	 * 1) arena_t's runs_avail tree.
	 * 2) arena_run_t conceptually uses this linkage for in-use non-full
	 *    runs, rather than directly embedding linkage.
	 */
	rb_node(arena_chunk_map_t)	link;
	/*
	 * Run address (or size) and various flags are stored together.  The
	 * bit layout looks like (assuming 32-bit system):
	 *
	 *   ???????? ???????? ????---- ---kdzla
	 *
	 * ? : Unallocated: Run address for first/last pages, unset for internal
	 *                  pages.
	 *     Small: Run address.
	 *     Large: Run size for first page, unset for trailing pages.
	 * - : Unused.
	 * k : key?
	 * d : dirty?
	 * z : zeroed?
	 * l : large?
	 * a : allocated?
	 *
	 * Following are example bit patterns for the three types of runs.
	 *
	 * r : run address
	 * s : run size
	 * x : don't care
	 * - : 0
	 *
	 *   Unallocated:
	 *     ssssssss ssssssss ssss---- --------
	 *     xxxxxxxx xxxxxxxx xxxx---- ----d---
	 *     ssssssss ssssssss ssss---- -----z--
	 *
	 *   Small:
	 *     rrrrrrrr rrrrrrrr rrrr---- -------a
	 *     rrrrrrrr rrrrrrrr rrrr---- -------a
	 *     rrrrrrrr rrrrrrrr rrrr---- -------a
	 *
	 *   Large:
	 *     ssssssss ssssssss ssss---- ------la
	 *     -------- -------- -------- ------la
	 *     -------- -------- -------- ------la
	 */
	size_t				bits;
#define	CHUNK_MAP_KEY		((size_t)0x10U)
#define	CHUNK_MAP_DIRTY		((size_t)0x08U)
#define	CHUNK_MAP_ZEROED	((size_t)0x04U)
#define	CHUNK_MAP_LARGE		((size_t)0x02U)
#define	CHUNK_MAP_ALLOCATED	((size_t)0x01U)
};
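/*
 * Illustrative decoding of a map element (exposition only): for the map entry
 * of the first page of a large run,
 *
 *	size_t run_size = mapelm->bits & ~PAGE_MASK;
 *	bool large = (mapelm->bits & CHUNK_MAP_LARGE) != 0;
 *	bool allocated = (mapelm->bits & CHUNK_MAP_ALLOCATED) != 0;
 *
 * The flag bits never collide with the address/size payload because run
 * addresses and run sizes are always page-aligned.
 */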
typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;

/* Arena chunk header. */
typedef struct arena_chunk_s arena_chunk_t;
struct arena_chunk_s {
	/* Arena that owns the chunk. */
	arena_t		*arena;

	/* Linkage for the arena's chunks_dirty tree. */
	rb_node(arena_chunk_t) link_dirty;

	/* Number of dirty pages. */
	size_t		ndirty;

	/* Map of pages within chunk that keeps track of free/large/small. */
	arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
typedef struct arena_run_s arena_run_t;
struct arena_run_s {
#ifdef MALLOC_DEBUG
	uint32_t	magic;
#  define ARENA_RUN_MAGIC 0x384adf93
#endif

	/* Bin this run is associated with. */
	arena_bin_t	*bin;

	/* Index of first element that might have a free region. */
	unsigned	regs_minelm;

	/* Number of free regions in run. */
	unsigned	nfree;

	/* Bitmask of in-use regions (0: in use, 1: free). */
	unsigned	regs_mask[1]; /* Dynamically sized. */
};
struct arena_bin_s {
	/*
	 * Current run being used to service allocations of this bin's size
	 * class.
	 */
	arena_run_t	*runcur;

	/*
	 * Tree of non-full runs.  This tree is used when looking for an
	 * existing run when runcur is no longer usable.  We choose the
	 * non-full run that is lowest in memory; this policy tends to keep
	 * objects packed well, and it can also help reduce the number of
	 * almost-empty chunks.
	 */
	arena_run_tree_t runs;

	/* Size of regions in a run for this bin's size class. */
	size_t		reg_size;

	/* Total size of a run for this bin's size class. */
	size_t		run_size;

	/* Total number of regions in a run for this bin's size class. */
	uint32_t	nregs;

	/* Number of elements in a run's regs_mask for this bin's size class. */
	uint32_t	regs_mask_nelms;

	/* Offset of first region in a run for this bin's size class. */
	uint32_t	reg0_offset;

#ifdef MALLOC_STATS
	/* Bin statistics. */
	malloc_bin_stats_t stats;
#endif
};
struct arena_s {
#ifdef MALLOC_DEBUG
	uint32_t		magic;
#  define ARENA_MAGIC 0x947d3d24
#endif

#ifdef MALLOC_STATS
	arena_stats_t		stats;
#endif

	/* All operations on this arena require that lock be locked. */
	pthread_mutex_t		lock;

	/* Tree of dirty-page-containing chunks this arena manages. */
	arena_chunk_tree_t	chunks_dirty;

	/*
	 * In order to avoid rapid chunk allocation/deallocation when an arena
	 * oscillates right on the cusp of needing a new chunk, cache the most
	 * recently freed chunk.  The spare is left in the arena's chunk trees
	 * until it is deleted.
	 *
	 * There is one spare chunk per arena, rather than one spare total, in
	 * order to avoid interactions between multiple threads that could make
	 * a single spare inadequate.
	 */
	arena_chunk_t		*spare;

	/*
	 * Current count of pages within unused runs that are potentially
	 * dirty, and for which madvise(... MADV_FREE) has not been called.  By
	 * tracking this, we can institute a limit on how much dirty unused
	 * memory is mapped for each arena.
	 */
	size_t			ndirty;

	/*
	 * Size/address-ordered tree of this arena's available runs.  This tree
	 * is used for first-best-fit run allocation.
	 */
	arena_avail_tree_t	runs_avail;

#ifdef MALLOC_BALANCE
	/*
	 * The arena load balancing machinery needs to keep track of how much
	 * lock contention there is.  This value is exponentially averaged.
	 */
	uint32_t		contention;
#endif

	/*
	 * bins is used to store rings of free regions of the following sizes,
	 * assuming a 16-byte quantum, 4 kB page size, and default
	 * MALLOC_OPTIONS (see the size class table above).
	 */
	arena_bin_t		bins[1]; /* Dynamically sized. */
};
/******************************************************************************/
/*
 * Magazine data structures.
 */

#ifdef MALLOC_MAG
typedef struct mag_s mag_t;
struct mag_s {
	size_t		binind; /* Index of associated bin. */
	size_t		nrounds;
	void		*rounds[1]; /* Dynamically sized. */
};

/*
 * Magazines are lazily allocated, but once created, they remain until the
 * associated mag_rack is destroyed.
 */
typedef struct bin_mags_s bin_mags_t;
struct bin_mags_s {
	mag_t	*curmag;
	mag_t	*sparemag;
};

typedef struct mag_rack_s mag_rack_t;
struct mag_rack_s {
	bin_mags_t	bin_mags[1]; /* Dynamically sized. */
};
#endif
/******************************************************************************/
/*
 * Data.
 */

/* Number of CPUs. */
static unsigned		ncpus;

/* Various bin-related settings. */
#ifdef MALLOC_TINY		/* Number of (2^n)-spaced tiny bins. */
#  define ntbins	((unsigned)(QUANTUM_2POW - TINY_MIN_2POW))
#else
#  define ntbins	0
#endif
static unsigned		nqbins; /* Number of quantum-spaced bins. */
static unsigned		ncbins; /* Number of cacheline-spaced bins. */
static unsigned		nsbins; /* Number of subpage-spaced bins. */
static unsigned		nbins;
#ifdef MALLOC_TINY
#  define tspace_max	((size_t)(QUANTUM >> 1))
#endif
#define	qspace_min	QUANTUM
static size_t		qspace_max;
static size_t		cspace_min;
static size_t		cspace_max;
static size_t		sspace_min;
static size_t		sspace_max;
#define	bin_maxclass	sspace_max
static uint8_t const	*size2bin;
/*
 * const_size2bin is a static constant lookup table that in the common case can
 * be used as-is for size2bin.  For dynamically linked programs, this avoids
 * a page of memory overhead per process.
 */
#define	S2B_1(i)	i,
#define	S2B_2(i)	S2B_1(i) S2B_1(i)
#define	S2B_4(i)	S2B_2(i) S2B_2(i)
#define	S2B_8(i)	S2B_4(i) S2B_4(i)
#define	S2B_16(i)	S2B_8(i) S2B_8(i)
#define	S2B_32(i)	S2B_16(i) S2B_16(i)
#define	S2B_64(i)	S2B_32(i) S2B_32(i)
#define	S2B_128(i)	S2B_64(i) S2B_64(i)
#define	S2B_256(i)	S2B_128(i) S2B_128(i)
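/*
 * For example (exposition only), S2B_4(7) expands to "7, 7, 7, 7,"; each
 * S2B_n(i) macro emits n copies of bin index i, one per byte of the lookup
 * table, so size2bin[size] maps a request size directly to its bin.
 */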
static const uint8_t	const_size2bin[PAGE_SIZE - 255] = {
	S2B_1(0xffU)		/*    0 */
#if (QUANTUM_2POW == 4)
/* 64-bit system ************************/
#  ifdef MALLOC_TINY
	S2B_2(0)		/*    2 */
	S2B_2(1)		/*    4 */
	S2B_4(2)		/*    8 */
	S2B_8(3)		/*   16 */
#    define S2B_QMIN 3
#  else
	S2B_16(0)		/*   16 */
#    define S2B_QMIN 0
#  endif
	S2B_16(S2B_QMIN + 1)	/*   32 */
	S2B_16(S2B_QMIN + 2)	/*   48 */
	S2B_16(S2B_QMIN + 3)	/*   64 */
	S2B_16(S2B_QMIN + 4)	/*   80 */
	S2B_16(S2B_QMIN + 5)	/*   96 */
	S2B_16(S2B_QMIN + 6)	/*  112 */
	S2B_16(S2B_QMIN + 7)	/*  128 */
#  define S2B_CMIN (S2B_QMIN + 8)
#else
/* 32-bit system ************************/
#  ifdef MALLOC_TINY
	S2B_2(0)		/*    2 */
	S2B_2(1)		/*    4 */
	S2B_4(2)		/*    8 */
#    define S2B_QMIN 2
#  else
	S2B_8(0)		/*    8 */
#    define S2B_QMIN 0
#  endif
	S2B_8(S2B_QMIN + 1)	/*   16 */
	S2B_8(S2B_QMIN + 2)	/*   24 */
	S2B_8(S2B_QMIN + 3)	/*   32 */
	S2B_8(S2B_QMIN + 4)	/*   40 */
	S2B_8(S2B_QMIN + 5)	/*   48 */
	S2B_8(S2B_QMIN + 6)	/*   56 */
	S2B_8(S2B_QMIN + 7)	/*   64 */
	S2B_8(S2B_QMIN + 8)	/*   72 */
	S2B_8(S2B_QMIN + 9)	/*   80 */
	S2B_8(S2B_QMIN + 10)	/*   88 */
	S2B_8(S2B_QMIN + 11)	/*   96 */
	S2B_8(S2B_QMIN + 12)	/*  104 */
	S2B_8(S2B_QMIN + 13)	/*  112 */
	S2B_8(S2B_QMIN + 14)	/*  120 */
	S2B_8(S2B_QMIN + 15)	/*  128 */
#  define S2B_CMIN (S2B_QMIN + 16)
#endif
/****************************************/
	S2B_64(S2B_CMIN + 0)	/*  192 */
	S2B_64(S2B_CMIN + 1)	/*  256 */
	S2B_64(S2B_CMIN + 2)	/*  320 */
	S2B_64(S2B_CMIN + 3)	/*  384 */
	S2B_64(S2B_CMIN + 4)	/*  448 */
	S2B_64(S2B_CMIN + 5)	/*  512 */
#define	S2B_SMIN (S2B_CMIN + 6)
	S2B_256(S2B_SMIN + 0)	/*  768 */
	S2B_256(S2B_SMIN + 1)	/* 1024 */
	S2B_256(S2B_SMIN + 2)	/* 1280 */
	S2B_256(S2B_SMIN + 3)	/* 1536 */
	S2B_256(S2B_SMIN + 4)	/* 1792 */
	S2B_256(S2B_SMIN + 5)	/* 2048 */
	S2B_256(S2B_SMIN + 6)	/* 2304 */
	S2B_256(S2B_SMIN + 7)	/* 2560 */
	S2B_256(S2B_SMIN + 8)	/* 2816 */
	S2B_256(S2B_SMIN + 9)	/* 3072 */
	S2B_256(S2B_SMIN + 10)	/* 3328 */
	S2B_256(S2B_SMIN + 11)	/* 3584 */
	S2B_256(S2B_SMIN + 12)	/* 3840 */
#if (PAGE_SHIFT == 13)
	S2B_256(S2B_SMIN + 13)	/* 4096 */
	S2B_256(S2B_SMIN + 14)	/* 4352 */
	S2B_256(S2B_SMIN + 15)	/* 4608 */
	S2B_256(S2B_SMIN + 16)	/* 4864 */
	S2B_256(S2B_SMIN + 17)	/* 5120 */
	S2B_256(S2B_SMIN + 18)	/* 5376 */
	S2B_256(S2B_SMIN + 19)	/* 5632 */
	S2B_256(S2B_SMIN + 20)	/* 5888 */
	S2B_256(S2B_SMIN + 21)	/* 6144 */
	S2B_256(S2B_SMIN + 22)	/* 6400 */
	S2B_256(S2B_SMIN + 23)	/* 6656 */
	S2B_256(S2B_SMIN + 24)	/* 6912 */
	S2B_256(S2B_SMIN + 25)	/* 7168 */
	S2B_256(S2B_SMIN + 26)	/* 7424 */
	S2B_256(S2B_SMIN + 27)	/* 7680 */
	S2B_256(S2B_SMIN + 28)	/* 7936 */
#endif
};

#ifdef MALLOC_MAG
static size_t		max_rounds;
#endif
/* Various chunk-related settings. */
static size_t		chunksize;
static size_t		chunksize_mask; /* (chunksize - 1). */
static size_t		chunk_npages;
static size_t		arena_chunk_header_npages;
static size_t		arena_maxclass; /* Max size class for arenas. */
/* Protects chunk-related data structures. */
static malloc_mutex_t	huge_mtx;

/* Tree of chunks that are stand-alone huge allocations. */
static extent_tree_t	huge;
#ifdef MALLOC_DSS
/*
 * Protects sbrk() calls.  This avoids malloc races among threads, though it
 * does not protect against races with threads that call sbrk() directly.
 */
static malloc_mutex_t	dss_mtx;
/* Base address of the DSS. */
static void		*dss_base;
/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
static void		*dss_prev;
/* Current upper limit on DSS addresses. */
static void		*dss_max;

/*
 * Trees of chunks that were previously allocated (trees differ only in node
 * ordering).  These are used when allocating chunks, in an attempt to re-use
 * address space.  Depending on function, different tree orderings are needed,
 * which is why there are two trees with the same contents.
 */
static extent_tree_t	dss_chunks_szad;
static extent_tree_t	dss_chunks_ad;
#endif
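/*
 * Illustrative use of the two orderings (a sketch; see chunk_recycle_dss()
 * and chunk_dealloc_dss_record() below): best-fit allocation searches
 * dss_chunks_szad with a size-only key, while coalescing on deallocation
 * searches dss_chunks_ad by address:
 *
 *	key.addr = NULL;
 *	key.size = size;
 *	node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
 */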
#ifdef MALLOC_STATS
/* Huge allocation statistics. */
static uint64_t		huge_nmalloc;
static uint64_t		huge_ndalloc;
static size_t		huge_allocated;
#endif
/****************************/
/*
 * base (internal allocation).
 */

/*
 * Current pages that are being used for internal memory allocations.  These
 * pages are carved up in cacheline-size quanta, so that there is no chance of
 * false cache line sharing.
 */
static void		*base_pages;
static void		*base_next_addr;
static void		*base_past_addr; /* Addr immediately past base_pages. */
static extent_node_t	*base_nodes;
static malloc_mutex_t	base_mtx;
#ifdef MALLOC_STATS
static size_t		base_mapped;
#endif
/*
 * Arenas that are used to service external requests.  Not all elements of the
 * arenas array are necessarily used; arenas are created lazily as needed.
 */
static arena_t		**arenas;
static unsigned		narenas;
#ifndef NO_TLS
#  ifdef MALLOC_BALANCE
static unsigned		narenas_2pow;
#  else
static unsigned		next_arena;
#  endif
#endif
static pthread_mutex_t	arenas_lock; /* Protects arenas initialization. */

#ifndef NO_TLS
/*
 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 * for allocations.
 */
static __thread arena_t	*arenas_map;
#endif

#ifdef MALLOC_MAG
/*
 * Map of thread-specific magazine racks, used for thread-specific object
 * caching.
 */
static __thread mag_rack_t	*mag_rack;
#endif

#ifdef MALLOC_STATS
/* Chunk statistics. */
static chunk_stats_t	stats_chunks;
#endif
/*******************************/
/*
 * Runtime configuration options.
 */
const char	*_malloc_options;
#ifndef MALLOC_PRODUCTION
static bool	opt_abort = true;
static bool	opt_junk = true;
#else
static bool	opt_abort = false;
static bool	opt_junk = false;
#endif
#ifdef MALLOC_DSS
static bool	opt_dss = true;
static bool	opt_mmap = true;
#endif
#ifdef MALLOC_MAG
static bool	opt_mag = true;
static size_t	opt_mag_size_2pow = MAG_SIZE_2POW_DEFAULT;
#endif
static size_t	opt_dirty_max = DIRTY_MAX_DEFAULT;
#ifdef MALLOC_BALANCE
static uint64_t	opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
#endif
static bool	opt_print_stats = false;
static size_t	opt_qspace_max_2pow = QSPACE_MAX_2POW_DEFAULT;
static size_t	opt_cspace_max_2pow = CSPACE_MAX_2POW_DEFAULT;
static size_t	opt_chunk_2pow = CHUNK_2POW_DEFAULT;
static bool	opt_utrace = false;
static bool	opt_sysv = false;
static bool	opt_xmalloc = false;
static bool	opt_zero = false;
static int	opt_narenas_lshift = 0;
typedef struct {
	void	*p;	/* Input pointer (as in realloc(p, s)). */
	size_t	s;	/* Request size. */
	void	*r;	/* Result pointer. */
} malloc_utrace_t;

#define	UTRACE(a, b, c)							\
	if (opt_utrace) {						\
		malloc_utrace_t ut;					\
		ut.p = (a);						\
		ut.s = (b);						\
		ut.r = (c);						\
		utrace(&ut, sizeof(ut));				\
	}
/******************************************************************************/
/*
 * Begin function prototypes for non-inline static functions.
 */

static void	malloc_mutex_init(malloc_mutex_t *mutex);
static bool	malloc_spin_init(pthread_mutex_t *lock);
static void	wrtmessage(const char *p1, const char *p2, const char *p3,
    const char *p4);
#ifdef MALLOC_STATS
static void	malloc_printf(const char *format, ...);
#endif
static char	*umax2s(uintmax_t x, char *s);
#ifdef MALLOC_DSS
static bool	base_pages_alloc_dss(size_t minsize);
#endif
static bool	base_pages_alloc_mmap(size_t minsize);
static bool	base_pages_alloc(size_t minsize);
static void	*base_alloc(size_t size);
static void	*base_calloc(size_t number, size_t size);
static extent_node_t *base_node_alloc(void);
static void	base_node_dealloc(extent_node_t *node);
#ifdef MALLOC_STATS
static void	stats_print(arena_t *arena);
#endif
static void	*pages_map(void *addr, size_t size);
static void	pages_unmap(void *addr, size_t size);
#ifdef MALLOC_DSS
static void	*chunk_alloc_dss(size_t size);
static void	*chunk_recycle_dss(size_t size, bool zero);
#endif
static void	*chunk_alloc_mmap(size_t size);
static void	*chunk_alloc(size_t size, bool zero);
#ifdef MALLOC_DSS
static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
static bool	chunk_dealloc_dss(void *chunk, size_t size);
#endif
static void	chunk_dealloc_mmap(void *chunk, size_t size);
static void	chunk_dealloc(void *chunk, size_t size);
#ifndef NO_TLS
static arena_t	*choose_arena_hard(void);
#endif
static void	arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
    bool large, bool zero);
static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
static void	arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
    bool zero);
static void	arena_purge(arena_t *arena);
static void	arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
static void	arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
    arena_run_t *run, size_t oldsize, size_t newsize);
static void	arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
    arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void	*arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
static size_t	arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
#ifdef MALLOC_BALANCE
static void	arena_lock_balance_hard(arena_t *arena);
#endif
#ifdef MALLOC_MAG
static void	mag_load(mag_t *mag);
#endif
static void	*arena_malloc_large(arena_t *arena, size_t size, bool zero);
static void	*arena_palloc(arena_t *arena, size_t alignment, size_t size,
    size_t alloc_size);
static size_t	arena_salloc(const void *ptr);
#ifdef MALLOC_MAG
static void	mag_unload(mag_t *mag);
#endif
static void	arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
    void *ptr);
static void	arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
    void *ptr, size_t size, size_t oldsize);
static bool	arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
    void *ptr, size_t size, size_t oldsize);
static bool	arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
static void	*arena_ralloc(void *ptr, size_t size, size_t oldsize);
static bool	arena_new(arena_t *arena);
static arena_t	*arenas_extend(unsigned ind);
#ifdef MALLOC_MAG
static mag_t	*mag_create(arena_t *arena, size_t binind);
static void	mag_destroy(mag_t *mag);
static mag_rack_t *mag_rack_create(arena_t *arena);
static void	mag_rack_destroy(mag_rack_t *rack);
#endif
static void	*huge_malloc(size_t size, bool zero);
static void	*huge_palloc(size_t alignment, size_t size);
static void	*huge_ralloc(void *ptr, size_t size, size_t oldsize);
static void	huge_dalloc(void *ptr);
static void	malloc_print_stats(void);
#ifdef MALLOC_DEBUG
static void	size2bin_validate(void);
#endif
static bool	size2bin_init(void);
static bool	size2bin_init_hard(void);
static bool	malloc_init_hard(void);

/*
 * End function prototypes.
 */
/******************************************************************************/
/*
 * Begin mutex.  We can't use normal pthread mutexes in all places, because
 * they require malloc()ed memory, which causes bootstrapping issues in some
 * cases.
 */

static void
malloc_mutex_init(malloc_mutex_t *mutex)
{
	static const spinlock_t lock = _SPINLOCK_INITIALIZER;

	mutex->lock = lock;
}

static inline void
malloc_mutex_lock(malloc_mutex_t *mutex)
{

	if (__isthreaded)
		_SPINLOCK(&mutex->lock);
}

static inline void
malloc_mutex_unlock(malloc_mutex_t *mutex)
{

	if (__isthreaded)
		_SPINUNLOCK(&mutex->lock);
}

/*
 * End mutex.
 */
/******************************************************************************/
/*
 * Begin spin lock.  Spin locks here are actually adaptive mutexes that block
 * after a period of spinning, because unbounded spinning would allow for
 * priority inversion.
 */

/*
 * We use an unpublished interface to initialize pthread mutexes with an
 * allocation callback, in order to avoid infinite recursion.
 */
int	_pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
    void *(calloc_cb)(size_t, size_t));

__weak_reference(_pthread_mutex_init_calloc_cb_stub,
    _pthread_mutex_init_calloc_cb);

int
_pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
    void *(calloc_cb)(size_t, size_t))
{

	return (0);
}

static bool
malloc_spin_init(pthread_mutex_t *lock)
{

	if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
		return (true);

	return (false);
}
static inline unsigned
malloc_spin_lock(pthread_mutex_t *lock)
{
	unsigned ret = 0;

	if (__isthreaded) {
		if (_pthread_mutex_trylock(lock) != 0) {
			/* Exponentially back off if there are multiple CPUs. */
			if (ncpus > 1) {
				unsigned i;
				volatile unsigned j;

				for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
					for (j = 0; j < (1U << i); j++) {
						ret++;
						CPU_SPINWAIT;
					}
					if (_pthread_mutex_trylock(lock) == 0)
						return (ret);
				}
			}

			/*
			 * Spinning failed.  Block until the lock becomes
			 * available, in order to avoid indefinite priority
			 * inversion.
			 */
			_pthread_mutex_lock(lock);
			assert((ret << BLOCK_COST_2POW) != 0 || ncpus == 1);
			return (ret << BLOCK_COST_2POW);
		}
	}

	return (ret);
}

static inline void
malloc_spin_unlock(pthread_mutex_t *lock)
{

	if (__isthreaded)
		_pthread_mutex_unlock(lock);
}
/*
 * End spin lock.
 */
/******************************************************************************/
/*
 * Begin Utility functions/macros.
 */

/* Return the chunk address for allocation address a. */
#define	CHUNK_ADDR2BASE(a)						\
	((void *)((uintptr_t)(a) & ~chunksize_mask))

/* Return the chunk offset of address a. */
#define	CHUNK_ADDR2OFFSET(a)						\
	((size_t)((uintptr_t)(a) & chunksize_mask))

/* Return the smallest chunk multiple that is >= s. */
#define	CHUNK_CEILING(s)						\
	(((s) + chunksize_mask) & ~chunksize_mask)

/* Return the smallest quantum multiple that is >= a. */
#define	QUANTUM_CEILING(a)						\
	(((a) + QUANTUM_MASK) & ~QUANTUM_MASK)

/* Return the smallest cacheline multiple that is >= s. */
#define	CACHELINE_CEILING(s)						\
	(((s) + CACHELINE_MASK) & ~CACHELINE_MASK)

/* Return the smallest subpage multiple that is >= s. */
#define	SUBPAGE_CEILING(s)						\
	(((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)

/* Return the smallest PAGE_SIZE multiple that is >= s. */
#define	PAGE_CEILING(s)							\
	(((s) + PAGE_MASK) & ~PAGE_MASK)
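/*
 * Worked examples of the rounding macros above (exposition only), assuming a
 * 16-byte quantum and 64-byte cachelines:
 *
 *	QUANTUM_CEILING(17)   == 32
 *	CACHELINE_CEILING(65) == 128
 *	PAGE_CEILING(1)       == PAGE_SIZE
 *
 * Each macro adds (alignment - 1) and masks off the low bits, so already
 * aligned inputs are returned unchanged.
 */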
/* Compute the smallest power of 2 that is >= x. */
static inline size_t
pow2_ceil(size_t x)
{

	x--;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
#if (SIZEOF_PTR == 8)
	x |= x >> 32;
#endif
	x++;
	return (x);
}
#ifdef MALLOC_BALANCE
/*
 * Use a simple linear congruential pseudo-random number generator:
 *
 *   x' = (a*x + c) % m
 *
 * where the following constants ensure maximal period:
 *
 *   a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
 *   c == Odd number (relatively prime to 2^n).
 *   m == 2^32
 *
 * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
 *
 * This choice of m has the disadvantage that the quality of the bits is
 * proportional to bit position.  For example, the lowest bit has a cycle of 2,
 * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
 * bits.
 */
#  define PRN_DEFINE(suffix, var, a, c)					\
static inline void							\
sprn_##suffix(uint32_t seed)						\
{									\
	var = seed;							\
}									\
									\
static inline uint32_t							\
prn_##suffix(uint32_t lg_range)						\
{									\
	uint32_t ret, x;						\
									\
	assert(lg_range > 0);						\
	assert(lg_range <= 32);						\
									\
	x = (var * (a)) + (c);						\
	var = x;							\
	ret = x >> (32 - lg_range);					\
									\
	return (ret);							\
}
#  define SPRN(suffix, seed)	sprn_##suffix(seed)
#  define PRN(suffix, lg_range)	prn_##suffix(lg_range)
#endif
#ifdef MALLOC_BALANCE
/* Define the PRNG used for arena assignment. */
static __thread uint32_t balance_x;
PRN_DEFINE(balance, balance_x, 1297, 1301)
#endif
static void
wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
{

	_write(STDERR_FILENO, p1, strlen(p1));
	_write(STDERR_FILENO, p2, strlen(p2));
	_write(STDERR_FILENO, p3, strlen(p3));
	_write(STDERR_FILENO, p4, strlen(p4));
}

void	(*_malloc_message)(const char *p1, const char *p2, const char *p3,
	    const char *p4) = wrtmessage;
#ifdef MALLOC_STATS
/*
 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
 */
static void
malloc_printf(const char *format, ...)
{
	char buf[4096];
	va_list ap;

	va_start(ap, format);
	vsnprintf(buf, sizeof(buf), format, ap);
	va_end(ap);
	_malloc_message(buf, "", "", "");
}
#endif
/*
 * We don't want to depend on vsnprintf() for production builds, since that can
 * cause unnecessary bloat for static binaries.  umax2s() provides minimal
 * integer printing functionality, so that malloc_printf() use can be limited to
 * MALLOC_STATS code.
 */
#define	UMAX2S_BUFSIZE	21
static char *
umax2s(uintmax_t x, char *s)
{
	unsigned i;

	/* Make sure UMAX2S_BUFSIZE is large enough. */
	assert(sizeof(uintmax_t) <= 8);

	i = UMAX2S_BUFSIZE - 1;
	s[i] = '\0';
	do {
		i--;
		s[i] = "0123456789"[x % 10];
		x /= 10;
	} while (x > 0);

	return (&s[i]);
}
/******************************************************************************/

#ifdef MALLOC_DSS
static bool
base_pages_alloc_dss(size_t minsize)
{

	/*
	 * Do special DSS allocation here, since base allocations don't need to
	 * be chunk-aligned.
	 */
	malloc_mutex_lock(&dss_mtx);
	if (dss_prev != (void *)-1) {
		intptr_t incr;
		size_t csize = CHUNK_CEILING(minsize);

		do {
			/* Get the current end of the DSS. */
			dss_max = sbrk(0);

			/*
			 * Calculate how much padding is necessary to
			 * chunk-align the end of the DSS.  Don't worry about
			 * dss_max not being chunk-aligned though.
			 */
			incr = (intptr_t)chunksize
			    - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
			if ((size_t)incr < minsize)
				incr += csize;

			dss_prev = sbrk(incr);
			if (dss_prev == dss_max) {
				/* Success. */
				dss_max = (void *)((intptr_t)dss_prev + incr);
				base_pages = dss_prev;
				base_next_addr = base_pages;
				base_past_addr = dss_max;
#ifdef MALLOC_STATS
				base_mapped += incr;
#endif
				malloc_mutex_unlock(&dss_mtx);
				return (false);
			}
		} while (dss_prev != (void *)-1);
	}
	malloc_mutex_unlock(&dss_mtx);

	return (true);
}
#endif
static bool
base_pages_alloc_mmap(size_t minsize)
{
	size_t csize;

	assert(minsize != 0);
	csize = PAGE_CEILING(minsize);
	base_pages = pages_map(NULL, csize);
	if (base_pages == NULL)
		return (true);
	base_next_addr = base_pages;
	base_past_addr = (void *)((uintptr_t)base_pages + csize);
#ifdef MALLOC_STATS
	base_mapped += csize;
#endif

	return (false);
}
static bool
base_pages_alloc(size_t minsize)
{

#ifdef MALLOC_DSS
	if (opt_mmap && minsize != 0)
#endif
	{
		if (base_pages_alloc_mmap(minsize) == false)
			return (false);
	}

#ifdef MALLOC_DSS
	if (opt_dss) {
		if (base_pages_alloc_dss(minsize) == false)
			return (false);
	}
#endif

	return (true);
}
static void *
base_alloc(size_t size)
{
	void *ret;
	size_t csize;

	/* Round size up to nearest multiple of the cacheline size. */
	csize = CACHELINE_CEILING(size);

	malloc_mutex_lock(&base_mtx);
	/* Make sure there's enough space for the allocation. */
	if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
		if (base_pages_alloc(csize)) {
			malloc_mutex_unlock(&base_mtx);
			return (NULL);
		}
	}
	/* Allocate. */
	ret = base_next_addr;
	base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
	malloc_mutex_unlock(&base_mtx);

	return (ret);
}
static void *
base_calloc(size_t number, size_t size)
{
	void *ret;

	ret = base_alloc(number * size);
	memset(ret, 0, number * size);

	return (ret);
}
static extent_node_t *
base_node_alloc(void)
{
	extent_node_t *ret;

	malloc_mutex_lock(&base_mtx);
	if (base_nodes != NULL) {
		ret = base_nodes;
		base_nodes = *(extent_node_t **)ret;
		malloc_mutex_unlock(&base_mtx);
	} else {
		malloc_mutex_unlock(&base_mtx);
		ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
	}

	return (ret);
}
static void
base_node_dealloc(extent_node_t *node)
{

	malloc_mutex_lock(&base_mtx);
	*(extent_node_t **)node = base_nodes;
	base_nodes = node;
	malloc_mutex_unlock(&base_mtx);
}
/******************************************************************************/

#ifdef MALLOC_STATS
static void
stats_print(arena_t *arena)
{
	unsigned i, gap_start;

	malloc_printf("dirty: %zu page%s dirty, %llu sweep%s,"
	    " %llu madvise%s, %llu page%s purged\n",
	    arena->ndirty, arena->ndirty == 1 ? "" : "s",
	    arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
	    arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
	    arena->stats.purged, arena->stats.purged == 1 ? "" : "s");
	malloc_printf("            allocated      nmalloc      ndalloc\n");
	malloc_printf("small:   %12zu %12llu %12llu\n",
	    arena->stats.allocated_small, arena->stats.nmalloc_small,
	    arena->stats.ndalloc_small);
	malloc_printf("large:   %12zu %12llu %12llu\n",
	    arena->stats.allocated_large, arena->stats.nmalloc_large,
	    arena->stats.ndalloc_large);
	malloc_printf("total:   %12zu %12llu %12llu\n",
	    arena->stats.allocated_small + arena->stats.allocated_large,
	    arena->stats.nmalloc_small + arena->stats.nmalloc_large,
	    arena->stats.ndalloc_small + arena->stats.ndalloc_large);
	malloc_printf("mapped:  %12zu\n", arena->stats.mapped);
#ifdef MALLOC_MAG
	if (__isthreaded && opt_mag) {
		malloc_printf("bins:     bin   size regs pgs      mags   "
		    "newruns    reruns maxruns curruns\n");
	} else {
#endif
		malloc_printf("bins:     bin   size regs pgs  requests   "
		    "newruns    reruns maxruns curruns\n");
#ifdef MALLOC_MAG
	}
#endif
	for (i = 0, gap_start = UINT_MAX; i < nbins; i++) {
		if (arena->bins[i].stats.nruns == 0) {
			if (gap_start == UINT_MAX)
				gap_start = i;
		} else {
			if (gap_start != UINT_MAX) {
				if (i > gap_start + 1) {
					/* Gap of more than one size class. */
					malloc_printf("[%u..%u]\n",
					    gap_start, i - 1);
				} else {
					/* Gap of one size class. */
					malloc_printf("[%u]\n", gap_start);
				}
				gap_start = UINT_MAX;
			}
			malloc_printf(
			    "%13u %1s %4u %4u %3u %9llu %9llu"
			    " %9llu %7lu %7lu\n",
			    i,
			    i < ntbins ? "T" : i < ntbins + nqbins ? "Q" :
			    i < ntbins + nqbins + ncbins ? "C" : "S",
			    arena->bins[i].reg_size,
			    arena->bins[i].nregs,
			    arena->bins[i].run_size >> PAGE_SHIFT,
#ifdef MALLOC_MAG
			    (__isthreaded && opt_mag) ?
			    arena->bins[i].stats.nmags :
#endif
			    arena->bins[i].stats.nrequests,
			    arena->bins[i].stats.nruns,
			    arena->bins[i].stats.reruns,
			    arena->bins[i].stats.highruns,
			    arena->bins[i].stats.curruns);
		}
	}
	if (gap_start != UINT_MAX) {
		if (i > gap_start + 1) {
			/* Gap of more than one size class. */
			malloc_printf("[%u..%u]\n", gap_start, i - 1);
		} else {
			/* Gap of one size class. */
			malloc_printf("[%u]\n", gap_start);
		}
	}
}
#endif
/*
 * End Utility functions/macros.
 */
/******************************************************************************/
/*
 * Begin extent tree code.
 */
#ifdef MALLOC_DSS
static inline int
extent_szad_comp(extent_node_t *a, extent_node_t *b)
{
	int ret;
	size_t a_size = a->size;
	size_t b_size = b->size;

	ret = (a_size > b_size) - (a_size < b_size);
	if (ret == 0) {
		uintptr_t a_addr = (uintptr_t)a->addr;
		uintptr_t b_addr = (uintptr_t)b->addr;

		ret = (a_addr > b_addr) - (a_addr < b_addr);
	}

	return (ret);
}

/* Wrap red-black tree macros in functions. */
rb_wrap(__unused static, extent_tree_szad_, extent_tree_t, extent_node_t,
    link_szad, extent_szad_comp)
#endif

static inline int
extent_ad_comp(extent_node_t *a, extent_node_t *b)
{
	uintptr_t a_addr = (uintptr_t)a->addr;
	uintptr_t b_addr = (uintptr_t)b->addr;

	return ((a_addr > b_addr) - (a_addr < b_addr));
}

/* Wrap red-black tree macros in functions. */
rb_wrap(__unused static, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
    extent_ad_comp)
/*
 * End extent tree code.
 */
/******************************************************************************/
/*
 * Begin chunk management functions.
 */
static void *
pages_map(void *addr, size_t size)
{
	void *ret;

	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
	    -1, 0);
	assert(ret != NULL);

	if (ret == MAP_FAILED)
		ret = NULL;
	else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		if (munmap(ret, size) == -1) {
			char buf[STRERROR_BUF];

			strerror_r(errno, buf, sizeof(buf));
			_malloc_message(_getprogname(),
			    ": (malloc) Error in munmap(): ", buf, "\n");
			if (opt_abort)
				abort();
		}
		ret = NULL;
	}

	assert(ret == NULL || (addr == NULL && ret != addr)
	    || (addr != NULL && ret == addr));
	return (ret);
}
static void
pages_unmap(void *addr, size_t size)
{

	if (munmap(addr, size) == -1) {
		char buf[STRERROR_BUF];

		strerror_r(errno, buf, sizeof(buf));
		_malloc_message(_getprogname(),
		    ": (malloc) Error in munmap(): ", buf, "\n");
		if (opt_abort)
			abort();
	}
}
#ifdef MALLOC_DSS
static void *
chunk_alloc_dss(size_t size)
{

	/*
	 * sbrk() uses a signed increment argument, so take care not to
	 * interpret a huge allocation request as a negative increment.
	 */
	if ((intptr_t)size < 0)
		return (NULL);

	malloc_mutex_lock(&dss_mtx);
	if (dss_prev != (void *)-1) {
		intptr_t incr;

		/*
		 * The loop is necessary to recover from races with other
		 * threads that are using the DSS for something other than
		 * malloc.
		 */
		do {
			void *ret;

			/* Get the current end of the DSS. */
			dss_max = sbrk(0);

			/*
			 * Calculate how much padding is necessary to
			 * chunk-align the end of the DSS.
			 */
			incr = (intptr_t)size
			    - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
			if (incr == (intptr_t)size)
				ret = dss_max;
			else {
				ret = (void *)((intptr_t)dss_max + incr);
				incr += size;
			}

			dss_prev = sbrk(incr);
			if (dss_prev == dss_max) {
				/* Success. */
				dss_max = (void *)((intptr_t)dss_prev + incr);
				malloc_mutex_unlock(&dss_mtx);
				return (ret);
			}
		} while (dss_prev != (void *)-1);
	}
	malloc_mutex_unlock(&dss_mtx);

	return (NULL);
}
static void *
chunk_recycle_dss(size_t size, bool zero)
{
	extent_node_t *node, key;

	key.addr = NULL;
	key.size = size;
	malloc_mutex_lock(&dss_mtx);
	node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
	if (node != NULL) {
		void *ret = node->addr;

		/* Remove node from the tree. */
		extent_tree_szad_remove(&dss_chunks_szad, node);
		if (node->size == size) {
			extent_tree_ad_remove(&dss_chunks_ad, node);
			base_node_dealloc(node);
		} else {
			/*
			 * Insert the remainder of node's address range as a
			 * smaller chunk.  Its position within dss_chunks_ad
			 * does not change.
			 */
			assert(node->size > size);
			node->addr = (void *)((uintptr_t)node->addr + size);
			node->size -= size;
			extent_tree_szad_insert(&dss_chunks_szad, node);
		}
		malloc_mutex_unlock(&dss_mtx);

		if (zero)
			memset(ret, 0, size);
		return (ret);
	}
	malloc_mutex_unlock(&dss_mtx);

	return (NULL);
}
#endif
static void *
chunk_alloc_mmap(size_t size)
{
	void *ret;
	size_t offset;

	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings.  The reliable, but
	 * expensive method is to create a mapping that is over-sized, then
	 * trim the excess.  However, that always results in at least one call
	 * to pages_unmap().
	 *
	 * A more optimistic approach is to try mapping precisely the right
	 * amount, then try to append another mapping if alignment is off.  In
	 * practice, this works out well as long as the application is not
	 * interleaving mappings via direct mmap() calls.  If we do run into a
	 * situation where there is an interleaved mapping and we are unable to
	 * extend an unaligned mapping, our best option is to momentarily
	 * revert to the reliable-but-expensive method.  This will tend to
	 * leave a gap in the memory map that is too small to cause later
	 * problems for the optimistic method.
	 */

	ret = pages_map(NULL, size);
	if (ret == NULL)
		return (NULL);

	offset = CHUNK_ADDR2OFFSET(ret);
	if (offset != 0) {
		/* Try to extend chunk boundary. */
		if (pages_map((void *)((uintptr_t)ret + size),
		    chunksize - offset) == NULL) {
			/*
			 * Extension failed.  Clean up, then revert to the
			 * reliable-but-expensive method.
			 */
			pages_unmap(ret, size);

			/* Beware size_t wrap-around. */
			if (size + chunksize <= size)
				return (NULL);

			ret = pages_map(NULL, size + chunksize);
			if (ret == NULL)
				return (NULL);

			/* Clean up unneeded leading/trailing space. */
			offset = CHUNK_ADDR2OFFSET(ret);
			if (offset != 0) {
				/* Leading space. */
				pages_unmap(ret, chunksize - offset);

				ret = (void *)((uintptr_t)ret +
				    (chunksize - offset));

				/* Trailing space. */
				pages_unmap((void *)((uintptr_t)ret + size),
				    offset);
			} else {
				/* Trailing space only. */
				pages_unmap((void *)((uintptr_t)ret + size),
				    chunksize);
			}
		} else {
			/* Clean up unneeded leading space. */
			pages_unmap(ret, chunksize - offset);
			ret = (void *)((uintptr_t)ret + (chunksize - offset));
		}
	}

	return (ret);
}
static void *
chunk_alloc(size_t size, bool zero)
{
	void *ret;

	assert(size != 0);
	assert((size & chunksize_mask) == 0);

	if (opt_mmap) {
		ret = chunk_alloc_mmap(size);
		if (ret != NULL)
			goto RETURN;
	}
#ifdef MALLOC_DSS
	if (opt_dss) {
		ret = chunk_recycle_dss(size, zero);
		if (ret != NULL)
			goto RETURN;

		ret = chunk_alloc_dss(size);
		if (ret != NULL)
			goto RETURN;
	}
#endif

	/* All strategies for allocation failed. */
	ret = NULL;
RETURN:
#ifdef MALLOC_STATS
	if (ret != NULL) {
		stats_chunks.nchunks += (size / chunksize);
		stats_chunks.curchunks += (size / chunksize);
	}
	if (stats_chunks.curchunks > stats_chunks.highchunks)
		stats_chunks.highchunks = stats_chunks.curchunks;
#endif

	assert(CHUNK_ADDR2BASE(ret) == ret);
	return (ret);
}
#ifdef MALLOC_DSS
static extent_node_t *
chunk_dealloc_dss_record(void *chunk, size_t size)
{
	extent_node_t *node, *prev, key;

	key.addr = (void *)((uintptr_t)chunk + size);
	node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
	/* Try to coalesce forward. */
	if (node != NULL && node->addr == key.addr) {
		/*
		 * Coalesce chunk with the following address range.  This does
		 * not change the position within dss_chunks_ad, so only
		 * remove/insert from/into dss_chunks_szad.
		 */
		extent_tree_szad_remove(&dss_chunks_szad, node);
		node->addr = chunk;
		node->size += size;
		extent_tree_szad_insert(&dss_chunks_szad, node);
	} else {
		/*
		 * Coalescing forward failed, so insert a new node.  Drop
		 * dss_mtx during node allocation, since it is possible that a
		 * new base chunk will be allocated.
		 */
		malloc_mutex_unlock(&dss_mtx);
		node = base_node_alloc();
		malloc_mutex_lock(&dss_mtx);
		if (node == NULL)
			return (NULL);
		node->addr = chunk;
		node->size = size;
		extent_tree_ad_insert(&dss_chunks_ad, node);
		extent_tree_szad_insert(&dss_chunks_szad, node);
	}

	/* Try to coalesce backward. */
	prev = extent_tree_ad_prev(&dss_chunks_ad, node);
	if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
	    chunk) {
		/*
		 * Coalesce chunk with the previous address range.  This does
		 * not change the position within dss_chunks_ad, so only
		 * remove/insert node from/into dss_chunks_szad.
		 */
		extent_tree_szad_remove(&dss_chunks_szad, prev);
		extent_tree_ad_remove(&dss_chunks_ad, prev);

		extent_tree_szad_remove(&dss_chunks_szad, node);
		node->addr = prev->addr;
		node->size += prev->size;
		extent_tree_szad_insert(&dss_chunks_szad, node);

		base_node_dealloc(prev);
	}

	return (node);
}
static bool
chunk_dealloc_dss(void *chunk, size_t size)
{

	malloc_mutex_lock(&dss_mtx);
	if ((uintptr_t)chunk >= (uintptr_t)dss_base
	    && (uintptr_t)chunk < (uintptr_t)dss_max) {
		extent_node_t *node;

		/* Try to coalesce with other unused chunks. */
		node = chunk_dealloc_dss_record(chunk, size);
		if (node != NULL) {
			chunk = node->addr;
			size = node->size;
		}

		/* Get the current end of the DSS. */
		dss_max = sbrk(0);

		/*
		 * Try to shrink the DSS if this chunk is at the end of the
		 * DSS.  The sbrk() call here is subject to a race condition
		 * with threads that use brk(2) or sbrk(2) directly, but the
		 * alternative would be to leak memory for the sake of poorly
		 * designed multi-threaded programs.
		 */
		if ((void *)((uintptr_t)chunk + size) == dss_max
		    && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
			/* Success. */
			dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);

			if (node != NULL) {
				extent_tree_szad_remove(&dss_chunks_szad, node);
				extent_tree_ad_remove(&dss_chunks_ad, node);
				base_node_dealloc(node);
			}
			malloc_mutex_unlock(&dss_mtx);
		} else {
			malloc_mutex_unlock(&dss_mtx);
			madvise(chunk, size, MADV_FREE);
		}

		return (false);
	}
	malloc_mutex_unlock(&dss_mtx);

	return (true);
}
#endif
static void
chunk_dealloc_mmap(void *chunk, size_t size)
{

	pages_unmap(chunk, size);
}
static void
chunk_dealloc(void *chunk, size_t size)
{

	assert(chunk != NULL);
	assert(CHUNK_ADDR2BASE(chunk) == chunk);
	assert(size != 0);
	assert((size & chunksize_mask) == 0);

#ifdef MALLOC_STATS
	stats_chunks.curchunks -= (size / chunksize);
#endif

#ifdef MALLOC_DSS
	if (opt_dss) {
		if (chunk_dealloc_dss(chunk, size) == false)
			return;
	}

	if (opt_mmap)
#endif
		chunk_dealloc_mmap(chunk, size);
}
/*
 * End chunk management functions.
 */
/******************************************************************************/
/*
 * Begin arena.
 */

/*
 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
 * code if necessary).
 */
static inline arena_t *
choose_arena(void)
{
	arena_t *ret;

	/*
	 * We can only use TLS if this is a PIC library, since for the static
	 * library version, libc's malloc is used by TLS allocation, which
	 * introduces a bootstrapping issue.
	 */
#ifndef NO_TLS
	if (__isthreaded == false) {
		/* Avoid the overhead of TLS for single-threaded operation. */
		return (arenas[0]);
	}

	ret = arenas_map;
	if (ret == NULL) {
		ret = choose_arena_hard();
		assert(ret != NULL);
	}
#else
	if (__isthreaded && narenas > 1) {
		unsigned long ind;

		/*
		 * Hash _pthread_self() to one of the arenas.  There is a prime
		 * number of arenas, so this has a reasonable chance of
		 * working.  Even so, the hashing can be easily thwarted by
		 * inconvenient _pthread_self() values.  Without specific
		 * knowledge of how _pthread_self() calculates values, we can't
		 * easily do much better than this.
		 */
		ind = (unsigned long) _pthread_self() % narenas;

		/*
		 * Optimistically assume that arenas[ind] has been initialized.
		 * At worst, we find out that some other thread has already
		 * done so, after acquiring the lock in preparation.  Note that
		 * this lazy locking also has the effect of lazily forcing
		 * cache coherency; without the lock acquisition, there's no
		 * guarantee that modification of arenas[ind] by another thread
		 * would be seen on this CPU for an arbitrary amount of time.
		 *
		 * In general, this approach to modifying a synchronized value
		 * isn't a good idea, but in this case we only ever modify the
		 * value once, so things work out well.
		 */
		ret = arenas[ind];
		if (ret == NULL) {
			/*
			 * Avoid races with another thread that may have
			 * already initialized arenas[ind].
			 */
			malloc_spin_lock(&arenas_lock);
			if (arenas[ind] == NULL)
				ret = arenas_extend((unsigned)ind);
			else
				ret = arenas[ind];
			malloc_spin_unlock(&arenas_lock);
		}
	} else
		ret = arenas[0];
#endif

	assert(ret != NULL);
	return (ret);
}
#ifndef NO_TLS
/*
 * Choose an arena based on a per-thread value (slow-path code only, called
 * only by choose_arena()).
 */
static arena_t *
choose_arena_hard(void)
{
	arena_t *ret;

	assert(__isthreaded);

#ifdef MALLOC_BALANCE
	/* Seed the PRNG used for arena load balancing. */
	SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self()));
#endif

	if (narenas > 1) {
#ifdef MALLOC_BALANCE
		unsigned ind;

		ind = PRN(balance, narenas_2pow);
		if ((ret = arenas[ind]) == NULL) {
			malloc_spin_lock(&arenas_lock);
			if ((ret = arenas[ind]) == NULL)
				ret = arenas_extend(ind);
			malloc_spin_unlock(&arenas_lock);
		}
#else
		malloc_spin_lock(&arenas_lock);
		if ((ret = arenas[next_arena]) == NULL)
			ret = arenas_extend(next_arena);
		next_arena = (next_arena + 1) % narenas;
		malloc_spin_unlock(&arenas_lock);
#endif
	} else
		ret = arenas[0];

	arenas_map = ret;

	return (ret);
}
#endif
static inline int
arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
{
	uintptr_t a_chunk = (uintptr_t)a;
	uintptr_t b_chunk = (uintptr_t)b;

	assert(a != NULL);
	assert(b != NULL);

	return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
}

/* Wrap red-black tree macros in functions. */
rb_wrap(__unused static, arena_chunk_tree_dirty_, arena_chunk_tree_t,
    arena_chunk_t, link_dirty, arena_chunk_comp)
static inline int
arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
{
	uintptr_t a_mapelm = (uintptr_t)a;
	uintptr_t b_mapelm = (uintptr_t)b;

	assert(a != NULL);
	assert(b != NULL);

	return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
}

/* Wrap red-black tree macros in functions. */
rb_wrap(__unused static, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
    link, arena_run_comp)
static inline int
arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
{
	int ret;
	size_t a_size = a->bits & ~PAGE_MASK;
	size_t b_size = b->bits & ~PAGE_MASK;

	ret = (a_size > b_size) - (a_size < b_size);
	if (ret == 0) {
		uintptr_t a_mapelm, b_mapelm;

		if ((a->bits & CHUNK_MAP_KEY) == 0)
			a_mapelm = (uintptr_t)a;
		else {
			/*
			 * Treat keys as though they are lower than anything
			 * else.
			 */
			a_mapelm = 0;
		}
		b_mapelm = (uintptr_t)b;

		ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
	}

	return (ret);
}

/* Wrap red-black tree macros in functions. */
rb_wrap(__unused static, arena_avail_tree_, arena_avail_tree_t,
    arena_chunk_map_t, link, arena_avail_comp)
static inline void *
arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
{
	void *ret;
	unsigned i, mask, bit, regind;

	assert(run->magic == ARENA_RUN_MAGIC);
	assert(run->regs_minelm < bin->regs_mask_nelms);

	/*
	 * Move the first check outside the loop, so that run->regs_minelm can
	 * be updated unconditionally, without the possibility of updating it
	 * multiple times.
	 */
	i = run->regs_minelm;
	mask = run->regs_mask[i];
	if (mask != 0) {
		/* Usable allocation found. */
		bit = ffs((int)mask) - 1;

		regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
		assert(regind < bin->nregs);
		ret = (void *)(((uintptr_t)run) + bin->reg0_offset
		    + (bin->reg_size * regind));

		/* Clear bit. */
		mask ^= (1U << bit);
		run->regs_mask[i] = mask;

		return (ret);
	}

	for (i++; i < bin->regs_mask_nelms; i++) {
		mask = run->regs_mask[i];
		if (mask != 0) {
			/* Usable allocation found. */
			bit = ffs((int)mask) - 1;

			regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
			assert(regind < bin->nregs);
			ret = (void *)(((uintptr_t)run) + bin->reg0_offset
			    + (bin->reg_size * regind));

			/* Clear bit. */
			mask ^= (1U << bit);
			run->regs_mask[i] = mask;

			/*
			 * Make a note that nothing before this element
			 * contains a free region.
			 */
			run->regs_minelm = i; /* Low payoff: + (mask == 0); */

			return (ret);
		}
	}
	/* Not reached. */
	assert(0);
	return (NULL);
}
static void
arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
{
	unsigned diff, regind, elm, bit;

	assert(run->magic == ARENA_RUN_MAGIC);

	/*
	 * Avoid doing division with a variable divisor if possible.  Using
	 * actual division here can reduce allocator throughput by over 20%!
	 */
	diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
	if ((size & (size - 1)) == 0) {
		/*
		 * log2_table allows fast division of a power of two in the
		 * [1..128] range.
		 *
		 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
		 */
		static const unsigned char log2_table[] = {
		    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
		};

		if (size <= 128)
			regind = (diff >> log2_table[size - 1]);
		else if (size <= 32768)
			regind = diff >> (8 + log2_table[(size >> 8) - 1]);
		else
			regind = diff / size;
	} else if (size < qspace_max) {
		/*
		 * To divide by a number D that is not a power of two we
		 * multiply by (2^21 / D) and then right shift by 21 positions.
		 *
		 *   X / D
		 *
		 * becomes
		 *
		 *   (X * qsize_invs[(D >> QUANTUM_2POW) - 3]) >> SIZE_INV_SHIFT
		 *
		 * We can omit the first three elements, because we never
		 * divide by 0, and QUANTUM and 2*QUANTUM are both powers of
		 * two, which are handled above.
		 */
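		/*
		 * Worked example (exposition only): for D == 48 with
		 * QUANTUM_2POW == 4, qsize_invs[0] == (2^21 / 48) + 1 == 43691,
		 * so diff == 96 yields (96 * 43691) >> 21 == 2, matching
		 * 96 / 48 without a hardware divide.
		 */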
2471 #define SIZE_INV_SHIFT 21
2472 #define QSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW)) + 1)
2473 static const unsigned qsize_invs[] = {
2475 QSIZE_INV(4), QSIZE_INV(5), QSIZE_INV(6), QSIZE_INV(7)
2476 #if (QUANTUM_2POW < 4)
2478 QSIZE_INV(8), QSIZE_INV(9), QSIZE_INV(10), QSIZE_INV(11),
2479 QSIZE_INV(12),QSIZE_INV(13), QSIZE_INV(14), QSIZE_INV(15)
2482 assert(QUANTUM * (((sizeof(qsize_invs)) / sizeof(unsigned)) + 3)
2483 >= (1U << QSPACE_MAX_2POW_DEFAULT));
2485 if (size <= (((sizeof(qsize_invs) / sizeof(unsigned)) + 2) <<
2487 regind = qsize_invs[(size >> QUANTUM_2POW) - 3] * diff;
2488 regind >>= SIZE_INV_SHIFT;
2490 regind = diff / size;
2492 } else if (size < cspace_max) {
2493 #define CSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << CACHELINE_2POW)) + 1)
2494 static const unsigned csize_invs[] = {
2496 CSIZE_INV(4), CSIZE_INV(5), CSIZE_INV(6), CSIZE_INV(7)
2498 assert(CACHELINE * (((sizeof(csize_invs)) / sizeof(unsigned)) +
2499 3) >= (1U << CSPACE_MAX_2POW_DEFAULT));
2501 if (size <= (((sizeof(csize_invs) / sizeof(unsigned)) + 2) <<
2503 regind = csize_invs[(size >> CACHELINE_2POW) - 3] *
2505 regind >>= SIZE_INV_SHIFT;
2507 regind = diff / size;
2510 #define SSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << SUBPAGE_2POW)) + 1)
2511 static const unsigned ssize_invs[] = {
2513 SSIZE_INV(4), SSIZE_INV(5), SSIZE_INV(6), SSIZE_INV(7),
2514 SSIZE_INV(8), SSIZE_INV(9), SSIZE_INV(10), SSIZE_INV(11),
2515 SSIZE_INV(12), SSIZE_INV(13), SSIZE_INV(14), SSIZE_INV(15)
2516 #if (PAGE_SHIFT == 13)
2518 SSIZE_INV(16), SSIZE_INV(17), SSIZE_INV(18), SSIZE_INV(19),
2519 SSIZE_INV(20), SSIZE_INV(21), SSIZE_INV(22), SSIZE_INV(23),
2520 SSIZE_INV(24), SSIZE_INV(25), SSIZE_INV(26), SSIZE_INV(27),
2521 SSIZE_INV(28), SSIZE_INV(29), SSIZE_INV(29), SSIZE_INV(30)
2524 assert(SUBPAGE * (((sizeof(ssize_invs)) / sizeof(unsigned)) + 3)
2527 if (size < (((sizeof(ssize_invs) / sizeof(unsigned)) + 2) <<
2529 regind = ssize_invs[(size >> SUBPAGE_2POW) - 3] * diff;
2530 regind >>= SIZE_INV_SHIFT;
2532 regind = diff / size;
2535 #undef SIZE_INV_SHIFT
2536 assert(diff == regind * size);
2537 assert(regind < bin->nregs);
2539 elm = regind >> (SIZEOF_INT_2POW + 3);
2540 if (elm < run->regs_minelm)
2541 run->regs_minelm = elm;
2542 bit = regind - (elm << (SIZEOF_INT_2POW + 3));
2543 assert((run->regs_mask[elm] & (1U << bit)) == 0);
2544 run->regs_mask[elm] |= (1U << bit);
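/*
 * Worked example of the reciprocal trick above (illustrative only): with
 * QUANTUM == 16 and size == 48, D == 48 and
 * qsize_invs[(48 >> 4) - 3] == qsize_invs[0] == (2^21 / 48) + 1 == 43691.
 * Then diff == 96 gives (96 * 43691) >> 21 == 4194336 >> 21 == 2, i.e.
 * 96 / 48, with no hardware divide.  The "+ 1" in the *SIZE_INV macros
 * rounds the truncated reciprocal up so the shifted product never
 * undershoots the true quotient within the table's range.
 */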
static void
arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
    bool zero)
{
    arena_chunk_t *chunk;
    size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
    old_ndirty = chunk->ndirty;
    run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
        >> PAGE_SHIFT);
    total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
        PAGE_SHIFT;
    need_pages = (size >> PAGE_SHIFT);
    assert(need_pages > 0);
    assert(need_pages <= total_pages);
    rem_pages = total_pages - need_pages;

    arena_avail_tree_remove(&arena->runs_avail, &chunk->map[run_ind]);

    /* Keep track of trailing unused pages for later use. */
    if (rem_pages > 0) {
        chunk->map[run_ind+need_pages].bits = (rem_pages <<
            PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
            PAGE_MASK);
        chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
            PAGE_SHIFT) | (chunk->map[run_ind+total_pages-1].bits &
            PAGE_MASK);
        arena_avail_tree_insert(&arena->runs_avail,
            &chunk->map[run_ind+need_pages]);
    }

    for (i = 0; i < need_pages; i++) {
        /* Zero if necessary. */
        if (zero) {
            if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED)
                == 0) {
                memset((void *)((uintptr_t)chunk + ((run_ind
                    + i) << PAGE_SHIFT)), 0, PAGE_SIZE);
                /* CHUNK_MAP_ZEROED is cleared below. */
            }
        }

        /* Update dirty page accounting. */
        if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
            chunk->ndirty--;
            arena->ndirty--;
            /* CHUNK_MAP_DIRTY is cleared below. */
        }

        /* Initialize the chunk map. */
        if (large) {
            chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE
                | CHUNK_MAP_ALLOCATED;
        } else {
            chunk->map[run_ind + i].bits = (size_t)run
                | CHUNK_MAP_ALLOCATED;
        }
    }

    /*
     * Set the run size only in the first element for large runs.  This is
     * primarily a debugging aid, since the lack of size info for trailing
     * pages only matters if the application tries to operate on an
     * interior pointer.
     */
    if (large)
        chunk->map[run_ind].bits |= size;

    if (chunk->ndirty == 0 && old_ndirty > 0)
        arena_chunk_tree_dirty_remove(&arena->chunks_dirty, chunk);
}
static arena_chunk_t *
arena_chunk_alloc(arena_t *arena)
{
    arena_chunk_t *chunk;
    size_t i;

    if (arena->spare != NULL) {
        chunk = arena->spare;
        arena->spare = NULL;
    } else {
        chunk = (arena_chunk_t *)chunk_alloc(chunksize, true);
        if (chunk == NULL)
            return (NULL);
#ifdef MALLOC_STATS
        arena->stats.mapped += chunksize;
#endif

        chunk->arena = arena;

        /*
         * Claim that no pages are in use, since the header is merely
         * overhead.
         */
        chunk->ndirty = 0;

        /*
         * Initialize the map to contain one maximal free untouched run.
         */
        for (i = 0; i < arena_chunk_header_npages; i++)
            chunk->map[i].bits = 0;
        chunk->map[i].bits = arena_maxclass | CHUNK_MAP_ZEROED;
        for (i++; i < chunk_npages-1; i++) {
            chunk->map[i].bits = CHUNK_MAP_ZEROED;
        }
        chunk->map[chunk_npages-1].bits = arena_maxclass |
            CHUNK_MAP_ZEROED;
    }

    /* Insert the run into the runs_avail tree. */
    arena_avail_tree_insert(&arena->runs_avail,
        &chunk->map[arena_chunk_header_npages]);

    return (chunk);
}
static void
arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
{

    if (arena->spare != NULL) {
        if (arena->spare->ndirty > 0) {
            arena_chunk_tree_dirty_remove(
                &chunk->arena->chunks_dirty, arena->spare);
            arena->ndirty -= arena->spare->ndirty;
        }
        chunk_dealloc((void *)arena->spare, chunksize);
#ifdef MALLOC_STATS
        arena->stats.mapped -= chunksize;
#endif
    }

    /*
     * Remove run from runs_avail, regardless of whether this chunk
     * will be cached, so that the arena does not use it.  Dirty page
     * flushing only uses the chunks_dirty tree, so leaving this chunk in
     * the chunks_* trees is sufficient for that purpose.
     */
    arena_avail_tree_remove(&arena->runs_avail,
        &chunk->map[arena_chunk_header_npages]);

    arena->spare = chunk;
}
static arena_run_t *
arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
{
    arena_chunk_t *chunk;
    arena_run_t *run;
    arena_chunk_map_t *mapelm, key;

    assert(size <= arena_maxclass);
    assert((size & PAGE_MASK) == 0);

    /* Search the arena's chunks for the lowest best fit. */
    key.bits = size | CHUNK_MAP_KEY;
    mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
    if (mapelm != NULL) {
        arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
        size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
            / sizeof(arena_chunk_map_t);

        run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
            << PAGE_SHIFT));
        arena_run_split(arena, run, size, large, zero);
        return (run);
    }

    /*
     * No usable runs.  Create a new chunk from which to allocate the run.
     */
    chunk = arena_chunk_alloc(arena);
    if (chunk == NULL)
        return (NULL);
    run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
        PAGE_SHIFT));
    /* Update page map. */
    arena_run_split(arena, run, size, large, zero);
    return (run);
}
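/*
 * Note on the search above: CHUNK_MAP_KEY makes the synthetic key compare
 * lower than any real map element of equal size (see arena_avail_comp()),
 * so arena_avail_tree_nsearch() lands on the smallest available run whose
 * size is >= the request -- a best-fit search in O(log n) tree operations
 * rather than a walk over all available runs.
 */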
static void
arena_purge(arena_t *arena)
{
    arena_chunk_t *chunk;
    size_t i, npages;
#ifdef MALLOC_DEBUG
    size_t ndirty = 0;

    rb_foreach_begin(arena_chunk_t, link_dirty, &arena->chunks_dirty,
        chunk) {
        ndirty += chunk->ndirty;
    } rb_foreach_end(arena_chunk_t, link_dirty, &arena->chunks_dirty, chunk)
    assert(ndirty == arena->ndirty);
#endif
    assert(arena->ndirty > opt_dirty_max);

#ifdef MALLOC_STATS
    arena->stats.npurge++;
#endif

    /*
     * Iterate downward through chunks until enough dirty memory has been
     * purged.  Terminate as soon as possible in order to minimize the
     * number of system calls, even if a chunk has only been partially
     * purged.
     */
    while (arena->ndirty > (opt_dirty_max >> 1)) {
        chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
        assert(chunk != NULL);

        for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
            assert(i >= arena_chunk_header_npages);

            if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
                chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
                /* Find adjacent dirty run(s). */
                for (npages = 1; i > arena_chunk_header_npages
                    && (chunk->map[i - 1].bits &
                    CHUNK_MAP_DIRTY); npages++) {
                    i--;
                    chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
                }
                chunk->ndirty -= npages;
                arena->ndirty -= npages;

                madvise((void *)((uintptr_t)chunk + (i <<
                    PAGE_SHIFT)), (npages << PAGE_SHIFT),
                    MADV_FREE);
#ifdef MALLOC_STATS
                arena->stats.nmadvise++;
                arena->stats.purged += npages;
#endif
                if (arena->ndirty <= (opt_dirty_max >> 1))
                    break;
            }
        }

        if (chunk->ndirty == 0) {
            arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
                chunk);
        }
    }
}
static void
arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
{
    arena_chunk_t *chunk;
    size_t size, run_ind, run_pages;

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
    run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
        >> PAGE_SHIFT);
    assert(run_ind >= arena_chunk_header_npages);
    assert(run_ind < chunk_npages);
    if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
        size = chunk->map[run_ind].bits & ~PAGE_MASK;
    else
        size = run->bin->run_size;
    run_pages = (size >> PAGE_SHIFT);

    /* Mark pages as unallocated in the chunk map. */
    if (dirty) {
        size_t i;

        for (i = 0; i < run_pages; i++) {
            assert((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY)
                == 0);
            chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
        }

        if (chunk->ndirty == 0) {
            arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
                chunk);
        }
        chunk->ndirty += run_pages;
        arena->ndirty += run_pages;
    } else {
        size_t i;

        for (i = 0; i < run_pages; i++) {
            chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE |
                CHUNK_MAP_ALLOCATED);
        }
    }
    chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
        PAGE_MASK);
    chunk->map[run_ind+run_pages-1].bits = size |
        (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);

    /* Try to coalesce forward. */
    if (run_ind + run_pages < chunk_npages &&
        (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
        size_t nrun_size = chunk->map[run_ind+run_pages].bits &
            ~PAGE_MASK;

        /*
         * Remove successor from runs_avail; the coalesced run is
         * inserted later.
         */
        arena_avail_tree_remove(&arena->runs_avail,
            &chunk->map[run_ind+run_pages]);

        size += nrun_size;
        run_pages = size >> PAGE_SHIFT;

        assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
            == nrun_size);
        chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
            PAGE_MASK);
        chunk->map[run_ind+run_pages-1].bits = size |
            (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
    }

    /* Try to coalesce backward. */
    if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
        CHUNK_MAP_ALLOCATED) == 0) {
        size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;

        run_ind -= prun_size >> PAGE_SHIFT;

        /*
         * Remove predecessor from runs_avail; the coalesced run is
         * inserted later.
         */
        arena_avail_tree_remove(&arena->runs_avail,
            &chunk->map[run_ind]);

        size += prun_size;
        run_pages = size >> PAGE_SHIFT;

        assert((chunk->map[run_ind].bits & ~PAGE_MASK) ==
            prun_size);
        chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
            PAGE_MASK);
        chunk->map[run_ind+run_pages-1].bits = size |
            (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
    }

    /* Insert into runs_avail, now that coalescing is complete. */
    arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);

    /* Deallocate chunk if it is now completely unused. */
    if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
        CHUNK_MAP_ALLOCATED)) == arena_maxclass)
        arena_chunk_dealloc(arena, chunk);

    /* Enforce opt_dirty_max. */
    if (arena->ndirty > opt_dirty_max)
        arena_purge(arena);
}
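/*
 * Example of the coalescing above (illustrative): freeing a 2-page run at
 * pages [5,6] when pages [7,8] are already free merges them into one
 * 4-page run starting at page 5.  Only the first and last map elements of
 * the merged run need their size bits rewritten, because interior map
 * elements are never consulted for unallocated runs.
 */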
static void
arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
    size_t oldsize, size_t newsize)
{
    size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
    size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;

    assert(oldsize > newsize);

    /*
     * Update the chunk map so that arena_run_dalloc() can treat the
     * leading run as separately allocated.
     */
    chunk->map[pageind].bits = (oldsize - newsize) | CHUNK_MAP_LARGE |
        CHUNK_MAP_ALLOCATED;
    chunk->map[pageind+head_npages].bits = newsize | CHUNK_MAP_LARGE |
        CHUNK_MAP_ALLOCATED;

    arena_run_dalloc(arena, run, false);
}
static void
arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
    size_t oldsize, size_t newsize, bool dirty)
{
    size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
    size_t npages = newsize >> PAGE_SHIFT;

    assert(oldsize > newsize);

    /*
     * Update the chunk map so that arena_run_dalloc() can treat the
     * trailing run as separately allocated.
     */
    chunk->map[pageind].bits = newsize | CHUNK_MAP_LARGE |
        CHUNK_MAP_ALLOCATED;
    chunk->map[pageind+npages].bits = (oldsize - newsize) | CHUNK_MAP_LARGE
        | CHUNK_MAP_ALLOCATED;

    arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
        dirty);
}
static arena_run_t *
arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
{
    arena_chunk_map_t *mapelm;
    arena_run_t *run;
    unsigned i, remainder;

    /* Look for a usable run. */
    mapelm = arena_run_tree_first(&bin->runs);
    if (mapelm != NULL) {
        /* run is guaranteed to have available space. */
        arena_run_tree_remove(&bin->runs, mapelm);
        run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
#ifdef MALLOC_STATS
        bin->stats.reruns++;
#endif
        return (run);
    }
    /* No existing runs have any space available. */

    /* Allocate a new run. */
    run = arena_run_alloc(arena, bin->run_size, false, false);
    if (run == NULL)
        return (NULL);

    /* Initialize run internals. */
    run->bin = bin;

    for (i = 0; i < bin->regs_mask_nelms - 1; i++)
        run->regs_mask[i] = UINT_MAX;
    remainder = bin->nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1);
    if (remainder == 0)
        run->regs_mask[i] = UINT_MAX;
    else {
        /* The last element has spare bits that need to be unset. */
        run->regs_mask[i] = (UINT_MAX >> ((1U << (SIZEOF_INT_2POW + 3))
            - remainder));
    }

    run->regs_minelm = 0;

    run->nfree = bin->nregs;
#ifdef MALLOC_DEBUG
    run->magic = ARENA_RUN_MAGIC;
#endif

#ifdef MALLOC_STATS
    bin->stats.nruns++;
    bin->stats.curruns++;
    if (bin->stats.curruns > bin->stats.highruns)
        bin->stats.highruns = bin->stats.curruns;
#endif
    return (run);
}
/* bin->runcur must have space available before this function is called. */
static inline void *
arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
{
    void *ret;

    assert(run->magic == ARENA_RUN_MAGIC);
    assert(run->nfree > 0);

    ret = arena_run_reg_alloc(run, bin);
    assert(ret != NULL);
    run->nfree--;

    return (ret);
}
/* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
static void *
arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
{

    bin->runcur = arena_bin_nonfull_run_get(arena, bin);
    if (bin->runcur == NULL)
        return (NULL);
    assert(bin->runcur->magic == ARENA_RUN_MAGIC);
    assert(bin->runcur->nfree > 0);

    return (arena_bin_malloc_easy(arena, bin, bin->runcur));
}
/*
 * Calculate bin->run_size such that it meets the following constraints:
 *
 *   *) bin->run_size >= min_run_size
 *   *) bin->run_size <= arena_maxclass
 *   *) bin->run_size <= RUN_MAX_SMALL
 *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
 *
 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
 * also calculated here, since these settings are all interdependent.
 */
static size_t
arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
{
    size_t try_run_size, good_run_size;
    unsigned good_nregs, good_mask_nelms, good_reg0_offset;
    unsigned try_nregs, try_mask_nelms, try_reg0_offset;

    assert(min_run_size >= PAGE_SIZE);
    assert(min_run_size <= arena_maxclass);
    assert(min_run_size <= RUN_MAX_SMALL);

    /*
     * Calculate known-valid settings before entering the run_size
     * expansion loop, so that the first part of the loop always copies
     * valid settings.
     *
     * The do..while loop iteratively reduces the number of regions until
     * the run header and the regions no longer overlap.  A closed formula
     * would be quite messy, since there is an interdependency between the
     * header's mask length and the number of regions.
     */
    try_run_size = min_run_size;
    try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
        + 1; /* Counter-act try_nregs-- in loop. */
    do {
        try_nregs--;
        try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
            ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
        try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
    } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
        > try_reg0_offset);

    /* run_size expansion loop. */
    do {
        /*
         * Copy valid settings before trying more aggressive settings.
         */
        good_run_size = try_run_size;
        good_nregs = try_nregs;
        good_mask_nelms = try_mask_nelms;
        good_reg0_offset = try_reg0_offset;

        /* Try more aggressive settings. */
        try_run_size += PAGE_SIZE;
        try_nregs = ((try_run_size - sizeof(arena_run_t)) /
            bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
        do {
            try_nregs--;
            try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
                ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ?
                1 : 0);
            try_reg0_offset = try_run_size - (try_nregs *
                bin->reg_size);
        } while (sizeof(arena_run_t) + (sizeof(unsigned) *
            (try_mask_nelms - 1)) > try_reg0_offset);
    } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
        && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
        && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);

    assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
        <= good_reg0_offset);
    assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);

    /* Copy final settings. */
    bin->run_size = good_run_size;
    bin->nregs = good_nregs;
    bin->regs_mask_nelms = good_mask_nelms;
    bin->reg0_offset = good_reg0_offset;

    return (good_run_size);
}
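/*
 * Example (illustrative, assuming 4 kB pages and a small arena_run_t): for
 * reg_size == 16 and min_run_size == 4096, the inner loop settles on the
 * largest nregs such that the header (arena_run_t plus the regs_mask
 * words) still fits below reg0_offset -- roughly 250 regions.  The outer
 * loop then grows run_size one page at a time only while header overhead
 * still exceeds the RUN_MAX_OVRHD fixed-point bound.
 */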
#ifdef MALLOC_BALANCE
static inline void
arena_lock_balance(arena_t *arena)
{
    unsigned contention;

    contention = malloc_spin_lock(&arena->lock);
    if (narenas > 1) {
        /*
         * Calculate the exponentially averaged contention for this
         * arena.  Due to integer math always rounding down, this value
         * decays somewhat faster than normal.
         */
        arena->contention = (((uint64_t)arena->contention
            * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1))
            + (uint64_t)contention) >> BALANCE_ALPHA_INV_2POW;
        if (arena->contention >= opt_balance_threshold)
            arena_lock_balance_hard(arena);
    }
}

static void
arena_lock_balance_hard(arena_t *arena)
{
    uint32_t ind;

    arena->contention = 0;
#ifdef MALLOC_STATS
    arena->stats.nbalance++;
#endif
    ind = PRN(balance, narenas_2pow);
    if (arenas[ind] != NULL)
        arenas_map = arenas[ind];
    else {
        malloc_spin_lock(&arenas_lock);
        if (arenas[ind] != NULL)
            arenas_map = arenas[ind];
        else
            arenas_map = arenas_extend(ind);
        malloc_spin_unlock(&arenas_lock);
    }
}
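/*
 * Illustration of the decay above: if BALANCE_ALPHA_INV_2POW were 4, the
 * update would be contention = (15/16) * contention + (1/16) * sample,
 * i.e. an exponentially weighted moving average whose integer truncation
 * biases the estimate slightly downward.  (The actual shift value is set
 * elsewhere in this file.)
 */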
#endif

#ifdef MALLOC_MAG
static inline void *
mag_alloc(mag_t *mag)
{

    if (mag->nrounds == 0)
        return (NULL);
    mag->nrounds--;

    return (mag->rounds[mag->nrounds]);
}

static void
mag_load(mag_t *mag)
{
    arena_t *arena;
    arena_bin_t *bin;
    arena_run_t *run;
    void *round;
    size_t i;

    arena = choose_arena();
    bin = &arena->bins[mag->binind];
#ifdef MALLOC_BALANCE
    arena_lock_balance(arena);
#else
    malloc_spin_lock(&arena->lock);
#endif
    for (i = mag->nrounds; i < max_rounds; i++) {
        if ((run = bin->runcur) != NULL && run->nfree > 0)
            round = arena_bin_malloc_easy(arena, bin, run);
        else
            round = arena_bin_malloc_hard(arena, bin);
        if (round == NULL)
            break;
        mag->rounds[i] = round;
    }
#ifdef MALLOC_STATS
    bin->stats.nrequests += (i - mag->nrounds);
    arena->stats.nmalloc_small += (i - mag->nrounds);
    arena->stats.allocated_small += (i - mag->nrounds) * bin->reg_size;
#endif
    malloc_spin_unlock(&arena->lock);
    mag->nrounds = i;
}
static inline void *
mag_rack_alloc(mag_rack_t *rack, size_t size, bool zero)
{
    void *ret;
    bin_mags_t *bin_mags;
    mag_t *mag;
    size_t binind;

    binind = size2bin[size];
    assert(binind < nbins);
    bin_mags = &rack->bin_mags[binind];

    mag = bin_mags->curmag;
    if (mag == NULL) {
        /* Create an initial magazine for this size class. */
        assert(bin_mags->sparemag == NULL);
        mag = mag_create(choose_arena(), binind);
        if (mag == NULL)
            return (NULL);
        bin_mags->curmag = mag;
        mag_load(mag);
    }

    ret = mag_alloc(mag);
    if (ret == NULL) {
        if (bin_mags->sparemag != NULL) {
            if (bin_mags->sparemag->nrounds > 0) {
                /* Swap magazines. */
                bin_mags->curmag = bin_mags->sparemag;
                bin_mags->sparemag = mag;
                mag = bin_mags->curmag;
            } else {
                /* Reload the current magazine. */
                mag_load(mag);
            }
        } else {
            /* Create a second magazine. */
            mag = mag_create(choose_arena(), binind);
            if (mag == NULL)
                return (NULL);
            mag_load(mag);
            bin_mags->sparemag = bin_mags->curmag;
            bin_mags->curmag = mag;
        }
        ret = mag_alloc(mag);
        if (ret == NULL)
            return (NULL);
    }

    if (zero == false) {
        if (opt_junk)
            memset(ret, 0xa5, size);
        else if (opt_zero)
            memset(ret, 0, size);
    } else
        memset(ret, 0, size);

    return (ret);
}
#endif
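/*
 * Design note: the current/spare pair above is the classic two-magazine
 * construction from the Bonwick/Adams slab allocator papers.  A thread
 * touches the arena (and its lock) only when both magazines are exhausted
 * and a reload is needed, so the common-case allocation path stays within
 * thread-local state.
 */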
static inline void *
arena_malloc_small(arena_t *arena, size_t size, bool zero)
{
    void *ret;
    arena_bin_t *bin;
    arena_run_t *run;
    size_t binind;

    binind = size2bin[size];
    assert(binind < nbins);
    bin = &arena->bins[binind];
    size = bin->reg_size;

#ifdef MALLOC_BALANCE
    arena_lock_balance(arena);
#else
    malloc_spin_lock(&arena->lock);
#endif
    if ((run = bin->runcur) != NULL && run->nfree > 0)
        ret = arena_bin_malloc_easy(arena, bin, run);
    else
        ret = arena_bin_malloc_hard(arena, bin);

    if (ret == NULL) {
        malloc_spin_unlock(&arena->lock);
        return (NULL);
    }

#ifdef MALLOC_STATS
    bin->stats.nrequests++;
    arena->stats.nmalloc_small++;
    arena->stats.allocated_small += size;
#endif
    malloc_spin_unlock(&arena->lock);

    if (zero == false) {
        if (opt_junk)
            memset(ret, 0xa5, size);
        else if (opt_zero)
            memset(ret, 0, size);
    } else
        memset(ret, 0, size);

    return (ret);
}
static void *
arena_malloc_large(arena_t *arena, size_t size, bool zero)
{
    void *ret;

    /* Large allocation. */
    size = PAGE_CEILING(size);
#ifdef MALLOC_BALANCE
    arena_lock_balance(arena);
#else
    malloc_spin_lock(&arena->lock);
#endif
    ret = (void *)arena_run_alloc(arena, size, true, zero);
    if (ret == NULL) {
        malloc_spin_unlock(&arena->lock);
        return (NULL);
    }
#ifdef MALLOC_STATS
    arena->stats.nmalloc_large++;
    arena->stats.allocated_large += size;
#endif
    malloc_spin_unlock(&arena->lock);

    if (zero == false) {
        if (opt_junk)
            memset(ret, 0xa5, size);
        else if (opt_zero)
            memset(ret, 0, size);
    }

    return (ret);
}
static inline void *
arena_malloc(arena_t *arena, size_t size, bool zero)
{

    assert(arena != NULL);
    assert(arena->magic == ARENA_MAGIC);
    assert(size != 0);
    assert(QUANTUM_CEILING(size) <= arena_maxclass);

    if (size <= bin_maxclass) {
#ifdef MALLOC_MAG
        if (__isthreaded && opt_mag) {
            mag_rack_t *rack = mag_rack;
            if (rack == NULL) {
                rack = mag_rack_create(arena);
                if (rack == NULL)
                    return (NULL);
                mag_rack = rack;
            }
            return (mag_rack_alloc(rack, size, zero));
        } else
#endif
            return (arena_malloc_small(arena, size, zero));
    } else
        return (arena_malloc_large(arena, size, zero));
}
static inline void *
imalloc(size_t size)
{

    assert(size != 0);

    if (size <= arena_maxclass)
        return (arena_malloc(choose_arena(), size, false));
    else
        return (huge_malloc(size, false));
}

static inline void *
icalloc(size_t size)
{

    if (size <= arena_maxclass)
        return (arena_malloc(choose_arena(), size, true));
    else
        return (huge_malloc(size, true));
}
/* Only handles large allocations that require more than page alignment. */
static void *
arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
{
    void *ret;
    size_t offset;
    arena_chunk_t *chunk;

    assert((size & PAGE_MASK) == 0);
    assert((alignment & PAGE_MASK) == 0);

#ifdef MALLOC_BALANCE
    arena_lock_balance(arena);
#else
    malloc_spin_lock(&arena->lock);
#endif
    ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
    if (ret == NULL) {
        malloc_spin_unlock(&arena->lock);
        return (NULL);
    }

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);

    offset = (uintptr_t)ret & (alignment - 1);
    assert((offset & PAGE_MASK) == 0);
    assert(offset < alloc_size);
    if (offset == 0)
        arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
    else {
        size_t leadsize, trailsize;

        leadsize = alignment - offset;
        if (leadsize > 0) {
            arena_run_trim_head(arena, chunk, ret, alloc_size,
                alloc_size - leadsize);
            ret = (void *)((uintptr_t)ret + leadsize);
        }

        trailsize = alloc_size - leadsize - size;
        if (trailsize != 0) {
            /* Trim trailing space. */
            assert(trailsize < alloc_size);
            arena_run_trim_tail(arena, chunk, ret, size + trailsize,
                size, false);
        }
    }

#ifdef MALLOC_STATS
    arena->stats.nmalloc_large++;
    arena->stats.allocated_large += size;
#endif
    malloc_spin_unlock(&arena->lock);

    if (opt_junk)
        memset(ret, 0xa5, size);
    else if (opt_zero)
        memset(ret, 0, size);
    return (ret);
}
static inline void *
ipalloc(size_t alignment, size_t size)
{
    void *ret;
    size_t ceil_size;

    /*
     * Round size up to the nearest multiple of alignment.
     *
     * This done, we can take advantage of the fact that for each small
     * size class, every object is aligned at the smallest power of two
     * that is non-zero in the base two representation of the size.  For
     * example:
     *
     *   Size |   Base 2 | Minimum alignment
     *   -----+----------+------------------
     *     96 |  1100000 |                32
     *    144 | 10100000 |                32
     *    192 | 11000000 |                64
     *
     * Depending on runtime settings, it is possible that arena_malloc()
     * will further round up to a power of two, but that never causes
     * correctness issues.
     */
    ceil_size = (size + (alignment - 1)) & (-alignment);
    /*
     * (ceil_size < size) protects against the combination of maximal
     * alignment and size greater than maximal alignment.
     */
    if (ceil_size < size) {
        /* size_t overflow. */
        return (NULL);
    }

    if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
        && ceil_size <= arena_maxclass))
        ret = arena_malloc(choose_arena(), ceil_size, false);
    else {
        size_t run_size;

        /*
         * We can't achieve subpage alignment, so round up alignment
         * permanently; it makes later calculations simpler.
         */
        alignment = PAGE_CEILING(alignment);
        ceil_size = PAGE_CEILING(size);
        /*
         * (ceil_size < size) protects against very large sizes within
         * PAGE_SIZE of SIZE_T_MAX.
         *
         * (ceil_size + alignment < ceil_size) protects against the
         * combination of maximal alignment and ceil_size large enough
         * to cause overflow.  This is similar to the first overflow
         * check above, but it needs to be repeated due to the new
         * ceil_size value, which may now be *equal* to maximal
         * alignment, whereas before we only detected overflow if the
         * original size was *greater* than maximal alignment.
         */
        if (ceil_size < size || ceil_size + alignment < ceil_size) {
            /* size_t overflow. */
            return (NULL);
        }

        /*
         * Calculate the size of the over-size run that arena_palloc()
         * would need to allocate in order to guarantee the alignment.
         */
        if (ceil_size >= alignment)
            run_size = ceil_size + alignment - PAGE_SIZE;
        else {
            /*
             * It is possible that (alignment << 1) will cause
             * overflow, but it doesn't matter because we also
             * subtract PAGE_SIZE, which in the case of overflow
             * leaves us with a very large run_size.  That causes
             * the first conditional below to fail, which means
             * that the bogus run_size value never gets used for
             * anything important.
             */
            run_size = (alignment << 1) - PAGE_SIZE;
        }

        if (run_size <= arena_maxclass) {
            ret = arena_palloc(choose_arena(), alignment, ceil_size,
                run_size);
        } else if (alignment <= chunksize)
            ret = huge_malloc(ceil_size, false);
        else
            ret = huge_palloc(alignment, ceil_size);
    }

    assert(((uintptr_t)ret & (alignment - 1)) == 0);
    return (ret);
}
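/*
 * Example of the rounding above (illustrative): a request for size 100
 * with alignment 64 yields ceil_size == (100 + 63) & ~63 == 128, and a
 * 128-byte object in this scheme is at least 64-byte aligned, so the
 * ordinary allocation path suffices.  Only when alignment exceeds
 * PAGE_SIZE does arena_palloc() allocate an over-size run that is then
 * trimmed at both ends.
 */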
/* Return the size of the allocation pointed to by ptr. */
static size_t
arena_salloc(const void *ptr)
{
    size_t ret;
    arena_chunk_t *chunk;
    size_t pageind, mapbits;

    assert(ptr != NULL);
    assert(CHUNK_ADDR2BASE(ptr) != ptr);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
    mapbits = chunk->map[pageind].bits;
    assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
    if ((mapbits & CHUNK_MAP_LARGE) == 0) {
        arena_run_t *run = (arena_run_t *)(mapbits & ~PAGE_MASK);
        assert(run->magic == ARENA_RUN_MAGIC);
        ret = run->bin->reg_size;
    } else {
        ret = mapbits & ~PAGE_MASK;
        assert(ret != 0);
    }

    return (ret);
}
static inline size_t
isalloc(const void *ptr)
{
    size_t ret;
    arena_chunk_t *chunk;

    assert(ptr != NULL);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    if (chunk != ptr) {
        /* Region. */
        assert(chunk->arena->magic == ARENA_MAGIC);

        ret = arena_salloc(ptr);
    } else {
        extent_node_t *node, key;

        /* Chunk (huge allocation). */

        malloc_mutex_lock(&huge_mtx);

        /* Extract from tree of huge allocations. */
        key.addr = __DECONST(void *, ptr);
        node = extent_tree_ad_search(&huge, &key);
        assert(node != NULL);

        ret = node->size;

        malloc_mutex_unlock(&huge_mtx);
    }

    return (ret);
}
static void
arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    arena_chunk_map_t *mapelm)
{
    arena_run_t *run;
    arena_bin_t *bin;
    size_t size;

    run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
    assert(run->magic == ARENA_RUN_MAGIC);
    bin = run->bin;
    size = bin->reg_size;

    if (opt_junk)
        memset(ptr, 0x5a, size);

    arena_run_reg_dalloc(run, bin, ptr, size);
    run->nfree++;

    if (run->nfree == bin->nregs) {
        /* Deallocate run. */
        if (run == bin->runcur)
            bin->runcur = NULL;
        else if (bin->nregs != 1) {
            size_t run_pageind = (((uintptr_t)run -
                (uintptr_t)chunk)) >> PAGE_SHIFT;
            arena_chunk_map_t *run_mapelm =
                &chunk->map[run_pageind];
            /*
             * This block's conditional is necessary because if the
             * run only contains one region, then it never gets
             * inserted into the non-full runs tree.
             */
            arena_run_tree_remove(&bin->runs, run_mapelm);
        }
#ifdef MALLOC_DEBUG
        run->magic = 0;
#endif
        arena_run_dalloc(arena, run, true);
#ifdef MALLOC_STATS
        bin->stats.curruns--;
#endif
    } else if (run->nfree == 1 && run != bin->runcur) {
        /*
         * Make sure that bin->runcur always refers to the lowest
         * non-full run, if one exists.
         */
        if (bin->runcur == NULL)
            bin->runcur = run;
        else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
            /* Switch runcur. */
            if (bin->runcur->nfree > 0) {
                arena_chunk_t *runcur_chunk =
                    CHUNK_ADDR2BASE(bin->runcur);
                size_t runcur_pageind =
                    (((uintptr_t)bin->runcur -
                    (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
                arena_chunk_map_t *runcur_mapelm =
                    &runcur_chunk->map[runcur_pageind];

                /* Insert runcur. */
                arena_run_tree_insert(&bin->runs,
                    runcur_mapelm);
            }
            bin->runcur = run;
        } else {
            size_t run_pageind = (((uintptr_t)run -
                (uintptr_t)chunk)) >> PAGE_SHIFT;
            arena_chunk_map_t *run_mapelm =
                &chunk->map[run_pageind];

            assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
                NULL);
            arena_run_tree_insert(&bin->runs, run_mapelm);
        }
    }
#ifdef MALLOC_STATS
    arena->stats.allocated_small -= size;
    arena->stats.ndalloc_small++;
#endif
}
#ifdef MALLOC_MAG
static void
mag_unload(mag_t *mag)
{
    arena_chunk_t *chunk;
    arena_t *arena;
    void *round;
    size_t i, ndeferred, nrounds;

    for (ndeferred = mag->nrounds; ndeferred > 0;) {
        nrounds = ndeferred;
        /* Lock the arena associated with the first round. */
        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mag->rounds[0]);
        arena = chunk->arena;
#ifdef MALLOC_BALANCE
        arena_lock_balance(arena);
#else
        malloc_spin_lock(&arena->lock);
#endif
        /* Deallocate every round that belongs to the locked arena. */
        for (i = ndeferred = 0; i < nrounds; i++) {
            round = mag->rounds[i];
            chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(round);
            if (chunk->arena == arena) {
                size_t pageind = (((uintptr_t)round -
                    (uintptr_t)chunk) >> PAGE_SHIFT);
                arena_chunk_map_t *mapelm =
                    &chunk->map[pageind];
                arena_dalloc_small(arena, chunk, round, mapelm);
            } else {
                /*
                 * This round was allocated via a different
                 * arena than the one that is currently locked.
                 * Stash the round, so that it can be handled
                 * in a future pass.
                 */
                mag->rounds[ndeferred] = round;
                ndeferred++;
            }
        }
        malloc_spin_unlock(&arena->lock);
    }

    mag->nrounds = 0;
}
static void
mag_rack_dalloc(mag_rack_t *rack, void *ptr)
{
    arena_t *arena;
    arena_chunk_t *chunk;
    arena_run_t *run;
    arena_bin_t *bin;
    bin_mags_t *bin_mags;
    mag_t *mag;
    size_t pageind, binind;
    arena_chunk_map_t *mapelm;

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    arena = chunk->arena;
    pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
    mapelm = &chunk->map[pageind];
    run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
    assert(run->magic == ARENA_RUN_MAGIC);
    bin = run->bin;
    binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
        sizeof(arena_bin_t);
    assert(binind < nbins);

    if (opt_junk)
        memset(ptr, 0x5a, arena->bins[binind].reg_size);

    bin_mags = &rack->bin_mags[binind];
    mag = bin_mags->curmag;
    if (mag == NULL) {
        /* Create an initial magazine for this size class. */
        assert(bin_mags->sparemag == NULL);
        mag = mag_create(choose_arena(), binind);
        if (mag == NULL) {
            malloc_spin_lock(&arena->lock);
            arena_dalloc_small(arena, chunk, ptr, mapelm);
            malloc_spin_unlock(&arena->lock);
            return;
        }
        bin_mags->curmag = mag;
    }

    if (mag->nrounds == max_rounds) {
        if (bin_mags->sparemag != NULL) {
            if (bin_mags->sparemag->nrounds < max_rounds) {
                /* Swap magazines. */
                bin_mags->curmag = bin_mags->sparemag;
                bin_mags->sparemag = mag;
                mag = bin_mags->curmag;
            } else {
                /* Unload the current magazine. */
                mag_unload(mag);
            }
        } else {
            /* Create a second magazine. */
            mag = mag_create(choose_arena(), binind);
            if (mag == NULL) {
                mag = rack->bin_mags[binind].curmag;
                mag_unload(mag);
            } else {
                bin_mags->sparemag = bin_mags->curmag;
                bin_mags->curmag = mag;
            }
        }
        assert(mag->nrounds < max_rounds);
    }

    mag->rounds[mag->nrounds] = ptr;
    mag->nrounds++;
}
#endif
static void
arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{

    /* Large allocation. */
    malloc_spin_lock(&arena->lock);

#ifndef MALLOC_STATS
    if (opt_junk)
#endif
    {
        size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
            PAGE_SHIFT;
        size_t size = chunk->map[pageind].bits & ~PAGE_MASK;

#ifdef MALLOC_STATS
        if (opt_junk)
#endif
            memset(ptr, 0x5a, size);
#ifdef MALLOC_STATS
        arena->stats.allocated_large -= size;
#endif
    }
#ifdef MALLOC_STATS
    arena->stats.ndalloc_large++;
#endif

    arena_run_dalloc(arena, (arena_run_t *)ptr, true);
    malloc_spin_unlock(&arena->lock);
}
static inline void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
    size_t pageind;
    arena_chunk_map_t *mapelm;

    assert(arena != NULL);
    assert(arena->magic == ARENA_MAGIC);
    assert(chunk->arena == arena);
    assert(ptr != NULL);
    assert(CHUNK_ADDR2BASE(ptr) != ptr);

    pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
    mapelm = &chunk->map[pageind];
    assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
    if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
        /* Small allocation. */
#ifdef MALLOC_MAG
        if (__isthreaded && opt_mag) {
            mag_rack_t *rack = mag_rack;
            if (rack == NULL) {
                rack = mag_rack_create(arena);
                if (rack == NULL) {
                    malloc_spin_lock(&arena->lock);
                    arena_dalloc_small(arena, chunk, ptr,
                        mapelm);
                    malloc_spin_unlock(&arena->lock);
                    return;
                }
                mag_rack = rack;
            }
            mag_rack_dalloc(rack, ptr);
        } else {
#endif
            malloc_spin_lock(&arena->lock);
            arena_dalloc_small(arena, chunk, ptr, mapelm);
            malloc_spin_unlock(&arena->lock);
#ifdef MALLOC_MAG
        }
#endif
    } else
        arena_dalloc_large(arena, chunk, ptr);
}
static inline void
idalloc(void *ptr)
{
    arena_chunk_t *chunk;

    assert(ptr != NULL);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    if (chunk != ptr)
        arena_dalloc(chunk->arena, chunk, ptr);
    else
        huge_dalloc(ptr);
}
static void
arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    size_t size, size_t oldsize)
{

    assert(size < oldsize);

    /*
     * Shrink the run, and make trailing pages available for other
     * allocations.
     */
#ifdef MALLOC_BALANCE
    arena_lock_balance(arena);
#else
    malloc_spin_lock(&arena->lock);
#endif
    arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
        true);
#ifdef MALLOC_STATS
    arena->stats.allocated_large -= oldsize - size;
#endif
    malloc_spin_unlock(&arena->lock);
}
static bool
arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
    size_t size, size_t oldsize)
{
    size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
    size_t npages = oldsize >> PAGE_SHIFT;

    assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));

    /* Try to extend the run. */
    assert(size > oldsize);
#ifdef MALLOC_BALANCE
    arena_lock_balance(arena);
#else
    malloc_spin_lock(&arena->lock);
#endif
    if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
        & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
        ~PAGE_MASK) >= size - oldsize) {
        /*
         * The next run is available and sufficiently large.  Split the
         * following run, then merge the first part with the existing
         * allocation.
         */
        arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
            ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
            false);

        chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
            CHUNK_MAP_ALLOCATED;
        chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
            CHUNK_MAP_ALLOCATED;

#ifdef MALLOC_STATS
        arena->stats.allocated_large += size - oldsize;
#endif
        malloc_spin_unlock(&arena->lock);
        return (false);
    }
    malloc_spin_unlock(&arena->lock);

    return (true);
}
/*
 * Try to resize a large allocation, in order to avoid copying.  This will
 * always fail if growing an object, and the following run is already in use.
 */
static bool
arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
{
    size_t psize;

    psize = PAGE_CEILING(size);
    if (psize == oldsize) {
        /* Same size class. */
        if (opt_junk && size < oldsize) {
            memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
                size);
        }
        return (false);
    } else {
        arena_chunk_t *chunk;
        arena_t *arena;

        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
        arena = chunk->arena;
        assert(arena->magic == ARENA_MAGIC);

        if (psize < oldsize) {
            /* Fill before shrinking in order avoid a race. */
            if (opt_junk) {
                memset((void *)((uintptr_t)ptr + size), 0x5a,
                    oldsize - size);
            }
            arena_ralloc_large_shrink(arena, chunk, ptr, psize,
                oldsize);
            return (false);
        } else {
            bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
                psize, oldsize);
            if (ret == false && opt_zero) {
                memset((void *)((uintptr_t)ptr + oldsize), 0,
                    size - oldsize);
            }
            return (ret);
        }
    }
}
static void *
arena_ralloc(void *ptr, size_t size, size_t oldsize)
{
    void *ret;
    size_t copysize;

    /* Try to avoid moving the allocation. */
    if (size <= bin_maxclass) {
        if (oldsize <= bin_maxclass && size2bin[size] ==
            size2bin[oldsize])
            goto IN_PLACE;
    } else {
        if (oldsize > bin_maxclass && oldsize <= arena_maxclass) {
            assert(size > bin_maxclass);
            if (arena_ralloc_large(ptr, size, oldsize) == false)
                return (ptr);
        }
    }

    /*
     * If we get here, then size and oldsize are different enough that we
     * need to move the object.  In that case, fall back to allocating new
     * space and copying.
     */
    ret = arena_malloc(choose_arena(), size, false);
    if (ret == NULL)
        return (NULL);

    /* Junk/zero-filling were already done by arena_malloc(). */
    copysize = (size < oldsize) ? size : oldsize;
    memcpy(ret, ptr, copysize);
    idalloc(ptr);
    return (ret);
IN_PLACE:
    if (opt_junk && size < oldsize)
        memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
    else if (opt_zero && size > oldsize)
        memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
    return (ptr);
}
static inline void *
iralloc(void *ptr, size_t size)
{
    size_t oldsize;

    assert(ptr != NULL);
    assert(size != 0);

    oldsize = isalloc(ptr);

    if (size <= arena_maxclass)
        return (arena_ralloc(ptr, size, oldsize));
    else
        return (huge_ralloc(ptr, size, oldsize));
}
static bool
arena_new(arena_t *arena)
{
    unsigned i;
    arena_bin_t *bin;
    size_t prev_run_size;

    if (malloc_spin_init(&arena->lock))
        return (true);

#ifdef MALLOC_STATS
    memset(&arena->stats, 0, sizeof(arena_stats_t));
#endif

    /* Initialize chunks. */
    arena_chunk_tree_dirty_new(&arena->chunks_dirty);
    arena->spare = NULL;

    arena->ndirty = 0;

    arena_avail_tree_new(&arena->runs_avail);

#ifdef MALLOC_BALANCE
    arena->contention = 0;
#endif

    /* Initialize bins. */
    prev_run_size = PAGE_SIZE;

    i = 0;
#ifdef MALLOC_TINY
    /* (2^n)-spaced tiny bins. */
    for (; i < ntbins; i++) {
        bin = &arena->bins[i];
        bin->runcur = NULL;
        arena_run_tree_new(&bin->runs);

        bin->reg_size = (1U << (TINY_MIN_2POW + i));

        prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
        memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
    }
#endif

    /* Quantum-spaced bins. */
    for (; i < ntbins + nqbins; i++) {
        bin = &arena->bins[i];
        bin->runcur = NULL;
        arena_run_tree_new(&bin->runs);

        bin->reg_size = (i - ntbins + 1) << QUANTUM_2POW;

        prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
        memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
    }

    /* Cacheline-spaced bins. */
    for (; i < ntbins + nqbins + ncbins; i++) {
        bin = &arena->bins[i];
        bin->runcur = NULL;
        arena_run_tree_new(&bin->runs);

        bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
            CACHELINE_2POW);

        prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
        memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
    }

    /* Subpage-spaced bins. */
    for (; i < nbins; i++) {
        bin = &arena->bins[i];
        bin->runcur = NULL;
        arena_run_tree_new(&bin->runs);

        bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
            << SUBPAGE_2POW);

        prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);

#ifdef MALLOC_STATS
        memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
    }

#ifdef MALLOC_DEBUG
    arena->magic = ARENA_MAGIC;
#endif

    return (false);
}
/* Create a new arena and insert it into the arenas array at index ind. */
static arena_t *
arenas_extend(unsigned ind)
{
    arena_t *ret;

    /* Allocate enough space for trailing bins. */
    ret = (arena_t *)base_alloc(sizeof(arena_t)
        + (sizeof(arena_bin_t) * (nbins - 1)));
    if (ret != NULL && arena_new(ret) == false) {
        arenas[ind] = ret;
        return (ret);
    }
    /* Only reached if there is an OOM error. */

    /*
     * OOM here is quite inconvenient to propagate, since dealing with it
     * would require a check for failure in the fast path.  Instead, punt
     * by using arenas[0].  In practice, this is an extremely unlikely
     * failure.
     */
    _malloc_message(_getprogname(),
        ": (malloc) Error initializing arena\n", "", "");
    if (opt_abort)
        abort();

    return (arenas[0]);
}
#ifdef MALLOC_MAG
static mag_t *
mag_create(arena_t *arena, size_t binind)
{
    mag_t *ret;

    if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <=
        bin_maxclass) {
        ret = arena_malloc_small(arena, sizeof(mag_t) + (sizeof(void *)
            * (max_rounds - 1)), false);
    } else {
        ret = imalloc(sizeof(mag_t) + (sizeof(void *) * (max_rounds -
            1)));
    }
    if (ret == NULL)
        return (NULL);
    ret->binind = binind;
    ret->nrounds = 0;

    return (ret);
}

static void
mag_destroy(mag_t *mag)
{
    arena_t *arena;
    arena_chunk_t *chunk;
    size_t pageind;
    arena_chunk_map_t *mapelm;

    chunk = CHUNK_ADDR2BASE(mag);
    arena = chunk->arena;
    pageind = (((uintptr_t)mag - (uintptr_t)chunk) >> PAGE_SHIFT);
    mapelm = &chunk->map[pageind];

    assert(mag->nrounds == 0);
    if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <=
        bin_maxclass) {
        malloc_spin_lock(&arena->lock);
        arena_dalloc_small(arena, chunk, mag, mapelm);
        malloc_spin_unlock(&arena->lock);
    } else
        idalloc(mag);
}

static mag_rack_t *
mag_rack_create(arena_t *arena)
{

    assert(sizeof(mag_rack_t) + (sizeof(bin_mags_t *) * (nbins - 1)) <=
        bin_maxclass);
    return (arena_malloc_small(arena, sizeof(mag_rack_t) +
        (sizeof(bin_mags_t) * (nbins - 1)), true));
}

static void
mag_rack_destroy(mag_rack_t *rack)
{
    arena_t *arena;
    arena_chunk_t *chunk;
    bin_mags_t *bin_mags;
    size_t i, pageind;
    arena_chunk_map_t *mapelm;

    for (i = 0; i < nbins; i++) {
        bin_mags = &rack->bin_mags[i];
        if (bin_mags->curmag != NULL) {
            assert(bin_mags->curmag->binind == i);
            mag_unload(bin_mags->curmag);
            mag_destroy(bin_mags->curmag);
        }
        if (bin_mags->sparemag != NULL) {
            assert(bin_mags->sparemag->binind == i);
            mag_unload(bin_mags->sparemag);
            mag_destroy(bin_mags->sparemag);
        }
    }

    chunk = CHUNK_ADDR2BASE(rack);
    arena = chunk->arena;
    pageind = (((uintptr_t)rack - (uintptr_t)chunk) >> PAGE_SHIFT);
    mapelm = &chunk->map[pageind];

    malloc_spin_lock(&arena->lock);
    arena_dalloc_small(arena, chunk, rack, mapelm);
    malloc_spin_unlock(&arena->lock);
}
#endif
/******************************************************************************/
/*
 * Begin general internal functions.
 */
static void *
huge_malloc(size_t size, bool zero)
{
    void *ret;
    size_t csize;
    extent_node_t *node;

    /* Allocate one or more contiguous chunks for this request. */

    csize = CHUNK_CEILING(size);
    if (csize == 0) {
        /* size is large enough to cause size_t wrap-around. */
        return (NULL);
    }

    /* Allocate an extent node with which to track the chunk. */
    node = base_node_alloc();
    if (node == NULL)
        return (NULL);

    ret = chunk_alloc(csize, zero);
    if (ret == NULL) {
        base_node_dealloc(node);
        return (NULL);
    }

    /* Insert node into huge. */
    node->addr = ret;
    node->size = csize;

    malloc_mutex_lock(&huge_mtx);
    extent_tree_ad_insert(&huge, node);
#ifdef MALLOC_STATS
    huge_nmalloc++;
    huge_allocated += csize;
#endif
    malloc_mutex_unlock(&huge_mtx);

    if (zero == false) {
        if (opt_junk)
            memset(ret, 0xa5, csize);
        else if (opt_zero)
            memset(ret, 0, csize);
    }

    return (ret);
}
/* Only handles large allocations that require more than chunk alignment. */
static void *
huge_palloc(size_t alignment, size_t size)
{
    void *ret;
    size_t alloc_size, chunk_size, offset;
    extent_node_t *node;

    /*
     * This allocation requires alignment that is even larger than chunk
     * alignment.  This means that huge_malloc() isn't good enough.
     *
     * Allocate almost twice as many chunks as are demanded by the size or
     * alignment, in order to assure the alignment can be achieved, then
     * unmap leading and trailing chunks.
     */
    assert(alignment >= chunksize);

    chunk_size = CHUNK_CEILING(size);

    if (size >= alignment)
        alloc_size = chunk_size + alignment - chunksize;
    else
        alloc_size = (alignment << 1) - chunksize;

    /* Allocate an extent node with which to track the chunk. */
    node = base_node_alloc();
    if (node == NULL)
        return (NULL);

    ret = chunk_alloc(alloc_size, false);
    if (ret == NULL) {
        base_node_dealloc(node);
        return (NULL);
    }

    offset = (uintptr_t)ret & (alignment - 1);
    assert((offset & chunksize_mask) == 0);
    assert(offset < alloc_size);
    if (offset == 0) {
        /* Trim trailing space. */
        chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
            - chunk_size);
    } else {
        size_t trailsize;

        /* Trim leading space. */
        chunk_dealloc(ret, alignment - offset);

        ret = (void *)((uintptr_t)ret + (alignment - offset));

        trailsize = alloc_size - (alignment - offset) - chunk_size;
        if (trailsize != 0) {
            /* Trim trailing space. */
            assert(trailsize < alloc_size);
            chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
                trailsize);
        }
    }

    /* Insert node into huge. */
    node->addr = ret;
    node->size = chunk_size;

    malloc_mutex_lock(&huge_mtx);
    extent_tree_ad_insert(&huge, node);
#ifdef MALLOC_STATS
    huge_nmalloc++;
    huge_allocated += chunk_size;
#endif
    malloc_mutex_unlock(&huge_mtx);

    if (opt_junk)
        memset(ret, 0xa5, chunk_size);
    else if (opt_zero)
        memset(ret, 0, chunk_size);

    return (ret);
}
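/*
 * Example of the over-allocation above (illustrative, assuming 1 MB
 * chunks): a request for 1 MB with 4 MB alignment computes
 * alloc_size == (4 MB << 1) - 1 MB == 7 MB.  Some 4 MB boundary must fall
 * within the first 4 MB of any 7 MB region, so after the leading and
 * trailing chunks are unmapped, an aligned 1 MB extent always remains.
 */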
static void *
huge_ralloc(void *ptr, size_t size, size_t oldsize)
{
    void *ret;
    size_t copysize;

    /* Avoid moving the allocation if the size class would not change. */
    if (oldsize > arena_maxclass &&
        CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
        if (opt_junk && size < oldsize) {
            memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
                - size);
        } else if (opt_zero && size > oldsize) {
            memset((void *)((uintptr_t)ptr + oldsize), 0, size
                - oldsize);
        }
        return (ptr);
    }

    /*
     * If we get here, then size and oldsize are different enough that we
     * need to use a different size class.  In that case, fall back to
     * allocating new space and copying.
     */
    ret = huge_malloc(size, false);
    if (ret == NULL)
        return (NULL);

    copysize = (size < oldsize) ? size : oldsize;
    memcpy(ret, ptr, copysize);
    idalloc(ptr);
    return (ret);
}
static void
huge_dalloc(void *ptr)
{
    extent_node_t *node, key;

    malloc_mutex_lock(&huge_mtx);

    /* Extract from tree of huge allocations. */
    key.addr = ptr;
    node = extent_tree_ad_search(&huge, &key);
    assert(node != NULL);
    assert(node->addr == ptr);
    extent_tree_ad_remove(&huge, node);

#ifdef MALLOC_STATS
    huge_ndalloc++;
    huge_allocated -= node->size;
#endif

    malloc_mutex_unlock(&huge_mtx);

    /* Unmap chunk. */
#ifdef MALLOC_DSS
    if (opt_dss && opt_junk)
        memset(node->addr, 0x5a, node->size);
#endif
    chunk_dealloc(node->addr, node->size);

    base_node_dealloc(node);
}
static void
malloc_print_stats(void)
{

    if (opt_print_stats) {
        char s[UMAX2S_BUFSIZE];
        _malloc_message("___ Begin malloc statistics ___\n", "", "",
            "");
        _malloc_message("Assertions ",
#ifdef NDEBUG
            "disabled",
#else
            "enabled",
#endif
            "\n", "");
        _malloc_message("Boolean MALLOC_OPTIONS: ",
            opt_abort ? "A" : "a", "", "");
#ifdef MALLOC_DSS
        _malloc_message(opt_dss ? "D" : "d", "", "", "");
#endif
#ifdef MALLOC_MAG
        _malloc_message(opt_mag ? "G" : "g", "", "", "");
#endif
        _malloc_message(opt_junk ? "J" : "j", "", "", "");
#ifdef MALLOC_DSS
        _malloc_message(opt_mmap ? "M" : "m", "", "", "");
#endif
        _malloc_message(opt_utrace ? "PU" : "Pu",
            opt_sysv ? "V" : "v",
            opt_xmalloc ? "X" : "x",
            opt_zero ? "Z\n" : "z\n");

        _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
        _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
#ifdef MALLOC_BALANCE
        _malloc_message("Arena balance threshold: ",
            umax2s(opt_balance_threshold, s), "\n", "");
#endif
        _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
            "\n", "");
        _malloc_message("Quantum size: ", umax2s(QUANTUM, s), "\n", "");
        _malloc_message("Cacheline size (assumed): ", umax2s(CACHELINE,
            s), "\n", "");
#ifdef MALLOC_TINY
        _malloc_message("Tiny 2^n-spaced sizes: [", umax2s((1U <<
            TINY_MIN_2POW), s), "..", "");
        _malloc_message(umax2s((qspace_min >> 1), s), "]\n", "", "");
#endif
        _malloc_message("Quantum-spaced sizes: [", umax2s(qspace_min,
            s), "..", "");
        _malloc_message(umax2s(qspace_max, s), "]\n", "", "");
        _malloc_message("Cacheline-spaced sizes: [", umax2s(cspace_min,
            s), "..", "");
        _malloc_message(umax2s(cspace_max, s), "]\n", "", "");
        _malloc_message("Subpage-spaced sizes: [", umax2s(sspace_min,
            s), "..", "");
        _malloc_message(umax2s(sspace_max, s), "]\n", "", "");
#ifdef MALLOC_MAG
        _malloc_message("Rounds per magazine: ", umax2s(max_rounds, s),
            "\n", "");
#endif
        _malloc_message("Max dirty pages per arena: ",
            umax2s(opt_dirty_max, s), "\n", "");

        _malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
        _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");

#ifdef MALLOC_STATS
        {
            size_t allocated, mapped;
#ifdef MALLOC_BALANCE
            uint64_t nbalance = 0;
#endif
            unsigned i;
            arena_t *arena;

            /* Calculate and print allocated/mapped stats. */

            /* arenas. */
            for (i = 0, allocated = 0; i < narenas; i++) {
                if (arenas[i] != NULL) {
                    malloc_spin_lock(&arenas[i]->lock);
                    allocated +=
                        arenas[i]->stats.allocated_small;
                    allocated +=
                        arenas[i]->stats.allocated_large;
#ifdef MALLOC_BALANCE
                    nbalance += arenas[i]->stats.nbalance;
#endif
                    malloc_spin_unlock(&arenas[i]->lock);
                }
            }

            /* huge/base. */
            malloc_mutex_lock(&huge_mtx);
            allocated += huge_allocated;
            mapped = stats_chunks.curchunks * chunksize;
            malloc_mutex_unlock(&huge_mtx);

            malloc_mutex_lock(&base_mtx);
            mapped += base_mapped;
            malloc_mutex_unlock(&base_mtx);

            malloc_printf("Allocated: %zu, mapped: %zu\n",
                allocated, mapped);

#ifdef MALLOC_BALANCE
            malloc_printf("Arena balance reassignments: %llu\n",
                nbalance);
#endif

            /* Print chunk stats. */
            {
                chunk_stats_t chunks_stats;

                malloc_mutex_lock(&huge_mtx);
                chunks_stats = stats_chunks;
                malloc_mutex_unlock(&huge_mtx);

                malloc_printf("chunks: nchunks   "
                    "highchunks    curchunks\n");
                malloc_printf("  %13llu%13lu%13lu\n",
                    chunks_stats.nchunks,
                    chunks_stats.highchunks,
                    chunks_stats.curchunks);
            }

            /* Print huge allocation stats. */
            malloc_printf(
                "huge: nmalloc      ndalloc    allocated\n");
            malloc_printf(" %12llu %12llu %12zu\n",
                huge_nmalloc, huge_ndalloc, huge_allocated);

            /* Print stats for each arena. */
            for (i = 0; i < narenas; i++) {
                arena = arenas[i];
                if (arena != NULL) {
                    malloc_printf(
                        "\narenas[%u]:\n", i);
                    malloc_spin_lock(&arena->lock);
                    stats_print(arena);
                    malloc_spin_unlock(&arena->lock);
                }
            }
        }
#endif /* #ifdef MALLOC_STATS */
        _malloc_message("--- End malloc statistics ---\n", "", "", "");
    }
}
#ifdef MALLOC_DEBUG
static void
size2bin_validate(void)
{
    size_t i, size, binind;

    assert(size2bin[0] == 0xffU);
    i = 1;
#ifdef MALLOC_TINY
    /* Tiny. */
    for (; i < (1U << TINY_MIN_2POW); i++) {
        size = pow2_ceil(1U << TINY_MIN_2POW);
        binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
        assert(size2bin[i] == binind);
    }
    for (; i < qspace_min; i++) {
        size = pow2_ceil(i);
        binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
        assert(size2bin[i] == binind);
    }
#endif
    /* Quantum-spaced. */
    for (; i <= qspace_max; i++) {
        size = QUANTUM_CEILING(i);
        binind = ntbins + (size >> QUANTUM_2POW) - 1;
        assert(size2bin[i] == binind);
    }
    /* Cacheline-spaced. */
    for (; i <= cspace_max; i++) {
        size = CACHELINE_CEILING(i);
        binind = ntbins + nqbins + ((size - cspace_min) >>
            CACHELINE_2POW);
        assert(size2bin[i] == binind);
    }
    /* Sub-page. */
    for (; i <= sspace_max; i++) {
        size = SUBPAGE_CEILING(i);
        binind = ntbins + nqbins + ncbins + ((size - sspace_min)
            >> SUBPAGE_2POW);
        assert(size2bin[i] == binind);
    }
}
#endif

static bool
size2bin_init(void)
{

    if (opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT
        || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT)
        return (size2bin_init_hard());

    size2bin = const_size2bin;
#ifdef MALLOC_DEBUG
    assert(sizeof(const_size2bin) == bin_maxclass + 1);
    size2bin_validate();
#endif
    return (false);
}
static bool
size2bin_init_hard(void)
{
    size_t i, size, binind;
    uint8_t *custom_size2bin;

    assert(opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT
        || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT);

    custom_size2bin = (uint8_t *)base_alloc(bin_maxclass + 1);
    if (custom_size2bin == NULL)
        return (true);

    custom_size2bin[0] = 0xffU;
    i = 1;
#ifdef MALLOC_TINY
    /* Tiny. */
    for (; i < (1U << TINY_MIN_2POW); i++) {
        size = pow2_ceil(1U << TINY_MIN_2POW);
        binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
        custom_size2bin[i] = binind;
    }
    for (; i < qspace_min; i++) {
        size = pow2_ceil(i);
        binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
        custom_size2bin[i] = binind;
    }
#endif
    /* Quantum-spaced. */
    for (; i <= qspace_max; i++) {
        size = QUANTUM_CEILING(i);
        binind = ntbins + (size >> QUANTUM_2POW) - 1;
        custom_size2bin[i] = binind;
    }
    /* Cacheline-spaced. */
    for (; i <= cspace_max; i++) {
        size = CACHELINE_CEILING(i);
        binind = ntbins + nqbins + ((size - cspace_min) >>
            CACHELINE_2POW);
        custom_size2bin[i] = binind;
    }
    /* Sub-page. */
    for (; i <= sspace_max; i++) {
        size = SUBPAGE_CEILING(i);
        binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
            SUBPAGE_2POW);
        custom_size2bin[i] = binind;
    }

    size2bin = custom_size2bin;
#ifdef MALLOC_DEBUG
    size2bin_validate();
#endif
    return (false);
}
/*
 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
 * implementation has to take pains to avoid infinite recursion during
 * initialization.
 */
static inline bool
malloc_init(void)
{

    if (malloc_initialized == false)
        return (malloc_init_hard());

    return (false);
}
static bool
malloc_init_hard(void)
{
    unsigned i;
    int linklen;
    char buf[PATH_MAX + 1];
    const char *opts;

    malloc_mutex_lock(&init_lock);
    if (malloc_initialized) {
        /*
         * Another thread initialized the allocator before this one
         * acquired init_lock.
         */
        malloc_mutex_unlock(&init_lock);
        return (false);
    }

    /* Get number of CPUs. */
    {
        int mib[2];
        size_t len;

        mib[0] = CTL_HW;
        mib[1] = HW_NCPU;
        len = sizeof(ncpus);
        if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
            /* Error. */
            ncpus = 1;
        }
    }

    /*
     * Increase the chunk size to the largest page size that is greater
     * than the default chunk size and less than or equal to 4MB.
     */
    {
        size_t pagesizes[MAXPAGESIZES];
        int k, nsizes;

        nsizes = getpagesizes(pagesizes, MAXPAGESIZES);
        for (k = 0; k < nsizes; k++)
            if (pagesizes[k] <= (1LU << 22))
                while ((1LU << opt_chunk_2pow) < pagesizes[k])
                    opt_chunk_2pow++;
    }
    for (i = 0; i < 3; i++) {
        unsigned j;

        /* Get runtime configuration. */
        switch (i) {
        case 0:
            if ((linklen = readlink("/etc/malloc.conf", buf,
                sizeof(buf) - 1)) != -1) {
                /*
                 * Use the contents of the "/etc/malloc.conf"
                 * symbolic link's name.
                 */
                buf[linklen] = '\0';
                opts = buf;
            } else {
                /* No configuration specified. */
                buf[0] = '\0';
                opts = buf;
            }
            break;
        case 1:
            if (issetugid() == 0 && (opts =
                getenv("MALLOC_OPTIONS")) != NULL) {
                /*
                 * Do nothing; opts is already initialized to
                 * the value of the MALLOC_OPTIONS environment
                 * variable.
                 */
            } else {
                /* No configuration specified. */
                buf[0] = '\0';
                opts = buf;
            }
            break;
        case 2:
            if (_malloc_options != NULL) {
                /*
                 * Use options that were compiled into the
                 * program.
                 */
                opts = _malloc_options;
            } else {
                /* No configuration specified. */
                buf[0] = '\0';
                opts = buf;
            }
            break;
        default:
            /* NOTREACHED */
            assert(false);
        }

        for (j = 0; opts[j] != '\0'; j++) {
            unsigned k, nreps;
            bool nseen;

            /* Parse repetition count, if any. */
            for (nreps = 0, nseen = false;; j++, nseen = true) {
                switch (opts[j]) {
                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                case '8': case '9':
                    nreps *= 10;
                    nreps += opts[j] - '0';
                    break;
                default:
                    goto MALLOC_OUT;
                }
            }
MALLOC_OUT:
            if (nseen == false)
                nreps = 1;
            for (k = 0; k < nreps; k++) {
                switch (opts[j]) {
                case 'a':
                    opt_abort = false;
                    break;
                case 'A':
                    opt_abort = true;
                    break;
                case 'b':
#ifdef MALLOC_BALANCE
                    opt_balance_threshold >>= 1;
#endif
                    break;
                case 'B':
#ifdef MALLOC_BALANCE
                    if (opt_balance_threshold == 0)
                        opt_balance_threshold = 1;
                    else if ((opt_balance_threshold << 1)
                        > opt_balance_threshold)
                        opt_balance_threshold <<= 1;
#endif
                    break;
                case 'c':
                    if (opt_cspace_max_2pow - 1 >
                        opt_qspace_max_2pow &&
                        opt_cspace_max_2pow >
                        CACHELINE_2POW)
                        opt_cspace_max_2pow--;
                    break;
                case 'C':
                    if (opt_cspace_max_2pow < PAGE_SHIFT
                        - 1)
                        opt_cspace_max_2pow++;
                    break;
                case 'd':
#ifdef MALLOC_DSS
                    opt_dss = false;
#endif
                    break;
                case 'D':
#ifdef MALLOC_DSS
                    opt_dss = true;
#endif
                    break;
                case 'f':
                    opt_dirty_max >>= 1;
                    break;
                case 'F':
                    if (opt_dirty_max == 0)
                        opt_dirty_max = 1;
                    else if ((opt_dirty_max << 1) != 0)
                        opt_dirty_max <<= 1;
                    break;
#ifdef MALLOC_MAG
                case 'g':
                    opt_mag = false;
                    break;
                case 'G':
                    opt_mag = true;
                    break;
#endif
                case 'j':
                    opt_junk = false;
                    break;
                case 'J':
                    opt_junk = true;
                    break;
                case 'k':
                    /*
                     * Chunks always require at least one
                     * header page, so chunks can never be
                     * smaller than two pages.
                     */
                    if (opt_chunk_2pow > PAGE_SHIFT + 1)
                        opt_chunk_2pow--;
                    break;
                case 'K':
                    if (opt_chunk_2pow + 1 <
                        (sizeof(size_t) << 3))
                        opt_chunk_2pow++;
                    break;
                case 'm':
#ifdef MALLOC_DSS
                    opt_mmap = false;
#endif
                    break;
                case 'M':
#ifdef MALLOC_DSS
                    opt_mmap = true;
#endif
                    break;
                case 'n':
                    opt_narenas_lshift--;
                    break;
                case 'N':
                    opt_narenas_lshift++;
                    break;
                case 'p':
                    opt_print_stats = false;
                    break;
                case 'P':
                    opt_print_stats = true;
                    break;
                case 'q':
                    if (opt_qspace_max_2pow > QUANTUM_2POW)
                        opt_qspace_max_2pow--;
                    break;
                case 'Q':
                    if (opt_qspace_max_2pow + 1 <
                        opt_cspace_max_2pow)
                        opt_qspace_max_2pow++;
                    break;
#ifdef MALLOC_MAG
                case 'R':
                    if (opt_mag_size_2pow + 1 < (8U <<
                        SIZEOF_PTR_2POW))
                        opt_mag_size_2pow++;
                    break;
                case 'r':
                    /*
                     * Make sure there's always at least
                     * one round per magazine.
                     */
                    if ((1U << (opt_mag_size_2pow-1)) >=
                        sizeof(mag_t))
                        opt_mag_size_2pow--;
                    break;
#endif
                case 'u':
                    opt_utrace = false;
                    break;
                case 'U':
                    opt_utrace = true;
                    break;
                case 'v':
                    opt_sysv = false;
                    break;
                case 'V':
                    opt_sysv = true;
                    break;
                case 'x':
                    opt_xmalloc = false;
                    break;
                case 'X':
                    opt_xmalloc = true;
                    break;
                case 'z':
                    opt_zero = false;
                    break;
                case 'Z':
                    opt_zero = true;
                    break;
                default: {
                    char cbuf[2];

                    cbuf[0] = opts[j];
                    cbuf[1] = '\0';
                    _malloc_message(_getprogname(),
                        ": (malloc) Unsupported character "
                        "in malloc options: '", cbuf,
                        "'\n");
                }
                }
            }
        }
    }
    /* Make sure that there is some method for acquiring memory. */
#ifdef MALLOC_DSS
    if (opt_dss == false && opt_mmap == false)
        opt_mmap = true;
#endif

    /* Take care to call atexit() only once. */
    if (opt_print_stats) {
        /* Print statistics at exit. */
        atexit(malloc_print_stats);
    }

#ifdef MALLOC_MAG
    /*
     * Calculate the actual number of rounds per magazine, taking into
     * account header overhead.
     */
    max_rounds = (1LLU << (opt_mag_size_2pow - SIZEOF_PTR_2POW)) -
        (sizeof(mag_t) >> SIZEOF_PTR_2POW) + 1;
#endif
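/*
 * Example of the max_rounds calculation (illustrative): on a 64-bit
 * system (SIZEOF_PTR_2POW == 3) with opt_mag_size_2pow == 9, a magazine
 * occupies 512 bytes, i.e. 64 pointer slots, minus the slots consumed by
 * the mag_t header itself (plus one, since the rounds array's first slot
 * is already counted inside sizeof(mag_t)).
 */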
    /* Set variables according to the value of opt_[qc]space_max_2pow. */
    qspace_max = (1U << opt_qspace_max_2pow);
    cspace_min = CACHELINE_CEILING(qspace_max);
    if (cspace_min == qspace_max)
        cspace_min += CACHELINE;
    cspace_max = (1U << opt_cspace_max_2pow);
    sspace_min = SUBPAGE_CEILING(cspace_max);
    if (sspace_min == cspace_max)
        sspace_min += SUBPAGE;
    assert(sspace_min < PAGE_SIZE);
    sspace_max = PAGE_SIZE - SUBPAGE;

#ifdef MALLOC_TINY
    assert(QUANTUM_2POW >= TINY_MIN_2POW);
#endif
    assert(ntbins <= QUANTUM_2POW);
    nqbins = qspace_max >> QUANTUM_2POW;
    ncbins = ((cspace_max - cspace_min) >> CACHELINE_2POW) + 1;
    nsbins = ((sspace_max - sspace_min) >> SUBPAGE_2POW) + 1;
    nbins = ntbins + nqbins + ncbins + nsbins;
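/*
 * Example with typical defaults (illustrative; assumes a 16-byte quantum,
 * 64-byte cachelines, 4 kB pages, and the default [qc]space maxima of 128
 * and 512): qspace_max == 128 gives nqbins == 8; cspace_min == 192 and
 * cspace_max == 512 give ncbins == 6; sspace_min == 768 and
 * sspace_max == 3840 give nsbins == 13.
 */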
5096 if (size2bin_init()) {
5097 malloc_mutex_unlock(&init_lock);
5101 /* Set variables according to the value of opt_chunk_2pow. */
5102 chunksize = (1LU << opt_chunk_2pow);
5103 chunksize_mask = chunksize - 1;
5104 chunk_npages = (chunksize >> PAGE_SHIFT);
5109 * Compute the header size such that it is large enough to
5110 * contain the page map.
5112 header_size = sizeof(arena_chunk_t) +
5113 (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
5114 arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
5115 ((header_size & PAGE_MASK) != 0);
5117 arena_maxclass = chunksize - (arena_chunk_header_npages <<
5123 memset(&stats_chunks, 0, sizeof(chunk_stats_t));
5126 /* Various sanity checks that regard configuration. */
5127 assert(chunksize >= PAGE_SIZE);
    /* Initialize chunks data. */
    malloc_mutex_init(&huge_mtx);
    extent_tree_ad_new(&huge);

    malloc_mutex_init(&dss_mtx);
    dss_base = sbrk(0);
    dss_prev = dss_base;
    dss_max = dss_base;
    extent_tree_szad_new(&dss_chunks_szad);
    extent_tree_ad_new(&dss_chunks_ad);
    /* Initialize base allocation data structures. */

    /*
     * Allocate a base chunk here, since it doesn't actually have to be
     * chunk-aligned.  Doing this before allocating any other chunks allows
     * the use of space that would otherwise be wasted.
     */
    base_pages_alloc(0);

    malloc_mutex_init(&base_mtx);
    if (ncpus > 1) {
        /*
         * For SMP systems, create twice as many arenas as there are
         * CPUs by default.
         */
        opt_narenas_lshift++;
    }
    /* Determine how many arenas to use. */
    narenas = ncpus;
    if (opt_narenas_lshift > 0) {
        if ((narenas << opt_narenas_lshift) > narenas)
            narenas <<= opt_narenas_lshift;
        /*
         * Make sure not to exceed the limits of what base_alloc() can
         * handle.
         */
        if (narenas * sizeof(arena_t *) > chunksize)
            narenas = chunksize / sizeof(arena_t *);
    } else if (opt_narenas_lshift < 0) {
        if ((narenas >> -opt_narenas_lshift) < narenas)
            narenas >>= -opt_narenas_lshift;
        /* Make sure there is at least one arena. */
        if (narenas == 0)
            narenas = 1;
    }
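    /*
     * The cap above exists because the arenas array itself comes from
     * base_alloc(), which hands out memory from chunk-sized regions.
     * Illustratively (not defaults): with 1 MB chunks and 4-byte
     * pointers, the array can name at most 1048576 / 4 == 262144 arenas.
     */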
#ifdef MALLOC_BALANCE
    assert(narenas != 0);
    for (narenas_2pow = 0;
        (narenas >> (narenas_2pow + 1)) != 0;
        narenas_2pow++);
#endif
    static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
        23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
        89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
        151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
        223, 227, 229, 233, 239, 241, 251, 257, 263};
    unsigned nprimes, parenas;

    /*
     * Pick a prime number of hash arenas that is more than narenas
     * so that direct hashing of pthread_self() pointers tends to
     * spread allocations evenly among the arenas.
     */
    assert((narenas & 1) == 0);    /* narenas must be even. */
    nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
    parenas = primes[nprimes - 1]; /* In case not enough primes. */
    for (i = 1; i < nprimes; i++) {
        if (primes[i] > narenas) {
            parenas = primes[i];
            break;
        }
    }
    narenas = parenas;
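    /*
     * Sketch of the intent (values illustrative): with narenas == 4 the
     * loop above selects parenas == 5, so a thread would be mapped to an
     * arena index along the lines of ((uintptr_t)pthread_self()) % 5.
     * Pointer values tend to share low-order alignment bits, so reducing
     * modulo a prime slightly larger than narenas scatters threads more
     * evenly than masking with a power of two would.
     */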
# ifndef MALLOC_BALANCE
    next_arena = 0;
# endif
    /* Allocate and initialize arenas. */
    arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
    if (arenas == NULL) {
        malloc_mutex_unlock(&init_lock);
        return (true);
    }
    /*
     * Zero the array.  In practice, this should always be pre-zeroed,
     * since it was just mmap()ed, but let's be sure.
     */
    memset(arenas, 0, sizeof(arena_t *) * narenas);
    /*
     * Initialize one arena here.  The rest are lazily created in
     * choose_arena_hard().
     */
    arenas_extend(0);
    if (arenas[0] == NULL) {
        malloc_mutex_unlock(&init_lock);
        return (true);
    }
    /*
     * Assign the initial arena to the initial thread, in order to avoid
     * spurious creation of an extra arena if the application switches to
     * threaded mode.
     */
    arenas_map = arenas[0];
    /*
     * Seed here for the initial thread, since choose_arena_hard() is only
     * called for other threads.  The seed value doesn't really matter.
     */
#ifdef MALLOC_BALANCE
    SPRN(balance, 42);
#endif

    malloc_spin_init(&arenas_lock);

    malloc_initialized = true;
    malloc_mutex_unlock(&init_lock);
    return (false);
/*
 * End general internal functions.
 */
/******************************************************************************/
/*
 * Begin malloc(3)-compatible functions.
 */
void *
malloc(size_t size)
{
    void *ret;

    if (malloc_init()) {
        ret = NULL;
        goto RETURN;
    }

    if (size == 0) {
        if (opt_sysv == false)
            size = 1;
        else {
            ret = NULL;
            goto RETURN;
        }
    }

    ret = imalloc(size);

RETURN:
    if (ret == NULL) {
        if (opt_xmalloc) {
            _malloc_message(_getprogname(),
                ": (malloc) Error in malloc(): out of memory\n", "",
                "");
            abort();
        }
        errno = ENOMEM;
    }

    UTRACE(0, size, ret);
    return (ret);
}
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
    int ret;
    void *result;

    /* Make sure that alignment is a large enough power of 2. */
    if (((alignment - 1) & alignment) != 0
        || alignment < sizeof(void *)) {
        if (opt_xmalloc) {
            _malloc_message(_getprogname(),
                ": (malloc) Error in posix_memalign(): "
                "invalid alignment\n", "", "");
            abort();
        }
        result = NULL;
        ret = EINVAL;
        goto RETURN;
    }

    if (size == 0) {
        if (opt_sysv == false)
            size = 1;
    }

    result = ipalloc(alignment, size);

    if (result == NULL) {
        if (opt_xmalloc) {
            _malloc_message(_getprogname(),
                ": (malloc) Error in posix_memalign(): out of memory\n",
                "", "");
            abort();
        }
        ret = ENOMEM;
        goto RETURN;
    }

    *memptr = result;
    ret = 0;

RETURN:
    UTRACE(0, size, result);
    return (ret);
}
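/*
 * Caller-side sketch of the contract enforced above (illustrative only,
 * not part of this library):
 *
 *	void *p;
 *	if (posix_memalign(&p, 64, 1024) == 0) {
 *		assert(((uintptr_t)p & 63) == 0);
 *		free(p);
 *	}
 *
 * An alignment of 64 passes both checks: it is a power of two, so
 * ((64 - 1) & 64) == 0, and it is >= sizeof(void *).  Requests such as 3
 * (not a power of two) or 1 (smaller than a pointer) fail the checks and
 * return EINVAL without attempting an allocation.
 */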
void *
calloc(size_t num, size_t size)
{
    void *ret;
    size_t num_size;

    if (malloc_init()) {
        num_size = 0;
        ret = NULL;
        goto RETURN;
    }

    num_size = num * size;
    if (num_size == 0) {
        if ((opt_sysv == false) && ((num == 0) || (size == 0)))
            num_size = 1;
        else {
            ret = NULL;
            goto RETURN;
        }
    /*
     * Try to avoid division here.  We know that it isn't possible to
     * overflow during multiplication if neither operand uses any of the
     * most significant half of the bits in a size_t.
     */
    } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
        && (num_size / size != num)) {
        /* size_t overflow. */
        ret = NULL;
        goto RETURN;
    }

    ret = icalloc(num_size);

RETURN:
    if (ret == NULL) {
        if (opt_xmalloc) {
            _malloc_message(_getprogname(),
                ": (malloc) Error in calloc(): out of memory\n", "",
                "");
            abort();
        }
        errno = ENOMEM;
    }

    UTRACE(0, num_size, ret);
    return (ret);
}
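/*
 * Worked example of the overflow screen in calloc() above, with a 32-bit
 * size_t: SIZE_T_MAX << (sizeof(size_t) << 2) == SIZE_T_MAX << 16 masks
 * the upper half of the word.  num == size == 0x10000 trips the mask
 * test, and the division check then catches the wrap: 0x10000 * 0x10000
 * truncates to 0, and 0 / 0x10000 != 0x10000.  When both operands fit in
 * the lower 16 bits, the product cannot exceed 32 bits, so the division
 * is skipped on the common path.
 */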
void *
realloc(void *ptr, size_t size)
{
    void *ret;

    if (size == 0) {
        if (opt_sysv == false)
            size = 1;
        else {
            if (ptr != NULL)
                idalloc(ptr);
            ret = NULL;
            goto RETURN;
        }
    }

    if (ptr != NULL) {
        assert(malloc_initialized);

        ret = iralloc(ptr, size);

        if (ret == NULL) {
            if (opt_xmalloc) {
                _malloc_message(_getprogname(),
                    ": (malloc) Error in realloc(): out of "
                    "memory\n", "", "");
                abort();
            }
            errno = ENOMEM;
        }
    } else {
        if (malloc_init())
            ret = NULL;
        else
            ret = imalloc(size);

        if (ret == NULL) {
            if (opt_xmalloc) {
                _malloc_message(_getprogname(),
                    ": (malloc) Error in realloc(): out of "
                    "memory\n", "", "");
                abort();
            }
            errno = ENOMEM;
        }
    }

RETURN:
    UTRACE(ptr, size, ret);
    return (ret);
}
void
free(void *ptr)
{

    UTRACE(ptr, 0, 0);
    if (ptr != NULL) {
        assert(malloc_initialized);

        idalloc(ptr);
    }
}

/*
 * End malloc(3)-compatible functions.
 */
/******************************************************************************/
/*
 * Begin non-standard functions.
 */
size_t
malloc_usable_size(const void *ptr)
{

    assert(ptr != NULL);

    return (isalloc(ptr));
}
/*
 * End non-standard functions.
 */
/******************************************************************************/
/*
 * Begin library-private functions.
 */
/******************************************************************************/
/*
 * Begin thread cache.
 */
/*
 * We provide an unpublished interface in order to receive notifications from
 * the pthreads library whenever a thread exits.  This allows us to clean up
 * thread caches.
 */
void
_malloc_thread_cleanup(void)
{

    if (mag_rack != NULL) {
        assert(mag_rack != (void *)-1);
        mag_rack_destroy(mag_rack);
        mag_rack = (void *)-1;
    }
}
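/*
 * The (void *)-1 sentinel stored above presumably distinguishes "rack
 * already torn down" (late in thread exit) from "rack never created"
 * (NULL), so that allocator activity during the remainder of thread
 * shutdown does not build a fresh rack only to leak it.
 */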
/*
 * The following functions are used by threading libraries for protection of
 * malloc during fork().  These functions are only called if the program is
 * running in threaded mode, so there is no need to check whether the program
 * is threaded here.
 */
void
_malloc_prefork(void)
{
    unsigned i, j;
    arena_t *larenas[narenas], *tarenas[narenas];

    /* Acquire all mutexes in a safe order. */

    /*
     * arenas_lock must be acquired after all of the arena mutexes, in
     * order to avoid potential deadlock with arena_lock_balance[_hard]().
     * Since arenas_lock protects the arenas array, the following code has
     * to race with arenas_extend() callers until it succeeds in locking
     * all arenas before locking arenas_lock.
     */
    memset(larenas, 0, sizeof(arena_t *) * narenas);
    do {
        malloc_spin_lock(&arenas_lock);
        for (i = 0; i < narenas; i++) {
            if (arenas[i] != larenas[i]) {
                memcpy(tarenas, arenas, sizeof(arena_t *) *
                    narenas);
                malloc_spin_unlock(&arenas_lock);
                for (j = 0; j < narenas; j++) {
                    if (larenas[j] != tarenas[j]) {
                        larenas[j] = tarenas[j];
                        malloc_spin_lock(
                            &larenas[j]->lock);
                    }
                }
                break;
            }
        }
    } while (i < narenas);

    malloc_mutex_lock(&base_mtx);

    malloc_mutex_lock(&huge_mtx);

    malloc_mutex_lock(&dss_mtx);
}
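/*
 * With every allocator mutex held across fork(), the child cannot
 * inherit a lock that some other thread was holding mid-update; the
 * matching unlocks in _malloc_postfork() therefore leave both parent and
 * child with a consistent, usable heap.
 */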
void
_malloc_postfork(void)
{
    unsigned i;
    arena_t *larenas[narenas];

    /* Release all mutexes, now that fork() has completed. */

    malloc_mutex_unlock(&dss_mtx);

    malloc_mutex_unlock(&huge_mtx);

    malloc_mutex_unlock(&base_mtx);

    memcpy(larenas, arenas, sizeof(arena_t *) * narenas);
    malloc_spin_unlock(&arenas_lock);
    for (i = 0; i < narenas; i++) {
        if (larenas[i] != NULL)
            malloc_spin_unlock(&larenas[i]->lock);
    }
}
/*
 * End library-private functions.
 */
/******************************************************************************/