/*
 * Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *******************************************************************************
 *
 * This allocator implementation is designed to provide scalable performance
 * for multi-threaded programs on multi-processor systems.  The following
 * features are included for this purpose:
 *
 *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
 *     contention and cache sloshing.
 *
 *   + Cache line sharing between arenas is avoided for internal data
 *     structures, in order to avoid false cache line sharing.
 *
 *   + Memory is managed in chunks and runs (chunks can be split into runs),
 *     rather than as individual pages.  This provides a constant-time
 *     mechanism for associating allocations with particular arenas.
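 *
 * As an illustrative sketch (in terms of the CHUNK_ADDR2BASE() macro and the
 * arena_chunk_t header declared later in this file), the constant-time
 * association works by masking an allocation's address down to its
 * chunksize-aligned chunk base, whose header names the owning arena:
 *
 *     arena_chunk_t *chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
 *     arena_t *arena = chunk->arena;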
 *
 * Allocation requests are rounded up to the nearest size class, and no record
 * of the original request size is maintained.  Allocations are broken into
 * categories according to size class.  Assuming runtime defaults, 4 kB pages
 * and a 16 byte quantum on a 32-bit system, the size classes in each category
 * are as follows:
 *
 *   |=====================================|
 *   | Category | Subcategory    |    Size |
 *   |=====================================|
 *   | Small    | Tiny           |       2 |
 *   |          |                |       4 |
 *   |          |                |       8 |
 *   |          |----------------+---------|
 *   |          | Quantum-spaced |      16 |
 *   |          |                |      32 |
 *   |          |                |      48 |
 *   |          |                |     ... |
 *   |          |                |     480 |
 *   |          |                |     496 |
 *   |          |                |     512 |
 *   |          |----------------+---------|
 *   |          | Sub-page       |    1 kB |
 *   |          |                |    2 kB |
 *   |=====================================|
 *   | Large                     |    4 kB |
 *   |                           |    8 kB |
 *   |                           |   12 kB |
 *   |                           |     ... |
 *   |                           | 1012 kB |
 *   |                           | 1016 kB |
 *   |                           | 1020 kB |
 *   |=====================================|
 *   | Huge                      |    1 MB |
 *   |                           |    2 MB |
 *   |                           |    3 MB |
 *   |                           |     ... |
 *   |=====================================|
 *
 * A different mechanism is used for each category:
 *
 *   Small : Each size class is segregated into its own set of runs.  Each run
 *           maintains a bitmap of which regions are free/allocated.
 *
 *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 *           in the associated arena chunk header maps.
 *
 *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 *          Metadata are stored in a separate red-black tree.
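 *
 * For example (under the runtime defaults assumed above), requests round up
 * to the smallest class that can service them:
 *
 *   malloc(7)    returns an 8 byte region   (tiny: power of two)
 *   malloc(100)  returns a 112 byte region  (quantum-spaced: 7 * 16)
 *   malloc(700)  returns a 1 kB region      (sub-page: power of two)
 *   malloc(5000) returns an 8 kB region     (large: rounded to whole pages)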
 *
 *******************************************************************************
 */

/*
 * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
 * defaults the A and J runtime options to off.  These settings are appropriate
 * for production systems.
 */
#define MALLOC_PRODUCTION

#ifndef MALLOC_PRODUCTION
   /*
    * MALLOC_DEBUG enables assertions and other sanity checks, and disables
    * inline functions.
    */
#  define MALLOC_DEBUG

   /* MALLOC_STATS enables statistics calculation. */
#  define MALLOC_STATS
#endif

/*
 * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
 * re-balances arena load if exponentially averaged contention exceeds a
 * certain threshold.
 */
#define MALLOC_BALANCE

/*
 * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
 * segment (DSS).  In an ideal world, this functionality would be completely
 * unnecessary, but we are burdened by history and the lack of resource limits
 * for anonymous mapped memory.
 */
#define MALLOC_DSS

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "libc_private.h"
#include "spinlock.h"
#include "namespace.h"
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stddef.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/ktrace.h> /* Must come after several other sys/ includes. */

#include <machine/cpufunc.h>
#include <machine/vmparam.h>

#include "un-namespace.h"

#ifdef MALLOC_DEBUG
   /* Disable inlining to make debugging easier. */
#  define inline
#endif

/* Size of stack-allocated buffer passed to strerror_r(). */
#define STRERROR_BUF 64

/* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
#ifdef __i386__
#  define QUANTUM_2POW_MIN 4
#  define SIZEOF_PTR_2POW 2
#  define CPU_SPINWAIT __asm__ volatile("pause")
#endif
#ifdef __ia64__
#  define QUANTUM_2POW_MIN 4
#  define SIZEOF_PTR_2POW 3
#endif
#ifdef __alpha__
#  define QUANTUM_2POW_MIN 4
#  define SIZEOF_PTR_2POW 3
#  define NO_TLS
#endif
#ifdef __sparc64__
#  define QUANTUM_2POW_MIN 4
#  define SIZEOF_PTR_2POW 3
#  define NO_TLS
#endif
#ifdef __amd64__
#  define QUANTUM_2POW_MIN 4
#  define SIZEOF_PTR_2POW 3
#  define CPU_SPINWAIT __asm__ volatile("pause")
#endif
#ifdef __arm__
#  define QUANTUM_2POW_MIN 3
#  define SIZEOF_PTR_2POW 2
#  define NO_TLS
#endif
#ifdef __mips__
#  define QUANTUM_2POW_MIN 3
#  define SIZEOF_PTR_2POW 2
#  define NO_TLS
#endif
#ifdef __powerpc__
#  define QUANTUM_2POW_MIN 4
#  define SIZEOF_PTR_2POW 2
#endif

#define SIZEOF_PTR (1U << SIZEOF_PTR_2POW)

/* sizeof(int) == (1U << SIZEOF_INT_2POW). */
#ifndef SIZEOF_INT_2POW
#  define SIZEOF_INT_2POW 2
#endif

/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
#if (!defined(PIC) && !defined(NO_TLS))
#  define NO_TLS
#endif

#ifdef NO_TLS
   /* MALLOC_BALANCE requires TLS. */
#  ifdef MALLOC_BALANCE
#    undef MALLOC_BALANCE
#  endif
#endif

/*
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.
 */
#define CHUNK_2POW_DEFAULT 20

/* Maximum number of dirty pages per arena. */
#define DIRTY_MAX_DEFAULT (1U << 9)
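
/*
 * Worked example (assuming 4 kB pages): (1U << 9) == 512 dirty pages allow up
 * to 2 MB of dirty, unused memory per arena before arena_purge() starts
 * returning pages to the VM system via madvise(..., MADV_FREE).
 */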

/*
 * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 * so over-estimates are okay (up to a point), but under-estimates will
 * negatively affect performance.
 */
#define CACHELINE_2POW 6
#define CACHELINE ((size_t)(1U << CACHELINE_2POW))

/* Smallest size class to support. */
#define TINY_MIN_2POW 1

/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define SMALL_MAX_2POW_DEFAULT 9
#define SMALL_MAX_DEFAULT (1U << SMALL_MAX_2POW_DEFAULT)

/*
 * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 * as small as possible such that this setting is still honored, without
 * violating other constraints.  The goal is to make runs as small as possible
 * without exceeding a per run external fragmentation threshold.
 *
 * We use binary fixed point math for overhead computations, where the binary
 * point is implicitly RUN_BFP bits to the left.
 *
 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 * honored for some/all object sizes, since there is one bit of header overhead
 * per object (plus a constant).  This constraint is relaxed (ignored) for runs
 * that are so small that the per-region overhead is greater than:
 *
 *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
 */
#define RUN_BFP 12
/*                                    \/   Implicit binary fixed point. */
#define RUN_MAX_OVRHD 0x0000003dU
#define RUN_MAX_OVRHD_RELAX 0x00001800U

/* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
#define RUN_MAX_SMALL_2POW 15
#define RUN_MAX_SMALL (1U << RUN_MAX_SMALL_2POW)
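
/*
 * Worked reading of the fixed point constants above (the binary point sits
 * RUN_BFP == 12 bits to the left, so a value v denotes v/4096):
 *
 *   RUN_MAX_OVRHD       == 0x3d   == 61/4096, i.e. ~1.5% target overhead
 *   RUN_MAX_OVRHD_RELAX == 0x1800 == 6144/4096 == 1.5
 *
 * RUN_MAX_SMALL == (1U << 15) == 32 kB caps small object run size even when
 * the overhead target cannot be met.
 */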

/*
 * Hyper-threaded CPUs may need a special instruction inside spin loops in
 * order to yield to another virtual CPU.  If no such instruction is defined
 * above, make CPU_SPINWAIT a no-op.
 */
#ifndef CPU_SPINWAIT
#  define CPU_SPINWAIT
#endif

/*
 * Adaptive spinning must eventually switch to blocking, in order to avoid the
 * potential for priority inversion deadlock.  Backing off past a certain point
 * can actually waste time.
 */
#define SPIN_LIMIT_2POW 11

/*
 * Conversion from spinning to blocking is expensive; we use (1U <<
 * BLOCK_COST_2POW) to estimate how many more times costly blocking is than
 * worst-case spinning.
 */
#define BLOCK_COST_2POW 4

#ifdef MALLOC_BALANCE
   /*
    * We use an exponential moving average to track recent lock contention,
    * where the size of the history window is N, and alpha=2/(N+1).
    *
    * Due to integer math rounding, very small values here can cause
    * substantial degradation in accuracy, thus making the moving average decay
    * faster than it would with precise calculation.
    */
#  define BALANCE_ALPHA_INV_2POW 9

   /*
    * Threshold value for the exponential moving contention average at which to
    * re-assign a thread.
    */
#  define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4))
#endif
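
/*
 * Worked example of the averaging parameters above: BALANCE_ALPHA_INV_2POW ==
 * 9 gives alpha == 1/512, and solving alpha == 2/(N+1) yields a history
 * window of N == 1023 lock acquisitions.  An integer-only update along these
 * lines is a sketch of the technique (not necessarily this allocator's exact
 * code):
 *
 *     contention -= (contention >> BALANCE_ALPHA_INV_2POW);
 *     contention += (spins >> BALANCE_ALPHA_INV_2POW);
 */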

/******************************************************************************/
/*
 * Mutexes based on spinlocks.  We can't use normal pthread spinlocks in all
 * places, because they require malloc()ed memory, which causes bootstrapping
 * issues in some cases.
 */
typedef struct {
    spinlock_t lock;
} malloc_mutex_t;

/* Set to true once the allocator has been initialized. */
static bool malloc_initialized = false;

/* Used to avoid initialization races. */
static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};

/******************************************************************************/
/*
 * Statistics data structures.
 */

#ifdef MALLOC_STATS

typedef struct malloc_bin_stats_s malloc_bin_stats_t;
struct malloc_bin_stats_s {
    /*
     * Number of allocation requests that corresponded to the size of this
     * bin.
     */
    uint64_t nrequests;

    /* Total number of runs created for this bin's size class. */
    uint64_t nruns;

    /*
     * Total number of runs reused by extracting them from the runs tree for
     * this bin's size class.
     */
    uint64_t reruns;

    /* High-water mark for this bin. */
    unsigned long highruns;

    /* Current number of runs in this bin. */
    unsigned long curruns;
};

typedef struct arena_stats_s arena_stats_t;
struct arena_stats_s {
    /* Number of bytes currently mapped. */
    size_t mapped;

    /*
     * Total number of purge sweeps, total number of madvise calls made,
     * and total pages purged in order to keep dirty unused memory under
     * control.
     */
    uint64_t npurge;
    uint64_t nmadvise;
    uint64_t purged;

    /* Per-size-category statistics. */
    size_t allocated_small;
    uint64_t nmalloc_small;
    uint64_t ndalloc_small;

    size_t allocated_large;
    uint64_t nmalloc_large;
    uint64_t ndalloc_large;

#ifdef MALLOC_BALANCE
    /* Number of times this arena reassigned a thread due to contention. */
    uint64_t nbalance;
#endif
};

typedef struct chunk_stats_s chunk_stats_t;
struct chunk_stats_s {
    /* Number of chunks that were allocated. */
    uint64_t nchunks;

    /* High-water mark for number of chunks allocated. */
    unsigned long highchunks;

    /*
     * Current number of chunks allocated.  This value isn't maintained for
     * any other purpose, so keep track of it in order to be able to set
     * highchunks.
     */
    unsigned long curchunks;
};

#endif /* #ifdef MALLOC_STATS */

/******************************************************************************/
/*
 * Extent data structures.
 */

/* Tree of extents. */
typedef struct extent_node_s extent_node_t;
struct extent_node_s {
#ifdef MALLOC_DSS
    /* Linkage for the size/address-ordered tree. */
    rb_node(extent_node_t) link_szad;
#endif

    /* Linkage for the address-ordered tree. */
    rb_node(extent_node_t) link_ad;

    /* Pointer to the extent that this tree node is responsible for. */
    void *addr;

    /* Total region size. */
    size_t size;
};
typedef rb_tree(extent_node_t) extent_tree_t;

/******************************************************************************/
/*
 * Arena data structures.
 */

typedef struct arena_s arena_t;
typedef struct arena_bin_s arena_bin_t;

/* Each element of the chunk map corresponds to one page within the chunk. */
typedef struct arena_chunk_map_s arena_chunk_map_t;
struct arena_chunk_map_s {
    /*
     * Linkage for run trees.  There are two disjoint uses:
     *
     * 1) arena_t's runs_avail tree.
     * 2) arena_run_t conceptually uses this linkage for in-use non-full
     *    runs, rather than directly embedding linkage.
     */
    rb_node(arena_chunk_map_t) link;

    /*
     * Run address (or size) and various flags are stored together.  The bit
     * layout looks like (assuming 32-bit system):
     *
     *   ???????? ???????? ????---- ---kdzla
     *
     * ? : Unallocated: Run address for first/last pages, unset for internal
     *                  pages.
     *     Small: Run address.
     *     Large: Run size for first page, unset for trailing pages.
     * - : Unused.
     * k : key?
     * d : dirty?
     * z : zeroed?
     * l : large?
     * a : allocated?
     *
     * Following are example bit patterns for the three types of runs.
     *
     * r : run address
     * s : run size
     * x : don't care
     * - : 0
     *
     *   Unallocated:
     *     ssssssss ssssssss ssss---- --------
     *     xxxxxxxx xxxxxxxx xxxx---- ----d---
     *     ssssssss ssssssss ssss---- -----z--
     *
     *   Small:
     *     rrrrrrrr rrrrrrrr rrrr---- -------a
     *     rrrrrrrr rrrrrrrr rrrr---- -------a
     *     rrrrrrrr rrrrrrrr rrrr---- -------a
     *
     *   Large:
     *     ssssssss ssssssss ssss---- ------la
     *     -------- -------- -------- ------la
     *     -------- -------- -------- ------la
     */
    size_t bits;
#define CHUNK_MAP_KEY ((size_t)0x10U)
#define CHUNK_MAP_DIRTY ((size_t)0x08U)
#define CHUNK_MAP_ZEROED ((size_t)0x04U)
#define CHUNK_MAP_LARGE ((size_t)0x02U)
#define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
};
typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
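
/*
 * Illustrative decoding of a map entry, following the layout documented above
 * (a sketch, not allocator code):
 *
 *     size_t bits = chunk->map[pageind].bits;
 *     size_t size = bits & ~pagesize_mask;  // High bits: size or address.
 *
 *     if ((bits & CHUNK_MAP_ALLOCATED) == 0) {
 *         // Page belongs to a free run; first/last pages carry "size".
 *     } else if (bits & CHUNK_MAP_LARGE) {
 *         // Large run; "size" is valid on the first page only.
 *     } else {
 *         // Small run; the high bits hold the run address instead.
 *     }
 */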

/* Arena chunk header. */
typedef struct arena_chunk_s arena_chunk_t;
struct arena_chunk_s {
    /* Arena that owns the chunk. */
    arena_t *arena;

    /* Linkage for the arena's chunks_dirty tree. */
    rb_node(arena_chunk_t) link_dirty;

    /* Number of dirty pages. */
    size_t ndirty;

    /* Map of pages within chunk that keeps track of free/large/small. */
    arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;

typedef struct arena_run_s arena_run_t;
struct arena_run_s {
#ifdef MALLOC_DEBUG
    uint32_t magic;
#  define ARENA_RUN_MAGIC 0x384adf93
#endif

    /* Bin this run is associated with. */
    arena_bin_t *bin;

    /* Index of first element that might have a free region. */
    unsigned regs_minelm;

    /* Number of free regions in run. */
    unsigned nfree;

    /* Bitmask of in-use regions (0: in use, 1: free). */
    unsigned regs_mask[1]; /* Dynamically sized. */
};

struct arena_bin_s {
    /*
     * Current run being used to service allocations of this bin's size
     * class.
     */
    arena_run_t *runcur;

    /*
     * Tree of non-full runs.  This tree is used when looking for an
     * existing run when runcur is no longer usable.  We choose the
     * non-full run that is lowest in memory; this policy tends to keep
     * objects packed well, and it can also help reduce the number of
     * almost-empty chunks.
     */
    arena_run_tree_t runs;

    /* Size of regions in a run for this bin's size class. */
    size_t reg_size;

    /* Total size of a run for this bin's size class. */
    size_t run_size;

    /* Total number of regions in a run for this bin's size class. */
    uint32_t nregs;

    /* Number of elements in a run's regs_mask for this bin's size class. */
    uint32_t regs_mask_nelms;

    /* Offset of first region in a run for this bin's size class. */
    uint32_t reg0_offset;

#ifdef MALLOC_STATS
    /* Bin statistics. */
    malloc_bin_stats_t stats;
#endif
};

struct arena_s {
#ifdef MALLOC_DEBUG
    uint32_t magic;
#  define ARENA_MAGIC 0x947d3d24
#endif

    /* All operations on this arena require that lock be locked. */
    pthread_mutex_t lock;

#ifdef MALLOC_STATS
    arena_stats_t stats;
#endif

    /* Tree of dirty-page-containing chunks this arena manages. */
    arena_chunk_tree_t chunks_dirty;

    /*
     * In order to avoid rapid chunk allocation/deallocation when an arena
     * oscillates right on the cusp of needing a new chunk, cache the most
     * recently freed chunk.  The spare is left in the arena's chunk trees
     * until it is deleted.
     *
     * There is one spare chunk per arena, rather than one spare total, in
     * order to avoid interactions between multiple threads that could make
     * a single spare inadequate.
     */
    arena_chunk_t *spare;

    /*
     * Current count of pages within unused runs that are potentially
     * dirty, and for which madvise(... MADV_FREE) has not been called.  By
     * tracking this, we can institute a limit on how much dirty unused
     * memory is mapped for each arena.
     */
    size_t ndirty;

    /*
     * Size/address-ordered tree of this arena's available runs.  This tree
     * is used for first-best-fit run allocation.
     */
    arena_avail_tree_t runs_avail;

#ifdef MALLOC_BALANCE
    /*
     * The arena load balancing machinery needs to keep track of how much
     * lock contention there is.  This value is exponentially averaged.
     */
    uint32_t contention;
#endif

    /*
     * bins is used to store rings of free regions of the following sizes,
     * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
     */
    arena_bin_t bins[1]; /* Dynamically sized. */
};

/******************************************************************************/
/*
 * Data.
 */

/* Number of CPUs. */
static unsigned ncpus;

/* VM page size. */
static size_t pagesize;
static size_t pagesize_mask;
static size_t pagesize_2pow;

/* Various bin-related settings. */
static size_t bin_maxclass; /* Max size class for bins. */
static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */
static unsigned nqbins; /* Number of quantum-spaced bins. */
static unsigned nsbins; /* Number of (2^n)-spaced sub-page bins. */
static size_t small_min;
static size_t small_max;

/* Various quantum-related settings. */
static size_t quantum;
static size_t quantum_mask; /* (quantum - 1). */

/* Various chunk-related settings. */
static size_t chunksize;
static size_t chunksize_mask; /* (chunksize - 1). */
static size_t chunk_npages;
static size_t arena_chunk_header_npages;
static size_t arena_maxclass; /* Max size class for arenas. */
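
/*
 * Worked example (defaults assumed, 32-bit system as in the header comment):
 * CHUNK_2POW_DEFAULT == 20 gives 1 MB chunks, i.e. 256 pages of 4 kB.  The
 * chunk header (including the page map) fits in a single page, so
 * arena_maxclass works out to 1 MB - 4 kB == 1020 kB, matching the largest
 * "Large" size class in the table at the top of this file.
 */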

/* Protects chunk-related data structures. */
static malloc_mutex_t huge_mtx;

/* Tree of chunks that are stand-alone huge allocations. */
static extent_tree_t huge;

#ifdef MALLOC_DSS
/*
 * Protects sbrk() calls.  This avoids malloc races among threads, though it
 * does not protect against races with threads that call sbrk() directly.
 */
static malloc_mutex_t dss_mtx;
/* Base address of the DSS. */
static void *dss_base;
/* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
static void *dss_prev;
/* Current upper limit on DSS addresses. */
static void *dss_max;

/*
 * Trees of chunks that were previously allocated (trees differ only in node
 * ordering).  These are used when allocating chunks, in an attempt to re-use
 * address space.  Depending on function, different tree orderings are needed,
 * which is why there are two trees with the same contents.
 */
static extent_tree_t dss_chunks_szad;
static extent_tree_t dss_chunks_ad;
#endif

#ifdef MALLOC_STATS
/* Huge allocation statistics. */
static uint64_t huge_nmalloc;
static uint64_t huge_ndalloc;
static size_t huge_allocated;
#endif

/****************************/
/*
 * base (internal allocation).
 */

/*
 * Current pages that are being used for internal memory allocations.  These
 * pages are carved up in cacheline-size quanta, so that there is no chance of
 * false cache line sharing.
 */
static void *base_pages;
static void *base_next_addr;
static void *base_past_addr; /* Addr immediately past base_pages. */
static extent_node_t *base_nodes;
static malloc_mutex_t base_mtx;
#ifdef MALLOC_STATS
static size_t base_mapped;
#endif

/****************************/
/*
 * Arenas.
 */

/*
 * Arenas that are used to service external requests.  Not all elements of the
 * arenas array are necessarily used; arenas are created lazily as needed.
 */
static arena_t **arenas;
static unsigned narenas;
#ifndef NO_TLS
#  ifdef MALLOC_BALANCE
static unsigned narenas_2pow;
#  else
static unsigned next_arena;
#  endif
#endif
static pthread_mutex_t arenas_lock; /* Protects arenas initialization. */

#ifndef NO_TLS
/*
 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 * for allocations.
 */
static __thread arena_t *arenas_map;
#endif

#ifdef MALLOC_STATS
/* Chunk statistics. */
static chunk_stats_t stats_chunks;
#endif

/*******************************/
/*
 * Runtime configuration options.
 */
const char *_malloc_options;

#ifndef MALLOC_PRODUCTION
static bool opt_abort = true;
static bool opt_junk = true;
#else
static bool opt_abort = false;
static bool opt_junk = false;
#endif
#ifdef MALLOC_DSS
static bool opt_dss = true;
static bool opt_mmap = true;
#endif
static size_t opt_dirty_max = DIRTY_MAX_DEFAULT;
#ifdef MALLOC_BALANCE
static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
#endif
static bool opt_print_stats = false;
static size_t opt_quantum_2pow = QUANTUM_2POW_MIN;
static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT;
static bool opt_utrace = false;
static bool opt_sysv = false;
static bool opt_xmalloc = false;
static bool opt_zero = false;
static int opt_narenas_lshift = 0;

typedef struct {
    void *p;
    size_t s;
    void *r;
} malloc_utrace_t;

#define UTRACE(a, b, c) \
    if (opt_utrace) { \
        malloc_utrace_t ut; \
        ut.p = (a); ut.s = (b); ut.r = (c); \
        utrace(&ut, sizeof(ut)); \
    }

/******************************************************************************/
/*
 * Begin function prototypes for non-inline static functions.
 */

static void malloc_mutex_init(malloc_mutex_t *mutex);
static bool malloc_spin_init(pthread_mutex_t *lock);
static void wrtmessage(const char *p1, const char *p2, const char *p3,
    const char *p4);
#ifdef MALLOC_STATS
static void malloc_printf(const char *format, ...);
#endif
static char *umax2s(uintmax_t x, char *s);
#ifdef MALLOC_DSS
static bool base_pages_alloc_dss(size_t minsize);
#endif
static bool base_pages_alloc_mmap(size_t minsize);
static bool base_pages_alloc(size_t minsize);
static void *base_alloc(size_t size);
static void *base_calloc(size_t number, size_t size);
static extent_node_t *base_node_alloc(void);
static void base_node_dealloc(extent_node_t *node);
#ifdef MALLOC_STATS
static void stats_print(arena_t *arena);
#endif
static void *pages_map(void *addr, size_t size);
static void pages_unmap(void *addr, size_t size);
#ifdef MALLOC_DSS
static void *chunk_alloc_dss(size_t size);
static void *chunk_recycle_dss(size_t size, bool zero);
#endif
static void *chunk_alloc_mmap(size_t size);
static void *chunk_alloc(size_t size, bool zero);
#ifdef MALLOC_DSS
static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
static bool chunk_dealloc_dss(void *chunk, size_t size);
#endif
static void chunk_dealloc_mmap(void *chunk, size_t size);
static void chunk_dealloc(void *chunk, size_t size);
#ifndef NO_TLS
static arena_t *choose_arena_hard(void);
#endif
static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
    bool large, bool zero);
static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
    bool zero);
static void arena_purge(arena_t *arena);
static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
    arena_run_t *run, size_t oldsize, size_t newsize);
static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
    arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
#ifdef MALLOC_BALANCE
static void arena_lock_balance_hard(arena_t *arena);
#endif
static void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
    size_t alloc_size);
static size_t arena_salloc(const void *ptr);
static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
    void *ptr);
static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
    void *ptr, size_t size, size_t oldsize);
static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
    void *ptr, size_t size, size_t oldsize);
static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
static bool arena_new(arena_t *arena);
static arena_t *arenas_extend(unsigned ind);
static void *huge_malloc(size_t size, bool zero);
static void *huge_palloc(size_t alignment, size_t size);
static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
static void huge_dalloc(void *ptr);
static void malloc_print_stats(void);
static bool malloc_init_hard(void);

/*
 * End function prototypes.
 */
/******************************************************************************/
/*
 * Begin mutex.  We can't use normal pthread mutexes in all places, because
 * they require malloc()ed memory, which causes bootstrapping issues in some
 * cases.
 */

static void
malloc_mutex_init(malloc_mutex_t *mutex)
{
    static const spinlock_t lock = _SPINLOCK_INITIALIZER;

    mutex->lock = lock;
}

static inline void
malloc_mutex_lock(malloc_mutex_t *mutex)
{

    if (__isthreaded)
        _SPINLOCK(&mutex->lock);
}

static inline void
malloc_mutex_unlock(malloc_mutex_t *mutex)
{

    if (__isthreaded)
        _SPINUNLOCK(&mutex->lock);
}

/*
 * End mutex.
 */
/******************************************************************************/
/*
 * Begin spin lock.  Spin locks here are actually adaptive mutexes that block
 * after a period of spinning, because unbounded spinning would allow for
 * priority inversion.
 */

/*
 * We use an unpublished interface to initialize pthread mutexes with an
 * allocation callback, in order to avoid infinite recursion.
 */
int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
    void *(calloc_cb)(size_t, size_t));

__weak_reference(_pthread_mutex_init_calloc_cb_stub,
    _pthread_mutex_init_calloc_cb);

int
_pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
    void *(calloc_cb)(size_t, size_t))
{

    return (0);
}

static bool
malloc_spin_init(pthread_mutex_t *lock)
{

    if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
        return (true);

    return (false);
}

static inline unsigned
malloc_spin_lock(pthread_mutex_t *lock)
{
    unsigned ret = 0;

    if (__isthreaded) {
        if (_pthread_mutex_trylock(lock) != 0) {
            unsigned i;
            volatile unsigned j;

            /* Exponentially back off. */
            for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
                for (j = 0; j < (1U << i); j++) {
                    ret++;
                    CPU_SPINWAIT;
                }

                if (_pthread_mutex_trylock(lock) == 0)
                    return (ret);
            }

            /*
             * Spinning failed.  Block until the lock becomes
             * available, in order to avoid indefinite priority
             * inversion.
             */
            _pthread_mutex_lock(lock);
            assert((ret << BLOCK_COST_2POW) != 0);
            return (ret << BLOCK_COST_2POW);
        }
    }

    return (ret);
}

static inline void
malloc_spin_unlock(pthread_mutex_t *lock)
{

    if (__isthreaded)
        _pthread_mutex_unlock(lock);
}

/*
 * End spin lock.
 */
/******************************************************************************/
/*
 * Begin Utility functions/macros.
 */

/* Return the chunk address for allocation address a. */
#define CHUNK_ADDR2BASE(a) \
    ((void *)((uintptr_t)(a) & ~chunksize_mask))

/* Return the chunk offset of address a. */
#define CHUNK_ADDR2OFFSET(a) \
    ((size_t)((uintptr_t)(a) & chunksize_mask))

/* Return the smallest chunk multiple that is >= s. */
#define CHUNK_CEILING(s) \
    (((s) + chunksize_mask) & ~chunksize_mask)

/* Return the smallest cacheline multiple that is >= s. */
#define CACHELINE_CEILING(s) \
    (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))

/* Return the smallest quantum multiple that is >= a. */
#define QUANTUM_CEILING(a) \
    (((a) + quantum_mask) & ~quantum_mask)

/* Return the smallest pagesize multiple that is >= s. */
#define PAGE_CEILING(s) \
    (((s) + pagesize_mask) & ~pagesize_mask)
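
/*
 * Worked example of the ceiling idiom above: each mask is one less than a
 * power of two, so adding the mask and then clearing its bits rounds up.
 * With quantum == 16 (quantum_mask == 15):
 *
 *     QUANTUM_CEILING(100) == (100 + 15) & ~15 == 112
 *     QUANTUM_CEILING(112) == (112 + 15) & ~15 == 112
 */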

/* Compute the smallest power of 2 that is >= x. */
static inline size_t
pow2_ceil(size_t x)
{

    x--;
    x |= x >> 1;
    x |= x >> 2;
    x |= x >> 4;
    x |= x >> 8;
    x |= x >> 16;
#if (SIZEOF_PTR == 8)
    x |= x >> 32;
#endif
    x++;
    return (x);
}

#ifdef MALLOC_BALANCE
/*
 * Use a simple linear congruential pseudo-random number generator:
 *
 *   prn(y) = (a*x + c) % m
 *
 * where the following constants ensure maximal period:
 *
 *   a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
 *   c == Odd number (relatively prime to 2^n).
 *   m == 2^32
 *
 * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
 *
 * This choice of m has the disadvantage that the quality of the bits is
 * proportional to bit position.  For example, the lowest bit has a cycle of 2,
 * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
 * bits.
 */
#  define PRN_DEFINE(suffix, var, a, c) \
static inline void \
sprn_##suffix(uint32_t seed) \
{ \
    var = seed; \
} \
 \
static inline uint32_t \
prn_##suffix(uint32_t lg_range) \
{ \
    uint32_t ret, x; \
 \
    assert(lg_range > 0); \
    assert(lg_range <= 32); \
 \
    x = (var * (a)) + (c); \
    var = x; \
    ret = x >> (32 - lg_range); \
 \
    return (ret); \
}

#  define SPRN(suffix, seed) sprn_##suffix(seed)
#  define PRN(suffix, lg_range) prn_##suffix(lg_range)
#endif

#ifdef MALLOC_BALANCE
/* Define the PRNG used for arena assignment. */
static __thread uint32_t balance_x;
PRN_DEFINE(balance, balance_x, 1297, 1301)
#endif
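
/*
 * Worked check of the LCG constants chosen above: a == 1297 is odd and
 * (a - 1) == 1296 == 4 * 324, and c == 1301 is odd, so both maximal-period
 * conditions hold with m == 2^32.  Each step computes
 *
 *     x = (1297 * x + 1301) mod 2^32
 *
 * and prn_balance() returns only the top lg_range bits, since the high bits
 * have the longest cycles.
 */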

static void
wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
{

    _write(STDERR_FILENO, p1, strlen(p1));
    _write(STDERR_FILENO, p2, strlen(p2));
    _write(STDERR_FILENO, p3, strlen(p3));
    _write(STDERR_FILENO, p4, strlen(p4));
}

void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
    const char *p4) = wrtmessage;

#ifdef MALLOC_STATS
/*
 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
 */
static void
malloc_printf(const char *format, ...)
{
    char buf[4096];
    va_list ap;

    va_start(ap, format);
    vsnprintf(buf, sizeof(buf), format, ap);
    va_end(ap);
    _malloc_message(buf, "", "", "");
}
#endif

/*
 * We don't want to depend on vsnprintf() for production builds, since that can
 * cause unnecessary bloat for static binaries.  umax2s() provides minimal
 * integer printing functionality, so that malloc_printf() use can be limited to
 * MALLOC_STATS code.
 */
#define UMAX2S_BUFSIZE 21
static char *
umax2s(uintmax_t x, char *s)
{
    unsigned i;

    /* Make sure UMAX2S_BUFSIZE is large enough. */
    assert(sizeof(uintmax_t) <= 8);

    i = UMAX2S_BUFSIZE - 1;
    s[i] = '\0';
    do {
        i--;
        s[i] = "0123456789"[x % 10];
        x /= 10;
    } while (x > 0);

    return (&s[i]);
}

/******************************************************************************/

#ifdef MALLOC_DSS
static bool
base_pages_alloc_dss(size_t minsize)
{

    /*
     * Do special DSS allocation here, since base allocations don't need to
     * be chunk-aligned.
     */
    malloc_mutex_lock(&dss_mtx);
    if (dss_prev != (void *)-1) {
        intptr_t incr;
        size_t csize = CHUNK_CEILING(minsize);

        do {
            /* Get the current end of the DSS. */
            dss_max = sbrk(0);

            /*
             * Calculate how much padding is necessary to
             * chunk-align the end of the DSS.  Don't worry about
             * dss_max not being chunk-aligned though.
             */
            incr = (intptr_t)chunksize
                - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
            assert(incr >= 0);
            if ((size_t)incr < minsize)
                incr += csize;

            dss_prev = sbrk(incr);
            if (dss_prev == dss_max) {
                /* Success. */
                dss_max = (void *)((intptr_t)dss_prev + incr);
                base_pages = dss_prev;
                base_next_addr = base_pages;
                base_past_addr = dss_max;
#ifdef MALLOC_STATS
                base_mapped += incr;
#endif
                malloc_mutex_unlock(&dss_mtx);
                return (false);
            }
        } while (dss_prev != (void *)-1);
    }
    malloc_mutex_unlock(&dss_mtx);

    return (true);
}
#endif

static bool
base_pages_alloc_mmap(size_t minsize)
{
    size_t csize;

    assert(minsize != 0);
    csize = PAGE_CEILING(minsize);
    base_pages = pages_map(NULL, csize);
    if (base_pages == NULL)
        return (true);
    base_next_addr = base_pages;
    base_past_addr = (void *)((uintptr_t)base_pages + csize);
#ifdef MALLOC_STATS
    base_mapped += csize;
#endif

    return (false);
}

static bool
base_pages_alloc(size_t minsize)
{

#ifdef MALLOC_DSS
    if (opt_mmap && minsize != 0)
#endif
    {
        if (base_pages_alloc_mmap(minsize) == false)
            return (false);
    }

#ifdef MALLOC_DSS
    if (opt_dss) {
        if (base_pages_alloc_dss(minsize) == false)
            return (false);
    }
#endif

    return (true);
}

static void *
base_alloc(size_t size)
{
    void *ret;
    size_t csize;

    /* Round size up to nearest multiple of the cacheline size. */
    csize = CACHELINE_CEILING(size);

    malloc_mutex_lock(&base_mtx);
    /* Make sure there's enough space for the allocation. */
    if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
        if (base_pages_alloc(csize)) {
            malloc_mutex_unlock(&base_mtx);
            return (NULL);
        }
    }
    /* Allocate. */
    ret = base_next_addr;
    base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
    malloc_mutex_unlock(&base_mtx);

    return (ret);
}

static void *
base_calloc(size_t number, size_t size)
{
    void *ret;

    ret = base_alloc(number * size);
    memset(ret, 0, number * size);

    return (ret);
}

static extent_node_t *
base_node_alloc(void)
{
    extent_node_t *ret;

    malloc_mutex_lock(&base_mtx);
    if (base_nodes != NULL) {
        ret = base_nodes;
        base_nodes = *(extent_node_t **)ret;
        malloc_mutex_unlock(&base_mtx);
    } else {
        malloc_mutex_unlock(&base_mtx);
        ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
    }

    return (ret);
}

static void
base_node_dealloc(extent_node_t *node)
{

    malloc_mutex_lock(&base_mtx);
    *(extent_node_t **)node = base_nodes;
    base_nodes = node;
    malloc_mutex_unlock(&base_mtx);
}

/******************************************************************************/

#ifdef MALLOC_STATS
static void
stats_print(arena_t *arena)
{
    unsigned i, gap_start;

    malloc_printf("dirty: %zu page%s dirty, %llu sweep%s,"
        " %llu madvise%s, %llu page%s purged\n",
        arena->ndirty, arena->ndirty == 1 ? "" : "s",
        arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
        arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
        arena->stats.purged, arena->stats.purged == 1 ? "" : "s");

    malloc_printf("            allocated      nmalloc      ndalloc\n");
    malloc_printf("small:   %12zu %12llu %12llu\n",
        arena->stats.allocated_small, arena->stats.nmalloc_small,
        arena->stats.ndalloc_small);
    malloc_printf("large:   %12zu %12llu %12llu\n",
        arena->stats.allocated_large, arena->stats.nmalloc_large,
        arena->stats.ndalloc_large);
    malloc_printf("total:   %12zu %12llu %12llu\n",
        arena->stats.allocated_small + arena->stats.allocated_large,
        arena->stats.nmalloc_small + arena->stats.nmalloc_large,
        arena->stats.ndalloc_small + arena->stats.ndalloc_large);
    malloc_printf("mapped:  %12zu\n", arena->stats.mapped);

    malloc_printf("bins:     bin   size regs pgs  requests   newruns"
        "    reruns maxruns curruns\n");
    for (i = 0, gap_start = UINT_MAX; i < ntbins + nqbins + nsbins; i++) {
        if (arena->bins[i].stats.nrequests == 0) {
            if (gap_start == UINT_MAX)
                gap_start = i;
        } else {
            if (gap_start != UINT_MAX) {
                if (i > gap_start + 1) {
                    /* Gap of more than one size class. */
                    malloc_printf("[%u..%u]\n",
                        gap_start, i - 1);
                } else {
                    /* Gap of one size class. */
                    malloc_printf("[%u]\n", gap_start);
                }
                gap_start = UINT_MAX;
            }
            malloc_printf(
                "%13u %1s %4u %4u %3u %9llu %9llu"
                " %9llu %7lu %7lu\n",
                i,
                i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
                arena->bins[i].reg_size,
                arena->bins[i].nregs,
                arena->bins[i].run_size >> pagesize_2pow,
                arena->bins[i].stats.nrequests,
                arena->bins[i].stats.nruns,
                arena->bins[i].stats.reruns,
                arena->bins[i].stats.highruns,
                arena->bins[i].stats.curruns);
        }
    }
    if (gap_start != UINT_MAX) {
        if (i > gap_start + 1) {
            /* Gap of more than one size class. */
            malloc_printf("[%u..%u]\n", gap_start, i - 1);
        } else {
            /* Gap of one size class. */
            malloc_printf("[%u]\n", gap_start);
        }
    }
}
#endif

/*
 * End Utility functions/macros.
 */
/******************************************************************************/
/*
 * Begin extent tree code.
 */

#ifdef MALLOC_DSS
static inline int
extent_szad_comp(extent_node_t *a, extent_node_t *b)
{
    int ret;
    size_t a_size = a->size;
    size_t b_size = b->size;

    ret = (a_size > b_size) - (a_size < b_size);
    if (ret == 0) {
        uintptr_t a_addr = (uintptr_t)a->addr;
        uintptr_t b_addr = (uintptr_t)b->addr;

        ret = (a_addr > b_addr) - (a_addr < b_addr);
    }

    return (ret);
}

/* Wrap red-black tree macros in functions. */
rb_wrap(__unused static, extent_tree_szad_, extent_tree_t, extent_node_t,
    link_szad, extent_szad_comp)
#endif

static inline int
extent_ad_comp(extent_node_t *a, extent_node_t *b)
{
    uintptr_t a_addr = (uintptr_t)a->addr;
    uintptr_t b_addr = (uintptr_t)b->addr;

    return ((a_addr > b_addr) - (a_addr < b_addr));
}

/* Wrap red-black tree macros in functions. */
rb_wrap(__unused static, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
    extent_ad_comp)

/*
 * End extent tree code.
 */
/******************************************************************************/
/*
 * Begin chunk management functions.
 */

static void *
pages_map(void *addr, size_t size)
{
    void *ret;

    /*
     * We don't use MAP_FIXED here, because it can cause the *replacement*
     * of existing mappings, and we only want to create new mappings.
     */
    ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
        -1, 0);
    assert(ret != NULL);

    if (ret == MAP_FAILED)
        ret = NULL;
    else if (addr != NULL && ret != addr) {
        /*
         * We succeeded in mapping memory, but not in the right place.
         */
        if (munmap(ret, size) == -1) {
            char buf[STRERROR_BUF];

            strerror_r(errno, buf, sizeof(buf));
            _malloc_message(_getprogname(),
                ": (malloc) Error in munmap(): ", buf, "\n");
            if (opt_abort)
                abort();
        }
        ret = NULL;
    }

    assert(ret == NULL || (addr == NULL && ret != addr)
        || (addr != NULL && ret == addr));
    return (ret);
}

static void
pages_unmap(void *addr, size_t size)
{

    if (munmap(addr, size) == -1) {
        char buf[STRERROR_BUF];

        strerror_r(errno, buf, sizeof(buf));
        _malloc_message(_getprogname(),
            ": (malloc) Error in munmap(): ", buf, "\n");
        if (opt_abort)
            abort();
    }
}

#ifdef MALLOC_DSS
static void *
chunk_alloc_dss(size_t size)
{

    /*
     * sbrk() uses a signed increment argument, so take care not to
     * interpret a huge allocation request as a negative increment.
     */
    if ((intptr_t)size < 0)
        return (NULL);

    malloc_mutex_lock(&dss_mtx);
    if (dss_prev != (void *)-1) {
        intptr_t incr;

        /*
         * The loop is necessary to recover from races with other
         * threads that are using the DSS for something other than
         * malloc.
         */
        do {
            void *ret;

            /* Get the current end of the DSS. */
            dss_max = sbrk(0);

            /*
             * Calculate how much padding is necessary to
             * chunk-align the end of the DSS.
             */
            incr = (intptr_t)size
                - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
            if (incr == (intptr_t)size)
                ret = dss_max;
            else {
                ret = (void *)((intptr_t)dss_max + incr);
                incr += size;
            }

            dss_prev = sbrk(incr);
            if (dss_prev == dss_max) {
                /* Success. */
                dss_max = (void *)((intptr_t)dss_prev + incr);
                malloc_mutex_unlock(&dss_mtx);
                return (ret);
            }
        } while (dss_prev != (void *)-1);
    }
    malloc_mutex_unlock(&dss_mtx);

    return (NULL);
}

static void *
chunk_recycle_dss(size_t size, bool zero)
{
    extent_node_t *node, key;

    key.addr = NULL;
    key.size = size;
    malloc_mutex_lock(&dss_mtx);
    node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
    if (node != NULL) {
        void *ret = node->addr;

        /* Remove node from the tree. */
        extent_tree_szad_remove(&dss_chunks_szad, node);
        if (node->size == size) {
            extent_tree_ad_remove(&dss_chunks_ad, node);
            base_node_dealloc(node);
        } else {
            /*
             * Insert the remainder of node's address range as a
             * smaller chunk.  Its position within dss_chunks_ad
             * does not change.
             */
            assert(node->size > size);
            node->addr = (void *)((uintptr_t)node->addr + size);
            node->size -= size;
            extent_tree_szad_insert(&dss_chunks_szad, node);
        }
        malloc_mutex_unlock(&dss_mtx);

        if (zero)
            memset(ret, 0, size);
        return (ret);
    }
    malloc_mutex_unlock(&dss_mtx);

    return (NULL);
}
#endif

static void *
chunk_alloc_mmap(size_t size)
{
    void *ret;
    size_t offset;

    /*
     * Ideally, there would be a way to specify alignment to mmap() (like
     * NetBSD has), but in the absence of such a feature, we have to work
     * hard to efficiently create aligned mappings.  The reliable, but
     * expensive method is to create a mapping that is over-sized, then
     * trim the excess.  However, that always results in at least one call
     * to pages_unmap().
     *
     * A more optimistic approach is to try mapping precisely the right
     * amount, then try to append another mapping if alignment is off.  In
     * practice, this works out well as long as the application is not
     * interleaving mappings via direct mmap() calls.  If we do run into a
     * situation where there is an interleaved mapping and we are unable to
     * extend an unaligned mapping, our best option is to momentarily
     * revert to the reliable-but-expensive method.  This will tend to
     * leave a gap in the memory map that is too small to cause later
     * problems for the optimistic method.
     */

    ret = pages_map(NULL, size);
    if (ret == NULL)
        return (NULL);

    offset = CHUNK_ADDR2OFFSET(ret);
    if (offset != 0) {
        /* Try to extend chunk boundary. */
        if (pages_map((void *)((uintptr_t)ret + size),
            chunksize - offset) == NULL) {
            /*
             * Extension failed.  Clean up, then revert to the
             * reliable-but-expensive method.
             */
            pages_unmap(ret, size);

            /* Beware size_t wrap-around. */
            if (size + chunksize <= size)
                return (NULL);

            ret = pages_map(NULL, size + chunksize);
            if (ret == NULL)
                return (NULL);

            /* Clean up unneeded leading/trailing space. */
            offset = CHUNK_ADDR2OFFSET(ret);
            if (offset != 0) {
                /* Leading space. */
                pages_unmap(ret, chunksize - offset);

                ret = (void *)((uintptr_t)ret +
                    (chunksize - offset));

                /* Trailing space. */
                pages_unmap((void *)((uintptr_t)ret + size),
                    offset);
            } else {
                /* Trailing space only. */
                pages_unmap((void *)((uintptr_t)ret + size),
                    chunksize);
            }
        } else {
            /* Clean up unneeded leading space. */
            pages_unmap(ret, chunksize - offset);
            ret = (void *)((uintptr_t)ret + (chunksize - offset));
        }
    }

    return (ret);
}

static void *
chunk_alloc(size_t size, bool zero)
{
    void *ret;

    assert(size != 0);
    assert((size & chunksize_mask) == 0);

#ifdef MALLOC_DSS
    if (opt_mmap)
#endif
    {
        ret = chunk_alloc_mmap(size);
        if (ret != NULL)
            goto RETURN;
    }

#ifdef MALLOC_DSS
    if (opt_dss) {
        ret = chunk_recycle_dss(size, zero);
        if (ret != NULL)
            goto RETURN;

        ret = chunk_alloc_dss(size);
        if (ret != NULL)
            goto RETURN;
    }
#endif

    /* All strategies for allocation failed. */
    ret = NULL;
RETURN:
#ifdef MALLOC_STATS
    if (ret != NULL) {
        stats_chunks.nchunks += (size / chunksize);
        stats_chunks.curchunks += (size / chunksize);
    }
    if (stats_chunks.curchunks > stats_chunks.highchunks)
        stats_chunks.highchunks = stats_chunks.curchunks;
#endif

    assert(CHUNK_ADDR2BASE(ret) == ret);
    return (ret);
}

#ifdef MALLOC_DSS
static extent_node_t *
chunk_dealloc_dss_record(void *chunk, size_t size)
{
    extent_node_t *node, *prev, key;

    key.addr = (void *)((uintptr_t)chunk + size);
    node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
    /* Try to coalesce forward. */
    if (node != NULL && node->addr == key.addr) {
        /*
         * Coalesce chunk with the following address range.  This does
         * not change the position within dss_chunks_ad, so only
         * remove/insert from/into dss_chunks_szad.
         */
        extent_tree_szad_remove(&dss_chunks_szad, node);
        node->addr = chunk;
        node->size += size;
        extent_tree_szad_insert(&dss_chunks_szad, node);
    } else {
        /*
         * Coalescing forward failed, so insert a new node.  Drop
         * dss_mtx during node allocation, since it is possible that a
         * new base chunk will be allocated.
         */
        malloc_mutex_unlock(&dss_mtx);
        node = base_node_alloc();
        malloc_mutex_lock(&dss_mtx);
        if (node == NULL)
            return (NULL);
        node->addr = chunk;
        node->size = size;
        extent_tree_ad_insert(&dss_chunks_ad, node);
        extent_tree_szad_insert(&dss_chunks_szad, node);
    }

    /* Try to coalesce backward. */
    prev = extent_tree_ad_prev(&dss_chunks_ad, node);
    if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
        chunk) {
        /*
         * Coalesce chunk with the previous address range.  This does
         * not change the position within dss_chunks_ad, so only
         * remove/insert node from/into dss_chunks_szad.
         */
        extent_tree_szad_remove(&dss_chunks_szad, prev);
        extent_tree_ad_remove(&dss_chunks_ad, prev);

        extent_tree_szad_remove(&dss_chunks_szad, node);
        node->addr = prev->addr;
        node->size += prev->size;
        extent_tree_szad_insert(&dss_chunks_szad, node);

        base_node_dealloc(prev);
    }

    return (node);
}

static bool
chunk_dealloc_dss(void *chunk, size_t size)
{

    malloc_mutex_lock(&dss_mtx);
    if ((uintptr_t)chunk >= (uintptr_t)dss_base
        && (uintptr_t)chunk < (uintptr_t)dss_max) {
        extent_node_t *node;

        /* Try to coalesce with other unused chunks. */
        node = chunk_dealloc_dss_record(chunk, size);
        if (node != NULL) {
            chunk = node->addr;
            size = node->size;
        }

        /* Get the current end of the DSS. */
        dss_max = sbrk(0);

        /*
         * Try to shrink the DSS if this chunk is at the end of the
         * DSS.  The sbrk() call here is subject to a race condition
         * with threads that use brk(2) or sbrk(2) directly, but the
         * alternative would be to leak memory for the sake of poorly
         * designed multi-threaded programs.
         */
        if ((void *)((uintptr_t)chunk + size) == dss_max
            && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
            /* Success. */
            dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);

            if (node != NULL) {
                extent_tree_szad_remove(&dss_chunks_szad, node);
                extent_tree_ad_remove(&dss_chunks_ad, node);
                base_node_dealloc(node);
            }
            malloc_mutex_unlock(&dss_mtx);
        } else {
            malloc_mutex_unlock(&dss_mtx);
            madvise(chunk, size, MADV_FREE);
        }

        return (false);
    }
    malloc_mutex_unlock(&dss_mtx);

    return (true);
}
#endif

static void
chunk_dealloc_mmap(void *chunk, size_t size)
{

    pages_unmap(chunk, size);
}

static void
chunk_dealloc(void *chunk, size_t size)
{

    assert(chunk != NULL);
    assert(CHUNK_ADDR2BASE(chunk) == chunk);
    assert(size != 0);
    assert((size & chunksize_mask) == 0);

#ifdef MALLOC_STATS
    stats_chunks.curchunks -= (size / chunksize);
#endif

#ifdef MALLOC_DSS
    if (opt_dss) {
        if (chunk_dealloc_dss(chunk, size) == false)
            return;
    }

    if (opt_mmap)
#endif
        chunk_dealloc_mmap(chunk, size);
}

/*
 * End chunk management functions.
 */
/******************************************************************************/
/*
 * Begin arena.
 */

/*
 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
 * code if necessary).
 */
static inline arena_t *
choose_arena(void)
{
    arena_t *ret;

    /*
     * We can only use TLS if this is a PIC library, since for the static
     * library version, libc's malloc is used by TLS allocation, which
     * introduces a bootstrapping issue.
     */
#ifndef NO_TLS
    if (__isthreaded == false) {
        /* Avoid the overhead of TLS for single-threaded operation. */
        return (arenas[0]);
    }

    ret = arenas_map;
    if (ret == NULL) {
        ret = choose_arena_hard();
        assert(ret != NULL);
    }
#else
    if (__isthreaded && narenas > 1) {
        unsigned long ind;

        /*
         * Hash _pthread_self() to one of the arenas.  There is a prime
         * number of arenas, so this has a reasonable chance of
         * working.  Even so, the hashing can be easily thwarted by
         * inconvenient _pthread_self() values.  Without specific
         * knowledge of how _pthread_self() calculates values, we can't
         * easily do much better than this.
         */
        ind = (unsigned long) _pthread_self() % narenas;

        /*
         * Optimistically assume that arenas[ind] has been initialized.
         * At worst, we find out that some other thread has already
         * done so, after acquiring the lock in preparation.  Note that
         * this lazy locking also has the effect of lazily forcing
         * cache coherency; without the lock acquisition, there's no
         * guarantee that modification of arenas[ind] by another thread
         * would be seen on this CPU for an arbitrary amount of time.
         *
         * In general, this approach to modifying a synchronized value
         * isn't a good idea, but in this case we only ever modify the
         * value once, so things work out well.
         */
        ret = arenas[ind];
        if (ret == NULL) {
            /*
             * Avoid races with another thread that may have already
             * initialized arenas[ind].
             */
            malloc_spin_lock(&arenas_lock);
            if (arenas[ind] == NULL)
                ret = arenas_extend((unsigned)ind);
            else
                ret = arenas[ind];
            malloc_spin_unlock(&arenas_lock);
        }
    } else
        ret = arenas[0];
#endif

    assert(ret != NULL);
    return (ret);
}

#ifndef NO_TLS
/*
 * Choose an arena based on a per-thread value (slow-path code only, called
 * only by choose_arena()).
 */
static arena_t *
choose_arena_hard(void)
{
    arena_t *ret;

    assert(__isthreaded);

#ifdef MALLOC_BALANCE
    /* Seed the PRNG used for arena load balancing. */
    SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self()));
#endif

    if (narenas > 1) {
#ifdef MALLOC_BALANCE
        unsigned ind;

        ind = PRN(balance, narenas_2pow);
        if ((ret = arenas[ind]) == NULL) {
            malloc_spin_lock(&arenas_lock);
            if ((ret = arenas[ind]) == NULL)
                ret = arenas_extend(ind);
            malloc_spin_unlock(&arenas_lock);
        }
#else
        malloc_spin_lock(&arenas_lock);
        if ((ret = arenas[next_arena]) == NULL)
            ret = arenas_extend(next_arena);
        next_arena = (next_arena + 1) % narenas;
        malloc_spin_unlock(&arenas_lock);
#endif
    } else
        ret = arenas[0];

    arenas_map = ret;

    return (ret);
}
#endif

static inline int
arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
{
    uintptr_t a_chunk = (uintptr_t)a;
    uintptr_t b_chunk = (uintptr_t)b;

    assert(a != NULL);
    assert(b != NULL);

    return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
}

/* Wrap red-black tree macros in functions. */
rb_wrap(__unused static, arena_chunk_tree_dirty_, arena_chunk_tree_t,
    arena_chunk_t, link_dirty, arena_chunk_comp)

static inline int
arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
{
    uintptr_t a_mapelm = (uintptr_t)a;
    uintptr_t b_mapelm = (uintptr_t)b;

    assert(a != NULL);
    assert(b != NULL);

    return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
}

/* Wrap red-black tree macros in functions. */
rb_wrap(__unused static, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
    link, arena_run_comp)

static inline int
arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
{
    int ret;
    size_t a_size = a->bits & ~pagesize_mask;
    size_t b_size = b->bits & ~pagesize_mask;

    ret = (a_size > b_size) - (a_size < b_size);
    if (ret == 0) {
        uintptr_t a_mapelm, b_mapelm;

        if ((a->bits & CHUNK_MAP_KEY) == 0)
            a_mapelm = (uintptr_t)a;
        else {
            /*
             * Treat keys as though they are lower than anything
             * else.
             */
            a_mapelm = 0;
        }
        b_mapelm = (uintptr_t)b;

        ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
    }

    return (ret);
}

/* Wrap red-black tree macros in functions. */
rb_wrap(__unused static, arena_avail_tree_, arena_avail_tree_t,
    arena_chunk_map_t, link, arena_avail_comp)

static inline void *
arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
{
    void *ret;
    unsigned i, mask, bit, regind;

    assert(run->magic == ARENA_RUN_MAGIC);
    assert(run->regs_minelm < bin->regs_mask_nelms);

    /*
     * Move the first check outside the loop, so that run->regs_minelm can
     * be updated unconditionally, without the possibility of updating it
     * multiple times.
     */
    i = run->regs_minelm;
    mask = run->regs_mask[i];
    if (mask != 0) {
        /* Usable allocation found. */
        bit = ffs((int)mask) - 1;

        regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
        assert(regind < bin->nregs);
        ret = (void *)(((uintptr_t)run) + bin->reg0_offset
            + (bin->reg_size * regind));

        /* Clear bit. */
        mask ^= (1U << bit);
        run->regs_mask[i] = mask;

        return (ret);
    }

    for (i++; i < bin->regs_mask_nelms; i++) {
        mask = run->regs_mask[i];
        if (mask != 0) {
            /* Usable allocation found. */
            bit = ffs((int)mask) - 1;

            regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
            assert(regind < bin->nregs);
            ret = (void *)(((uintptr_t)run) + bin->reg0_offset
                + (bin->reg_size * regind));

            /* Clear bit. */
            mask ^= (1U << bit);
            run->regs_mask[i] = mask;

            /*
             * Make a note that nothing before this element
             * contains a free region.
             */
            run->regs_minelm = i; /* Low payoff: + (mask == 0); */

            return (ret);
        }
    }
    /* Not reached. */
    assert(0);
    return (NULL);
}

static inline void
arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
{
    /*
     * To divide by a number D that is not a power of two we multiply
     * by (2^21 / D) and then right shift by 21 positions.
     *
     *   X / D
     *
     * becomes
     *
     *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
     */
#define SIZE_INV_SHIFT 21
#define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
    static const unsigned size_invs[] = {
        SIZE_INV(3),
        SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
        SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
        SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
        SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
        SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
        SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
        SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
#if (QUANTUM_2POW_MIN < 4)
        ,
        SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
        SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
        SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
        SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
        SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
        SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
        SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
        SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
#endif
    };
    unsigned diff, regind, elm, bit;

    assert(run->magic == ARENA_RUN_MAGIC);
    assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
        >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));

    /*
     * Avoid doing division with a variable divisor if possible.  Using
     * actual division here can reduce allocator throughput by over 20%!
     */
    diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
    if ((size & (size - 1)) == 0) {
        /*
         * log2_table allows fast division of a power of two in the
         * [1..128] range.
         *
         * (x / divisor) becomes (x >> log2_table[divisor - 1]).
         */
        static const unsigned char log2_table[] = {
            0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
        };

        if (size <= 128)
            regind = (diff >> log2_table[size - 1]);
        else if (size <= 32768)
            regind = diff >> (8 + log2_table[(size >> 8) - 1]);
        else {
            /*
             * The run size is too large for us to use the lookup
             * table.  Use real division.
             */
            regind = diff / size;
        }
    } else if (size <= (((sizeof(size_invs) / sizeof(unsigned)) + 2)
        << QUANTUM_2POW_MIN)) {
        regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
        regind >>= SIZE_INV_SHIFT;
    } else {
        /*
         * size_invs isn't large enough to handle this size class, so
         * calculate regind using actual division.  This only happens
         * if the user increases small_max via the 'S' runtime
         * configuration option.
         */
        regind = diff / size;
    }
    assert(diff == regind * size);
    assert(regind < bin->nregs);

    elm = regind >> (SIZEOF_INT_2POW + 3);
    if (elm < run->regs_minelm)
        run->regs_minelm = elm;
    bit = regind - (elm << (SIZEOF_INT_2POW + 3));
    assert((run->regs_mask[elm] & (1U << bit)) == 0);
    run->regs_mask[elm] |= (1U << bit);
#undef SIZE_INV
#undef SIZE_INV_SHIFT
}
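
/*
 * Worked example of the reciprocal trick above (QUANTUM_2POW_MIN == 4
 * assumed): for size == 48, size_invs[(48 >> 4) - 3] == ((1U << 21) / 48) + 1
 * == 43691, so for diff == 96:
 *
 *     (96 * 43691) >> 21 == 4194336 >> 21 == 2 == 96 / 48
 *
 * which is exact for every diff that can occur within a small run.
 */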

static void
arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
    bool zero)
{
    arena_chunk_t *chunk;
    size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
    old_ndirty = chunk->ndirty;
    run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
        >> pagesize_2pow);
    total_pages = (chunk->map[run_ind].bits & ~pagesize_mask) >>
        pagesize_2pow;
    need_pages = (size >> pagesize_2pow);
    assert(need_pages > 0);
    assert(need_pages <= total_pages);
    rem_pages = total_pages - need_pages;

    arena_avail_tree_remove(&arena->runs_avail, &chunk->map[run_ind]);

    /* Keep track of trailing unused pages for later use. */
    if (rem_pages > 0) {
        chunk->map[run_ind+need_pages].bits = (rem_pages <<
            pagesize_2pow) | (chunk->map[run_ind+need_pages].bits &
            pagesize_mask);
        chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
            pagesize_2pow) | (chunk->map[run_ind+total_pages-1].bits &
            pagesize_mask);
        arena_avail_tree_insert(&arena->runs_avail,
            &chunk->map[run_ind+need_pages]);
    }

    for (i = 0; i < need_pages; i++) {
        /* Zero if necessary. */
        if (zero) {
            if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED)
                == 0) {
                memset((void *)((uintptr_t)chunk + ((run_ind
                    + i) << pagesize_2pow)), 0, pagesize);
                /* CHUNK_MAP_ZEROED is cleared below. */
            }
        }

        /* Update dirty page accounting. */
        if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
            chunk->ndirty--;
            arena->ndirty--;
            /* CHUNK_MAP_DIRTY is cleared below. */
        }

        /* Initialize the chunk map. */
        if (large) {
            chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE
                | CHUNK_MAP_ALLOCATED;
        } else {
            chunk->map[run_ind + i].bits = (size_t)run
                | CHUNK_MAP_ALLOCATED;
        }
    }

    /*
     * Set the run size only in the first element for large runs.  This is
     * primarily a debugging aid, since the lack of size info for trailing
     * pages only matters if the application tries to operate on an
     * interior pointer.
     */
    if (large)
        chunk->map[run_ind].bits |= size;

    if (chunk->ndirty == 0 && old_ndirty > 0)
        arena_chunk_tree_dirty_remove(&arena->chunks_dirty, chunk);
}

static arena_chunk_t *
arena_chunk_alloc(arena_t *arena)
{
    arena_chunk_t *chunk;
    size_t i;

    if (arena->spare != NULL) {
        chunk = arena->spare;
        arena->spare = NULL;
    } else {
        chunk = (arena_chunk_t *)chunk_alloc(chunksize, true);
        if (chunk == NULL)
            return (NULL);
#ifdef MALLOC_STATS
        arena->stats.mapped += chunksize;
#endif

        chunk->arena = arena;

        /*
         * Claim that no pages are in use, since the header is merely
         * overhead.
         */
        chunk->ndirty = 0;

        /*
         * Initialize the map to contain one maximal free untouched run.
         */
        for (i = 0; i < arena_chunk_header_npages; i++)
            chunk->map[i].bits = 0;
        chunk->map[i].bits = arena_maxclass | CHUNK_MAP_ZEROED;
        for (i++; i < chunk_npages-1; i++) {
            chunk->map[i].bits = CHUNK_MAP_ZEROED;
        }
        chunk->map[chunk_npages-1].bits = arena_maxclass |
            CHUNK_MAP_ZEROED;
    }

    /* Insert the run into the runs_avail tree. */
    arena_avail_tree_insert(&arena->runs_avail,
        &chunk->map[arena_chunk_header_npages]);

    return (chunk);
}

static void
arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
{

    if (arena->spare != NULL) {
        if (arena->spare->ndirty > 0) {
            arena_chunk_tree_dirty_remove(
                &chunk->arena->chunks_dirty, arena->spare);
            arena->ndirty -= arena->spare->ndirty;
        }
        chunk_dealloc((void *)arena->spare, chunksize);
#ifdef MALLOC_STATS
        arena->stats.mapped -= chunksize;
#endif
    }

    /*
     * Remove run from runs_avail, regardless of whether this chunk
     * will be cached, so that the arena does not use it.  Dirty page
     * flushing only uses the chunks_dirty tree, so leaving this chunk in
     * the chunks_* trees is sufficient for that purpose.
     */
    arena_avail_tree_remove(&arena->runs_avail,
        &chunk->map[arena_chunk_header_npages]);

    arena->spare = chunk;
}
2390 static arena_run_t *
2391 arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
2393 arena_chunk_t *chunk;
2395 arena_chunk_map_t *mapelm, key;
2397 assert(size <= arena_maxclass);
2398 assert((size & pagesize_mask) == 0);
2400 /* Search the arena's chunks for the lowest best fit. */
2401 key.bits = size | CHUNK_MAP_KEY;
2402 mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
2403 if (mapelm != NULL) {
2404 arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
2405 size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
2406 / sizeof(arena_chunk_map_t);
2408 run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
2409 << pagesize_2pow));
2410 arena_run_split(arena, run, size, large, zero);
2411 return (run);
2412 }
2415 * No usable runs. Create a new chunk from which to allocate the run.
2417 chunk = arena_chunk_alloc(arena);
2418 if (chunk == NULL)
2419 return (NULL);
2420 run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2421 pagesize_2pow));
2422 /* Update page map. */
2423 arena_run_split(arena, run, size, large, zero);
2424 return (run);
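/*
 * [Editor's note] The nsearch above works because runs_avail orders map
 * elements primarily by run size and secondarily by address; a key of
 * (size | CHUNK_MAP_KEY) therefore lands on the lowest-addressed run of
 * the smallest sufficient size.  A hypothetical stand-in comparator
 * (not the tree's actual one) to illustrate the ordering:
 */
#include <stddef.h>
#include <stdint.h>

static int
ex_avail_comp(size_t a_size, uintptr_t a_addr, size_t b_size,
    uintptr_t b_addr)
{

	if (a_size != b_size)
		return ((a_size < b_size) ? -1 : 1);
	/* Equal sizes; break the tie by address. */
	if (a_addr != b_addr)
		return ((a_addr < b_addr) ? -1 : 1);
	return (0);
}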
2428 arena_purge(arena_t *arena)
2430 arena_chunk_t *chunk;
2435 rb_foreach_begin(arena_chunk_t, link_dirty, &arena->chunks_dirty,
2437 ndirty += chunk->ndirty;
2438 } rb_foreach_end(arena_chunk_t, link_dirty, &arena->chunks_dirty, chunk)
2439 assert(ndirty == arena->ndirty);
2441 assert(arena->ndirty > opt_dirty_max);
2444 arena->stats.npurge++;
2448 * Iterate downward through chunks until enough dirty memory has been
2449 * purged. Terminate as soon as possible in order to minimize the
2450 * number of system calls, even if a chunk has only been partially
2451 * purged.
2452 */
2453 while (arena->ndirty > (opt_dirty_max >> 1)) {
2454 chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
2455 assert(chunk != NULL);
2457 for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
2458 assert(i >= arena_chunk_header_npages);
2460 if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
2461 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2462 /* Find adjacent dirty run(s). */
2463 for (npages = 1; i > arena_chunk_header_npages
2464 && (chunk->map[i - 1].bits &
2465 CHUNK_MAP_DIRTY); npages++) {
2466 i--;
2467 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2468 }
2469 chunk->ndirty -= npages;
2470 arena->ndirty -= npages;
2472 madvise((void *)((uintptr_t)chunk + (i <<
2473 pagesize_2pow)), (npages << pagesize_2pow),
2474 MADV_FREE);
2476 arena->stats.nmadvise++;
2477 arena->stats.purged += npages;
2479 if (arena->ndirty <= (opt_dirty_max >> 1))
2480 break;
2484 if (chunk->ndirty == 0) {
2485 arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
2486 chunk);
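/*
 * [Editor's note] Purging hands the physical pages behind a dirty run
 * back to the kernel while keeping the mapping itself intact, which is
 * what the madvise() call above does.  A minimal sketch, assuming
 * MADV_FREE is available (it is on FreeBSD; other systems may only
 * provide MADV_DONTNEED):
 */
#include <sys/mman.h>
#include <stddef.h>
#include <stdint.h>

static void
ex_purge_run(void *chunk, size_t first_page, size_t npages,
    size_t page_2pow)
{
	void *addr = (void *)((uintptr_t)chunk + (first_page << page_2pow));

	(void)madvise(addr, npages << page_2pow, MADV_FREE);
}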
2492 arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
2494 arena_chunk_t *chunk;
2495 size_t size, run_ind, run_pages;
2497 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2498 run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
2500 assert(run_ind >= arena_chunk_header_npages);
2501 assert(run_ind < chunk_npages);
2502 if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
2503 size = chunk->map[run_ind].bits & ~pagesize_mask;
2504 else
2505 size = run->bin->run_size;
2506 run_pages = (size >> pagesize_2pow);
2508 /* Mark pages as unallocated in the chunk map. */
2509 if (dirty) {
2510 size_t i;
2512 for (i = 0; i < run_pages; i++) {
2513 assert((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY)
2514 == 0);
2515 chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
2516 }
2518 if (chunk->ndirty == 0) {
2519 arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
2520 chunk);
2521 }
2522 chunk->ndirty += run_pages;
2523 arena->ndirty += run_pages;
2524 } else {
2525 size_t i;
2527 for (i = 0; i < run_pages; i++) {
2528 chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE |
2529 CHUNK_MAP_ALLOCATED);
2532 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2533 pagesize_mask);
2534 chunk->map[run_ind+run_pages-1].bits = size |
2535 (chunk->map[run_ind+run_pages-1].bits & pagesize_mask);
2537 /* Try to coalesce forward. */
2538 if (run_ind + run_pages < chunk_npages &&
2539 (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
2540 size_t nrun_size = chunk->map[run_ind+run_pages].bits &
2541 ~pagesize_mask;
2543 /*
2544 * Remove successor from runs_avail; the coalesced run is
2545 * inserted later.
2546 */
2547 arena_avail_tree_remove(&arena->runs_avail,
2548 &chunk->map[run_ind+run_pages]);
2550 size += nrun_size;
2551 run_pages = size >> pagesize_2pow;
2553 assert((chunk->map[run_ind+run_pages-1].bits & ~pagesize_mask)
2554 == nrun_size);
2555 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2556 pagesize_mask);
2557 chunk->map[run_ind+run_pages-1].bits = size |
2558 (chunk->map[run_ind+run_pages-1].bits & pagesize_mask);
2561 /* Try to coalesce backward. */
2562 if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
2563 CHUNK_MAP_ALLOCATED) == 0) {
2564 size_t prun_size = chunk->map[run_ind-1].bits & ~pagesize_mask;
2566 run_ind -= prun_size >> pagesize_2pow;
2568 /*
2569 * Remove predecessor from runs_avail; the coalesced run is
2570 * inserted later.
2571 */
2572 arena_avail_tree_remove(&arena->runs_avail,
2573 &chunk->map[run_ind]);
2575 size += prun_size;
2576 run_pages = size >> pagesize_2pow;
2578 assert((chunk->map[run_ind].bits & ~pagesize_mask) ==
2579 prun_size);
2580 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2581 pagesize_mask);
2582 chunk->map[run_ind+run_pages-1].bits = size |
2583 (chunk->map[run_ind+run_pages-1].bits & pagesize_mask);
2586 /* Insert into runs_avail, now that coalescing is complete. */
2587 arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
2589 /* Deallocate chunk if it is now completely unused. */
2590 if ((chunk->map[arena_chunk_header_npages].bits & (~pagesize_mask |
2591 CHUNK_MAP_ALLOCATED)) == arena_maxclass)
2592 arena_chunk_dealloc(arena, chunk);
2594 /* Enforce opt_dirty_max. */
2595 if (arena->ndirty > opt_dirty_max)
2596 arena_purge(arena);
2600 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
2601 size_t oldsize, size_t newsize)
2603 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> pagesize_2pow;
2604 size_t head_npages = (oldsize - newsize) >> pagesize_2pow;
2606 assert(oldsize > newsize);
2609 * Update the chunk map so that arena_run_dalloc() can treat the
2610 * leading run as separately allocated.
2612 chunk->map[pageind].bits = (oldsize - newsize) | CHUNK_MAP_LARGE |
2613 CHUNK_MAP_ALLOCATED;
2614 chunk->map[pageind+head_npages].bits = newsize | CHUNK_MAP_LARGE |
2615 CHUNK_MAP_ALLOCATED;
2617 arena_run_dalloc(arena, run, false);
2621 arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
2622 size_t oldsize, size_t newsize, bool dirty)
2624 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> pagesize_2pow;
2625 size_t npages = newsize >> pagesize_2pow;
2627 assert(oldsize > newsize);
2630 * Update the chunk map so that arena_run_dalloc() can treat the
2631 * trailing run as separately allocated.
2633 chunk->map[pageind].bits = newsize | CHUNK_MAP_LARGE |
2634 CHUNK_MAP_ALLOCATED;
2635 chunk->map[pageind+npages].bits = (oldsize - newsize) | CHUNK_MAP_LARGE
2636 | CHUNK_MAP_ALLOCATED;
2638 arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
2639 dirty);
2642 static arena_run_t *
2643 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2645 arena_chunk_map_t *mapelm;
2646 arena_run_t *run;
2647 unsigned i, remainder;
2649 /* Look for a usable run. */
2650 mapelm = arena_run_tree_first(&bin->runs);
2651 if (mapelm != NULL) {
2652 /* run is guaranteed to have available space. */
2653 arena_run_tree_remove(&bin->runs, mapelm);
2654 run = (arena_run_t *)(mapelm->bits & ~pagesize_mask);
2656 bin->stats.reruns++;
2658 return (run);
2660 /* No existing runs have any space available. */
2662 /* Allocate a new run. */
2663 run = arena_run_alloc(arena, bin->run_size, false, false);
2664 if (run == NULL)
2665 return (NULL);
2667 /* Initialize run internals. */
2668 run->bin = bin;
2670 for (i = 0; i < bin->regs_mask_nelms - 1; i++)
2671 run->regs_mask[i] = UINT_MAX;
2672 remainder = bin->nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1);
2673 if (remainder == 0)
2674 run->regs_mask[i] = UINT_MAX;
2675 else {
2676 /* The last element has spare bits that need to be unset. */
2677 run->regs_mask[i] = (UINT_MAX >> ((1U << (SIZEOF_INT_2POW + 3))
2678 - remainder));
2679 }
2681 run->regs_minelm = 0;
2683 run->nfree = bin->nregs;
2685 run->magic = ARENA_RUN_MAGIC;
2690 bin->stats.curruns++;
2691 if (bin->stats.curruns > bin->stats.highruns)
2692 bin->stats.highruns = bin->stats.curruns;
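/*
 * [Editor's note] The regs_mask initialization above keeps one bit per
 * region (1 == free) across an array of unsigned ints, clearing the
 * unused high bits of the final word.  A self-contained sketch, under
 * the assumption of 32-bit unsigned ints (i.e. SIZEOF_INT_2POW == 2,
 * so 1U << (SIZEOF_INT_2POW + 3) == 32):
 */
#include <limits.h>

#define EX_INT_BITS 32	/* assumption: bits in an unsigned int */

static void
ex_regs_mask_init(unsigned *regs_mask, unsigned nregs, unsigned nelms)
{
	unsigned i, remainder;

	for (i = 0; i < nelms - 1; i++)
		regs_mask[i] = UINT_MAX;	/* all regions free */
	remainder = nregs & (EX_INT_BITS - 1);
	if (remainder == 0)
		regs_mask[i] = UINT_MAX;
	else {
		/* Clear the spare high bits of the last element. */
		regs_mask[i] = UINT_MAX >> (EX_INT_BITS - remainder);
	}
}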
2697 /* bin->runcur must have space available before this function is called. */
2698 static inline void *
2699 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2703 assert(run->magic == ARENA_RUN_MAGIC);
2704 assert(run->nfree > 0);
2706 ret = arena_run_reg_alloc(run, bin);
2707 assert(ret != NULL);
2708 run->nfree--;
2710 return (ret);
2713 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2715 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2718 bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2719 if (bin->runcur == NULL)
2720 return (NULL);
2721 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2722 assert(bin->runcur->nfree > 0);
2724 return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2728 * Calculate bin->run_size such that it meets the following constraints:
2730 * *) bin->run_size >= min_run_size
2731 * *) bin->run_size <= arena_maxclass
2732 * *) bin->run_size <= RUN_MAX_SMALL
2733 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
2735 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
2736 * also calculated here, since these settings are all interdependent.
2739 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
2741 size_t try_run_size, good_run_size;
2742 unsigned good_nregs, good_mask_nelms, good_reg0_offset;
2743 unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2745 assert(min_run_size >= pagesize);
2746 assert(min_run_size <= arena_maxclass);
2747 assert(min_run_size <= RUN_MAX_SMALL);
2750 * Calculate known-valid settings before entering the run_size
2751 * expansion loop, so that the first part of the loop always copies
2752 * valid settings.
2754 * The do..while loop iteratively reduces the number of regions until
2755 * the run header and the regions no longer overlap. A closed formula
2756 * would be quite messy, since there is an interdependency between the
2757 * header's mask length and the number of regions.
2759 try_run_size = min_run_size;
2760 try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
2761 + 1; /* Counteract try_nregs-- in loop. */
2762 do {
2763 try_nregs--;
2764 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2765 ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
2766 try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
2767 } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
2768 > try_reg0_offset);
2770 /* run_size expansion loop. */
2771 do {
2773 * Copy valid settings before trying more aggressive settings.
2775 good_run_size = try_run_size;
2776 good_nregs = try_nregs;
2777 good_mask_nelms = try_mask_nelms;
2778 good_reg0_offset = try_reg0_offset;
2780 /* Try more aggressive settings. */
2781 try_run_size += pagesize;
2782 try_nregs = ((try_run_size - sizeof(arena_run_t)) /
2783 bin->reg_size) + 1; /* Counteract try_nregs-- in loop. */
2784 do {
2785 try_nregs--;
2786 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
2787 ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ?
2788 1 : 0);
2789 try_reg0_offset = try_run_size - (try_nregs *
2790 bin->reg_size);
2791 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
2792 (try_mask_nelms - 1)) > try_reg0_offset);
2793 } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
2794 && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
2795 && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
2797 assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
2798 <= good_reg0_offset);
2799 assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
2801 /* Copy final settings. */
2802 bin->run_size = good_run_size;
2803 bin->nregs = good_nregs;
2804 bin->regs_mask_nelms = good_mask_nelms;
2805 bin->reg0_offset = good_reg0_offset;
2807 return (good_run_size);
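/*
 * [Editor's note] The termination test above compares header overhead
 * against a fraction of the run size using binary fixed point.  A
 * worked sketch with assumed constants (a 12-bit binary fixed point and
 * a limit of 0x3d/0x1000, i.e. ~1.5%; the real RUN_BFP/RUN_MAX_OVRHD
 * values may differ): with 4 kB pages, a one-page run whose header
 * occupies 61 bytes sits exactly at the limit, since 61/4096 ~= 1.49%.
 */
#include <stddef.h>

static int
ex_overhead_ok(size_t reg0_offset, size_t run_size)
{
	const unsigned ex_run_bfp = 12;		/* assumption */
	const size_t ex_max_ovrhd = 0x3d;	/* assumption: ~1.5% */

	/* Acceptable when reg0_offset/run_size <= ex_max_ovrhd/2^bfp. */
	return ((reg0_offset << ex_run_bfp) <= ex_max_ovrhd * run_size);
}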
2810 #ifdef MALLOC_BALANCE
2812 arena_lock_balance(arena_t *arena)
2814 unsigned contention;
2816 contention = malloc_spin_lock(&arena->lock);
2819 * Calculate the exponentially averaged contention for this
2820 * arena. Due to integer math always rounding down, this value
2821 * decays somewhat faster than normal.
2823 arena->contention = (((uint64_t)arena->contention
2824 * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1))
2825 + (uint64_t)contention) >> BALANCE_ALPHA_INV_2POW;
2826 if (arena->contention >= opt_balance_threshold)
2827 arena_lock_balance_hard(arena);
2832 arena_lock_balance_hard(arena_t *arena)
2836 arena->contention = 0;
2838 arena->stats.nbalance++;
2840 ind = PRN(balance, narenas_2pow);
2841 if (arenas[ind] != NULL)
2842 arenas_map = arenas[ind];
2843 else {
2844 malloc_spin_lock(&arenas_lock);
2845 if (arenas[ind] != NULL)
2846 arenas_map = arenas[ind];
2847 else
2848 arenas_map = arenas_extend(ind);
2849 malloc_spin_unlock(&arenas_lock);
2850 }
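/*
 * [Editor's note] The contention value maintained above is an
 * exponential moving average computed entirely with integer shifts:
 * new = (old * (2^k - 1) + sample) / 2^k.  Truncation makes it decay
 * slightly faster than the ideal average, as the comment in
 * arena_lock_balance() notes.  A sketch:
 */
#include <stdint.h>

static uint32_t
ex_ema_update(uint32_t avg, uint32_t sample, unsigned k)
{

	return ((uint32_t)((((uint64_t)avg * (((uint64_t)1 << k) - 1))
	    + (uint64_t)sample) >> k));
}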
2854 static inline void *
2855 arena_malloc_small(arena_t *arena, size_t size, bool zero)
2861 if (size < small_min) {
2862 /* Tiny. */
2863 size = pow2_ceil(size);
2864 bin = &arena->bins[ffs((int)(size >> (TINY_MIN_2POW +
2865 1)))];
2866 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2868 * Bin calculation is always correct, but we may need
2869 * to fix size for the purposes of assertions and/or
2870 * stats accuracy.
2871 */
2872 if (size < (1U << TINY_MIN_2POW))
2873 size = (1U << TINY_MIN_2POW);
2875 } else if (size <= small_max) {
2876 /* Quantum-spaced. */
2877 size = QUANTUM_CEILING(size);
2878 bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2879 - 1];
2880 } else {
2881 /* Sub-page. */
2882 size = pow2_ceil(size);
2883 bin = &arena->bins[ntbins + nqbins
2884 + (ffs((int)(size >> opt_small_max_2pow)) - 2)];
2886 assert(size == bin->reg_size);
2888 #ifdef MALLOC_BALANCE
2889 arena_lock_balance(arena);
2891 malloc_spin_lock(&arena->lock);
2893 if ((run = bin->runcur) != NULL && run->nfree > 0)
2894 ret = arena_bin_malloc_easy(arena, bin, run);
2895 else
2896 ret = arena_bin_malloc_hard(arena, bin);
2898 if (ret == NULL) {
2899 malloc_spin_unlock(&arena->lock);
2900 return (NULL);
2901 }
2904 bin->stats.nrequests++;
2905 arena->stats.nmalloc_small++;
2906 arena->stats.allocated_small += size;
2908 malloc_spin_unlock(&arena->lock);
2910 if (zero == false) {
2911 if (opt_junk)
2912 memset(ret, 0xa5, size);
2913 else if (opt_zero)
2914 memset(ret, 0, size);
2915 } else
2916 memset(ret, 0, size);
2918 return (ret);
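/*
 * [Editor's note] The three-way dispatch above maps a request size to a
 * bin index.  The sketch below mirrors it using the documented defaults
 * (quantum = 16, small_max = 512, 4 kB pages, smallest tiny class = 2)
 * rather than the live opt_* settings; all EX_/ex_ names are
 * hypothetical.
 */
#include <stddef.h>
#include <strings.h>	/* ffs() */

#define EX_TINY_MIN_2POW 1
#define EX_QUANTUM_2POW  4
#define EX_SMALL_MAX_2POW 9
#define EX_SMALL_MIN ((1U << EX_QUANTUM_2POW) / 2 + 1)		/* 9 */
#define EX_NTBINS (EX_QUANTUM_2POW - EX_TINY_MIN_2POW)		/* 3 */
#define EX_NQBINS (1U << (EX_SMALL_MAX_2POW - EX_QUANTUM_2POW))	/* 32 */

static size_t
ex_pow2_ceil(size_t x)
{

	x--;
	x |= x >> 1; x |= x >> 2; x |= x >> 4;
	x |= x >> 8; x |= x >> 16;
	return (x + 1);
}

static unsigned
ex_bin_index(size_t size)
{

	if (size < EX_SMALL_MIN) {
		/* Tiny: power-of-two spacing, clamped to the 2-byte class. */
		size = ex_pow2_ceil(size);
		if (size < (1U << EX_TINY_MIN_2POW))
			size = (1U << EX_TINY_MIN_2POW);
		return ((unsigned)ffs((int)(size >> (EX_TINY_MIN_2POW + 1))));
	} else if (size <= (1U << EX_SMALL_MAX_2POW)) {
		/* Quantum-spaced. */
		size = (size + ((1U << EX_QUANTUM_2POW) - 1)) &
		    ~(size_t)((1U << EX_QUANTUM_2POW) - 1);
		return (EX_NTBINS + (unsigned)(size >> EX_QUANTUM_2POW) - 1);
	}
	/* Sub-page: power-of-two spacing above small_max. */
	size = ex_pow2_ceil(size);
	return (EX_NTBINS + EX_NQBINS +
	    (unsigned)(ffs((int)(size >> EX_SMALL_MAX_2POW)) - 2));
}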
2922 arena_malloc_large(arena_t *arena, size_t size, bool zero)
2926 /* Large allocation. */
2927 size = PAGE_CEILING(size);
2928 #ifdef MALLOC_BALANCE
2929 arena_lock_balance(arena);
2931 malloc_spin_lock(&arena->lock);
2933 ret = (void *)arena_run_alloc(arena, size, true, zero);
2934 if (ret == NULL) {
2935 malloc_spin_unlock(&arena->lock);
2936 return (NULL);
2937 }
2939 arena->stats.nmalloc_large++;
2940 arena->stats.allocated_large += size;
2942 malloc_spin_unlock(&arena->lock);
2944 if (zero == false) {
2945 if (opt_junk)
2946 memset(ret, 0xa5, size);
2947 else if (opt_zero)
2948 memset(ret, 0, size);
2949 }
2951 return (ret);
2954 static inline void *
2955 arena_malloc(arena_t *arena, size_t size, bool zero)
2958 assert(arena != NULL);
2959 assert(arena->magic == ARENA_MAGIC);
2961 assert(QUANTUM_CEILING(size) <= arena_maxclass);
2963 if (size <= bin_maxclass) {
2964 return (arena_malloc_small(arena, size, zero));
2965 } else
2966 return (arena_malloc_large(arena, size, zero));
2969 static inline void *
2970 imalloc(size_t size)
2975 if (size <= arena_maxclass)
2976 return (arena_malloc(choose_arena(), size, false));
2977 else
2978 return (huge_malloc(size, false));
2981 static inline void *
2982 icalloc(size_t size)
2985 if (size <= arena_maxclass)
2986 return (arena_malloc(choose_arena(), size, true));
2987 else
2988 return (huge_malloc(size, true));
2991 /* Only handles large allocations that require more than page alignment. */
2993 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
2997 arena_chunk_t *chunk;
2999 assert((size & pagesize_mask) == 0);
3000 assert((alignment & pagesize_mask) == 0);
3002 #ifdef MALLOC_BALANCE
3003 arena_lock_balance(arena);
3005 malloc_spin_lock(&arena->lock);
3007 ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
3008 if (ret == NULL) {
3009 malloc_spin_unlock(&arena->lock);
3010 return (NULL);
3011 }
3013 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
3015 offset = (uintptr_t)ret & (alignment - 1);
3016 assert((offset & pagesize_mask) == 0);
3017 assert(offset < alloc_size);
3018 if (offset == 0)
3019 arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
3020 else {
3021 size_t leadsize, trailsize;
3023 leadsize = alignment - offset;
3024 if (leadsize > 0) {
3025 arena_run_trim_head(arena, chunk, ret, alloc_size,
3026 alloc_size - leadsize);
3027 ret = (void *)((uintptr_t)ret + leadsize);
3028 }
3030 trailsize = alloc_size - leadsize - size;
3031 if (trailsize != 0) {
3032 /* Trim trailing space. */
3033 assert(trailsize < alloc_size);
3034 arena_run_trim_tail(arena, chunk, ret, size + trailsize,
3035 size, false);
3036 }
3037 }
3040 arena->stats.nmalloc_large++;
3041 arena->stats.allocated_large += size;
3043 malloc_spin_unlock(&arena->lock);
3045 if (opt_junk)
3046 memset(ret, 0xa5, size);
3047 else if (opt_zero)
3048 memset(ret, 0, size);
3050 return (ret);
3052 static inline void *
3053 ipalloc(size_t alignment, size_t size)
3059 * Round size up to the nearest multiple of alignment.
3061 * This done, we can take advantage of the fact that for each small
3062 * size class, every object is aligned at the smallest power of two
3063 * that is non-zero in the base two representation of the size. For
3066 * Size | Base 2 | Minimum alignment
3067 * -----+----------+------------------
3068 * 96 | 1100000 | 32
3069 * 144 | 10010000 | 16
3070 * 192 | 11000000 | 64
3072 * Depending on runtime settings, it is possible that arena_malloc()
3073 * will further round up to a power of two, but that never causes
3074 * correctness issues.
3076 ceil_size = (size + (alignment - 1)) & (-alignment);
3078 * (ceil_size < size) protects against the combination of maximal
3079 * alignment and size greater than maximal alignment.
3081 if (ceil_size < size) {
3082 /* size_t overflow. */
3083 return (NULL);
3084 }
3086 if (ceil_size <= pagesize || (alignment <= pagesize
3087 && ceil_size <= arena_maxclass))
3088 ret = arena_malloc(choose_arena(), ceil_size, false);
3089 else {
3093 * We can't achieve sub-page alignment, so round up alignment
3094 * permanently; it makes later calculations simpler.
3096 alignment = PAGE_CEILING(alignment);
3097 ceil_size = PAGE_CEILING(size);
3099 * (ceil_size < size) protects against very large sizes within
3100 * pagesize of SIZE_T_MAX.
3102 * (ceil_size + alignment < ceil_size) protects against the
3103 * combination of maximal alignment and ceil_size large enough
3104 * to cause overflow. This is similar to the first overflow
3105 * check above, but it needs to be repeated due to the new
3106 * ceil_size value, which may now be *equal* to maximal
3107 * alignment, whereas before we only detected overflow if the
3108 * original size was *greater* than maximal alignment.
3110 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3111 /* size_t overflow. */
3112 return (NULL);
3113 }
3116 * Calculate the size of the over-size run that arena_palloc()
3117 * would need to allocate in order to guarantee the alignment.
3119 if (ceil_size >= alignment)
3120 run_size = ceil_size + alignment - pagesize;
3121 else {
3123 * It is possible that (alignment << 1) will cause
3124 * overflow, but it doesn't matter because we also
3125 * subtract pagesize, which in the case of overflow
3126 * leaves us with a very large run_size. That causes
3127 * the first conditional below to fail, which means
3128 * that the bogus run_size value never gets used for
3129 * anything important.
3131 run_size = (alignment << 1) - pagesize;
3134 if (run_size <= arena_maxclass) {
3135 ret = arena_palloc(choose_arena(), alignment, ceil_size,
3136 run_size);
3137 } else if (alignment <= chunksize)
3138 ret = huge_malloc(ceil_size, false);
3139 else
3140 ret = huge_palloc(alignment, ceil_size);
3141 }
3143 assert(((uintptr_t)ret & (alignment - 1)) == 0);
3144 return (ret);
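/*
 * [Editor's note] A condensed sketch of the arithmetic used above:
 * round the size up to a multiple of the (power-of-two) alignment,
 * detect size_t wrap-around, and compute the over-size run from which
 * an aligned region can always be carved.  The ex_ name is
 * hypothetical.
 */
#include <stddef.h>

static int
ex_aligned_run_size(size_t size, size_t alignment, size_t pagesize,
    size_t *run_size)
{
	size_t ceil_size = (size + (alignment - 1)) & (-alignment);

	if (ceil_size < size)
		return (-1);	/* size_t overflow. */
	if (ceil_size >= alignment)
		*run_size = ceil_size + alignment - pagesize;
	else {
		/* Overflow of (alignment << 1) is benign; see above. */
		*run_size = (alignment << 1) - pagesize;
	}
	return (0);
}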
3147 /* Return the size of the allocation pointed to by ptr. */
3149 arena_salloc(const void *ptr)
3152 arena_chunk_t *chunk;
3153 size_t pageind, mapbits;
3155 assert(ptr != NULL);
3156 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3158 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3159 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
3160 mapbits = chunk->map[pageind].bits;
3161 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3162 if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3163 arena_run_t *run = (arena_run_t *)(mapbits & ~pagesize_mask);
3164 assert(run->magic == ARENA_RUN_MAGIC);
3165 ret = run->bin->reg_size;
3166 } else
3167 ret = mapbits & ~pagesize_mask;
3169 return (ret);
3174 static inline size_t
3175 isalloc(const void *ptr)
3178 arena_chunk_t *chunk;
3180 assert(ptr != NULL);
3182 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3183 if (chunk != ptr) {
3184 /* Region. */
3185 assert(chunk->arena->magic == ARENA_MAGIC);
3187 ret = arena_salloc(ptr);
3188 } else {
3189 extent_node_t *node, key;
3191 /* Chunk (huge allocation). */
3193 malloc_mutex_lock(&huge_mtx);
3195 /* Extract from tree of huge allocations. */
3196 key.addr = __DECONST(void *, ptr);
3197 node = extent_tree_ad_search(&huge, &key);
3198 assert(node != NULL);
3200 ret = node->size;
3202 malloc_mutex_unlock(&huge_mtx);
3203 }
3205 return (ret);
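/*
 * [Editor's note] isalloc() distinguishes huge from arena pointers via
 * the CHUNK_ADDR2BASE() trick: chunks are chunksize-aligned, so masking
 * a pointer down to its chunk base is the identity exactly when the
 * pointer is itself a chunk (a huge allocation).  A sketch with an
 * assumed 1 MB chunk size:
 */
#include <stdint.h>

#define EX_CHUNKSIZE_MASK (((uintptr_t)1 << 20) - 1)	/* assumption */

static int
ex_ptr_is_huge(const void *ptr)
{

	return (((uintptr_t)ptr & ~EX_CHUNKSIZE_MASK) == (uintptr_t)ptr);
}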
3209 arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3210 arena_chunk_map_t *mapelm)
3212 arena_run_t *run;
3213 arena_bin_t *bin;
3214 size_t size;
3216 run = (arena_run_t *)(mapelm->bits & ~pagesize_mask);
3217 assert(run->magic == ARENA_RUN_MAGIC);
3218 bin = run->bin;
3219 size = bin->reg_size;
3221 if (opt_junk)
3222 memset(ptr, 0x5a, size);
3224 arena_run_reg_dalloc(run, bin, ptr, size);
3225 run->nfree++;
3227 if (run->nfree == bin->nregs) {
3228 /* Deallocate run. */
3229 if (run == bin->runcur)
3230 bin->runcur = NULL;
3231 else if (bin->nregs != 1) {
3232 size_t run_pageind = (((uintptr_t)run -
3233 (uintptr_t)chunk)) >> pagesize_2pow;
3234 arena_chunk_map_t *run_mapelm =
3235 &chunk->map[run_pageind];
3237 * This block's conditional is necessary because if the
3238 * run only contains one region, then it never gets
3239 * inserted into the non-full runs tree.
3241 arena_run_tree_remove(&bin->runs, run_mapelm);
3246 arena_run_dalloc(arena, run, true);
3248 bin->stats.curruns--;
3250 } else if (run->nfree == 1 && run != bin->runcur) {
3252 * Make sure that bin->runcur always refers to the lowest
3253 * non-full run, if one exists.
3255 if (bin->runcur == NULL)
3256 bin->runcur = run;
3257 else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
3258 /* Switch runcur. */
3259 if (bin->runcur->nfree > 0) {
3260 arena_chunk_t *runcur_chunk =
3261 CHUNK_ADDR2BASE(bin->runcur);
3262 size_t runcur_pageind =
3263 (((uintptr_t)bin->runcur -
3264 (uintptr_t)runcur_chunk)) >> pagesize_2pow;
3265 arena_chunk_map_t *runcur_mapelm =
3266 &runcur_chunk->map[runcur_pageind];
3268 /* Insert runcur. */
3269 arena_run_tree_insert(&bin->runs,
3270 runcur_mapelm);
3271 }
3272 bin->runcur = run;
3273 } else {
3274 size_t run_pageind = (((uintptr_t)run -
3275 (uintptr_t)chunk)) >> pagesize_2pow;
3276 arena_chunk_map_t *run_mapelm =
3277 &chunk->map[run_pageind];
3279 assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
3280 NULL);
3281 arena_run_tree_insert(&bin->runs, run_mapelm);
3285 arena->stats.allocated_small -= size;
3286 arena->stats.ndalloc_small++;
3291 arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
3293 /* Large allocation. */
3294 malloc_spin_lock(&arena->lock);
3296 #ifndef MALLOC_STATS
3300 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
3301 pagesize_2pow;
3302 size_t size = chunk->map[pageind].bits & ~pagesize_mask;
3307 memset(ptr, 0x5a, size);
3309 arena->stats.allocated_large -= size;
3313 arena->stats.ndalloc_large++;
3316 arena_run_dalloc(arena, (arena_run_t *)ptr, true);
3317 malloc_spin_unlock(&arena->lock);
3321 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
3323 size_t pageind;
3324 arena_chunk_map_t *mapelm;
3326 assert(arena != NULL);
3327 assert(arena->magic == ARENA_MAGIC);
3328 assert(chunk->arena == arena);
3329 assert(ptr != NULL);
3330 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3332 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
3333 mapelm = &chunk->map[pageind];
3334 assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
3335 if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
3336 /* Small allocation. */
3337 malloc_spin_lock(&arena->lock);
3338 arena_dalloc_small(arena, chunk, ptr, mapelm);
3339 malloc_spin_unlock(&arena->lock);
3340 } else
3341 arena_dalloc_large(arena, chunk, ptr);
3342 }
3347 arena_chunk_t *chunk;
3349 assert(ptr != NULL);
3351 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3352 if (chunk != ptr)
3353 arena_dalloc(chunk->arena, chunk, ptr);
3354 else
3355 huge_dalloc(ptr);
3359 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3360 size_t size, size_t oldsize)
3363 assert(size < oldsize);
3365 /*
3366 * Shrink the run, and make trailing pages available for other
3367 * allocations.
3368 */
3369 #ifdef MALLOC_BALANCE
3370 arena_lock_balance(arena);
3372 malloc_spin_lock(&arena->lock);
3374 arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
3375 true);
3377 arena->stats.allocated_large -= oldsize - size;
3379 malloc_spin_unlock(&arena->lock);
3383 arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3384 size_t size, size_t oldsize)
3386 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow;
3387 size_t npages = oldsize >> pagesize_2pow;
3389 assert(oldsize == (chunk->map[pageind].bits & ~pagesize_mask));
3391 /* Try to extend the run. */
3392 assert(size > oldsize);
3393 #ifdef MALLOC_BALANCE
3394 arena_lock_balance(arena);
3396 malloc_spin_lock(&arena->lock);
3398 if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
3399 & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
3400 ~pagesize_mask) >= size - oldsize) {
3402 * The next run is available and sufficiently large. Split the
3403 * following run, then merge the first part with the existing
3404 * allocation.
3405 */
3406 arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
3407 ((pageind+npages) << pagesize_2pow)), size - oldsize, true,
3408 false);
3410 chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
3411 CHUNK_MAP_ALLOCATED;
3412 chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
3413 CHUNK_MAP_ALLOCATED;
3416 arena->stats.allocated_large += size - oldsize;
3418 malloc_spin_unlock(&arena->lock);
3419 return (false);
3420 }
3421 malloc_spin_unlock(&arena->lock);
3422 return (true);
3427 * Try to resize a large allocation, in order to avoid copying. This
3428 * always fails when growing an object whose following run is already in use.
3431 arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
3435 psize = PAGE_CEILING(size);
3436 if (psize == oldsize) {
3437 /* Same size class. */
3438 if (opt_junk && size < oldsize) {
3439 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
3440 size);
3441 }
3442 return (false);
3443 } else {
3444 arena_chunk_t *chunk;
3447 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3448 arena = chunk->arena;
3449 assert(arena->magic == ARENA_MAGIC);
3451 if (psize < oldsize) {
3452 /* Fill before shrinking in order to avoid a race. */
3453 if (opt_junk) {
3454 memset((void *)((uintptr_t)ptr + size), 0x5a,
3455 oldsize - size);
3456 }
3457 arena_ralloc_large_shrink(arena, chunk, ptr, psize,
3458 oldsize);
3459 return (false);
3460 } else {
3461 bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
3462 psize, oldsize);
3463 if (ret == false && opt_zero) {
3464 memset((void *)((uintptr_t)ptr + oldsize), 0,
3465 size - oldsize);
3466 }
3467 return (ret);
3468 }
3469 }
3473 arena_ralloc(void *ptr, size_t size, size_t oldsize)
3478 /* Try to avoid moving the allocation. */
3479 if (size < small_min) {
3480 if (oldsize < small_min &&
3481 ffs((int)(pow2_ceil(size) >> (TINY_MIN_2POW + 1)))
3482 == ffs((int)(pow2_ceil(oldsize) >> (TINY_MIN_2POW + 1))))
3483 goto IN_PLACE; /* Same size class. */
3484 } else if (size <= small_max) {
3485 if (oldsize >= small_min && oldsize <= small_max &&
3486 (QUANTUM_CEILING(size) >> opt_quantum_2pow)
3487 == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
3488 goto IN_PLACE; /* Same size class. */
3489 } else if (size <= bin_maxclass) {
3490 if (oldsize > small_max && oldsize <= bin_maxclass &&
3491 pow2_ceil(size) == pow2_ceil(oldsize))
3492 goto IN_PLACE; /* Same size class. */
3493 } else if (oldsize > bin_maxclass && oldsize <= arena_maxclass) {
3494 assert(size > bin_maxclass);
3495 if (arena_ralloc_large(ptr, size, oldsize) == false)
3496 return (ptr);
3497 }
3500 * If we get here, then size and oldsize are different enough that we
3501 * need to move the object. In that case, fall back to allocating new
3502 * space and copying.
3504 ret = arena_malloc(choose_arena(), size, false);
3505 if (ret == NULL)
3506 return (NULL);
3508 /* Junk/zero-filling were already done by arena_malloc(). */
3509 copysize = (size < oldsize) ? size : oldsize;
3510 memcpy(ret, ptr, copysize);
3511 idalloc(ptr);
3512 return (ret);
3513 IN_PLACE:
3514 if (opt_junk && size < oldsize)
3515 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
3516 else if (opt_zero && size > oldsize)
3517 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
3518 return (ptr);
3521 static inline void *
3522 iralloc(void *ptr, size_t size)
3526 assert(ptr != NULL);
3529 oldsize = isalloc(ptr);
3531 if (size <= arena_maxclass)
3532 return (arena_ralloc(ptr, size, oldsize));
3533 else
3534 return (huge_ralloc(ptr, size, oldsize));
3538 arena_new(arena_t *arena)
3542 size_t prev_run_size;
3544 if (malloc_spin_init(&arena->lock))
3545 return (true);
3548 memset(&arena->stats, 0, sizeof(arena_stats_t));
3551 /* Initialize chunks. */
3552 arena_chunk_tree_dirty_new(&arena->chunks_dirty);
3553 arena->spare = NULL;
3557 arena_avail_tree_new(&arena->runs_avail);
3559 #ifdef MALLOC_BALANCE
3560 arena->contention = 0;
3563 /* Initialize bins. */
3564 prev_run_size = pagesize;
3566 /* (2^n)-spaced tiny bins. */
3567 for (i = 0; i < ntbins; i++) {
3568 bin = &arena->bins[i];
3569 bin->runcur = NULL;
3570 arena_run_tree_new(&bin->runs);
3572 bin->reg_size = (1U << (TINY_MIN_2POW + i));
3574 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
3577 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
3581 /* Quantum-spaced bins. */
3582 for (; i < ntbins + nqbins; i++) {
3583 bin = &arena->bins[i];
3584 bin->runcur = NULL;
3585 arena_run_tree_new(&bin->runs);
3587 bin->reg_size = quantum * (i - ntbins + 1);
3589 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
3592 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
3596 /* (2^n)-spaced sub-page bins. */
3597 for (; i < ntbins + nqbins + nsbins; i++) {
3598 bin = &arena->bins[i];
3599 bin->runcur = NULL;
3600 arena_run_tree_new(&bin->runs);
3602 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
3604 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
3607 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
3612 arena->magic = ARENA_MAGIC;
3614 return (false);
3618 /* Create a new arena and insert it into the arenas array at index ind. */
3620 arenas_extend(unsigned ind)
3624 /* Allocate enough space for trailing bins. */
3625 ret = (arena_t *)base_alloc(sizeof(arena_t)
3626 + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
3627 if (ret != NULL && arena_new(ret) == false) {
3628 arenas[ind] = ret;
3629 return (ret);
3630 }
3631 /* Only reached if there is an OOM error. */
3634 * OOM here is quite inconvenient to propagate, since dealing with it
3635 * would require a check for failure in the fast path. Instead, punt
3636 * by using arenas[0]. In practice, this is an extremely unlikely
3639 _malloc_message(_getprogname(),
3640 ": (malloc) Error initializing arena\n", "", "");
3641 if (opt_abort)
3642 abort();
3644 return (arenas[0]);
3650 /******************************************************************************/
3652 * Begin general internal functions.
3656 huge_malloc(size_t size, bool zero)
3660 extent_node_t *node;
3662 /* Allocate one or more contiguous chunks for this request. */
3664 csize = CHUNK_CEILING(size);
3665 if (csize == 0) {
3666 /* size is large enough to cause size_t wrap-around. */
3667 return (NULL);
3668 }
3670 /* Allocate an extent node with which to track the chunk. */
3671 node = base_node_alloc();
3672 if (node == NULL)
3673 return (NULL);
3675 ret = chunk_alloc(csize, zero);
3676 if (ret == NULL) {
3677 base_node_dealloc(node);
3678 return (NULL);
3679 }
3681 /* Insert node into huge. */
3682 node->addr = ret;
3683 node->size = csize;
3685 malloc_mutex_lock(&huge_mtx);
3686 extent_tree_ad_insert(&huge, node);
3688 huge_nmalloc++;
3689 huge_allocated += csize;
3691 malloc_mutex_unlock(&huge_mtx);
3693 if (zero == false) {
3694 if (opt_junk)
3695 memset(ret, 0xa5, csize);
3696 else if (opt_zero)
3697 memset(ret, 0, csize);
3698 }
3700 return (ret);
3703 /* Only handles large allocations that require more than chunk alignment. */
3705 huge_palloc(size_t alignment, size_t size)
3708 size_t alloc_size, chunk_size, offset;
3709 extent_node_t *node;
3712 * This allocation requires alignment that is even larger than chunk
3713 * alignment. This means that huge_malloc() isn't good enough.
3715 * Allocate almost twice as many chunks as are demanded by the size or
3716 * alignment, in order to assure the alignment can be achieved, then
3717 * unmap leading and trailing chunks.
3719 assert(alignment >= chunksize);
3721 chunk_size = CHUNK_CEILING(size);
3723 if (size >= alignment)
3724 alloc_size = chunk_size + alignment - chunksize;
3725 else
3726 alloc_size = (alignment << 1) - chunksize;
3728 /* Allocate an extent node with which to track the chunk. */
3729 node = base_node_alloc();
3730 if (node == NULL)
3731 return (NULL);
3733 ret = chunk_alloc(alloc_size, false);
3734 if (ret == NULL) {
3735 base_node_dealloc(node);
3736 return (NULL);
3737 }
3739 offset = (uintptr_t)ret & (alignment - 1);
3740 assert((offset & chunksize_mask) == 0);
3741 assert(offset < alloc_size);
3742 if (offset == 0) {
3743 /* Trim trailing space. */
3744 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
3745 - chunk_size);
3746 } else {
3747 size_t trailsize;
3749 /* Trim leading space. */
3750 chunk_dealloc(ret, alignment - offset);
3752 ret = (void *)((uintptr_t)ret + (alignment - offset));
3754 trailsize = alloc_size - (alignment - offset) - chunk_size;
3755 if (trailsize != 0) {
3756 /* Trim trailing space. */
3757 assert(trailsize < alloc_size);
3758 chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
3759 trailsize);
3760 }
3761 }
3763 /* Insert node into huge. */
3764 node->addr = ret;
3765 node->size = chunk_size;
3767 malloc_mutex_lock(&huge_mtx);
3768 extent_tree_ad_insert(&huge, node);
3770 huge_nmalloc++;
3771 huge_allocated += chunk_size;
3773 malloc_mutex_unlock(&huge_mtx);
3775 if (opt_junk)
3776 memset(ret, 0xa5, chunk_size);
3777 else if (opt_zero)
3778 memset(ret, 0, chunk_size);
3780 return (ret);
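/*
 * [Editor's note] The over-allocate-and-trim arithmetic above in sketch
 * form: request enough extra address space that an aligned,
 * chunk-multiple region must exist inside the mapping, then compute how
 * much leading space to return.  The ex_ helpers are hypothetical.
 */
#include <stddef.h>
#include <stdint.h>

static size_t
ex_huge_alloc_size(size_t chunk_size, size_t alignment, size_t chunksize)
{

	if (chunk_size >= alignment)
		return (chunk_size + alignment - chunksize);
	return ((alignment << 1) - chunksize);
}

static size_t
ex_lead_trim(uintptr_t base, size_t alignment)
{
	size_t offset = (size_t)(base & (alignment - 1));

	/* Nothing to trim if the mapping is already aligned. */
	return ((offset == 0) ? 0 : alignment - offset);
}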
3784 huge_ralloc(void *ptr, size_t size, size_t oldsize)
3789 /* Avoid moving the allocation if the size class would not change. */
3790 if (oldsize > arena_maxclass &&
3791 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
3792 if (opt_junk && size < oldsize) {
3793 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
3794 - size);
3795 } else if (opt_zero && size > oldsize) {
3796 memset((void *)((uintptr_t)ptr + oldsize), 0, size
3797 - oldsize);
3798 }
3799 return (ptr);
3800 }
3803 * If we get here, then size and oldsize are different enough that we
3804 * need to use a different size class. In that case, fall back to
3805 * allocating new space and copying.
3807 ret = huge_malloc(size, false);
3808 if (ret == NULL)
3809 return (NULL);
3811 copysize = (size < oldsize) ? size : oldsize;
3812 memcpy(ret, ptr, copysize);
3813 idalloc(ptr);
3814 return (ret);
3818 huge_dalloc(void *ptr)
3820 extent_node_t *node, key;
3822 malloc_mutex_lock(&huge_mtx);
3824 /* Extract from tree of huge allocations. */
3825 key.addr = ptr;
3826 node = extent_tree_ad_search(&huge, &key);
3827 assert(node != NULL);
3828 assert(node->addr == ptr);
3829 extent_tree_ad_remove(&huge, node);
3832 huge_ndalloc++;
3833 huge_allocated -= node->size;
3836 malloc_mutex_unlock(&huge_mtx);
3840 if (opt_dss && opt_junk)
3841 memset(node->addr, 0x5a, node->size);
3843 chunk_dealloc(node->addr, node->size);
3845 base_node_dealloc(node);
3849 malloc_print_stats(void)
3852 if (opt_print_stats) {
3853 char s[UMAX2S_BUFSIZE];
3854 _malloc_message("___ Begin malloc statistics ___\n", "", "",
3856 _malloc_message("Assertions ",
3863 _malloc_message("Boolean MALLOC_OPTIONS: ",
3864 opt_abort ? "A" : "a", "", "");
3866 _malloc_message(opt_dss ? "D" : "d", "", "", "");
3868 _malloc_message(opt_junk ? "J" : "j", "", "", "");
3870 _malloc_message(opt_mmap ? "M" : "m", "", "", "");
3872 _malloc_message(opt_utrace ? "PU" : "Pu",
3873 opt_sysv ? "V" : "v",
3874 opt_xmalloc ? "X" : "x",
3875 opt_zero ? "Z\n" : "z\n");
3877 _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
3878 _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
3879 #ifdef MALLOC_BALANCE
3880 _malloc_message("Arena balance threshold: ",
3881 umax2s(opt_balance_threshold, s), "\n", "");
3883 _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
3885 _malloc_message("Quantum size: ", umax2s(quantum, s), "\n", "");
3886 _malloc_message("Max small size: ", umax2s(small_max, s), "\n",
3888 _malloc_message("Max dirty pages per arena: ",
3889 umax2s(opt_dirty_max, s), "\n", "");
3891 _malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
3892 _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
3896 size_t allocated, mapped;
3897 #ifdef MALLOC_BALANCE
3898 uint64_t nbalance = 0;
3903 /* Calculate and print allocated/mapped stats. */
3906 for (i = 0, allocated = 0; i < narenas; i++) {
3907 if (arenas[i] != NULL) {
3908 malloc_spin_lock(&arenas[i]->lock);
3909 allocated +=
3910 arenas[i]->stats.allocated_small;
3911 allocated +=
3912 arenas[i]->stats.allocated_large;
3913 #ifdef MALLOC_BALANCE
3914 nbalance += arenas[i]->stats.nbalance;
3916 malloc_spin_unlock(&arenas[i]->lock);
3921 malloc_mutex_lock(&huge_mtx);
3922 allocated += huge_allocated;
3923 mapped = stats_chunks.curchunks * chunksize;
3924 malloc_mutex_unlock(&huge_mtx);
3926 malloc_mutex_lock(&base_mtx);
3927 mapped += base_mapped;
3928 malloc_mutex_unlock(&base_mtx);
3930 malloc_printf("Allocated: %zu, mapped: %zu\n",
3933 #ifdef MALLOC_BALANCE
3934 malloc_printf("Arena balance reassignments: %llu\n",
3938 /* Print chunk stats. */
3940 chunk_stats_t chunks_stats;
3942 malloc_mutex_lock(&huge_mtx);
3943 chunks_stats = stats_chunks;
3944 malloc_mutex_unlock(&huge_mtx);
3946 malloc_printf("chunks: nchunks "
3947 "highchunks curchunks\n");
3948 malloc_printf(" %13llu%13lu%13lu\n",
3949 chunks_stats.nchunks,
3950 chunks_stats.highchunks,
3951 chunks_stats.curchunks);
3954 /* Print huge stats. */
3955 malloc_printf(
3956 "huge: nmalloc ndalloc allocated\n");
3957 malloc_printf(" %12llu %12llu %12zu\n",
3958 huge_nmalloc, huge_ndalloc, huge_allocated);
3960 /* Print stats for each arena. */
3961 for (i = 0; i < narenas; i++) {
3962 arena = arenas[i];
3963 if (arena != NULL) {
3964 malloc_printf(
3965 "\narenas[%u]:\n", i);
3966 malloc_spin_lock(&arena->lock);
3967 stats_print(arena);
3968 malloc_spin_unlock(&arena->lock);
3972 #endif /* #ifdef MALLOC_STATS */
3973 _malloc_message("--- End malloc statistics ---\n", "", "", "");
3978 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3979 * implementation has to take pains to avoid infinite recursion during
3980 * initialization.
3981 */
3986 if (malloc_initialized == false)
3987 return (malloc_init_hard());
3989 return (false);
3993 malloc_init_hard(void)
3997 char buf[PATH_MAX + 1];
4000 malloc_mutex_lock(&init_lock);
4001 if (malloc_initialized) {
4003 * Another thread initialized the allocator before this one
4004 * acquired init_lock.
4006 malloc_mutex_unlock(&init_lock);
4010 /* Get number of CPUs. */
4017 len = sizeof(ncpus);
4018 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
4019 /* Error. */
4020 ncpus = 1;
4021 }
4024 /* Get page size. */
4028 result = sysconf(_SC_PAGESIZE);
4029 assert(result != -1);
4030 pagesize = (unsigned)result;
4033 * We assume that pagesize is a power of 2 when calculating
4034 * pagesize_mask and pagesize_2pow.
4036 assert(((result - 1) & result) == 0);
4037 pagesize_mask = result - 1;
4038 pagesize_2pow = ffs((int)result) - 1;
4041 for (i = 0; i < 3; i++) {
4044 /* Get runtime configuration. */
4047 if ((linklen = readlink("/etc/malloc.conf", buf,
4048 sizeof(buf) - 1)) != -1) {
4050 * Use the contents of the "/etc/malloc.conf"
4051 * symbolic link's name.
4053 buf[linklen] = '\0';
4056 /* No configuration specified. */
4062 if (issetugid() == 0 && (opts =
4063 getenv("MALLOC_OPTIONS")) != NULL) {
4065 * Do nothing; opts is already initialized to
4066 * the value of the MALLOC_OPTIONS environment
4070 /* No configuration specified. */
4076 if (_malloc_options != NULL) {
4078 * Use options that were compiled into the
4081 opts = _malloc_options;
4083 /* No configuration specified. */
4093 for (j = 0; opts[j] != '\0'; j++) {
4097 /* Parse repetition count, if any. */
4098 for (nreps = 0, nseen = false;; j++, nseen = true) {
4100 case '0': case '1': case '2': case '3':
4101 case '4': case '5': case '6': case '7':
4104 nreps += opts[j] - '0';
4114 for (k = 0; k < nreps; k++) {
4123 #ifdef MALLOC_BALANCE
4124 opt_balance_threshold >>= 1;
4128 #ifdef MALLOC_BALANCE
4129 if (opt_balance_threshold == 0)
4130 opt_balance_threshold = 1;
4131 else if ((opt_balance_threshold << 1)
4132 > opt_balance_threshold)
4133 opt_balance_threshold <<= 1;
4147 opt_dirty_max >>= 1;
4150 if (opt_dirty_max == 0)
4152 else if ((opt_dirty_max << 1) != 0)
4153 opt_dirty_max <<= 1;
4156 /* Compatibility hack for RELENG_7. */
4157 opt_dirty_max = DIRTY_MAX_DEFAULT;
4160 /* Compatibility hack for RELENG_7. */
4171 * Chunks always require at least one
4172 * header page, so chunks can never be
4173 * smaller than two pages.
4175 if (opt_chunk_2pow > pagesize_2pow + 1)
4179 if (opt_chunk_2pow + 1 <
4180 (sizeof(size_t) << 3))
4194 opt_narenas_lshift--;
4197 opt_narenas_lshift++;
4200 opt_print_stats = false;
4203 opt_print_stats = true;
4206 if (opt_quantum_2pow > QUANTUM_2POW_MIN)
4210 if (opt_quantum_2pow < pagesize_2pow -
4215 if (opt_small_max_2pow >
4217 opt_small_max_2pow--;
4220 if (opt_small_max_2pow < pagesize_2pow
4222 opt_small_max_2pow++;
4237 opt_xmalloc = false;
4253 _malloc_message(_getprogname(),
4254 ": (malloc) Unsupported character "
4255 "in malloc options: '", cbuf,
4264 /* Make sure that there is some method for acquiring memory. */
4265 if (opt_dss == false && opt_mmap == false)
4269 /* Take care to call atexit() only once. */
4270 if (opt_print_stats) {
4271 /* Print statistics at exit. */
4272 atexit(malloc_print_stats);
4275 /* Set variables according to the value of opt_small_max_2pow. */
4276 if (opt_small_max_2pow < opt_quantum_2pow)
4277 opt_small_max_2pow = opt_quantum_2pow;
4278 small_max = (1U << opt_small_max_2pow);
4280 /* Set bin-related variables. */
4281 bin_maxclass = (pagesize >> 1);
4282 assert(opt_quantum_2pow >= TINY_MIN_2POW);
4283 ntbins = opt_quantum_2pow - TINY_MIN_2POW;
4284 assert(ntbins <= opt_quantum_2pow);
4285 nqbins = (small_max >> opt_quantum_2pow);
4286 nsbins = pagesize_2pow - opt_small_max_2pow - 1;
4288 /* Set variables according to the value of opt_quantum_2pow. */
4289 quantum = (1U << opt_quantum_2pow);
4290 quantum_mask = quantum - 1;
4292 small_min = (quantum >> 1) + 1;
4295 assert(small_min <= quantum);
4297 /* Set variables according to the value of opt_chunk_2pow. */
4298 chunksize = (1LU << opt_chunk_2pow);
4299 chunksize_mask = chunksize - 1;
4300 chunk_npages = (chunksize >> pagesize_2pow);
4305 * Compute the header size such that it is large
4306 * enough to contain the page map and enough nodes for the
4307 * worst case: one node per non-header page plus one extra for
4308 * situations where we briefly have one more node allocated
4309 * than we will need.
4311 header_size = sizeof(arena_chunk_t) +
4312 (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
4313 arena_chunk_header_npages = (header_size >> pagesize_2pow) +
4314 ((header_size & pagesize_mask) != 0);
4316 arena_maxclass = chunksize - (arena_chunk_header_npages <<
4317 pagesize_2pow);
4322 memset(&stats_chunks, 0, sizeof(chunk_stats_t));
4325 /* Various sanity checks that regard configuration. */
4326 assert(quantum >= sizeof(void *));
4327 assert(quantum <= pagesize);
4328 assert(chunksize >= pagesize);
4329 assert(quantum * 4 <= chunksize);
4331 /* Initialize chunks data. */
4332 malloc_mutex_init(&huge_mtx);
4333 extent_tree_ad_new(&huge);
4335 malloc_mutex_init(&dss_mtx);
4337 dss_prev = dss_base;
4339 extent_tree_szad_new(&dss_chunks_szad);
4340 extent_tree_ad_new(&dss_chunks_ad);
4348 /* Initialize base allocation data structures. */
4354 * Allocate a base chunk here, since it doesn't actually have to be
4355 * chunk-aligned. Doing this before allocating any other chunks allows
4356 * the use of space that would otherwise be wasted.
4359 base_pages_alloc(0);
4362 malloc_mutex_init(&base_mtx);
4364 if (ncpus > 1) {
4365 /*
4366 * For SMP systems, create four times as many arenas as there
4367 * are CPUs by default.
4368 */
4369 opt_narenas_lshift += 2;
4370 }
4372 /* Determine how many arenas to use. */
4373 narenas = ncpus;
4374 if (opt_narenas_lshift > 0) {
4375 if ((narenas << opt_narenas_lshift) > narenas)
4376 narenas <<= opt_narenas_lshift;
4378 * Make sure not to exceed the limits of what base_alloc() can
4381 if (narenas * sizeof(arena_t *) > chunksize)
4382 narenas = chunksize / sizeof(arena_t *);
4383 } else if (opt_narenas_lshift < 0) {
4384 if ((narenas >> -opt_narenas_lshift) < narenas)
4385 narenas >>= -opt_narenas_lshift;
4386 /* Make sure there is at least one arena. */
4387 if (narenas == 0)
4388 narenas = 1;
4389 }
4390 #ifdef MALLOC_BALANCE
4391 assert(narenas != 0);
4392 for (narenas_2pow = 0;
4393 (narenas >> (narenas_2pow + 1)) != 0;
4394 narenas_2pow++);
4399 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
4400 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
4401 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
4402 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
4403 223, 227, 229, 233, 239, 241, 251, 257, 263};
4404 unsigned nprimes, parenas;
4407 * Pick a prime number of hash arenas that is more than narenas
4408 * so that direct hashing of pthread_self() pointers tends to
4409 * spread allocations evenly among the arenas.
4411 assert((narenas & 1) == 0); /* narenas must be even. */
4412 nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
4413 parenas = primes[nprimes - 1]; /* In case not enough primes. */
4414 for (i = 1; i < nprimes; i++) {
4415 if (primes[i] > narenas) {
4416 parenas = primes[i];
4417 break;
4418 }
4419 }
4420 narenas = parenas;
4425 # ifndef MALLOC_BALANCE
4430 /* Allocate and initialize arenas. */
4431 arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
4432 if (arenas == NULL) {
4433 malloc_mutex_unlock(&init_lock);
4434 return (true);
4435 }
4437 * Zero the array. In practice, this should always be pre-zeroed,
4438 * since it was just mmap()ed, but let's be sure.
4440 memset(arenas, 0, sizeof(arena_t *) * narenas);
4443 * Initialize one arena here. The rest are lazily created in
4444 * choose_arena_hard().
4445 */
4446 arenas_extend(0);
4447 if (arenas[0] == NULL) {
4448 malloc_mutex_unlock(&init_lock);
4449 return (true);
4450 }
4453 * Assign the initial arena to the initial thread, in order to avoid
4454 * spurious creation of an extra arena if the application switches to
4457 arenas_map = arenas[0];
4460 * Seed here for the initial thread, since choose_arena_hard() is only
4461 * called for other threads. The seed value doesn't really matter.
4463 #ifdef MALLOC_BALANCE
4467 malloc_spin_init(&arenas_lock);
4469 malloc_initialized = true;
4470 malloc_mutex_unlock(&init_lock);
4472 return (false);
4475 * End general internal functions.
4477 /******************************************************************************/
4479 * Begin malloc(3)-compatible functions.
4487 if (malloc_init()) {
4488 ret = NULL;
4489 goto RETURN;
4490 }
4492 if (size == 0) {
4493 if (opt_sysv == false)
4494 size = 1;
4495 else {
4496 ret = NULL;
4497 goto RETURN;
4498 }
4499 }
4501 ret = imalloc(size);
4506 _malloc_message(_getprogname(),
4507 ": (malloc) Error in malloc(): out of memory\n", "",
4508 "");
4509 abort();
4510 }
4514 UTRACE(0, size, ret);
4515 return (ret);
4519 posix_memalign(void **memptr, size_t alignment, size_t size)
4527 /* Make sure that alignment is a large enough power of 2. */
4528 if (((alignment - 1) & alignment) != 0
4529 || alignment < sizeof(void *)) {
4531 _malloc_message(_getprogname(),
4532 ": (malloc) Error in posix_memalign(): "
4533 "invalid alignment\n", "", "");
4541 result = ipalloc(alignment, size);
4544 if (result == NULL) {
4546 _malloc_message(_getprogname(),
4547 ": (malloc) Error in posix_memalign(): out of memory\n",
4548 "", "");
4549 abort();
4550 }
4559 UTRACE(0, size, result);
4560 return (ret);
4564 calloc(size_t num, size_t size)
4569 if (malloc_init()) {
4575 num_size = num * size;
4576 if (num_size == 0) {
4577 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
4578 num_size = 1;
4579 else {
4580 ret = NULL;
4581 goto RETURN;
4582 }
4584 * Try to avoid division here. We know that it isn't possible to
4585 * overflow during multiplication if neither operand uses any of the
4586 * most significant half of the bits in a size_t.
4588 } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
4589 && (num_size / size != num)) {
4590 /* size_t overflow. */
4595 ret = icalloc(num_size);
4600 _malloc_message(_getprogname(),
4601 ": (malloc) Error in calloc(): out of memory\n", "",
4602 "");
4603 abort();
4604 }
4608 UTRACE(0, num_size, ret);
4609 return (ret);
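/*
 * [Editor's note] The overflow test in calloc() above avoids the
 * division in the common case: if neither operand touches the upper
 * half of size_t's bits, the product cannot wrap.  A self-contained
 * sketch (the ex_ name is hypothetical; the zero-size case is assumed
 * to be handled earlier, as it is in calloc()):
 */
#include <stdint.h>

static int
ex_mul_overflows(size_t num, size_t size, size_t *prod)
{
	const size_t half_mask = SIZE_MAX << (sizeof(size_t) << 2);

	*prod = num * size;
	if (((num | size) & half_mask) != 0 && size != 0 &&
	    *prod / size != num)
		return (1);
	return (0);
}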
4613 realloc(void *ptr, size_t size)
4617 if (size == 0) {
4618 if (opt_sysv == false)
4619 size = 1;
4620 else {
4621 if (ptr != NULL)
4622 idalloc(ptr);
4623 ret = NULL;
4624 goto RETURN;
4625 }
4626 }
4628 if (ptr != NULL) {
4629 assert(malloc_initialized);
4631 ret = iralloc(ptr, size);
4635 _malloc_message(_getprogname(),
4636 ": (malloc) Error in realloc(): out of "
4637 "memory\n", "", "");
4646 ret = imalloc(size);
4650 _malloc_message(_getprogname(),
4651 ": (malloc) Error in realloc(): out of "
4652 "memory\n", "", "");
4660 UTRACE(ptr, size, ret);
4661 return (ret);
4670 assert(malloc_initialized);
4672 idalloc(ptr);
4677 * End malloc(3)-compatible functions.
4679 /******************************************************************************/
4681 * Begin non-standard functions.
4685 malloc_usable_size(const void *ptr)
4688 assert(ptr != NULL);
4690 return (isalloc(ptr));
4694 * End non-standard functions.
4698 * We provide an unpublished interface in order to receive notifications from
4699 * the pthreads library whenever a thread exits. This allows us to clean up
4703 _malloc_thread_cleanup(void)
4707 /******************************************************************************/
4709 * Begin library-private functions, used by threading libraries for protection
4710 * of malloc during fork(). These functions are only called if the program is
4711 * running in threaded mode, so there is no need to check whether the program
4716 _malloc_prefork(void)
4720 /* Acquire all mutexes in a safe order. */
4722 malloc_spin_lock(&arenas_lock);
4723 for (i = 0; i < narenas; i++) {
4724 if (arenas[i] != NULL)
4725 malloc_spin_lock(&arenas[i]->lock);
4727 malloc_spin_unlock(&arenas_lock);
4729 malloc_mutex_lock(&base_mtx);
4731 malloc_mutex_lock(&huge_mtx);
4734 malloc_mutex_lock(&dss_mtx);
4739 _malloc_postfork(void)
4743 /* Release all mutexes, now that fork() has completed. */
4746 malloc_mutex_unlock(&dss_mtx);
4749 malloc_mutex_unlock(&huge_mtx);
4751 malloc_mutex_unlock(&base_mtx);
4753 malloc_spin_lock(&arenas_lock);
4754 for (i = 0; i < narenas; i++) {
4755 if (arenas[i] != NULL)
4756 malloc_spin_unlock(&arenas[i]->lock);
4758 malloc_spin_unlock(&arenas_lock);
4762 * End library-private functions.
4764 /******************************************************************************/