/*-
 * Copyright (C) 2006 Jason Evans <jasone@FreeBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *******************************************************************************
 *
 * This allocator implementation is designed to provide scalable performance
 * for multi-threaded programs on multi-processor systems.  The following
 * features are included for this purpose:
 *
 *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
 *     contention and cache sloshing.
 *
 *   + Cache line sharing between arenas is avoided for internal data
 *     structures.
 *
 *   + Memory is managed in chunks and runs (chunks can be split into runs
 *     using a binary buddy scheme), rather than as individual pages.  This
 *     provides a constant-time mechanism for associating allocations with
 *     particular arenas.
 *
 * Allocation requests are rounded up to the nearest size class, and no record
 * of the original request size is maintained.  Allocations are broken into
 * categories according to size class.  Assuming runtime defaults, 4 kB pages
 * and a 16 byte quantum, the size classes in each category are as follows:
 *
 *   |====================================|
 *   | Category | Subcategory     |  Size |
 *   |====================================|
 *   | Small    | Tiny            |     2 |
 *   |          |                 |   ... |
 *   |          |-----------------+-------|
 *   |          | Quantum-spaced  |    16 |
 *   |          |                 |   ... |
 *   |          |-----------------+-------|
 *   |          | Sub-page        |  1 kB |
 *   |          |                 |   ... |
 *   |====================================|
 *   | Large    |                 |   ... |
 *   |====================================|
 *   | Huge     |                 |   ... |
 *   |====================================|
 *
 * A different mechanism is used for each category:
 *
 *   Small : Each size class is segregated into its own set of runs.  Each run
 *           maintains a bitmap of which regions are free/allocated.
 *
 *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 *           in the associated arena chunk header maps.
 *
 *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 *          Metadata are stored in a separate red-black tree.
 *
 *******************************************************************************
 */

/******************************************************************************/
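/*
 * Illustrative sketch (not part of the original source): with the defaults
 * described above, a request of a given size is conceptually routed to one of
 * the three categories as follows.  The real dispatch happens in
 * arena_malloc() and huge_malloc() further down in this file.
 *
 *	if (size <= bin_maxclass)        -> Small : carved out of a bin's run,
 *	                                            tracked by a region bitmap
 *	else if (size <= arena_maxclass) -> Large : dedicated run, tracked in
 *	                                            the chunk header map
 *	else                             -> Huge  : dedicated chunk(s), tracked
 *	                                            in a red-black tree
 */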
/* Ring definitions. */
#define qr(a_type) struct {						\
	a_type	*qre_next;	/* Link to the next ring element. */	\
	a_type	*qre_prev;	/* Link to the previous ring element. */\
}

#define qr_initializer {NULL, NULL}

/* Ring functions. */
#define qr_new(a_qr, a_field) do {					\
	(a_qr)->a_field.qre_next = (a_qr);				\
	(a_qr)->a_field.qre_prev = (a_qr);				\
} while (0)

#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)

#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)

#define qr_before_insert(a_qrelm, a_qr, a_field) do {			\
	(a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev;	\
	(a_qr)->a_field.qre_next = (a_qrelm);				\
	(a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr);		\
	(a_qrelm)->a_field.qre_prev = (a_qr);				\
} while (0)

#define qr_after_insert(a_qrelm, a_qr, a_field) do {			\
	(a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next;	\
	(a_qr)->a_field.qre_prev = (a_qrelm);				\
	(a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr);		\
	(a_qrelm)->a_field.qre_next = (a_qr);				\
} while (0)

#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do {			\
	a_type *t;							\
	(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b);	\
	(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a);	\
	t = (a_qr_a)->a_field.qre_prev;					\
	(a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev;	\
	(a_qr_b)->a_field.qre_prev = t;					\
} while (0)

/*
 * qr_meld() and qr_split() are functionally equivalent, so there's no need to
 * have two copies of the code.
 */
#define qr_split(a_qr_a, a_qr_b, a_type, a_field)			\
	qr_meld((a_qr_a), (a_qr_b), a_type, a_field)

#define qr_remove(a_qr, a_field) do {					\
	(a_qr)->a_field.qre_prev->a_field.qre_next			\
	    = (a_qr)->a_field.qre_next;					\
	(a_qr)->a_field.qre_next->a_field.qre_prev			\
	    = (a_qr)->a_field.qre_prev;					\
	(a_qr)->a_field.qre_next = (a_qr);				\
	(a_qr)->a_field.qre_prev = (a_qr);				\
} while (0)

#define qr_foreach(var, a_qr, a_field)					\
	for ((var) = (a_qr);						\
	    (var) != NULL;						\
	    (var) = (((var)->a_field.qre_next != (a_qr))		\
	    ? (var)->a_field.qre_next : NULL))

#define qr_reverse_foreach(var, a_qr, a_field)				\
	for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL;	\
	    (var) != NULL;						\
	    (var) = (((var) != (a_qr))					\
	    ? (var)->a_field.qre_prev : NULL))
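/*
 * Usage sketch for the ring macros (hypothetical types, not part of this
 * file).  A ring is a circular doubly-linked list with no distinguished head;
 * any element can serve as the entry point:
 *
 *	typedef struct widget_s widget_t;
 *	struct widget_s {
 *		int		val;
 *		qr(widget_t)	link;
 *	};
 *
 *	widget_t a, b, c, *w;
 *
 *	qr_new(&a, link);		Each element starts as a ring of one.
 *	qr_new(&b, link);
 *	qr_new(&c, link);
 *	qr_after_insert(&a, &b, link);	Ring is now a-b.
 *	qr_before_insert(&a, &c, link);	Ring is now a-b-c (c precedes a).
 *	qr_foreach(w, &a, link) {
 *		...			Visits a, b, c, then terminates.
 *	}
 *	qr_remove(&b, link);		b becomes a ring of one again.
 */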
174 /******************************************************************************/
/*
 * In order to disable various extra features that may have negative
 * performance impacts (assertions, expanded statistics), define
 * NO_MALLOC_EXTRAS.
 */
/* #define NO_MALLOC_EXTRAS */

#ifndef NO_MALLOC_EXTRAS
#  define MALLOC_DEBUG
#endif
187 #include <sys/cdefs.h>
188 __FBSDID("$FreeBSD$");
190 #include "libc_private.h"
194 #include "spinlock.h"
195 #include "namespace.h"
196 #include <sys/mman.h>
197 #include <sys/param.h>
198 #include <sys/stddef.h>
199 #include <sys/time.h>
200 #include <sys/types.h>
201 #include <sys/sysctl.h>
202 #include <sys/tree.h>
204 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
206 #include <machine/atomic.h>
207 #include <machine/cpufunc.h>
208 #include <machine/vmparam.h>
222 #include "un-namespace.h"
/*
 * Calculate statistics that can be used to get an idea of how well caching is
 * working.
 */
#ifndef NO_MALLOC_EXTRAS
#  define MALLOC_STATS
#endif
240 /* Disable inlining to make debugging easier. */
244 /* Size of stack-allocated buffer passed to strerror_r(). */
245 #define STRERROR_BUF 64
247 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
249 # define QUANTUM_2POW_MIN 4
250 # define SIZEOF_PTR 4
254 # define QUANTUM_2POW_MIN 4
255 # define SIZEOF_PTR 8
259 # define QUANTUM_2POW_MIN 4
260 # define SIZEOF_PTR 8
264 # define QUANTUM_2POW_MIN 4
265 # define SIZEOF_PTR 8
269 # define QUANTUM_2POW_MIN 4
270 # define SIZEOF_PTR 8
273 # define QUANTUM_2POW_MIN 3
274 # define SIZEOF_PTR 4
279 # define QUANTUM_2POW_MIN 4
280 # define SIZEOF_PTR 4
/* sizeof(int) == (1 << SIZEOF_INT_2POW). */
#ifndef SIZEOF_INT_2POW
#  define SIZEOF_INT_2POW	2
#endif

/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
#if (!defined(PIC) && !defined(NO_TLS))
#  define NO_TLS
#endif
/*
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.  The chunk size must satisfy:
 *
 *   2^(pagesize_2pow - 1 + RUN_MIN_REGS_2POW) <= chunk_size <= 2^28
 */
#define CHUNK_2POW_DEFAULT	21
#define CHUNK_2POW_MAX		28
/*
 * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 * so over-estimates are okay (up to a point), but under-estimates will
 * negatively affect performance.
 */
#define CACHELINE_2POW		6
#define CACHELINE		((size_t)(1 << CACHELINE_2POW))

/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define SMALL_MAX_2POW_DEFAULT	9
#define SMALL_MAX_DEFAULT	(1 << SMALL_MAX_2POW_DEFAULT)
/*
 * Minimum number of regions that must fit into a run that serves quantum-size
 * allocations.
 *
 * Note that if this is set too low, space will be wasted if there are size
 * classes that are small enough that RUN_MIN_REGS regions don't fill a page.
 * If this is set too high, then the overhead of searching through the bitmap
 * that tracks region usage will become excessive.
 */
#define RUN_MIN_REGS_2POW	10
#define RUN_MIN_REGS		(1 << RUN_MIN_REGS_2POW)
/*
 * Maximum number of pages for a run that is used for bin allocations.
 *
 * Note that if this is set too low, then fragmentation for the largest bin
 * size classes will be high.  If this is set too high, then even small
 * programs will often have to allocate more than two chunks early on.
 */
#define RUN_MAX_PAGES_2POW	4
#define RUN_MAX_PAGES		(1 << RUN_MAX_PAGES_2POW)
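/*
 * Worked example (assuming the 4 kB pages mentioned above): RUN_MIN_REGS is
 * 1 << 10 == 1024, so a run for a quantum-spaced bin is sized to hold at
 * least 1024 regions where possible, and RUN_MAX_PAGES is 1 << 4 == 16, so
 * no bin run ever exceeds 16 * 4 kB == 64 kB.
 */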
343 /******************************************************************************/
346 * Mutexes based on spinlocks. We can't use normal pthread mutexes, because
347 * they require malloc()ed memory.
353 /* Set to true once the allocator has been initialized. */
354 static bool malloc_initialized = false;
356 /* Used to avoid initialization races. */
357 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
359 /******************************************************************************/
361 * Statistics data structures.
366 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
367 struct malloc_bin_stats_s {
369 * Number of allocation requests that corresponded to the size of this
374 /* Total number of runs created for this bin's size class. */
378 * Total number of run promotions/demotions for this bin's size class.
383 /* High-water mark for this bin. */
384 unsigned long highruns;
386 /* Current number of runs in this bin. */
387 unsigned long curruns;
390 typedef struct arena_stats_s arena_stats_t;
391 struct arena_stats_s {
392 /* Total byte count of allocated memory, not including overhead. */
395 /* Number of times each function was called. */
400 /* Number of large allocation requests. */
401 uint64_t large_nrequests;
404 typedef struct chunk_stats_s chunk_stats_t;
405 struct chunk_stats_s {
406 /* Number of chunks that were allocated. */
409 /* High-water mark for number of chunks allocated. */
410 unsigned long highchunks;
413 * Current number of chunks allocated. This value isn't maintained for
414 * any other purpose, so keep track of it in order to be able to set
417 unsigned long curchunks;
420 #endif /* #ifdef MALLOC_STATS */
422 /******************************************************************************/
424 * Chunk data structures.
427 /* Tree of chunks. */
428 typedef struct chunk_node_s chunk_node_t;
429 struct chunk_node_s {
430 /* Linkage for the chunk tree. */
431 RB_ENTRY(chunk_node_s) link;
434 * Pointer to the chunk that this tree node is responsible for. In some
435 * (but certainly not all) cases, this data structure is placed at the
436 * beginning of the corresponding chunk, so this field may point to this
441 /* Total chunk size. */
444 typedef struct chunk_tree_s chunk_tree_t;
445 RB_HEAD(chunk_tree_s, chunk_node_s);
447 /******************************************************************************/
449 * Arena data structures.
452 typedef struct arena_s arena_t;
453 typedef struct arena_bin_s arena_bin_t;
455 typedef struct arena_chunk_map_s arena_chunk_map_t;
456 struct arena_chunk_map_s {
459 unsigned npages:15; /* Limiting factor for CHUNK_2POW_MAX. */
463 /* Arena chunk header. */
464 typedef struct arena_chunk_s arena_chunk_t;
465 struct arena_chunk_s {
466 /* Arena that owns the chunk. */
469 /* Linkage for the arena's chunk tree. */
470 RB_ENTRY(arena_chunk_s) link;
473 * Number of pages in use. This is maintained in order to make
474 * detection of empty chunks fast.
479 * Array of counters that keeps track of how many free runs of each
480 * size are available in this chunk. This table is sized at compile
481 * time, which is wasteful. However, due to unrelated rounding, this
482 * doesn't actually waste any otherwise useful space.
494 uint32_t nfree_runs[CHUNK_2POW_MAX/* - PAGE_SHIFT */];
496 /* Map of pages within chunk that keeps track of free/large/small. */
497 arena_chunk_map_t map[1]; /* Dynamically sized. */
499 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
500 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
502 typedef struct arena_run_s arena_run_t;
504 /* Linkage for run rings. */
505 qr(arena_run_t) link;
509 # define ARENA_RUN_MAGIC 0x384adf93
512 /* Bin this run is associated with. */
515 /* Bitmask of in-use regions (0: in use, 1: free). */
516 #define REGS_MASK_NELMS \
517 (1 << (RUN_MIN_REGS_2POW - SIZEOF_INT_2POW - 2))
518 unsigned regs_mask[REGS_MASK_NELMS];
520 /* Index of first element that might have a free region. */
521 unsigned regs_minelm;
523 /* Number of free regions in run. */
527 * Current quartile for this run, one of: {RUN_QINIT, RUN_Q0, RUN_25,
528 * RUN_Q50, RUN_Q75, RUN_Q100}.
539 * Limits on the number of free regions for the fullness quartile this
540 * run is currently in. If nfree goes outside these limits, the run
541 * is moved to a different fullness quartile.
547 /* Used for run ring headers, where the run isn't actually used. */
548 typedef struct arena_run_link_s arena_run_link_t;
549 struct arena_run_link_s {
550 /* Linkage for run rings. */
551 qr(arena_run_t) link;
556 * Current run being used to service allocations of this bin's size
562 * Links into rings of runs, of various fullnesses (names indicate
563 * approximate lower bounds). A new run conceptually starts off in
564 * runsinit, and it isn't inserted into the runs0 ring until it
565 * reaches 25% full (hysteresis mechanism). For the run to be moved
566 * again, it must become either empty or 50% full. Thus, each ring
567 * contains runs that are within 50% above the advertised fullness for
568 * the ring. This provides a low-overhead mechanism for segregating
569 * runs into approximate fullness classes.
571 * Conceptually, there is a runs100 that contains completely full runs.
572 * Since we don't need to search for these runs though, no runs100 ring
573 * is actually maintained.
575 * These rings are useful when looking for an existing run to use when
576 * runcur is no longer usable. We look for usable runs in the
584 * runs75 isn't a good place to look, because it contains runs that may
585 * be nearly completely full. Still, we look there as a last resort in
586 * order to avoid allocating a new run if at all possible.
588 /* arena_run_link_t runsinit; 0% <= fullness < 25% */
589 arena_run_link_t runs0; /* 0% < fullness < 50% */
590 arena_run_link_t runs25; /* 25% < fullness < 75% */
591 arena_run_link_t runs50; /* 50% < fullness < 100% */
592 arena_run_link_t runs75; /* 75% < fullness < 100% */
593 /* arena_run_link_t runs100; fullness == 100% */
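	/*
	 * Worked example of the hysteresis thresholds (values taken from
	 * arena_bin_run_promote()/arena_bin_run_demote() below): for a bin
	 * whose runs hold nregs == 64 regions, a run in runs0 has
	 * free_max == 63 and free_min == 33, a run in runs25 has
	 * free_max == 47 and free_min == 17, and a run in runs50 has
	 * free_max == 31.  When nfree drops below free_min the run is
	 * promoted to the next fullness class; when nfree exceeds free_max
	 * it is demoted.
	 */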
595 /* Size of regions in a run for this bin's size class. */
598 /* Total size of a run for this bin's size class. */
601 /* Total number of regions in a run for this bin's size class. */
604 /* Offset of first region in a run for this bin's size class. */
605 uint32_t reg0_offset;
608 /* Bin statistics. */
609 malloc_bin_stats_t stats;
616 # define ARENA_MAGIC 0x947d3d24
619 /* All operations on this arena require that mtx be locked. */
627 * Tree of chunks this arena manages.
629 arena_chunk_tree_t chunks;
632 * bins is used to store rings of free regions of the following sizes,
633 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
654 arena_bin_t bins[1]; /* Dynamically sized. */
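/*
 * Worked example (not from the original source): with a 16 byte quantum and
 * 4 kB pages, the tiny bins are {2, 4, 8}, the quantum-spaced bins are
 * {16, 32, ..., 512}, and the sub-page bins are {1 kB, 2 kB}, giving
 * ntbins == 3, nqbins == 32, and nsbins == 2, i.e. 37 bins per arena.
 */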
657 /******************************************************************************/
662 /* Used as a special "nil" return value for malloc(0). */
663 static const int nil;
665 /* Number of CPUs. */
666 static unsigned ncpus;
669 static unsigned pagesize;
670 static unsigned pagesize_2pow;
672 /* Various bin-related settings. */
673 static size_t bin_maxclass; /* Max size class for bins. */
674 static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */
675 static unsigned nqbins; /* Number of quantum-spaced bins. */
676 static unsigned nsbins; /* Number of (2^n)-spaced sub-page bins. */
677 static size_t small_min;
678 static size_t small_max;
679 static unsigned tiny_min_2pow;
681 /* Various quantum-related settings. */
682 static size_t quantum;
683 static size_t quantum_mask; /* (quantum - 1). */
685 /* Various chunk-related settings. */
686 static size_t chunk_size;
687 static size_t chunk_size_mask; /* (chunk_size - 1). */
688 static size_t arena_maxclass; /* Max size class for arenas. */
689 static unsigned arena_chunk_maplen;
696 /* Protects chunk-related data structures. */
697 static malloc_mutex_t chunks_mtx;
699 /* Tree of chunks that are stand-alone huge allocations. */
700 static chunk_tree_t huge;
704 * Try to use brk for chunk-size allocations, due to address space constraints.
706 /* Result of first sbrk(0) call. */
707 static void *brk_base;
708 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
709 static void *brk_prev;
710 /* Current upper limit on brk addresses. */
711 static void *brk_max;
716 * Byte counters for allocated/total space used by the chunks in the huge
719 static uint64_t huge_nmalloc;
720 static uint64_t huge_ndalloc;
721 static size_t huge_allocated;
725 * Tree of chunks that were previously allocated. This is used when allocating
726 * chunks, in an attempt to re-use address space.
728 static chunk_tree_t old_chunks;
730 /****************************/
732 * base (internal allocation).
736 * Current chunk that is being used for internal memory allocations. This
737 * chunk is carved up in cacheline-size quanta, so that there is no chance of
738 * false cache line sharing.
740 static void *base_chunk;
741 static void *base_next_addr;
742 static void *base_past_addr; /* Addr immediately past base_chunk. */
743 static chunk_node_t *base_chunk_nodes; /* LIFO cache of chunk nodes. */
744 static malloc_mutex_t base_mtx;
746 static uint64_t base_total;
755 * Arenas that are used to service external requests. Not all elements of the
756 * arenas array are necessarily used; arenas are created lazily as needed.
758 static arena_t **arenas;
759 static unsigned narenas;
761 static unsigned next_arena;
763 static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */
767 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
770 static __thread arena_t *arenas_map;
774 /* Chunk statistics. */
775 static chunk_stats_t stats_chunks;
778 /*******************************/
780 * Runtime configuration options.
782 const char *_malloc_options;
784 #ifndef NO_MALLOC_EXTRAS
785 static bool opt_abort = true;
786 static bool opt_junk = true;
788 static bool opt_abort = false;
789 static bool opt_junk = false;
791 static bool opt_hint = false;
792 static bool opt_print_stats = false;
793 static size_t opt_quantum_2pow = QUANTUM_2POW_MIN;
794 static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
795 static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT;
796 static bool opt_utrace = false;
797 static bool opt_sysv = false;
798 static bool opt_xmalloc = false;
799 static bool opt_zero = false;
800 static int32_t opt_narenas_lshift = 0;
808 #define UTRACE(a, b, c) \
810 malloc_utrace_t ut = {a, b, c}; \
811 utrace(&ut, sizeof(ut)); \
814 /******************************************************************************/
816 * Begin function prototypes for non-inline static functions.
819 static void malloc_mutex_init(malloc_mutex_t *a_mutex);
820 static void wrtmessage(const char *p1, const char *p2, const char *p3,
822 static void malloc_printf(const char *format, ...);
823 static void *base_alloc(size_t size);
824 static chunk_node_t *base_chunk_node_alloc(void);
825 static void base_chunk_node_dealloc(chunk_node_t *node);
827 static void stats_print(arena_t *arena);
829 static void *pages_map(void *addr, size_t size);
830 static void pages_unmap(void *addr, size_t size);
831 static void *chunk_alloc(size_t size);
832 static void chunk_dealloc(void *chunk, size_t size);
834 static arena_t *choose_arena_hard(void);
836 static void arena_run_split(arena_t *arena, arena_run_t *run, bool large,
838 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
839 static void arena_chunk_dealloc(arena_chunk_t *chunk);
840 static void arena_bin_run_promote(arena_t *arena, arena_bin_t *bin,
841 arena_run_t *run, size_t size);
842 static void arena_bin_run_demote(arena_t *arena, arena_bin_t *bin,
843 arena_run_t *run, size_t size);
844 static arena_run_t *arena_run_alloc(arena_t *arena, bool large, size_t size);
845 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
846 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin,
848 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin,
850 static void *arena_malloc(arena_t *arena, size_t size);
851 static size_t arena_salloc(const void *ptr);
852 static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
853 static void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
854 static bool arena_new(arena_t *arena);
855 static arena_t *arenas_extend(unsigned ind);
856 static void *huge_malloc(size_t size);
857 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
858 static void huge_dalloc(void *ptr);
859 static void *imalloc(size_t size);
860 static void *ipalloc(size_t alignment, size_t size);
861 static void *icalloc(size_t size);
862 static size_t isalloc(const void *ptr);
863 static void *iralloc(void *ptr, size_t size);
864 static void idalloc(void *ptr);
865 static void malloc_print_stats(void);
866 static bool malloc_init_hard(void);
869 * End function prototypes.
871 /******************************************************************************/
877 malloc_mutex_init(malloc_mutex_t *a_mutex)
879 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
881 a_mutex->lock = lock;
885 malloc_mutex_lock(malloc_mutex_t *a_mutex)
889 _SPINLOCK(&a_mutex->lock);
893 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
897 _SPINUNLOCK(&a_mutex->lock);
903 /******************************************************************************/
905 * Begin Utility functions/macros.
/* Return the chunk address for allocation address a. */
#define CHUNK_ADDR2BASE(a)						\
	((void *)((uintptr_t)(a) & ~chunk_size_mask))

/* Return the chunk offset of address a. */
#define CHUNK_ADDR2OFFSET(a)						\
	((size_t)((uintptr_t)(a) & chunk_size_mask))

/* Return the smallest chunk multiple that is >= s. */
#define CHUNK_CEILING(s)						\
	(((s) + chunk_size_mask) & ~chunk_size_mask)

/* Return the smallest cacheline multiple that is >= s. */
#define CACHELINE_CEILING(s)						\
	(((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))

/* Return the smallest quantum multiple that is >= a. */
#define QUANTUM_CEILING(a)						\
	(((a) + quantum_mask) & ~quantum_mask)
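/*
 * Worked examples (assuming the 16 byte quantum and 2 MB chunks implied by
 * the defaults above): QUANTUM_CEILING(17) == 32, CACHELINE_CEILING(65) ==
 * 128, and CHUNK_CEILING(3 MB) == 4 MB.  CHUNK_ADDR2BASE() simply masks off
 * the low bits of an address, so every pointer within a chunk maps back to
 * the chunk's base address.
 */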
928 /* Compute the smallest power of 2 that is >= x. */
939 #if (SIZEOF_PTR == 8)
947 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
950 _write(STDERR_FILENO, p1, strlen(p1));
951 _write(STDERR_FILENO, p2, strlen(p2));
952 _write(STDERR_FILENO, p3, strlen(p3));
953 _write(STDERR_FILENO, p4, strlen(p4));
956 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
957 const char *p4) = wrtmessage;
960 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
963 malloc_printf(const char *format, ...)
968 va_start(ap, format);
969 vsnprintf(buf, sizeof(buf), format, ap);
971 _malloc_message(buf, "", "", "");
974 /******************************************************************************/
977 base_alloc(size_t size)
982 /* Round size up to nearest multiple of the cacheline size. */
983 csize = CACHELINE_CEILING(size);
985 malloc_mutex_lock(&base_mtx);
987 /* Make sure there's enough space for the allocation. */
988 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
991 assert(csize <= chunk_size);
993 tchunk = chunk_alloc(chunk_size);
994 if (tchunk == NULL) {
999 base_next_addr = (void *)base_chunk;
1000 base_past_addr = (void *)((uintptr_t)base_chunk + chunk_size);
1002 base_total += chunk_size;
1007 ret = base_next_addr;
1008 base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1011 malloc_mutex_unlock(&base_mtx);
1015 static chunk_node_t *
1016 base_chunk_node_alloc(void)
1020 malloc_mutex_lock(&base_mtx);
1021 if (base_chunk_nodes != NULL) {
1022 ret = base_chunk_nodes;
1023 base_chunk_nodes = *(chunk_node_t **)ret;
1024 malloc_mutex_unlock(&base_mtx);
1026 malloc_mutex_unlock(&base_mtx);
1027 ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1034 base_chunk_node_dealloc(chunk_node_t *node)
1037 malloc_mutex_lock(&base_mtx);
1038 *(chunk_node_t **)node = base_chunk_nodes;
1039 base_chunk_nodes = node;
1040 malloc_mutex_unlock(&base_mtx);
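/*
 * Note on the free list above (descriptive only): freed chunk_node_t objects
 * are threaded through base_chunk_nodes using the first word of each node as
 * the "next" pointer, so the cache costs no memory beyond the nodes
 * themselves.  base_chunk_node_alloc() pops the head of this list and only
 * falls back to base_alloc() when the list is empty.
 */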
1043 /******************************************************************************/
1047 stats_print(arena_t *arena)
1052 malloc_printf("allocated: %zu\n", arena->stats.allocated);
1054 malloc_printf("calls:\n");
1055 malloc_printf(" %12s %12s %12s\n", "nmalloc","ndalloc", "nmadvise");
1056 malloc_printf(" %12llu %12llu %12llu\n",
1057 arena->stats.nmalloc, arena->stats.ndalloc, arena->stats.nmadvise);
1059 malloc_printf("large requests: %llu\n", arena->stats.large_nrequests);
1061 malloc_printf("bins:\n");
1062 malloc_printf("%13s %1s %4s %5s %6s %9s %5s %6s %7s %6s %6s\n",
1063 "bin", "", "size", "nregs", "run_sz", "nrequests", "nruns",
1064 "hiruns", "curruns", "npromo", "ndemo");
1065 for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1066 if (arena->bins[i].stats.nrequests == 0) {
1067 if (gap_start == -1)
1070 if (gap_start != -1) {
1071 if (i > gap_start + 1) {
1072 /* Gap of more than one size class. */
1073 malloc_printf("[%u..%u]\n",
1076 /* Gap of one size class. */
1077 malloc_printf("[%u]\n", gap_start);
1082 "%13u %1s %4u %5u %6u %9llu %5llu"
1083 " %6lu %7lu %6llu %6llu\n",
1085 i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1086 arena->bins[i].reg_size,
1087 arena->bins[i].nregs,
1088 arena->bins[i].run_size,
1089 arena->bins[i].stats.nrequests,
1090 arena->bins[i].stats.nruns,
1091 arena->bins[i].stats.highruns,
1092 arena->bins[i].stats.curruns,
1093 arena->bins[i].stats.npromote,
1094 arena->bins[i].stats.ndemote);
1097 if (gap_start != -1) {
1098 if (i > gap_start + 1) {
1099 /* Gap of more than one size class. */
1100 malloc_printf("[%u..%u]\n", gap_start, i - 1);
1102 /* Gap of one size class. */
1103 malloc_printf("[%u]\n", gap_start);
1110 * End Utility functions/macros.
1112 /******************************************************************************/
1114 * Begin chunk management functions.
1118 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1124 if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1126 else if (a->chunk == b->chunk)
1132 /* Generate red-black tree code for chunks. */
1133 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1136 pages_map(void *addr, size_t size)
1141 * We don't use MAP_FIXED here, because it can cause the *replacement*
1142 * of existing mappings, and we only want to create new mappings.
1144 ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1146 assert(ret != NULL);
1148 if (ret == MAP_FAILED)
1150 else if (addr != NULL && ret != addr) {
1152 * We succeeded in mapping memory, but not in the right place.
1154 if (munmap(ret, size) == -1) {
1155 char buf[STRERROR_BUF];
1157 strerror_r(errno, buf, sizeof(buf));
1158 malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1159 _getprogname(), buf);
1166 assert(ret == NULL || (addr == NULL && ret != addr)
1167 || (addr != NULL && ret == addr));
1172 pages_unmap(void *addr, size_t size)
1175 if (munmap(addr, size) == -1) {
1176 char buf[STRERROR_BUF];
1178 strerror_r(errno, buf, sizeof(buf));
1179 malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1180 _getprogname(), buf);
1187 chunk_alloc(size_t size)
1190 chunk_node_t *tchunk, *delchunk;
1193 assert(size % chunk_size == 0);
1195 malloc_mutex_lock(&chunks_mtx);
1197 if (size == chunk_size) {
1199 * Check for address ranges that were previously chunks and try
1203 tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1204 while (tchunk != NULL) {
1205 /* Found an address range. Try to recycle it. */
1207 chunk = tchunk->chunk;
1209 tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1211 /* Remove delchunk from the tree. */
1212 RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1213 base_chunk_node_dealloc(delchunk);
1216 if ((uintptr_t)chunk >= (uintptr_t)brk_base
1217 && (uintptr_t)chunk < (uintptr_t)brk_max) {
1218 /* Re-use a previously freed brk chunk. */
1223 if ((ret = pages_map(chunk, size)) != NULL) {
1232 * Try to create allocations in brk, in order to make full use of
1233 * limited address space.
1235 if (brk_prev != (void *)-1) {
1240 * The loop is necessary to recover from races with other
1241 * threads that are using brk for something other than malloc.
1244 /* Get the current end of brk. */
1248 * Calculate how much padding is necessary to
1249 * chunk-align the end of brk.
1251 incr = (intptr_t)size
1252 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1256 ret = (void *)(intptr_t)brk_cur + incr;
1260 brk_prev = sbrk(incr);
1261 if (brk_prev == brk_cur) {
1263 brk_max = (void *)(intptr_t)ret + size;
1266 } while (brk_prev != (void *)-1);
1271 * Try to over-allocate, but allow the OS to place the allocation
1272 * anywhere. Beware of size_t wrap-around.
1274 if (size + chunk_size > size) {
1275 if ((ret = pages_map(NULL, size + chunk_size)) != NULL) {
1276 size_t offset = CHUNK_ADDR2OFFSET(ret);
1279 * Success. Clean up unneeded leading/trailing space.
1282 /* Leading space. */
1283 pages_unmap(ret, chunk_size - offset);
1285 ret = (void *)((uintptr_t)ret + (chunk_size -
1288 /* Trailing space. */
1289 pages_unmap((void *)((uintptr_t)ret + size),
1292 /* Trailing space only. */
1293 pages_unmap((void *)((uintptr_t)ret + size),
1300 /* All strategies for allocation failed. */
1305 stats_chunks.nchunks += (size / chunk_size);
1306 stats_chunks.curchunks += (size / chunk_size);
1308 if (stats_chunks.curchunks > stats_chunks.highchunks)
1309 stats_chunks.highchunks = stats_chunks.curchunks;
1311 malloc_mutex_unlock(&chunks_mtx);
1313 assert(CHUNK_ADDR2BASE(ret) == ret);
1318 chunk_dealloc(void *chunk, size_t size)
1323 assert(chunk != NULL);
1324 assert(CHUNK_ADDR2BASE(chunk) == chunk);
1326 assert(size % chunk_size == 0);
1328 malloc_mutex_lock(&chunks_mtx);
1331 if ((uintptr_t)chunk >= (uintptr_t)brk_base
1332 && (uintptr_t)chunk < (uintptr_t)brk_max) {
1335 /* Get the current end of brk. */
1339 * Try to shrink the data segment if this chunk is at the end
1340 * of the data segment. The sbrk() call here is subject to a
1341 * race condition with threads that use brk(2) or sbrk(2)
1342 * directly, but the alternative would be to leak memory for
1343 * the sake of poorly designed multi-threaded programs.
1345 if (brk_cur == brk_max
1346 && (void *)(uintptr_t)chunk + size == brk_max
1347 && sbrk(-(intptr_t)size) == brk_max) {
1348 if (brk_prev == brk_max) {
1350 brk_prev = (void *)(intptr_t)brk_max
1356 madvise(chunk, size, MADV_FREE);
1359 pages_unmap(chunk, size);
1362 * Iteratively create records of each chunk-sized memory region that
1363 * 'chunk' is comprised of, so that the address range can be recycled
1364 * if memory usage increases later on.
1366 for (offset = 0; offset < size; offset += chunk_size) {
1367 node = base_chunk_node_alloc();
1371 node->chunk = (void *)((uintptr_t)chunk + (uintptr_t)offset);
1372 node->size = chunk_size;
1373 RB_INSERT(chunk_tree_s, &old_chunks, node);
1380 stats_chunks.curchunks -= (size / chunk_size);
1382 malloc_mutex_unlock(&chunks_mtx);
1386 * End chunk management functions.
1388 /******************************************************************************/
/*
 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
 * code if necessary).
 */
static inline arena_t *
choose_arena(void)
1403 * We can only use TLS if this is a PIC library, since for the static
1404 * library version, libc's malloc is used by TLS allocation, which
1405 * introduces a bootstrapping issue.
1408 if (__isthreaded == false) {
1410 * Avoid the overhead of TLS for single-threaded operation. If the
1411 * app switches to threaded mode, the initial thread may end up
1412 * being assigned to some other arena, but this one-time switch
1413 * shouldn't cause significant issues.
1420 ret = choose_arena_hard();
1426 * Hash _pthread_self() to one of the arenas. There is a prime
1427 * number of arenas, so this has a reasonable chance of
1428 * working. Even so, the hashing can be easily thwarted by
1429 * inconvenient _pthread_self() values. Without specific
1430 * knowledge of how _pthread_self() calculates values, we can't
1431 * easily do much better than this.
1433 ind = (unsigned long) _pthread_self() % narenas;
1436 * Optimistially assume that arenas[ind] has been initialized.
1437 * At worst, we find out that some other thread has already
1438 * done so, after acquiring the lock in preparation. Note that
1439 * this lazy locking also has the effect of lazily forcing
1440 * cache coherency; without the lock acquisition, there's no
1441 * guarantee that modification of arenas[ind] by another thread
1442 * would be seen on this CPU for an arbitrary amount of time.
1444 * In general, this approach to modifying a synchronized value
1445 * isn't a good idea, but in this case we only ever modify the
1446 * value once, so things work out well.
1451 * Avoid races with another thread that may have already
1452 * initialized arenas[ind].
1454 malloc_mutex_lock(&arenas_mtx);
1455 if (arenas[ind] == NULL)
1456 ret = arenas_extend((unsigned)ind);
1459 malloc_mutex_unlock(&arenas_mtx);
1465 assert(ret != NULL);
1471 * Choose an arena based on a per-thread value (slow-path code only, called
1472 * only by choose_arena()).
1475 choose_arena_hard(void)
1479 assert(__isthreaded);
1481 /* Assign one of the arenas to this thread, in a round-robin fashion. */
1482 malloc_mutex_lock(&arenas_mtx);
1483 ret = arenas[next_arena];
1485 ret = arenas_extend(next_arena);
1488 * Make sure that this function never returns NULL, so that
1489 * choose_arena() doesn't have to check for a NULL return
1494 next_arena = (next_arena + 1) % narenas;
1495 malloc_mutex_unlock(&arenas_mtx);
1503 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1509 if ((uintptr_t)a < (uintptr_t)b)
1517 /* Generate red-black tree code for arena chunks. */
1518 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1520 static inline void *
1521 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1524 unsigned i, mask, bit, regind;
1526 assert(run->magic == ARENA_RUN_MAGIC);
1528 for (i = run->regs_minelm; i < REGS_MASK_NELMS; i++) {
1529 mask = run->regs_mask[i];
1531 /* Usable allocation found. */
1532 bit = ffs(mask) - 1;
1534 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1535 ret = (void *)&((char *)run)[bin->reg0_offset
1536 + (bin->reg_size * regind)];
1540 run->regs_mask[i] = mask;
1545 * Make a note that nothing before this element
1546 * contains a free region.
1548 run->regs_minelm = i + 1;
1557 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1559 unsigned diff, regind, elm, bit;
1561 assert(run->magic == ARENA_RUN_MAGIC);
1564 * Avoid doing division with a variable divisor if possible. This
1565 * single operation can reduce allocator throughput by around 20%!
1567 #define POW2_CASE(p) \
1569 regind = diff >> (p); \
1571 #define QUANTUM_CASE(n) \
1572 case ((n) << QUANTUM_2POW_MIN): \
1573 regind = diff / ((n) << QUANTUM_2POW_MIN); \
1577 * These assertions make sure that the switch statement matches
1578 * compile-time configuration.
1580 assert(tiny_min_2pow >= 1);
1581 assert(QUANTUM_2POW_MIN >= 3 && QUANTUM_2POW_MIN <= 4);
1582 assert(SMALL_MAX_2POW_DEFAULT == 9);
1584 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1588 #if (QUANTUM_2POW_MIN > 3)
1626 POW2_CASE(12) /* Handle up to 8 kB pages. */
1628 regind = diff / size;
1632 assert(regind < bin->nregs);
1634 elm = regind >> (SIZEOF_INT_2POW + 3);
1635 if (elm < run->regs_minelm)
1636 run->regs_minelm = elm;
1637 bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1638 assert((run->regs_mask[elm] & (1 << bit)) == 0);
1639 run->regs_mask[elm] |= (1 << bit);
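	/*
	 * Worked example of the bitmap indexing above: with 32-bit ints
	 * (SIZEOF_INT_2POW == 2), region index 37 lives in mask element
	 * 37 >> 5 == 1 at bit 37 - 32 == 5, so freeing it sets bit 5 of
	 * run->regs_mask[1]; arena_run_reg_alloc() recovers the same index
	 * as (1 << 5) + 5 == 37.
	 */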
1643 arena_run_split(arena_t *arena, arena_run_t *run, bool large, size_t size)
1645 arena_chunk_t *chunk;
1646 unsigned run_ind, map_offset, total_pages, need_pages;
1647 unsigned i, log2_run_pages, run_pages;
1649 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1650 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1652 assert(chunk->map[run_ind].free);
1653 total_pages = chunk->map[run_ind].npages;
1654 need_pages = (size >> pagesize_2pow);
1656 assert(chunk->map[run_ind].free);
1657 assert(chunk->map[run_ind].large == false);
1658 assert(chunk->map[run_ind].npages == total_pages);
1660 /* Split enough pages from the front of run to fit allocation size. */
1661 map_offset = run_ind;
1662 for (i = 0; i < need_pages; i++) {
1663 chunk->map[map_offset + i].free = false;
1664 chunk->map[map_offset + i].large = large;
1665 chunk->map[map_offset + i].npages = need_pages;
1666 chunk->map[map_offset + i].pos = i;
1669 /* Update map for trailing pages. */
1670 map_offset += need_pages;
1671 while (map_offset < run_ind + total_pages) {
1672 log2_run_pages = ffs(map_offset) - 1;
1673 run_pages = (1 << log2_run_pages);
1675 chunk->map[map_offset].free = true;
1676 chunk->map[map_offset].large = false;
1677 chunk->map[map_offset].npages = run_pages;
1679 chunk->nfree_runs[log2_run_pages]++;
1681 map_offset += run_pages;
1684 chunk->pages_used += (size >> pagesize_2pow);
1687 static arena_chunk_t *
1688 arena_chunk_alloc(arena_t *arena)
1690 arena_chunk_t *chunk;
1691 unsigned i, j, header_npages, pow2_header_npages, map_offset;
1692 unsigned log2_run_pages, run_pages;
1695 chunk = (arena_chunk_t *)chunk_alloc(chunk_size);
1699 chunk->arena = arena;
1701 RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1704 * Claim that no pages are in use, since the header is merely overhead.
1706 chunk->pages_used = 0;
1708 memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs));
1710 header_size = (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen]
1711 - (uintptr_t)chunk);
1712 if (header_size % pagesize != 0) {
1713 /* Round up to the nearest page boundary. */
1714 header_size += pagesize - (header_size % pagesize);
1717 header_npages = header_size >> pagesize_2pow;
1718 pow2_header_npages = pow2_ceil(header_npages);
1721 * Iteratively mark runs as in use, until we've spoken for the entire
1725 for (i = 0; header_npages > 0; i++) {
1726 if ((pow2_header_npages >> i) <= header_npages) {
1727 for (j = 0; j < (pow2_header_npages >> i); j++) {
1728 chunk->map[map_offset + j].free = false;
1729 chunk->map[map_offset + j].large = false;
1730 chunk->map[map_offset + j].npages =
1731 (pow2_header_npages >> i);
1732 chunk->map[map_offset + j].pos = j;
1734 header_npages -= (pow2_header_npages >> i);
1735 map_offset += (pow2_header_npages >> i);
1740 * Finish initializing map. The chunk header takes up some space at
1741 * the beginning of the chunk, which we just took care of by
1742 * "allocating" the leading pages.
1744 while (map_offset < (chunk_size >> pagesize_2pow)) {
1745 log2_run_pages = ffs(map_offset) - 1;
1746 run_pages = (1 << log2_run_pages);
1748 chunk->map[map_offset].free = true;
1749 chunk->map[map_offset].large = false;
1750 chunk->map[map_offset].npages = run_pages;
1752 chunk->nfree_runs[log2_run_pages]++;
1754 map_offset += run_pages;
1761 arena_chunk_dealloc(arena_chunk_t *chunk)
1764 RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1766 chunk_dealloc((void *)chunk, chunk_size);
1770 arena_bin_run_promote(arena_t *arena, arena_bin_t *bin, arena_run_t *run,
1774 assert(bin == run->bin);
1777 assert(run->free_min > run->nfree);
1778 assert(run->quartile < RUN_Q100);
1781 bin->stats.npromote++;
1785 switch (run->quartile) {
1790 qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1791 run->free_max = bin->nregs - 1;
1792 run->free_min = (bin->nregs >> 1) + 1;
1793 assert(run->nfree <= run->free_max);
1794 assert(run->nfree >= run->free_min);
1797 qr_remove(run, link);
1798 qr_before_insert((arena_run_t *)&bin->runs25, run,
1800 run->free_max = ((bin->nregs >> 2) * 3) - 1;
1801 run->free_min = (bin->nregs >> 2) + 1;
1802 assert(run->nfree <= run->free_max);
1803 assert(run->nfree >= run->free_min);
1806 qr_remove(run, link);
1807 qr_before_insert((arena_run_t *)&bin->runs50, run,
1809 run->free_max = (bin->nregs >> 1) - 1;
1811 assert(run->nfree <= run->free_max);
1812 assert(run->nfree >= run->free_min);
1816 * Skip RUN_Q75 during promotion from RUN_Q50.
1817 * Separate handling of RUN_Q75 and RUN_Q100 allows us
1818 * to keep completely full runs in RUN_Q100, thus
1819 * guaranteeing that runs in RUN_Q75 are only mostly
1820 * full. This provides a method for avoiding a linear
1821 * search for non-full runs, which avoids some
1822 * pathological edge cases.
1827 qr_remove(run, link);
1828 assert(bin->runcur == run);
1832 assert(run->nfree <= run->free_max);
1833 assert(run->nfree >= run->free_min);
1842 arena_bin_run_demote(arena_t *arena, arena_bin_t *bin, arena_run_t *run,
1846 assert(bin == run->bin);
1849 assert(run->free_max < run->nfree);
1850 assert(run->quartile > RUN_QINIT);
1853 bin->stats.ndemote++;
1857 switch (run->quartile) {
1859 qr_remove(run, link);
1861 bin->stats.curruns--;
1863 if (bin->runcur == run)
1868 arena_run_dalloc(arena, run, bin->run_size);
1871 qr_remove(run, link);
1872 qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1873 run->free_max = bin->nregs - 1;
1874 run->free_min = (bin->nregs >> 1) + 1;
1875 assert(run->nfree <= run->free_max);
1876 assert(run->nfree >= run->free_min);
1879 qr_remove(run, link);
1880 qr_before_insert((arena_run_t *)&bin->runs25, run,
1882 run->free_max = ((bin->nregs >> 2) * 3) - 1;
1883 run->free_min = (bin->nregs >> 2) + 1;
1884 assert(run->nfree <= run->free_max);
1885 assert(run->nfree >= run->free_min);
1888 qr_remove(run, link);
1889 qr_before_insert((arena_run_t *)&bin->runs50, run,
1891 run->free_max = (bin->nregs >> 1) - 1;
1893 assert(run->nfree <= run->free_max);
1894 assert(run->nfree >= run->free_min);
1897 qr_before_insert((arena_run_t *)&bin->runs75, run,
1899 run->free_max = (bin->nregs >> 2) - 1;
1901 assert(run->nfree <= run->free_max);
1902 assert(run->nfree >= run->free_min);
1911 static arena_run_t *
1912 arena_run_alloc(arena_t *arena, bool large, size_t size)
1915 unsigned min_ind, i, j;
1916 arena_chunk_t *chunk;
1921 assert(size <= arena_maxclass);
1930 * Search through arena's chunks in address order for a run that is
1931 * large enough. Look for a precise fit, but do not pass up a chunk
1932 * that has a run which is large enough to split.
1934 min_ind = ffs(size >> pagesize_2pow) - 1;
1935 RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1937 i < (opt_chunk_2pow - pagesize_2pow);
1939 if (chunk->nfree_runs[i] > 0) {
1940 arena_chunk_map_t *map = chunk->map;
1942 /* Scan chunk's map for free run. */
1944 j < arena_chunk_maplen;
1945 j += map[j].npages) {
1947 && map[j].npages == (1 << i))
1948 {/*<----------------------------*/
1949 run = (arena_run_t *)&((char *)chunk)[j
1952 assert(chunk->nfree_runs[i] > 0);
1953 chunk->nfree_runs[i]--;
1955 /* Update page map. */
1956 arena_run_split(arena, run, large, size);
1959 }/*---------------------------->*/
1967 /* No usable runs. Allocate a new chunk, then try again. */
1968 if (arena_chunk_alloc(arena) == NULL)
1974 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1976 arena_chunk_t *chunk;
1977 unsigned run_ind, buddy_ind, base_run_ind, run_pages, log2_run_pages;
1979 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1980 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1982 run_pages = (size >> pagesize_2pow);
1983 log2_run_pages = ffs(run_pages) - 1;
1984 assert(run_pages > 0);
1986 /* Subtract pages from count of pages used in chunk. */
1987 chunk->pages_used -= run_pages;
1989 /* Mark run as deallocated. */
1990 chunk->map[run_ind].free = true;
1991 chunk->map[run_ind].large = false;
1992 chunk->map[run_ind].npages = run_pages;
1995 * Tell the kernel that we don't need the data in this run, but only if
1996 * requested via runtime configuration.
1999 madvise(run, size, MADV_FREE);
2001 arena->stats.nmadvise += (size >> pagesize_2pow);
2006 * Iteratively coalesce with buddies. Conceptually, the buddy scheme
2007 * induces a tree on the set of pages. If we know the number of pages
2008 * in the subtree rooted at the current node, we can quickly determine
2009 * whether a run is the left or right buddy, and then calculate the
2013 (run_pages = (1 << log2_run_pages)) < arena_chunk_maplen;
2015 if (((run_ind >> log2_run_pages) & 1) == 0) {
2016 /* Current run precedes its buddy. */
2017 buddy_ind = run_ind + run_pages;
2018 base_run_ind = run_ind;
2020 /* Current run follows its buddy. */
2021 buddy_ind = run_ind - run_pages;
2022 base_run_ind = buddy_ind;
2025 if (chunk->map[buddy_ind].free == false
2026 || chunk->map[buddy_ind].npages != run_pages)
2029 assert(chunk->nfree_runs[log2_run_pages] > 0);
2030 chunk->nfree_runs[log2_run_pages]--;
2033 chunk->map[base_run_ind].npages = (run_pages << 1);
2035 /* Update run_ind to be the beginning of the coalesced run. */
2036 run_ind = base_run_ind;
2039 chunk->nfree_runs[log2_run_pages]++;
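	/*
	 * Worked example of the buddy arithmetic above: for a freed run at
	 * page index run_ind == 4 with run_pages == 4 (log2_run_pages == 2),
	 * (4 >> 2) & 1 == 1, so the run follows its buddy and
	 * buddy_ind == 4 - 4 == 0.  If the map shows pages 0..3 as a free
	 * 4-page run, the two merge into an 8-page run starting at page 0,
	 * and the loop then repeats with the 8-page run.
	 */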
2041 /* Free pages, to the extent possible. */
2042 if (chunk->pages_used == 0) {
2043 /* This chunk is completely unused now, so deallocate it. */
2044 arena_chunk_dealloc(chunk);
2048 static arena_run_t *
2049 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin, size_t size)
2052 unsigned i, remainder;
2054 /* Look for a usable run. */
2055 if ((run = qr_next((arena_run_t *)&bin->runs50, link))
2056 != (arena_run_t *)&bin->runs50
2057 || (run = qr_next((arena_run_t *)&bin->runs25, link))
2058 != (arena_run_t *)&bin->runs25
2059 || (run = qr_next((arena_run_t *)&bin->runs0, link))
2060 != (arena_run_t *)&bin->runs0
2061 || (run = qr_next((arena_run_t *)&bin->runs75, link))
2062 != (arena_run_t *)&bin->runs75) {
2063 /* run is guaranteed to have available space. */
2064 qr_remove(run, link);
2067 /* No existing runs have any space available. */
2069 /* Allocate a new run. */
2070 run = arena_run_alloc(arena, false, bin->run_size);
2074 /* Initialize run internals. */
2078 for (i = 0; i < (bin->nregs >> (SIZEOF_INT_2POW + 3)); i++)
2079 run->regs_mask[i] = UINT_MAX;
2080 remainder = bin->nregs % (1 << (SIZEOF_INT_2POW + 3));
2081 if (remainder != 0) {
2082 run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2086 for (; i < REGS_MASK_NELMS; i++)
2087 run->regs_mask[i] = 0;
2089 run->regs_minelm = 0;
2091 run->nfree = bin->nregs;
2092 run->quartile = RUN_QINIT;
2093 run->free_max = bin->nregs;
2094 run->free_min = ((bin->nregs >> 2) * 3) + 1;
2096 run->magic = ARENA_RUN_MAGIC;
2101 bin->stats.curruns++;
2102 if (bin->stats.curruns > bin->stats.highruns)
2103 bin->stats.highruns = bin->stats.curruns;
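	/*
	 * Worked example of the bitmap setup above: for a bin with
	 * nregs == 100 and 32-bit ints, regs_mask[0..2] are set to
	 * 0xffffffff (96 free regions), remainder == 4 so
	 * regs_mask[3] == UINT_MAX >> 28 == 0xf, and any further elements
	 * are zeroed.  The new run starts in RUN_QINIT with free_max == 100
	 * and free_min == (100 >> 2) * 3 + 1 == 76.
	 */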
2108 /* bin->runcur must have space available before this function is called. */
2109 static inline void *
2110 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run,
2115 assert(run->magic == ARENA_RUN_MAGIC);
2116 assert(run->nfree > 0);
2118 ret = arena_run_reg_alloc(run, bin);
2119 assert(ret != NULL);
2121 if (run->nfree < run->free_min) {
2122 /* Promote run to higher fullness quartile. */
2123 arena_bin_run_promote(arena, bin, run, size);
2129 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2131 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin, size_t size)
2134 assert(bin->runcur == NULL || bin->runcur->quartile == RUN_Q100);
2136 bin->runcur = arena_bin_nonfull_run_get(arena, bin, size);
2137 if (bin->runcur == NULL)
2139 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2140 assert(bin->runcur->nfree > 0);
2142 return (arena_bin_malloc_easy(arena, bin, bin->runcur, size));
2146 arena_malloc(arena_t *arena, size_t size)
2150 assert(arena != NULL);
2151 assert(arena->magic == ARENA_MAGIC);
2153 assert(QUANTUM_CEILING(size) <= arena_maxclass);
2155 if (size <= bin_maxclass) {
2159 /* Small allocation. */
2161 if (size < small_min) {
2163 size = pow2_ceil(size);
2164 bin = &arena->bins[ffs(size >> (tiny_min_2pow + 1))];
2167 * Bin calculation is always correct, but we may need to
2168 * fix size for the purposes of stats accuracy.
2170 if (size < (1 << tiny_min_2pow))
2171 size = (1 << tiny_min_2pow);
2173 } else if (size <= small_max) {
2174 /* Quantum-spaced. */
2175 size = QUANTUM_CEILING(size);
2176 bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2180 size = pow2_ceil(size);
2181 bin = &arena->bins[ntbins + nqbins
2182 + (ffs(size >> opt_small_max_2pow) - 2)];
2184 assert(size == bin->reg_size);
2186 malloc_mutex_lock(&arena->mtx);
2187 if ((run = bin->runcur) != NULL)
2188 ret = arena_bin_malloc_easy(arena, bin, run, size);
2190 ret = arena_bin_malloc_hard(arena, bin, size);
2193 bin->stats.nrequests++;
2196 /* Medium allocation. */
2197 size = pow2_ceil(size);
2198 malloc_mutex_lock(&arena->mtx);
2199 ret = (void *)arena_run_alloc(arena, true, size);
2201 arena->stats.large_nrequests++;
2206 arena->stats.nmalloc++;
2208 arena->stats.allocated += size;
2211 malloc_mutex_unlock(&arena->mtx);
2213 if (opt_junk && ret != NULL)
2214 memset(ret, 0xa5, size);
2215 else if (opt_zero && ret != NULL)
2216 memset(ret, 0, size);
2220 /* Return the size of the allocation pointed to by ptr. */
2222 arena_salloc(const void *ptr)
2225 arena_chunk_t *chunk;
2227 arena_chunk_map_t mapelm;
2229 assert(ptr != NULL);
2230 assert(ptr != &nil);
2231 assert(CHUNK_ADDR2BASE(ptr) != ptr);
2234 * No arena data structures that we query here can change in a way that
2235 * affects this function, so we don't need to lock.
2237 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2238 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2239 mapelm = chunk->map[pageind];
2240 assert(mapelm.free == false);
2241 if (mapelm.large == false) {
2244 pageind -= mapelm.pos;
2246 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2247 assert(run->magic == ARENA_RUN_MAGIC);
2248 ret = run->bin->reg_size;
2250 ret = mapelm.npages << pagesize_2pow;
2256 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2260 /* Avoid moving the allocation if the size class would not change. */
2261 if (size < small_min) {
2262 if (oldsize < small_min &&
2263 ffs(pow2_ceil(size) >> (tiny_min_2pow + 1))
2264 == ffs(pow2_ceil(oldsize) >> (tiny_min_2pow + 1)))
2266 } else if (size <= small_max) {
2267 if (oldsize >= small_min && oldsize <= small_max &&
2268 (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2269 == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2272 if (oldsize > small_max &&
2273 pow2_ceil(size) == pow2_ceil(oldsize))
2278 * If we get here, then size and oldsize are different enough that we
2279 * need to use a different size class. In that case, fall back to
2280 * allocating new space and copying.
2282 ret = arena_malloc(choose_arena(), size);
2287 memcpy(ret, ptr, size);
2289 memcpy(ret, ptr, oldsize);
2293 if (opt_junk && size < oldsize)
2294 memset(&((char *)ptr)[size], 0x5a, oldsize - size);
2295 else if (opt_zero && size > oldsize)
2296 memset(&((char *)ptr)[size], 0, size - oldsize);
2301 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2304 arena_chunk_map_t mapelm;
2307 assert(arena != NULL);
2308 assert(arena->magic == ARENA_MAGIC);
2309 assert(chunk->arena == arena);
2310 assert(ptr != NULL);
2311 assert(ptr != &nil);
2312 assert(CHUNK_ADDR2BASE(ptr) != ptr);
2314 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2315 mapelm = chunk->map[pageind];
2316 assert(mapelm.free == false);
2317 if (mapelm.large == false) {
2321 /* Small allocation. */
2323 pageind -= mapelm.pos;
2325 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2326 assert(run->magic == ARENA_RUN_MAGIC);
2328 size = bin->reg_size;
2331 memset(ptr, 0x5a, size);
2333 malloc_mutex_lock(&arena->mtx);
2334 arena_run_reg_dalloc(run, bin, ptr, size);
2336 if (run->nfree > run->free_max) {
2337 /* Demote run to lower fullness quartile. */
2338 arena_bin_run_demote(arena, bin, run, size);
2341 /* Medium allocation. */
2343 size = mapelm.npages << pagesize_2pow;
2346 memset(ptr, 0x5a, size);
2348 malloc_mutex_lock(&arena->mtx);
2349 arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2353 arena->stats.allocated -= size;
2356 malloc_mutex_unlock(&arena->mtx);
2360 arena_new(arena_t *arena)
2364 size_t pow2_size, run_size;
2366 malloc_mutex_init(&arena->mtx);
2369 memset(&arena->stats, 0, sizeof(arena_stats_t));
2372 /* Initialize chunks. */
2373 RB_INIT(&arena->chunks);
2375 /* Initialize bins. */
2377 /* (2^n)-spaced tiny bins. */
2378 for (i = 0; i < ntbins; i++) {
2379 bin = &arena->bins[i];
2381 qr_new((arena_run_t *)&bin->runs0, link);
2382 qr_new((arena_run_t *)&bin->runs25, link);
2383 qr_new((arena_run_t *)&bin->runs50, link);
2384 qr_new((arena_run_t *)&bin->runs75, link);
2386 bin->reg_size = (1 << (tiny_min_2pow + i));
2389 * Calculate how large of a run to allocate. Make sure that at
2390 * least RUN_MIN_REGS regions fit in the run.
2392 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2393 if (run_size < pagesize)
2394 run_size = pagesize;
2395 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2396 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2397 if (run_size > arena_maxclass)
2398 run_size = arena_maxclass;
2399 bin->run_size = run_size;
2401 assert(run_size >= sizeof(arena_run_t));
2402 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2403 if (bin->nregs > (REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3))) {
2404 /* Take care not to overflow regs_mask. */
2405 bin->nregs = REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3);
2407 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2410 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2414 /* Quantum-spaced bins. */
2415 for (; i < ntbins + nqbins; i++) {
2416 bin = &arena->bins[i];
2418 qr_new((arena_run_t *)&bin->runs0, link);
2419 qr_new((arena_run_t *)&bin->runs25, link);
2420 qr_new((arena_run_t *)&bin->runs50, link);
2421 qr_new((arena_run_t *)&bin->runs75, link);
2423 bin->reg_size = quantum * (i - ntbins + 1);
2426 * Calculate how large of a run to allocate. Make sure that at
2427 * least RUN_MIN_REGS regions fit in the run.
2429 pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2430 run_size = (pow2_size << RUN_MIN_REGS_2POW);
2431 if (run_size < pagesize)
2432 run_size = pagesize;
2433 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2434 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2435 if (run_size > arena_maxclass)
2436 run_size = arena_maxclass;
2437 bin->run_size = run_size;
2439 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2440 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
2441 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2444 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2448 /* (2^n)-spaced sub-page bins. */
2449 for (; i < ntbins + nqbins + nsbins; i++) {
2450 bin = &arena->bins[i];
2452 qr_new((arena_run_t *)&bin->runs0, link);
2453 qr_new((arena_run_t *)&bin->runs25, link);
2454 qr_new((arena_run_t *)&bin->runs50, link);
2455 qr_new((arena_run_t *)&bin->runs75, link);
2457 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2460 * Calculate how large of a run to allocate. Make sure that at
2461 * least RUN_MIN_REGS regions fit in the run.
2463 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2464 if (run_size < pagesize)
2465 run_size = pagesize;
2466 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2467 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2468 if (run_size > arena_maxclass)
2469 run_size = arena_maxclass;
2470 bin->run_size = run_size;
2472 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2473 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
2474 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2477 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2482 arena->magic = ARENA_MAGIC;
/* Create a new arena and insert it into the arenas array at index ind. */
static arena_t *
arenas_extend(unsigned ind)
{
    arena_t *ret;

    /* Allocate enough space for trailing bins. */
    ret = (arena_t *)base_alloc(sizeof(arena_t)
        + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
    if (ret != NULL && arena_new(ret) == false) {
        arenas[ind] = ret;
        return (ret);
    }
    /* Only reached if there is an OOM error. */

    /*
     * OOM here is quite inconvenient to propagate, since dealing with it
     * would require a check for failure in the fast path.  Instead, punt
     * by using arenas[0].  In practice, this is an extremely unlikely
     * failure.
     */
    malloc_printf("%s: (malloc) Error initializing arena\n",
        _getprogname());

    return (arenas[0]);
}
/******************************************************************************/
/*
 * Begin general internal functions.
 */

static void *
huge_malloc(size_t size)
{
    void *ret;
    size_t csize;
    chunk_node_t *node;

    /* Allocate one or more contiguous chunks for this request. */

    csize = CHUNK_CEILING(size);
    if (csize == 0) {
        /* size is large enough to cause size_t wrap-around. */
        return (NULL);
    }
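    /*
     * Example of the wrap-around check above (illustrative numbers, not
     * from the original source): CHUNK_CEILING() rounds a request up to a
     * multiple of chunk_size, e.g. 1.5 MB becomes 2 MB with 1 MB chunks.
     * For a request within chunk_size - 1 bytes of SIZE_T_MAX, that
     * rounding overflows size_t and yields 0, which is caught here.
     */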
    /* Allocate a chunk node with which to track the chunk. */
    node = base_chunk_node_alloc();
    if (node == NULL)
        return (NULL);

    ret = chunk_alloc(csize);
    if (ret == NULL) {
        base_chunk_node_dealloc(node);
        return (NULL);
    }

    /* Insert node into chunks. */
    node->chunk = ret;
    node->size = csize;

    malloc_mutex_lock(&chunks_mtx);
    RB_INSERT(chunk_tree_s, &huge, node);
    huge_allocated += csize;
    malloc_mutex_unlock(&chunks_mtx);

    if (opt_junk && ret != NULL)
        memset(ret, 0xa5, csize);
    else if (opt_zero && ret != NULL)
        memset(ret, 0, csize);

    return (ret);
}
static void *
huge_ralloc(void *ptr, size_t size, size_t oldsize)
{
    void *ret;

    /* Avoid moving the allocation if the size class would not change. */
    if (oldsize > arena_maxclass &&
        CHUNK_CEILING(size) == CHUNK_CEILING(oldsize))
        return (ptr);
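    /*
     * For instance (illustrative, not from the original source): with
     * 1 MB chunks, growing a huge allocation from 1.2 MB to 1.8 MB keeps
     * CHUNK_CEILING() at 2 MB for both sizes, so the existing chunks are
     * reused and no copy is performed.
     */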
    /*
     * If we get here, then size and oldsize are different enough that we
     * need to use a different size class.  In that case, fall back to
     * allocating new space and copying.
     */
    ret = huge_malloc(size);
    if (ret == NULL)
        return (NULL);

    if (CHUNK_ADDR2BASE(ptr) == ptr) {
        /* The old allocation is a chunk. */
        if (size < oldsize)
            memcpy(ret, ptr, size);
        else
            memcpy(ret, ptr, oldsize);
    } else {
        /* The old allocation is a region. */
        assert(oldsize < size);
        memcpy(ret, ptr, oldsize);
    }
    idalloc(ptr);
    return (ret);
}
static void
huge_dalloc(void *ptr)
{
    chunk_node_t key;
    chunk_node_t *node;

    malloc_mutex_lock(&chunks_mtx);

    /* Extract from tree of huge allocations. */
    key.chunk = ptr;
    node = RB_FIND(chunk_tree_s, &huge, &key);
    assert(node != NULL);
    assert(node->chunk == ptr);
    RB_REMOVE(chunk_tree_s, &huge, node);

    /* Update counters. */
    huge_allocated -= node->size;

    malloc_mutex_unlock(&chunks_mtx);

    if (opt_junk)
        memset(node->chunk, 0x5a, node->size);
    chunk_dealloc(node->chunk, node->size);

    base_chunk_node_dealloc(node);
}
static void *
imalloc(size_t size)
{
    void *ret;

    if (size <= arena_maxclass)
        ret = arena_malloc(choose_arena(), size);
    else
        ret = huge_malloc(size);

    return (ret);
}
static void *
ipalloc(size_t alignment, size_t size)
{
    void *ret;
    size_t pow2_size;

    /*
     * Round up to the nearest power of two that is >= alignment and
     * >= size.
     */
    if (size > alignment)
        pow2_size = pow2_ceil(size);
    else
        pow2_size = alignment;
    pow2_size = QUANTUM_CEILING(pow2_size);
    if (pow2_size < size) {
        /* size_t overflow. */
        return (NULL);
    }

    if (pow2_size <= arena_maxclass)
        ret = arena_malloc(choose_arena(), pow2_size);
    else {
        if (alignment <= chunk_size)
            ret = huge_malloc(size);
        else {
            size_t chunksize, alloc_size, offset;
            chunk_node_t *node;

            /*
             * This allocation requires alignment that is even
             * larger than chunk alignment.  This means that
             * huge_malloc() isn't good enough.
             *
             * Allocate almost twice as many chunks as are demanded
             * by the size or alignment, in order to assure the
             * alignment can be achieved, then unmap leading and
             * trailing chunks.
             */

            chunksize = CHUNK_CEILING(size);

            if (size >= alignment)
                alloc_size = chunksize + alignment - chunk_size;
            else
                alloc_size = (alignment << 1) - chunk_size;
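            /*
             * Illustrative numbers (not from the original source): with
             * 1 MB chunks, a request for 3 MB aligned to 2 MB gives
             * chunksize == 3 MB and alloc_size == 3 MB + 2 MB - 1 MB ==
             * 4 MB.  Any chunk-aligned 4 MB region necessarily contains
             * a 2 MB-aligned address within its first 1 MB, leaving room
             * for the full 3 MB request after the leading trim below.
             */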
            /*
             * Allocate a chunk node with which to track the chunk.
             */
            node = base_chunk_node_alloc();
            if (node == NULL)
                return (NULL);

            ret = chunk_alloc(alloc_size);
            if (ret == NULL) {
                base_chunk_node_dealloc(node);
                return (NULL);
            }

            offset = (uintptr_t)ret & (alignment - 1);
            assert(offset % chunk_size == 0);
            assert(offset < alloc_size);
            if (offset == 0) {
                /* Trim trailing space. */
                chunk_dealloc((void *)((uintptr_t)ret
                    + chunksize), alloc_size - chunksize);
            } else {
                size_t trailsize;

                /* Trim leading space. */
                chunk_dealloc(ret, alignment - offset);

                ret = (void *)((uintptr_t)ret + (alignment
                    - offset));

                trailsize = alloc_size - (alignment - offset)
                    - chunksize;
                if (trailsize != 0) {
                    /* Trim trailing space. */
                    assert(trailsize < alloc_size);
                    chunk_dealloc((void *)((uintptr_t)ret
                        + chunksize), trailsize);
                }
            }

            /* Insert node into chunks. */
            node->chunk = ret;
            node->size = chunksize;

            malloc_mutex_lock(&chunks_mtx);
            RB_INSERT(chunk_tree_s, &huge, node);
            huge_allocated += chunksize;
            malloc_mutex_unlock(&chunks_mtx);

            if (opt_junk)
                memset(ret, 0xa5, chunksize);
            else if (opt_zero)
                memset(ret, 0, chunksize);
        }
    }

    assert(((uintptr_t)ret & (alignment - 1)) == 0);
    return (ret);
}
static void *
icalloc(size_t size)
{
    void *ret;

    if (size <= arena_maxclass) {
        ret = arena_malloc(choose_arena(), size);
        if (ret == NULL)
            return (NULL);
        memset(ret, 0, size);
    } else {
        /*
         * The virtual memory system provides zero-filled pages, so
         * there is no need to do so manually, unless opt_junk is
         * enabled, in which case huge_malloc() fills huge allocations
         * with junk.
         */
        ret = huge_malloc(size);
        if (ret == NULL)
            return (NULL);

        if (opt_junk)
            memset(ret, 0, size);
        else if ((uintptr_t)ret >= (uintptr_t)brk_base
            && (uintptr_t)ret < (uintptr_t)brk_max) {
            /*
             * This may be a re-used brk chunk.  Therefore, zero
             * the memory.
             */
            memset(ret, 0, size);
        }
    }

    return (ret);
}
static size_t
isalloc(const void *ptr)
{
    size_t ret;
    arena_chunk_t *chunk;

    assert(ptr != NULL);
    assert(ptr != &nil);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
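    /*
     * Dispatch note (illustrative, not from the original source):
     * CHUNK_ADDR2BASE() masks the pointer down to its containing chunk
     * boundary.  A huge allocation is itself chunk-aligned, so the masked
     * address equals the pointer and the huge tree is consulted below;
     * any other pointer lands inside an arena chunk, whose header
     * identifies the owning arena and the region's size class.
     */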
    if (chunk != ptr) {
        /* Region. */
        assert(chunk->arena->magic == ARENA_MAGIC);

        ret = arena_salloc(ptr);
    } else {
        chunk_node_t *node, key;

        /* Chunk (huge allocation). */

        malloc_mutex_lock(&chunks_mtx);

        /* Extract from tree of huge allocations. */
        key.chunk = (void *)ptr;
        node = RB_FIND(chunk_tree_s, &huge, &key);
        assert(node != NULL);

        ret = node->size;

        malloc_mutex_unlock(&chunks_mtx);
    }

    return (ret);
}
static void *
iralloc(void *ptr, size_t size)
{
    void *ret;
    size_t oldsize;

    assert(ptr != NULL);
    assert(ptr != &nil);

    oldsize = isalloc(ptr);

    if (size <= arena_maxclass)
        ret = arena_ralloc(ptr, size, oldsize);
    else
        ret = huge_ralloc(ptr, size, oldsize);

    return (ret);
}
static void
idalloc(void *ptr)
{
    arena_chunk_t *chunk;

    assert(ptr != NULL);
    assert(ptr != &nil);

    chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
    if (chunk != ptr) {
        /* Region. */
        malloc_mutex_lock(&chunk->arena->mtx);
        chunk->arena->stats.ndalloc++;
        malloc_mutex_unlock(&chunk->arena->mtx);

        arena_dalloc(chunk->arena, chunk, ptr);
    } else
        huge_dalloc(ptr);
}
static void
malloc_print_stats(void)
{

    if (opt_print_stats) {
        malloc_printf("___ Begin malloc statistics ___\n");
        malloc_printf("Number of CPUs: %u\n", ncpus);
        malloc_printf("Number of arenas: %u\n", narenas);
        malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size,
            opt_chunk_2pow);
        malloc_printf("Quantum size: %zu (2^%zu)\n", quantum,
            opt_quantum_2pow);
        malloc_printf("Max small size: %zu\n", small_max);
        malloc_printf("Pointer size: %u\n", sizeof(void *));
        malloc_printf("Assertions %s\n",
#ifdef NDEBUG
            "disabled"
#else
            "enabled"
#endif
            );

#ifdef MALLOC_STATS
        {
            size_t allocated, total;
            unsigned i;
            arena_t *arena;

            /* Calculate and print allocated/total stats. */
            for (i = 0, allocated = 0; i < narenas; i++) {
                if (arenas[i] != NULL) {
                    malloc_mutex_lock(&arenas[i]->mtx);
                    allocated += arenas[i]->stats.allocated;
                    malloc_mutex_unlock(&arenas[i]->mtx);
                }
            }

            malloc_mutex_lock(&chunks_mtx);
            allocated += huge_allocated;
            total = stats_chunks.curchunks * chunk_size;
            malloc_mutex_unlock(&chunks_mtx);

            malloc_printf("Allocated: %zu, space used: %zu\n",
                allocated, total);
            /* Print chunk stats. */
            {
                chunk_stats_t chunks_stats;

                malloc_mutex_lock(&chunks_mtx);
                chunks_stats = stats_chunks;
                malloc_mutex_unlock(&chunks_mtx);

                malloc_printf("\nchunks:\n");
                malloc_printf(" %13s%13s%13s\n", "nchunks",
                    "highchunks", "curchunks");
                malloc_printf(" %13llu%13lu%13lu\n",
                    chunks_stats.nchunks,
                    chunks_stats.highchunks,
                    chunks_stats.curchunks);
            }

            /* Print huge stats. */
            malloc_printf("\nhuge:\n");
            malloc_printf("%12s %12s %12s\n",
                "nmalloc", "ndalloc", "allocated");
            malloc_printf("%12llu %12llu %12zu\n",
                huge_nmalloc, huge_ndalloc, huge_allocated);
            /* Print stats for each arena. */
            for (i = 0; i < narenas; i++) {
                arena = arenas[i];
                if (arena != NULL) {
                    malloc_printf(
                        "\narenas[%u] statistics:\n", i);
                    malloc_mutex_lock(&arena->mtx);
                    stats_print(arena);
                    malloc_mutex_unlock(&arena->mtx);
                }
            }
        }
#endif /* #ifdef MALLOC_STATS */
        malloc_printf("--- End malloc statistics ---\n");
    }
}
/*
 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
 * implementation has to take pains to avoid infinite recursion during
 * initialization.
 */
static __inline bool
malloc_init(void)
{

    if (malloc_initialized == false)
        return (malloc_init_hard());

    return (false);
}
static bool
malloc_init_hard(void)
{
    unsigned i, j;
    int linklen;
    char buf[PATH_MAX + 1];
    const char *opts;

    malloc_mutex_lock(&init_lock);
    if (malloc_initialized) {
        /*
         * Another thread initialized the allocator before this one
         * acquired init_lock.
         */
        malloc_mutex_unlock(&init_lock);
        return (false);
    }

    /* Get number of CPUs. */
    {
        int mib[2];
        size_t len;

        mib[0] = CTL_HW;
        mib[1] = HW_NCPU;
        len = sizeof(ncpus);
        if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
            /* Error. */
            ncpus = 1;
        }
    }

    /* Get page size. */
    {
        long result;

        result = sysconf(_SC_PAGESIZE);
        assert(result != -1);
        pagesize = (unsigned) result;

        /*
         * We assume that pagesize is a power of 2 when calculating
         * pagesize_2pow.
         */
        assert(((result - 1) & result) == 0);
        pagesize_2pow = ffs(result) - 1;
    }
    for (i = 0; i < 3; i++) {
        /* Get runtime configuration. */
        switch (i) {
        case 0:
            if ((linklen = readlink("/etc/malloc.conf", buf,
                sizeof(buf) - 1)) != -1) {
                /*
                 * Use the contents of the "/etc/malloc.conf"
                 * symbolic link's name.
                 */
                buf[linklen] = '\0';
                opts = buf;
            } else {
                /* No configuration specified. */
                buf[0] = '\0';
                opts = buf;
            }
            break;
        case 1:
            if (issetugid() == 0 && (opts =
                getenv("MALLOC_OPTIONS")) != NULL) {
                /*
                 * Do nothing; opts is already initialized to
                 * the value of the MALLOC_OPTIONS environment
                 * variable.
                 */
            } else {
                /* No configuration specified. */
                buf[0] = '\0';
                opts = buf;
            }
            break;
        case 2:
            if (_malloc_options != NULL) {
                /*
                 * Use options that were compiled into the program.
                 */
                opts = _malloc_options;
            } else {
                /* No configuration specified. */
                buf[0] = '\0';
                opts = buf;
            }
            break;
        }
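        /*
         * Usage sketch (not from the original source): the same option
         * letters parsed below can come from any of the three sources
         * above, e.g.
         *
         *     ln -s 'P' /etc/malloc.conf          (symlink name)
         *     MALLOC_OPTIONS=P ./a.out            (environment)
         *     const char *_malloc_options = "P";  (compiled in)
         *
         * where 'P' enables the statistics dump at exit handled by the
         * 'P' case in the switch below.
         */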
        for (j = 0; opts[j] != '\0'; j++) {
            switch (opts[j]) {
            case 'k':
                /*
                 * Run fullness quartile limits don't have
                 * enough resolution if there are too few
                 * regions for the largest bin size classes.
                 */
                if (opt_chunk_2pow > pagesize_2pow + 4)
                    opt_chunk_2pow--;
                break;
            case 'K':
                if (opt_chunk_2pow < CHUNK_2POW_MAX)
                    opt_chunk_2pow++;
                break;
            case 'n':
                opt_narenas_lshift--;
                break;
            case 'N':
                opt_narenas_lshift++;
                break;
            case 'p':
                opt_print_stats = false;
                break;
            case 'P':
                opt_print_stats = true;
                break;
            case 'q':
                if (opt_quantum_2pow > QUANTUM_2POW_MIN)
                    opt_quantum_2pow--;
                break;
            case 'Q':
                if (opt_quantum_2pow < pagesize_2pow - 1)
                    opt_quantum_2pow++;
                break;
            case 's':
                if (opt_small_max_2pow > QUANTUM_2POW_MIN)
                    opt_small_max_2pow--;
                break;
            case 'S':
                if (opt_small_max_2pow < pagesize_2pow - 1)
                    opt_small_max_2pow++;
                break;
            case 'x':
                opt_xmalloc = false;
                break;
            default:
                malloc_printf("%s: (malloc) Unsupported"
                    " character in malloc options: '%c'\n",
                    _getprogname(), opts[j]);
            }
        }
    }

    /* Take care to call atexit() only once. */
    if (opt_print_stats) {
        /* Print statistics at exit. */
        atexit(malloc_print_stats);
    }
    /* Set variables according to the value of opt_small_max_2pow. */
    if (opt_small_max_2pow < opt_quantum_2pow)
        opt_small_max_2pow = opt_quantum_2pow;
    small_max = (1 << opt_small_max_2pow);

    /* Set bin-related variables. */
    bin_maxclass = (pagesize >> 1);
    if (pagesize_2pow > RUN_MIN_REGS_2POW + 1)
        tiny_min_2pow = pagesize_2pow - (RUN_MIN_REGS_2POW + 1);
    else
        tiny_min_2pow = TINY_MIN_2POW;
    assert(opt_quantum_2pow >= tiny_min_2pow);
    ntbins = opt_quantum_2pow - tiny_min_2pow;
    assert(ntbins <= opt_quantum_2pow);
    nqbins = (small_max >> opt_quantum_2pow);
    nsbins = pagesize_2pow - opt_small_max_2pow - 1;
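    /*
     * Worked example (illustrative defaults, not guaranteed to match the
     * compiled-in values): with 4 kB pages (pagesize_2pow == 12), a
     * 16 byte quantum (opt_quantum_2pow == 4), small_max == 512
     * (opt_small_max_2pow == 9), and tiny_min_2pow == 1, this yields
     * ntbins == 3 (sizes 2, 4, 8), nqbins == 32 (16, 32, ..., 512), and
     * nsbins == 2 (1 kB, 2 kB).
     */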
    /* Set variables according to the value of opt_quantum_2pow. */
    quantum = (1 << opt_quantum_2pow);
    quantum_mask = quantum - 1;
    if (ntbins > 0)
        small_min = (quantum >> 1) + 1;
    else
        small_min = 1;
    assert(small_min <= quantum);

    /* Set variables according to the value of opt_chunk_2pow. */
    chunk_size = (1 << opt_chunk_2pow);
    chunk_size_mask = chunk_size - 1;
    arena_chunk_maplen = (1 << (opt_chunk_2pow - pagesize_2pow));
    arena_maxclass = (chunk_size >> 1);
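    /*
     * Illustrative numbers (not necessarily the compiled-in defaults):
     * with opt_chunk_2pow == 20 the chunk size is 1 MB, the per-chunk
     * page map has 1 MB / 4 kB == 256 entries, and arena_maxclass is
     * 512 kB, so any request larger than half a chunk is handled as a
     * huge allocation backed by whole chunks.
     */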
    memset(&stats_chunks, 0, sizeof(chunk_stats_t));

    /* Various sanity checks that regard configuration. */
    assert(quantum >= sizeof(void *));
    assert(quantum <= pagesize);
    assert(chunk_size >= pagesize);
    assert(quantum * 4 <= chunk_size);

    /* Initialize chunks data. */
    malloc_mutex_init(&chunks_mtx);
    RB_INIT(&huge);
#ifdef USE_BRK
    brk_base = sbrk(0);
    brk_prev = brk_base;
    brk_max = brk_base;
#endif
    RB_INIT(&old_chunks);

    /* Initialize base allocation data structures. */
#ifdef USE_BRK
    /*
     * Do special brk allocation here, since the base chunk doesn't really
     * need to be chunk-aligned.
     */
    {
        void *brk_cur;
        intptr_t incr;

        do {
            /* Get the current end of brk. */
            brk_cur = sbrk(0);

            /*
             * Calculate how much padding is necessary to
             * chunk-align the end of brk.  Don't worry about
             * brk_cur not being chunk-aligned though.
             */
            incr = (intptr_t)chunk_size
                - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);

            brk_prev = sbrk(incr);
            if (brk_prev == brk_cur) {
                /* Success. */
                break;
            }
        } while (brk_prev != (void *)-1);

        base_chunk = brk_cur;
        base_next_addr = base_chunk;
        base_past_addr = (void *)((uintptr_t)base_chunk + incr);
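        /*
         * Example of the padding computation above (illustrative, not
         * from the original source): with 1 MB chunks, if the current
         * break sits 0x1234 bytes past a chunk boundary, then incr ==
         * 0x100000 - 0x1234, and a successful sbrk(incr) leaves the
         * break exactly chunk-aligned, with the skipped bytes recycled
         * as the initial base chunk.
         */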
        stats_chunks.nchunks = 1;
        stats_chunks.curchunks = 1;
        stats_chunks.highchunks = 1;
    }
#else
    /*
     * The first base chunk will be allocated when needed by base_alloc().
     */
    base_chunk = NULL;
    base_next_addr = NULL;
    base_past_addr = NULL;
#endif
    base_chunk_nodes = NULL;
    malloc_mutex_init(&base_mtx);

    if (ncpus > 1) {
        /*
         * For SMP systems, create four times as many arenas as there
         * are CPUs by default.
         */
        opt_narenas_lshift += 2;
    }

    /* Determine how many arenas to use. */
    narenas = ncpus;
    if (opt_narenas_lshift > 0) {
        if ((narenas << opt_narenas_lshift) > narenas)
            narenas <<= opt_narenas_lshift;
        /*
         * Make sure not to exceed the limits of what base_malloc()
         * can handle.
         */
        if (narenas * sizeof(arena_t *) > chunk_size)
            narenas = chunk_size / sizeof(arena_t *);
    } else if (opt_narenas_lshift < 0) {
        if ((narenas << opt_narenas_lshift) < narenas)
            narenas <<= opt_narenas_lshift;
        /* Make sure there is at least one arena. */
        if (narenas == 0)
            narenas = 1;
    }
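    /*
     * Example (illustrative): on a 4-CPU machine with no 'n'/'N' options,
     * opt_narenas_lshift ends up at 2, so narenas == 4 << 2 == 16, i.e.
     * four arenas per CPU, which keeps threads spread across enough
     * arenas to limit lock contention.
     */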
    {
        static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
            23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
            89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
            151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
            223, 227, 229, 233, 239, 241, 251, 257, 263};
        unsigned i, nprimes, parenas;

        /*
         * Pick a prime number of hash arenas that is more than narenas
         * so that direct hashing of pthread_self() pointers tends to
         * spread allocations evenly among the arenas.
         */
        assert((narenas & 1) == 0); /* narenas must be even. */
        nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
        parenas = primes[nprimes - 1]; /* In case not enough primes. */
        for (i = 1; i < nprimes; i++) {
            if (primes[i] > narenas) {
                parenas = primes[i];
                break;
            }
        }
        narenas = parenas;
    }
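    /*
     * Example (illustrative): starting from narenas == 16, the first
     * prime greater than 16 is 17, so 17 arenas are created; reducing a
     * pthread_self() pointer modulo a prime spreads threads more evenly
     * than reducing it modulo a power of two, whose low bits are often
     * biased by allocator alignment.
     */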
    /* Allocate and initialize arenas. */
    arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
    if (arenas == NULL) {
        malloc_mutex_unlock(&init_lock);
        return (true);
    }
    /*
     * Zero the array.  In practice, this should always be pre-zeroed,
     * since it was just mmap()ed, but let's be sure.
     */
    memset(arenas, 0, sizeof(arena_t *) * narenas);

    /*
     * Initialize one arena here.  The rest are lazily created in
     * arena_choose_hard().
     */
    arenas_extend(0);
    if (arenas[0] == NULL) {
        malloc_mutex_unlock(&init_lock);
        return (true);
    }

    malloc_mutex_init(&arenas_mtx);

    malloc_initialized = true;
    malloc_mutex_unlock(&init_lock);
    return (false);
}
/*
 * End general internal functions.
 */
/******************************************************************************/
/*
 * Begin malloc(3)-compatible functions.
 */

void *
malloc(size_t size)
{
    void *ret;

    if (malloc_init()) {
        ret = NULL;
        goto RETURN;
    }

    if (size == 0) {
        if (opt_sysv == false)
            size = 1;
        else {
            ret = NULL;
            goto RETURN;
        }
    }

    ret = imalloc(size);

RETURN:
    if (ret == NULL) {
        if (opt_xmalloc) {
            malloc_printf("%s: (malloc) Error in malloc(%zu):"
                " out of memory\n", _getprogname(), size);
            abort();
        }
        errno = ENOMEM;
    }

    UTRACE(0, size, ret);
    return (ret);
}
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
    int ret;
    void *result;

    /* Make sure that alignment is a large enough power of 2. */
    if (((alignment - 1) & alignment) != 0
        || alignment < sizeof(void *)) {
        if (opt_xmalloc) {
            malloc_printf("%s: (malloc) Error in"
                " posix_memalign(%zu, %zu):"
                " invalid alignment\n",
                _getprogname(), alignment, size);
            abort();
        }
        result = NULL;
        ret = EINVAL;
        goto RETURN;
    }
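    /*
     * How the check above works (illustrative, not from the original
     * source): for a power of two such as 16 (10000b), alignment - 1 is
     * 01111b and the bitwise AND is 0; for a non-power such as 24
     * (11000b), 23 & 24 == 10000b != 0, so the request is rejected.
     */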
    result = ipalloc(alignment, size);

    if (result == NULL) {
        if (opt_xmalloc) {
            malloc_printf("%s: (malloc) Error in"
                " posix_memalign(%zu, %zu): out of memory\n",
                _getprogname(), alignment, size);
            abort();
        }
        ret = ENOMEM;
        goto RETURN;
    }

    *memptr = result;
    ret = 0;

RETURN:
    UTRACE(0, size, result);
    return (ret);
}
void *
calloc(size_t num, size_t size)
{
    void *ret;
    size_t num_size;

    if (malloc_init()) {
        ret = NULL;
        goto RETURN;
    }

    num_size = num * size;
    if (num_size == 0) {
        if (opt_sysv == false)
            num_size = 1;
        else {
            ret = NULL;
            goto RETURN;
        }
    /*
     * Try to avoid division here.  We know that it isn't possible to
     * overflow during multiplication if neither operand uses any of the
     * most significant half of the bits in a size_t.
     */
    } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
        && (num_size / size != num)) {
        /* size_t overflow. */
        ret = NULL;
        goto RETURN;
    }
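    /*
     * Worked example of the check above (illustrative, not from the
     * original source): with a 64-bit size_t, SIZE_T_MAX << 32 masks the
     * upper half of the word.  If neither num nor size has any of those
     * bits set, both operands are below 2^32 and their product is at
     * most (2^32 - 1)^2, which cannot wrap; only when a high bit is set
     * does the code pay for the division that verifies
     * num_size / size == num.
     */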
    ret = icalloc(num_size);

RETURN:
    if (ret == NULL) {
        if (opt_xmalloc) {
            malloc_printf("%s: (malloc) Error in"
                " calloc(%zu, %zu): out of memory\n",
                _getprogname(), num, size);
            abort();
        }
        errno = ENOMEM;
    }

    UTRACE(0, num_size, ret);
    return (ret);
}
void *
realloc(void *ptr, size_t size)
{
    void *ret;

    if (size != 0) {
        if (ptr != &nil && ptr != NULL) {
            assert(malloc_initialized);

            ret = iralloc(ptr, size);
            if (ret == NULL) {
                if (opt_xmalloc) {
                    malloc_printf("%s: (malloc) Error in"
                        " realloc(%p, %zu): out of memory\n",
                        _getprogname(), ptr, size);
                    abort();
                }
                errno = ENOMEM;
            }
        } else {
            if (malloc_init())
                ret = NULL;
            else
                ret = imalloc(size);

            if (ret == NULL) {
                if (opt_xmalloc) {
                    malloc_printf("%s: (malloc) Error in"
                        " realloc(%p, %zu): out of memory\n",
                        _getprogname(), ptr, size);
                    abort();
                }
                errno = ENOMEM;
            }
        }
    } else {
        if (opt_sysv == false)
            ret = malloc(1);
        else {
            if (ptr != &nil && ptr != NULL)
                idalloc(ptr);
            ret = NULL;
        }
    }

    UTRACE(ptr, size, ret);
    return (ret);
}
void
free(void *ptr)
{

    if (ptr != &nil && ptr != NULL) {
        assert(malloc_initialized);

        idalloc(ptr);
    }
}
/*
 * End malloc(3)-compatible functions.
 */
/******************************************************************************/
/*
 * Begin non-standard functions.
 */

size_t
malloc_usable_size(const void *ptr)
{

    assert(ptr != NULL);

    return (isalloc(ptr));
}

/*
 * End non-standard functions.
 */
/******************************************************************************/
/*
 * Begin library-private functions, used by threading libraries for protection
 * of malloc during fork().  These functions are only called if the program is
 * running in threaded mode, so there is no need to check whether the program
 * is threaded here.
 */

void
_malloc_prefork(void)
{
    unsigned i;

    /* Acquire all mutexes in a safe order. */

    malloc_mutex_lock(&arenas_mtx);
    for (i = 0; i < narenas; i++) {
        if (arenas[i] != NULL)
            malloc_mutex_lock(&arenas[i]->mtx);
    }
    malloc_mutex_unlock(&arenas_mtx);

    malloc_mutex_lock(&base_mtx);

    malloc_mutex_lock(&chunks_mtx);
}
void
_malloc_postfork(void)
{
    unsigned i;

    /* Release all mutexes, now that fork() has completed. */

    malloc_mutex_unlock(&chunks_mtx);

    malloc_mutex_unlock(&base_mtx);

    malloc_mutex_lock(&arenas_mtx);
    for (i = 0; i < narenas; i++) {
        if (arenas[i] != NULL)
            malloc_mutex_unlock(&arenas[i]->mtx);
    }
    malloc_mutex_unlock(&arenas_mtx);
}
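/*
 * Illustrative sketch (not part of this file): a threading library that
 * wants the hooks above could register them with pthread_atfork(3) so that
 * every allocator mutex is held across fork() and released in both the
 * parent and the child.  The registration below is an assumption for
 * illustration only; FreeBSD's threading libraries may invoke the hooks
 * directly instead.
 */
#if 0
#include <pthread.h>

void	_malloc_prefork(void);
void	_malloc_postfork(void);

static void
register_malloc_fork_hooks(void)
{

    /* prepare handler locks; parent and child handlers unlock. */
    pthread_atfork(_malloc_prefork, _malloc_postfork, _malloc_postfork);
}
#endif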
/*
 * End library-private functions.
 */
/******************************************************************************/