/*-
 * Copyright (C) 2006 Jason Evans <jasone@FreeBSD.org>.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *******************************************************************************
 *
 * This allocator implementation is designed to provide scalable performance
 * for multi-threaded programs on multi-processor systems.  The following
 * features are included for this purpose:
 *
 *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
 *     contention and cache sloshing.
 *
 *   + Cache line sharing between arenas is avoided for internal data
 *     structures.
 *
 *   + Memory is managed in chunks and runs (chunks can be split into runs
 *     using a binary buddy scheme), rather than as individual pages.  This
 *     provides a constant-time mechanism for associating allocations with
 *     particular arenas.
 *
 * Allocation requests are rounded up to the nearest size class, and no record
 * of the original request size is maintained.  Allocations are broken into
 * categories according to size class.  Assuming runtime defaults, 4 kB pages
 * and a 16 byte quantum, the size classes in each category are as follows:
 *
 *   |====================================|
 *   | Category | Subcategory    |   Size |
 *   |====================================|
 *   | Small    | Tiny           |      2 |
 *   |          |                |      4 |
 *   |          |                |      8 |
 *   |          |----------------+--------|
 *   |          | Quantum-spaced |     16 |
 *   |          |                |     32 |
 *   |          |                |     48 |
 *   |          |                |    ... |
 *   |          |                |    480 |
 *   |          |                |    496 |
 *   |          |                |    512 |
 *   |          |----------------+--------|
 *   |          | Sub-page       |   1 kB |
 *   |          |                |   2 kB |
 *   |====================================|
 *   | Large                     |   4 kB |
 *   |                           |   8 kB |
 *   |                           |  16 kB |
 *   |                           |    ... |
 *   |                           |   1 MB |
 *   |====================================|
 *   | Huge                      |   2 MB |
 *   |                           |   4 MB |
 *   |                           |   6 MB |
 *   |                           |    ... |
 *   |====================================|
 *
 * A different mechanism is used for each category:
 *
 *   Small : Each size class is segregated into its own set of runs.  Each run
 *           maintains a bitmap of which regions are free/allocated.
 *
 *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 *           in the associated arena chunk header maps.
 *
 *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 *          Metadata are stored in a separate red-black tree.
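 *
 * Worked examples (illustrative, not from the original source, assuming the
 * default 16 byte quantum and 2 MB chunks): a 100 byte request is
 * quantum-spaced and rounds up to 112; a 600 byte request rounds up to the
 * 1 kB sub-page class; a 5 MB request is huge and rounds up to 6 MB (three
 * chunks).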
 *
 *******************************************************************************
 */

/*
 *******************************************************************************
 *
 * Ring macros.
 *
 *******************************************************************************
 */
/* Ring definitions. */
#define qr(a_type) struct {                                             \
        a_type  *qre_next;      /* Next element. */                     \
        a_type  *qre_prev;      /* Previous element. */                 \
}

#define qr_initializer {NULL, NULL}

/* Ring functions. */
#define qr_new(a_qr, a_field) do {                                      \
        (a_qr)->a_field.qre_next = (a_qr);                              \
        (a_qr)->a_field.qre_prev = (a_qr);                              \
} while (0)

#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)

#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
#define qr_before_insert(a_qrelm, a_qr, a_field) do {                   \
        (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev;         \
        (a_qr)->a_field.qre_next = (a_qrelm);                           \
        (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr);            \
        (a_qrelm)->a_field.qre_prev = (a_qr);                           \
} while (0)

#define qr_after_insert(a_qrelm, a_qr, a_field) do {                    \
        (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next;         \
        (a_qr)->a_field.qre_prev = (a_qrelm);                           \
        (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr);            \
        (a_qrelm)->a_field.qre_next = (a_qr);                           \
} while (0)

#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do {                   \
        a_type *t;                                                      \
        (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b);        \
        (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a);        \
        t = (a_qr_a)->a_field.qre_prev;                                 \
        (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev;        \
        (a_qr_b)->a_field.qre_prev = t;                                 \
} while (0)

/*
 * qr_meld() and qr_split() are functionally equivalent, so there's no need to
 * have two copies of the code.
 */
#define qr_split(a_qr_a, a_qr_b, a_type, a_field)                       \
        qr_meld((a_qr_a), (a_qr_b), a_type, a_field)

#define qr_remove(a_qr, a_field) do {                                   \
        (a_qr)->a_field.qre_prev->a_field.qre_next                      \
            = (a_qr)->a_field.qre_next;                                 \
        (a_qr)->a_field.qre_next->a_field.qre_prev                      \
            = (a_qr)->a_field.qre_prev;                                 \
        (a_qr)->a_field.qre_next = (a_qr);                              \
        (a_qr)->a_field.qre_prev = (a_qr);                              \
} while (0)

#define qr_foreach(var, a_qr, a_field)                                  \
        for ((var) = (a_qr);                                            \
            (var) != NULL;                                              \
            (var) = (((var)->a_field.qre_next != (a_qr))                \
            ? (var)->a_field.qre_next : NULL))

#define qr_reverse_foreach(var, a_qr, a_field)                          \
        for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL;  \
            (var) != NULL;                                              \
            (var) = (((var) != (a_qr))                                  \
            ? (var)->a_field.qre_prev : NULL))
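
/*
 * Illustrative sketch (not from the original source): minimal usage of the
 * ring macros above.  The widget type and function name are invented for
 * this example only.
 */
#if 0
typedef struct widget_s widget_t;
struct widget_s {
        int             id;
        qr(widget_t)    link;   /* Ring linkage. */
};

static void
widget_ring_example(void)
{
        widget_t a, b, c, *w;

        qr_new(&a, link);               /* a is a ring of one element. */
        qr_after_insert(&a, &b, link);  /* Ring is now a, b. */
        qr_after_insert(&b, &c, link);  /* Ring is now a, b, c. */
        qr_remove(&b, link);            /* Ring is now a, c; b stands alone. */
        qr_foreach(w, &a, link) {
                /* Visits a, then c. */
        }
}
#endif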
/******************************************************************************/

/*
 * In order to disable various extra features that may have negative
 * performance impacts (assertions, expanded statistics), define
 * NO_MALLOC_EXTRAS.
 */
/* #define NO_MALLOC_EXTRAS */

#ifndef NO_MALLOC_EXTRAS
#  define MALLOC_DEBUG
#endif
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "libc_private.h"
#include "spinlock.h"
#include "namespace.h"
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stddef.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/tree.h>

#include <sys/ktrace.h> /* Must come after several other sys/ includes. */

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/vmparam.h>

#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <sched.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>

#include "un-namespace.h"
/*
 * Calculate statistics that can be used to get an idea of how well caching is
 * working.
 */
#ifndef NO_MALLOC_EXTRAS
#  define MALLOC_STATS
#endif

#ifndef MALLOC_DEBUG
#  ifndef NDEBUG
#    define NDEBUG
#  endif
#endif
#include <assert.h>

#ifdef MALLOC_DEBUG
   /* Disable inlining to make debugging easier. */
#  define inline
#endif

/* Size of stack-allocated buffer passed to strerror_r(). */
#define STRERROR_BUF            64
/* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
#ifdef __i386__
#  define QUANTUM_2POW_MIN      4
#  define SIZEOF_PTR            4
#  define USE_BRK
#endif
#ifdef __ia64__
#  define QUANTUM_2POW_MIN      4
#  define SIZEOF_PTR            8
#endif
#ifdef __alpha__
#  define QUANTUM_2POW_MIN      4
#  define SIZEOF_PTR            8
#  define NO_TLS
#endif
#ifdef __sparc64__
#  define QUANTUM_2POW_MIN      4
#  define SIZEOF_PTR            8
#  define NO_TLS
#endif
#ifdef __amd64__
#  define QUANTUM_2POW_MIN      4
#  define SIZEOF_PTR            8
#endif
#ifdef __arm__
#  define QUANTUM_2POW_MIN      3
#  define SIZEOF_PTR            4
#  define USE_BRK
#endif
#ifdef __powerpc__
#  define QUANTUM_2POW_MIN      4
#  define SIZEOF_PTR            4
#  define USE_BRK
#endif

/* sizeof(int) == (1 << SIZEOF_INT_2POW). */
#ifndef SIZEOF_INT_2POW
#  define SIZEOF_INT_2POW       2
#endif
/* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
#if (!defined(PIC) && !defined(NO_TLS))
#  define NO_TLS
#endif
/*
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.  The chunk size must satisfy:
 *
 *   2^(pagesize_2pow - 1 + RUN_MIN_REGS_2POW) <= chunk_size <= 2^28
 */
#define CHUNK_2POW_DEFAULT      21
#define CHUNK_2POW_MAX          28

/*
 * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 * so over-estimates are okay (up to a point), but under-estimates will
 * negatively affect performance.
 */
#define CACHELINE_2POW          6
#define CACHELINE               ((size_t)(1 << CACHELINE_2POW))

/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
#define SMALL_MAX_2POW_DEFAULT  9
#define SMALL_MAX_DEFAULT       (1 << SMALL_MAX_2POW_DEFAULT)

/*
 * Minimum number of regions that must fit into a run that serves quantum-size
 * allocations.
 *
 * Note that if this is set too low, space will be wasted if there are size
 * classes that are small enough that RUN_MIN_REGS regions don't fill a page.
 * If this is set too high, then the overhead of searching through the bitmap
 * that tracks region usage will become excessive.
 */
#define RUN_MIN_REGS_2POW       10
#define RUN_MIN_REGS            (1 << RUN_MIN_REGS_2POW)

/*
 * Maximum number of pages for a run that is used for bin allocations.
 *
 * Note that if this is set too low, then fragmentation for the largest bin
 * size classes will be high.  If this is set too high, then even small
 * programs will often have to allocate more than two chunks early on.
 */
#define RUN_MAX_PAGES_2POW      4
#define RUN_MAX_PAGES           (1 << RUN_MAX_PAGES_2POW)
/******************************************************************************/

/*
 * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
 * they require malloc()ed memory.
 */
typedef struct {
        spinlock_t      lock;
} malloc_mutex_t;

/* Set to true once the allocator has been initialized. */
static bool malloc_initialized = false;

/* Used to avoid initialization races. */
static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};

/******************************************************************************/
/*
 * Statistics data structures.
 */

#ifdef MALLOC_STATS

typedef struct malloc_bin_stats_s malloc_bin_stats_t;
struct malloc_bin_stats_s {
        /*
         * Number of allocation requests that corresponded to the size of this
         * bin.
         */
        uint64_t        nrequests;

        /* Total number of runs created for this bin's size class. */
        uint64_t        nruns;

        /*
         * Total number of run promotions/demotions for this bin's size class.
         */
        uint64_t        npromote;
        uint64_t        ndemote;

        /* High-water mark for this bin. */
        unsigned long   highruns;

        /* Current number of runs in this bin. */
        unsigned long   curruns;
};

typedef struct arena_stats_s arena_stats_t;
struct arena_stats_s {
        /* Total byte count of allocated memory, not including overhead. */
        size_t          allocated;

        /* Number of times each function was called. */
        uint64_t        nmalloc;
        uint64_t        ndalloc;
        uint64_t        nmadvise;

        /* Number of large allocation requests. */
        uint64_t        large_nrequests;
};

typedef struct chunk_stats_s chunk_stats_t;
struct chunk_stats_s {
        /* Number of chunks that were allocated. */
        uint64_t        nchunks;

        /* High-water mark for number of chunks allocated. */
        unsigned long   highchunks;

        /*
         * Current number of chunks allocated.  This value isn't maintained for
         * any other purpose, so keep track of it in order to be able to set
         * highchunks.
         */
        unsigned long   curchunks;
};

#endif /* #ifdef MALLOC_STATS */

/******************************************************************************/
/*
 * Chunk data structures.
 */

/* Tree of chunks. */
typedef struct chunk_node_s chunk_node_t;
struct chunk_node_s {
        /* Linkage for the chunk tree. */
        RB_ENTRY(chunk_node_s) link;

        /*
         * Pointer to the chunk that this tree node is responsible for.  In
         * some (but certainly not all) cases, this data structure is placed
         * at the beginning of the corresponding chunk, so this field may
         * point to this node.
         */
        void    *chunk;

        /* Total chunk size. */
        size_t  size;
};
typedef struct chunk_tree_s chunk_tree_t;
RB_HEAD(chunk_tree_s, chunk_node_s);

/******************************************************************************/
/*
 * Arena data structures.
 */

typedef struct arena_s arena_t;
typedef struct arena_bin_s arena_bin_t;

typedef struct arena_chunk_map_s arena_chunk_map_t;
struct arena_chunk_map_s {
        bool            free:1;
        bool            large:1;
        unsigned        npages:15;      /* Limiting factor for CHUNK_2POW_MAX. */
        unsigned        pos:15;
};

/* Arena chunk header. */
typedef struct arena_chunk_s arena_chunk_t;
struct arena_chunk_s {
        /* Arena that owns the chunk. */
        arena_t         *arena;

        /* Linkage for the arena's chunk tree. */
        RB_ENTRY(arena_chunk_s) link;

        /*
         * Number of pages in use.  This is maintained in order to make
         * detection of empty chunks fast.
         */
        uint32_t        pages_used;

        /*
         * Array of counters that keeps track of how many free runs of each
         * size are available in this chunk.  This table is sized at compile
         * time, which is wasteful.  However, due to unrelated rounding, this
         * doesn't actually waste any otherwise useful space.
         */
        uint32_t        nfree_runs[CHUNK_2POW_MAX/* - PAGE_SHIFT */];

        /* Map of pages within chunk that keeps track of free/large/small. */
        arena_chunk_map_t map[1]; /* Dynamically sized. */
};
typedef struct arena_chunk_tree_s arena_chunk_tree_t;
RB_HEAD(arena_chunk_tree_s, arena_chunk_s);

typedef struct arena_run_s arena_run_t;
struct arena_run_s {
        /* Linkage for run rings. */
        qr(arena_run_t) link;

#ifdef MALLOC_DEBUG
        uint32_t        magic;
#  define ARENA_RUN_MAGIC 0x384adf93
#endif

        /* Bin this run is associated with. */
        arena_bin_t     *bin;

        /* Bitmask of in-use regions (0: in use, 1: free). */
#define REGS_MASK_NELMS                                                 \
        (1 << (RUN_MIN_REGS_2POW - SIZEOF_INT_2POW - 2))
        unsigned        regs_mask[REGS_MASK_NELMS];

        /* Index of first element that might have a free region. */
        unsigned        regs_minelm;

        /* Number of free regions in run. */
        unsigned        nfree;

        /*
         * Current quartile for this run, one of: {RUN_QINIT, RUN_Q0, RUN_Q25,
         * RUN_Q50, RUN_Q75, RUN_Q100}.
         */
#define RUN_QINIT       0
#define RUN_Q0          1
#define RUN_Q25         2
#define RUN_Q50         3
#define RUN_Q75         4
#define RUN_Q100        5
        unsigned        quartile;

        /*
         * Limits on the number of free regions for the fullness quartile this
         * run is currently in.  If nfree goes outside these limits, the run
         * is moved to a different fullness quartile.
         */
        unsigned        free_max;
        unsigned        free_min;
};

/* Used for run ring headers, where the run isn't actually used. */
typedef struct arena_run_link_s arena_run_link_t;
struct arena_run_link_s {
        /* Linkage for run rings. */
        qr(arena_run_t) link;
};

struct arena_bin_s {
        /*
         * Current run being used to service allocations of this bin's size
         * class.
         */
        arena_run_t     *runcur;

        /*
         * Links into rings of runs, of various fullnesses (names indicate
         * approximate lower bounds).  A new run conceptually starts off in
         * runsinit, and it isn't inserted into the runs0 ring until it
         * reaches 25% full (hysteresis mechanism).  For the run to be moved
         * again, it must become either empty or 50% full.  Thus, each ring
         * contains runs that are within 50% above the advertised fullness for
         * the ring.  This provides a low-overhead mechanism for segregating
         * runs into approximate fullness classes.
         *
         * Conceptually, there is a runs100 that contains completely full
         * runs.  Since we don't need to search for these runs though, no
         * runs100 ring is actually maintained.
         *
         * These rings are useful when looking for an existing run to use when
         * runcur is no longer usable.  We look for usable runs in the
         * following order:
         *
         *   1) runs50
         *   2) runs25
         *   3) runs0
         *   4) runs75
         *
         * runs75 isn't a good place to look, because it contains runs that
         * may be nearly completely full.  Still, we look there as a last
         * resort in order to avoid allocating a new run if at all possible.
         */
        /* arena_run_link_t     runsinit;   0% <= fullness <  25% */
        arena_run_link_t        runs0;  /*  0% <  fullness <  50% */
        arena_run_link_t        runs25; /* 25% <  fullness <  75% */
        arena_run_link_t        runs50; /* 50% <  fullness < 100% */
        arena_run_link_t        runs75; /* 75% <  fullness < 100% */
        /* arena_run_link_t     runs100;          fullness == 100% */

        /* Size of regions in a run for this bin's size class. */
        size_t          reg_size;

        /* Total size of a run for this bin's size class. */
        size_t          run_size;

        /* Total number of regions in a run for this bin's size class. */
        uint32_t        nregs;

        /* Offset of first region in a run for this bin's size class. */
        uint32_t        reg0_offset;

#ifdef MALLOC_STATS
        /* Bin statistics. */
        malloc_bin_stats_t stats;
#endif
};

struct arena_s {
#ifdef MALLOC_DEBUG
        uint32_t        magic;
#  define ARENA_MAGIC 0x947d3d24
#endif

        /* All operations on this arena require that mtx be locked. */
        malloc_mutex_t  mtx;

#ifdef MALLOC_STATS
        arena_stats_t   stats;
#endif

        /*
         * Tree of chunks this arena manages.
         */
        arena_chunk_tree_t      chunks;

        /*
         * bins is used to store rings of free regions of the following sizes,
         * assuming a 16-byte quantum, 4kB pagesize, and default
         * MALLOC_OPTIONS.
         *
         *   bins[i] | size |
         *   --------+------+
         *        0  |    2 |
         *        1  |    4 |
         *        2  |    8 |
         *   --------+------+
         *        3  |   16 |
         *        4  |   32 |
         *        5  |   48 |
         *           :      :
         *       33  |  496 |
         *       34  |  512 |
         *   --------+------+
         *       35  | 1024 |
         *       36  | 2048 |
         *   --------+------+
         */
        arena_bin_t             bins[1]; /* Dynamically sized. */
};

/******************************************************************************/
/*
 * Data.
 */

/* Number of CPUs. */
static unsigned         ncpus;

/* VM page size. */
static unsigned         pagesize;
static unsigned         pagesize_2pow;

/* Various bin-related settings. */
static size_t           bin_maxclass; /* Max size class for bins. */
static unsigned         ntbins; /* Number of (2^n)-spaced tiny bins. */
static unsigned         nqbins; /* Number of quantum-spaced bins. */
static unsigned         nsbins; /* Number of (2^n)-spaced sub-page bins. */
static size_t           small_min;
static size_t           small_max;
static unsigned         tiny_min_2pow;

/* Various quantum-related settings. */
static size_t           quantum;
static size_t           quantum_mask; /* (quantum - 1). */

/* Various chunk-related settings. */
static size_t           chunk_size;
static size_t           chunk_size_mask; /* (chunk_size - 1). */
static size_t           arena_maxclass; /* Max size class for arenas. */
static unsigned         arena_chunk_maplen;

/********/
/*
 * Chunks.
 */
/* Protects chunk-related data structures. */
static malloc_mutex_t   chunks_mtx;

/* Tree of chunks that are stand-alone huge allocations. */
static chunk_tree_t     huge;

#ifdef USE_BRK
/*
 * Try to use brk for chunk-size allocations, due to address space constraints.
 */
/* Result of first sbrk(0) call. */
static void             *brk_base;
/* Current end of brk, or ((void *)-1) if brk is exhausted. */
static void             *brk_prev;
/* Current upper limit on brk addresses. */
static void             *brk_max;
#endif

#ifdef MALLOC_STATS
/*
 * Byte counters for allocated/total space used by the chunks in the huge
 * allocations tree.
 */
static uint64_t         huge_nmalloc;
static uint64_t         huge_ndalloc;
static size_t           huge_allocated;
#endif

/*
 * Tree of chunks that were previously allocated.  This is used when allocating
 * chunks, in an attempt to re-use address space.
 */
static chunk_tree_t     old_chunks;

/****************************/
/*
 * base (internal allocation).
 */

/*
 * Current chunk that is being used for internal memory allocations.  This
 * chunk is carved up in cacheline-size quanta, so that there is no chance of
 * false cache line sharing.
 */
static void             *base_chunk;
static void             *base_next_addr;
static void             *base_past_addr; /* Addr immediately past base_chunk. */
static chunk_node_t     *base_chunk_nodes; /* LIFO cache of chunk nodes. */
static malloc_mutex_t   base_mtx;
#ifdef MALLOC_STATS
static uint64_t         base_total;
#endif

/********/
/*
 * Arenas.
 */
/*
 * Arenas that are used to service external requests.  Not all elements of the
 * arenas array are necessarily used; arenas are created lazily as needed.
 */
static arena_t          **arenas;
static unsigned         narenas;
static unsigned         next_arena;
static malloc_mutex_t   arenas_mtx; /* Protects arenas initialization. */

#ifndef NO_TLS
/*
 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 * for allocations.
 */
static __thread arena_t *arenas_map;
#endif

#ifdef MALLOC_STATS
/* Chunk statistics. */
static chunk_stats_t    stats_chunks;
#endif

/*******************************/
/*
 * Runtime configuration options.
 */
const char      *_malloc_options;

#ifndef NO_MALLOC_EXTRAS
static bool     opt_abort = true;
static bool     opt_junk = true;
#else
static bool     opt_abort = false;
static bool     opt_junk = false;
#endif
static bool     opt_hint = false;
static bool     opt_print_stats = false;
static size_t   opt_quantum_2pow = QUANTUM_2POW_MIN;
static size_t   opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
static size_t   opt_chunk_2pow = CHUNK_2POW_DEFAULT;
static bool     opt_utrace = false;
static bool     opt_sysv = false;
static bool     opt_xmalloc = false;
static bool     opt_zero = false;
static int32_t  opt_narenas_lshift = 0;

typedef struct {
        void    *p;
        size_t  s;
        void    *r;
} malloc_utrace_t;

#define UTRACE(a, b, c)                                                 \
        if (opt_utrace) {                                               \
                malloc_utrace_t ut = {a, b, c};                         \
                utrace(&ut, sizeof(ut));                                \
        }

/******************************************************************************/
/*
 * Begin function prototypes for non-inline static functions.
 */

static void     malloc_mutex_init(malloc_mutex_t *a_mutex);
static void     wrtmessage(const char *p1, const char *p2, const char *p3,
    const char *p4);
static void     malloc_printf(const char *format, ...);
static void     *base_alloc(size_t size);
static chunk_node_t *base_chunk_node_alloc(void);
static void     base_chunk_node_dealloc(chunk_node_t *node);
#ifdef MALLOC_STATS
static void     stats_print(arena_t *arena);
#endif
static void     *pages_map(void *addr, size_t size);
static void     pages_unmap(void *addr, size_t size);
static void     *chunk_alloc(size_t size);
static void     chunk_dealloc(void *chunk, size_t size);
#ifndef NO_TLS
static arena_t  *choose_arena_hard(void);
#endif
static void     arena_run_split(arena_t *arena, arena_run_t *run, bool large,
    size_t size);
static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
static void     arena_chunk_dealloc(arena_chunk_t *chunk);
static void     arena_bin_run_promote(arena_t *arena, arena_bin_t *bin,
    arena_run_t *run);
static void     arena_bin_run_demote(arena_t *arena, arena_bin_t *bin,
    arena_run_t *run);
static arena_run_t *arena_run_alloc(arena_t *arena, bool large, size_t size);
static void     arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
static void     *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
static void     *arena_malloc(arena_t *arena, size_t size);
static size_t   arena_salloc(const void *ptr);
static void     *arena_ralloc(void *ptr, size_t size, size_t oldsize);
static void     arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
static bool     arena_new(arena_t *arena);
static arena_t  *arenas_extend(unsigned ind);
static void     *huge_malloc(size_t size);
static void     *huge_ralloc(void *ptr, size_t size, size_t oldsize);
static void     huge_dalloc(void *ptr);
static void     *imalloc(size_t size);
static void     *ipalloc(size_t alignment, size_t size);
static void     *icalloc(size_t size);
static size_t   isalloc(const void *ptr);
static void     *iralloc(void *ptr, size_t size);
static void     idalloc(void *ptr);
static void     malloc_print_stats(void);
static bool     malloc_init_hard(void);

/*
 * End function prototypes.
 */
/******************************************************************************/
/*
 * Begin mutex.
 */

static void
malloc_mutex_init(malloc_mutex_t *a_mutex)
{
        static const spinlock_t lock = _SPINLOCK_INITIALIZER;

        a_mutex->lock = lock;
}

static inline void
malloc_mutex_lock(malloc_mutex_t *a_mutex)
{

        if (__isthreaded)
                _SPINLOCK(&a_mutex->lock);
}

static inline void
malloc_mutex_unlock(malloc_mutex_t *a_mutex)
{

        if (__isthreaded)
                _SPINUNLOCK(&a_mutex->lock);
}

/*
 * End mutex.
 */
/******************************************************************************/
/*
 * Begin Utility functions/macros.
 */

/* Return the chunk address for allocation address a. */
#define CHUNK_ADDR2BASE(a)                                              \
        ((void *)((uintptr_t)(a) & ~chunk_size_mask))

/* Return the chunk offset of address a. */
#define CHUNK_ADDR2OFFSET(a)                                            \
        ((size_t)((uintptr_t)(a) & chunk_size_mask))

/* Return the smallest chunk multiple that is >= s. */
#define CHUNK_CEILING(s)                                                \
        (((s) + chunk_size_mask) & ~chunk_size_mask)

/* Return the smallest cacheline multiple that is >= s. */
#define CACHELINE_CEILING(s)                                            \
        (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))

/* Return the smallest quantum multiple that is >= a. */
#define QUANTUM_CEILING(a)                                              \
        (((a) + quantum_mask) & ~quantum_mask)
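
/*
 * Illustrative note (not from the original source): these ceiling macros rely
 * on the alignment being a power of two.  For example, with a 16 byte quantum
 * (quantum_mask == 0xf), QUANTUM_CEILING(17) == (17 + 15) & ~15 == 32, while
 * QUANTUM_CEILING(16) == 16.
 */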

/* Compute the smallest power of 2 that is >= x. */
static inline size_t
pow2_ceil(size_t x)
{

        x--;
        x |= x >> 1;
        x |= x >> 2;
        x |= x >> 4;
        x |= x >> 8;
        x |= x >> 16;
#if (SIZEOF_PTR == 8)
        x |= x >> 32;
#endif
        x++;
        return (x);
}

static void
wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
{

        _write(STDERR_FILENO, p1, strlen(p1));
        _write(STDERR_FILENO, p2, strlen(p2));
        _write(STDERR_FILENO, p3, strlen(p3));
        _write(STDERR_FILENO, p4, strlen(p4));
}

void    (*_malloc_message)(const char *p1, const char *p2, const char *p3,
            const char *p4) = wrtmessage;

/*
 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
 */
static void
malloc_printf(const char *format, ...)
{
        char buf[4096];
        va_list ap;

        va_start(ap, format);
        vsnprintf(buf, sizeof(buf), format, ap);
        va_end(ap);
        _malloc_message(buf, "", "", "");
}

/******************************************************************************/

static void *
base_alloc(size_t size)
{
        void *ret;
        size_t csize;

        /* Round size up to nearest multiple of the cacheline size. */
        csize = CACHELINE_CEILING(size);

        malloc_mutex_lock(&base_mtx);

        /* Make sure there's enough space for the allocation. */
        if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
                void *tchunk;

                assert(csize <= chunk_size);

                tchunk = chunk_alloc(chunk_size);
                if (tchunk == NULL) {
                        ret = NULL;
                        goto RETURN;
                }
                base_chunk = tchunk;
                base_next_addr = (void *)base_chunk;
                base_past_addr = (void *)((uintptr_t)base_chunk + chunk_size);
#ifdef MALLOC_STATS
                base_total += chunk_size;
#endif
        }

        /* Allocate. */
        ret = base_next_addr;
        base_next_addr = (void *)((uintptr_t)base_next_addr + csize);

RETURN:
        malloc_mutex_unlock(&base_mtx);
        return (ret);
}

static chunk_node_t *
base_chunk_node_alloc(void)
{
        chunk_node_t *ret;

        malloc_mutex_lock(&base_mtx);
        if (base_chunk_nodes != NULL) {
                ret = base_chunk_nodes;
                base_chunk_nodes = *(chunk_node_t **)ret;
                malloc_mutex_unlock(&base_mtx);
        } else {
                malloc_mutex_unlock(&base_mtx);
                ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
        }

        return (ret);
}

static void
base_chunk_node_dealloc(chunk_node_t *node)
{

        malloc_mutex_lock(&base_mtx);
        *(chunk_node_t **)node = base_chunk_nodes;
        base_chunk_nodes = node;
        malloc_mutex_unlock(&base_mtx);
}
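
/*
 * Illustrative note (not from the original source): base_chunk_nodes is an
 * intrusive LIFO free list.  The first word of each free chunk_node_t is
 * reused as the "next" pointer, so base_chunk_node_dealloc() pushes by
 * storing the old head into the node, and base_chunk_node_alloc() pops by
 * loading it back.  No separate list structure is required.
 */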

/******************************************************************************/

#ifdef MALLOC_STATS
static void
stats_print(arena_t *arena)
{
        unsigned i;
        int gap_start;

        malloc_printf("allocated: %zu\n", arena->stats.allocated);

        malloc_printf("calls:\n");
        malloc_printf(" %12s %12s %12s\n", "nmalloc", "ndalloc", "nmadvise");
        malloc_printf(" %12llu %12llu %12llu\n",
            arena->stats.nmalloc, arena->stats.ndalloc, arena->stats.nmadvise);

        malloc_printf("large requests: %llu\n", arena->stats.large_nrequests);

        malloc_printf("bins:\n");
        malloc_printf("%13s %1s %4s %5s %6s %9s %5s %6s %7s %6s %6s\n",
            "bin", "", "size", "nregs", "run_sz", "nrequests", "nruns",
            "hiruns", "curruns", "npromo", "ndemo");
        for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
                if (arena->bins[i].stats.nrequests == 0) {
                        if (gap_start == -1)
                                gap_start = i;
                } else {
                        if (gap_start != -1) {
                                if (i > gap_start + 1) {
                                        /* Gap of more than one size class. */
                                        malloc_printf("[%u..%u]\n",
                                            gap_start, i - 1);
                                } else {
                                        /* Gap of one size class. */
                                        malloc_printf("[%u]\n", gap_start);
                                }
                                gap_start = -1;
                        }
                        malloc_printf(
                            "%13u %1s %4u %5u %6u %9llu %5llu"
                            " %6lu %7lu %6llu %6llu\n",
                            i,
                            i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
                            arena->bins[i].reg_size,
                            arena->bins[i].nregs,
                            arena->bins[i].run_size,
                            arena->bins[i].stats.nrequests,
                            arena->bins[i].stats.nruns,
                            arena->bins[i].stats.highruns,
                            arena->bins[i].stats.curruns,
                            arena->bins[i].stats.npromote,
                            arena->bins[i].stats.ndemote);
                }
        }
        if (gap_start != -1) {
                if (i > gap_start + 1) {
                        /* Gap of more than one size class. */
                        malloc_printf("[%u..%u]\n", gap_start, i - 1);
                } else {
                        /* Gap of one size class. */
                        malloc_printf("[%u]\n", gap_start);
                }
        }
}
#endif

/*
 * End Utility functions/macros.
 */
/******************************************************************************/
/*
 * Begin chunk management functions.
 */

static inline int
chunk_comp(chunk_node_t *a, chunk_node_t *b)
{

        assert(a != NULL);
        assert(b != NULL);

        if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
                return (-1);
        else if (a->chunk == b->chunk)
                return (0);
        else
                return (1);
}

/* Generate red-black tree code for chunks. */
RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);

static void *
pages_map(void *addr, size_t size)
{
        void *ret;

        /*
         * We don't use MAP_FIXED here, because it can cause the *replacement*
         * of existing mappings, and we only want to create new mappings.
         */
        ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
            -1, 0);
        assert(ret != NULL);

        if (ret == MAP_FAILED)
                ret = NULL;
        else if (addr != NULL && ret != addr) {
                /*
                 * We succeeded in mapping memory, but not in the right place.
                 */
                if (munmap(ret, size) == -1) {
                        char buf[STRERROR_BUF];

                        strerror_r(errno, buf, sizeof(buf));
                        malloc_printf("%s: (malloc) Error in munmap(): %s\n",
                            _getprogname(), buf);
                        if (opt_abort)
                                abort();
                }
                ret = NULL;
        }

        assert(ret == NULL || (addr == NULL && ret != addr)
            || (addr != NULL && ret == addr));
        return (ret);
}

static void
pages_unmap(void *addr, size_t size)
{

        if (munmap(addr, size) == -1) {
                char buf[STRERROR_BUF];

                strerror_r(errno, buf, sizeof(buf));
                malloc_printf("%s: (malloc) Error in munmap(): %s\n",
                    _getprogname(), buf);
                if (opt_abort)
                        abort();
        }
}

static void *
chunk_alloc(size_t size)
{
        void *ret, *chunk;
        chunk_node_t *tchunk, *delchunk;

        assert(size != 0);
        assert(size % chunk_size == 0);

        malloc_mutex_lock(&chunks_mtx);

        if (size == chunk_size) {
                /*
                 * Check for address ranges that were previously chunks and
                 * try to reuse them.
                 */
                tchunk = RB_MIN(chunk_tree_s, &old_chunks);
                while (tchunk != NULL) {
                        /* Found an address range.  Try to recycle it. */

                        chunk = tchunk->chunk;
                        delchunk = tchunk;
                        tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);

                        /* Remove delchunk from the tree. */
                        RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
                        base_chunk_node_dealloc(delchunk);

#ifdef USE_BRK
                        if ((uintptr_t)chunk >= (uintptr_t)brk_base
                            && (uintptr_t)chunk < (uintptr_t)brk_max) {
                                /* Re-use a previously freed brk chunk. */
                                ret = chunk;
                                goto RETURN;
                        }
#endif
                        if ((ret = pages_map(chunk, size)) != NULL) {
                                /* Success. */
                                goto RETURN;
                        }
                }
        }

#ifdef USE_BRK
        /*
         * Try to create allocations in brk, in order to make full use of
         * limited address space.
         */
        if (brk_prev != (void *)-1) {
                void *brk_cur;
                intptr_t incr;

                /*
                 * The loop is necessary to recover from races with other
                 * threads that are using brk for something other than malloc.
                 */
                do {
                        /* Get the current end of brk. */
                        brk_cur = sbrk(0);

                        /*
                         * Calculate how much padding is necessary to
                         * chunk-align the end of brk.
                         */
                        incr = (intptr_t)size
                            - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
                        if (incr == size) {
                                ret = brk_cur;
                        } else {
                                ret = (void *)(intptr_t)brk_cur + incr;
                                incr += size;
                        }

                        brk_prev = sbrk(incr);
                        if (brk_prev == brk_cur) {
                                /* Success. */
                                brk_max = (void *)(intptr_t)ret + size;
                                goto RETURN;
                        }
                } while (brk_prev != (void *)-1);
        }
#endif

        /*
         * Try to over-allocate, but allow the OS to place the allocation
         * anywhere.  Beware of size_t wrap-around.
         */
        if (size + chunk_size > size) {
                if ((ret = pages_map(NULL, size + chunk_size)) != NULL) {
                        size_t offset = CHUNK_ADDR2OFFSET(ret);

                        /*
                         * Success.  Clean up unneeded leading/trailing space.
                         */
                        if (offset != 0) {
                                /* Leading space. */
                                pages_unmap(ret, chunk_size - offset);

                                ret = (void *)((uintptr_t)ret + (chunk_size -
                                    offset));

                                /* Trailing space. */
                                pages_unmap((void *)((uintptr_t)ret + size),
                                    offset);
                        } else {
                                /* Trailing space only. */
                                pages_unmap((void *)((uintptr_t)ret + size),
                                    chunk_size);
                        }
                        goto RETURN;
                }
        }

        /* All strategies for allocation failed. */
        ret = NULL;

RETURN:
#ifdef MALLOC_STATS
        if (ret != NULL) {
                stats_chunks.nchunks += (size / chunk_size);
                stats_chunks.curchunks += (size / chunk_size);
        }
        if (stats_chunks.curchunks > stats_chunks.highchunks)
                stats_chunks.highchunks = stats_chunks.curchunks;
#endif
        malloc_mutex_unlock(&chunks_mtx);

        assert(CHUNK_ADDR2BASE(ret) == ret);
        return (ret);
}
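
/*
 * Illustrative note (not from the original source) on the over-allocation
 * performed by chunk_alloc(): with 2 MB chunks, if pages_map() returns
 * 0x500000, then offset == 0x100000, the leading chunk_size - offset ==
 * 0x100000 bytes are unmapped, the chunk-aligned result becomes 0x600000,
 * and the remaining excess is unmapped from the tail.
 */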

static void
chunk_dealloc(void *chunk, size_t size)
{
        size_t offset;
        chunk_node_t key;
        chunk_node_t *node;

        assert(chunk != NULL);
        assert(CHUNK_ADDR2BASE(chunk) == chunk);
        assert(size != 0);
        assert(size % chunk_size == 0);

        malloc_mutex_lock(&chunks_mtx);

#ifdef USE_BRK
        if ((uintptr_t)chunk >= (uintptr_t)brk_base
            && (uintptr_t)chunk < (uintptr_t)brk_max) {
                void *brk_cur;

                /* Get the current end of brk. */
                brk_cur = sbrk(0);

                /*
                 * Try to shrink the data segment if this chunk is at the end
                 * of the data segment.  The sbrk() call here is subject to a
                 * race condition with threads that use brk(2) or sbrk(2)
                 * directly, but the alternative would be to leak memory for
                 * the sake of poorly designed multi-threaded programs.
                 */
                if (brk_cur == brk_max
                    && (void *)(uintptr_t)chunk + size == brk_max
                    && sbrk(-(intptr_t)size) == brk_max) {
                        if (brk_prev == brk_max) {
                                /* Success. */
                                brk_prev = (void *)(intptr_t)brk_max
                                    - (intptr_t)size;
                                brk_max = brk_prev;
                        }
                        goto RETURN;
                } else
                        madvise(chunk, size, MADV_FREE);
        } else
#endif
                pages_unmap(chunk, size);

        /*
         * Iteratively create records of each chunk-sized memory region that
         * 'chunk' is comprised of, so that the address range can be recycled
         * if memory usage increases later on.
         */
        for (offset = 0; offset < size; offset += chunk_size) {
                /*
                 * It is possible for chunk to overlap existing entries in
                 * old_chunks if it is a huge allocation, so take care to not
                 * leak the address space.
                 */
                key.chunk = (void *)((uintptr_t)chunk + (uintptr_t)offset);
                if (RB_FIND(chunk_tree_s, &old_chunks, &key) == NULL) {
                        node = base_chunk_node_alloc();
                        if (node == NULL)
                                break;

                        node->chunk = key.chunk;
                        node->size = chunk_size;
                        RB_INSERT(chunk_tree_s, &old_chunks, node);
                }
        }

#ifdef USE_BRK
RETURN:
#endif
#ifdef MALLOC_STATS
        stats_chunks.curchunks -= (size / chunk_size);
#endif
        malloc_mutex_unlock(&chunks_mtx);
}

/*
 * End chunk management functions.
 */
/******************************************************************************/
/*
 * Begin arena.
 */

/*
 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
 * code if necessary).
 */
static inline arena_t *
choose_arena(void)
{
        arena_t *ret;

        /*
         * We can only use TLS if this is a PIC library, since for the static
         * library version, libc's malloc is used by TLS allocation, which
         * introduces a bootstrapping issue.
         */
#ifndef NO_TLS
        if (__isthreaded == false) {
                /*
                 * Avoid the overhead of TLS for single-threaded operation.  If
                 * the app switches to threaded mode, the initial thread may
                 * end up being assigned to some other arena, but this
                 * one-time switch shouldn't cause significant issues.
                 */
                ret = arenas[0];
        } else {
                ret = arenas_map;
                if (ret == NULL)
                        ret = choose_arena_hard();
        }
#else
        if (__isthreaded) {
                unsigned long ind;

                /*
                 * Hash _pthread_self() to one of the arenas.  There is a prime
                 * number of arenas, so this has a reasonable chance of
                 * working.  Even so, the hashing can be easily thwarted by
                 * inconvenient _pthread_self() values.  Without specific
                 * knowledge of how _pthread_self() calculates values, we can't
                 * easily do much better than this.
                 */
                ind = (unsigned long) _pthread_self() % narenas;

                /*
                 * Optimistically assume that arenas[ind] has been initialized.
                 * At worst, we find out that some other thread has already
                 * done so, after acquiring the lock in preparation.  Note that
                 * this lazy locking also has the effect of lazily forcing
                 * cache coherency; without the lock acquisition, there's no
                 * guarantee that modification of arenas[ind] by another thread
                 * would be seen on this CPU for an arbitrary amount of time.
                 *
                 * In general, this approach to modifying a synchronized value
                 * isn't a good idea, but in this case we only ever modify the
                 * value once, so things work out well.
                 */
                ret = arenas[ind];
                if (ret == NULL) {
                        /*
                         * Avoid races with another thread that may have
                         * already initialized arenas[ind].
                         */
                        malloc_mutex_lock(&arenas_mtx);
                        if (arenas[ind] == NULL)
                                ret = arenas_extend((unsigned)ind);
                        else
                                ret = arenas[ind];
                        malloc_mutex_unlock(&arenas_mtx);
                }
        } else
                ret = arenas[0];
#endif

        assert(ret != NULL);
        return (ret);
}

#ifndef NO_TLS
/*
 * Choose an arena based on a per-thread value (slow-path code only, called
 * only by choose_arena()).
 */
static arena_t *
choose_arena_hard(void)
{
        arena_t *ret;

        assert(__isthreaded);

        /* Assign one of the arenas to this thread, in a round-robin fashion. */
        malloc_mutex_lock(&arenas_mtx);
        ret = arenas[next_arena];
        if (ret == NULL)
                ret = arenas_extend(next_arena);
        if (ret == NULL) {
                /*
                 * Make sure that this function never returns NULL, so that
                 * choose_arena() doesn't have to check for a NULL return
                 * value.
                 */
                ret = arenas[0];
        }
        next_arena = (next_arena + 1) % narenas;
        malloc_mutex_unlock(&arenas_mtx);
        arenas_map = ret;

        return (ret);
}
#endif

static inline int
arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
{

        assert(a != NULL);
        assert(b != NULL);

        if ((uintptr_t)a < (uintptr_t)b)
                return (-1);
        else if (a == b)
                return (0);
        else
                return (1);
}

/* Generate red-black tree code for arena chunks. */
RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);

static inline void *
arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
{
        void *ret;
        unsigned i, mask, bit, regind;

        assert(run->magic == ARENA_RUN_MAGIC);

        for (i = run->regs_minelm; i < REGS_MASK_NELMS; i++) {
                mask = run->regs_mask[i];
                if (mask != 0) {
                        /* Usable allocation found. */
                        bit = ffs(mask) - 1;

                        regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
                        ret = (void *)&((char *)run)[bin->reg0_offset
                            + (bin->reg_size * regind)];

                        /* Clear bit. */
                        mask ^= (1 << bit);
                        run->regs_mask[i] = mask;

                        return (ret);
                } else {
                        /*
                         * Make a note that nothing before this element
                         * contains a free region.
                         */
                        run->regs_minelm = i + 1;
                }
        }
        /* Not reached. */
        assert(0);
        return (NULL);
}

static inline void
arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
{
        /*
         * To divide by a number D that is not a power of two we multiply
         * by (2^21 / D) and then right shift by 21 positions.
         *
         *   X / D
         *
         * becomes
         *
         *   (size_invs[(D >> QUANTUM_2POW_MIN) - 3] * X) >> SIZE_INV_SHIFT
         */
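        /*
         * Worked example (illustrative, assuming QUANTUM_2POW_MIN == 4):
         * for D == 48, SIZE_INV(3) == ((1 << 21) / 48) + 1 == 43691.
         * Then for X == 96, (43691 * 96) >> 21 == 2 == 96 / 48.
         */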
#define SIZE_INV_SHIFT 21
#define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
        static const unsigned size_invs[] = {
            SIZE_INV(3),
            SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
            SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
            SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
            SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
            SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
            SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
            SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
#if (QUANTUM_2POW_MIN < 4)
            ,
            SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
            SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
            SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
            SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
            SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
            SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
            SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
            SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
#endif
        };
        unsigned diff, regind, elm, bit;

        assert(run->magic == ARENA_RUN_MAGIC);
        assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
            >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));

        /*
         * Avoid doing division with a variable divisor if possible.  Using
         * actual division here can reduce allocator throughput by over 20%!
         */
        diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
        if ((size & (size - 1)) == 0) {
                /*
                 * log2_table allows fast division of a power of two in the
                 * [1..128] range.
                 *
                 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
                 */
                static const unsigned char log2_table[] = {
                    0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
                };
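
                /*
                 * Worked example (illustrative): for size == 64,
                 * log2_table[63] == 6, so regind is computed as diff >> 6
                 * rather than diff / 64.
                 */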
                if (size <= 128)
                        regind = (diff >> log2_table[size - 1]);
                else if (size <= 32768)
                        regind = diff >> (8 + log2_table[(size >> 8) - 1]);
                else {
                        /*
                         * The page size is too large for us to use the lookup
                         * table.  Use real division.
                         */
                        regind = diff / size;
                }
        } else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
            << QUANTUM_2POW_MIN) + 2) {
                regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
                regind >>= SIZE_INV_SHIFT;
        } else {
                /*
                 * size_invs isn't large enough to handle this size class, so
                 * calculate regind using actual division.  This only happens
                 * if the user increases small_max via the 'S' runtime
                 * configuration option.
                 */
                regind = diff / size;
        }
        assert(regind == diff / size);
        assert(regind < bin->nregs);

        elm = regind >> (SIZEOF_INT_2POW + 3);
        if (elm < run->regs_minelm)
                run->regs_minelm = elm;
        bit = regind - (elm << (SIZEOF_INT_2POW + 3));
        assert((run->regs_mask[elm] & (1 << bit)) == 0);
        run->regs_mask[elm] |= (1 << bit);
#undef SIZE_INV
#undef SIZE_INV_SHIFT
}

static void
arena_run_split(arena_t *arena, arena_run_t *run, bool large, size_t size)
{
        arena_chunk_t *chunk;
        unsigned run_ind, map_offset, total_pages, need_pages;
        unsigned i, log2_run_pages, run_pages;

        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
        run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
            >> pagesize_2pow);
        total_pages = chunk->map[run_ind].npages;
        need_pages = (size >> pagesize_2pow);

        assert(chunk->map[run_ind].free);
        assert(chunk->map[run_ind].large == false);
        assert(chunk->map[run_ind].npages == total_pages);

        /* Split enough pages from the front of run to fit allocation size. */
        map_offset = run_ind;
        for (i = 0; i < need_pages; i++) {
                chunk->map[map_offset + i].free = false;
                chunk->map[map_offset + i].large = large;
                chunk->map[map_offset + i].npages = need_pages;
                chunk->map[map_offset + i].pos = i;
        }

        /* Update map for trailing pages. */
        map_offset += need_pages;
        while (map_offset < run_ind + total_pages) {
                log2_run_pages = ffs(map_offset) - 1;
                run_pages = (1 << log2_run_pages);

                chunk->map[map_offset].free = true;
                chunk->map[map_offset].large = false;
                chunk->map[map_offset].npages = run_pages;

                chunk->nfree_runs[log2_run_pages]++;

                map_offset += run_pages;
        }

        chunk->pages_used += (size >> pagesize_2pow);
}

static arena_chunk_t *
arena_chunk_alloc(arena_t *arena)
{
        arena_chunk_t *chunk;
        unsigned i, j, header_npages, pow2_header_npages, map_offset;
        unsigned log2_run_pages, run_pages;
        size_t header_size;

        chunk = (arena_chunk_t *)chunk_alloc(chunk_size);
        if (chunk == NULL)
                return (NULL);

        chunk->arena = arena;

        RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);

        /*
         * Claim that no pages are in use, since the header is merely overhead.
         */
        chunk->pages_used = 0;

        memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs));

        header_size = (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen]
            - (uintptr_t)chunk);
        if (header_size % pagesize != 0) {
                /* Round up to the nearest page boundary. */
                header_size += pagesize - (header_size % pagesize);
        }

        header_npages = header_size >> pagesize_2pow;
        pow2_header_npages = pow2_ceil(header_npages);

        /*
         * Iteratively mark runs as in use, until we've spoken for the entire
         * header.
         */
        map_offset = 0;
        for (i = 0; header_npages > 0; i++) {
                if ((pow2_header_npages >> i) <= header_npages) {
                        for (j = 0; j < (pow2_header_npages >> i); j++) {
                                chunk->map[map_offset + j].free = false;
                                chunk->map[map_offset + j].large = false;
                                chunk->map[map_offset + j].npages =
                                    (pow2_header_npages >> i);
                                chunk->map[map_offset + j].pos = j;
                        }
                        header_npages -= (pow2_header_npages >> i);
                        map_offset += (pow2_header_npages >> i);
                }
        }

        /*
         * Finish initializing map.  The chunk header takes up some space at
         * the beginning of the chunk, which we just took care of by
         * "allocating" the leading pages.
         */
        while (map_offset < (chunk_size >> pagesize_2pow)) {
                log2_run_pages = ffs(map_offset) - 1;
                run_pages = (1 << log2_run_pages);

                chunk->map[map_offset].free = true;
                chunk->map[map_offset].large = false;
                chunk->map[map_offset].npages = run_pages;

                chunk->nfree_runs[log2_run_pages]++;

                map_offset += run_pages;
        }

        return (chunk);
}

static void
arena_chunk_dealloc(arena_chunk_t *chunk)
{

        RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);

        chunk_dealloc((void *)chunk, chunk_size);
}

static void
arena_bin_run_promote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
{

        assert(bin == run->bin);
        assert(run->free_min > run->nfree);
        assert(run->quartile < RUN_Q100);

#ifdef MALLOC_STATS
        bin->stats.npromote++;
#endif

        run->quartile++;
        switch (run->quartile) {
        case RUN_Q0:
                qr_before_insert((arena_run_t *)&bin->runs0, run, link);
                run->free_max = bin->nregs - 1;
                run->free_min = (bin->nregs >> 1) + 1;
                assert(run->nfree <= run->free_max);
                assert(run->nfree >= run->free_min);
                break;
        case RUN_Q25:
                qr_remove(run, link);
                qr_before_insert((arena_run_t *)&bin->runs25, run,
                    link);
                run->free_max = ((bin->nregs >> 2) * 3) - 1;
                run->free_min = (bin->nregs >> 2) + 1;
                assert(run->nfree <= run->free_max);
                assert(run->nfree >= run->free_min);
                break;
        case RUN_Q50:
                qr_remove(run, link);
                qr_before_insert((arena_run_t *)&bin->runs50, run,
                    link);
                run->free_max = (bin->nregs >> 1) - 1;
                run->free_min = 1;
                assert(run->nfree <= run->free_max);
                assert(run->nfree >= run->free_min);
                break;
        case RUN_Q75:
                /*
                 * Skip RUN_Q75 during promotion from RUN_Q50.
                 * Separate handling of RUN_Q75 and RUN_Q100 allows us
                 * to keep completely full runs in RUN_Q100, thus
                 * guaranteeing that runs in RUN_Q75 are only mostly
                 * full.  This provides a method for avoiding a linear
                 * search for non-full runs, which avoids some
                 * pathological edge cases.
                 */
                run->quartile++;
                /* Fall through. */
        case RUN_Q100:
                qr_remove(run, link);
                assert(bin->runcur == run);
                bin->runcur = NULL;
                run->free_max = 0;
                run->free_min = 0;
                assert(run->nfree <= run->free_max);
                assert(run->nfree >= run->free_min);
                break;
        default:
                assert(0);
                break;
        }
}

static void
arena_bin_run_demote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
{

        assert(bin == run->bin);
        assert(run->free_max < run->nfree);
        assert(run->quartile > RUN_QINIT);

#ifdef MALLOC_STATS
        bin->stats.ndemote++;
#endif

        run->quartile--;
        switch (run->quartile) {
        case RUN_QINIT:
                qr_remove(run, link);
#ifdef MALLOC_STATS
                bin->stats.curruns--;
#endif
                if (bin->runcur == run)
                        bin->runcur = NULL;
#ifdef MALLOC_DEBUG
                run->magic = 0;
#endif
                arena_run_dalloc(arena, run, bin->run_size);
                break;
        case RUN_Q0:
                qr_remove(run, link);
                qr_before_insert((arena_run_t *)&bin->runs0, run, link);
                run->free_max = bin->nregs - 1;
                run->free_min = (bin->nregs >> 1) + 1;
                assert(run->nfree <= run->free_max);
                assert(run->nfree >= run->free_min);
                break;
        case RUN_Q25:
                qr_remove(run, link);
                qr_before_insert((arena_run_t *)&bin->runs25, run,
                    link);
                run->free_max = ((bin->nregs >> 2) * 3) - 1;
                run->free_min = (bin->nregs >> 2) + 1;
                assert(run->nfree <= run->free_max);
                assert(run->nfree >= run->free_min);
                break;
        case RUN_Q50:
                qr_remove(run, link);
                qr_before_insert((arena_run_t *)&bin->runs50, run,
                    link);
                run->free_max = (bin->nregs >> 1) - 1;
                run->free_min = 1;
                assert(run->nfree <= run->free_max);
                assert(run->nfree >= run->free_min);
                break;
        case RUN_Q75:
                qr_before_insert((arena_run_t *)&bin->runs75, run,
                    link);
                run->free_max = (bin->nregs >> 2) - 1;
                run->free_min = 1;
                assert(run->nfree <= run->free_max);
                assert(run->nfree >= run->free_min);
                break;
        default:
                assert(0);
                break;
        }
}

static arena_run_t *
arena_run_alloc(arena_t *arena, bool large, size_t size)
{
        unsigned min_ind, i, j;
        arena_chunk_t *chunk;
        arena_run_t *run;

        assert(size <= arena_maxclass);

AGAIN:
        /*
         * Search through arena's chunks in address order for a run that is
         * large enough.  Look for a precise fit, but do not pass up a chunk
         * that has a run which is large enough to split.
         */
        min_ind = ffs(size >> pagesize_2pow) - 1;
        RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
                for (i = min_ind;
                    i < (opt_chunk_2pow - pagesize_2pow);
                    i++) {
                        if (chunk->nfree_runs[i] > 0) {
                                arena_chunk_map_t *map = chunk->map;

                                /* Scan chunk's map for free run. */
                                for (j = 0;
                                    j < arena_chunk_maplen;
                                    j += map[j].npages) {
                                        if (map[j].free
                                            && map[j].npages == (1 << i)) {
                                                run = (arena_run_t *)&((char *)chunk)[j
                                                    << pagesize_2pow];

                                                assert(chunk->nfree_runs[i] > 0);
                                                chunk->nfree_runs[i]--;

                                                /* Update page map. */
                                                arena_run_split(arena, run,
                                                    large, size);

                                                return (run);
                                        }
                                }
                        }
                }
        }

        /* No usable runs.  Allocate a new chunk, then try again. */
        if (arena_chunk_alloc(arena) == NULL)
                return (NULL);
        goto AGAIN;
}

static void
arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
{
        arena_chunk_t *chunk;
        unsigned run_ind, buddy_ind, base_run_ind, run_pages, log2_run_pages;

        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
        run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
            >> pagesize_2pow);
        run_pages = (size >> pagesize_2pow);
        log2_run_pages = ffs(run_pages) - 1;
        assert(run_pages > 0);

        /* Subtract pages from count of pages used in chunk. */
        chunk->pages_used -= run_pages;

        /* Mark run as deallocated. */
        chunk->map[run_ind].free = true;
        chunk->map[run_ind].large = false;
        chunk->map[run_ind].npages = run_pages;

        /*
         * Tell the kernel that we don't need the data in this run, but only if
         * requested via runtime configuration.
         */
        if (opt_hint) {
                madvise(run, size, MADV_FREE);
#ifdef MALLOC_STATS
                arena->stats.nmadvise += (size >> pagesize_2pow);
#endif
        }

        /*
         * Iteratively coalesce with buddies.  Conceptually, the buddy scheme
         * induces a tree on the set of pages.  If we know the number of pages
         * in the subtree rooted at the current node, we can quickly determine
         * whether a run is the left or right buddy, and then calculate the
         * buddy's index.
         */
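        /*
         * Worked example (illustrative): if run_ind == 12 and run_pages == 4,
         * then (12 >> 2) & 1 == 1, so this run follows its buddy, which
         * begins at page 8; the coalesced run would begin at base_run_ind == 8
         * and span 8 pages.
         */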
        for (;
            (run_pages = (1 << log2_run_pages)) < arena_chunk_maplen;
            log2_run_pages++) {
                if (((run_ind >> log2_run_pages) & 1) == 0) {
                        /* Current run precedes its buddy. */
                        buddy_ind = run_ind + run_pages;
                        base_run_ind = run_ind;
                } else {
                        /* Current run follows its buddy. */
                        buddy_ind = run_ind - run_pages;
                        base_run_ind = buddy_ind;
                }

                if (chunk->map[buddy_ind].free == false
                    || chunk->map[buddy_ind].npages != run_pages)
                        break;

                assert(chunk->nfree_runs[log2_run_pages] > 0);
                chunk->nfree_runs[log2_run_pages]--;

                /* Coalesce. */
                chunk->map[base_run_ind].npages = (run_pages << 1);

                /* Update run_ind to be the beginning of the coalesced run. */
                run_ind = base_run_ind;
        }

        chunk->nfree_runs[log2_run_pages]++;

        /* Free pages, to the extent possible. */
        if (chunk->pages_used == 0) {
                /* This chunk is completely unused now, so deallocate it. */
                arena_chunk_dealloc(chunk);
        }
}

static arena_run_t *
arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
{
        arena_run_t *run;
        unsigned i, remainder;

        /* Look for a usable run. */
        if ((run = qr_next((arena_run_t *)&bin->runs50, link))
            != (arena_run_t *)&bin->runs50
            || (run = qr_next((arena_run_t *)&bin->runs25, link))
            != (arena_run_t *)&bin->runs25
            || (run = qr_next((arena_run_t *)&bin->runs0, link))
            != (arena_run_t *)&bin->runs0
            || (run = qr_next((arena_run_t *)&bin->runs75, link))
            != (arena_run_t *)&bin->runs75) {
                /* run is guaranteed to have available space. */
                qr_remove(run, link);
                return (run);
        }
        /* No existing runs have any space available. */

        /* Allocate a new run. */
        run = arena_run_alloc(arena, false, bin->run_size);
        if (run == NULL)
                return (NULL);

        /* Initialize run internals. */
        run->bin = bin;

        for (i = 0; i < (bin->nregs >> (SIZEOF_INT_2POW + 3)); i++)
                run->regs_mask[i] = UINT_MAX;
        remainder = bin->nregs % (1 << (SIZEOF_INT_2POW + 3));
        if (remainder != 0) {
                /* The last element has spare bits that need to be unset. */
                run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
                    - remainder));
                i++;
        }
        for (; i < REGS_MASK_NELMS; i++)
                run->regs_mask[i] = 0;

        run->regs_minelm = 0;

        run->nfree = bin->nregs;
        run->quartile = RUN_QINIT;
        run->free_max = bin->nregs;
        run->free_min = ((bin->nregs >> 2) * 3) + 1;
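        /*
         * Illustrative note (not from the original source): with nregs == 100,
         * free_min == 76, so the run is promoted out of RUN_QINIT once fewer
         * than 76 regions remain free, i.e. once the run is roughly 25% full.
         */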
#ifdef MALLOC_DEBUG
        run->magic = ARENA_RUN_MAGIC;
#endif

#ifdef MALLOC_STATS
        bin->stats.nruns++;
        bin->stats.curruns++;
        if (bin->stats.curruns > bin->stats.highruns)
                bin->stats.highruns = bin->stats.curruns;
#endif
        return (run);
}

/* bin->runcur must have space available before this function is called. */
static inline void *
arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
{
        void *ret;

        assert(run->magic == ARENA_RUN_MAGIC);
        assert(run->nfree > 0);

        ret = arena_run_reg_alloc(run, bin);
        assert(ret != NULL);
        run->nfree--;

        if (run->nfree < run->free_min) {
                /* Promote run to higher fullness quartile. */
                arena_bin_run_promote(arena, bin, run);
        }

        return (ret);
}

/* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
static void *
arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
{

        assert(bin->runcur == NULL || bin->runcur->quartile == RUN_Q100);

        bin->runcur = arena_bin_nonfull_run_get(arena, bin);
        if (bin->runcur == NULL)
                return (NULL);
        assert(bin->runcur->magic == ARENA_RUN_MAGIC);
        assert(bin->runcur->nfree > 0);

        return (arena_bin_malloc_easy(arena, bin, bin->runcur));
}

static void *
arena_malloc(arena_t *arena, size_t size)
{
        void *ret;

        assert(arena != NULL);
        assert(arena->magic == ARENA_MAGIC);
        assert(size != 0);
        assert(QUANTUM_CEILING(size) <= arena_maxclass);

        if (size <= bin_maxclass) {
                arena_bin_t *bin;
                arena_run_t *run;

                /* Small allocation. */

                if (size < small_min) {
                        /* Tiny. */
                        size = pow2_ceil(size);
                        bin = &arena->bins[ffs(size >> (tiny_min_2pow + 1))];
#if (!defined(NDEBUG) || defined(MALLOC_STATS))
                        /*
                         * Bin calculation is always correct, but we may need
                         * to fix size for the purposes of assertions and/or
                         * statistics accuracy.
                         */
                        if (size < (1 << tiny_min_2pow))
                                size = (1 << tiny_min_2pow);
#endif
                } else if (size <= small_max) {
                        /* Quantum-spaced. */
                        size = QUANTUM_CEILING(size);
                        bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
                            - 1];
                } else {
                        /* Sub-page. */
                        size = pow2_ceil(size);
                        bin = &arena->bins[ntbins + nqbins
                            + (ffs(size >> opt_small_max_2pow) - 2)];
                }
                assert(size == bin->reg_size);

                malloc_mutex_lock(&arena->mtx);
                if ((run = bin->runcur) != NULL)
                        ret = arena_bin_malloc_easy(arena, bin, run);
                else
                        ret = arena_bin_malloc_hard(arena, bin);
#ifdef MALLOC_STATS
                bin->stats.nrequests++;
#endif
        } else {
                /* Large allocation. */
                size = pow2_ceil(size);
                malloc_mutex_lock(&arena->mtx);
                ret = (void *)arena_run_alloc(arena, true, size);
#ifdef MALLOC_STATS
                arena->stats.large_nrequests++;
#endif
        }

#ifdef MALLOC_STATS
        arena->stats.nmalloc++;
        if (ret != NULL)
                arena->stats.allocated += size;
#endif

        malloc_mutex_unlock(&arena->mtx);

        if (opt_junk && ret != NULL)
                memset(ret, 0xa5, size);
        else if (opt_zero && ret != NULL)
                memset(ret, 0, size);
        return (ret);
}

/* Return the size of the allocation pointed to by ptr. */
static size_t
arena_salloc(const void *ptr)
{
        size_t ret;
        arena_chunk_t *chunk;
        unsigned pageind;
        arena_chunk_map_t mapelm;

        assert(ptr != NULL);
        assert(CHUNK_ADDR2BASE(ptr) != ptr);

        /*
         * No arena data structures that we query here can change in a way that
         * affects this function, so we don't need to lock.
         */
        chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
        pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
        mapelm = chunk->map[pageind];
        assert(mapelm.free == false);
        if (mapelm.large == false) {
                arena_run_t *run;

                pageind -= mapelm.pos;

                run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
                assert(run->magic == ARENA_RUN_MAGIC);
                ret = run->bin->reg_size;
        } else
                ret = mapelm.npages << pagesize_2pow;

        return (ret);
}

static void *
arena_ralloc(void *ptr, size_t size, size_t oldsize)
{
        void *ret;

        /* Avoid moving the allocation if the size class would not change. */
        if (size < small_min) {
                if (oldsize < small_min &&
                    ffs(pow2_ceil(size) >> (tiny_min_2pow + 1))
                    == ffs(pow2_ceil(oldsize) >> (tiny_min_2pow + 1)))
                        goto IN_PLACE;
        } else if (size <= small_max) {
                if (oldsize >= small_min && oldsize <= small_max &&
                    (QUANTUM_CEILING(size) >> opt_quantum_2pow)
                    == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
                        goto IN_PLACE;
        } else {
                if (oldsize > small_max && pow2_ceil(size) == oldsize)
                        goto IN_PLACE;
        }

        /*
         * If we get here, then size and oldsize are different enough that we
         * need to use a different size class.  In that case, fall back to
         * allocating new space and copying.
         */
        ret = arena_malloc(choose_arena(), size);
        if (ret == NULL)
                return (NULL);

        if (size < oldsize)
                memcpy(ret, ptr, size);
        else
                memcpy(ret, ptr, oldsize);

        idalloc(ptr);
        return (ret);
IN_PLACE:
        if (opt_junk && size < oldsize)
                memset(&((char *)ptr)[size], 0x5a, oldsize - size);
        else if (opt_zero && size > oldsize)
                memset(&((char *)ptr)[size], 0, size - oldsize);
        return (ptr);
}

static void
arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
{
        unsigned pageind;
        arena_chunk_map_t mapelm;
        size_t size;

        assert(arena != NULL);
        assert(arena->magic == ARENA_MAGIC);
        assert(chunk->arena == arena);
        assert(ptr != NULL);
        assert(CHUNK_ADDR2BASE(ptr) != ptr);

        pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
        mapelm = chunk->map[pageind];
        assert(mapelm.free == false);
        if (mapelm.large == false) {
                arena_run_t *run;
                arena_bin_t *bin;

                /* Small allocation. */

                pageind -= mapelm.pos;

                run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
                assert(run->magic == ARENA_RUN_MAGIC);
                bin = run->bin;
                size = bin->reg_size;

                if (opt_junk)
                        memset(ptr, 0x5a, size);

                malloc_mutex_lock(&arena->mtx);
                arena_run_reg_dalloc(run, bin, ptr, size);
                run->nfree++;

                if (run->nfree > run->free_max) {
                        /* Demote run to lower fullness quartile. */
                        arena_bin_run_demote(arena, bin, run);
                }
        } else {
                /* Large allocation. */

                size = mapelm.npages << pagesize_2pow;
                assert((((uintptr_t)ptr) & (size - 1)) == 0);

                if (opt_junk)
                        memset(ptr, 0x5a, size);

                malloc_mutex_lock(&arena->mtx);
                arena_run_dalloc(arena, (arena_run_t *)ptr, size);
        }

#ifdef MALLOC_STATS
        arena->stats.allocated -= size;
#endif

        malloc_mutex_unlock(&arena->mtx);
}

static bool
static bool
arena_new(arena_t *arena)
{
	unsigned i;
	arena_bin_t *bin;
	size_t pow2_size, run_size;

	malloc_mutex_init(&arena->mtx);

#ifdef MALLOC_STATS
	memset(&arena->stats, 0, sizeof(arena_stats_t));
#endif

	/* Initialize chunks. */
	RB_INIT(&arena->chunks);

	/* Initialize bins. */

	/* (2^n)-spaced tiny bins. */
	for (i = 0; i < ntbins; i++) {
		bin = &arena->bins[i];

		qr_new((arena_run_t *)&bin->runs0, link);
		qr_new((arena_run_t *)&bin->runs25, link);
		qr_new((arena_run_t *)&bin->runs50, link);
		qr_new((arena_run_t *)&bin->runs75, link);

		bin->reg_size = (1 << (tiny_min_2pow + i));

		/*
		 * Calculate how large of a run to allocate.  Make sure that at
		 * least RUN_MIN_REGS regions fit in the run.
		 */
		run_size = bin->reg_size << RUN_MIN_REGS_2POW;
		if (run_size < pagesize)
			run_size = pagesize;
		if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
			run_size = (pagesize << RUN_MAX_PAGES_2POW);
		if (run_size > arena_maxclass)
			run_size = arena_maxclass;
		bin->run_size = run_size;

		assert(run_size >= sizeof(arena_run_t));
		bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
		if (bin->nregs > (REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3))) {
			/* Take care not to overflow regs_mask. */
			bin->nregs = REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3);
		}
		bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);

#ifdef MALLOC_STATS
		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
	}

	/* Quantum-spaced bins. */
	for (; i < ntbins + nqbins; i++) {
		bin = &arena->bins[i];

		qr_new((arena_run_t *)&bin->runs0, link);
		qr_new((arena_run_t *)&bin->runs25, link);
		qr_new((arena_run_t *)&bin->runs50, link);
		qr_new((arena_run_t *)&bin->runs75, link);

		bin->reg_size = quantum * (i - ntbins + 1);

		/*
		 * Calculate how large of a run to allocate.  Make sure that at
		 * least RUN_MIN_REGS regions fit in the run.
		 */
		pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
		run_size = (pow2_size << RUN_MIN_REGS_2POW);
		if (run_size < pagesize)
			run_size = pagesize;
		if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
			run_size = (pagesize << RUN_MAX_PAGES_2POW);
		if (run_size > arena_maxclass)
			run_size = arena_maxclass;
		bin->run_size = run_size;

		bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
		assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
		bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);

#ifdef MALLOC_STATS
		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
	}

	/* (2^n)-spaced sub-page bins. */
	for (; i < ntbins + nqbins + nsbins; i++) {
		bin = &arena->bins[i];

		qr_new((arena_run_t *)&bin->runs0, link);
		qr_new((arena_run_t *)&bin->runs25, link);
		qr_new((arena_run_t *)&bin->runs50, link);
		qr_new((arena_run_t *)&bin->runs75, link);

		bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));

		/*
		 * Calculate how large of a run to allocate.  Make sure that at
		 * least RUN_MIN_REGS regions fit in the run.
		 */
		run_size = bin->reg_size << RUN_MIN_REGS_2POW;
		if (run_size < pagesize)
			run_size = pagesize;
		if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
			run_size = (pagesize << RUN_MAX_PAGES_2POW);
		if (run_size > arena_maxclass)
			run_size = arena_maxclass;
		bin->run_size = run_size;

		bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
		assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
		bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);

#ifdef MALLOC_STATS
		memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
#endif
	}

	arena->magic = ARENA_MAGIC;

	return (false);
}
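
/*
 * Illustrative arithmetic for the run sizing performed above, under assumed
 * values of 4 kB pages, RUN_MIN_REGS_2POW == 6, and RUN_MAX_PAGES_2POW == 2
 * (the real constants are defined earlier in this file):
 *
 *	16 byte bin:   16 << 6 ==  1024 < pagesize       --> run_size  4096
 *	512 byte bin: 512 << 6 == 32768 > pagesize << 2  --> run_size 16384
 *
 * nregs then follows from whatever space remains after the arena_run_t
 * header, and reg0_offset pushes the unusable slack to the front of the run.
 */
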
/* Create a new arena and insert it into the arenas array at index ind. */
static arena_t *
arenas_extend(unsigned ind)
{
	arena_t *ret;

	/* Allocate enough space for trailing bins. */
	ret = (arena_t *)base_alloc(sizeof(arena_t)
	    + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
	if (ret != NULL && arena_new(ret) == false) {
		arenas[ind] = ret;
		return (ret);
	}
	/* Only reached if there is an OOM error. */

	/*
	 * OOM here is quite inconvenient to propagate, since dealing with it
	 * would require a check for failure in the fast path.  Instead, punt
	 * by using arenas[0].  In practice, this is an extremely unlikely
	 * failure.
	 */
	malloc_printf("%s: (malloc) Error initializing arena\n",
	    _getprogname());
	if (opt_abort)
		abort();

	return (arenas[0]);
}
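
/*
 * The single base_alloc() above relies on the common C idiom of
 * over-allocating a struct whose final member is a one-element array, so
 * that an arena_t and its variable number of bins occupy one contiguous
 * block.  A minimal sketch of the idiom (example_t is illustrative, not the
 * real arena_t layout); bins[1] grows into the over-allocated tail:
 *
 *	typedef struct {
 *		int	magic;
 *		int	bins[1];
 *	} example_t;
 *
 *	example_t *e = (example_t *)malloc(sizeof(example_t)
 *	    + (sizeof(int) * (nbins - 1)));
 */
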
/******************************************************************************/
/*
 * Begin general internal functions.
 */

static void *
huge_malloc(size_t size)
{
	void *ret;
	size_t csize;
	chunk_node_t *node;

	/* Allocate one or more contiguous chunks for this request. */

	csize = CHUNK_CEILING(size);
	if (csize == 0) {
		/* size is large enough to cause size_t wrap-around. */
		return (NULL);
	}

	/* Allocate a chunk node with which to track the chunk. */
	node = base_chunk_node_alloc();
	if (node == NULL)
		return (NULL);

	ret = chunk_alloc(csize);
	if (ret == NULL) {
		base_chunk_node_dealloc(node);
		return (NULL);
	}

	/* Insert node into huge. */
	node->chunk = ret;
	node->size = csize;

	malloc_mutex_lock(&chunks_mtx);
	RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
	huge_nmalloc++;
	huge_allocated += csize;
#endif
	malloc_mutex_unlock(&chunks_mtx);

	if (opt_junk)
		memset(ret, 0xa5, csize);
	else if (opt_zero)
		memset(ret, 0, csize);

	return (ret);
}
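
/*
 * The csize == 0 test above catches size_t wrap-around.  Assuming
 * CHUNK_CEILING() is the usual round-up-to-chunk-size macro defined earlier
 * in this file, along the lines of
 *
 *	(((s) + chunk_size_mask) & ~chunk_size_mask)
 *
 * any request within chunk_size_mask bytes of SIZE_T_MAX overflows the
 * addition and the masked result wraps to exactly 0, so the request is
 * refused rather than silently truncated.
 */
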
static void *
huge_ralloc(void *ptr, size_t size, size_t oldsize)
{
	void *ret;

	/* Avoid moving the allocation if the size class would not change. */
	if (oldsize > arena_maxclass &&
	    CHUNK_CEILING(size) == CHUNK_CEILING(oldsize))
		return (ptr);

	/*
	 * If we get here, then size and oldsize are different enough that we
	 * need to use a different size class.  In that case, fall back to
	 * allocating new space and copying.
	 */
	ret = huge_malloc(size);
	if (ret == NULL)
		return (NULL);

	if (CHUNK_ADDR2BASE(ptr) == ptr) {
		/* The old allocation is a chunk. */
		if (size < oldsize)
			memcpy(ret, ptr, size);
		else
			memcpy(ret, ptr, oldsize);
	} else {
		/* The old allocation is a region. */
		assert(oldsize < size);
		memcpy(ret, ptr, oldsize);
	}
	idalloc(ptr);
	return (ret);
}

static void
huge_dalloc(void *ptr)
{
	chunk_node_t key;
	chunk_node_t *node;

	malloc_mutex_lock(&chunks_mtx);

	/* Extract from tree of huge allocations. */
	key.chunk = ptr;
	node = RB_FIND(chunk_tree_s, &huge, &key);
	assert(node != NULL);
	assert(node->chunk == ptr);
	RB_REMOVE(chunk_tree_s, &huge, node);

#ifdef MALLOC_STATS
	/* Update counters. */
	huge_ndalloc++;
	huge_allocated -= node->size;
#endif

	malloc_mutex_unlock(&chunks_mtx);

	/* Unmap chunk. */
#ifdef USE_BRK
	if (opt_junk)
		memset(node->chunk, 0x5a, node->size);
#endif
	chunk_dealloc(node->chunk, node->size);

	base_chunk_node_dealloc(node);
}

static void *
imalloc(size_t size)
{
	void *ret;

	assert(size != 0);

	if (size <= arena_maxclass)
		ret = arena_malloc(choose_arena(), size);
	else
		ret = huge_malloc(size);

	return (ret);
}

static void *
ipalloc(size_t alignment, size_t size)
{
	void *ret;
	size_t alloc_size;

	/*
	 * Take advantage of the fact that for each size class, every object is
	 * aligned at the smallest power of two that is non-zero in the base
	 * two representation of the size.  For example:
	 *
	 *   Size |   Base 2 | Minimum alignment
	 *   -----+----------+------------------
	 *     96 |  1100000 |                32
	 *    144 | 10010000 |                16
	 *    192 | 11000000 |                64
	 *
	 * Depending on runtime settings, it is possible that arena_malloc()
	 * will further round up to a power of two, but that never causes
	 * correctness issues.
	 */
	alloc_size = (size + (alignment - 1)) & (-alignment);
	if (alloc_size < size) {
		/* size_t overflow. */
		return (NULL);
	}

	if (alloc_size <= arena_maxclass)
		ret = arena_malloc(choose_arena(), alloc_size);
	else {
		if (alignment <= chunk_size)
			ret = huge_malloc(size);
		else {
			size_t chunksize, offset;
			chunk_node_t *node;

			/*
			 * This allocation requires alignment that is even
			 * larger than chunk alignment.  This means that
			 * huge_malloc() isn't good enough.
			 *
			 * Allocate almost twice as many chunks as are demanded
			 * by the size or alignment, in order to assure the
			 * alignment can be achieved, then unmap leading and
			 * trailing chunks.
			 */

			chunksize = CHUNK_CEILING(size);

			if (size >= alignment)
				alloc_size = chunksize + alignment - chunk_size;
			else
				alloc_size = (alignment << 1) - chunk_size;

			/*
			 * Allocate a chunk node with which to track the chunk.
			 */
			node = base_chunk_node_alloc();
			if (node == NULL)
				return (NULL);

			ret = chunk_alloc(alloc_size);
			if (ret == NULL) {
				base_chunk_node_dealloc(node);
				return (NULL);
			}

			offset = (uintptr_t)ret & (alignment - 1);
			assert(offset % chunk_size == 0);
			assert(offset < alloc_size);
			if (offset == 0) {
				/* Trim trailing space. */
				chunk_dealloc((void *)((uintptr_t)ret
				    + chunksize), alloc_size - chunksize);
			} else {
				size_t trailsize;

				/* Trim leading space. */
				chunk_dealloc(ret, alignment - offset);

				ret = (void *)((uintptr_t)ret + (alignment
				    - offset));

				trailsize = alloc_size - (alignment - offset)
				    - chunksize;
				if (trailsize != 0) {
					/* Trim trailing space. */
					assert(trailsize < alloc_size);
					chunk_dealloc((void *)((uintptr_t)ret
					    + chunksize), trailsize);
				}
			}

			/* Insert node into huge. */
			node->chunk = ret;
			node->size = chunksize;

			malloc_mutex_lock(&chunks_mtx);
			RB_INSERT(chunk_tree_s, &huge, node);
#ifdef MALLOC_STATS
			huge_nmalloc++;
			huge_allocated += chunksize;
#endif
			malloc_mutex_unlock(&chunks_mtx);

			if (opt_junk)
				memset(ret, 0xa5, chunksize);
			else if (opt_zero)
				memset(ret, 0, chunksize);
		}
	}

	assert(((uintptr_t)ret & (alignment - 1)) == 0);
	return (ret);
}
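
/*
 * Worked example of the round-up at the top of ipalloc(), for size == 100
 * and alignment == 64:
 *
 *	alloc_size = (100 + 63) & -64 == 163 & ~63 == 128
 *
 * 128 is a multiple of 64, so by the argument in the comment above, the size
 * class backing the request guarantees 64 byte alignment.  The overflow test
 * works because a wrapped sum is always smaller than size.
 */
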
static void *
icalloc(size_t size)
{
	void *ret;

	if (size <= arena_maxclass) {
		ret = arena_malloc(choose_arena(), size);
		if (ret == NULL)
			return (NULL);
		memset(ret, 0, size);
	} else {
		/*
		 * The virtual memory system provides zero-filled pages, so
		 * there is no need to do so manually, unless opt_junk is
		 * enabled, in which case huge_malloc() fills huge allocations
		 * with junk.
		 */
		ret = huge_malloc(size);
		if (ret == NULL)
			return (NULL);

		if (opt_junk)
			memset(ret, 0, size);
#ifdef USE_BRK
		else if ((uintptr_t)ret >= (uintptr_t)brk_base
		    && (uintptr_t)ret < (uintptr_t)brk_max) {
			/*
			 * This may be a re-used brk chunk.  Therefore, zero
			 * the memory.
			 */
			memset(ret, 0, size);
		}
#endif
	}

	return (ret);
}

static size_t
isalloc(const void *ptr)
{
	size_t ret;
	arena_chunk_t *chunk;

	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
		assert(chunk->arena->magic == ARENA_MAGIC);

		ret = arena_salloc(ptr);
	} else {
		chunk_node_t *node, key;

		/* Chunk (huge allocation). */

		malloc_mutex_lock(&chunks_mtx);

		/* Extract from tree of huge allocations. */
		key.chunk = (void *)ptr;
		node = RB_FIND(chunk_tree_s, &huge, &key);
		assert(node != NULL);

		ret = node->size;

		malloc_mutex_unlock(&chunks_mtx);
	}

	return (ret);
}
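
/*
 * The chunk != ptr test above is the cheap huge/non-huge discriminator:
 * chunks are naturally aligned to chunk_size, huge allocations are always
 * handed out at a chunk base, and arena regions never are (the arena chunk
 * header occupies the start of every arena chunk).  Assuming
 * CHUNK_ADDR2BASE() simply masks off the offset bits, e.g.
 *
 *	((void *)((uintptr_t)(a) & ~chunk_size_mask))
 *
 * the test costs one AND and one compare, and the common (arena) case takes
 * no lock at all.
 */
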
static void *
iralloc(void *ptr, size_t size)
{
	void *ret;
	size_t oldsize;

	assert(ptr != NULL);
	assert(size != 0);

	oldsize = isalloc(ptr);

	if (size <= arena_maxclass)
		ret = arena_ralloc(ptr, size, oldsize);
	else
		ret = huge_ralloc(ptr, size, oldsize);

	return (ret);
}

static void
idalloc(void *ptr)
{
	arena_chunk_t *chunk;

	assert(ptr != NULL);

	chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
	if (chunk != ptr) {
		/* Region. */
#ifdef MALLOC_STATS
		malloc_mutex_lock(&chunk->arena->mtx);
		chunk->arena->stats.ndalloc++;
		malloc_mutex_unlock(&chunk->arena->mtx);
#endif
		arena_dalloc(chunk->arena, chunk, ptr);
	} else
		huge_dalloc(ptr);
}

static void
malloc_print_stats(void)
{

	if (opt_print_stats) {
		malloc_printf("___ Begin malloc statistics ___\n");
		malloc_printf("Number of CPUs: %u\n", ncpus);
		malloc_printf("Number of arenas: %u\n", narenas);
		malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size,
		    opt_chunk_2pow);
		malloc_printf("Quantum size: %zu (2^%zu)\n", quantum,
		    opt_quantum_2pow);
		malloc_printf("Max small size: %zu\n", small_max);
		malloc_printf("Pointer size: %zu\n", sizeof(void *));
		malloc_printf("Assertions %s\n",
#ifdef NDEBUG
		    "disabled"
#else
		    "enabled"
#endif
		    );

#ifdef MALLOC_STATS
		{
			size_t allocated, total;
			unsigned i;
			arena_t *arena;

			/* Calculate and print allocated/total stats. */

			/* arenas. */
			for (i = 0, allocated = 0; i < narenas; i++) {
				if (arenas[i] != NULL) {
					malloc_mutex_lock(&arenas[i]->mtx);
					allocated +=
					    arenas[i]->stats.allocated;
					malloc_mutex_unlock(&arenas[i]->mtx);
				}
			}

			/* huge. */
			malloc_mutex_lock(&chunks_mtx);
			allocated += huge_allocated;
			total = stats_chunks.curchunks * chunk_size;
			malloc_mutex_unlock(&chunks_mtx);

			malloc_printf("Allocated: %zu, space used: %zu\n",
			    allocated, total);

			/* Print chunk stats. */
			{
				chunk_stats_t chunks_stats;

				malloc_mutex_lock(&chunks_mtx);
				chunks_stats = stats_chunks;
				malloc_mutex_unlock(&chunks_mtx);

				malloc_printf("\nchunks:\n");
				malloc_printf(" %13s%13s%13s\n", "nchunks",
				    "highchunks", "curchunks");
				malloc_printf(" %13llu%13lu%13lu\n",
				    chunks_stats.nchunks,
				    chunks_stats.highchunks,
				    chunks_stats.curchunks);
			}

			/* Print huge allocation stats. */
			malloc_printf("\nhuge:\n");
			malloc_printf("%12s %12s %12s\n",
			    "nmalloc", "ndalloc", "allocated");
			malloc_printf("%12llu %12llu %12zu\n",
			    huge_nmalloc, huge_ndalloc, huge_allocated);

			/* Print stats for each arena. */
			for (i = 0; i < narenas; i++) {
				arena = arenas[i];
				if (arena != NULL) {
					malloc_printf(
					    "\narenas[%u] statistics:\n", i);
					malloc_mutex_lock(&arena->mtx);
					stats_print(arena);
					malloc_mutex_unlock(&arena->mtx);
				}
			}
		}
#endif /* #ifdef MALLOC_STATS */
		malloc_printf("___ End malloc statistics ___\n");
	}
}

/*
 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
 * implementation has to take pains to avoid infinite recursion during
 * initialization.
 */
static inline bool
malloc_init(void)
{

	if (malloc_initialized == false)
		return (malloc_init_hard());

	return (false);
}

static bool
malloc_init_hard(void)
{
	unsigned i, j;
	int linklen;
	char buf[PATH_MAX + 1];
	const char *opts;

	malloc_mutex_lock(&init_lock);
	if (malloc_initialized) {
		/*
		 * Another thread initialized the allocator before this one
		 * acquired init_lock.
		 */
		malloc_mutex_unlock(&init_lock);
		return (false);
	}

	/* Get number of CPUs. */
	{
		int mib[2];
		size_t len;

		mib[0] = CTL_HW;
		mib[1] = HW_NCPU;
		len = sizeof(ncpus);
		if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
			/* Error. */
			ncpus = 1;
		}
	}

	/* Get page size. */
	{
		long result;

		result = sysconf(_SC_PAGESIZE);
		assert(result != -1);
		pagesize = (unsigned) result;

		/*
		 * We assume that pagesize is a power of 2 when calculating
		 * pagesize_2pow.
		 */
		assert(((result - 1) & result) == 0);
		pagesize_2pow = ffs(result) - 1;
	}

	for (i = 0; i < 3; i++) {
		/* Get runtime configuration. */
		switch (i) {
		case 0:
			if ((linklen = readlink("/etc/malloc.conf", buf,
			    sizeof(buf) - 1)) != -1) {
				/*
				 * Use the contents of the "/etc/malloc.conf"
				 * symbolic link's name.
				 */
				buf[linklen] = '\0';
				opts = buf;
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		case 1:
			if (issetugid() == 0 && (opts =
			    getenv("MALLOC_OPTIONS")) != NULL) {
				/*
				 * Do nothing; opts is already initialized to
				 * the value of the MALLOC_OPTIONS environment
				 * variable.
				 */
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		case 2:
			if (_malloc_options != NULL) {
				/*
				 * Use options that were compiled into the
				 * program.
				 */
				opts = _malloc_options;
			} else {
				/* No configuration specified. */
				buf[0] = '\0';
				opts = buf;
			}
			break;
		default:
			/* NOTREACHED */
			assert(false);
		}

		for (j = 0; opts[j] != '\0'; j++) {
			switch (opts[j]) {
			case 'a':
				opt_abort = false;
				break;
			case 'A':
				opt_abort = true;
				break;
			case 'j':
				opt_junk = false;
				break;
			case 'J':
				opt_junk = true;
				break;
			case 'k':
				/*
				 * Run fullness quartile limits don't have
				 * enough resolution if there are too few
				 * regions for the largest bin size classes.
				 */
				if (opt_chunk_2pow > pagesize_2pow + 4)
					opt_chunk_2pow--;
				break;
			case 'K':
				if (opt_chunk_2pow < CHUNK_2POW_MAX)
					opt_chunk_2pow++;
				break;
			case 'n':
				opt_narenas_lshift--;
				break;
			case 'N':
				opt_narenas_lshift++;
				break;
			case 'p':
				opt_print_stats = false;
				break;
			case 'P':
				opt_print_stats = true;
				break;
			case 'q':
				if (opt_quantum_2pow > QUANTUM_2POW_MIN)
					opt_quantum_2pow--;
				break;
			case 'Q':
				if (opt_quantum_2pow < pagesize_2pow - 1)
					opt_quantum_2pow++;
				break;
			case 's':
				if (opt_small_max_2pow > QUANTUM_2POW_MIN)
					opt_small_max_2pow--;
				break;
			case 'S':
				if (opt_small_max_2pow < pagesize_2pow - 1)
					opt_small_max_2pow++;
				break;
			case 'u':
				opt_utrace = false;
				break;
			case 'U':
				opt_utrace = true;
				break;
			case 'v':
				opt_sysv = false;
				break;
			case 'V':
				opt_sysv = true;
				break;
			case 'x':
				opt_xmalloc = false;
				break;
			case 'X':
				opt_xmalloc = true;
				break;
			case 'z':
				opt_zero = false;
				break;
			case 'Z':
				opt_zero = true;
				break;
			default:
				malloc_printf("%s: (malloc) Unsupported"
				    " character in malloc options: '%c'\n",
				    _getprogname(), opts[j]);
			}
		}
	}

	/* Take care to call atexit() only once. */
	if (opt_print_stats) {
		/* Print statistics at exit. */
		atexit(malloc_print_stats);
	}

	/* Set variables according to the value of opt_small_max_2pow. */
	if (opt_small_max_2pow < opt_quantum_2pow)
		opt_small_max_2pow = opt_quantum_2pow;
	small_max = (1 << opt_small_max_2pow);

	/* Set bin-related variables. */
	bin_maxclass = (pagesize >> 1);
	if (pagesize_2pow > RUN_MIN_REGS_2POW + 1)
		tiny_min_2pow = pagesize_2pow - (RUN_MIN_REGS_2POW + 1);
	else
		tiny_min_2pow = TINY_MIN_2POW;
	assert(opt_quantum_2pow >= tiny_min_2pow);
	ntbins = opt_quantum_2pow - tiny_min_2pow;
	assert(ntbins <= opt_quantum_2pow);
	nqbins = (small_max >> opt_quantum_2pow);
	nsbins = pagesize_2pow - opt_small_max_2pow - 1;

	/* Set variables according to the value of opt_quantum_2pow. */
	quantum = (1 << opt_quantum_2pow);
	quantum_mask = quantum - 1;
	if (ntbins > 0)
		small_min = (quantum >> 1) + 1;
	else
		small_min = 1;
	assert(small_min <= quantum);

	/* Set variables according to the value of opt_chunk_2pow. */
	chunk_size = (1 << opt_chunk_2pow);
	chunk_size_mask = chunk_size - 1;
	arena_chunk_maplen = (1 << (opt_chunk_2pow - pagesize_2pow));
	arena_maxclass = (chunk_size >> 1);

	UTRACE(0, 0, 0);

#ifdef MALLOC_STATS
	memset(&stats_chunks, 0, sizeof(chunk_stats_t));
#endif

	/* Various sanity checks that regard configuration. */
	assert(quantum >= sizeof(void *));
	assert(quantum <= pagesize);
	assert(chunk_size >= pagesize);
	assert(quantum * 4 <= chunk_size);

	/* Initialize chunks data. */
	malloc_mutex_init(&chunks_mtx);
	RB_INIT(&huge);
#ifdef USE_BRK
	brk_base = sbrk(0);
	brk_prev = brk_base;
	brk_max = brk_base;
#endif
#ifdef MALLOC_STATS
	huge_nmalloc = 0;
	huge_ndalloc = 0;
	huge_allocated = 0;
#endif
	RB_INIT(&old_chunks);

	/* Initialize base allocation data structures. */
#ifdef MALLOC_STATS
	base_total = 0;
#endif
#ifdef USE_BRK
	/*
	 * Do special brk allocation here, since the base chunk doesn't really
	 * need to be chunk-aligned.
	 */
	{
		void *brk_cur;
		intptr_t incr;

		do {
			/* Get the current end of brk. */
			brk_cur = sbrk(0);

			/*
			 * Calculate how much padding is necessary to
			 * chunk-align the end of brk.  Don't worry about
			 * brk_cur not being chunk-aligned though.
			 */
			incr = (intptr_t)chunk_size
			    - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);

			brk_prev = sbrk(incr);
			if (brk_prev == brk_cur) {
				/* Success. */
				break;
			}
		} while (brk_prev != (void *)-1);

		base_chunk = brk_cur;
		base_next_addr = base_chunk;
		base_past_addr = (void *)((uintptr_t)base_chunk + incr);
#ifdef MALLOC_STATS
		base_total += incr;
		stats_chunks.nchunks = 1;
		stats_chunks.curchunks = 1;
		stats_chunks.highchunks = 1;
#endif
	}
#else
	/*
	 * The first base chunk will be allocated when needed by base_alloc().
	 */
	base_chunk = NULL;
	base_next_addr = NULL;
	base_past_addr = NULL;
#endif
	base_chunk_nodes = NULL;
	malloc_mutex_init(&base_mtx);

	if (ncpus > 1) {
		/*
		 * For SMP systems, create four times as many arenas as there
		 * are CPUs by default.
		 */
		opt_narenas_lshift += 2;
	}

	/* Determine how many arenas to use. */
	narenas = ncpus;
	if (opt_narenas_lshift > 0) {
		if ((narenas << opt_narenas_lshift) > narenas)
			narenas <<= opt_narenas_lshift;
		/*
		 * Make sure not to exceed the limits of what base_alloc()
		 * can handle.
		 */
		if (narenas * sizeof(arena_t *) > chunk_size)
			narenas = chunk_size / sizeof(arena_t *);
	} else if (opt_narenas_lshift < 0) {
		if ((narenas >> -opt_narenas_lshift) < narenas)
			narenas >>= -opt_narenas_lshift;
		/* Make sure there is at least one arena. */
		if (narenas == 0)
			narenas = 1;
	}

#ifdef NO_TLS
	if (ncpus > 1) {
		static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
		    23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
		    89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
		    151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
		    223, 227, 229, 233, 239, 241, 251, 257, 263};
		unsigned i, nprimes, parenas;

		/*
		 * Pick a prime number of hash arenas that is more than narenas
		 * so that direct hashing of pthread_self() pointers tends to
		 * spread allocations evenly among the arenas.
		 */
		assert((narenas & 1) == 0); /* narenas must be even. */
		nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
		parenas = primes[nprimes - 1]; /* In case not enough primes. */
		for (i = 1; i < nprimes; i++) {
			if (primes[i] > narenas) {
				parenas = primes[i];
				break;
			}
		}
		narenas = parenas;
	}
#endif
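
/*
 * Sketch of why a prime arena count spreads threads better, assuming the
 * NO_TLS hash reduces pthread_self() modulo narenas (the actual hash lives
 * in choose_arena()): thread structure pointers are allocator-aligned and so
 * share low-order zero bits.  A power-of-two modulus keys off exactly those
 * bits and collapses many threads onto few arenas, whereas a prime modulus
 * mixes in the high bits, e.g. for 1 kB-spaced pointers:
 *
 *	0x8000 % 17 ==  9
 *	0x8400 % 17 == 13
 *	0x8800 % 17 ==  0
 */
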
	/* Allocate and initialize arenas. */
	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
	if (arenas == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}
	/*
	 * Zero the array.  In practice, this should always be pre-zeroed,
	 * since it was just mmap()ed, but let's be sure.
	 */
	memset(arenas, 0, sizeof(arena_t *) * narenas);

	/*
	 * Initialize one arena here.  The rest are lazily created in
	 * choose_arena_hard().
	 */
	arenas_extend(0);
	if (arenas[0] == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

	malloc_mutex_init(&arenas_mtx);

	malloc_initialized = true;
	malloc_mutex_unlock(&init_lock);
	return (false);
}
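
/*
 * Example of the option processing above (values illustrative):
 *
 *	MALLOC_OPTIONS=KKNN ./prog
 *
 * hits the 'K' case twice (quadrupling chunk_size, limits permitting) and
 * the 'N' case twice (shifting the arena count up by two, i.e. 4x as many
 * arenas).  Each lowercase letter undoes its uppercase counterpart, and
 * since /etc/malloc.conf, MALLOC_OPTIONS, and _malloc_options are applied in
 * that order, sources processed later take precedence for repeated flags.
 */
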
/*
 * End general internal functions.
 */
/******************************************************************************/
/*
 * Begin malloc(3)-compatible functions.
 */

void *
malloc(size_t size)
{
	void *ret;

	if (malloc_init()) {
		ret = NULL;
		goto RETURN;
	}

	if (size == 0) {
		if (opt_sysv == false)
			size = 1;
		else {
			ret = NULL;
			goto RETURN;
		}
	}

	ret = imalloc(size);

RETURN:
	if (ret == NULL) {
		if (opt_xmalloc) {
			malloc_printf("%s: (malloc) Error in malloc(%zu):"
			    " out of memory\n", _getprogname(), size);
			abort();
		}
		errno = ENOMEM;
	}

	UTRACE(0, size, ret);
	return (ret);
}

int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
	int ret;
	void *result;

	if (malloc_init())
		result = NULL;
	else {
		/* Make sure that alignment is a large enough power of 2. */
		if (((alignment - 1) & alignment) != 0
		    || alignment < sizeof(void *)) {
			if (opt_xmalloc) {
				malloc_printf("%s: (malloc) Error in"
				    " posix_memalign(%zu, %zu):"
				    " invalid alignment\n",
				    _getprogname(), alignment, size);
				abort();
			}
			result = NULL;
			ret = EINVAL;
			goto RETURN;
		}

		result = ipalloc(alignment, size);
	}

	if (result == NULL) {
		if (opt_xmalloc) {
			malloc_printf("%s: (malloc) Error in"
			    " posix_memalign(%zu, %zu): out of memory\n",
			    _getprogname(), alignment, size);
			abort();
		}
		ret = ENOMEM;
		goto RETURN;
	}

	*memptr = result;
	ret = 0;

RETURN:
	UTRACE(0, size, result);
	return (ret);
}

void *
calloc(size_t num, size_t size)
{
	void *ret;
	size_t num_size;

	if (malloc_init()) {
		ret = NULL;
		goto RETURN;
	}

	num_size = num * size;
	if (num_size == 0) {
		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
			num_size = 1;
		else {
			ret = NULL;
			goto RETURN;
		}
	/*
	 * Try to avoid division here.  We know that it isn't possible to
	 * overflow during multiplication if neither operand uses any of the
	 * most significant half of the bits in a size_t.
	 */
	} else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
	    && (num_size / size != num)) {
		/* size_t overflow. */
		ret = NULL;
		goto RETURN;
	}

	ret = icalloc(num_size);

RETURN:
	if (ret == NULL) {
		if (opt_xmalloc) {
			malloc_printf("%s: (malloc) Error in"
			    " calloc(%zu, %zu): out of memory\n",
			    _getprogname(), num, size);
			abort();
		}
		errno = ENOMEM;
	}

	UTRACE(0, num_size, ret);
	return (ret);
}
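
/*
 * Worked example of the overflow screen in calloc() above, assuming 32-bit
 * size_t for brevity: SIZE_T_MAX << (sizeof(size_t) << 2) == 0xffff0000, so
 * the division only runs when num or size uses the upper half of its bits.
 *
 *	num == 0x10000, size == 0x10000:
 *		(num | size) & 0xffff0000 != 0, and num_size wraps to 0;
 *		0 / 0x10000 != 0x10000 --> overflow detected.
 *
 *	num == 1000, size == 1000:
 *		(num | size) & 0xffff0000 == 0, so the 1000000 byte product
 *		cannot have wrapped and no division is needed.
 */
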
void *
realloc(void *ptr, size_t size)
{
	void *ret;

	if (size == 0) {
		if (opt_sysv == false)
			size = 1;
		else {
			if (ptr != NULL)
				idalloc(ptr);
			ret = NULL;
			goto RETURN;
		}
	}

	if (ptr != NULL) {
		assert(malloc_initialized);

		ret = iralloc(ptr, size);

		if (ret == NULL) {
			if (opt_xmalloc) {
				malloc_printf("%s: (malloc) Error in"
				    " realloc(%p, %zu): out of memory\n",
				    _getprogname(), ptr, size);
				abort();
			}
			errno = ENOMEM;
		}
	} else {
		if (malloc_init())
			ret = NULL;
		else
			ret = imalloc(size);

		if (ret == NULL) {
			if (opt_xmalloc) {
				malloc_printf("%s: (malloc) Error in"
				    " realloc(%p, %zu): out of memory\n",
				    _getprogname(), ptr, size);
				abort();
			}
			errno = ENOMEM;
		}
	}

RETURN:
	UTRACE(ptr, size, ret);
	return (ret);
}

void
free(void *ptr)
{

	UTRACE(ptr, 0, 0);
	if (ptr != NULL) {
		assert(malloc_initialized);

		idalloc(ptr);
	}
}

/*
 * End malloc(3)-compatible functions.
 */
/******************************************************************************/
/*
 * Begin non-standard functions.
 */

size_t
malloc_usable_size(const void *ptr)
{

	assert(ptr != NULL);

	return (isalloc(ptr));
}
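
/*
 * Because requests are rounded up to a size class and the original request
 * size is not recorded, malloc_usable_size() reports the class size, which
 * can exceed what was asked for.  Assuming the default 16 byte quantum, a
 * 100 byte request is backed by the 112 byte quantum-spaced class, and the
 * caller may legally use the entire region:
 *
 *	char *p = malloc(100);
 *	size_t n = malloc_usable_size(p);
 *	memset(p, 0, n);
 */
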
/*
 * End non-standard functions.
 */
/******************************************************************************/
/*
 * Begin library-private functions, used by threading libraries for protection
 * of malloc during fork().  These functions are only called if the program is
 * running in threaded mode, so there is no need to check whether the program
 * is threaded here.
 */

void
_malloc_prefork(void)
{
	unsigned i;

	/* Acquire all mutexes in a safe order. */

	malloc_mutex_lock(&arenas_mtx);
	for (i = 0; i < narenas; i++) {
		if (arenas[i] != NULL)
			malloc_mutex_lock(&arenas[i]->mtx);
	}
	malloc_mutex_unlock(&arenas_mtx);

	malloc_mutex_lock(&base_mtx);

	malloc_mutex_lock(&chunks_mtx);
}

void
_malloc_postfork(void)
{
	unsigned i;

	/* Release all mutexes, now that fork() has completed. */

	malloc_mutex_unlock(&chunks_mtx);

	malloc_mutex_unlock(&base_mtx);

	malloc_mutex_lock(&arenas_mtx);
	for (i = 0; i < narenas; i++) {
		if (arenas[i] != NULL)
			malloc_mutex_unlock(&arenas[i]->mtx);
	}
	malloc_mutex_unlock(&arenas_mtx);
}

/*
 * End library-private functions.
 */
/******************************************************************************/