/*-
 * Copyright (C) 2006 Jason Evans <jasone@FreeBSD.org>.
 * All rights reserved.
 *
5 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems. The following
33 * features are included for this purpose:
35 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 * contention and cache sloshing.
38 * + Cache line sharing between arenas is avoided for internal data
41 * + Memory is managed in chunks and runs (chunks can be split into runs using
42 * a binary buddy scheme), rather than as individual pages. This provides
43 * a constant-time mechanism for associating allocations with particular
46 * Allocation requests are rounded up to the nearest size class, and no record
47 * of the original request size is maintained. Allocations are broken into
48 * categories according to size class. Assuming runtime defaults, 4 kB pages
49 * and a 16 byte quantum, the size classes in each category are as follows:
 * |====================================|
 * | Category | Subcategory    |   Size |
 * |====================================|
 * | Small    | Tiny           |      2 |
 * |          |                |      4 |
 * |          |                |      8 |
 * |          |----------------+--------|
 * |          | Quantum-spaced |     16 |
 * |          |                |     32 |
 * |          |                |     48 |
 * |          |                |    ... |
 * |          |                |    496 |
 * |          |                |    512 |
 * |          |----------------+--------|
 * |          | Sub-page       |   1 kB |
 * |          |                |   2 kB |
 * |====================================|
 * | Large                    |    4 kB |
 * |                           |   8 kB |
 * |                           |  16 kB |
 * |                           |    ... |
 * |                           |   1 MB |
 * |====================================|
 * | Huge                     |    2 MB |
 * |                           |   4 MB |
 * |                           |   6 MB |
 * |                           |    ... |
 * |====================================|
83 * A different mechanism is used for each category:
85 * Small : Each size class is segregated into its own set of runs. Each run
86 * maintains a bitmap of which regions are free/allocated.
88 * Large : Each allocation is backed by a dedicated run. Metadata are stored
89 * in the associated arena chunk header maps.
91 * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
92 * Metadata are stored in a separate red-black tree.
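 *
 * For example, assuming the runtime defaults above (4 kB pages, 16 byte
 * quantum, 2 MB chunks): a request for 100 bytes is quantum-spaced and is
 * rounded up to 112 bytes; a request for 5000 bytes is large and is backed by
 * a dedicated 8 kB run; a request for 3 MB is huge and is rounded up to two
 * 2 MB chunks.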
94 *******************************************************************************
98 *******************************************************************************
 *******************************************************************************
 */
105 /* Ring definitions. */
#define qr(a_type) struct {						\
	a_type	*qre_next;	/* Link to next element. */		\
	a_type	*qre_prev;	/* Link to previous element. */		\
}
111 #define qr_initializer {NULL, NULL}
113 /* Ring functions. */
114 #define qr_new(a_qr, a_field) do { \
115 (a_qr)->a_field.qre_next = (a_qr); \
	(a_qr)->a_field.qre_prev = (a_qr);				\
} while (0)
119 #define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
121 #define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
123 #define qr_before_insert(a_qrelm, a_qr, a_field) do { \
124 (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \
125 (a_qr)->a_field.qre_next = (a_qrelm); \
126 (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \
	(a_qrelm)->a_field.qre_prev = (a_qr);				\
} while (0)
130 #define qr_after_insert(a_qrelm, a_qr, a_field) do { \
131 (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \
132 (a_qr)->a_field.qre_prev = (a_qrelm); \
133 (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \
	(a_qrelm)->a_field.qre_next = (a_qr);				\
} while (0)
#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do {			\
	a_type *t;							\
	(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b);	\
	(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a);	\
	t = (a_qr_a)->a_field.qre_prev;					\
	(a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev;	\
	(a_qr_b)->a_field.qre_prev = t;					\
} while (0)
147 * qr_meld() and qr_split() are functionally equivalent, so there's no need to
148 * have two copies of the code.
150 #define qr_split(a_qr_a, a_qr_b, a_type, a_field) \
151 qr_meld((a_qr_a), (a_qr_b), a_type, a_field)
153 #define qr_remove(a_qr, a_field) do { \
154 (a_qr)->a_field.qre_prev->a_field.qre_next \
155 = (a_qr)->a_field.qre_next; \
156 (a_qr)->a_field.qre_next->a_field.qre_prev \
157 = (a_qr)->a_field.qre_prev; \
158 (a_qr)->a_field.qre_next = (a_qr); \
	(a_qr)->a_field.qre_prev = (a_qr);				\
} while (0)
#define qr_foreach(var, a_qr, a_field)					\
	for ((var) = (a_qr);						\
	    (var) != NULL;						\
	    (var) = (((var)->a_field.qre_next != (a_qr))		\
	    ? (var)->a_field.qre_next : NULL))
#define qr_reverse_foreach(var, a_qr, a_field)				\
	for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL;	\
	    (var) != NULL;						\
	    (var) = (((var) != (a_qr))					\
	    ? (var)->a_field.qre_prev : NULL))
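/*
 * Example usage of the ring macros (hypothetical element type, for
 * illustration only):
 *
 *	typedef struct widget_s widget_t;
 *	struct widget_s {
 *		int		data;
 *		qr(widget_t)	link;
 *	};
 *
 *	widget_t a, b;
 *	qr_new(&a, link);
 *	qr_new(&b, link);
 *	qr_after_insert(&a, &b, link);	// Ring is now a <-> b.
 *	qr_remove(&b, link);		// a and b are singleton rings again.
 */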
174 /******************************************************************************/
/*
 * In order to disable various extra features that may have negative
 * performance impacts (assertions, expanded statistics), define
 * NO_MALLOC_EXTRAS.
 */
181 /* #define NO_MALLOC_EXTRAS */
183 #ifndef NO_MALLOC_EXTRAS
#  define MALLOC_DEBUG
#endif
187 #include <sys/cdefs.h>
188 __FBSDID("$FreeBSD$");
190 #include "libc_private.h"
194 #include "spinlock.h"
195 #include "namespace.h"
196 #include <sys/mman.h>
197 #include <sys/param.h>
198 #include <sys/stddef.h>
199 #include <sys/time.h>
200 #include <sys/types.h>
201 #include <sys/sysctl.h>
202 #include <sys/tree.h>
204 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
206 #include <machine/atomic.h>
207 #include <machine/cpufunc.h>
208 #include <machine/vmparam.h>
222 #include "un-namespace.h"
/*
 * Calculate statistics that can be used to get an idea of how well caching is
 * working.
 */
228 #ifndef NO_MALLOC_EXTRAS
#  define MALLOC_STATS
#endif
240 /* Disable inlining to make debugging easier. */
244 /* Size of stack-allocated buffer passed to strerror_r(). */
245 #define STRERROR_BUF 64
247 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
#ifdef __i386__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		4
#endif
#ifdef __ia64__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		8
#endif
#ifdef __alpha__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		8
#endif
#ifdef __sparc64__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		8
#endif
#ifdef __amd64__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		8
#endif
#ifdef __arm__
#  define QUANTUM_2POW_MIN	3
#  define SIZEOF_PTR		4
#endif
#ifdef __powerpc__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		4
#endif
283 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
284 #ifndef SIZEOF_INT_2POW
#  define SIZEOF_INT_2POW	2
#endif
288 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
#if (!defined(PIC) && !defined(NO_TLS))
#  define NO_TLS
#endif
/*
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.
 *
 * chunk_size must satisfy:
 *
 *   2^(pagesize_2pow - 1 + RUN_MIN_REGS_2POW) <= chunk_size <= 2^28
 */
301 #define CHUNK_2POW_DEFAULT 21
302 #define CHUNK_2POW_MAX 28
/*
 * Maximum size of L1 cache line. This is used to avoid cache line aliasing,
 * so over-estimates are okay (up to a point), but under-estimates will
 * negatively affect performance.
 */
309 #define CACHELINE_2POW 6
310 #define CACHELINE ((size_t)(1 << CACHELINE_2POW))
/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2. Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
317 #define SMALL_MAX_2POW_DEFAULT 9
318 #define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT)
/*
 * Minimum number of regions that must fit into a run that serves quantum-size
 * allocations.
 *
 * Note that if this is set too low, space will be wasted if there are size
 * classes that are small enough that RUN_MIN_REGS regions don't fill a page.
 * If this is set too high, then the overhead of searching through the bitmap
 * that tracks region usage will become excessive.
 */
329 #define RUN_MIN_REGS_2POW 10
330 #define RUN_MIN_REGS (1 << RUN_MIN_REGS_2POW)
/*
 * Maximum number of pages for a run that is used for bin allocations.
 *
 * Note that if this is set too low, then fragmentation for the largest bin
 * size classes will be high. If this is set too high, then even small
 * programs will often have to allocate more than two chunks early on.
 */
339 #define RUN_MAX_PAGES_2POW 4
340 #define RUN_MAX_PAGES (1 << RUN_MAX_PAGES_2POW)
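/*
 * For example, with the defaults above and a 16 byte size class, a run must
 * hold at least RUN_MIN_REGS (1024) regions and therefore be at least 16 kB,
 * while a bin run is never larger than RUN_MAX_PAGES (16) pages (64 kB with
 * 4 kB pages); arena_new() clamps each bin's run_size to this range.
 */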
342 /******************************************************************************/
/*
 * Mutexes based on spinlocks. We can't use normal pthread mutexes, because
 * they require malloc()ed memory.
 */
352 /* Set to true once the allocator has been initialized. */
353 static bool malloc_initialized = false;
355 /* Used to avoid initialization races. */
356 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
358 /******************************************************************************/
/*
 * Statistics data structures.
 */
#ifdef MALLOC_STATS

365 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
366 struct malloc_bin_stats_s {
	/*
	 * Number of allocation requests that corresponded to the size of this
	 * bin.
	 */
	uint64_t	nrequests;
	/* Total number of runs created for this bin's size class. */
	uint64_t	nruns;
	/*
	 * Total number of run promotions/demotions for this bin's size class.
	 */
	uint64_t	npromote;
	uint64_t	ndemote;
	/* High-water mark for this bin. */
	unsigned long	highruns;
	/* Current number of runs in this bin. */
	unsigned long	curruns;
};
389 typedef struct arena_stats_s arena_stats_t;
390 struct arena_stats_s {
	/* Total byte count of allocated memory, not including overhead. */
	size_t		allocated;
	/* Number of times each function was called. */
	uint64_t	nmalloc;
	uint64_t	ndalloc;
	uint64_t	nmadvise;
	/* Number of large allocation requests. */
	uint64_t	large_nrequests;
};
403 typedef struct chunk_stats_s chunk_stats_t;
404 struct chunk_stats_s {
	/* Number of chunks that were allocated. */
	uint64_t	nchunks;
	/* High-water mark for number of chunks allocated. */
	unsigned long	highchunks;
	/*
	 * Current number of chunks allocated. This value isn't maintained for
	 * any other purpose, so keep track of it in order to be able to set
	 * highchunks.
	 */
	unsigned long	curchunks;
};
419 #endif /* #ifdef MALLOC_STATS */
421 /******************************************************************************/
/*
 * Chunk data structures.
 */
426 /* Tree of chunks. */
427 typedef struct chunk_node_s chunk_node_t;
struct chunk_node_s {
	/* Linkage for the chunk tree. */
	RB_ENTRY(chunk_node_s) link;
	/*
	 * Pointer to the chunk that this tree node is responsible for. In some
	 * (but certainly not all) cases, this data structure is placed at the
	 * beginning of the corresponding chunk, so this field may point to this
	 * node.
	 */
	void	*chunk;
	/* Total chunk size. */
	size_t	size;
};
443 typedef struct chunk_tree_s chunk_tree_t;
444 RB_HEAD(chunk_tree_s, chunk_node_s);
446 /******************************************************************************/
/*
 * Arena data structures.
 */
451 typedef struct arena_s arena_t;
452 typedef struct arena_bin_s arena_bin_t;
454 typedef struct arena_chunk_map_s arena_chunk_map_t;
struct arena_chunk_map_s {
	bool		free:1;
	bool		large:1;
	unsigned	npages:15;	/* Limiting factor for CHUNK_2POW_MAX. */
	unsigned	pos:15;
};
462 /* Arena chunk header. */
463 typedef struct arena_chunk_s arena_chunk_t;
464 struct arena_chunk_s {
	/* Arena that owns the chunk. */
	arena_t		*arena;

468 /* Linkage for the arena's chunk tree. */
469 RB_ENTRY(arena_chunk_s) link;
	/*
	 * Number of pages in use. This is maintained in order to make
	 * detection of empty chunks fast.
	 */
	uint32_t	pages_used;

	/*
478 * Array of counters that keeps track of how many free runs of each
479 * size are available in this chunk. This table is sized at compile
480 * time, which is wasteful. However, due to unrelated rounding, this
481 * doesn't actually waste any otherwise useful space.
493 uint32_t nfree_runs[CHUNK_2POW_MAX/* - PAGE_SHIFT */];
495 /* Map of pages within chunk that keeps track of free/large/small. */
	arena_chunk_map_t map[1];	/* Dynamically sized. */
};
498 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
499 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
typedef struct arena_run_s arena_run_t;
struct arena_run_s {
	/* Linkage for run rings. */
	qr(arena_run_t) link;

#ifdef MALLOC_DEBUG
	uint32_t	magic;
#  define ARENA_RUN_MAGIC 0x384adf93
#endif

	/* Bin this run is associated with. */
	arena_bin_t	*bin;

	/* Bitmask of in-use regions (0: in use, 1: free). */
#define REGS_MASK_NELMS							\
	(1 << (RUN_MIN_REGS_2POW - SIZEOF_INT_2POW - 2))
	unsigned	regs_mask[REGS_MASK_NELMS];

	/* Index of first element that might have a free region. */
	unsigned	regs_minelm;

	/* Number of free regions in run. */
	unsigned	nfree;

	/*
	 * Current quartile for this run, one of: {RUN_QINIT, RUN_Q0, RUN_Q25,
	 * RUN_Q50, RUN_Q75, RUN_Q100}.
	 */
	unsigned	quartile;

	/*
	 * Limits on the number of free regions for the fullness quartile this
	 * run is currently in. If nfree goes outside these limits, the run
	 * is moved to a different fullness quartile.
	 */
	unsigned	free_max;
	unsigned	free_min;
};
546 /* Used for run ring headers, where the run isn't actually used. */
typedef struct arena_run_link_s arena_run_link_t;
struct arena_run_link_s {
	/* Linkage for run rings. */
	qr(arena_run_t) link;
};

struct arena_bin_s {
	/*
	 * Current run being used to service allocations of this bin's size
	 * class.
	 */
	arena_run_t	*runcur;

	/*
561 * Links into rings of runs, of various fullnesses (names indicate
562 * approximate lower bounds). A new run conceptually starts off in
563 * runsinit, and it isn't inserted into the runs0 ring until it
564 * reaches 25% full (hysteresis mechanism). For the run to be moved
565 * again, it must become either empty or 50% full. Thus, each ring
566 * contains runs that are within 50% above the advertised fullness for
567 * the ring. This provides a low-overhead mechanism for segregating
568 * runs into approximate fullness classes.
570 * Conceptually, there is a runs100 that contains completely full runs.
571 * Since we don't need to search for these runs though, no runs100 ring
572 * is actually maintained.
574 * These rings are useful when looking for an existing run to use when
	 * runcur is no longer usable. We look for usable runs in the
	 * following order: runs50, runs25, runs0, and finally runs75.
	 *
583 * runs75 isn't a good place to look, because it contains runs that may
584 * be nearly completely full. Still, we look there as a last resort in
	 * order to avoid allocating a new run if at all possible.
	 */
587 /* arena_run_link_t runsinit; 0% <= fullness < 25% */
588 arena_run_link_t runs0; /* 0% < fullness < 50% */
589 arena_run_link_t runs25; /* 25% < fullness < 75% */
590 arena_run_link_t runs50; /* 50% < fullness < 100% */
591 arena_run_link_t runs75; /* 75% < fullness < 100% */
592 /* arena_run_link_t runs100; fullness == 100% */
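	/*
	 * For example, a run with nregs == 100 starts in RUN_QINIT with
	 * free_min == 76; once fewer than 76 regions are free it is promoted
	 * and inserted into runs0, with new limits free_max == 99 and
	 * free_min == 51.
	 */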
	/* Size of regions in a run for this bin's size class. */
	size_t		reg_size;

	/* Total size of a run for this bin's size class. */
	size_t		run_size;

	/* Total number of regions in a run for this bin's size class. */
	uint32_t	nregs;

	/* Offset of first region in a run for this bin's size class. */
	uint32_t	reg0_offset;
#ifdef MALLOC_STATS
	/* Bin statistics. */
	malloc_bin_stats_t stats;
#endif
};

struct arena_s {
#ifdef MALLOC_DEBUG
	uint32_t		magic;
#  define ARENA_MAGIC 0x947d3d24
#endif

	/* All operations on this arena require that mtx be locked. */
	malloc_mutex_t		mtx;

#ifdef MALLOC_STATS
	arena_stats_t		stats;
#endif

	/*
	 * Tree of chunks this arena manages.
	 */
	arena_chunk_tree_t	chunks;

	/*
	 * bins is used to store rings of free regions of the following sizes,
	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS:
	 * tiny (2, 4, 8), quantum-spaced (16, 32, ..., 512), and sub-page
	 * (1 kB, 2 kB).
	 */
	arena_bin_t		bins[1];	/* Dynamically sized. */
};
656 /******************************************************************************/
661 /* Number of CPUs. */
662 static unsigned ncpus;
665 static unsigned pagesize;
666 static unsigned pagesize_2pow;
668 /* Various bin-related settings. */
669 static size_t bin_maxclass; /* Max size class for bins. */
670 static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */
671 static unsigned nqbins; /* Number of quantum-spaced bins. */
672 static unsigned nsbins; /* Number of (2^n)-spaced sub-page bins. */
673 static size_t small_min;
674 static size_t small_max;
675 static unsigned tiny_min_2pow;
677 /* Various quantum-related settings. */
678 static size_t quantum;
679 static size_t quantum_mask; /* (quantum - 1). */
681 /* Various chunk-related settings. */
682 static size_t chunk_size;
683 static size_t chunk_size_mask; /* (chunk_size - 1). */
684 static size_t arena_maxclass; /* Max size class for arenas. */
685 static unsigned arena_chunk_maplen;
692 /* Protects chunk-related data structures. */
693 static malloc_mutex_t chunks_mtx;
695 /* Tree of chunks that are stand-alone huge allocations. */
696 static chunk_tree_t huge;
700 * Try to use brk for chunk-size allocations, due to address space constraints.
703 * Protects sbrk() calls. This must be separate from chunks_mtx, since
704 * base_chunk_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
705 * could cause recursive lock acquisition).
707 static malloc_mutex_t brk_mtx;
708 /* Result of first sbrk(0) call. */
709 static void *brk_base;
710 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
711 static void *brk_prev;
712 /* Current upper limit on brk addresses. */
713 static void *brk_max;
718 * Byte counters for allocated/total space used by the chunks in the huge
721 static uint64_t huge_nmalloc;
722 static uint64_t huge_ndalloc;
723 static size_t huge_allocated;
/*
 * Tree of chunks that were previously allocated. This is used when allocating
 * chunks, in an attempt to re-use address space.
 */
730 static chunk_tree_t old_chunks;
732 /****************************/
/*
 * base (internal allocation).
 */
/*
 * Current chunk that is being used for internal memory allocations. This
 * chunk is carved up in cacheline-size quanta, so that there is no chance of
 * false cache line sharing.
 */
742 static void *base_chunk;
743 static void *base_next_addr;
744 static void *base_past_addr; /* Addr immediately past base_chunk. */
745 static chunk_node_t *base_chunk_nodes; /* LIFO cache of chunk nodes. */
746 static malloc_mutex_t base_mtx;
748 static uint64_t base_total;
/*
 * Arenas that are used to service external requests. Not all elements of the
 * arenas array are necessarily used; arenas are created lazily as needed.
 */
760 static arena_t **arenas;
761 static unsigned narenas;
763 static unsigned next_arena;
765 static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */
#ifndef NO_TLS
/*
 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 * for allocations.
 */
static __thread arena_t	*arenas_map;
#endif
776 /* Chunk statistics. */
777 static chunk_stats_t stats_chunks;
780 /*******************************/
/*
 * Runtime configuration options.
 */
784 const char *_malloc_options;
786 #ifndef NO_MALLOC_EXTRAS
787 static bool opt_abort = true;
static bool	opt_junk = true;
#else
static bool	opt_abort = false;
static bool	opt_junk = false;
#endif
793 static bool opt_hint = false;
794 static bool opt_print_stats = false;
795 static size_t opt_quantum_2pow = QUANTUM_2POW_MIN;
796 static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
797 static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT;
798 static bool opt_utrace = false;
799 static bool opt_sysv = false;
800 static bool opt_xmalloc = false;
801 static bool opt_zero = false;
802 static int32_t opt_narenas_lshift = 0;
#define UTRACE(a, b, c)							\
	if (opt_utrace) {						\
		malloc_utrace_t ut = {a, b, c};				\
		utrace(&ut, sizeof(ut));				\
	}
816 /******************************************************************************/
/*
 * Begin function prototypes for non-inline static functions.
 */
821 static void malloc_mutex_init(malloc_mutex_t *a_mutex);
static void	wrtmessage(const char *p1, const char *p2, const char *p3,
    const char *p4);
824 static void malloc_printf(const char *format, ...);
825 static bool base_chunk_alloc(size_t minsize);
826 static void *base_alloc(size_t size);
827 static chunk_node_t *base_chunk_node_alloc(void);
828 static void base_chunk_node_dealloc(chunk_node_t *node);
830 static void stats_print(arena_t *arena);
832 static void *pages_map(void *addr, size_t size);
833 static void pages_unmap(void *addr, size_t size);
834 static void *chunk_alloc(size_t size);
835 static void chunk_dealloc(void *chunk, size_t size);
837 static arena_t *choose_arena_hard(void);
static void	arena_run_split(arena_t *arena, arena_run_t *run, bool large,
    size_t size);
841 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
842 static void arena_chunk_dealloc(arena_chunk_t *chunk);
static void	arena_bin_run_promote(arena_t *arena, arena_bin_t *bin,
    arena_run_t *run);
static void	arena_bin_run_demote(arena_t *arena, arena_bin_t *bin,
    arena_run_t *run);
847 static arena_run_t *arena_run_alloc(arena_t *arena, bool large, size_t size);
848 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
849 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
850 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
851 static void *arena_malloc(arena_t *arena, size_t size);
852 static size_t arena_salloc(const void *ptr);
853 static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
854 static void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
855 static bool arena_new(arena_t *arena);
856 static arena_t *arenas_extend(unsigned ind);
857 static void *huge_malloc(size_t size);
858 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
859 static void huge_dalloc(void *ptr);
860 static void *imalloc(size_t size);
861 static void *ipalloc(size_t alignment, size_t size);
862 static void *icalloc(size_t size);
863 static size_t isalloc(const void *ptr);
864 static void *iralloc(void *ptr, size_t size);
865 static void idalloc(void *ptr);
866 static void malloc_print_stats(void);
867 static bool malloc_init_hard(void);
/*
 * End function prototypes.
 */
872 /******************************************************************************/
878 malloc_mutex_init(malloc_mutex_t *a_mutex)
880 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
882 a_mutex->lock = lock;
886 malloc_mutex_lock(malloc_mutex_t *a_mutex)
890 _SPINLOCK(&a_mutex->lock);
894 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
898 _SPINUNLOCK(&a_mutex->lock);
904 /******************************************************************************/
906 * Begin Utility functions/macros.
909 /* Return the chunk address for allocation address a. */
910 #define CHUNK_ADDR2BASE(a) \
911 ((void *)((uintptr_t)(a) & ~chunk_size_mask))
913 /* Return the chunk offset of address a. */
914 #define CHUNK_ADDR2OFFSET(a) \
915 ((size_t)((uintptr_t)(a) & chunk_size_mask))
917 /* Return the smallest chunk multiple that is >= s. */
918 #define CHUNK_CEILING(s) \
919 (((s) + chunk_size_mask) & ~chunk_size_mask)
921 /* Return the smallest cacheline multiple that is >= s. */
922 #define CACHELINE_CEILING(s) \
923 (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
925 /* Return the smallest quantum multiple that is >= a. */
926 #define QUANTUM_CEILING(a) \
927 (((a) + quantum_mask) & ~quantum_mask)
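/*
 * For example, with 2 MB chunks (chunk_size_mask == 0x1fffff) and a 16 byte
 * quantum (quantum_mask == 0xf):
 *
 *   CHUNK_ADDR2BASE(0x50123456)   == (void *)0x50000000
 *   CHUNK_ADDR2OFFSET(0x50123456) == 0x123456
 *   QUANTUM_CEILING(100)          == 112
 */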
/* Compute the smallest power of 2 that is >= x. */
static inline size_t
pow2_ceil(size_t x)
{

	x--;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
#if (SIZEOF_PTR == 8)
	x |= x >> 32;
#endif
	x++;
	return (x);
}
948 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
951 _write(STDERR_FILENO, p1, strlen(p1));
952 _write(STDERR_FILENO, p2, strlen(p2));
953 _write(STDERR_FILENO, p3, strlen(p3));
954 _write(STDERR_FILENO, p4, strlen(p4));
957 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
958 const char *p4) = wrtmessage;
961 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
964 malloc_printf(const char *format, ...)
969 va_start(ap, format);
970 vsnprintf(buf, sizeof(buf), format, ap);
972 _malloc_message(buf, "", "", "");
975 /******************************************************************************/
978 base_chunk_alloc(size_t minsize)
981 assert(minsize <= chunk_size);
985 * Do special brk allocation here, since the base chunk doesn't really
986 * need to be chunk-aligned.
988 if (brk_prev != (void *)-1) {
992 malloc_mutex_lock(&brk_mtx);
994 /* Get the current end of brk. */
998 * Calculate how much padding is necessary to
999 * chunk-align the end of brk. Don't worry about
1000 * brk_cur not being chunk-aligned though.
1002 incr = (intptr_t)chunk_size
1003 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1007 brk_prev = sbrk(incr);
1008 if (brk_prev == brk_cur) {
1010 malloc_mutex_unlock(&brk_mtx);
1011 base_chunk = brk_cur;
1012 base_next_addr = base_chunk;
1013 base_past_addr = (void *)((uintptr_t)base_chunk +
1020 } while (brk_prev != (void *)-1);
1021 malloc_mutex_unlock(&brk_mtx);
1026 * Don't worry about chunk alignment here, since base_chunk doesn't really
1027 * need to be aligned.
1029 base_chunk = pages_map(NULL, chunk_size);
1030 if (base_chunk == NULL)
1032 base_next_addr = base_chunk;
1033 base_past_addr = (void *)((uintptr_t)base_chunk + chunk_size);
1035 base_total += chunk_size;
1041 base_alloc(size_t size)
1046 /* Round size up to nearest multiple of the cacheline size. */
1047 csize = CACHELINE_CEILING(size);
1049 malloc_mutex_lock(&base_mtx);
1051 /* Make sure there's enough space for the allocation. */
1052 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1053 if (base_chunk_alloc(csize)) {
1060 ret = base_next_addr;
1061 base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1064 malloc_mutex_unlock(&base_mtx);
1068 static chunk_node_t *
1069 base_chunk_node_alloc(void)
1073 malloc_mutex_lock(&base_mtx);
1074 if (base_chunk_nodes != NULL) {
1075 ret = base_chunk_nodes;
1076 base_chunk_nodes = *(chunk_node_t **)ret;
1077 malloc_mutex_unlock(&base_mtx);
1079 malloc_mutex_unlock(&base_mtx);
1080 ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1087 base_chunk_node_dealloc(chunk_node_t *node)
1090 malloc_mutex_lock(&base_mtx);
1091 *(chunk_node_t **)node = base_chunk_nodes;
1092 base_chunk_nodes = node;
1093 malloc_mutex_unlock(&base_mtx);
1096 /******************************************************************************/
1100 stats_print(arena_t *arena)
1105 malloc_printf("allocated: %zu\n", arena->stats.allocated);
1107 malloc_printf("calls:\n");
	malloc_printf(" %12s %12s %12s\n", "nmalloc", "ndalloc", "nmadvise");
1109 malloc_printf(" %12llu %12llu %12llu\n",
1110 arena->stats.nmalloc, arena->stats.ndalloc, arena->stats.nmadvise);
1112 malloc_printf("large requests: %llu\n", arena->stats.large_nrequests);
1114 malloc_printf("bins:\n");
1115 malloc_printf("%13s %1s %4s %5s %6s %9s %5s %6s %7s %6s %6s\n",
1116 "bin", "", "size", "nregs", "run_sz", "nrequests", "nruns",
1117 "hiruns", "curruns", "npromo", "ndemo");
1118 for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1119 if (arena->bins[i].stats.nrequests == 0) {
1120 if (gap_start == -1)
1123 if (gap_start != -1) {
1124 if (i > gap_start + 1) {
1125 /* Gap of more than one size class. */
1126 malloc_printf("[%u..%u]\n",
1129 /* Gap of one size class. */
1130 malloc_printf("[%u]\n", gap_start);
1135 "%13u %1s %4u %5u %6u %9llu %5llu"
1136 " %6lu %7lu %6llu %6llu\n",
1138 i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1139 arena->bins[i].reg_size,
1140 arena->bins[i].nregs,
1141 arena->bins[i].run_size,
1142 arena->bins[i].stats.nrequests,
1143 arena->bins[i].stats.nruns,
1144 arena->bins[i].stats.highruns,
1145 arena->bins[i].stats.curruns,
1146 arena->bins[i].stats.npromote,
1147 arena->bins[i].stats.ndemote);
1150 if (gap_start != -1) {
1151 if (i > gap_start + 1) {
1152 /* Gap of more than one size class. */
1153 malloc_printf("[%u..%u]\n", gap_start, i - 1);
1155 /* Gap of one size class. */
1156 malloc_printf("[%u]\n", gap_start);
1163 * End Utility functions/macros.
1165 /******************************************************************************/
1167 * Begin chunk management functions.
1171 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1177 if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1179 else if (a->chunk == b->chunk)
1185 /* Generate red-black tree code for chunks. */
1186 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1189 pages_map(void *addr, size_t size)
1194 * We don't use MAP_FIXED here, because it can cause the *replacement*
1195 * of existing mappings, and we only want to create new mappings.
1197 ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1199 assert(ret != NULL);
1201 if (ret == MAP_FAILED)
1203 else if (addr != NULL && ret != addr) {
1205 * We succeeded in mapping memory, but not in the right place.
1207 if (munmap(ret, size) == -1) {
1208 char buf[STRERROR_BUF];
1210 strerror_r(errno, buf, sizeof(buf));
1211 malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1212 _getprogname(), buf);
1219 assert(ret == NULL || (addr == NULL && ret != addr)
1220 || (addr != NULL && ret == addr));
1225 pages_unmap(void *addr, size_t size)
1228 if (munmap(addr, size) == -1) {
1229 char buf[STRERROR_BUF];
1231 strerror_r(errno, buf, sizeof(buf));
1232 malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1233 _getprogname(), buf);
1240 chunk_alloc(size_t size)
1243 chunk_node_t *tchunk, *delchunk;
1246 assert(size % chunk_size == 0);
1248 malloc_mutex_lock(&chunks_mtx);
1250 if (size == chunk_size) {
1252 * Check for address ranges that were previously chunks and try
1256 tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1257 while (tchunk != NULL) {
1258 /* Found an address range. Try to recycle it. */
1260 chunk = tchunk->chunk;
1262 tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1264 /* Remove delchunk from the tree. */
1265 RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1266 base_chunk_node_dealloc(delchunk);
1269 if ((uintptr_t)chunk >= (uintptr_t)brk_base
1270 && (uintptr_t)chunk < (uintptr_t)brk_max) {
1271 /* Re-use a previously freed brk chunk. */
1276 if ((ret = pages_map(chunk, size)) != NULL) {
1285 * Try to create allocations in brk, in order to make full use of
1286 * limited address space.
1288 if (brk_prev != (void *)-1) {
1293 * The loop is necessary to recover from races with other
1294 * threads that are using brk for something other than malloc.
1296 malloc_mutex_lock(&brk_mtx);
1298 /* Get the current end of brk. */
1302 * Calculate how much padding is necessary to
1303 * chunk-align the end of brk.
1305 incr = (intptr_t)size
1306 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1310 ret = (void *)(intptr_t)brk_cur + incr;
1314 brk_prev = sbrk(incr);
1315 if (brk_prev == brk_cur) {
1317 malloc_mutex_unlock(&brk_mtx);
1318 brk_max = (void *)(intptr_t)ret + size;
1321 } while (brk_prev != (void *)-1);
1322 malloc_mutex_unlock(&brk_mtx);
	/*
	 * Try to over-allocate, but allow the OS to place the allocation
	 * anywhere. Beware of size_t wrap-around.
	 */
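	/*
	 * For example, to get one 2 MB chunk, 4 MB is mapped; if mmap()
	 * returns 0x500aa000, the leading 0x156000 bytes and the trailing
	 * 0xaa000 bytes are unmapped, leaving a chunk-aligned region at
	 * 0x50200000.
	 */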
1330 if (size + chunk_size > size) {
1331 if ((ret = pages_map(NULL, size + chunk_size)) != NULL) {
1332 size_t offset = CHUNK_ADDR2OFFSET(ret);
1335 * Success. Clean up unneeded leading/trailing space.
1338 /* Leading space. */
1339 pages_unmap(ret, chunk_size - offset);
1341 ret = (void *)((uintptr_t)ret + (chunk_size -
1344 /* Trailing space. */
1345 pages_unmap((void *)((uintptr_t)ret + size),
1348 /* Trailing space only. */
1349 pages_unmap((void *)((uintptr_t)ret + size),
1356 /* All strategies for allocation failed. */
1361 stats_chunks.nchunks += (size / chunk_size);
1362 stats_chunks.curchunks += (size / chunk_size);
1364 if (stats_chunks.curchunks > stats_chunks.highchunks)
1365 stats_chunks.highchunks = stats_chunks.curchunks;
1367 malloc_mutex_unlock(&chunks_mtx);
1369 assert(CHUNK_ADDR2BASE(ret) == ret);
1374 chunk_dealloc(void *chunk, size_t size)
1380 assert(chunk != NULL);
1381 assert(CHUNK_ADDR2BASE(chunk) == chunk);
1383 assert(size % chunk_size == 0);
1385 malloc_mutex_lock(&chunks_mtx);
1388 if ((uintptr_t)chunk >= (uintptr_t)brk_base
1389 && (uintptr_t)chunk < (uintptr_t)brk_max) {
1392 malloc_mutex_lock(&brk_mtx);
1393 /* Get the current end of brk. */
1397 * Try to shrink the data segment if this chunk is at the end
1398 * of the data segment. The sbrk() call here is subject to a
1399 * race condition with threads that use brk(2) or sbrk(2)
1400 * directly, but the alternative would be to leak memory for
1401 * the sake of poorly designed multi-threaded programs.
1403 if (brk_cur == brk_max
1404 && (void *)(uintptr_t)chunk + size == brk_max
1405 && sbrk(-(intptr_t)size) == brk_max) {
1406 malloc_mutex_unlock(&brk_mtx);
1407 if (brk_prev == brk_max) {
1409 brk_prev = (void *)(intptr_t)brk_max
1415 malloc_mutex_unlock(&brk_mtx);
1416 madvise(chunk, size, MADV_FREE);
1419 pages_unmap(chunk, size);
1422 * Iteratively create records of each chunk-sized memory region that
1423 * 'chunk' is comprised of, so that the address range can be recycled
1424 * if memory usage increases later on.
1426 for (offset = 0; offset < size; offset += chunk_size) {
1428 * It is possible for chunk to overlap existing entries in
1429 * old_chunks if it is a huge allocation, so take care to not
1432 key.chunk = (void *)((uintptr_t)chunk + (uintptr_t)offset);
1433 if (RB_FIND(chunk_tree_s, &old_chunks, &key) == NULL) {
1434 node = base_chunk_node_alloc();
1438 node->chunk = key.chunk;
1439 node->size = chunk_size;
1440 RB_INSERT(chunk_tree_s, &old_chunks, node);
1448 stats_chunks.curchunks -= (size / chunk_size);
1450 malloc_mutex_unlock(&chunks_mtx);
1454 * End chunk management functions.
1456 /******************************************************************************/
/*
 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
 * code if necessary).
 */
1465 static inline arena_t *
1471 * We can only use TLS if this is a PIC library, since for the static
1472 * library version, libc's malloc is used by TLS allocation, which
1473 * introduces a bootstrapping issue.
1476 if (__isthreaded == false) {
1478 * Avoid the overhead of TLS for single-threaded operation. If the
1479 * app switches to threaded mode, the initial thread may end up
1480 * being assigned to some other arena, but this one-time switch
1481 * shouldn't cause significant issues.
1488 ret = choose_arena_hard();
1494 * Hash _pthread_self() to one of the arenas. There is a prime
1495 * number of arenas, so this has a reasonable chance of
1496 * working. Even so, the hashing can be easily thwarted by
1497 * inconvenient _pthread_self() values. Without specific
1498 * knowledge of how _pthread_self() calculates values, we can't
1499 * easily do much better than this.
1501 ind = (unsigned long) _pthread_self() % narenas;
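		/*
		 * For example, if narenas were 7 and _pthread_self() returned
		 * 0x1234 (4660), this thread would be mapped to
		 * arenas[4660 % 7] == arenas[5].
		 */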
1504 * Optimistially assume that arenas[ind] has been initialized.
1505 * At worst, we find out that some other thread has already
1506 * done so, after acquiring the lock in preparation. Note that
1507 * this lazy locking also has the effect of lazily forcing
1508 * cache coherency; without the lock acquisition, there's no
1509 * guarantee that modification of arenas[ind] by another thread
1510 * would be seen on this CPU for an arbitrary amount of time.
1512 * In general, this approach to modifying a synchronized value
1513 * isn't a good idea, but in this case we only ever modify the
1514 * value once, so things work out well.
1519 * Avoid races with another thread that may have already
1520 * initialized arenas[ind].
1522 malloc_mutex_lock(&arenas_mtx);
1523 if (arenas[ind] == NULL)
1524 ret = arenas_extend((unsigned)ind);
1527 malloc_mutex_unlock(&arenas_mtx);
1533 assert(ret != NULL);
1539 * Choose an arena based on a per-thread value (slow-path code only, called
1540 * only by choose_arena()).
1543 choose_arena_hard(void)
1547 assert(__isthreaded);
1549 /* Assign one of the arenas to this thread, in a round-robin fashion. */
1550 malloc_mutex_lock(&arenas_mtx);
1551 ret = arenas[next_arena];
1553 ret = arenas_extend(next_arena);
1556 * Make sure that this function never returns NULL, so that
1557 * choose_arena() doesn't have to check for a NULL return
1562 next_arena = (next_arena + 1) % narenas;
1563 malloc_mutex_unlock(&arenas_mtx);
1571 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1577 if ((uintptr_t)a < (uintptr_t)b)
1585 /* Generate red-black tree code for arena chunks. */
1586 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1588 static inline void *
1589 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1592 unsigned i, mask, bit, regind;
1594 assert(run->magic == ARENA_RUN_MAGIC);
1596 for (i = run->regs_minelm; i < REGS_MASK_NELMS; i++) {
1597 mask = run->regs_mask[i];
1599 /* Usable allocation found. */
1600 bit = ffs(mask) - 1;
1602 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1603 ret = (void *)&((char *)run)[bin->reg0_offset
1604 + (bin->reg_size * regind)];
1608 run->regs_mask[i] = mask;
1613 * Make a note that nothing before this element
1614 * contains a free region.
1616 run->regs_minelm = i + 1;
1625 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
	/*
	 * To divide by a number D that is not a power of two we multiply
	 * by (2^21 / D) and then right shift by 21 positions.
	 *
	 *   X / D
	 *
	 * becomes
	 *
	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
	 */
1637 #define SIZE_INV_SHIFT 21
1638 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
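	/*
	 * For example, with a 16 byte quantum and region size D == 48:
	 * size_invs[0] == SIZE_INV(3) == ((1 << 21) / 48) + 1 == 43691, so a
	 * region at byte offset diff == 96 yields
	 * regind == (96 * 43691) >> 21 == 2, matching 96 / 48.
	 */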
	static const unsigned size_invs[] = {
	    SIZE_INV(3),
	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1642 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1643 SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1644 SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1645 SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1646 SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1647 SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1648 #if (QUANTUM_2POW_MIN < 4)
1650 SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1651 SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1652 SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1653 SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1654 SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1655 SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1656 SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1657 SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1660 unsigned diff, regind, elm, bit;
1662 assert(run->magic == ARENA_RUN_MAGIC);
1663 assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1664 >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1667 * Avoid doing division with a variable divisor if possible. Using
1668 * actual division here can reduce allocator throughput by over 20%!
1670 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1671 if ((size & (size - 1)) == 0) {
1673 * log2_table allows fast division of a power of two in the
1676 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1678 static const unsigned char log2_table[] = {
1679 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1681 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
		};

		if (size <= 128)
			regind = (diff >> log2_table[size - 1]);
1691 else if (size <= 32768)
1692 regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1695 * The page size is too large for us to use the lookup
1696 * table. Use real division.
1698 regind = diff / size;
1700 } else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1701 << QUANTUM_2POW_MIN) + 2) {
1702 regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1703 regind >>= SIZE_INV_SHIFT;
1706 * size_invs isn't large enough to handle this size class, so
1707 * calculate regind using actual division. This only happens
1708 * if the user increases small_max via the 'S' runtime
1709 * configuration option.
1711 regind = diff / size;
1713 assert(diff == regind * size);
1714 assert(regind < bin->nregs);
1716 elm = regind >> (SIZEOF_INT_2POW + 3);
1717 if (elm < run->regs_minelm)
1718 run->regs_minelm = elm;
1719 bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1720 assert((run->regs_mask[elm] & (1 << bit)) == 0);
1721 run->regs_mask[elm] |= (1 << bit);
1723 #undef SIZE_INV_SHIFT
1727 arena_run_split(arena_t *arena, arena_run_t *run, bool large, size_t size)
1729 arena_chunk_t *chunk;
1730 unsigned run_ind, map_offset, total_pages, need_pages;
1731 unsigned i, log2_run_pages, run_pages;
1733 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1734 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1736 assert(chunk->map[run_ind].free);
1737 total_pages = chunk->map[run_ind].npages;
1738 need_pages = (size >> pagesize_2pow);
1740 assert(chunk->map[run_ind].free);
1741 assert(chunk->map[run_ind].large == false);
1742 assert(chunk->map[run_ind].npages == total_pages);
1744 /* Split enough pages from the front of run to fit allocation size. */
1745 map_offset = run_ind;
1746 for (i = 0; i < need_pages; i++) {
1747 chunk->map[map_offset + i].free = false;
1748 chunk->map[map_offset + i].large = large;
1749 chunk->map[map_offset + i].npages = need_pages;
1750 chunk->map[map_offset + i].pos = i;
1753 /* Update map for trailing pages. */
1754 map_offset += need_pages;
1755 while (map_offset < run_ind + total_pages) {
1756 log2_run_pages = ffs(map_offset) - 1;
1757 run_pages = (1 << log2_run_pages);
1759 chunk->map[map_offset].free = true;
1760 chunk->map[map_offset].large = false;
1761 chunk->map[map_offset].npages = run_pages;
1763 chunk->nfree_runs[log2_run_pages]++;
1765 map_offset += run_pages;
1768 chunk->pages_used += (size >> pagesize_2pow);
1771 static arena_chunk_t *
1772 arena_chunk_alloc(arena_t *arena)
1774 arena_chunk_t *chunk;
1775 unsigned i, j, header_npages, pow2_header_npages, map_offset;
1776 unsigned log2_run_pages, run_pages;
1779 chunk = (arena_chunk_t *)chunk_alloc(chunk_size);
1783 chunk->arena = arena;
1785 RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1788 * Claim that no pages are in use, since the header is merely overhead.
1790 chunk->pages_used = 0;
1792 memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs));
1794 header_size = (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen]
1795 - (uintptr_t)chunk);
1796 if (header_size % pagesize != 0) {
1797 /* Round up to the nearest page boundary. */
1798 header_size += pagesize - (header_size % pagesize);
1801 header_npages = header_size >> pagesize_2pow;
1802 pow2_header_npages = pow2_ceil(header_npages);
	/*
	 * Iteratively mark runs as in use, until we've spoken for the entire
	 * header.
	 */
	map_offset = 0;
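	/*
	 * For example, a (hypothetical) 13 page header would be marked as
	 * buddy-aligned runs of 8, 4, and 1 pages, since pow2_ceil(13) == 16
	 * and the loop below takes the largest power-of-two piece that still
	 * fits on each iteration.
	 */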
1809 for (i = 0; header_npages > 0; i++) {
1810 if ((pow2_header_npages >> i) <= header_npages) {
1811 for (j = 0; j < (pow2_header_npages >> i); j++) {
1812 chunk->map[map_offset + j].free = false;
1813 chunk->map[map_offset + j].large = false;
1814 chunk->map[map_offset + j].npages =
1815 (pow2_header_npages >> i);
1816 chunk->map[map_offset + j].pos = j;
1818 header_npages -= (pow2_header_npages >> i);
1819 map_offset += (pow2_header_npages >> i);
1824 * Finish initializing map. The chunk header takes up some space at
1825 * the beginning of the chunk, which we just took care of by
1826 * "allocating" the leading pages.
1828 while (map_offset < (chunk_size >> pagesize_2pow)) {
1829 log2_run_pages = ffs(map_offset) - 1;
1830 run_pages = (1 << log2_run_pages);
1832 chunk->map[map_offset].free = true;
1833 chunk->map[map_offset].large = false;
1834 chunk->map[map_offset].npages = run_pages;
1836 chunk->nfree_runs[log2_run_pages]++;
1838 map_offset += run_pages;
1845 arena_chunk_dealloc(arena_chunk_t *chunk)
1848 RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1850 chunk_dealloc((void *)chunk, chunk_size);
1854 arena_bin_run_promote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
1857 assert(bin == run->bin);
1860 assert(run->free_min > run->nfree);
1861 assert(run->quartile < RUN_Q100);
1864 bin->stats.npromote++;
1868 switch (run->quartile) {
1873 qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1874 run->free_max = bin->nregs - 1;
1875 run->free_min = (bin->nregs >> 1) + 1;
1876 assert(run->nfree <= run->free_max);
1877 assert(run->nfree >= run->free_min);
1880 qr_remove(run, link);
1881 qr_before_insert((arena_run_t *)&bin->runs25, run,
1883 run->free_max = ((bin->nregs >> 2) * 3) - 1;
1884 run->free_min = (bin->nregs >> 2) + 1;
1885 assert(run->nfree <= run->free_max);
1886 assert(run->nfree >= run->free_min);
1889 qr_remove(run, link);
1890 qr_before_insert((arena_run_t *)&bin->runs50, run,
1892 run->free_max = (bin->nregs >> 1) - 1;
1894 assert(run->nfree <= run->free_max);
1895 assert(run->nfree >= run->free_min);
1899 * Skip RUN_Q75 during promotion from RUN_Q50.
1900 * Separate handling of RUN_Q75 and RUN_Q100 allows us
1901 * to keep completely full runs in RUN_Q100, thus
1902 * guaranteeing that runs in RUN_Q75 are only mostly
1903 * full. This provides a method for avoiding a linear
1904 * search for non-full runs, which avoids some
1905 * pathological edge cases.
1910 qr_remove(run, link);
1911 assert(bin->runcur == run);
1915 assert(run->nfree <= run->free_max);
1916 assert(run->nfree >= run->free_min);
1925 arena_bin_run_demote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
1928 assert(bin == run->bin);
1931 assert(run->free_max < run->nfree);
1932 assert(run->quartile > RUN_QINIT);
1935 bin->stats.ndemote++;
1939 switch (run->quartile) {
1941 qr_remove(run, link);
1943 bin->stats.curruns--;
1945 if (bin->runcur == run)
1950 arena_run_dalloc(arena, run, bin->run_size);
1953 qr_remove(run, link);
1954 qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1955 run->free_max = bin->nregs - 1;
1956 run->free_min = (bin->nregs >> 1) + 1;
1957 assert(run->nfree <= run->free_max);
1958 assert(run->nfree >= run->free_min);
1961 qr_remove(run, link);
1962 qr_before_insert((arena_run_t *)&bin->runs25, run,
1964 run->free_max = ((bin->nregs >> 2) * 3) - 1;
1965 run->free_min = (bin->nregs >> 2) + 1;
1966 assert(run->nfree <= run->free_max);
1967 assert(run->nfree >= run->free_min);
1970 qr_remove(run, link);
1971 qr_before_insert((arena_run_t *)&bin->runs50, run,
1973 run->free_max = (bin->nregs >> 1) - 1;
1975 assert(run->nfree <= run->free_max);
1976 assert(run->nfree >= run->free_min);
1979 qr_before_insert((arena_run_t *)&bin->runs75, run,
1981 run->free_max = (bin->nregs >> 2) - 1;
1983 assert(run->nfree <= run->free_max);
1984 assert(run->nfree >= run->free_min);
1993 static arena_run_t *
1994 arena_run_alloc(arena_t *arena, bool large, size_t size)
1997 unsigned min_ind, i, j;
1998 arena_chunk_t *chunk;
2003 assert(size <= arena_maxclass);
2012 * Search through arena's chunks in address order for a run that is
2013 * large enough. Look for a precise fit, but do not pass up a chunk
2014 * that has a run which is large enough to split.
2016 min_ind = ffs(size >> pagesize_2pow) - 1;
2017 RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
2019 i < (opt_chunk_2pow - pagesize_2pow);
2021 if (chunk->nfree_runs[i] > 0) {
2022 arena_chunk_map_t *map = chunk->map;
2024 /* Scan chunk's map for free run. */
2026 j < arena_chunk_maplen;
2027 j += map[j].npages) {
2029 && map[j].npages == (1 << i))
				{
2031 run = (arena_run_t *)&((char *)chunk)[j
2034 assert(chunk->nfree_runs[i] > 0);
2035 chunk->nfree_runs[i]--;
2037 /* Update page map. */
2038 arena_run_split(arena, run, large, size);
				}
2049 /* No usable runs. Allocate a new chunk, then try again. */
2050 if (arena_chunk_alloc(arena) == NULL)
2056 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
2058 arena_chunk_t *chunk;
2059 unsigned run_ind, buddy_ind, base_run_ind, run_pages, log2_run_pages;
2061 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2062 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2064 run_pages = (size >> pagesize_2pow);
2065 log2_run_pages = ffs(run_pages) - 1;
2066 assert(run_pages > 0);
2068 /* Subtract pages from count of pages used in chunk. */
2069 chunk->pages_used -= run_pages;
2071 /* Mark run as deallocated. */
2072 chunk->map[run_ind].free = true;
2073 chunk->map[run_ind].large = false;
2074 chunk->map[run_ind].npages = run_pages;
2077 * Tell the kernel that we don't need the data in this run, but only if
2078 * requested via runtime configuration.
2081 madvise(run, size, MADV_FREE);
2083 arena->stats.nmadvise += (size >> pagesize_2pow);
2088 * Iteratively coalesce with buddies. Conceptually, the buddy scheme
2089 * induces a tree on the set of pages. If we know the number of pages
2090 * in the subtree rooted at the current node, we can quickly determine
	 * whether a run is the left or right buddy, and then calculate the
	 * buddy's index.
	 */
	for (;
	    (run_pages = (1 << log2_run_pages)) < arena_chunk_maplen;
	    log2_run_pages++) {
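		/*
		 * For example, a free 4 page run at page index 12 has
		 * (12 >> 2) & 1 == 1, so it is the right buddy of the run at
		 * index 8; if that run is also free and 4 pages long, the two
		 * coalesce into an 8 page run at index 8.
		 */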
2097 if (((run_ind >> log2_run_pages) & 1) == 0) {
2098 /* Current run precedes its buddy. */
2099 buddy_ind = run_ind + run_pages;
2100 base_run_ind = run_ind;
2102 /* Current run follows its buddy. */
2103 buddy_ind = run_ind - run_pages;
2104 base_run_ind = buddy_ind;
2107 if (chunk->map[buddy_ind].free == false
2108 || chunk->map[buddy_ind].npages != run_pages)
2111 assert(chunk->nfree_runs[log2_run_pages] > 0);
2112 chunk->nfree_runs[log2_run_pages]--;
2115 chunk->map[base_run_ind].npages = (run_pages << 1);
2117 /* Update run_ind to be the beginning of the coalesced run. */
2118 run_ind = base_run_ind;
2121 chunk->nfree_runs[log2_run_pages]++;
2123 /* Free pages, to the extent possible. */
2124 if (chunk->pages_used == 0) {
2125 /* This chunk is completely unused now, so deallocate it. */
2126 arena_chunk_dealloc(chunk);
2130 static arena_run_t *
2131 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2134 unsigned i, remainder;
2136 /* Look for a usable run. */
2137 if ((run = qr_next((arena_run_t *)&bin->runs50, link))
2138 != (arena_run_t *)&bin->runs50
2139 || (run = qr_next((arena_run_t *)&bin->runs25, link))
2140 != (arena_run_t *)&bin->runs25
2141 || (run = qr_next((arena_run_t *)&bin->runs0, link))
2142 != (arena_run_t *)&bin->runs0
2143 || (run = qr_next((arena_run_t *)&bin->runs75, link))
2144 != (arena_run_t *)&bin->runs75) {
2145 /* run is guaranteed to have available space. */
2146 qr_remove(run, link);
2149 /* No existing runs have any space available. */
2151 /* Allocate a new run. */
2152 run = arena_run_alloc(arena, false, bin->run_size);
2156 /* Initialize run internals. */
2160 for (i = 0; i < (bin->nregs >> (SIZEOF_INT_2POW + 3)); i++)
2161 run->regs_mask[i] = UINT_MAX;
2162 remainder = bin->nregs % (1 << (SIZEOF_INT_2POW + 3));
2163 if (remainder != 0) {
2164 run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2168 for (; i < REGS_MASK_NELMS; i++)
2169 run->regs_mask[i] = 0;
2171 run->regs_minelm = 0;
2173 run->nfree = bin->nregs;
2174 run->quartile = RUN_QINIT;
2175 run->free_max = bin->nregs;
2176 run->free_min = ((bin->nregs >> 2) * 3) + 1;
2178 run->magic = ARENA_RUN_MAGIC;
2183 bin->stats.curruns++;
2184 if (bin->stats.curruns > bin->stats.highruns)
2185 bin->stats.highruns = bin->stats.curruns;
2190 /* bin->runcur must have space available before this function is called. */
2191 static inline void *
2192 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2196 assert(run->magic == ARENA_RUN_MAGIC);
2197 assert(run->nfree > 0);
2199 ret = arena_run_reg_alloc(run, bin);
2200 assert(ret != NULL);
2202 if (run->nfree < run->free_min) {
2203 /* Promote run to higher fullness quartile. */
2204 arena_bin_run_promote(arena, bin, run);
2210 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2212 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2215 assert(bin->runcur == NULL || bin->runcur->quartile == RUN_Q100);
2217 bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2218 if (bin->runcur == NULL)
2220 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2221 assert(bin->runcur->nfree > 0);
2223 return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2227 arena_malloc(arena_t *arena, size_t size)
2231 assert(arena != NULL);
2232 assert(arena->magic == ARENA_MAGIC);
2234 assert(QUANTUM_CEILING(size) <= arena_maxclass);
2236 if (size <= bin_maxclass) {
2240 /* Small allocation. */
2242 if (size < small_min) {
2244 size = pow2_ceil(size);
2245 bin = &arena->bins[ffs(size >> (tiny_min_2pow + 1))];
2246 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2248 * Bin calculation is always correct, but we may need
2249 * to fix size for the purposes of assertions and/or
2252 if (size < (1 << tiny_min_2pow))
				size = (1 << tiny_min_2pow);
#endif
2255 } else if (size <= small_max) {
2256 /* Quantum-spaced. */
2257 size = QUANTUM_CEILING(size);
2258 bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2262 size = pow2_ceil(size);
2263 bin = &arena->bins[ntbins + nqbins
2264 + (ffs(size >> opt_small_max_2pow) - 2)];
2266 assert(size == bin->reg_size);
2268 malloc_mutex_lock(&arena->mtx);
2269 if ((run = bin->runcur) != NULL)
2270 ret = arena_bin_malloc_easy(arena, bin, run);
2272 ret = arena_bin_malloc_hard(arena, bin);
2275 bin->stats.nrequests++;
2278 /* Medium allocation. */
2279 size = pow2_ceil(size);
2280 malloc_mutex_lock(&arena->mtx);
2281 ret = (void *)arena_run_alloc(arena, true, size);
2283 arena->stats.large_nrequests++;
2288 arena->stats.nmalloc++;
2290 arena->stats.allocated += size;
2293 malloc_mutex_unlock(&arena->mtx);
2295 if (opt_junk && ret != NULL)
2296 memset(ret, 0xa5, size);
2297 else if (opt_zero && ret != NULL)
2298 memset(ret, 0, size);
2302 /* Return the size of the allocation pointed to by ptr. */
2304 arena_salloc(const void *ptr)
2307 arena_chunk_t *chunk;
2309 arena_chunk_map_t mapelm;
2311 assert(ptr != NULL);
2312 assert(CHUNK_ADDR2BASE(ptr) != ptr);
2315 * No arena data structures that we query here can change in a way that
2316 * affects this function, so we don't need to lock.
2318 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2319 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2320 mapelm = chunk->map[pageind];
2321 assert(mapelm.free == false);
2322 if (mapelm.large == false) {
2325 pageind -= mapelm.pos;
2327 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2328 assert(run->magic == ARENA_RUN_MAGIC);
2329 ret = run->bin->reg_size;
2331 ret = mapelm.npages << pagesize_2pow;
2337 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2341 /* Avoid moving the allocation if the size class would not change. */
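	/*
	 * For example, growing a 20 byte allocation to 28 bytes stays within
	 * the 32 byte quantum-spaced size class, so the existing region can be
	 * reused as is.
	 */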
2342 if (size < small_min) {
2343 if (oldsize < small_min &&
2344 ffs(pow2_ceil(size) >> (tiny_min_2pow + 1))
2345 == ffs(pow2_ceil(oldsize) >> (tiny_min_2pow + 1)))
2347 } else if (size <= small_max) {
2348 if (oldsize >= small_min && oldsize <= small_max &&
2349 (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2350 == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2353 if (oldsize > small_max && pow2_ceil(size) == oldsize)
	/*
	 * If we get here, then size and oldsize are different enough that we
	 * need to use a different size class. In that case, fall back to
	 * allocating new space and copying.
	 */
2362 ret = arena_malloc(choose_arena(), size);
2367 memcpy(ret, ptr, size);
2369 memcpy(ret, ptr, oldsize);
2373 if (opt_junk && size < oldsize)
2374 memset(&((char *)ptr)[size], 0x5a, oldsize - size);
2375 else if (opt_zero && size > oldsize)
2376 memset(&((char *)ptr)[size], 0, size - oldsize);
2381 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2384 arena_chunk_map_t mapelm;
2387 assert(arena != NULL);
2388 assert(arena->magic == ARENA_MAGIC);
2389 assert(chunk->arena == arena);
2390 assert(ptr != NULL);
2391 assert(CHUNK_ADDR2BASE(ptr) != ptr);
2393 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2394 mapelm = chunk->map[pageind];
2395 assert(mapelm.free == false);
2396 if (mapelm.large == false) {
2400 /* Small allocation. */
2402 pageind -= mapelm.pos;
2404 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2405 assert(run->magic == ARENA_RUN_MAGIC);
2407 size = bin->reg_size;
2410 memset(ptr, 0x5a, size);
2412 malloc_mutex_lock(&arena->mtx);
2413 arena_run_reg_dalloc(run, bin, ptr, size);
2415 if (run->nfree > run->free_max) {
2416 /* Demote run to lower fullness quartile. */
2417 arena_bin_run_demote(arena, bin, run);
2420 /* Medium allocation. */
2422 size = mapelm.npages << pagesize_2pow;
2423 assert((((uintptr_t)ptr) & (size - 1)) == 0);
2426 memset(ptr, 0x5a, size);
2428 malloc_mutex_lock(&arena->mtx);
2429 arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2433 arena->stats.allocated -= size;
2436 malloc_mutex_unlock(&arena->mtx);
2440 arena_new(arena_t *arena)
2444 size_t pow2_size, run_size;
2446 malloc_mutex_init(&arena->mtx);
2449 memset(&arena->stats, 0, sizeof(arena_stats_t));
2452 /* Initialize chunks. */
2453 RB_INIT(&arena->chunks);
2455 /* Initialize bins. */
2457 /* (2^n)-spaced tiny bins. */
2458 for (i = 0; i < ntbins; i++) {
2459 bin = &arena->bins[i];
2461 qr_new((arena_run_t *)&bin->runs0, link);
2462 qr_new((arena_run_t *)&bin->runs25, link);
2463 qr_new((arena_run_t *)&bin->runs50, link);
2464 qr_new((arena_run_t *)&bin->runs75, link);
2466 bin->reg_size = (1 << (tiny_min_2pow + i));
2469 * Calculate how large of a run to allocate. Make sure that at
2470 * least RUN_MIN_REGS regions fit in the run.
2472 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2473 if (run_size < pagesize)
2474 run_size = pagesize;
2475 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2476 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2477 if (run_size > arena_maxclass)
2478 run_size = arena_maxclass;
2479 bin->run_size = run_size;
2481 assert(run_size >= sizeof(arena_run_t));
2482 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2483 if (bin->nregs > (REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3))) {
2484 /* Take care not to overflow regs_mask. */
2485 bin->nregs = REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3);
2487 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2490 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2494 /* Quantum-spaced bins. */
2495 for (; i < ntbins + nqbins; i++) {
2496 bin = &arena->bins[i];
2498 qr_new((arena_run_t *)&bin->runs0, link);
2499 qr_new((arena_run_t *)&bin->runs25, link);
2500 qr_new((arena_run_t *)&bin->runs50, link);
2501 qr_new((arena_run_t *)&bin->runs75, link);
2503 bin->reg_size = quantum * (i - ntbins + 1);
2506 * Calculate how large of a run to allocate. Make sure that at
2507 * least RUN_MIN_REGS regions fit in the run.
2509 pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2510 run_size = (pow2_size << RUN_MIN_REGS_2POW);
2511 if (run_size < pagesize)
2512 run_size = pagesize;
2513 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2514 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2515 if (run_size > arena_maxclass)
2516 run_size = arena_maxclass;
2517 bin->run_size = run_size;
2519 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2520 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
2521 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2524 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2528 /* (2^n)-spaced sub-page bins. */
2529 for (; i < ntbins + nqbins + nsbins; i++) {
2530 bin = &arena->bins[i];
2532 qr_new((arena_run_t *)&bin->runs0, link);
2533 qr_new((arena_run_t *)&bin->runs25, link);
2534 qr_new((arena_run_t *)&bin->runs50, link);
2535 qr_new((arena_run_t *)&bin->runs75, link);
2537 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2540 * Calculate how large of a run to allocate. Make sure that at
2541 * least RUN_MIN_REGS regions fit in the run.
2543 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2544 if (run_size < pagesize)
2545 run_size = pagesize;
2546 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2547 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2548 if (run_size > arena_maxclass)
2549 run_size = arena_maxclass;
2550 bin->run_size = run_size;
2552 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2553 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
2554 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2557 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2562 arena->magic = ARENA_MAGIC;
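/*
 * Worked example with hypothetical numbers (not taken from the original):
 * suppose a bin has reg_size == 64, run_size == 4096, and
 * sizeof(arena_run_t) == 40.  Then
 *
 *	nregs       = (4096 - 40) / 64 = 63
 *	reg0_offset = 4096 - (63 * 64) = 64
 *
 * so the 63 regions are packed at the end of the run, and the slack at the
 * start of the run holds the run header (including its region bitmap).
 */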
2568 /* Create a new arena and insert it into the arenas array at index ind. */
2570 arenas_extend(unsigned ind)
2574 /* Allocate enough space for trailing bins. */
2575 ret = (arena_t *)base_alloc(sizeof(arena_t)
2576 + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2577 if (ret != NULL && arena_new(ret) == false) {
2581 /* Only reached if there is an OOM error. */
2584 * OOM here is quite inconvenient to propagate, since dealing with it
2585 * would require a check for failure in the fast path. Instead, punt
2586 * by using arenas[0]. In practice, this is an extremely unlikely failure.
2589 malloc_printf("%s: (malloc) Error initializing arena\n", _getprogname());
2600 /******************************************************************************/
2602 * Begin general internal functions.
2606 huge_malloc(size_t size)
2612 /* Allocate one or more contiguous chunks for this request. */
2614 csize = CHUNK_CEILING(size);
2616 /* size is large enough to cause size_t wrap-around. */
2620 /* Allocate a chunk node with which to track the chunk. */
2621 node = base_chunk_node_alloc();
2625 ret = chunk_alloc(csize);
2627 base_chunk_node_dealloc(node);
2631 /* Insert node into huge. */
2635 malloc_mutex_lock(&chunks_mtx);
2636 RB_INSERT(chunk_tree_s, &huge, node);
2639 huge_allocated += csize;
2641 malloc_mutex_unlock(&chunks_mtx);
2643 if (opt_junk && ret != NULL)
2644 memset(ret, 0xa5, csize);
2645 else if (opt_zero && ret != NULL)
2646 memset(ret, 0, csize);
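/*
 * Illustrative sketch (an assumption, not the original macro): CHUNK_CEILING()
 * rounds a request up to a whole number of chunks, roughly
 *
 *	#define CHUNK_CEILING(s)	(((s) + chunk_size_mask) & ~chunk_size_mask)
 *
 * A request within chunk_size - 1 bytes of SIZE_T_MAX therefore wraps around
 * to 0, which is the condition the wrap-around check above catches before any
 * chunk is actually allocated.
 */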
2652 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2656 /* Avoid moving the allocation if the size class would not change. */
2657 if (oldsize > arena_maxclass &&
2658 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize))
2662 * If we get here, then size and oldsize are different enough that we
2663 * need to use a different size class. In that case, fall back to
2664 * allocating new space and copying.
2666 ret = huge_malloc(size);
2670 if (CHUNK_ADDR2BASE(ptr) == ptr) {
2671 /* The old allocation is a chunk. */
2673 memcpy(ret, ptr, size);
2675 memcpy(ret, ptr, oldsize);
2677 /* The old allocation is a region. */
2678 assert(oldsize < size);
2679 memcpy(ret, ptr, oldsize);
2686 huge_dalloc(void *ptr)
2691 malloc_mutex_lock(&chunks_mtx);
2693 /* Extract from tree of huge allocations. */
2695 node = RB_FIND(chunk_tree_s, &huge, &key);
2696 assert(node != NULL);
2697 assert(node->chunk == ptr);
2698 RB_REMOVE(chunk_tree_s, &huge, node);
2701 /* Update counters. */
2703 huge_allocated -= node->size;
2706 malloc_mutex_unlock(&chunks_mtx);
2711 memset(node->chunk, 0x5a, node->size);
2713 chunk_dealloc(node->chunk, node->size);
2715 base_chunk_node_dealloc(node);
2719 imalloc(size_t size)
2725 if (size <= arena_maxclass)
2726 ret = arena_malloc(choose_arena(), size);
2728 ret = huge_malloc(size);
2734 ipalloc(size_t alignment, size_t size)
2740 * Take advantage of the fact that for each size class, every object is
2741 * aligned at the smallest power of two that is non-zero in the base
2742 * two representation of the size. For example:
2744 * Size | Base 2 | Minimum alignment
2745 * -----+----------+------------------
2747 * 144 | 10100000 | 32
2748 * 192 | 11000000 | 64
2750 * Depending on runtime settings, it is possible that arena_malloc()
2751 * will further round up to a power of two, but that never causes
2752 * correctness issues.
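	/*
	 * Worked example (not in the original): the minimum natural alignment
	 * of a size is its lowest set bit, i.e. (size & -size).  For 192
	 * (binary 11000000) that is 64, so any request with alignment <= 64
	 * is satisfied simply by rounding the size up to a multiple of the
	 * alignment, as done below.
	 */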
2754 alloc_size = (size + (alignment - 1)) & (-alignment);
2755 if (alloc_size < size) {
2756 /* size_t overflow. */
2760 if (alloc_size <= arena_maxclass)
2761 ret = arena_malloc(choose_arena(), alloc_size);
2763 if (alignment <= chunk_size)
2764 ret = huge_malloc(size);
2766 size_t chunksize, offset;
2770 * This allocation requires alignment that is even
2771 * larger than chunk alignment. This means that
2772 * huge_malloc() isn't good enough.
2774 * Allocate almost twice as many chunks as are demanded
2775 * by the size or alignment, in order to assure the
2776 * alignment can be achieved, then unmap leading and trailing chunks.
2780 chunksize = CHUNK_CEILING(size);
2782 if (size >= alignment)
2783 alloc_size = chunksize + alignment - chunk_size;
2785 alloc_size = (alignment << 1) - chunk_size;
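/*
 * Illustrative note (not in the original): chunk_alloc() returns chunk-aligned
 * memory, so the distance from the returned address to the next alignment
 * boundary is at most alignment - chunk_size.  Over-allocating to
 * 2 * alignment - chunk_size bytes therefore always leaves at least alignment
 * (and hence at least chunksize) usable bytes past that boundary.
 */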
2788 * Allocate a chunk node with which to track the chunk.
2790 node = base_chunk_node_alloc();
2794 ret = chunk_alloc(alloc_size);
2796 base_chunk_node_dealloc(node);
2800 offset = (uintptr_t)ret & (alignment - 1);
2801 assert(offset % chunk_size == 0);
2802 assert(offset < alloc_size);
2804 /* Trim trailing space. */
2805 chunk_dealloc((void *)((uintptr_t)ret
2806 + chunksize), alloc_size - chunksize);
2810 /* Trim leading space. */
2811 chunk_dealloc(ret, alignment - offset);
2813 ret = (void *)((uintptr_t)ret + (alignment - offset));
2816 trailsize = alloc_size - (alignment - offset) - chunksize;
2818 if (trailsize != 0) {
2819 /* Trim trailing space. */
2820 assert(trailsize < alloc_size);
2821 chunk_dealloc((void *)((uintptr_t)ret
2822 + chunksize), trailsize);
2826 /* Insert node into huge. */
2828 node->size = chunksize;
2830 malloc_mutex_lock(&chunks_mtx);
2831 RB_INSERT(chunk_tree_s, &huge, node);
2833 huge_allocated += size;
2835 malloc_mutex_unlock(&chunks_mtx);
2838 memset(ret, 0xa5, chunksize);
2840 memset(ret, 0, chunksize);
2844 assert(((uintptr_t)ret & (alignment - 1)) == 0);
2849 icalloc(size_t size)
2853 if (size <= arena_maxclass) {
2854 ret = arena_malloc(choose_arena(), size);
2857 memset(ret, 0, size);
2860 * The virtual memory system provides zero-filled pages, so
2861 * there is no need to do so manually, unless opt_junk is
2862 * enabled, in which case huge_malloc() fills huge allocations with junk.
2865 ret = huge_malloc(size);
2870 memset(ret, 0, size);
2872 else if ((uintptr_t)ret >= (uintptr_t)brk_base
2873 && (uintptr_t)ret < (uintptr_t)brk_max) {
2875 * This may be a re-used brk chunk. Therefore, zero the memory.
2878 memset(ret, 0, size);
2887 isalloc(const void *ptr)
2890 arena_chunk_t *chunk;
2892 assert(ptr != NULL);
2894 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2897 assert(chunk->arena->magic == ARENA_MAGIC);
2899 ret = arena_salloc(ptr);
2901 chunk_node_t *node, key;
2903 /* Chunk (huge allocation). */
2905 malloc_mutex_lock(&chunks_mtx);
2907 /* Extract from tree of huge allocations. */
2908 key.chunk = (void *)ptr;
2909 node = RB_FIND(chunk_tree_s, &huge, &key);
2910 assert(node != NULL);
2914 malloc_mutex_unlock(&chunks_mtx);
2921 iralloc(void *ptr, size_t size)
2926 assert(ptr != NULL);
2929 oldsize = isalloc(ptr);
2931 if (size <= arena_maxclass)
2932 ret = arena_ralloc(ptr, size, oldsize);
2934 ret = huge_ralloc(ptr, size, oldsize);
2942 arena_chunk_t *chunk;
2944 assert(ptr != NULL);
2946 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2950 malloc_mutex_lock(&chunk->arena->mtx);
2951 chunk->arena->stats.ndalloc++;
2952 malloc_mutex_unlock(&chunk->arena->mtx);
2954 arena_dalloc(chunk->arena, chunk, ptr);
2960 malloc_print_stats(void)
2963 if (opt_print_stats) {
2964 malloc_printf("___ Begin malloc statistics ___\n");
2965 malloc_printf("Number of CPUs: %u\n", ncpus);
2966 malloc_printf("Number of arenas: %u\n", narenas);
2967 malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size,
2969 malloc_printf("Quantum size: %zu (2^%zu)\n", quantum,
2971 malloc_printf("Max small size: %zu\n", small_max);
2972 malloc_printf("Pointer size: %u\n", sizeof(void *));
2973 malloc_printf("Assertions %s\n",
2984 size_t allocated, total;
2988 /* Calculate and print allocated/total stats. */
2991 for (i = 0, allocated = 0; i < narenas; i++) {
2992 if (arenas[i] != NULL) {
2993 malloc_mutex_lock(&arenas[i]->mtx);
2994 allocated += arenas[i]->stats.allocated;
2995 malloc_mutex_unlock(&arenas[i]->mtx);
3000 malloc_mutex_lock(&chunks_mtx);
3001 allocated += huge_allocated;
3002 total = stats_chunks.curchunks * chunk_size;
3003 malloc_mutex_unlock(&chunks_mtx);
3005 malloc_printf("Allocated: %zu, space used: %zu\n",
3008 /* Print base stats. */
3010 malloc_mutex_lock(&base_mtx);
3011 malloc_printf("\nbase:\n");
3012 malloc_printf(" %13s\n", "total");
3013 malloc_printf(" %13llu\n", base_total);
3014 malloc_mutex_unlock(&base_mtx);
3017 /* Print chunk stats. */
3019 chunk_stats_t chunks_stats;
3021 malloc_mutex_lock(&chunks_mtx);
3022 chunks_stats = stats_chunks;
3023 malloc_mutex_unlock(&chunks_mtx);
3025 malloc_printf("\nchunks:\n");
3026 malloc_printf(" %13s%13s%13s\n", "nchunks",
3027 "highchunks", "curchunks");
3028 malloc_printf(" %13llu%13lu%13lu\n",
3029 chunks_stats.nchunks,
3030 chunks_stats.highchunks,
3031 chunks_stats.curchunks);
3034 /* Print huge stats. */
3035 malloc_printf("\nhuge:\n");
3036 malloc_printf("%12s %12s %12s\n",
3037 "nmalloc", "ndalloc", "allocated");
3038 malloc_printf("%12llu %12llu %12zu\n",
3039 huge_nmalloc, huge_ndalloc, huge_allocated);
3041 /* Print stats for each arena. */
3042 for (i = 0; i < narenas; i++) {
3044 if (arena != NULL) {
3046 "\narenas[%u] statistics:\n", i);
3047 malloc_mutex_lock(&arena->mtx);
3049 malloc_mutex_unlock(&arena->mtx);
3053 #endif /* #ifdef MALLOC_STATS */
3054 malloc_printf("--- End malloc statistics ---\n");
3059 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3060 * implementation has to take pains to avoid infinite recursion during initialization.
3067 if (malloc_initialized == false)
3068 return (malloc_init_hard());
3074 malloc_init_hard(void)
3078 char buf[PATH_MAX + 1];
3081 malloc_mutex_lock(&init_lock);
3082 if (malloc_initialized) {
3084 * Another thread initialized the allocator before this one
3085 * acquired init_lock.
3087 malloc_mutex_unlock(&init_lock);
3091 /* Get number of CPUs. */
3098 len = sizeof(ncpus);
3099 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3105 /* Get page size. */
3109 result = sysconf(_SC_PAGESIZE);
3110 assert(result != -1);
3111 pagesize = (unsigned) result;
3114 * We assume that pagesize is a power of 2 when calculating pagesize_2pow.
3117 assert(((result - 1) & result) == 0);
3118 pagesize_2pow = ffs(result) - 1;
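	/*
	 * Example (not in the original): with 4096 byte pages, ffs(4096)
	 * returns 13, so pagesize_2pow == 12 and
	 * (1U << pagesize_2pow) == pagesize.
	 */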
3121 for (i = 0; i < 3; i++) {
3122 /* Get runtime configuration. */
3125 if ((linklen = readlink("/etc/malloc.conf", buf,
3126 sizeof(buf) - 1)) != -1) {
3128 * Use the contents of the "/etc/malloc.conf"
3129 * symbolic link's name.
3131 buf[linklen] = '\0';
3134 /* No configuration specified. */
3140 if (issetugid() == 0 && (opts =
3141 getenv("MALLOC_OPTIONS")) != NULL) {
3143 * Do nothing; opts is already initialized to
3144 * the value of the MALLOC_OPTIONS environment variable.
3148 /* No configuration specified. */
3154 if (_malloc_options != NULL) {
3156 * Use options that were compiled into the program.
3158 opts = _malloc_options;
3160 /* No configuration specified. */
3170 for (j = 0; opts[j] != '\0'; j++) {
3192 * Run fullness quartile limits don't have
3193 * enough resolution if there are too few
3194 * regions for the largest bin size classes.
3196 if (opt_chunk_2pow > pagesize_2pow + 4)
3200 if (opt_chunk_2pow < CHUNK_2POW_MAX)
3204 opt_narenas_lshift--;
3207 opt_narenas_lshift++;
3210 opt_print_stats = false;
3213 opt_print_stats = true;
3216 if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3220 if (opt_quantum_2pow < pagesize_2pow - 1)
3224 if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3225 opt_small_max_2pow--;
3228 if (opt_small_max_2pow < pagesize_2pow - 1)
3229 opt_small_max_2pow++;
3244 opt_xmalloc = false;
3256 malloc_printf("%s: (malloc) Unsupported"
3257 " character in malloc options: '%c'\n",
3258 _getprogname(), opts[j]);
3263 /* Take care to call atexit() only once. */
3264 if (opt_print_stats) {
3265 /* Print statistics at exit. */
3266 atexit(malloc_print_stats);
3269 /* Set variables according to the value of opt_small_max_2pow. */
3270 if (opt_small_max_2pow < opt_quantum_2pow)
3271 opt_small_max_2pow = opt_quantum_2pow;
3272 small_max = (1 << opt_small_max_2pow);
3274 /* Set bin-related variables. */
3275 bin_maxclass = (pagesize >> 1);
3276 if (pagesize_2pow > RUN_MIN_REGS_2POW + 1)
3277 tiny_min_2pow = pagesize_2pow - (RUN_MIN_REGS_2POW + 1);
3280 assert(opt_quantum_2pow >= tiny_min_2pow);
3281 ntbins = opt_quantum_2pow - tiny_min_2pow;
3282 assert(ntbins <= opt_quantum_2pow);
3283 nqbins = (small_max >> opt_quantum_2pow);
3284 nsbins = pagesize_2pow - opt_small_max_2pow - 1;
3286 /* Set variables according to the value of opt_quantum_2pow. */
3287 quantum = (1 << opt_quantum_2pow);
3288 quantum_mask = quantum - 1;
3290 small_min = (quantum >> 1) + 1;
3293 assert(small_min <= quantum);
3295 /* Set variables according to the value of opt_chunk_2pow. */
3296 chunk_size = (1 << opt_chunk_2pow);
3297 chunk_size_mask = chunk_size - 1;
3298 arena_chunk_maplen = (1 << (opt_chunk_2pow - pagesize_2pow));
3299 arena_maxclass = (chunk_size >> 1);
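	/*
	 * Example with a hypothetical chunk size (not from the original): with
	 * opt_chunk_2pow == 20 (1 MB chunks) and 4 kB pages,
	 * arena_chunk_maplen == 256 and arena_maxclass == 512 kB, so any
	 * request larger than 512 kB bypasses the arenas entirely and is
	 * handled by huge_malloc().
	 */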
3304 memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3307 /* Various sanity checks on the configuration. */
3308 assert(quantum >= sizeof(void *));
3309 assert(quantum <= pagesize);
3310 assert(chunk_size >= pagesize);
3311 assert(quantum * 4 <= chunk_size);
3313 /* Initialize chunks data. */
3314 malloc_mutex_init(&chunks_mtx);
3317 malloc_mutex_init(&brk_mtx);
3319 brk_prev = brk_base;
3327 RB_INIT(&old_chunks);
3329 /* Initialize base allocation data structures. */
3335 * Allocate a base chunk here, since it doesn't actually have to be
3336 * chunk-aligned. Doing this before allocating any other chunks allows
3337 * the use of space that would otherwise be wasted.
3339 base_chunk_alloc(0);
3341 base_chunk_nodes = NULL;
3342 malloc_mutex_init(&base_mtx);
3346 * For SMP systems, create four times as many arenas as there
3347 * are CPUs by default.
3349 opt_narenas_lshift += 2;
3352 /* Determine how many arenas to use. */
3354 if (opt_narenas_lshift > 0) {
3355 if ((narenas << opt_narenas_lshift) > narenas)
3356 narenas <<= opt_narenas_lshift;
3358 * Make sure not to exceed the limits of what base_alloc() can handle.
3361 if (narenas * sizeof(arena_t *) > chunk_size)
3362 narenas = chunk_size / sizeof(arena_t *);
3363 } else if (opt_narenas_lshift < 0) {
3364 if ((narenas << opt_narenas_lshift) < narenas)
3365 narenas <<= opt_narenas_lshift;
3366 /* Make sure there is at least one arena. */
3373 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
3374 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
3375 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
3376 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
3377 223, 227, 229, 233, 239, 241, 251, 257, 263};
3378 unsigned i, nprimes, parenas;
3381 * Pick a prime number of hash arenas that is more than narenas
3382 * so that direct hashing of pthread_self() pointers tends to
3383 * spread allocations evenly among the arenas.
3385 assert((narenas & 1) == 0); /* narenas must be even. */
3386 nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
3387 parenas = primes[nprimes - 1]; /* In case not enough primes. */
3388 for (i = 1; i < nprimes; i++) {
3389 if (primes[i] > narenas) {
3390 parenas = primes[i];
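		/*
		 * Illustrative sketch only (not the code the allocator
		 * actually uses): with a prime number of arenas, a direct
		 * hash of the thread pointer such as
		 *
		 *	ind = (unsigned)((uintptr_t)pthread_self() % narenas);
		 *
		 * spreads the regularly spaced thread structure addresses
		 * across all arenas, where a power-of-two modulus would alias
		 * many threads onto a few slots.
		 */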
3402 /* Allocate and initialize arenas. */
3403 arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3404 if (arenas == NULL) {
3405 malloc_mutex_unlock(&init_lock);
3409 * Zero the array. In practice, this should always be pre-zeroed,
3410 * since it was just mmap()ed, but let's be sure.
3412 memset(arenas, 0, sizeof(arena_t *) * narenas);
3415 * Initialize one arena here. The rest are lazily created in
3416 * arena_choose_hard().
3419 if (arenas[0] == NULL) {
3420 malloc_mutex_unlock(&init_lock);
3424 malloc_mutex_init(&arenas_mtx);
3426 malloc_initialized = true;
3427 malloc_mutex_unlock(&init_lock);
3432 * End general internal functions.
3434 /******************************************************************************/
3436 * Begin malloc(3)-compatible functions.
3444 if (malloc_init()) {
3450 if (opt_sysv == false)
3458 ret = imalloc(size);
3463 malloc_printf("%s: (malloc) Error in malloc(%zu):"
3464 " out of memory\n", _getprogname(), size);
3470 UTRACE(0, size, ret);
3475 posix_memalign(void **memptr, size_t alignment, size_t size)
3483 /* Make sure that alignment is a large enough power of 2. */
3484 if (((alignment - 1) & alignment) != 0
3485 || alignment < sizeof(void *)) {
3487 malloc_printf("%s: (malloc) Error in"
3488 " posix_memalign(%zu, %zu):"
3489 " invalid alignment\n",
3490 _getprogname(), alignment, size);
3498 result = ipalloc(alignment, size);
3501 if (result == NULL) {
3503 malloc_printf("%s: (malloc) Error in"
3504 " posix_memalign(%zu, %zu): out of memory\n",
3505 _getprogname(), alignment, size);
3516 UTRACE(0, size, result);
3521 calloc(size_t num, size_t size)
3526 if (malloc_init()) {
3531 num_size = num * size;
3532 if (num_size == 0) {
3533 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3540 * Try to avoid division here. We know that it isn't possible to
3541 * overflow during multiplication if neither operand uses any of the
3542 * most significant half of the bits in a size_t.
3544 } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
3545 && (num_size / size != num)) {
3546 /* size_t overflow. */
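	/*
	 * Example (not in the original): on a 64-bit system the mask
	 * (SIZE_T_MAX << 32) covers the upper half of a size_t.  If either
	 * num or size has a bit set there, the product may have overflowed
	 * and the division check runs; otherwise both operands are below
	 * 2^32, so the product fits in 64 bits and the division is skipped
	 * entirely.
	 */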
3551 ret = icalloc(num_size);
3556 malloc_printf("%s: (malloc) Error in"
3557 " calloc(%zu, %zu): out of memory\n",
3558 _getprogname(), num, size);
3564 UTRACE(0, num_size, ret);
3569 realloc(void *ptr, size_t size)
3574 if (opt_sysv == false)
3585 assert(malloc_initialized);
3587 ret = iralloc(ptr, size);
3591 malloc_printf("%s: (malloc) Error in"
3592 " realloc(%p, %zu): out of memory\n",
3593 _getprogname(), ptr, size);
3602 ret = imalloc(size);
3606 malloc_printf("%s: (malloc) Error in"
3607 " realloc(%p, %zu): out of memory\n",
3608 _getprogname(), ptr, size);
3616 UTRACE(ptr, size, ret);
3626 assert(malloc_initialized);
3633 * End malloc(3)-compatible functions.
3635 /******************************************************************************/
3637 * Begin non-standard functions.
3641 malloc_usable_size(const void *ptr)
3644 assert(ptr != NULL);
3646 return (isalloc(ptr));
3650 * End non-standard functions.
3652 /******************************************************************************/
3654 * Begin library-private functions, used by threading libraries for protection
3655 * of malloc during fork(). These functions are only called if the program is
3656 * running in threaded mode, so there is no need to check whether the program is threaded here.
3661 _malloc_prefork(void)
3665 /* Acquire all mutexes in a safe order. */
3667 malloc_mutex_lock(&arenas_mtx);
3668 for (i = 0; i < narenas; i++) {
3669 if (arenas[i] != NULL)
3670 malloc_mutex_lock(&arenas[i]->mtx);
3672 malloc_mutex_unlock(&arenas_mtx);
3674 malloc_mutex_lock(&base_mtx);
3676 malloc_mutex_lock(&chunks_mtx);
3680 _malloc_postfork(void)
3684 /* Release all mutexes, now that fork() has completed. */
3686 malloc_mutex_unlock(&chunks_mtx);
3688 malloc_mutex_unlock(&base_mtx);
3690 malloc_mutex_lock(&arenas_mtx);
3691 for (i = 0; i < narenas; i++) {
3692 if (arenas[i] != NULL)
3693 malloc_mutex_unlock(&arenas[i]->mtx);
3695 malloc_mutex_unlock(&arenas_mtx);
3699 * End library-private functions.
3701 /******************************************************************************/