2 * Copyright (C) 2006-2010 Jason Evans <jasone@FreeBSD.org>.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems. The following
33 * features are included for this purpose:
35 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 * contention and cache sloshing.
38 * + Thread-specific caching is used if there are multiple threads, which
39 * reduces the amount of locking.
41 * + Cache line sharing between arenas is avoided for internal data
44 * + Memory is managed in chunks and runs (chunks can be split into runs),
45 * rather than as individual pages. This provides a constant-time
46 * mechanism for associating allocations with particular arenas.
48 * Allocation requests are rounded up to the nearest size class, and no record
49 * of the original request size is maintained. Allocations are broken into
50 * categories according to size class. Assuming runtime defaults, 4 KiB pages
51 * and a 16 byte quantum on a 32-bit system, the size classes in each category are as follows:
54 * |========================================|
55 * | Category | Subcategory | Size |
56 * |========================================|
57 * | Small | Tiny | 2 |
60 * | |------------------+----------|
61 * | | Quantum-spaced | 16 |
68 * | |------------------+----------|
69 * | | Cacheline-spaced | 192 |
75 * | |------------------+----------|
76 * | | Sub-page | 768 |
83 * |========================================|
91 * |========================================|
99 * |========================================|
104 * |========================================|
106 * Different mechanisms are used according to category:
108 * Small/medium : Each size class is segregated into its own set of runs.
109 * Each run maintains a bitmap of which regions are
112 * Large : Each allocation is backed by a dedicated run. Metadata are stored
113 * in the associated arena chunk header maps.
115 * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
116 * Metadata are stored in a separate red-black tree.
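 *
 * For example, assuming the defaults above (4 KiB pages, 16 byte quantum): a
 * request for 100 bytes is rounded up to the 112 byte quantum-spaced class,
 * and a request for 1000 bytes is rounded up to the 1024 byte sub-page-spaced
 * class.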
118 *******************************************************************************
122 * MALLOC_PRODUCTION disables assertions and statistics gathering. It also
123 * defaults the A and J runtime options to off. These settings are appropriate
124 * for production systems.
126 #define MALLOC_PRODUCTION
128 #ifndef MALLOC_PRODUCTION
130 * MALLOC_DEBUG enables assertions and other sanity checks, and disables
133 # define MALLOC_DEBUG
135 /* MALLOC_STATS enables statistics calculation. */
136 # define MALLOC_STATS
140 * MALLOC_TINY enables support for tiny objects, which are smaller than one
146 * MALLOC_TCACHE enables a thread-specific caching layer for small and medium
147 * objects. This makes it possible to allocate/deallocate objects without any
148 * locking when the cache is in the steady state.
150 #define MALLOC_TCACHE
153 * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
154 * segment (DSS). In an ideal world, this functionality would be completely
155 * unnecessary, but we are burdened by history and the lack of resource limits
156 * for anonymous mapped memory.
160 #include <sys/cdefs.h>
161 __FBSDID("$FreeBSD$");
163 #include "libc_private.h"
167 #include "spinlock.h"
168 #include "namespace.h"
169 #include <sys/mman.h>
170 #include <sys/param.h>
171 #include <sys/time.h>
172 #include <sys/types.h>
173 #include <sys/sysctl.h>
175 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
177 #include <machine/cpufunc.h>
178 #include <machine/param.h>
179 #include <machine/vmparam.h>
190 #include <inttypes.h>
196 #include "un-namespace.h"
198 #include "libc_private.h"
202 #if (defined(MALLOC_TCACHE) && defined(MALLOC_STATS))
208 /* Disable inlining to make debugging easier. */
212 /* Size of stack-allocated buffer passed to strerror_r(). */
213 #define STRERROR_BUF 64
216 * Minimum alignment of allocations is 2^LG_QUANTUM bytes.
219 # define LG_QUANTUM 4
220 # define LG_SIZEOF_PTR 2
221 # define CPU_SPINWAIT __asm__ volatile("pause")
222 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
225 # define LG_QUANTUM 4
226 # define LG_SIZEOF_PTR 3
227 # define TLS_MODEL /* default */
230 # define LG_QUANTUM 4
231 # define LG_SIZEOF_PTR 3
235 # define LG_QUANTUM 4
236 # define LG_SIZEOF_PTR 3
237 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
240 # define LG_QUANTUM 4
241 # define LG_SIZEOF_PTR 3
242 # define CPU_SPINWAIT __asm__ volatile("pause")
243 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
246 # define LG_QUANTUM 3
247 # define LG_SIZEOF_PTR 2
251 # define LG_QUANTUM 3
252 # define LG_SIZEOF_PTR 2
256 # define LG_QUANTUM 4
257 # define LG_SIZEOF_PTR 3
258 # define TLS_MODEL /* default */
259 #elif defined(__powerpc__)
260 # define LG_QUANTUM 4
261 # define LG_SIZEOF_PTR 2
262 # define TLS_MODEL /* default */
265 # define LG_QUANTUM 4
268 #define QUANTUM ((size_t)(1U << LG_QUANTUM))
269 #define QUANTUM_MASK (QUANTUM - 1)
271 #define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
273 /* sizeof(int) == (1U << LG_SIZEOF_INT). */
274 #ifndef LG_SIZEOF_INT
275 # define LG_SIZEOF_INT 2
278 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
279 #if (!defined(PIC) && !defined(NO_TLS))
284 /* MALLOC_TCACHE requires TLS. */
285 # ifdef MALLOC_TCACHE
286 # undef MALLOC_TCACHE
291 * Size and alignment of memory chunks that are allocated by the OS's virtual
294 #define LG_CHUNK_DEFAULT 22
297 * The minimum ratio of active:dirty pages per arena is computed as:
299 * (nactive >> opt_lg_dirty_mult) >= ndirty
301 * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
302 * times as many active pages as dirty pages.
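 *
 * For example, with opt_lg_dirty_mult at its default of 5 and 1024 active
 * pages in an arena, at most 1024 >> 5 == 32 dirty pages are tolerated before
 * unused dirty pages are purged.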
304 #define LG_DIRTY_MULT_DEFAULT 5
307 * Maximum size of L1 cache line. This is used to avoid cache line aliasing.
308 * In addition, this controls the spacing of cacheline-spaced size classes.
310 #define LG_CACHELINE 6
311 #define CACHELINE ((size_t)(1U << LG_CACHELINE))
312 #define CACHELINE_MASK (CACHELINE - 1)
315 * Subpages are an artificially designated partitioning of pages. Their only
316 * purpose is to support subpage-spaced size classes.
318 * There must be at least 4 subpages per page, due to the way size classes are
322 #define SUBPAGE ((size_t)(1U << LG_SUBPAGE))
323 #define SUBPAGE_MASK (SUBPAGE - 1)
326 /* Smallest size class to support. */
327 # define LG_TINY_MIN 1
331 * Maximum size class that is a multiple of the quantum, but not (necessarily)
332 * a power of 2. Above this size, allocations are rounded up to the nearest
335 #define LG_QSPACE_MAX_DEFAULT 7
338 * Maximum size class that is a multiple of the cacheline, but not (necessarily)
339 * a power of 2. Above this size, allocations are rounded up to the nearest
342 #define LG_CSPACE_MAX_DEFAULT 9
345 * Maximum medium size class. This must not be more than 1/4 of a chunk
346 * (LG_MEDIUM_MAX_DEFAULT <= LG_CHUNK_DEFAULT - 2).
348 #define LG_MEDIUM_MAX_DEFAULT 15
351 * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
352 * as small as possible such that this setting is still honored, without
353 * violating other constraints. The goal is to make runs as small as possible
354 * without exceeding a per run external fragmentation threshold.
356 * We use binary fixed point math for overhead computations, where the binary
357 * point is implicitly RUN_BFP bits to the left.
359 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
360 * honored for some/all object sizes, since there is one bit of header overhead
361 * per object (plus a constant). This constraint is relaxed (ignored) for runs
362 * that are so small that the per-region overhead is greater than:
364 * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
367 /* \/ Implicit binary fixed point. */
368 #define RUN_MAX_OVRHD 0x0000003dU
369 #define RUN_MAX_OVRHD_RELAX 0x00001800U
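/*
 * Illustrative reading of the above, assuming a 12-bit binary point (RUN_BFP):
 * RUN_MAX_OVRHD == 0x3d then corresponds to a header overhead cap of 61/4096,
 * i.e. roughly 1.5% of a run's size.
 */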
371 /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
372 #define RUN_MAX_SMALL \
373 (arena_maxclass <= (1U << (CHUNK_MAP_LG_PG_RANGE + PAGE_SHIFT)) \
374 ? arena_maxclass : (1U << (CHUNK_MAP_LG_PG_RANGE + \
378 * Hyper-threaded CPUs may need a special instruction inside spin loops in
379 * order to yield to another virtual CPU. If no such instruction is defined
380 * above, make CPU_SPINWAIT a no-op.
383 # define CPU_SPINWAIT
387 * Adaptive spinning must eventually switch to blocking, in order to avoid the
388 * potential for priority inversion deadlock. Backing off past a certain point
389 * can actually waste time.
391 #define LG_SPIN_LIMIT 11
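/*
 * With LG_SPIN_LIMIT == 11, malloc_spin_lock() spins in rounds of 2, 4, ...,
 * 2048 pause iterations (about 4094 in total), retrying the lock between
 * rounds, before finally blocking.
 */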
395 * Default number of cache slots for each bin in the thread cache (0:
398 # define LG_TCACHE_NSLOTS_DEFAULT 7
400 * (1U << opt_lg_tcache_gc_sweep) is the approximate number of
401 * allocation events between full GC sweeps (-1: disabled). Integer
402 * rounding may cause the actual number to be slightly higher, since GC is
403 * performed incrementally.
405 # define LG_TCACHE_GC_SWEEP_DEFAULT 13
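/*
 * With the default of 13 above, a full garbage-collection pass over the thread
 * cache completes roughly every 2^13 == 8192 allocation events, spread
 * incrementally across the bins.
 */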
408 /******************************************************************************/
411 * Mutexes based on spinlocks. We can't use normal pthread spinlocks in all
412 * places, because they require malloc()ed memory, which causes bootstrapping
413 * issues in some cases.
419 /* Set to true once the allocator has been initialized. */
420 static bool malloc_initialized = false;
422 /* Used to avoid initialization races. */
423 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
425 /******************************************************************************/
427 * Statistics data structures.
433 typedef struct tcache_bin_stats_s tcache_bin_stats_t;
434 struct tcache_bin_stats_s {
436 * Number of allocation requests that corresponded to the size of this
443 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
444 struct malloc_bin_stats_s {
446 * Number of allocation requests that corresponded to the size of this
452 /* Number of tcache fills from this bin. */
455 /* Number of tcache flushes to this bin. */
459 /* Total number of runs created for this bin's size class. */
463 * Total number of runs reused by extracting them from the runs tree for
464 * this bin's size class.
468 /* High-water mark for this bin. */
471 /* Current number of runs in this bin. */
475 typedef struct malloc_large_stats_s malloc_large_stats_t;
476 struct malloc_large_stats_s {
478 * Number of allocation requests that corresponded to this size class.
482 /* High-water mark for this size class. */
485 /* Current number of runs of this size class. */
489 typedef struct arena_stats_s arena_stats_t;
490 struct arena_stats_s {
491 /* Number of bytes currently mapped. */
495 * Total number of purge sweeps, total number of madvise calls made,
496 * and total pages purged in order to keep dirty unused memory under
503 /* Per-size-category statistics. */
504 size_t allocated_small;
505 uint64_t nmalloc_small;
506 uint64_t ndalloc_small;
508 size_t allocated_medium;
509 uint64_t nmalloc_medium;
510 uint64_t ndalloc_medium;
512 size_t allocated_large;
513 uint64_t nmalloc_large;
514 uint64_t ndalloc_large;
517 * One element for each possible size class, including sizes that
518 * overlap with bin size classes. This is necessary because ipalloc()
519 * sometimes has to use such large objects in order to assure proper
522 malloc_large_stats_t *lstats;
525 typedef struct chunk_stats_s chunk_stats_t;
526 struct chunk_stats_s {
527 /* Number of chunks that were allocated. */
530 /* High-water mark for number of chunks allocated. */
534 * Current number of chunks allocated. This value isn't maintained for
535 * any other purpose, so keep track of it in order to be able to set
541 #endif /* #ifdef MALLOC_STATS */
543 /******************************************************************************/
545 * Extent data structures.
548 /* Tree of extents. */
549 typedef struct extent_node_s extent_node_t;
550 struct extent_node_s {
552 /* Linkage for the size/address-ordered tree. */
553 rb_node(extent_node_t) link_szad;
556 /* Linkage for the address-ordered tree. */
557 rb_node(extent_node_t) link_ad;
559 /* Pointer to the extent that this tree node is responsible for. */
562 /* Total region size. */
565 typedef rb_tree(extent_node_t) extent_tree_t;
567 /******************************************************************************/
569 * Arena data structures.
572 typedef struct arena_s arena_t;
573 typedef struct arena_bin_s arena_bin_t;
575 /* Each element of the chunk map corresponds to one page within the chunk. */
576 typedef struct arena_chunk_map_s arena_chunk_map_t;
577 struct arena_chunk_map_s {
579 * Linkage for run trees. There are two disjoint uses:
581 * 1) arena_t's runs_avail tree.
582 * 2) arena_run_t conceptually uses this linkage for in-use non-full
583 * runs, rather than directly embedding linkage.
585 rb_node(arena_chunk_map_t) link;
588 * Run address (or size) and various flags are stored together. The bit
589 * layout looks like (assuming 32-bit system):
591 * ???????? ???????? ????cccc ccccdzla
593 * ? : Unallocated: Run address for first/last pages, unset for internal
595 * Small/medium: Don't care.
596 * Large: Run size for first page, unset for trailing pages.
598 * c : refcount (could overflow for PAGE_SIZE >= 128 KiB)
604 * Following are example bit patterns for the three types of runs.
606 * p : run page offset
613 * ssssssss ssssssss ssss---- --------
614 * xxxxxxxx xxxxxxxx xxxx---- ----d---
615 * ssssssss ssssssss ssss---- -----z--
618 * pppppppp ppppcccc cccccccc cccc---a
619 * pppppppp ppppcccc cccccccc cccc---a
620 * pppppppp ppppcccc cccccccc cccc---a
623 * ssssssss ssssssss ssss---- ------la
624 * -------- -------- -------- ------la
625 * -------- -------- -------- ------la
628 #define CHUNK_MAP_PG_MASK ((size_t)0xfff00000U)
629 #define CHUNK_MAP_PG_SHIFT 20
630 #define CHUNK_MAP_LG_PG_RANGE 12
632 #define CHUNK_MAP_RC_MASK ((size_t)0xffff0U)
633 #define CHUNK_MAP_RC_ONE ((size_t)0x00010U)
635 #define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU)
636 #define CHUNK_MAP_DIRTY ((size_t)0x8U)
637 #define CHUNK_MAP_ZEROED ((size_t)0x4U)
638 #define CHUNK_MAP_LARGE ((size_t)0x2U)
639 #define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
640 #define CHUNK_MAP_KEY (CHUNK_MAP_DIRTY | CHUNK_MAP_ALLOCATED)
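/*
 * Example decoding, matching the "ssss ... la" pattern above: the map entry
 * for the first page of an allocated large run stores the run size in its
 * high bits, combined with (CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED) in the low
 * flag bits.
 */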
642 typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
643 typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
645 /* Arena chunk header. */
646 typedef struct arena_chunk_s arena_chunk_t;
647 struct arena_chunk_s {
648 /* Arena that owns the chunk. */
651 /* Linkage for the arena's chunks_dirty tree. */
652 rb_node(arena_chunk_t) link_dirty;
655 * True if the chunk is currently in the chunks_dirty tree, due to
656 * having at some point contained one or more dirty pages. Removal
657 * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
661 /* Number of dirty pages. */
664 /* Map of pages within chunk that keeps track of free/large/small. */
665 arena_chunk_map_t map[1]; /* Dynamically sized. */
667 typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
669 typedef struct arena_run_s arena_run_t;
673 # define ARENA_RUN_MAGIC 0x384adf93
676 /* Bin this run is associated with. */
679 /* Index of first element that might have a free region. */
680 unsigned regs_minelm;
682 /* Number of free regions in run. */
685 /* Bitmask of in-use regions (0: in use, 1: free). */
686 unsigned regs_mask[1]; /* Dynamically sized. */
691 * Current run being used to service allocations of this bin's size
697 * Tree of non-full runs. This tree is used when looking for an
698 * existing run when runcur is no longer usable. We choose the
699 * non-full run that is lowest in memory; this policy tends to keep
700 * objects packed well, and it can also help reduce the number of
701 * almost-empty chunks.
703 arena_run_tree_t runs;
705 /* Size of regions in a run for this bin's size class. */
708 /* Total size of a run for this bin's size class. */
711 /* Total number of regions in a run for this bin's size class. */
714 /* Number of elements in a run's regs_mask for this bin's size class. */
715 uint32_t regs_mask_nelms;
717 /* Offset of first region in a run for this bin's size class. */
718 uint32_t reg0_offset;
721 /* Bin statistics. */
722 malloc_bin_stats_t stats;
727 typedef struct tcache_s tcache_t;
733 # define ARENA_MAGIC 0x947d3d24
736 /* All operations on this arena require that lock be locked. */
737 pthread_mutex_t lock;
741 # ifdef MALLOC_TCACHE
743 * List of tcaches for extant threads associated with this arena.
744 * Stats from these are merged incrementally, and at exit.
746 ql_head(tcache_t) tcache_ql;
750 /* Tree of dirty-page-containing chunks this arena manages. */
751 arena_chunk_tree_t chunks_dirty;
754 * In order to avoid rapid chunk allocation/deallocation when an arena
755 * oscillates right on the cusp of needing a new chunk, cache the most
756 * recently freed chunk. The spare is left in the arena's chunk trees
757 * until it is deleted.
759 * There is one spare chunk per arena, rather than one spare total, in
760 * order to avoid interactions between multiple threads that could make
761 * a single spare inadequate.
763 arena_chunk_t *spare;
765 /* Number of pages in active runs. */
769 * Current count of pages within unused runs that are potentially
770 * dirty, and for which madvise(... MADV_FREE) has not been called. By
771 * tracking this, we can institute a limit on how much dirty unused
772 * memory is mapped for each arena.
777 * Size/address-ordered tree of this arena's available runs. This tree
778 * is used for first-best-fit run allocation.
780 arena_avail_tree_t runs_avail;
783 * bins is used to store trees of free regions of the following sizes,
784 * assuming a 16-byte quantum, 4 KiB page size, and default
825 arena_bin_t bins[1]; /* Dynamically sized. */
828 /******************************************************************************/
830 * Thread cache data structures.
834 typedef struct tcache_bin_s tcache_bin_t;
835 struct tcache_bin_s {
837 tcache_bin_stats_t tstats;
839 unsigned low_water; /* Min # cached since last GC. */
840 unsigned high_water; /* Max # cached since last GC. */
841 unsigned ncached; /* # of cached objects. */
842 void *slots[1]; /* Dynamically sized. */
847 ql_elm(tcache_t) link; /* Used for aggregating stats. */
849 arena_t *arena; /* This thread's arena. */
850 unsigned ev_cnt; /* Event count since incremental GC. */
851 unsigned next_gc_bin; /* Next bin to GC. */
852 tcache_bin_t *tbins[1]; /* Dynamically sized. */
856 /******************************************************************************/
861 /* Number of CPUs. */
862 static unsigned ncpus;
864 /* Various bin-related settings. */
865 #ifdef MALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
866 # define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN))
870 static unsigned nqbins; /* Number of quantum-spaced bins. */
871 static unsigned ncbins; /* Number of cacheline-spaced bins. */
872 static unsigned nsbins; /* Number of subpage-spaced bins. */
873 static unsigned nmbins; /* Number of medium bins. */
874 static unsigned nbins;
875 static unsigned mbin0; /* mbin offset (nbins - nmbins). */
877 # define tspace_max ((size_t)(QUANTUM >> 1))
879 #define qspace_min QUANTUM
880 static size_t qspace_max;
881 static size_t cspace_min;
882 static size_t cspace_max;
883 static size_t sspace_min;
884 static size_t sspace_max;
885 #define small_maxclass sspace_max
886 #define medium_min PAGE_SIZE
887 static size_t medium_max;
888 #define bin_maxclass medium_max
891 * Soft limit on the number of medium size classes. Spacing between medium
892 * size classes never exceeds pagesize, which can force more than NMBINS_MAX
893 * medium size classes.
895 #define NMBINS_MAX 16
896 /* Spacing between medium size classes. */
897 static size_t lg_mspace;
898 static size_t mspace_mask;
900 static uint8_t const *small_size2bin;
902 * const_small_size2bin is a static constant lookup table that in the common
903 * case can be used as-is for small_size2bin. For dynamically linked programs,
904 * this avoids a page of memory overhead per process.
907 #define S2B_2(i) S2B_1(i) S2B_1(i)
908 #define S2B_4(i) S2B_2(i) S2B_2(i)
909 #define S2B_8(i) S2B_4(i) S2B_4(i)
910 #define S2B_16(i) S2B_8(i) S2B_8(i)
911 #define S2B_32(i) S2B_16(i) S2B_16(i)
912 #define S2B_64(i) S2B_32(i) S2B_32(i)
913 #define S2B_128(i) S2B_64(i) S2B_64(i)
914 #define S2B_256(i) S2B_128(i) S2B_128(i)
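/*
 * Each S2B_n(i) macro expands to n copies of S2B_1(i), so each line of the
 * table below emits a group of identical entries that map to a single bin;
 * the trailing comments note the size class each group corresponds to.
 */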
915 static const uint8_t const_small_size2bin[PAGE_SIZE - 255] = {
917 #if (LG_QUANTUM == 4)
918 /* 64-bit system ************************/
929 S2B_16(S2B_QMIN + 1) /* 32 */
930 S2B_16(S2B_QMIN + 2) /* 48 */
931 S2B_16(S2B_QMIN + 3) /* 64 */
932 S2B_16(S2B_QMIN + 4) /* 80 */
933 S2B_16(S2B_QMIN + 5) /* 96 */
934 S2B_16(S2B_QMIN + 6) /* 112 */
935 S2B_16(S2B_QMIN + 7) /* 128 */
936 # define S2B_CMIN (S2B_QMIN + 8)
938 /* 32-bit system ************************/
948 S2B_8(S2B_QMIN + 1) /* 16 */
949 S2B_8(S2B_QMIN + 2) /* 24 */
950 S2B_8(S2B_QMIN + 3) /* 32 */
951 S2B_8(S2B_QMIN + 4) /* 40 */
952 S2B_8(S2B_QMIN + 5) /* 48 */
953 S2B_8(S2B_QMIN + 6) /* 56 */
954 S2B_8(S2B_QMIN + 7) /* 64 */
955 S2B_8(S2B_QMIN + 8) /* 72 */
956 S2B_8(S2B_QMIN + 9) /* 80 */
957 S2B_8(S2B_QMIN + 10) /* 88 */
958 S2B_8(S2B_QMIN + 11) /* 96 */
959 S2B_8(S2B_QMIN + 12) /* 104 */
960 S2B_8(S2B_QMIN + 13) /* 112 */
961 S2B_8(S2B_QMIN + 14) /* 120 */
962 S2B_8(S2B_QMIN + 15) /* 128 */
963 # define S2B_CMIN (S2B_QMIN + 16)
965 /****************************************/
966 S2B_64(S2B_CMIN + 0) /* 192 */
967 S2B_64(S2B_CMIN + 1) /* 256 */
968 S2B_64(S2B_CMIN + 2) /* 320 */
969 S2B_64(S2B_CMIN + 3) /* 384 */
970 S2B_64(S2B_CMIN + 4) /* 448 */
971 S2B_64(S2B_CMIN + 5) /* 512 */
972 # define S2B_SMIN (S2B_CMIN + 6)
973 S2B_256(S2B_SMIN + 0) /* 768 */
974 S2B_256(S2B_SMIN + 1) /* 1024 */
975 S2B_256(S2B_SMIN + 2) /* 1280 */
976 S2B_256(S2B_SMIN + 3) /* 1536 */
977 S2B_256(S2B_SMIN + 4) /* 1792 */
978 S2B_256(S2B_SMIN + 5) /* 2048 */
979 S2B_256(S2B_SMIN + 6) /* 2304 */
980 S2B_256(S2B_SMIN + 7) /* 2560 */
981 S2B_256(S2B_SMIN + 8) /* 2816 */
982 S2B_256(S2B_SMIN + 9) /* 3072 */
983 S2B_256(S2B_SMIN + 10) /* 3328 */
984 S2B_256(S2B_SMIN + 11) /* 3584 */
985 S2B_256(S2B_SMIN + 12) /* 3840 */
986 #if (PAGE_SHIFT == 13)
987 S2B_256(S2B_SMIN + 13) /* 4096 */
988 S2B_256(S2B_SMIN + 14) /* 4352 */
989 S2B_256(S2B_SMIN + 15) /* 4608 */
990 S2B_256(S2B_SMIN + 16) /* 4864 */
991 S2B_256(S2B_SMIN + 17) /* 5120 */
992 S2B_256(S2B_SMIN + 18) /* 5376 */
993 S2B_256(S2B_SMIN + 19) /* 5632 */
994 S2B_256(S2B_SMIN + 20) /* 5888 */
995 S2B_256(S2B_SMIN + 21) /* 6144 */
996 S2B_256(S2B_SMIN + 22) /* 6400 */
997 S2B_256(S2B_SMIN + 23) /* 6656 */
998 S2B_256(S2B_SMIN + 24) /* 6912 */
999 S2B_256(S2B_SMIN + 25) /* 7168 */
1000 S2B_256(S2B_SMIN + 26) /* 7424 */
1001 S2B_256(S2B_SMIN + 27) /* 7680 */
1002 S2B_256(S2B_SMIN + 28) /* 7936 */
1018 /* Various chunk-related settings. */
1019 static size_t chunksize;
1020 static size_t chunksize_mask; /* (chunksize - 1). */
1021 static size_t chunk_npages;
1022 static size_t arena_chunk_header_npages;
1023 static size_t arena_maxclass; /* Max size class for arenas. */
1030 /* Protects chunk-related data structures. */
1031 static malloc_mutex_t huge_mtx;
1033 /* Tree of chunks that are stand-alone huge allocations. */
1034 static extent_tree_t huge;
1038 * Protects sbrk() calls. This avoids malloc races among threads, though it
1039 * does not protect against races with threads that call sbrk() directly.
1041 static malloc_mutex_t dss_mtx;
1042 /* Base address of the DSS. */
1043 static void *dss_base;
1044 /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
1045 static void *dss_prev;
1046 /* Current upper limit on DSS addresses. */
1047 static void *dss_max;
1050 * Trees of chunks that were previously allocated (trees differ only in node
1051 * ordering). These are used when allocating chunks, in an attempt to re-use
1052 * address space. Depending on function, different tree orderings are needed,
1053 * which is why there are two trees with the same contents.
1055 static extent_tree_t dss_chunks_szad;
1056 static extent_tree_t dss_chunks_ad;
1060 /* Huge allocation statistics. */
1061 static uint64_t huge_nmalloc;
1062 static uint64_t huge_ndalloc;
1063 static size_t huge_allocated;
1066 /****************************/
1068 * base (internal allocation).
1072 * Current pages that are being used for internal memory allocations. These
1073 * pages are carved up in cacheline-size quanta, so that there is no chance of
1074 * false cache line sharing.
1076 static void *base_pages;
1077 static void *base_next_addr;
1078 static void *base_past_addr; /* Addr immediately past base_pages. */
1079 static extent_node_t *base_nodes;
1080 static malloc_mutex_t base_mtx;
1082 static size_t base_mapped;
1091 * Arenas that are used to service external requests. Not all elements of the
1092 * arenas array are necessarily used; arenas are created lazily as needed.
1094 static arena_t **arenas;
1095 static unsigned narenas;
1097 static unsigned next_arena;
1099 static pthread_mutex_t arenas_lock; /* Protects arenas initialization. */
1103 * Map of _pthread_self() --> arenas[???], used for selecting an arena to use
1106 static __thread arena_t *arenas_map TLS_MODEL;
1109 #ifdef MALLOC_TCACHE
1110 /* Map of thread-specific caches. */
1111 static __thread tcache_t *tcache_tls TLS_MODEL;
1114 * Number of cache slots for each bin in the thread cache, or 0 if tcache is
1117 size_t tcache_nslots;
1119 /* Number of tcache allocation/deallocation events between incremental GCs. */
1120 unsigned tcache_gc_incr;
1124 * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
1125 * potentially avoid some system calls. We can get away without TLS here,
1126 * since the state of mmap_unaligned only affects performance, rather than
1130 static __thread bool mmap_unaligned TLS_MODEL;
1132 static bool mmap_unaligned;
1136 static malloc_mutex_t chunks_mtx;
1137 /* Chunk statistics. */
1138 static chunk_stats_t stats_chunks;
1141 /*******************************/
1143 * Runtime configuration options.
1145 const char *_malloc_options;
1147 #ifndef MALLOC_PRODUCTION
1148 static bool opt_abort = true;
1149 static bool opt_junk = true;
1151 static bool opt_abort = false;
1152 static bool opt_junk = false;
1154 #ifdef MALLOC_TCACHE
1155 static size_t opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
1156 static ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
1159 static bool opt_dss = true;
1160 static bool opt_mmap = true;
1162 static ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
1163 static bool opt_stats_print = false;
1164 static size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
1165 static size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
1166 static size_t opt_lg_medium_max = LG_MEDIUM_MAX_DEFAULT;
1167 static size_t opt_lg_chunk = LG_CHUNK_DEFAULT;
1168 static bool opt_utrace = false;
1169 static bool opt_sysv = false;
1170 static bool opt_xmalloc = false;
1171 static bool opt_zero = false;
1172 static int opt_narenas_lshift = 0;
1180 #define UTRACE(a, b, c) \
1182 malloc_utrace_t ut; \
1186 utrace(&ut, sizeof(ut)); \
1189 /******************************************************************************/
1191 * Begin function prototypes for non-inline static functions.
1194 static void malloc_mutex_init(malloc_mutex_t *mutex);
1195 static bool malloc_spin_init(pthread_mutex_t *lock);
1197 static size_t pow2_ceil(size_t x);
1199 static void wrtmessage(const char *p1, const char *p2, const char *p3,
1202 static void malloc_printf(const char *format, ...);
1204 static char *umax2s(uintmax_t x, unsigned base, char *s);
1206 static bool base_pages_alloc_dss(size_t minsize);
1208 static bool base_pages_alloc_mmap(size_t minsize);
1209 static bool base_pages_alloc(size_t minsize);
1210 static void *base_alloc(size_t size);
1211 static void *base_calloc(size_t number, size_t size);
1212 static extent_node_t *base_node_alloc(void);
1213 static void base_node_dealloc(extent_node_t *node);
1214 static void *pages_map(void *addr, size_t size);
1215 static void pages_unmap(void *addr, size_t size);
1217 static void *chunk_alloc_dss(size_t size, bool *zero);
1218 static void *chunk_recycle_dss(size_t size, bool *zero);
1220 static void *chunk_alloc_mmap_slow(size_t size, bool unaligned);
1221 static void *chunk_alloc_mmap(size_t size);
1222 static void *chunk_alloc(size_t size, bool *zero);
1224 static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
1225 static bool chunk_dealloc_dss(void *chunk, size_t size);
1227 static void chunk_dealloc_mmap(void *chunk, size_t size);
1228 static void chunk_dealloc(void *chunk, size_t size);
1230 static arena_t *choose_arena_hard(void);
1232 static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
1233 bool large, bool zero);
1234 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
1235 static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
1236 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
1238 static void arena_purge(arena_t *arena);
1239 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
1240 static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
1241 arena_run_t *run, size_t oldsize, size_t newsize);
1242 static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
1243 arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
1244 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
1245 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
1246 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
1247 #ifdef MALLOC_TCACHE
1248 static void tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin,
1250 static void *tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin,
1253 static void *arena_malloc_medium(arena_t *arena, size_t size, bool zero);
1254 static void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
1255 static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
1257 static bool arena_is_large(const void *ptr);
1258 static size_t arena_salloc(const void *ptr);
1260 arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
1263 static void arena_stats_print(arena_t *arena);
1265 static void stats_print_atexit(void);
1266 #ifdef MALLOC_TCACHE
1267 static void tcache_bin_flush(tcache_bin_t *tbin, size_t binind,
1270 static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
1272 #ifdef MALLOC_TCACHE
1273 static void arena_dalloc_hard(arena_t *arena, arena_chunk_t *chunk,
1274 void *ptr, arena_chunk_map_t *mapelm, tcache_t *tcache);
1276 static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
1277 void *ptr, size_t size, size_t oldsize);
1278 static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
1279 void *ptr, size_t size, size_t oldsize);
1280 static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
1281 static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
1282 static bool arena_new(arena_t *arena, unsigned ind);
1283 static arena_t *arenas_extend(unsigned ind);
1284 #ifdef MALLOC_TCACHE
1285 static tcache_bin_t *tcache_bin_create(arena_t *arena);
1286 static void tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin,
1288 # ifdef MALLOC_STATS
1289 static void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
1291 static tcache_t *tcache_create(arena_t *arena);
1292 static void tcache_destroy(tcache_t *tcache);
1294 static void *huge_malloc(size_t size, bool zero);
1295 static void *huge_palloc(size_t alignment, size_t size);
1296 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
1297 static void huge_dalloc(void *ptr);
1298 static void malloc_stats_print(void);
1300 static void small_size2bin_validate(void);
1302 static bool small_size2bin_init(void);
1303 static bool small_size2bin_init_hard(void);
1304 static unsigned malloc_ncpus(void);
1305 static bool malloc_init_hard(void);
1308 * End function prototypes.
1310 /******************************************************************************/
1313 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1316 if (_write(STDERR_FILENO, p1, strlen(p1)) < 0
1317 || _write(STDERR_FILENO, p2, strlen(p2)) < 0
1318 || _write(STDERR_FILENO, p3, strlen(p3)) < 0
1319 || _write(STDERR_FILENO, p4, strlen(p4)) < 0)
1323 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
1324 const char *p4) = wrtmessage;
1327 * We don't want to depend on vsnprintf() for production builds, since that can
1328 * cause unnecessary bloat for static binaries. umax2s() provides minimal
1329 * integer printing functionality, so that malloc_printf() use can be limited to
1330 * MALLOC_STATS code.
1332 #define UMAX2S_BUFSIZE 65
1334 umax2s(uintmax_t x, unsigned base, char *s)
1338 i = UMAX2S_BUFSIZE - 1;
1344 s[i] = "0123456789"[x % 10];
1351 s[i] = "0123456789abcdef"[x & 0xf];
1358 s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base];
1367 * Define a custom assert() in order to reduce the chances of deadlock during
1368 * assertion failure.
1371 # define assert(e) do { \
1373 char line_buf[UMAX2S_BUFSIZE]; \
1374 _malloc_message(_getprogname(), ": (malloc) ", \
1376 _malloc_message(umax2s(__LINE__, 10, line_buf), \
1377 ": Failed assertion: ", "\"", #e); \
1378 _malloc_message("\"\n", "", "", ""); \
1388 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1391 malloc_printf(const char *format, ...)
1396 va_start(ap, format);
1397 vsnprintf(buf, sizeof(buf), format, ap);
1399 _malloc_message(buf, "", "", "");
1403 /******************************************************************************/
1405 * Begin mutex. We can't use normal pthread mutexes in all places, because
1406 * they require malloc()ed memory, which causes bootstrapping issues in some
1411 malloc_mutex_init(malloc_mutex_t *mutex)
1413 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
1419 malloc_mutex_lock(malloc_mutex_t *mutex)
1423 _SPINLOCK(&mutex->lock);
1427 malloc_mutex_unlock(malloc_mutex_t *mutex)
1431 _SPINUNLOCK(&mutex->lock);
1437 /******************************************************************************/
1439 * Begin spin lock. Spin locks here are actually adaptive mutexes that block
1440 * after a period of spinning, because unbounded spinning would allow for
1441 * priority inversion.
1445 * We use an unpublished interface to initialize pthread mutexes with an
1446 * allocation callback, in order to avoid infinite recursion.
1448 int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
1449 void *(calloc_cb)(size_t, size_t));
1451 __weak_reference(_pthread_mutex_init_calloc_cb_stub,
1452 _pthread_mutex_init_calloc_cb);
1455 _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
1456 void *(calloc_cb)(size_t, size_t))
1463 malloc_spin_init(pthread_mutex_t *lock)
1466 if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
1473 malloc_spin_lock(pthread_mutex_t *lock)
1477 if (_pthread_mutex_trylock(lock) != 0) {
1478 /* Exponentially back off if there are multiple CPUs. */
1481 volatile unsigned j;
1483 for (i = 1; i <= LG_SPIN_LIMIT; i++) {
1484 for (j = 0; j < (1U << i); j++) {
1488 if (_pthread_mutex_trylock(lock) == 0)
1494 * Spinning failed. Block until the lock becomes
1495 * available, in order to avoid indefinite priority
1498 _pthread_mutex_lock(lock);
1504 malloc_spin_unlock(pthread_mutex_t *lock)
1508 _pthread_mutex_unlock(lock);
1514 /******************************************************************************/
1516 * Begin Utility functions/macros.
1519 /* Return the chunk address for allocation address a. */
1520 #define CHUNK_ADDR2BASE(a) \
1521 ((void *)((uintptr_t)(a) & ~chunksize_mask))
1523 /* Return the chunk offset of address a. */
1524 #define CHUNK_ADDR2OFFSET(a) \
1525 ((size_t)((uintptr_t)(a) & chunksize_mask))
1527 /* Return the smallest chunk multiple that is >= s. */
1528 #define CHUNK_CEILING(s) \
1529 (((s) + chunksize_mask) & ~chunksize_mask)
1531 /* Return the smallest quantum multiple that is >= a. */
1532 #define QUANTUM_CEILING(a) \
1533 (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
1535 /* Return the smallest cacheline multiple that is >= s. */
1536 #define CACHELINE_CEILING(s) \
1537 (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
1539 /* Return the smallest subpage multiple that is >= s. */
1540 #define SUBPAGE_CEILING(s) \
1541 (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
1543 /* Return the smallest medium size class that is >= s. */
1544 #define MEDIUM_CEILING(s) \
1545 (((s) + mspace_mask) & ~mspace_mask)
1547 /* Return the smallest pagesize multiple that is >= s. */
1548 #define PAGE_CEILING(s) \
1549 (((s) + PAGE_MASK) & ~PAGE_MASK)
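/*
 * For example, with 64 byte cachelines and 4 KiB pages,
 * CACHELINE_CEILING(100) == 128 and PAGE_CEILING(5000) == 8192.
 */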
1552 /* Compute the smallest power of 2 that is >= x. */
1563 #if (SIZEOF_PTR == 8)
1571 /******************************************************************************/
1575 base_pages_alloc_dss(size_t minsize)
1579 * Do special DSS allocation here, since base allocations don't need to
1582 malloc_mutex_lock(&dss_mtx);
1583 if (dss_prev != (void *)-1) {
1585 size_t csize = CHUNK_CEILING(minsize);
1588 /* Get the current end of the DSS. */
1592 * Calculate how much padding is necessary to
1593 * chunk-align the end of the DSS. Don't worry about
1594 * dss_max not being chunk-aligned though.
1596 incr = (intptr_t)chunksize
1597 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1599 if ((size_t)incr < minsize)
1602 dss_prev = sbrk(incr);
1603 if (dss_prev == dss_max) {
1605 dss_max = (void *)((intptr_t)dss_prev + incr);
1606 base_pages = dss_prev;
1607 base_next_addr = base_pages;
1608 base_past_addr = dss_max;
1610 base_mapped += incr;
1612 malloc_mutex_unlock(&dss_mtx);
1615 } while (dss_prev != (void *)-1);
1617 malloc_mutex_unlock(&dss_mtx);
1624 base_pages_alloc_mmap(size_t minsize)
1628 assert(minsize != 0);
1629 csize = PAGE_CEILING(minsize);
1630 base_pages = pages_map(NULL, csize);
1631 if (base_pages == NULL)
1633 base_next_addr = base_pages;
1634 base_past_addr = (void *)((uintptr_t)base_pages + csize);
1636 base_mapped += csize;
1643 base_pages_alloc(size_t minsize)
1647 if (opt_mmap && minsize != 0)
1650 if (base_pages_alloc_mmap(minsize) == false)
1656 if (base_pages_alloc_dss(minsize) == false)
1666 base_alloc(size_t size)
1671 /* Round size up to nearest multiple of the cacheline size. */
1672 csize = CACHELINE_CEILING(size);
1674 malloc_mutex_lock(&base_mtx);
1675 /* Make sure there's enough space for the allocation. */
1676 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1677 if (base_pages_alloc(csize)) {
1678 malloc_mutex_unlock(&base_mtx);
1683 ret = base_next_addr;
1684 base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1685 malloc_mutex_unlock(&base_mtx);
1691 base_calloc(size_t number, size_t size)
1695 ret = base_alloc(number * size);
1697 memset(ret, 0, number * size);
1702 static extent_node_t *
1703 base_node_alloc(void)
1707 malloc_mutex_lock(&base_mtx);
1708 if (base_nodes != NULL) {
1710 base_nodes = *(extent_node_t **)ret;
1711 malloc_mutex_unlock(&base_mtx);
1713 malloc_mutex_unlock(&base_mtx);
1714 ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
1721 base_node_dealloc(extent_node_t *node)
1724 malloc_mutex_lock(&base_mtx);
1725 *(extent_node_t **)node = base_nodes;
1727 malloc_mutex_unlock(&base_mtx);
1731 * End Utility functions/macros.
1733 /******************************************************************************/
1735 * Begin extent tree code.
1740 extent_szad_comp(extent_node_t *a, extent_node_t *b)
1743 size_t a_size = a->size;
1744 size_t b_size = b->size;
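/* (a > b) - (a < b) yields -1, 0, or 1: size is the primary key, address breaks ties. */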
1746 ret = (a_size > b_size) - (a_size < b_size);
1748 uintptr_t a_addr = (uintptr_t)a->addr;
1749 uintptr_t b_addr = (uintptr_t)b->addr;
1751 ret = (a_addr > b_addr) - (a_addr < b_addr);
1757 /* Wrap red-black tree macros in functions. */
1758 rb_gen(__unused static, extent_tree_szad_, extent_tree_t, extent_node_t,
1759 link_szad, extent_szad_comp)
1763 extent_ad_comp(extent_node_t *a, extent_node_t *b)
1765 uintptr_t a_addr = (uintptr_t)a->addr;
1766 uintptr_t b_addr = (uintptr_t)b->addr;
1768 return ((a_addr > b_addr) - (a_addr < b_addr));
1771 /* Wrap red-black tree macros in functions. */
1772 rb_gen(__unused static, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
1776 * End extent tree code.
1778 /******************************************************************************/
1780 * Begin chunk management functions.
1784 pages_map(void *addr, size_t size)
1789 * We don't use MAP_FIXED here, because it can cause the *replacement*
1790 * of existing mappings, and we only want to create new mappings.
1792 ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1794 assert(ret != NULL);
1796 if (ret == MAP_FAILED)
1798 else if (addr != NULL && ret != addr) {
1800 * We succeeded in mapping memory, but not in the right place.
1802 if (munmap(ret, size) == -1) {
1803 char buf[STRERROR_BUF];
1805 strerror_r(errno, buf, sizeof(buf));
1806 _malloc_message(_getprogname(),
1807 ": (malloc) Error in munmap(): ", buf, "\n");
1814 assert(ret == NULL || (addr == NULL && ret != addr)
1815 || (addr != NULL && ret == addr));
1820 pages_unmap(void *addr, size_t size)
1823 if (munmap(addr, size) == -1) {
1824 char buf[STRERROR_BUF];
1826 strerror_r(errno, buf, sizeof(buf));
1827 _malloc_message(_getprogname(),
1828 ": (malloc) Error in munmap(): ", buf, "\n");
1836 chunk_alloc_dss(size_t size, bool *zero)
1840 ret = chunk_recycle_dss(size, zero);
1845 * sbrk() uses a signed increment argument, so take care not to
1846 * interpret a huge allocation request as a negative increment.
1848 if ((intptr_t)size < 0)
1851 malloc_mutex_lock(&dss_mtx);
1852 if (dss_prev != (void *)-1) {
1856 * The loop is necessary to recover from races with other
1857 * threads that are using the DSS for something other than
1861 /* Get the current end of the DSS. */
1865 * Calculate how much padding is necessary to
1866 * chunk-align the end of the DSS.
1868 incr = (intptr_t)size
1869 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1870 if (incr == (intptr_t)size)
1873 ret = (void *)((intptr_t)dss_max + incr);
1877 dss_prev = sbrk(incr);
1878 if (dss_prev == dss_max) {
1880 dss_max = (void *)((intptr_t)dss_prev + incr);
1881 malloc_mutex_unlock(&dss_mtx);
1885 } while (dss_prev != (void *)-1);
1887 malloc_mutex_unlock(&dss_mtx);
1893 chunk_recycle_dss(size_t size, bool *zero)
1895 extent_node_t *node, key;
1899 malloc_mutex_lock(&dss_mtx);
1900 node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
1902 void *ret = node->addr;
1904 /* Remove node from the tree. */
1905 extent_tree_szad_remove(&dss_chunks_szad, node);
1906 if (node->size == size) {
1907 extent_tree_ad_remove(&dss_chunks_ad, node);
1908 base_node_dealloc(node);
1911 * Insert the remainder of node's address range as a
1912 * smaller chunk. Its position within dss_chunks_ad
1915 assert(node->size > size);
1916 node->addr = (void *)((uintptr_t)node->addr + size);
1918 extent_tree_szad_insert(&dss_chunks_szad, node);
1920 malloc_mutex_unlock(&dss_mtx);
1923 memset(ret, 0, size);
1926 malloc_mutex_unlock(&dss_mtx);
1933 chunk_alloc_mmap_slow(size_t size, bool unaligned)
1938 /* Beware size_t wrap-around. */
1939 if (size + chunksize <= size)
1942 ret = pages_map(NULL, size + chunksize);
1946 /* Clean up unneeded leading/trailing space. */
1947 offset = CHUNK_ADDR2OFFSET(ret);
1949 /* Note that mmap() returned an unaligned mapping. */
1952 /* Leading space. */
1953 pages_unmap(ret, chunksize - offset);
1955 ret = (void *)((uintptr_t)ret +
1956 (chunksize - offset));
1958 /* Trailing space. */
1959 pages_unmap((void *)((uintptr_t)ret + size),
1962 /* Trailing space only. */
1963 pages_unmap((void *)((uintptr_t)ret + size),
1968 * If mmap() returned an aligned mapping, reset mmap_unaligned so that
1969 * the next chunk_alloc_mmap() execution tries the fast allocation
1972 if (unaligned == false)
1973 mmap_unaligned = false;
1979 chunk_alloc_mmap(size_t size)
1984 * Ideally, there would be a way to specify alignment to mmap() (like
1985 * NetBSD has), but in the absence of such a feature, we have to work
1986 * hard to efficiently create aligned mappings. The reliable, but
1987 * slow method is to create a mapping that is over-sized, then trim the
1988 * excess. However, that always results in at least one call to
1991 * A more optimistic approach is to try mapping precisely the right
1992 * amount, then try to append another mapping if alignment is off. In
1993 * practice, this works out well as long as the application is not
1994 * interleaving mappings via direct mmap() calls. If we do run into a
1995 * situation where there is an interleaved mapping and we are unable to
1996 * extend an unaligned mapping, our best option is to switch to the
1997 * slow method until mmap() returns another aligned mapping. This will
1998 * tend to leave a gap in the memory map that is too small to cause
1999 * later problems for the optimistic method.
2001 * Another possible confounding factor is address space layout
2002 * randomization (ASLR), which causes mmap(2) to disregard the
2003 * requested address. mmap_unaligned tracks whether the previous
2004 * chunk_alloc_mmap() execution received any unaligned or relocated
2005 * mappings, and if so, the current execution will immediately fall
2006 * back to the slow method. However, we keep track of whether the fast
2007 * method would have succeeded, and if so, we make a note to try the
2008 * fast method next time.
2011 if (mmap_unaligned == false) {
2014 ret = pages_map(NULL, size);
2018 offset = CHUNK_ADDR2OFFSET(ret);
2020 mmap_unaligned = true;
2021 /* Try to extend chunk boundary. */
2022 if (pages_map((void *)((uintptr_t)ret + size),
2023 chunksize - offset) == NULL) {
2025 * Extension failed. Clean up, then revert to
2026 * the reliable-but-expensive method.
2028 pages_unmap(ret, size);
2029 ret = chunk_alloc_mmap_slow(size, true);
2031 /* Clean up unneeded leading space. */
2032 pages_unmap(ret, chunksize - offset);
2033 ret = (void *)((uintptr_t)ret + (chunksize -
2038 ret = chunk_alloc_mmap_slow(size, false);
2044 * If the caller specifies (*zero == false), it is still possible to receive
2045 * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc()
2046 * takes advantage of this to avoid demanding zeroed chunks, but taking
2047 * advantage of them if they are returned.
2050 chunk_alloc(size_t size, bool *zero)
2055 assert((size & chunksize_mask) == 0);
2061 ret = chunk_alloc_mmap(size);
2070 ret = chunk_alloc_dss(size, zero);
2076 /* All strategies for allocation failed. */
2081 malloc_mutex_lock(&chunks_mtx);
2082 stats_chunks.nchunks += (size / chunksize);
2083 stats_chunks.curchunks += (size / chunksize);
2084 if (stats_chunks.curchunks > stats_chunks.highchunks)
2085 stats_chunks.highchunks = stats_chunks.curchunks;
2086 malloc_mutex_unlock(&chunks_mtx);
2090 assert(CHUNK_ADDR2BASE(ret) == ret);
2095 static extent_node_t *
2096 chunk_dealloc_dss_record(void *chunk, size_t size)
2098 extent_node_t *node, *prev, key;
2100 key.addr = (void *)((uintptr_t)chunk + size);
2101 node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
2102 /* Try to coalesce forward. */
2103 if (node != NULL && node->addr == key.addr) {
2105 * Coalesce chunk with the following address range. This does
2106 * not change the position within dss_chunks_ad, so only
2107 * remove/insert from/into dss_chunks_szad.
2109 extent_tree_szad_remove(&dss_chunks_szad, node);
2112 extent_tree_szad_insert(&dss_chunks_szad, node);
2115 * Coalescing forward failed, so insert a new node. Drop
2116 * dss_mtx during node allocation, since it is possible that a
2117 * new base chunk will be allocated.
2119 malloc_mutex_unlock(&dss_mtx);
2120 node = base_node_alloc();
2121 malloc_mutex_lock(&dss_mtx);
2126 extent_tree_ad_insert(&dss_chunks_ad, node);
2127 extent_tree_szad_insert(&dss_chunks_szad, node);
2130 /* Try to coalesce backward. */
2131 prev = extent_tree_ad_prev(&dss_chunks_ad, node);
2132 if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
2135 * Coalesce chunk with the previous address range. This does
2136 * not change the position within dss_chunks_ad, so only
2137 * remove/insert node from/into dss_chunks_szad.
2139 extent_tree_szad_remove(&dss_chunks_szad, prev);
2140 extent_tree_ad_remove(&dss_chunks_ad, prev);
2142 extent_tree_szad_remove(&dss_chunks_szad, node);
2143 node->addr = prev->addr;
2144 node->size += prev->size;
2145 extent_tree_szad_insert(&dss_chunks_szad, node);
2147 base_node_dealloc(prev);
2154 chunk_dealloc_dss(void *chunk, size_t size)
2158 malloc_mutex_lock(&dss_mtx);
2159 if ((uintptr_t)chunk >= (uintptr_t)dss_base
2160 && (uintptr_t)chunk < (uintptr_t)dss_max) {
2161 extent_node_t *node;
2163 /* Try to coalesce with other unused chunks. */
2164 node = chunk_dealloc_dss_record(chunk, size);
2170 /* Get the current end of the DSS. */
2174 * Try to shrink the DSS if this chunk is at the end of the
2175 * DSS. The sbrk() call here is subject to a race condition
2176 * with threads that use brk(2) or sbrk(2) directly, but the
2177 * alternative would be to leak memory for the sake of poorly
2178 * designed multi-threaded programs.
2180 if ((void *)((uintptr_t)chunk + size) == dss_max
2181 && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
2183 dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
2186 extent_tree_szad_remove(&dss_chunks_szad, node);
2187 extent_tree_ad_remove(&dss_chunks_ad, node);
2188 base_node_dealloc(node);
2191 madvise(chunk, size, MADV_FREE);
2199 malloc_mutex_unlock(&dss_mtx);
2205 chunk_dealloc_mmap(void *chunk, size_t size)
2208 pages_unmap(chunk, size);
2212 chunk_dealloc(void *chunk, size_t size)
2215 assert(chunk != NULL);
2216 assert(CHUNK_ADDR2BASE(chunk) == chunk);
2218 assert((size & chunksize_mask) == 0);
2221 malloc_mutex_lock(&chunks_mtx);
2222 stats_chunks.curchunks -= (size / chunksize);
2223 malloc_mutex_unlock(&chunks_mtx);
2228 if (chunk_dealloc_dss(chunk, size) == false)
2234 chunk_dealloc_mmap(chunk, size);
2238 * End chunk management functions.
2240 /******************************************************************************/
2246 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
2247 * code if necessary).
2249 static inline arena_t *
2255 * We can only use TLS if this is a PIC library, since for the static
2256 * library version, libc's malloc is used by TLS allocation, which
2257 * introduces a bootstrapping issue.
2260 if (__isthreaded == false) {
2261 /* Avoid the overhead of TLS for single-threaded operation. */
2267 ret = choose_arena_hard();
2268 assert(ret != NULL);
2271 if (__isthreaded && narenas > 1) {
2275 * Hash _pthread_self() to one of the arenas. There is a prime
2276 * number of arenas, so this has a reasonable chance of
2277 * working. Even so, the hashing can be easily thwarted by
2278 * inconvenient _pthread_self() values. Without specific
2279 * knowledge of how _pthread_self() calculates values, we can't
2280 * easily do much better than this.
2282 ind = (unsigned long) _pthread_self() % narenas;
2285 * Optimistically assume that arenas[ind] has been initialized.
2286 * At worst, we find out that some other thread has already
2287 * done so, after acquiring the lock in preparation. Note that
2288 * this lazy locking also has the effect of lazily forcing
2289 * cache coherency; without the lock acquisition, there's no
2290 * guarantee that modification of arenas[ind] by another thread
2291 * would be seen on this CPU for an arbitrary amount of time.
2293 * In general, this approach to modifying a synchronized value
2294 * isn't a good idea, but in this case we only ever modify the
2295 * value once, so things work out well.
2300 * Avoid races with another thread that may have already
2301 * initialized arenas[ind].
2303 malloc_spin_lock(&arenas_lock);
2304 if (arenas[ind] == NULL)
2305 ret = arenas_extend((unsigned)ind);
2308 malloc_spin_unlock(&arenas_lock);
2314 assert(ret != NULL);
2320 * Choose an arena based on a per-thread value (slow-path code only, called
2321 * only by choose_arena()).
2324 choose_arena_hard(void)
2328 assert(__isthreaded);
2331 malloc_spin_lock(&arenas_lock);
2332 if ((ret = arenas[next_arena]) == NULL)
2333 ret = arenas_extend(next_arena);
2334 next_arena = (next_arena + 1) % narenas;
2335 malloc_spin_unlock(&arenas_lock);
2346 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
2348 uintptr_t a_chunk = (uintptr_t)a;
2349 uintptr_t b_chunk = (uintptr_t)b;
2354 return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
2357 /* Wrap red-black tree macros in functions. */
2358 rb_gen(__unused static, arena_chunk_tree_dirty_, arena_chunk_tree_t,
2359 arena_chunk_t, link_dirty, arena_chunk_comp)
2362 arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2364 uintptr_t a_mapelm = (uintptr_t)a;
2365 uintptr_t b_mapelm = (uintptr_t)b;
2370 return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
2373 /* Wrap red-black tree macros in functions. */
2374 rb_gen(__unused static, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
2375 link, arena_run_comp)
2378 arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2381 size_t a_size = a->bits & ~PAGE_MASK;
2382 size_t b_size = b->bits & ~PAGE_MASK;
2384 ret = (a_size > b_size) - (a_size < b_size);
2386 uintptr_t a_mapelm, b_mapelm;
2388 if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY)
2389 a_mapelm = (uintptr_t)a;
2392 * Treat keys as though they are lower than anything
2397 b_mapelm = (uintptr_t)b;
2399 ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
2405 /* Wrap red-black tree macros in functions. */
2406 rb_gen(__unused static, arena_avail_tree_, arena_avail_tree_t,
2407 arena_chunk_map_t, link, arena_avail_comp)
2410 arena_run_rc_incr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
2412 arena_chunk_t *chunk;
2414 size_t pagebeg, pageend, i;
2416 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2417 arena = chunk->arena;
2418 pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
2419 pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
2420 (uintptr_t)chunk) >> PAGE_SHIFT;
2422 for (i = pagebeg; i <= pageend; i++) {
2423 size_t mapbits = chunk->map[i].bits;
2425 if (mapbits & CHUNK_MAP_DIRTY) {
2426 assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
2429 mapbits ^= CHUNK_MAP_DIRTY;
2431 assert((mapbits & CHUNK_MAP_RC_MASK) != CHUNK_MAP_RC_MASK);
2432 mapbits += CHUNK_MAP_RC_ONE;
2433 chunk->map[i].bits = mapbits;
2438 arena_run_rc_decr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
2440 arena_chunk_t *chunk;
2442 size_t pagebeg, pageend, mapbits, i;
2443 bool dirtier = false;
2445 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2446 arena = chunk->arena;
2447 pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
2448 pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
2449 (uintptr_t)chunk) >> PAGE_SHIFT;
2452 mapbits = chunk->map[pagebeg].bits;
2453 mapbits -= CHUNK_MAP_RC_ONE;
2454 if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
2456 assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2457 mapbits |= CHUNK_MAP_DIRTY;
2461 chunk->map[pagebeg].bits = mapbits;
2463 if (pageend - pagebeg >= 1) {
2465 * Interior pages are completely consumed by the object being
2466 * deallocated, which means that the pages can be
2467 * unconditionally marked dirty.
2469 for (i = pagebeg + 1; i < pageend; i++) {
2470 mapbits = chunk->map[i].bits;
2471 mapbits -= CHUNK_MAP_RC_ONE;
2472 assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
2474 assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2475 mapbits |= CHUNK_MAP_DIRTY;
2478 chunk->map[i].bits = mapbits;
2482 mapbits = chunk->map[pageend].bits;
2483 mapbits -= CHUNK_MAP_RC_ONE;
2484 if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
2486 assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2487 mapbits |= CHUNK_MAP_DIRTY;
2491 chunk->map[pageend].bits = mapbits;
2495 if (chunk->dirtied == false) {
2496 arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
2498 chunk->dirtied = true;
2501 /* Enforce opt_lg_dirty_mult. */
2502 if (opt_lg_dirty_mult >= 0 && (arena->nactive >>
2503 opt_lg_dirty_mult) < arena->ndirty)
2508 static inline void *
2509 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
2512 unsigned i, mask, bit, regind;
2514 assert(run->magic == ARENA_RUN_MAGIC);
2515 assert(run->regs_minelm < bin->regs_mask_nelms);
2518 * Move the first check outside the loop, so that run->regs_minelm can
2519 * be updated unconditionally, without the possibility of updating it
2522 i = run->regs_minelm;
2523 mask = run->regs_mask[i];
2525 /* Usable allocation found. */
2526 bit = ffs((int)mask) - 1;
2528 regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
2529 assert(regind < bin->nregs);
2530 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2531 + (bin->reg_size * regind));
2534 mask ^= (1U << bit);
2535 run->regs_mask[i] = mask;
2537 arena_run_rc_incr(run, bin, ret);
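/*
 * For example (illustrative values): if regs_mask[i] == 0x28 (binary
 * 101000), ffs() reports bit 3 as the lowest set bit, so with 32-bit ints
 * regind == (i << 5) + 3, and the returned region lies reg0_offset +
 * regind * reg_size bytes past the start of the run.
 */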
2542 for (i++; i < bin->regs_mask_nelms; i++) {
2543 mask = run->regs_mask[i];
2545 /* Usable allocation found. */
2546 bit = ffs((int)mask) - 1;
2548 regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
2549 assert(regind < bin->nregs);
2550 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2551 + (bin->reg_size * regind));
2554 mask ^= (1U << bit);
2555 run->regs_mask[i] = mask;
2558 * Make a note that nothing before this element
2559 * contains a free region.
2561 run->regs_minelm = i; /* Low payoff: + (mask == 0); */
2563 arena_run_rc_incr(run, bin, ret);
2574 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
2576 unsigned shift, diff, regind, elm, bit;
2578 assert(run->magic == ARENA_RUN_MAGIC);
2581 * Avoid doing division with a variable divisor if possible. Using
2582 * actual division here can reduce allocator throughput by over 20%!
2584 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
2586 /* Rescale (factor powers of 2 out of the numerator and denominator). */
2587 shift = ffs(size) - 1;
2592 /* The divisor was a power of 2. */
2596 * To divide by a number D that is not a power of two we
2597 * multiply by (2^21 / D) and then right shift by 21 positions.
2603 * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
2605 * We can omit the first three elements, because we never
2606 * divide by 0, and 1 and 2 are both powers of two, which are
2609 #define SIZE_INV_SHIFT 21
2610 #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
2611 static const unsigned size_invs[] = {
2613 SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
2614 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
2615 SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
2616 SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
2617 SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
2618 SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
2619 SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
2622 if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
2623 regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
2625 regind = diff / size;
2627 #undef SIZE_INV_SHIFT
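/*
 * Worked example (illustrative values): for reg_size == 48 and diff == 96,
 * ffs(48) - 1 == 4, so the rescaled divisor is 3 and the rescaled diff is
 * 6.  SIZE_INV(3) == (1U << 21) / 3 + 1 == 699051, and
 * (6 * 699051) >> 21 == 2, which matches 96 / 48.
 */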
2629 assert(diff == regind * size);
2630 assert(regind < bin->nregs);
2632 elm = regind >> (LG_SIZEOF_INT + 3);
2633 if (elm < run->regs_minelm)
2634 run->regs_minelm = elm;
2635 bit = regind - (elm << (LG_SIZEOF_INT + 3));
2636 assert((run->regs_mask[elm] & (1U << bit)) == 0);
2637 run->regs_mask[elm] |= (1U << bit);
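/*
 * For example (illustrative): with 32-bit ints, regind == 37 gives
 * elm == 37 >> 5 == 1 and bit == 37 - 32 == 5, so bit 5 of regs_mask[1] is
 * set to mark the region free again.
 */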
2639 arena_run_rc_decr(run, bin, ptr);
2643 arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
2646 arena_chunk_t *chunk;
2647 size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
2649 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2650 old_ndirty = chunk->ndirty;
2651 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2653 total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
2655 need_pages = (size >> PAGE_SHIFT);
2656 assert(need_pages > 0);
2657 assert(need_pages <= total_pages);
2658 rem_pages = total_pages - need_pages;
2660 arena_avail_tree_remove(&arena->runs_avail, &chunk->map[run_ind]);
2661 arena->nactive += need_pages;
2663 /* Keep track of trailing unused pages for later use. */
2664 if (rem_pages > 0) {
2665 chunk->map[run_ind+need_pages].bits = (rem_pages <<
2666 PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
2667 CHUNK_MAP_FLAGS_MASK);
2668 chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
2669 PAGE_SHIFT) | (chunk->map[run_ind+total_pages-1].bits &
2670 CHUNK_MAP_FLAGS_MASK);
2671 arena_avail_tree_insert(&arena->runs_avail,
2672 &chunk->map[run_ind+need_pages]);
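/*
 * Illustrative example: splitting an 8-page free run to satisfy a 3-page
 * request leaves a 5-page remainder.  Both the first and the last map
 * entries of the remainder record (5 << PAGE_SHIFT), so later coalescing
 * can find the free run's size from either end.
 */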
2675 for (i = 0; i < need_pages; i++) {
2676 /* Zero if necessary. */
2678 if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED)
2680 memset((void *)((uintptr_t)chunk + ((run_ind
2681 + i) << PAGE_SHIFT)), 0, PAGE_SIZE);
2682 /* CHUNK_MAP_ZEROED is cleared below. */
2686 /* Update dirty page accounting. */
2687 if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
2690 /* CHUNK_MAP_DIRTY is cleared below. */
2693 /* Initialize the chunk map. */
2695 chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE
2696 | CHUNK_MAP_ALLOCATED;
2698 chunk->map[run_ind + i].bits = (i << CHUNK_MAP_PG_SHIFT)
2699 | CHUNK_MAP_ALLOCATED;
2705 * Set the run size only in the first element for large runs.
2706 * This is primarily a debugging aid, since the lack of size
2707 * info for trailing pages only matters if the application
2708 * tries to operate on an interior pointer.
2710 chunk->map[run_ind].bits |= size;
2713 * Initialize the first page's refcount to 1, so that the run
2714 * header is protected from dirty page purging.
2716 chunk->map[run_ind].bits += CHUNK_MAP_RC_ONE;
2720 static arena_chunk_t *
2721 arena_chunk_alloc(arena_t *arena)
2723 arena_chunk_t *chunk;
2726 if (arena->spare != NULL) {
2727 chunk = arena->spare;
2728 arena->spare = NULL;
2734 chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero);
2738 arena->stats.mapped += chunksize;
2741 chunk->arena = arena;
2742 chunk->dirtied = false;
2745 * Claim that no pages are in use, since the header is merely
2751 * Initialize the map to contain one maximal free untouched run.
2752 * Mark the pages as zeroed iff chunk_alloc() returned a zeroed
2755 zeroed = zero ? CHUNK_MAP_ZEROED : 0;
2756 for (i = 0; i < arena_chunk_header_npages; i++)
2757 chunk->map[i].bits = 0;
2758 chunk->map[i].bits = arena_maxclass | zeroed;
2759 for (i++; i < chunk_npages-1; i++)
2760 chunk->map[i].bits = zeroed;
2761 chunk->map[chunk_npages-1].bits = arena_maxclass | zeroed;
2764 /* Insert the run into the runs_avail tree. */
2765 arena_avail_tree_insert(&arena->runs_avail,
2766 &chunk->map[arena_chunk_header_npages]);
2772 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
2775 if (arena->spare != NULL) {
2776 if (arena->spare->dirtied) {
2777 arena_chunk_tree_dirty_remove(
2778 &chunk->arena->chunks_dirty, arena->spare);
2779 arena->ndirty -= arena->spare->ndirty;
2781 chunk_dealloc((void *)arena->spare, chunksize);
2783 arena->stats.mapped -= chunksize;
2788 * Remove run from runs_avail, regardless of whether this chunk
2789 * will be cached, so that the arena does not use it. Dirty page
2790 * flushing only uses the chunks_dirty tree, so leaving this chunk in
2791 * the chunks_* trees is sufficient for that purpose.
2793 arena_avail_tree_remove(&arena->runs_avail,
2794 &chunk->map[arena_chunk_header_npages]);
2796 arena->spare = chunk;
2799 static arena_run_t *
2800 arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
2802 arena_chunk_t *chunk;
2804 arena_chunk_map_t *mapelm, key;
2806 assert(size <= arena_maxclass);
2807 assert((size & PAGE_MASK) == 0);
2809 /* Search the arena's chunks for the lowest best fit. */
2810 key.bits = size | CHUNK_MAP_KEY;
2811 mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
2812 if (mapelm != NULL) {
2813 arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
2814 size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
2815 / sizeof(arena_chunk_map_t);
2817 run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
2819 arena_run_split(arena, run, size, large, zero);
2824 * No usable runs. Create a new chunk from which to allocate the run.
2826 chunk = arena_chunk_alloc(arena);
2829 run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2831 /* Update page map. */
2832 arena_run_split(arena, run, size, large, zero);
2837 static arena_chunk_t *
2838 chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg)
2840 size_t *ndirty = (size_t *)arg;
2842 assert(chunk->dirtied);
2843 *ndirty += chunk->ndirty;
2849 arena_purge(arena_t *arena)
2851 arena_chunk_t *chunk;
2856 arena_chunk_tree_dirty_iter(&arena->chunks_dirty, NULL,
2857 chunks_dirty_iter_cb, (void *)&ndirty);
2858 assert(ndirty == arena->ndirty);
2860 assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty);
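/*
 * For illustration (the opt_lg_dirty_mult value is hypothetical): with
 * opt_lg_dirty_mult == 5, purging is triggered once ndirty exceeds
 * nactive/32, and the loop below keeps purging until ndirty is no longer
 * greater than nactive/64, i.e. half of the trigger threshold.
 */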
2863 arena->stats.npurge++;
2867 * Iterate downward through chunks until enough dirty memory has been
2868 * purged. Terminate as soon as possible in order to minimize the
2869 * number of system calls, even if a chunk has only been partially
2873 while ((arena->nactive >> (opt_lg_dirty_mult + 1)) < arena->ndirty) {
2874 chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
2875 assert(chunk != NULL);
2877 for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
2878 assert(i >= arena_chunk_header_npages);
2879 if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
2880 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2881 /* Find adjacent dirty run(s). */
2882 for (npages = 1; i > arena_chunk_header_npages
2883 && (chunk->map[i - 1].bits &
2884 CHUNK_MAP_DIRTY); npages++) {
2886 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2888 chunk->ndirty -= npages;
2889 arena->ndirty -= npages;
2891 madvise((void *)((uintptr_t)chunk + (i <<
2892 PAGE_SHIFT)), (npages << PAGE_SHIFT),
2895 arena->stats.nmadvise++;
2896 arena->stats.purged += npages;
2898 if ((arena->nactive >> (opt_lg_dirty_mult + 1))
2904 if (chunk->ndirty == 0) {
2905 arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
2907 chunk->dirtied = false;
2913 arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
2915 arena_chunk_t *chunk;
2916 size_t size, run_ind, run_pages;
2918 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2919 run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
2921 assert(run_ind >= arena_chunk_header_npages);
2922 assert(run_ind < chunk_npages);
2923 if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
2924 size = chunk->map[run_ind].bits & ~PAGE_MASK;
2926 size = run->bin->run_size;
2927 run_pages = (size >> PAGE_SHIFT);
2928 arena->nactive -= run_pages;
2930 /* Mark pages as unallocated in the chunk map. */
2934 for (i = 0; i < run_pages; i++) {
2936 * When (dirty == true), *all* pages within the run
2937 * need to have their dirty bits set, because only
2938 * small runs can create a mixture of clean/dirty
2939 * pages, but such runs are passed to this function
2940 * with (dirty == false).
2942 assert((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY)
2946 chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
2951 for (i = 0; i < run_pages; i++) {
2952 chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE |
2953 CHUNK_MAP_ALLOCATED);
2956 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2957 CHUNK_MAP_FLAGS_MASK);
2958 chunk->map[run_ind+run_pages-1].bits = size |
2959 (chunk->map[run_ind+run_pages-1].bits & CHUNK_MAP_FLAGS_MASK);
2961 /* Try to coalesce forward. */
2962 if (run_ind + run_pages < chunk_npages &&
2963 (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
2964 size_t nrun_size = chunk->map[run_ind+run_pages].bits &
2968 * Remove successor from runs_avail; the coalesced run is
2971 arena_avail_tree_remove(&arena->runs_avail,
2972 &chunk->map[run_ind+run_pages]);
2975 run_pages = size >> PAGE_SHIFT;
2977 assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
2979 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2980 CHUNK_MAP_FLAGS_MASK);
2981 chunk->map[run_ind+run_pages-1].bits = size |
2982 (chunk->map[run_ind+run_pages-1].bits &
2983 CHUNK_MAP_FLAGS_MASK);
2986 /* Try to coalesce backward. */
2987 if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
2988 CHUNK_MAP_ALLOCATED) == 0) {
2989 size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;
2991 run_ind -= prun_size >> PAGE_SHIFT;
2994 * Remove predecessor from runs_avail; the coalesced run is
2997 arena_avail_tree_remove(&arena->runs_avail,
2998 &chunk->map[run_ind]);
3001 run_pages = size >> PAGE_SHIFT;
3003 assert((chunk->map[run_ind].bits & ~PAGE_MASK) == prun_size);
3004 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
3005 CHUNK_MAP_FLAGS_MASK);
3006 chunk->map[run_ind+run_pages-1].bits = size |
3007 (chunk->map[run_ind+run_pages-1].bits &
3008 CHUNK_MAP_FLAGS_MASK);
3011 /* Insert into runs_avail, now that coalescing is complete. */
3012 arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
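/*
 * Illustrative example: freeing a 3-page run that sits between a free
 * 2-page successor and a free 4-page predecessor yields a single 9-page
 * free run.  Only the merged run is inserted above; the neighbors were
 * removed from runs_avail as they were absorbed, and the first and last map
 * entries of the merged run both record its total size.
 */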
3015 * Deallocate chunk if it is now completely unused. The bit
3016 * manipulation checks whether the first run is unallocated and extends
3017 * to the end of the chunk.
3019 if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
3020 CHUNK_MAP_ALLOCATED)) == arena_maxclass)
3021 arena_chunk_dealloc(arena, chunk);
3024 * It is okay to do dirty page processing even if the chunk was
3025 * deallocated above, since in that case it is the spare. Waiting
3026 * until after possible chunk deallocation to do dirty processing
3027 * allows for an old spare to be fully deallocated, thus decreasing the
3028 * chances of spuriously crossing the dirty page purging threshold.
3031 if (chunk->dirtied == false) {
3032 arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
3034 chunk->dirtied = true;
3037 /* Enforce opt_lg_dirty_mult. */
3038 if (opt_lg_dirty_mult >= 0 && (arena->nactive >>
3039 opt_lg_dirty_mult) < arena->ndirty)
3045 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3046 size_t oldsize, size_t newsize)
3048 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
3049 size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
3051 assert(oldsize > newsize);
3054 * Update the chunk map so that arena_run_dalloc() can treat the
3055 * leading run as separately allocated.
3057 assert((chunk->map[pageind].bits & CHUNK_MAP_DIRTY) == 0);
3058 chunk->map[pageind].bits = (oldsize - newsize) | CHUNK_MAP_LARGE |
3059 CHUNK_MAP_ALLOCATED;
3060 assert((chunk->map[pageind+head_npages].bits & CHUNK_MAP_DIRTY) == 0);
3061 chunk->map[pageind+head_npages].bits = newsize | CHUNK_MAP_LARGE |
3062 CHUNK_MAP_ALLOCATED;
3064 arena_run_dalloc(arena, run, false);
3068 arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3069 size_t oldsize, size_t newsize, bool dirty)
3071 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
3072 size_t npages = newsize >> PAGE_SHIFT;
3074 assert(oldsize > newsize);
3077 * Update the chunk map so that arena_run_dalloc() can treat the
3078 * trailing run as separately allocated.
3080 assert((chunk->map[pageind].bits & CHUNK_MAP_DIRTY) == 0);
3081 chunk->map[pageind].bits = newsize | CHUNK_MAP_LARGE |
3082 CHUNK_MAP_ALLOCATED;
3083 assert((chunk->map[pageind+npages].bits & CHUNK_MAP_DIRTY) == 0);
3084 chunk->map[pageind+npages].bits = (oldsize - newsize) | CHUNK_MAP_LARGE
3085 | CHUNK_MAP_ALLOCATED;
3087 arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
3091 static arena_run_t *
3092 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
3094 arena_chunk_map_t *mapelm;
3096 unsigned i, remainder;
3098 /* Look for a usable run. */
3099 mapelm = arena_run_tree_first(&bin->runs);
3100 if (mapelm != NULL) {
3101 arena_chunk_t *chunk;
3104 /* run is guaranteed to have available space. */
3105 arena_run_tree_remove(&bin->runs, mapelm);
3107 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
3108 pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
3109 sizeof(arena_chunk_map_t));
3110 run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
3111 ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT))
3114 bin->stats.reruns++;
3118 /* No existing runs have any space available. */
3120 /* Allocate a new run. */
3121 run = arena_run_alloc(arena, bin->run_size, false, false);
3125 /* Initialize run internals. */
3128 for (i = 0; i < bin->regs_mask_nelms - 1; i++)
3129 run->regs_mask[i] = UINT_MAX;
3130 remainder = bin->nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1);
3132 run->regs_mask[i] = UINT_MAX;
3134 /* The last element has spare bits that need to be unset. */
3135 run->regs_mask[i] = (UINT_MAX >> ((1U << (LG_SIZEOF_INT + 3))
3139 run->regs_minelm = 0;
3141 run->nfree = bin->nregs;
3143 run->magic = ARENA_RUN_MAGIC;
3148 bin->stats.curruns++;
3149 if (bin->stats.curruns > bin->stats.highruns)
3150 bin->stats.highruns = bin->stats.curruns;
3155 /* bin->runcur must have space available before this function is called. */
3156 static inline void *
3157 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
3161 assert(run->magic == ARENA_RUN_MAGIC);
3162 assert(run->nfree > 0);
3164 ret = arena_run_reg_alloc(run, bin);
3165 assert(ret != NULL);
3171 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
3173 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
3176 bin->runcur = arena_bin_nonfull_run_get(arena, bin);
3177 if (bin->runcur == NULL)
3179 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
3180 assert(bin->runcur->nfree > 0);
3182 return (arena_bin_malloc_easy(arena, bin, bin->runcur));
3186 * Calculate bin->run_size such that it meets the following constraints:
3188 * *) bin->run_size >= min_run_size
3189 * *) bin->run_size <= arena_maxclass
3190 * *) bin->run_size <= RUN_MAX_SMALL
3191 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
3192 * *) run header size < PAGE_SIZE
3194 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
3195 * also calculated here, since these settings are all interdependent.
3198 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
3200 size_t try_run_size, good_run_size;
3201 unsigned good_nregs, good_mask_nelms, good_reg0_offset;
3202 unsigned try_nregs, try_mask_nelms, try_reg0_offset;
3204 assert(min_run_size >= PAGE_SIZE);
3205 assert(min_run_size <= arena_maxclass);
3206 assert(min_run_size <= RUN_MAX_SMALL);
3209 * Calculate known-valid settings before entering the run_size
3210 * expansion loop, so that the first part of the loop always copies
3213 * The do..while loop iteratively reduces the number of regions until
3214 * the run header and the regions no longer overlap. A closed formula
3215 * would be quite messy, since there is an interdependency between the
3216 * header's mask length and the number of regions.
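/*
 * Sketch of one iteration (illustrative, assuming 4 KiB pages and
 * reg_size == 192): starting from try_run_size == 4096, try_nregs begins
 * near (4096 - sizeof(arena_run_t)) / 192 + 1 and is decremented until
 * sizeof(arena_run_t) plus the trailing bitmap words no longer extends past
 * try_reg0_offset == try_run_size - try_nregs * 192.
 */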
3218 try_run_size = min_run_size;
3219 try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
3220 + 1; /* Counter-act try_nregs-- in loop. */
3223 try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
3224 ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ? 1 : 0);
3225 try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
3226 } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
3229 /* run_size expansion loop. */
3232 * Copy valid settings before trying more aggressive settings.
3234 good_run_size = try_run_size;
3235 good_nregs = try_nregs;
3236 good_mask_nelms = try_mask_nelms;
3237 good_reg0_offset = try_reg0_offset;
3239 /* Try more aggressive settings. */
3240 try_run_size += PAGE_SIZE;
3241 try_nregs = ((try_run_size - sizeof(arena_run_t)) /
3242 bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
3245 try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
3246 ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ?
3248 try_reg0_offset = try_run_size - (try_nregs *
3250 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
3251 (try_mask_nelms - 1)) > try_reg0_offset);
3252 } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
3253 && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
3254 && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
3255 && (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1)))
3258 assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
3259 <= good_reg0_offset);
3260 assert((good_mask_nelms << (LG_SIZEOF_INT + 3)) >= good_nregs);
3262 /* Copy final settings. */
3263 bin->run_size = good_run_size;
3264 bin->nregs = good_nregs;
3265 bin->regs_mask_nelms = good_mask_nelms;
3266 bin->reg0_offset = good_reg0_offset;
3268 return (good_run_size);
3271 #ifdef MALLOC_TCACHE
3273 tcache_event(tcache_t *tcache)
3276 if (tcache_gc_incr == 0)
3280 assert(tcache->ev_cnt <= tcache_gc_incr);
3281 if (tcache->ev_cnt >= tcache_gc_incr) {
3282 size_t binind = tcache->next_gc_bin;
3283 tcache_bin_t *tbin = tcache->tbins[binind];
3286 if (tbin->high_water == 0) {
3288 * This bin went completely unused for an
3289 * entire GC cycle, so throw away the tbin.
3291 assert(tbin->ncached == 0);
3292 tcache_bin_destroy(tcache, tbin, binind);
3293 tcache->tbins[binind] = NULL;
3295 if (tbin->low_water > 0) {
3297 * Flush (ceiling) half of the objects
3298 * below the low water mark.
3300 tcache_bin_flush(tbin, binind,
3301 tbin->ncached - (tbin->low_water >>
3302 1) - (tbin->low_water & 1));
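/*
 * For example (illustrative): with low_water == 5, the number of objects
 * flushed is (5 >> 1) + (5 & 1) == 3, i.e. ceil(5 / 2), and the remaining
 * ncached - 3 objects stay in the tbin.
 */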
3304 tbin->low_water = tbin->ncached;
3305 tbin->high_water = tbin->ncached;
3309 tcache->next_gc_bin++;
3310 if (tcache->next_gc_bin == nbins)
3311 tcache->next_gc_bin = 0;
3316 static inline void *
3317 tcache_bin_alloc(tcache_bin_t *tbin)
3320 if (tbin->ncached == 0)
3323 if (tbin->ncached < tbin->low_water)
3324 tbin->low_water = tbin->ncached;
3325 return (tbin->slots[tbin->ncached]);
3329 tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
3337 assert(tbin->ncached == 0);
3339 arena = tcache->arena;
3340 bin = &arena->bins[binind];
3341 malloc_spin_lock(&arena->lock);
3342 for (i = 0; i < (tcache_nslots >> 1); i++) {
3343 if ((run = bin->runcur) != NULL && run->nfree > 0)
3344 ptr = arena_bin_malloc_easy(arena, bin, run);
3346 ptr = arena_bin_malloc_hard(arena, bin);
3350 * Fill tbin such that the objects lowest in memory are used
3353 tbin->slots[(tcache_nslots >> 1) - 1 - i] = ptr;
3356 bin->stats.nfills++;
3357 bin->stats.nrequests += tbin->tstats.nrequests;
3358 if (bin->reg_size <= small_maxclass) {
3359 arena->stats.nmalloc_small += (i - tbin->ncached);
3360 arena->stats.allocated_small += (i - tbin->ncached) *
3362 arena->stats.nmalloc_small += tbin->tstats.nrequests;
3364 arena->stats.nmalloc_medium += (i - tbin->ncached);
3365 arena->stats.allocated_medium += (i - tbin->ncached) *
3367 arena->stats.nmalloc_medium += tbin->tstats.nrequests;
3369 tbin->tstats.nrequests = 0;
3371 malloc_spin_unlock(&arena->lock);
3373 if (tbin->ncached > tbin->high_water)
3374 tbin->high_water = tbin->ncached;
3377 static inline void *
3378 tcache_alloc(tcache_t *tcache, size_t size, bool zero)
3384 if (size <= small_maxclass)
3385 binind = small_size2bin[size];
3387 binind = mbin0 + ((MEDIUM_CEILING(size) - medium_min) >>
3390 assert(binind < nbins);
3391 tbin = tcache->tbins[binind];
3393 tbin = tcache_bin_create(tcache->arena);
3396 tcache->tbins[binind] = tbin;
3399 ret = tcache_bin_alloc(tbin);
3401 ret = tcache_alloc_hard(tcache, tbin, binind);
3406 if (zero == false) {
3408 memset(ret, 0xa5, size);
3410 memset(ret, 0, size);
3412 memset(ret, 0, size);
3415 tbin->tstats.nrequests++;
3417 tcache_event(tcache);
3422 tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
3426 tcache_bin_fill(tcache, tbin, binind);
3427 ret = tcache_bin_alloc(tbin);
3433 static inline void *
3434 arena_malloc_small(arena_t *arena, size_t size, bool zero)
3441 binind = small_size2bin[size];
3442 assert(binind < mbin0);
3443 bin = &arena->bins[binind];
3444 size = bin->reg_size;
3446 malloc_spin_lock(&arena->lock);
3447 if ((run = bin->runcur) != NULL && run->nfree > 0)
3448 ret = arena_bin_malloc_easy(arena, bin, run);
3450 ret = arena_bin_malloc_hard(arena, bin);
3453 malloc_spin_unlock(&arena->lock);
3458 # ifdef MALLOC_TCACHE
3459 if (__isthreaded == false) {
3461 bin->stats.nrequests++;
3462 arena->stats.nmalloc_small++;
3463 # ifdef MALLOC_TCACHE
3466 arena->stats.allocated_small += size;
3468 malloc_spin_unlock(&arena->lock);
3470 if (zero == false) {
3472 memset(ret, 0xa5, size);
3474 memset(ret, 0, size);
3476 memset(ret, 0, size);
3482 arena_malloc_medium(arena_t *arena, size_t size, bool zero)
3489 size = MEDIUM_CEILING(size);
3490 binind = mbin0 + ((size - medium_min) >> lg_mspace);
3491 assert(binind < nbins);
3492 bin = &arena->bins[binind];
3493 assert(bin->reg_size == size);
3495 malloc_spin_lock(&arena->lock);
3496 if ((run = bin->runcur) != NULL && run->nfree > 0)
3497 ret = arena_bin_malloc_easy(arena, bin, run);
3499 ret = arena_bin_malloc_hard(arena, bin);
3502 malloc_spin_unlock(&arena->lock);
3507 # ifdef MALLOC_TCACHE
3508 if (__isthreaded == false) {
3510 bin->stats.nrequests++;
3511 arena->stats.nmalloc_medium++;
3512 # ifdef MALLOC_TCACHE
3515 arena->stats.allocated_medium += size;
3517 malloc_spin_unlock(&arena->lock);
3519 if (zero == false) {
3521 memset(ret, 0xa5, size);
3523 memset(ret, 0, size);
3525 memset(ret, 0, size);
3531 arena_malloc_large(arena_t *arena, size_t size, bool zero)
3535 /* Large allocation. */
3536 size = PAGE_CEILING(size);
3537 malloc_spin_lock(&arena->lock);
3538 ret = (void *)arena_run_alloc(arena, size, true, zero);
3540 malloc_spin_unlock(&arena->lock);
3544 arena->stats.nmalloc_large++;
3545 arena->stats.allocated_large += size;
3546 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
3547 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
3548 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
3549 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
3550 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
3551 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
3554 malloc_spin_unlock(&arena->lock);
3556 if (zero == false) {
3558 memset(ret, 0xa5, size);
3560 memset(ret, 0, size);
3566 static inline void *
3567 arena_malloc(size_t size, bool zero)
3571 assert(QUANTUM_CEILING(size) <= arena_maxclass);
3573 if (size <= bin_maxclass) {
3574 #ifdef MALLOC_TCACHE
3575 if (__isthreaded && tcache_nslots) {
3576 tcache_t *tcache = tcache_tls;
3577 if ((uintptr_t)tcache > (uintptr_t)1)
3578 return (tcache_alloc(tcache, size, zero));
3579 else if (tcache == NULL) {
3580 tcache = tcache_create(choose_arena());
3583 return (tcache_alloc(tcache, size, zero));
3587 if (size <= small_maxclass) {
3588 return (arena_malloc_small(choose_arena(), size,
3591 return (arena_malloc_medium(choose_arena(),
3595 return (arena_malloc_large(choose_arena(), size, zero));
3598 static inline void *
3599 imalloc(size_t size)
3604 if (size <= arena_maxclass)
3605 return (arena_malloc(size, false));
3607 return (huge_malloc(size, false));
3610 static inline void *
3611 icalloc(size_t size)
3614 if (size <= arena_maxclass)
3615 return (arena_malloc(size, true));
3617 return (huge_malloc(size, true));
3620 /* Only handles large allocations that require more than page alignment. */
3622 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
3626 arena_chunk_t *chunk;
3628 assert((size & PAGE_MASK) == 0);
3629 assert((alignment & PAGE_MASK) == 0);
3631 malloc_spin_lock(&arena->lock);
3632 ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
3634 malloc_spin_unlock(&arena->lock);
3638 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
3640 offset = (uintptr_t)ret & (alignment - 1);
3641 assert((offset & PAGE_MASK) == 0);
3642 assert(offset < alloc_size);
3644 arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
3646 size_t leadsize, trailsize;
3648 leadsize = alignment - offset;
3650 arena_run_trim_head(arena, chunk, ret, alloc_size,
3651 alloc_size - leadsize);
3652 ret = (void *)((uintptr_t)ret + leadsize);
3655 trailsize = alloc_size - leadsize - size;
3656 if (trailsize != 0) {
3657 /* Trim trailing space. */
3658 assert(trailsize < alloc_size);
3659 arena_run_trim_tail(arena, chunk, ret, size + trailsize,
3665 arena->stats.nmalloc_large++;
3666 arena->stats.allocated_large += size;
3667 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
3668 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
3669 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
3670 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
3671 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
3672 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
3675 malloc_spin_unlock(&arena->lock);
3678 memset(ret, 0xa5, size);
3680 memset(ret, 0, size);
3684 static inline void *
3685 ipalloc(size_t alignment, size_t size)
3691 * Round size up to the nearest multiple of alignment.
3693 * This done, we can take advantage of the fact that for each small
3694 * size class, every object is aligned at the smallest power of two
3695 * that is non-zero in the base two representation of the size. For
3698 * Size | Base 2 | Minimum alignment
3699 * -----+----------+------------------
3701 * 144 | 10010000 | 16
3702 * 192 | 11000000 | 64
3704 * Depending on runtime settings, it is possible that arena_malloc()
3705 * will further round up to a power of two, but that never causes
3706 * correctness issues.
3708 ceil_size = (size + (alignment - 1)) & (-alignment);
3710 * (ceil_size < size) protects against the combination of maximal
3711 * alignment and size greater than maximal alignment.
3713 if (ceil_size < size) {
3714 /* size_t overflow. */
3718 if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
3719 && ceil_size <= arena_maxclass))
3720 ret = arena_malloc(ceil_size, false);
3725 * We can't achieve subpage alignment, so round up alignment
3726 * permanently; it makes later calculations simpler.
3728 alignment = PAGE_CEILING(alignment);
3729 ceil_size = PAGE_CEILING(size);
3731 * (ceil_size < size) protects against very large sizes within
3732 * PAGE_SIZE of SIZE_T_MAX.
3734 * (ceil_size + alignment < ceil_size) protects against the
3735 * combination of maximal alignment and ceil_size large enough
3736 * to cause overflow. This is similar to the first overflow
3737 * check above, but it needs to be repeated due to the new
3738 * ceil_size value, which may now be *equal* to maximal
3739 * alignment, whereas before we only detected overflow if the
3740 * original size was *greater* than maximal alignment.
3742 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3743 /* size_t overflow. */
3748 * Calculate the size of the over-size run that arena_palloc()
3749 * would need to allocate in order to guarantee the alignment.
3751 if (ceil_size >= alignment)
3752 run_size = ceil_size + alignment - PAGE_SIZE;
3755 * It is possible that (alignment << 1) will cause
3756 * overflow, but it doesn't matter because we also
3757 * subtract PAGE_SIZE, which in the case of overflow
3758 * leaves us with a very large run_size. That causes
3759 * the first conditional below to fail, which means
3760 * that the bogus run_size value never gets used for
3761 * anything important.
3763 run_size = (alignment << 1) - PAGE_SIZE;
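/*
 * Worked example (illustrative, assuming 4 KiB pages): for
 * ceil_size == 8192 and alignment == 16384, the branch above gives
 * run_size == 32768 - 4096 == 28672.  In the worst case a page-aligned run
 * of that size starts 4096 bytes past a 16384-byte boundary, which still
 * leaves 16384 bytes beginning at the next boundary, enough for the
 * 8192-byte allocation.
 */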
3766 if (run_size <= arena_maxclass) {
3767 ret = arena_palloc(choose_arena(), alignment, ceil_size,
3769 } else if (alignment <= chunksize)
3770 ret = huge_malloc(ceil_size, false);
3772 ret = huge_palloc(alignment, ceil_size);
3775 assert(((uintptr_t)ret & (alignment - 1)) == 0);
3780 arena_is_large(const void *ptr)
3782 arena_chunk_t *chunk;
3783 size_t pageind, mapbits;
3785 assert(ptr != NULL);
3786 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3788 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3789 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3790 mapbits = chunk->map[pageind].bits;
3791 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3792 return ((mapbits & CHUNK_MAP_LARGE) != 0);
3795 /* Return the size of the allocation pointed to by ptr. */
3797 arena_salloc(const void *ptr)
3800 arena_chunk_t *chunk;
3801 size_t pageind, mapbits;
3803 assert(ptr != NULL);
3804 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3806 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3807 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3808 mapbits = chunk->map[pageind].bits;
3809 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3810 if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3811 arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
3812 (uintptr_t)((pageind - ((mapbits & CHUNK_MAP_PG_MASK) >>
3813 CHUNK_MAP_PG_SHIFT)) << PAGE_SHIFT));
3814 assert(run->magic == ARENA_RUN_MAGIC);
3815 ret = run->bin->reg_size;
3817 ret = mapbits & ~PAGE_MASK;
3824 static inline size_t
3825 isalloc(const void *ptr)
3828 arena_chunk_t *chunk;
3830 assert(ptr != NULL);
3832 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3835 assert(chunk->arena->magic == ARENA_MAGIC);
3837 ret = arena_salloc(ptr);
3839 extent_node_t *node, key;
3841 /* Chunk (huge allocation). */
3843 malloc_mutex_lock(&huge_mtx);
3845 /* Extract from tree of huge allocations. */
3846 key.addr = __DECONST(void *, ptr);
3847 node = extent_tree_ad_search(&huge, &key);
3848 assert(node != NULL);
3852 malloc_mutex_unlock(&huge_mtx);
3859 arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3860 arena_chunk_map_t *mapelm)
3867 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3868 run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
3869 ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT)) <<
3871 assert(run->magic == ARENA_RUN_MAGIC);
3873 size = bin->reg_size;
3876 memset(ptr, 0x5a, size);
3878 arena_run_reg_dalloc(run, bin, ptr, size);
3881 if (run->nfree == bin->nregs)
3882 arena_dalloc_bin_run(arena, chunk, run, bin);
3883 else if (run->nfree == 1 && run != bin->runcur) {
3885 * Make sure that bin->runcur always refers to the lowest
3886 * non-full run, if one exists.
3888 if (bin->runcur == NULL)
3890 else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
3891 /* Switch runcur. */
3892 if (bin->runcur->nfree > 0) {
3893 arena_chunk_t *runcur_chunk =
3894 CHUNK_ADDR2BASE(bin->runcur);
3895 size_t runcur_pageind =
3896 (((uintptr_t)bin->runcur -
3897 (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
3898 arena_chunk_map_t *runcur_mapelm =
3899 &runcur_chunk->map[runcur_pageind];
3901 /* Insert runcur. */
3902 arena_run_tree_insert(&bin->runs,
3907 size_t run_pageind = (((uintptr_t)run -
3908 (uintptr_t)chunk)) >> PAGE_SHIFT;
3909 arena_chunk_map_t *run_mapelm =
3910 &chunk->map[run_pageind];
3912 assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
3914 arena_run_tree_insert(&bin->runs, run_mapelm);
3919 if (size <= small_maxclass) {
3920 arena->stats.allocated_small -= size;
3921 arena->stats.ndalloc_small++;
3923 arena->stats.allocated_medium -= size;
3924 arena->stats.ndalloc_medium++;
3930 arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3935 /* Deallocate run. */
3936 if (run == bin->runcur)
3938 else if (bin->nregs != 1) {
3939 size_t run_pageind = (((uintptr_t)run -
3940 (uintptr_t)chunk)) >> PAGE_SHIFT;
3941 arena_chunk_map_t *run_mapelm =
3942 &chunk->map[run_pageind];
3944 * This block's conditional is necessary because if the
3945 * run only contains one region, then it never gets
3946 * inserted into the non-full runs tree.
3948 arena_run_tree_remove(&bin->runs, run_mapelm);
3951 * Mark the first page as dirty. The dirty bit for every other page in
3952 * the run is already properly set, which means we can call
3953 * arena_run_dalloc(..., false), thus potentially avoiding the needless
3954 * creation of many dirty pages.
3956 run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
3957 assert((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0);
3958 chunk->map[run_ind].bits |= CHUNK_MAP_DIRTY;
3965 arena_run_dalloc(arena, run, false);
3967 bin->stats.curruns--;
3970 if (chunk->dirtied == false) {
3971 arena_chunk_tree_dirty_insert(&arena->chunks_dirty, chunk);
3972 chunk->dirtied = true;
3974 /* Enforce opt_lg_dirty_mult. */
3975 if (opt_lg_dirty_mult >= 0 && (arena->nactive >> opt_lg_dirty_mult) <
3982 arena_stats_print(arena_t *arena)
3985 malloc_printf("dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s,"
3986 " %"PRIu64" madvise%s, %"PRIu64" purged\n",
3987 arena->nactive, arena->ndirty,
3988 arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
3989 arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
3990 arena->stats.purged);
3992 malloc_printf(" allocated nmalloc ndalloc\n");
3993 malloc_printf("small: %12zu %12"PRIu64" %12"PRIu64"\n",
3994 arena->stats.allocated_small, arena->stats.nmalloc_small,
3995 arena->stats.ndalloc_small);
3996 malloc_printf("medium: %12zu %12"PRIu64" %12"PRIu64"\n",
3997 arena->stats.allocated_medium, arena->stats.nmalloc_medium,
3998 arena->stats.ndalloc_medium);
3999 malloc_printf("large: %12zu %12"PRIu64" %12"PRIu64"\n",
4000 arena->stats.allocated_large, arena->stats.nmalloc_large,
4001 arena->stats.ndalloc_large);
4002 malloc_printf("total: %12zu %12"PRIu64" %12"PRIu64"\n",
4003 arena->stats.allocated_small + arena->stats.allocated_medium +
4004 arena->stats.allocated_large, arena->stats.nmalloc_small +
4005 arena->stats.nmalloc_medium + arena->stats.nmalloc_large,
4006 arena->stats.ndalloc_small + arena->stats.ndalloc_medium +
4007 arena->stats.ndalloc_large);
4008 malloc_printf("mapped: %12zu\n", arena->stats.mapped);
4010 if (arena->stats.nmalloc_small + arena->stats.nmalloc_medium > 0) {
4011 unsigned i, gap_start;
4012 #ifdef MALLOC_TCACHE
4013 malloc_printf("bins: bin size regs pgs requests "
4014 "nfills nflushes newruns reruns maxruns curruns\n");
4016 malloc_printf("bins: bin size regs pgs requests "
4017 "newruns reruns maxruns curruns\n");
4019 for (i = 0, gap_start = UINT_MAX; i < nbins; i++) {
4020 if (arena->bins[i].stats.nruns == 0) {
4021 if (gap_start == UINT_MAX)
4024 if (gap_start != UINT_MAX) {
4025 if (i > gap_start + 1) {
4027 * Gap of more than one size
4030 malloc_printf("[%u..%u]\n",
4033 /* Gap of one size class. */
4034 malloc_printf("[%u]\n",
4037 gap_start = UINT_MAX;
4040 "%13u %1s %5u %4u %3u %9"PRIu64" %9"PRIu64
4041 #ifdef MALLOC_TCACHE
4042 " %9"PRIu64" %9"PRIu64
4044 " %9"PRIu64" %7zu %7zu\n",
4046 i < ntbins ? "T" : i < ntbins + nqbins ?
4047 "Q" : i < ntbins + nqbins + ncbins ? "C" :
4048 i < ntbins + nqbins + ncbins + nsbins ? "S"
4050 arena->bins[i].reg_size,
4051 arena->bins[i].nregs,
4052 arena->bins[i].run_size >> PAGE_SHIFT,
4053 arena->bins[i].stats.nrequests,
4054 #ifdef MALLOC_TCACHE
4055 arena->bins[i].stats.nfills,
4056 arena->bins[i].stats.nflushes,
4058 arena->bins[i].stats.nruns,
4059 arena->bins[i].stats.reruns,
4060 arena->bins[i].stats.highruns,
4061 arena->bins[i].stats.curruns);
4064 if (gap_start != UINT_MAX) {
4065 if (i > gap_start + 1) {
4066 /* Gap of more than one size class. */
4067 malloc_printf("[%u..%u]\n", gap_start, i - 1);
4069 /* Gap of one size class. */
4070 malloc_printf("[%u]\n", gap_start);
4075 if (arena->stats.nmalloc_large > 0) {
4078 size_t nlclasses = (chunksize - PAGE_SIZE) >> PAGE_SHIFT;
4081 "large: size pages nrequests maxruns curruns\n");
4083 for (i = 0, gap_start = -1; i < nlclasses; i++) {
4084 if (arena->stats.lstats[i].nrequests == 0) {
4085 if (gap_start == -1)
4088 if (gap_start != -1) {
4089 malloc_printf("[%zu]\n", i - gap_start);
4093 "%13zu %5zu %9"PRIu64" %9zu %9zu\n",
4094 (i+1) << PAGE_SHIFT, i+1,
4095 arena->stats.lstats[i].nrequests,
4096 arena->stats.lstats[i].highruns,
4097 arena->stats.lstats[i].curruns);
4100 if (gap_start != -1)
4101 malloc_printf("[%zu]\n", i - gap_start);
4107 stats_print_atexit(void)
4110 #if (defined(MALLOC_TCACHE) && defined(MALLOC_STATS))
4114 * Merge stats from extant threads. This is racy, since individual
4115 * threads do not lock when recording tcache stats events. As a
4116 * consequence, the final stats may be slightly out of date by the time
4117 * they are reported, if other threads continue to allocate.
4119 for (i = 0; i < narenas; i++) {
4120 arena_t *arena = arenas[i];
4121 if (arena != NULL) {
4124 malloc_spin_lock(&arena->lock);
4125 ql_foreach(tcache, &arena->tcache_ql, link) {
4126 tcache_stats_merge(tcache, arena);
4128 malloc_spin_unlock(&arena->lock);
4132 malloc_stats_print();
4135 #ifdef MALLOC_TCACHE
4137 tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
4139 arena_chunk_t *chunk;
4142 unsigned i, ndeferred, ncached;
4144 for (ndeferred = tbin->ncached - rem; ndeferred > 0;) {
4145 ncached = ndeferred;
4146 /* Lock the arena associated with the first object. */
4147 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(tbin->slots[0]);
4148 arena = chunk->arena;
4149 malloc_spin_lock(&arena->lock);
4150 /* Deallocate every object that belongs to the locked arena. */
4151 for (i = ndeferred = 0; i < ncached; i++) {
4152 ptr = tbin->slots[i];
4153 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4154 if (chunk->arena == arena) {
4155 size_t pageind = (((uintptr_t)ptr -
4156 (uintptr_t)chunk) >> PAGE_SHIFT);
4157 arena_chunk_map_t *mapelm =
4158 &chunk->map[pageind];
4159 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4162 * This object was allocated via a different
4163 * arena than the one that is currently locked.
4164 * Stash the object, so that it can be handled
4167 tbin->slots[ndeferred] = ptr;
4172 arena->bins[binind].stats.nflushes++;
4174 arena_bin_t *bin = &arena->bins[binind];
4175 bin->stats.nrequests += tbin->tstats.nrequests;
4176 if (bin->reg_size <= small_maxclass) {
4177 arena->stats.nmalloc_small +=
4178 tbin->tstats.nrequests;
4180 arena->stats.nmalloc_medium +=
4181 tbin->tstats.nrequests;
4183 tbin->tstats.nrequests = 0;
4186 malloc_spin_unlock(&arena->lock);
4191 * Shift the remaining valid pointers to the base of the slots
4194 memmove(&tbin->slots[0], &tbin->slots[tbin->ncached - rem],
4195 rem * sizeof(void *));
4197 tbin->ncached = rem;
4201 tcache_dalloc(tcache_t *tcache, void *ptr)
4204 arena_chunk_t *chunk;
4208 size_t pageind, binind;
4209 arena_chunk_map_t *mapelm;
4211 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4212 arena = chunk->arena;
4213 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
4214 mapelm = &chunk->map[pageind];
4215 run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
4216 ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT)) <<
4218 assert(run->magic == ARENA_RUN_MAGIC);
4220 binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
4221 sizeof(arena_bin_t);
4222 assert(binind < nbins);
4225 memset(ptr, 0x5a, arena->bins[binind].reg_size);
4227 tbin = tcache->tbins[binind];
4229 tbin = tcache_bin_create(choose_arena());
4231 malloc_spin_lock(&arena->lock);
4232 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4233 malloc_spin_unlock(&arena->lock);
4236 tcache->tbins[binind] = tbin;
4239 if (tbin->ncached == tcache_nslots)
4240 tcache_bin_flush(tbin, binind, (tcache_nslots >> 1));
4241 assert(tbin->ncached < tcache_nslots);
4242 tbin->slots[tbin->ncached] = ptr;
4244 if (tbin->ncached > tbin->high_water)
4245 tbin->high_water = tbin->ncached;
4247 tcache_event(tcache);
4252 arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
4255 /* Large allocation. */
4256 malloc_spin_lock(&arena->lock);
4258 #ifndef MALLOC_STATS
4262 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
4264 size_t size = chunk->map[pageind].bits & ~PAGE_MASK;
4269 memset(ptr, 0x5a, size);
4271 arena->stats.ndalloc_large++;
4272 arena->stats.allocated_large -= size;
4273 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--;
4277 arena_run_dalloc(arena, (arena_run_t *)ptr, true);
4278 malloc_spin_unlock(&arena->lock);
4282 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
4285 arena_chunk_map_t *mapelm;
4287 assert(arena != NULL);
4288 assert(arena->magic == ARENA_MAGIC);
4289 assert(chunk->arena == arena);
4290 assert(ptr != NULL);
4291 assert(CHUNK_ADDR2BASE(ptr) != ptr);
4293 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
4294 mapelm = &chunk->map[pageind];
4295 assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
4296 if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
4297 /* Small allocation. */
4298 #ifdef MALLOC_TCACHE
4299 if (__isthreaded && tcache_nslots) {
4300 tcache_t *tcache = tcache_tls;
4301 if ((uintptr_t)tcache > (uintptr_t)1)
4302 tcache_dalloc(tcache, ptr);
4304 arena_dalloc_hard(arena, chunk, ptr, mapelm,
4309 malloc_spin_lock(&arena->lock);
4310 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4311 malloc_spin_unlock(&arena->lock);
4312 #ifdef MALLOC_TCACHE
4316 arena_dalloc_large(arena, chunk, ptr);
4319 #ifdef MALLOC_TCACHE
4321 arena_dalloc_hard(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4322 arena_chunk_map_t *mapelm, tcache_t *tcache)
4325 if (tcache == NULL) {
4326 tcache = tcache_create(arena);
4327 if (tcache == NULL) {
4328 malloc_spin_lock(&arena->lock);
4329 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4330 malloc_spin_unlock(&arena->lock);
4332 tcache_dalloc(tcache, ptr);
4334 /* This thread is currently exiting, so directly deallocate. */
4335 assert(tcache == (void *)(uintptr_t)1);
4336 malloc_spin_lock(&arena->lock);
4337 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4338 malloc_spin_unlock(&arena->lock);
4346 arena_chunk_t *chunk;
4348 assert(ptr != NULL);
4350 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4352 arena_dalloc(chunk->arena, chunk, ptr);
4358 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4359 size_t size, size_t oldsize)
4362 assert(size < oldsize);
4365 * Shrink the run, and make trailing pages available for other
4368 malloc_spin_lock(&arena->lock);
4369 arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
4372 arena->stats.ndalloc_large++;
4373 arena->stats.allocated_large -= oldsize;
4374 arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
4376 arena->stats.nmalloc_large++;
4377 arena->stats.allocated_large += size;
4378 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
4379 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
4380 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
4381 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
4382 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
4383 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
4386 malloc_spin_unlock(&arena->lock);
4390 arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4391 size_t size, size_t oldsize)
4393 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
4394 size_t npages = oldsize >> PAGE_SHIFT;
4396 assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));
4398 /* Try to extend the run. */
4399 assert(size > oldsize);
4400 malloc_spin_lock(&arena->lock);
4401 if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
4402 & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
4403 ~PAGE_MASK) >= size - oldsize) {
4405 * The next run is available and sufficiently large. Split the
4406 * following run, then merge the first part with the existing
4409 arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
4410 ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
4413 chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
4414 CHUNK_MAP_ALLOCATED;
4415 chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
4416 CHUNK_MAP_ALLOCATED;
4419 arena->stats.ndalloc_large++;
4420 arena->stats.allocated_large -= oldsize;
4421 arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
4423 arena->stats.nmalloc_large++;
4424 arena->stats.allocated_large += size;
4425 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
4426 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
4427 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
4428 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
4429 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
4430 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
4433 malloc_spin_unlock(&arena->lock);
4436 malloc_spin_unlock(&arena->lock);
4442 * Try to resize a large allocation, in order to avoid copying. This will
4443 * always fail when growing an object whose following run is already in use.
4446 arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
4450 psize = PAGE_CEILING(size);
4451 if (psize == oldsize) {
4452 /* Same size class. */
4453 if (opt_junk && size < oldsize) {
4454 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
4459 arena_chunk_t *chunk;
4462 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4463 arena = chunk->arena;
4464 assert(arena->magic == ARENA_MAGIC);
4466 if (psize < oldsize) {
4467 /* Fill before shrinking in order to avoid a race. */
4469 memset((void *)((uintptr_t)ptr + size), 0x5a,
4472 arena_ralloc_large_shrink(arena, chunk, ptr, psize,
4476 bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
4478 if (ret == false && opt_zero) {
4479 memset((void *)((uintptr_t)ptr + oldsize), 0,
4488 arena_ralloc(void *ptr, size_t size, size_t oldsize)
4494 * Try to avoid moving the allocation.
4496 * posix_memalign() can cause allocation of "large" objects that are
4497 * smaller than bin_maxclass (in order to meet alignment requirements).
4498 * Therefore, do not assume that (oldsize <= bin_maxclass) indicates
4499 * ptr refers to a bin-allocated object.
4501 if (oldsize <= arena_maxclass) {
4502 if (arena_is_large(ptr) == false) {
4503 if (size <= small_maxclass) {
4504 if (oldsize <= small_maxclass &&
4505 small_size2bin[size] ==
4506 small_size2bin[oldsize])
4508 } else if (size <= bin_maxclass) {
4509 if (small_maxclass < oldsize && oldsize <=
4510 bin_maxclass && MEDIUM_CEILING(size) ==
4511 MEDIUM_CEILING(oldsize))
4515 assert(size <= arena_maxclass);
4516 if (size > bin_maxclass) {
4517 if (arena_ralloc_large(ptr, size, oldsize) ==
4524 /* Try to avoid moving the allocation. */
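/*
 * For example (illustrative, assuming a 16-byte quantum): growing a 20-byte
 * allocation to 25 bytes leaves small_size2bin[size] unchanged, since both
 * sizes map to the 32-byte bin, so the original pointer can be returned
 * without copying.
 */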
4525 if (size <= small_maxclass) {
4526 if (oldsize <= small_maxclass && small_size2bin[size] ==
4527 small_size2bin[oldsize])
4529 } else if (size <= bin_maxclass) {
4530 if (small_maxclass < oldsize && oldsize <= bin_maxclass &&
4531 MEDIUM_CEILING(size) == MEDIUM_CEILING(oldsize))
4534 if (bin_maxclass < oldsize && oldsize <= arena_maxclass) {
4535 assert(size > bin_maxclass);
4536 if (arena_ralloc_large(ptr, size, oldsize) == false)
4542 * If we get here, then size and oldsize are different enough that we
4543 * need to move the object. In that case, fall back to allocating new
4544 * space and copying.
4546 ret = arena_malloc(size, false);
4550 /* Junk/zero-filling were already done by arena_malloc(). */
4551 copysize = (size < oldsize) ? size : oldsize;
4552 memcpy(ret, ptr, copysize);
4556 if (opt_junk && size < oldsize)
4557 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
4558 else if (opt_zero && size > oldsize)
4559 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
4563 static inline void *
4564 iralloc(void *ptr, size_t size)
4568 assert(ptr != NULL);
4571 oldsize = isalloc(ptr);
4573 if (size <= arena_maxclass)
4574 return (arena_ralloc(ptr, size, oldsize));
4576 return (huge_ralloc(ptr, size, oldsize));
4580 arena_new(arena_t *arena, unsigned ind)
4584 size_t prev_run_size;
4586 if (malloc_spin_init(&arena->lock))
4590 memset(&arena->stats, 0, sizeof(arena_stats_t));
4591 arena->stats.lstats = (malloc_large_stats_t *)base_alloc(
4592 sizeof(malloc_large_stats_t) * ((chunksize - PAGE_SIZE) >>
4594 if (arena->stats.lstats == NULL)
4596 memset(arena->stats.lstats, 0, sizeof(malloc_large_stats_t) *
4597 ((chunksize - PAGE_SIZE) >> PAGE_SHIFT));
4598 # ifdef MALLOC_TCACHE
4599 ql_new(&arena->tcache_ql);
4603 /* Initialize chunks. */
4604 arena_chunk_tree_dirty_new(&arena->chunks_dirty);
4605 arena->spare = NULL;
4610 arena_avail_tree_new(&arena->runs_avail);
4612 /* Initialize bins. */
4613 prev_run_size = PAGE_SIZE;
4617 /* (2^n)-spaced tiny bins. */
4618 for (; i < ntbins; i++) {
4619 bin = &arena->bins[i];
4621 arena_run_tree_new(&bin->runs);
4623 bin->reg_size = (1U << (LG_TINY_MIN + i));
4625 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4628 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4633 /* Quantum-spaced bins. */
4634 for (; i < ntbins + nqbins; i++) {
4635 bin = &arena->bins[i];
4637 arena_run_tree_new(&bin->runs);
4639 bin->reg_size = (i - ntbins + 1) << LG_QUANTUM;
4641 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4644 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4648 /* Cacheline-spaced bins. */
4649 for (; i < ntbins + nqbins + ncbins; i++) {
4650 bin = &arena->bins[i];
4652 arena_run_tree_new(&bin->runs);
4654 bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
4657 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4660 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4664 /* Subpage-spaced bins. */
4665 for (; i < ntbins + nqbins + ncbins + nsbins; i++) {
4666 bin = &arena->bins[i];
4668 arena_run_tree_new(&bin->runs);
4670 bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
4673 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4676 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4681 for (; i < nbins; i++) {
4682 bin = &arena->bins[i];
4684 arena_run_tree_new(&bin->runs);
4686 bin->reg_size = medium_min + ((i - (ntbins + nqbins + ncbins +
4687 nsbins)) << lg_mspace);
4689 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4692 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4697 arena->magic = ARENA_MAGIC;
4703 /* Create a new arena and insert it into the arenas array at index ind. */
4705 arenas_extend(unsigned ind)
4709 /* Allocate enough space for trailing bins. */
4710 ret = (arena_t *)base_alloc(sizeof(arena_t)
4711 + (sizeof(arena_bin_t) * (nbins - 1)));
4712 if (ret != NULL && arena_new(ret, ind) == false) {
4716 /* Only reached if there is an OOM error. */
4719 * OOM here is quite inconvenient to propagate, since dealing with it
4720 * would require a check for failure in the fast path. Instead, punt
4721 * by using arenas[0]. In practice, this is an extremely unlikely
4724 _malloc_message(_getprogname(),
4725 ": (malloc) Error initializing arena\n", "", "");
4732 #ifdef MALLOC_TCACHE
4733 static tcache_bin_t *
4734 tcache_bin_create(arena_t *arena)
4739 tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
4740 if (tsize <= small_maxclass)
4741 ret = (tcache_bin_t *)arena_malloc_small(arena, tsize, false);
4742 else if (tsize <= bin_maxclass)
4743 ret = (tcache_bin_t *)arena_malloc_medium(arena, tsize, false);
4745 ret = (tcache_bin_t *)imalloc(tsize);
4749 memset(&ret->tstats, 0, sizeof(tcache_bin_stats_t));
4752 ret->high_water = 0;
4759 tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned binind)
4762 arena_chunk_t *chunk;
4763 size_t pageind, tsize;
4764 arena_chunk_map_t *mapelm;
4766 chunk = CHUNK_ADDR2BASE(tbin);
4767 arena = chunk->arena;
4768 pageind = (((uintptr_t)tbin - (uintptr_t)chunk) >> PAGE_SHIFT);
4769 mapelm = &chunk->map[pageind];
4772 if (tbin->tstats.nrequests != 0) {
4773 arena_t *arena = tcache->arena;
4774 arena_bin_t *bin = &arena->bins[binind];
4775 malloc_spin_lock(&arena->lock);
4776 bin->stats.nrequests += tbin->tstats.nrequests;
4777 if (bin->reg_size <= small_maxclass)
4778 arena->stats.nmalloc_small += tbin->tstats.nrequests;
4780 arena->stats.nmalloc_medium += tbin->tstats.nrequests;
4781 malloc_spin_unlock(&arena->lock);
4785 assert(tbin->ncached == 0);
4786 tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
4787 if (tsize <= bin_maxclass) {
4788 malloc_spin_lock(&arena->lock);
4789 arena_dalloc_bin(arena, chunk, tbin, mapelm);
4790 malloc_spin_unlock(&arena->lock);
4797 tcache_stats_merge(tcache_t *tcache, arena_t *arena)
4801 /* Merge and reset tcache stats. */
4802 for (i = 0; i < mbin0; i++) {
4803 arena_bin_t *bin = &arena->bins[i];
4804 tcache_bin_t *tbin = tcache->tbins[i];
4806 bin->stats.nrequests += tbin->tstats.nrequests;
4807 arena->stats.nmalloc_small += tbin->tstats.nrequests;
4808 tbin->tstats.nrequests = 0;
4811 for (; i < nbins; i++) {
4812 arena_bin_t *bin = &arena->bins[i];
4813 tcache_bin_t *tbin = tcache->tbins[i];
4815 bin->stats.nrequests += tbin->tstats.nrequests;
4816 arena->stats.nmalloc_medium += tbin->tstats.nrequests;
4817 tbin->tstats.nrequests = 0;
4824 tcache_create(arena_t *arena)
4828 if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
4829 small_maxclass) {
4830 tcache = (tcache_t *)arena_malloc_small(arena, sizeof(tcache_t)
4831 + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
4832 } else if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
4833 bin_maxclass) {
4834 tcache = (tcache_t *)arena_malloc_medium(arena, sizeof(tcache_t)
4835 + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
4837 tcache = (tcache_t *)icalloc(sizeof(tcache_t) +
4838 (sizeof(tcache_bin_t *) * (nbins - 1)));
4845 /* Link into list of extant tcaches. */
4846 malloc_spin_lock(&arena->lock);
4847 ql_elm_new(tcache, link);
4848 ql_tail_insert(&arena->tcache_ql, tcache, link);
4849 malloc_spin_unlock(&arena->lock);
4852 tcache->arena = arena;
4854 tcache_tls = tcache;
4860 tcache_destroy(tcache_t *tcache)
4865 /* Unlink from list of extant tcaches. */
4866 malloc_spin_lock(&tcache->arena->lock);
4867 ql_remove(&tcache->arena->tcache_ql, tcache, link);
4868 tcache_stats_merge(tcache, tcache->arena);
4869 malloc_spin_unlock(&tcache->arena->lock);
4872 for (i = 0; i < nbins; i++) {
4873 tcache_bin_t *tbin = tcache->tbins[i];
4875 tcache_bin_flush(tbin, i, 0);
4876 tcache_bin_destroy(tcache, tbin, i);
4880 if (arena_salloc(tcache) <= bin_maxclass) {
4881 arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
4882 arena_t *arena = chunk->arena;
4883 size_t pageind = (((uintptr_t)tcache - (uintptr_t)chunk) >>
4884 PAGE_SHIFT);
4885 arena_chunk_map_t *mapelm = &chunk->map[pageind];
4887 malloc_spin_lock(&arena->lock);
4888 arena_dalloc_bin(arena, chunk, tcache, mapelm);
4889 malloc_spin_unlock(&arena->lock);
4898 /******************************************************************************/
4900 * Begin general internal functions.
4904 huge_malloc(size_t size, bool zero)
4908 extent_node_t *node;
4910 /* Allocate one or more contiguous chunks for this request. */
4912 csize = CHUNK_CEILING(size);
4913 if (csize == 0) {
4914 /* size is large enough to cause size_t wrap-around. */
4915 return (NULL);
4916 }
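/*
 * Editor's illustrative note (not in the original source): CHUNK_CEILING()
 * rounds a request up to a multiple of chunksize; assuming the usual
 * mask-based definition, a sufficiently large request wraps around to 0.
 * With 1 MiB chunks (chunksize_mask == 0xfffff), for example:
 *
 *	size  = SIZE_MAX - 100;
 *	csize = (size + chunksize_mask) & ~chunksize_mask;
 *	        -- the addition overflows size_t, leaving csize == 0, which
 *	        -- the check above turns into a NULL return instead of a
 *	        -- bogus zero-byte chunk_alloc() request.
 */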
4918 /* Allocate an extent node with which to track the chunk. */
4919 node = base_node_alloc();
4923 ret = chunk_alloc(csize, &zero);
4924 if (ret == NULL) {
4925 base_node_dealloc(node);
4929 /* Insert node into huge. */
4930 node->addr = ret;
4931 node->size = csize;
4933 malloc_mutex_lock(&huge_mtx);
4934 extent_tree_ad_insert(&huge, node);
4937 huge_allocated += csize;
4939 malloc_mutex_unlock(&huge_mtx);
4941 if (zero == false) {
4942 if (opt_junk)
4943 memset(ret, 0xa5, csize);
4944 else if (opt_zero)
4945 memset(ret, 0, csize);
4951 /* Only handles large allocations that require more than chunk alignment. */
4953 huge_palloc(size_t alignment, size_t size)
4956 size_t alloc_size, chunk_size, offset;
4957 extent_node_t *node;
4961 * This allocation requires alignment that is even larger than chunk
4962 * alignment. This means that huge_malloc() isn't good enough.
4964 * Allocate almost twice as many chunks as are demanded by the size or
4965 * alignment, in order to assure the alignment can be achieved, then
4966 * unmap leading and trailing chunks.
4967 */
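/*
 * Editor's illustrative sketch (not in the original source), assuming 1 MiB
 * chunks: for alignment = 2 MiB and size = 3 MiB, chunk_size = 3 MiB and
 * alloc_size = 3 MiB + 2 MiB - 1 MiB = 4 MiB. chunk_alloc() returns a
 * chunk-aligned (1 MiB) base, so the offset computed below is either 0 or
 * 1 MiB:
 *
 *	offset == 0:      keep [ret, ret + 3 MiB), trim the trailing 1 MiB.
 *	offset == 1 MiB:  trim the leading 1 MiB, advance ret by 1 MiB, and
 *	                  trailsize = 4 - 1 - 3 = 0, so nothing else to trim.
 *
 * Either way a 2 MiB-aligned, 3 MiB region remains mapped.
 */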
4968 assert(alignment >= chunksize);
4970 chunk_size = CHUNK_CEILING(size);
4972 if (size >= alignment)
4973 alloc_size = chunk_size + alignment - chunksize;
4975 alloc_size = (alignment << 1) - chunksize;
4977 /* Allocate an extent node with which to track the chunk. */
4978 node = base_node_alloc();
4983 ret = chunk_alloc(alloc_size, &zero);
4984 if (ret == NULL) {
4985 base_node_dealloc(node);
4989 offset = (uintptr_t)ret & (alignment - 1);
4990 assert((offset & chunksize_mask) == 0);
4991 assert(offset < alloc_size);
4993 /* Trim trailing space. */
4994 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
4995 - chunk_size);
4999 /* Trim leading space. */
5000 chunk_dealloc(ret, alignment - offset);
5002 ret = (void *)((uintptr_t)ret + (alignment - offset));
5004 trailsize = alloc_size - (alignment - offset) - chunk_size;
5005 if (trailsize != 0) {
5006 /* Trim trailing space. */
5007 assert(trailsize < alloc_size);
5008 chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
5009 trailsize);
5013 /* Insert node into huge. */
5014 node->addr = ret;
5015 node->size = chunk_size;
5017 malloc_mutex_lock(&huge_mtx);
5018 extent_tree_ad_insert(&huge, node);
5021 huge_allocated += chunk_size;
5023 malloc_mutex_unlock(&huge_mtx);
5025 if (opt_junk)
5026 memset(ret, 0xa5, chunk_size);
5027 else if (opt_zero)
5028 memset(ret, 0, chunk_size);
5034 huge_ralloc(void *ptr, size_t size, size_t oldsize)
5039 /* Avoid moving the allocation if the size class would not change. */
5040 if (oldsize > arena_maxclass &&
5041 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
5042 if (opt_junk && size < oldsize) {
5043 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
5044 - size);
5045 } else if (opt_zero && size > oldsize) {
5046 memset((void *)((uintptr_t)ptr + oldsize), 0, size
5047 - oldsize);
5053 * If we get here, then size and oldsize are different enough that we
5054 * need to use a different size class. In that case, fall back to
5055 * allocating new space and copying.
5057 ret = huge_malloc(size, false);
5061 copysize = (size < oldsize) ? size : oldsize;
5062 memcpy(ret, ptr, copysize);
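/*
 * Editor's illustrative note (not in the original source), assuming 1 MiB
 * chunks: growing a huge allocation from oldsize = 1.2 MiB to size = 1.8 MiB
 * keeps the same 2-chunk (2 MiB) backing, so the branch above only
 * junk-fills or zero-fills the delta and the original pointer survives.
 * Growing to 2.5 MiB needs a 3-chunk backing instead, so this path allocates
 * a new huge region and copies copysize = 1.2 MiB of data before the old
 * region is released.
 */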
5068 huge_dalloc(void *ptr)
5070 extent_node_t *node, key;
5072 malloc_mutex_lock(&huge_mtx);
5074 /* Extract from tree of huge allocations. */
5075 key.addr = ptr;
5076 node = extent_tree_ad_search(&huge, &key);
5077 assert(node != NULL);
5078 assert(node->addr == ptr);
5079 extent_tree_ad_remove(&huge, node);
5083 huge_allocated -= node->size;
5086 malloc_mutex_unlock(&huge_mtx);
5090 if (opt_dss && opt_junk)
5091 memset(node->addr, 0x5a, node->size);
5093 chunk_dealloc(node->addr, node->size);
5095 base_node_dealloc(node);
5099 malloc_stats_print(void)
5101 char s[UMAX2S_BUFSIZE];
5103 _malloc_message("___ Begin malloc statistics ___\n", "", "", "");
5104 _malloc_message("Assertions ",
5111 _malloc_message("Boolean MALLOC_OPTIONS: ", opt_abort ? "A" : "a", "", "");
5113 _malloc_message(opt_dss ? "D" : "d", "", "", "");
5115 _malloc_message(opt_junk ? "J" : "j", "", "", "");
5117 _malloc_message(opt_mmap ? "M" : "m", "", "", "");
5119 _malloc_message("P", "", "", "");
5120 _malloc_message(opt_utrace ? "U" : "u", "", "", "");
5121 _malloc_message(opt_sysv ? "V" : "v", "", "", "");
5122 _malloc_message(opt_xmalloc ? "X" : "x", "", "", "");
5123 _malloc_message(opt_zero ? "Z" : "z", "", "", "");
5124 _malloc_message("\n", "", "", "");
5126 _malloc_message("CPUs: ", umax2s(ncpus, 10, s), "\n", "");
5127 _malloc_message("Max arenas: ", umax2s(narenas, 10, s), "\n", "");
5128 _malloc_message("Pointer size: ", umax2s(sizeof(void *), 10, s), "\n", "");
5129 _malloc_message("Quantum size: ", umax2s(QUANTUM, 10, s), "\n", "");
5130 _malloc_message("Cacheline size (assumed): ",
5131 umax2s(CACHELINE, 10, s), "\n", "");
5132 _malloc_message("Subpage spacing: ", umax2s(SUBPAGE, 10, s), "\n", "");
5133 _malloc_message("Medium spacing: ", umax2s((1U << lg_mspace), 10, s), "\n",
5136 _malloc_message("Tiny 2^n-spaced sizes: [", umax2s((1U << LG_TINY_MIN), 10,
5138 _malloc_message(umax2s((qspace_min >> 1), 10, s), "]\n", "", "");
5140 _malloc_message("Quantum-spaced sizes: [", umax2s(qspace_min, 10, s), "..",
5142 _malloc_message(umax2s(qspace_max, 10, s), "]\n", "", "");
5143 _malloc_message("Cacheline-spaced sizes: [",
5144 umax2s(cspace_min, 10, s), "..", "");
5145 _malloc_message(umax2s(cspace_max, 10, s), "]\n", "", "");
5146 _malloc_message("Subpage-spaced sizes: [", umax2s(sspace_min, 10, s), "..",
5148 _malloc_message(umax2s(sspace_max, 10, s), "]\n", "", "");
5149 _malloc_message("Medium sizes: [", umax2s(medium_min, 10, s), "..", "");
5150 _malloc_message(umax2s(medium_max, 10, s), "]\n", "", "");
5151 if (opt_lg_dirty_mult >= 0) {
5152 _malloc_message("Min active:dirty page ratio per arena: ",
5153 umax2s((1U << opt_lg_dirty_mult), 10, s), ":1\n", "");
5155 _malloc_message("Min active:dirty page ratio per arena: N/A\n", "",
5158 #ifdef MALLOC_TCACHE
5159 _malloc_message("Thread cache slots per size class: ",
5160 tcache_nslots ? umax2s(tcache_nslots, 10, s) : "N/A", "\n", "");
5161 _malloc_message("Thread cache GC sweep interval: ",
5162 (tcache_nslots && tcache_gc_incr > 0) ?
5163 umax2s((1U << opt_lg_tcache_gc_sweep), 10, s) : "N/A", "", "");
5164 _malloc_message(" (increment interval: ",
5165 (tcache_nslots && tcache_gc_incr > 0) ? umax2s(tcache_gc_incr, 10, s)
5166 : "N/A", ")\n", "");
5168 _malloc_message("Chunk size: ", umax2s(chunksize, 10, s), "", "");
5169 _malloc_message(" (2^", umax2s(opt_lg_chunk, 10, s), ")\n", "");
5173 size_t allocated, mapped;
5177 /* Calculate and print allocated/mapped stats. */
5180 for (i = 0, allocated = 0; i < narenas; i++) {
5181 if (arenas[i] != NULL) {
5182 malloc_spin_lock(&arenas[i]->lock);
5183 allocated += arenas[i]->stats.allocated_small;
5184 allocated += arenas[i]->stats.allocated_large;
5185 malloc_spin_unlock(&arenas[i]->lock);
5190 malloc_mutex_lock(&huge_mtx);
5191 allocated += huge_allocated;
5192 mapped = stats_chunks.curchunks * chunksize;
5193 malloc_mutex_unlock(&huge_mtx);
5195 malloc_mutex_lock(&base_mtx);
5196 mapped += base_mapped;
5197 malloc_mutex_unlock(&base_mtx);
5199 malloc_printf("Allocated: %zu, mapped: %zu\n", allocated,
5202 /* Print chunk stats. */
5204 chunk_stats_t chunks_stats;
5206 malloc_mutex_lock(&huge_mtx);
5207 chunks_stats = stats_chunks;
5208 malloc_mutex_unlock(&huge_mtx);
5210 malloc_printf("chunks: nchunks "
5211 "highchunks curchunks\n");
5212 malloc_printf(" %13"PRIu64"%13zu%13zu\n",
5213 chunks_stats.nchunks, chunks_stats.highchunks,
5214 chunks_stats.curchunks);
5217 /* Print huge stats. */
5218 malloc_printf(
5219 "huge: nmalloc ndalloc allocated\n");
5220 malloc_printf(" %12"PRIu64" %12"PRIu64" %12zu\n", huge_nmalloc,
5221 huge_ndalloc, huge_allocated);
5223 /* Print stats for each arena. */
5224 for (i = 0; i < narenas; i++) {
5225 arena = arenas[i];
5226 if (arena != NULL) {
5227 malloc_printf("\narenas[%u]:\n", i);
5228 malloc_spin_lock(&arena->lock);
5229 arena_stats_print(arena);
5230 malloc_spin_unlock(&arena->lock);
5234 #endif /* #ifdef MALLOC_STATS */
5235 _malloc_message("--- End malloc statistics ---\n", "", "", "");
5240 small_size2bin_validate(void)
5242 size_t i, size, binind;
5244 assert(small_size2bin[0] == 0xffU);
5248 for (; i < (1U << LG_TINY_MIN); i++) {
5249 size = pow2_ceil(1U << LG_TINY_MIN);
5250 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5251 assert(small_size2bin[i] == binind);
5253 for (; i < qspace_min; i++) {
5254 size = pow2_ceil(i);
5255 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5256 assert(small_size2bin[i] == binind);
5259 /* Quantum-spaced. */
5260 for (; i <= qspace_max; i++) {
5261 size = QUANTUM_CEILING(i);
5262 binind = ntbins + (size >> LG_QUANTUM) - 1;
5263 assert(small_size2bin[i] == binind);
5265 /* Cacheline-spaced. */
5266 for (; i <= cspace_max; i++) {
5267 size = CACHELINE_CEILING(i);
5268 binind = ntbins + nqbins + ((size - cspace_min) >>
5269 LG_CACHELINE);
5270 assert(small_size2bin[i] == binind);
5273 for (; i <= sspace_max; i++) {
5274 size = SUBPAGE_CEILING(i);
5275 binind = ntbins + nqbins + ncbins + ((size - sspace_min)
5276 >> LG_SUBPAGE);
5277 assert(small_size2bin[i] == binind);
5283 small_size2bin_init(void)
5286 if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
5287 || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
5288 || sizeof(const_small_size2bin) != small_maxclass + 1)
5289 return (small_size2bin_init_hard());
5291 small_size2bin = const_small_size2bin;
5293 assert(sizeof(const_small_size2bin) == small_maxclass + 1);
5294 small_size2bin_validate();
5300 small_size2bin_init_hard(void)
5302 size_t i, size, binind;
5303 uint8_t *custom_small_size2bin;
5305 assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
5306 || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
5307 || sizeof(const_small_size2bin) != small_maxclass + 1);
5309 custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1);
5310 if (custom_small_size2bin == NULL)
5313 custom_small_size2bin[0] = 0xffU;
5317 for (; i < (1U << LG_TINY_MIN); i++) {
5318 size = pow2_ceil(1U << LG_TINY_MIN);
5319 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5320 custom_small_size2bin[i] = binind;
5322 for (; i < qspace_min; i++) {
5323 size = pow2_ceil(i);
5324 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5325 custom_small_size2bin[i] = binind;
5328 /* Quantum-spaced. */
5329 for (; i <= qspace_max; i++) {
5330 size = QUANTUM_CEILING(i);
5331 binind = ntbins + (size >> LG_QUANTUM) - 1;
5332 custom_small_size2bin[i] = binind;
5334 /* Cacheline-spaced. */
5335 for (; i <= cspace_max; i++) {
5336 size = CACHELINE_CEILING(i);
5337 binind = ntbins + nqbins + ((size - cspace_min) >>
5338 LG_CACHELINE);
5339 custom_small_size2bin[i] = binind;
5342 for (; i <= sspace_max; i++) {
5343 size = SUBPAGE_CEILING(i);
5344 binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
5345 LG_SUBPAGE);
5346 custom_small_size2bin[i] = binind;
5349 small_size2bin = custom_small_size2bin;
5351 small_size2bin_validate();
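/*
 * Editor's illustrative note (not in the original source): the table maps a
 * request size directly to a bin index. Assuming the defaults shown in the
 * table at the top of this file (16-byte quantum, 2-byte minimum tiny class,
 * so ntbins == LG_QUANTUM - LG_TINY_MIN == 3 tiny bins for 2, 4 and 8 bytes):
 *
 *	small_size2bin[8]  == ffs(8 >> (LG_TINY_MIN + 1))      == 2  (8-byte tiny bin)
 *	small_size2bin[16] == ntbins + (16 >> LG_QUANTUM) - 1  == 3  (16-byte bin)
 *	small_size2bin[17] == ntbins + (32 >> LG_QUANTUM) - 1  == 4  (rounds up to 32)
 */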
5364 error = _elf_aux_info(AT_NCPUS, &ret, sizeof(ret));
5365 if (error != 0 || ret == 0) {
5369 if (sysctl(mib, 2, &ret, &len, (void *)NULL, 0) == -1) {
5379 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
5380 * implementation has to take pains to avoid infinite recursion during
5381 * initialization.
5382 */
5387 if (malloc_initialized == false)
5388 return (malloc_init_hard());
5394 malloc_init_hard(void)
5398 char buf[PATH_MAX + 1];
5401 malloc_mutex_lock(&init_lock);
5402 if (malloc_initialized) {
5404 * Another thread initialized the allocator before this one
5405 * acquired init_lock.
5407 malloc_mutex_unlock(&init_lock);
5411 /* Get number of CPUs. */
5412 ncpus = malloc_ncpus();
5415 * Increase the chunk size to the largest page size that is greater
5416 * than the default chunk size and less than or equal to 4MB.
5419 size_t pagesizes[MAXPAGESIZES];
5422 nsizes = getpagesizes(pagesizes, MAXPAGESIZES);
5423 for (k = 0; k < nsizes; k++)
5424 if (pagesizes[k] <= (1LU << 22))
5425 while ((1LU << opt_lg_chunk) < pagesizes[k])
5426 opt_lg_chunk++;
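/*
 * Editor's illustrative note (not in the original source): if, say,
 * getpagesizes() reports a 2 MiB superpage (1 << 21) and opt_lg_chunk is
 * still at a smaller default such as 20 (1 MiB), the loop above raises
 * opt_lg_chunk to 21 so that every chunk can be backed by exactly one
 * superpage; a 1 GiB page (1 << 30) would be ignored because of the 4 MiB
 * cap.
 */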
5429 for (i = 0; i < 3; i++) {
5432 /* Get runtime configuration. */
5435 if ((linklen = readlink("/etc/malloc.conf", buf,
5436 sizeof(buf) - 1)) != -1) {
5438 * Use the contents of the "/etc/malloc.conf"
5439 * symbolic link's name.
5441 buf[linklen] = '\0';
5444 /* No configuration specified. */
5450 if (issetugid() == 0 && (opts =
5451 getenv("MALLOC_OPTIONS")) != NULL) {
5453 * Do nothing; opts is already initialized to
5454 * the value of the MALLOC_OPTIONS environment
5458 /* No configuration specified. */
5464 if (_malloc_options != NULL) {
5466 * Use options that were compiled into the
5469 opts = _malloc_options;
5471 /* No configuration specified. */
5483 for (j = 0; opts[j] != '\0'; j++) {
5487 /* Parse repetition count, if any. */
5488 for (nreps = 0, nseen = false;; j++, nseen = true) {
5490 case '0': case '1': case '2': case '3':
5491 case '4': case '5': case '6': case '7':
5492 case '8': case '9':
5493 nreps *= 10;
5494 nreps += opts[j] - '0';
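/*
 * Editor's illustrative note (not in the original source): with
 * MALLOC_OPTIONS="3N" the digits accumulate to nreps == 3 and the
 * following 'N' case is then executed three times, i.e.
 * opt_narenas_lshift += 3; an option letter with no leading count is
 * applied once.
 */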
5504 for (k = 0; k < nreps; k++) {
5513 if (opt_lg_cspace_max - 1 >
5514 opt_lg_qspace_max &&
5515 opt_lg_cspace_max >
5516 LG_CACHELINE)
5517 opt_lg_cspace_max--;
5520 if (opt_lg_cspace_max < PAGE_SHIFT
5521 - 1)
5522 opt_lg_cspace_max++;
5535 if (opt_lg_medium_max > PAGE_SHIFT)
5536 opt_lg_medium_max--;
5539 if (opt_lg_medium_max + 1 <
5541 opt_lg_medium_max++;
5544 if (opt_lg_dirty_mult + 1 <
5545 (sizeof(size_t) << 3))
5546 opt_lg_dirty_mult++;
5549 if (opt_lg_dirty_mult >= 0)
5550 opt_lg_dirty_mult--;
5552 #ifdef MALLOC_TCACHE
5554 if (opt_lg_tcache_gc_sweep >= 0)
5555 opt_lg_tcache_gc_sweep--;
5558 if (opt_lg_tcache_gc_sweep + 1 <
5559 (sizeof(size_t) << 3))
5560 opt_lg_tcache_gc_sweep++;
5563 if (opt_lg_tcache_nslots > 0)
5564 opt_lg_tcache_nslots--;
5567 if (opt_lg_tcache_nslots + 1 <
5568 (sizeof(size_t) << 3))
5569 opt_lg_tcache_nslots++;
5579 /*
5580 * Chunks always require at least one
5581 * header page, plus enough room to
5582 * hold a run for the largest medium
5583 * size class (one page more than the
5584 * size).
5585 */
5586 if ((1U << (opt_lg_chunk - 1)) >=
5587 (2U << PAGE_SHIFT) + (1U <<
5588 opt_lg_medium_max))
5589 opt_lg_chunk--;
5592 if (opt_lg_chunk + 1 <
5593 (sizeof(size_t) << 3))
5594 opt_lg_chunk++;
5607 opt_narenas_lshift--;
5610 opt_narenas_lshift++;
5613 opt_stats_print = false;
5616 opt_stats_print = true;
5619 if (opt_lg_qspace_max > LG_QUANTUM)
5620 opt_lg_qspace_max--;
5623 if (opt_lg_qspace_max + 1 <
5624 opt_lg_cspace_max)
5625 opt_lg_qspace_max++;
5640 opt_xmalloc = false;
5656 _malloc_message(_getprogname(),
5657 ": (malloc) Unsupported character "
5658 "in malloc options: '", cbuf,
5667 /* Make sure that there is some method for acquiring memory. */
5668 if (opt_dss == false && opt_mmap == false)
5669 opt_mmap = true;
5671 if (opt_stats_print) {
5672 /* Print statistics at exit. */
5673 atexit(stats_print_atexit);
5677 /* Set variables according to the value of opt_lg_[qc]space_max. */
5678 qspace_max = (1U << opt_lg_qspace_max);
5679 cspace_min = CACHELINE_CEILING(qspace_max);
5680 if (cspace_min == qspace_max)
5681 cspace_min += CACHELINE;
5682 cspace_max = (1U << opt_lg_cspace_max);
5683 sspace_min = SUBPAGE_CEILING(cspace_max);
5684 if (sspace_min == cspace_max)
5685 sspace_min += SUBPAGE;
5686 assert(sspace_min < PAGE_SIZE);
5687 sspace_max = PAGE_SIZE - SUBPAGE;
5688 medium_max = (1U << opt_lg_medium_max);
5691 assert(LG_QUANTUM >= LG_TINY_MIN);
5693 assert(ntbins <= LG_QUANTUM);
5694 nqbins = qspace_max >> LG_QUANTUM;
5695 ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1;
5696 nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1;
5699 * Compute medium size class spacing and the number of medium size
5700 * classes. Limit spacing to no more than pagesize, but if possible
5701 * use the smallest spacing that does not exceed NMBINS_MAX medium size
5702 * classes.
5703 */
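/*
 * Editor's illustrative sketch (not in the original source), with made-up
 * values: suppose medium_min = 4 KiB, medium_max = 32 KiB, PAGE_SHIFT = 12,
 * LG_SUBPAGE = 8 and NMBINS_MAX = 16. Starting at 256-byte spacing:
 *
 *	lg_mspace =  8: nmbins = ((32768 - 4096) >>  8) + 1 == 113  (too many)
 *	lg_mspace =  9: nmbins == 57;  lg_mspace = 10: nmbins == 29 (too many)
 *	lg_mspace = 11: nmbins = ((32768 - 4096) >> 11) + 1 == 15   (<= 16, stop)
 *
 * so medium classes end up spaced 2 KiB apart, still below the page size.
 */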
5704 lg_mspace = LG_SUBPAGE;
5705 nmbins = ((medium_max - medium_min) >> lg_mspace) + 1;
5706 while (lg_mspace < PAGE_SHIFT && nmbins > NMBINS_MAX) {
5707 lg_mspace = lg_mspace + 1;
5708 nmbins = ((medium_max - medium_min) >> lg_mspace) + 1;
5710 mspace_mask = (1U << lg_mspace) - 1U;
5712 mbin0 = ntbins + nqbins + ncbins + nsbins;
5713 nbins = mbin0 + nmbins;
5715 * The small_size2bin lookup table uses uint8_t to encode each bin
5716 * index, so we cannot support more than 256 small size classes. This
5717 * limit is difficult to exceed (not even possible with 16B quantum and
5718 * 4KiB pages), and such configurations are impractical, but
5719 * nonetheless we need to protect against this case in order to avoid
5720 * undefined behavior.
5721 */
5722 if (mbin0 > 256) {
5723 char line_buf[UMAX2S_BUFSIZE];
5724 _malloc_message(_getprogname(),
5725 ": (malloc) Too many small size classes (",
5726 umax2s(mbin0, 10, line_buf), " > max 256)\n");
5730 if (small_size2bin_init()) {
5731 malloc_mutex_unlock(&init_lock);
5735 #ifdef MALLOC_TCACHE
5736 if (opt_lg_tcache_nslots > 0) {
5737 tcache_nslots = (1U << opt_lg_tcache_nslots);
5739 /* Compute incremental GC event threshold. */
5740 if (opt_lg_tcache_gc_sweep >= 0) {
5741 tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
5742 nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
5743 0) ? 0 : 1);
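/*
 * Editor's illustrative note (not in the original source), with made-up
 * values: if opt_lg_tcache_gc_sweep were 13 (a full sweep every 8192
 * allocation events) and nbins were 30, then
 * tcache_gc_incr = (8192 / 30) + 1 = 273 + 1 = 274, i.e. one bin is
 * inspected roughly every 274 events so that all bins are visited about
 * once per 8192 events.
 */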
5750 /* Set variables according to the value of opt_lg_chunk. */
5751 chunksize = (1LU << opt_lg_chunk);
5752 chunksize_mask = chunksize - 1;
5753 chunk_npages = (chunksize >> PAGE_SHIFT);
5758 * Compute the header size such that it is large enough to
5759 * contain the page map.
5761 header_size = sizeof(arena_chunk_t) +
5762 (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
5763 arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
5764 ((header_size & PAGE_MASK) != 0);
5766 arena_maxclass = chunksize - (arena_chunk_header_npages <<
5767 PAGE_SHIFT);
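/*
 * Editor's illustrative note (not in the original source), with made-up
 * struct sizes: assuming 4 KiB pages, 1 MiB chunks (chunk_npages == 256),
 * a 96-byte arena_chunk_t and 16-byte arena_chunk_map_t entries:
 *
 *	header_size = 96 + 16 * 255 = 4176 bytes
 *	arena_chunk_header_npages = (4176 >> 12) + 1 = 2
 *	arena_maxclass = 1 MiB - 2 * 4 KiB = 1040384 bytes
 *
 * i.e. two pages of each chunk are reserved for the header and page map.
 */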
5769 UTRACE((void *)(intptr_t)(-1), 0, 0);
5772 malloc_mutex_init(&chunks_mtx);
5773 memset(&stats_chunks, 0, sizeof(chunk_stats_t));
5776 /* Various sanity checks that regard configuration. */
5777 assert(chunksize >= PAGE_SIZE);
5779 /* Initialize chunks data. */
5780 malloc_mutex_init(&huge_mtx);
5781 extent_tree_ad_new(&huge);
5783 malloc_mutex_init(&dss_mtx);
5785 dss_prev = dss_base;
5787 extent_tree_szad_new(&dss_chunks_szad);
5788 extent_tree_ad_new(&dss_chunks_ad);
5796 /* Initialize base allocation data structures. */
5802 * Allocate a base chunk here, since it doesn't actually have to be
5803 * chunk-aligned. Doing this before allocating any other chunks allows
5804 * the use of space that would otherwise be wasted.
5807 base_pages_alloc(0);
5810 malloc_mutex_init(&base_mtx);
5814 * For SMP systems, create more than one arena per CPU by
5815 * default.
5816 */
5817 #ifdef MALLOC_TCACHE
5818 if (tcache_nslots) {
5820 * Only large object allocation/deallocation is
5821 * guaranteed to acquire an arena mutex, so we can get
5822 * away with fewer arenas than without thread caching.
5824 opt_narenas_lshift += 1;
5828 * All allocations must acquire an arena mutex, so use
5829 * plenty of arenas.
5830 */
5831 opt_narenas_lshift += 2;
5832 #ifdef MALLOC_TCACHE
5837 /* Determine how many arenas to use. */
5839 if (opt_narenas_lshift > 0) {
5840 if ((narenas << opt_narenas_lshift) > narenas)
5841 narenas <<= opt_narenas_lshift;
5843 * Make sure not to exceed the limits of what base_alloc() can
5844 * handle.
5845 */
5846 if (narenas * sizeof(arena_t *) > chunksize)
5847 narenas = chunksize / sizeof(arena_t *);
5848 } else if (opt_narenas_lshift < 0) {
5849 if ((narenas >> -opt_narenas_lshift) < narenas)
5850 narenas >>= -opt_narenas_lshift;
5851 /* Make sure there is at least one arena. */
5858 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
5859 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
5860 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
5861 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
5862 223, 227, 229, 233, 239, 241, 251, 257, 263};
5863 unsigned nprimes, parenas;
5866 * Pick a prime number of hash arenas that is more than narenas
5867 * so that direct hashing of pthread_self() pointers tends to
5868 * spread allocations evenly among the arenas.
5869 */
5870 assert((narenas & 1) == 0); /* narenas must be even. */
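/*
 * Editor's illustrative note (not in the original source): pthread_self()
 * returns pointers to thread structures that share their low-order bits,
 * say addresses of the form base + 1024 * k. Reduced modulo a power of two
 * such as 16, every thread would land on the same arena index; reduced
 * modulo the prime 17, the values 1024 * k cycle through all 17 residues
 * because gcd(1024, 17) == 1, so threads spread across all arenas.
 */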
5871 nprimes = (sizeof(primes) >> LG_SIZEOF_INT);
5872 parenas = primes[nprimes - 1]; /* In case not enough primes. */
5873 for (i = 1; i < nprimes; i++) {
5874 if (primes[i] > narenas) {
5875 parenas = primes[i];
5887 /* Allocate and initialize arenas. */
5888 arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
5889 if (arenas == NULL) {
5890 malloc_mutex_unlock(&init_lock);
5894 * Zero the array. In practice, this should always be pre-zeroed,
5895 * since it was just mmap()ed, but let's be sure.
5897 memset(arenas, 0, sizeof(arena_t *) * narenas);
5900 * Initialize one arena here. The rest are lazily created in
5901 * choose_arena_hard().
5902 */
5903 arenas_extend(0);
5904 if (arenas[0] == NULL) {
5905 malloc_mutex_unlock(&init_lock);
5910 * Assign the initial arena to the initial thread, in order to avoid
5911 * spurious creation of an extra arena if the application switches to
5912 * threaded mode.
5913 */
5914 arenas_map = arenas[0];
5916 malloc_spin_init(&arenas_lock);
5918 malloc_initialized = true;
5919 malloc_mutex_unlock(&init_lock);
5924 * End general internal functions.
5926 /******************************************************************************/
5928 * Begin malloc(3)-compatible functions.
5936 if (malloc_init()) {
5942 if (opt_sysv == false)
5946 _malloc_message(_getprogname(),
5947 ": (malloc) Error in malloc(): "
5948 "invalid size 0\n", "", "");
5956 ret = imalloc(size);
5961 _malloc_message(_getprogname(),
5962 ": (malloc) Error in malloc(): out of memory\n", "",
5970 UTRACE(0, size, ret);
5975 posix_memalign(void **memptr, size_t alignment, size_t size)
5984 if (opt_sysv == false)
5988 _malloc_message(_getprogname(),
5989 ": (malloc) Error in "
5990 "posix_memalign(): invalid "
5991 "size 0\n", "", "");
6001 /* Make sure that alignment is a large enough power of 2. */
6002 if (((alignment - 1) & alignment) != 0
6003 || alignment < sizeof(void *)) {
6005 _malloc_message(_getprogname(),
6006 ": (malloc) Error in posix_memalign(): "
6007 "invalid alignment\n", "", "");
6015 result = ipalloc(alignment, size);
6018 if (result == NULL) {
6020 _malloc_message(_getprogname(),
6021 ": (malloc) Error in posix_memalign(): out of memory\n",
6033 UTRACE(0, size, result);
6038 calloc(size_t num, size_t size)
6043 if (malloc_init()) {
6049 num_size = num * size;
6050 if (num_size == 0) {
6051 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
6058 * Try to avoid division here. We know that it isn't possible to
6059 * overflow during multiplication if neither operand uses any of the
6060 * most significant half of the bits in a size_t.
6061 */
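/*
 * Editor's illustrative note (not in the original source): on a 32-bit
 * system the mask used in the check below is SIZE_T_MAX << 16 ==
 * 0xffff0000. If both num and size fit in 16 bits, their product cannot
 * exceed 32 bits, so no division is needed. Otherwise the division check
 * catches wrap-around, e.g. num == 0x20000 and size == 0x30000 gives
 * num_size == 0 after truncation, and 0 / 0x30000 != 0x20000 flags the
 * overflow.
 */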
6062 } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
6063 && (num_size / size != num)) {
6064 /* size_t overflow. */
6069 ret = icalloc(num_size);
6074 _malloc_message(_getprogname(),
6075 ": (malloc) Error in calloc(): out of memory\n", "",
6082 UTRACE(0, num_size, ret);
6087 realloc(void *ptr, size_t size)
6092 if (opt_sysv == false)
6103 assert(malloc_initialized);
6105 ret = iralloc(ptr, size);
6109 _malloc_message(_getprogname(),
6110 ": (malloc) Error in realloc(): out of "
6111 "memory\n", "", "");
6120 ret = imalloc(size);
6124 _malloc_message(_getprogname(),
6125 ": (malloc) Error in realloc(): out of "
6126 "memory\n", "", "");
6134 UTRACE(ptr, size, ret);
6144 assert(malloc_initialized);
6151 * End malloc(3)-compatible functions.
6153 /******************************************************************************/
6155 * Begin non-standard functions.
6159 malloc_usable_size(const void *ptr)
6162 assert(ptr != NULL);
6164 return (isalloc(ptr));
6168 * End non-standard functions.
6170 /******************************************************************************/
6172 * Begin library-private functions.
6176 * We provide an unpublished interface in order to receive notifications from
6177 * the pthreads library whenever a thread exits. This allows us to clean up
6181 _malloc_thread_cleanup(void)
6184 #ifdef MALLOC_TCACHE
6185 tcache_t *tcache = tcache_tls;
6187 if (tcache != NULL) {
6188 assert(tcache != (void *)(uintptr_t)1);
6189 tcache_destroy(tcache);
6190 tcache_tls = (void *)(uintptr_t)1;
6196 * The following functions are used by threading libraries for protection of
6197 * malloc during fork(). These functions are only called if the program is
6198 * running in threaded mode, so there is no need to check whether the program
6199 * is threaded here.
6200 */
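/*
 * Editor's illustrative sketch (not in the original source): a threading
 * library that did not call these hooks directly could arrange the same
 * protection itself with pthread_atfork(3), for example:
 *
 *	#include <pthread.h>
 *
 *	void _malloc_prefork(void);
 *	void _malloc_postfork(void);
 *
 *	static void
 *	init_malloc_fork_hooks(void)
 *	{
 *		// Lock before fork(); unlock in both parent and child after.
 *		pthread_atfork(_malloc_prefork, _malloc_postfork,
 *		    _malloc_postfork);
 *	}
 *
 * The hook names refer to the library-private functions defined below.
 */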
6203 _malloc_prefork(void)
6207 /* Acquire all mutexes in a safe order. */
6208 malloc_spin_lock(&arenas_lock);
6209 for (i = 0; i < narenas; i++) {
6210 if (arenas[i] != NULL)
6211 malloc_spin_lock(&arenas[i]->lock);
6214 malloc_mutex_lock(&base_mtx);
6216 malloc_mutex_lock(&huge_mtx);
6219 malloc_mutex_lock(&dss_mtx);
6224 _malloc_postfork(void)
6228 /* Release all mutexes, now that fork() has completed. */
6231 malloc_mutex_unlock(&dss_mtx);
6234 malloc_mutex_unlock(&huge_mtx);
6236 malloc_mutex_unlock(&base_mtx);
6238 for (i = 0; i < narenas; i++) {
6239 if (arenas[i] != NULL)
6240 malloc_spin_unlock(&arenas[i]->lock);
6242 malloc_spin_unlock(&arenas_lock);
6246 * End library-private functions.
6248 /******************************************************************************/