2 * Copyright (C) 2006-2010 Jason Evans <jasone@FreeBSD.org>.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems. The following
33 * features are included for this purpose:
35 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 * contention and cache sloshing.
38 * + Thread-specific caching is used if there are multiple threads, which
39 * reduces the amount of locking.
 * + Cache line sharing between arenas is avoided for internal data
 *   structures.
44 * + Memory is managed in chunks and runs (chunks can be split into runs),
45 * rather than as individual pages. This provides a constant-time
46 * mechanism for associating allocations with particular arenas.
48 * Allocation requests are rounded up to the nearest size class, and no record
49 * of the original request size is maintained. Allocations are broken into
50 * categories according to size class. Assuming runtime defaults, 4 KiB pages
 * and a 16 byte quantum on a 32-bit system, the size classes in each category
 * are as follows:
 *
 *   |=========================================|
 *   | Category | Subcategory      |     Size |
 *   |=========================================|
 *   | Small    | Tiny             |        2 |
 *   |          |                  |        4 |
 *   |          |                  |        8 |
 *   |          |------------------+----------|
 *   |          | Quantum-spaced   |       16 |
 *   |          |                  |      ... |
 *   |          |                  |      128 |
 *   |          |------------------+----------|
 *   |          | Cacheline-spaced |      192 |
 *   |          |                  |      ... |
 *   |          |                  |      512 |
 *   |          |------------------+----------|
 *   |          | Sub-page         |      760 |
 *   |          |                  |      ... |
 *   |=========================================|
 *   | Medium                      |    4 KiB |
 *   |                             |      ... |
 *   |                             |   32 KiB |
 *   |=========================================|
 *   | Large                       |   36 KiB |
 *   |                             |      ... |
 *   |=========================================|
 *   | Huge                        |    4 MiB |
 *   |                             |    8 MiB |
 *   |                             |      ... |
 *   |=========================================|
 * Different mechanisms are used according to category:
108 * Small/medium : Each size class is segregated into its own set of runs.
 *                Each run maintains a bitmap of which regions are in use.
112 * Large : Each allocation is backed by a dedicated run. Metadata are stored
113 * in the associated arena chunk header maps.
115 * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
116 * Metadata are stored in a separate red-black tree.
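 *
 * As a rough illustration of the rounding, assuming the runtime defaults
 * described above (16 byte quantum, 64 byte cacheline, 4 KiB pages, 4 MiB
 * chunks): a 100 byte request becomes a 112 byte quantum-spaced allocation,
 * a 300 byte request becomes a 320 byte cacheline-spaced allocation, and a
 * 5 MiB request is huge, backed by two 4 MiB chunks (8 MiB of address space).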
118 *******************************************************************************
122 * MALLOC_PRODUCTION disables assertions and statistics gathering. It also
123 * defaults the A and J runtime options to off. These settings are appropriate
124 * for production systems.
126 #ifndef MALLOC_PRODUCTION
127 #define MALLOC_PRODUCTION
130 #ifndef MALLOC_PRODUCTION
 * MALLOC_DEBUG enables assertions and other sanity checks, and disables
 * inline functions.
135 # define MALLOC_DEBUG
137 /* MALLOC_STATS enables statistics calculation. */
138 # define MALLOC_STATS
 * MALLOC_TINY enables support for tiny objects, which are smaller than one
 * quantum.
148 * MALLOC_TCACHE enables a thread-specific caching layer for small and medium
149 * objects. This makes it possible to allocate/deallocate objects without any
150 * locking when the cache is in the steady state.
152 #define MALLOC_TCACHE
155 * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
156 * segment (DSS). In an ideal world, this functionality would be completely
157 * unnecessary, but we are burdened by history and the lack of resource limits
158 * for anonymous mapped memory.
162 #include <sys/cdefs.h>
163 __FBSDID("$FreeBSD$");
165 #include "libc_private.h"
169 #include "spinlock.h"
170 #include "namespace.h"
171 #include <sys/mman.h>
172 #include <sys/param.h>
173 #include <sys/time.h>
174 #include <sys/types.h>
175 #include <sys/sysctl.h>
177 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
179 #include <machine/cpufunc.h>
180 #include <machine/param.h>
181 #include <machine/vmparam.h>
192 #include <inttypes.h>
198 #include "un-namespace.h"
200 #include "libc_private.h"
204 #if (defined(MALLOC_TCACHE) && defined(MALLOC_STATS))
210 /* Disable inlining to make debugging easier. */
214 /* Size of stack-allocated buffer passed to strerror_r(). */
215 #define STRERROR_BUF 64
218 * Minimum alignment of allocations is 2^LG_QUANTUM bytes.
221 # define LG_QUANTUM 4
222 # define LG_SIZEOF_PTR 2
223 # define CPU_SPINWAIT __asm__ volatile("pause")
225 # define TLS_MODEL /* clang does not support tls_model yet */
227 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
231 # define LG_QUANTUM 4
232 # define LG_SIZEOF_PTR 3
233 # define TLS_MODEL /* default */
236 # define LG_QUANTUM 4
237 # define LG_SIZEOF_PTR 3
241 # define LG_QUANTUM 4
242 # define LG_SIZEOF_PTR 3
243 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
246 # define LG_QUANTUM 4
247 # define LG_SIZEOF_PTR 3
248 # define CPU_SPINWAIT __asm__ volatile("pause")
250 # define TLS_MODEL /* clang does not support tls_model yet */
252 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
256 # define LG_QUANTUM 3
257 # define LG_SIZEOF_PTR 2
261 # define LG_QUANTUM 3
262 # define LG_SIZEOF_PTR 2
266 # define LG_QUANTUM 4
267 # define LG_SIZEOF_PTR 3
268 # define TLS_MODEL /* default */
269 #elif defined(__powerpc__)
270 # define LG_QUANTUM 4
271 # define LG_SIZEOF_PTR 2
272 # define TLS_MODEL /* default */
275 # define LG_QUANTUM 4
278 #define QUANTUM ((size_t)(1U << LG_QUANTUM))
279 #define QUANTUM_MASK (QUANTUM - 1)
281 #define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
283 /* sizeof(int) == (1U << LG_SIZEOF_INT). */
284 #ifndef LG_SIZEOF_INT
285 # define LG_SIZEOF_INT 2
288 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
289 #if (!defined(PIC) && !defined(NO_TLS))
294 /* MALLOC_TCACHE requires TLS. */
295 # ifdef MALLOC_TCACHE
296 # undef MALLOC_TCACHE
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.
304 #define LG_CHUNK_DEFAULT 22
307 * The minimum ratio of active:dirty pages per arena is computed as:
309 * (nactive >> opt_lg_dirty_mult) >= ndirty
311 * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
312 * times as many active pages as dirty pages.
314 #define LG_DIRTY_MULT_DEFAULT 5
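
/*
 * For example (with the default above), an arena with 10000 active pages does
 * not purge until more than 10000 >> 5 = 312 unused dirty pages have
 * accumulated.
 */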
317 * Maximum size of L1 cache line. This is used to avoid cache line aliasing.
318 * In addition, this controls the spacing of cacheline-spaced size classes.
320 #define LG_CACHELINE 6
321 #define CACHELINE ((size_t)(1U << LG_CACHELINE))
322 #define CACHELINE_MASK (CACHELINE - 1)
325 * Subpages are an artificially designated partitioning of pages. Their only
326 * purpose is to support subpage-spaced size classes.
 * There must be at least 4 subpages per page, due to the way size classes are
 * handled.
332 #define SUBPAGE ((size_t)(1U << LG_SUBPAGE))
333 #define SUBPAGE_MASK (SUBPAGE - 1)
336 /* Smallest size class to support. */
337 # define LG_TINY_MIN 1
341 * Maximum size class that is a multiple of the quantum, but not (necessarily)
342 * a power of 2. Above this size, allocations are rounded up to the nearest
345 #define LG_QSPACE_MAX_DEFAULT 7
348 * Maximum size class that is a multiple of the cacheline, but not (necessarily)
349 * a power of 2. Above this size, allocations are rounded up to the nearest
352 #define LG_CSPACE_MAX_DEFAULT 9
355 * Maximum medium size class. This must not be more than 1/4 of a chunk
356 * (LG_MEDIUM_MAX_DEFAULT <= LG_CHUNK_DEFAULT - 2).
358 #define LG_MEDIUM_MAX_DEFAULT 15
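
/*
 * With the defaults above, this gives a 2^15 = 32 KiB maximum medium class;
 * since LG_CHUNK_DEFAULT - 2 = 20, the constraint holds with room to spare.
 */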
361 * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
362 * as small as possible such that this setting is still honored, without
363 * violating other constraints. The goal is to make runs as small as possible
364 * without exceeding a per run external fragmentation threshold.
366 * We use binary fixed point math for overhead computations, where the binary
367 * point is implicitly RUN_BFP bits to the left.
369 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
370 * honored for some/all object sizes, since there is one bit of header overhead
371 * per object (plus a constant). This constraint is relaxed (ignored) for runs
372 * that are so small that the per-region overhead is greater than:
 *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
377 /* \/ Implicit binary fixed point. */
378 #define RUN_MAX_OVRHD 0x0000003dU
379 #define RUN_MAX_OVRHD_RELAX 0x00001800U
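
/*
 * As a worked example: RUN_MAX_OVRHD is 0x3d = 61, so with the binary point
 * 12 bits to the left (the RUN_BFP value assumed here), runs are sized so
 * that header overhead stays at or below 61/4096, i.e. roughly 1.5% of the
 * run, whenever the other constraints allow it.
 */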
381 /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
382 #define RUN_MAX_SMALL \
383 (arena_maxclass <= (1U << (CHUNK_MAP_LG_PG_RANGE + PAGE_SHIFT)) \
	    ? arena_maxclass : (1U << (CHUNK_MAP_LG_PG_RANGE +		\
	    PAGE_SHIFT)))
388 * Hyper-threaded CPUs may need a special instruction inside spin loops in
389 * order to yield to another virtual CPU. If no such instruction is defined
390 * above, make CPU_SPINWAIT a no-op.
393 # define CPU_SPINWAIT
397 * Adaptive spinning must eventually switch to blocking, in order to avoid the
398 * potential for priority inversion deadlock. Backing off past a certain point
399 * can actually waste time.
401 #define LG_SPIN_LIMIT 11
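
/*
 * With LG_SPIN_LIMIT == 11, the spin loop in malloc_spin_lock() below executes
 * at most 2^1 + 2^2 + ... + 2^11 = 4094 CPU_SPINWAIT iterations before it
 * falls back to blocking on the mutex.
 */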
 * Default number of cache slots for each bin in the thread cache (0:
 * disabled).
408 # define LG_TCACHE_NSLOTS_DEFAULT 7
410 * (1U << opt_lg_tcache_gc_sweep) is the approximate number of
411 * allocation events between full GC sweeps (-1: disabled). Integer
412 * rounding may cause the actual number to be slightly higher, since GC is
413 * performed incrementally.
415 # define LG_TCACHE_GC_SWEEP_DEFAULT 13
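/*
 * For example, with the default of 13, a full incremental GC pass over all
 * cached bins completes approximately once per 2^13 = 8192 allocation events
 * (slightly more in practice, per the rounding note above).
 */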
418 /******************************************************************************/
421 * Mutexes based on spinlocks. We can't use normal pthread spinlocks in all
422 * places, because they require malloc()ed memory, which causes bootstrapping
423 * issues in some cases.
429 /* Set to true once the allocator has been initialized. */
430 static bool malloc_initialized = false;
432 /* Used to avoid initialization races. */
433 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
435 /******************************************************************************/
437 * Statistics data structures.
443 typedef struct tcache_bin_stats_s tcache_bin_stats_t;
444 struct tcache_bin_stats_s {
 * Number of allocation requests that corresponded to the size of this
 * bin.
453 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
454 struct malloc_bin_stats_s {
 * Number of allocation requests that corresponded to the size of this
 * bin.
462 /* Number of tcache fills from this bin. */
465 /* Number of tcache flushes to this bin. */
469 /* Total number of runs created for this bin's size class. */
473 * Total number of runs reused by extracting them from the runs tree for
474 * this bin's size class.
478 /* High-water mark for this bin. */
481 /* Current number of runs in this bin. */
485 typedef struct malloc_large_stats_s malloc_large_stats_t;
486 struct malloc_large_stats_s {
488 * Number of allocation requests that corresponded to this size class.
492 /* High-water mark for this size class. */
495 /* Current number of runs of this size class. */
499 typedef struct arena_stats_s arena_stats_t;
500 struct arena_stats_s {
501 /* Number of bytes currently mapped. */
505 * Total number of purge sweeps, total number of madvise calls made,
 * and total pages purged in order to keep dirty unused memory under
 * control.
513 /* Per-size-category statistics. */
514 size_t allocated_small;
515 uint64_t nmalloc_small;
516 uint64_t ndalloc_small;
518 size_t allocated_medium;
519 uint64_t nmalloc_medium;
520 uint64_t ndalloc_medium;
522 size_t allocated_large;
523 uint64_t nmalloc_large;
524 uint64_t ndalloc_large;
527 * One element for each possible size class, including sizes that
528 * overlap with bin size classes. This is necessary because ipalloc()
 * sometimes has to use such large objects in order to assure proper
 * alignment.
532 malloc_large_stats_t *lstats;
535 typedef struct chunk_stats_s chunk_stats_t;
536 struct chunk_stats_s {
537 /* Number of chunks that were allocated. */
540 /* High-water mark for number of chunks allocated. */
544 * Current number of chunks allocated. This value isn't maintained for
 * any other purpose, so keep track of it in order to be able to set
 * highchunks.
551 #endif /* #ifdef MALLOC_STATS */
553 /******************************************************************************/
555 * Extent data structures.
558 /* Tree of extents. */
559 typedef struct extent_node_s extent_node_t;
560 struct extent_node_s {
562 /* Linkage for the size/address-ordered tree. */
563 rb_node(extent_node_t) link_szad;
566 /* Linkage for the address-ordered tree. */
567 rb_node(extent_node_t) link_ad;
569 /* Pointer to the extent that this tree node is responsible for. */
572 /* Total region size. */
575 typedef rb_tree(extent_node_t) extent_tree_t;
577 /******************************************************************************/
579 * Arena data structures.
582 typedef struct arena_s arena_t;
583 typedef struct arena_bin_s arena_bin_t;
585 /* Each element of the chunk map corresponds to one page within the chunk. */
586 typedef struct arena_chunk_map_s arena_chunk_map_t;
587 struct arena_chunk_map_s {
589 * Linkage for run trees. There are two disjoint uses:
591 * 1) arena_t's runs_avail tree.
592 * 2) arena_run_t conceptually uses this linkage for in-use non-full
593 * runs, rather than directly embedding linkage.
595 rb_node(arena_chunk_map_t) link;
598 * Run address (or size) and various flags are stored together. The bit
599 * layout looks like (assuming 32-bit system):
601 * ???????? ???????? ????cccc ccccdzla
 * ? : Unallocated: Run address for first/last pages, unset for internal
 *     pages.
605 * Small/medium: Don't care.
606 * Large: Run size for first page, unset for trailing pages.
 * c : refcount (could overflow for PAGE_SIZE >= 128 KiB)
 * d : dirty?
 * z : zeroed?
 * l : large?
 * a : allocated?
614 * Following are example bit patterns for the three types of runs.
 * p : run page offset
 * s : run size
 * x : don't care
 * - : 0
 *
 *   Unallocated:
 *     ssssssss ssssssss ssss---- --------
 *     xxxxxxxx xxxxxxxx xxxx---- ----d---
 *     ssssssss ssssssss ssss---- -----z--
 *
 *   Small/medium:
 *     pppppppp ppppcccc cccccccc cccc---a
 *     pppppppp ppppcccc cccccccc cccc---a
 *     pppppppp ppppcccc cccccccc cccc---a
 *
 *   Large:
 *     ssssssss ssssssss ssss---- ------la
 *     -------- -------- -------- ------la
 *     -------- -------- -------- ------la
638 #define CHUNK_MAP_PG_MASK ((size_t)0xfff00000U)
639 #define CHUNK_MAP_PG_SHIFT 20
640 #define CHUNK_MAP_LG_PG_RANGE 12
642 #define CHUNK_MAP_RC_MASK ((size_t)0xffff0U)
643 #define CHUNK_MAP_RC_ONE ((size_t)0x00010U)
645 #define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU)
646 #define CHUNK_MAP_DIRTY ((size_t)0x8U)
647 #define CHUNK_MAP_ZEROED ((size_t)0x4U)
648 #define CHUNK_MAP_LARGE ((size_t)0x2U)
649 #define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
650 #define CHUNK_MAP_KEY (CHUNK_MAP_DIRTY | CHUNK_MAP_ALLOCATED)
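
/*
 * An illustrative decoding of a map entry for a small/medium run page, using
 * only the masks above (the variable names here are hypothetical):
 *
 *	size_t bits = chunk->map[pageind].bits;
 *	size_t pageoff = (bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT;
 *	size_t refcount = (bits & CHUNK_MAP_RC_MASK) >> 4;
 *	bool dirty = (bits & CHUNK_MAP_DIRTY) != 0;
 *	bool allocated = (bits & CHUNK_MAP_ALLOCATED) != 0;
 *
 * For instance, bits == 0x00300051 decodes to page offset 3, refcount 5,
 * not dirty, not large, allocated.
 */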
652 typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
653 typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
655 /* Arena chunk header. */
656 typedef struct arena_chunk_s arena_chunk_t;
657 struct arena_chunk_s {
658 /* Arena that owns the chunk. */
661 /* Linkage for the arena's chunks_dirty tree. */
662 rb_node(arena_chunk_t) link_dirty;
665 * True if the chunk is currently in the chunks_dirty tree, due to
666 * having at some point contained one or more dirty pages. Removal
667 * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
671 /* Number of dirty pages. */
674 /* Map of pages within chunk that keeps track of free/large/small. */
675 arena_chunk_map_t map[1]; /* Dynamically sized. */
677 typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
679 typedef struct arena_run_s arena_run_t;
683 # define ARENA_RUN_MAGIC 0x384adf93
686 /* Bin this run is associated with. */
689 /* Index of first element that might have a free region. */
690 unsigned regs_minelm;
692 /* Number of free regions in run. */
695 /* Bitmask of in-use regions (0: in use, 1: free). */
696 unsigned regs_mask[1]; /* Dynamically sized. */
 * Current run being used to service allocations of this bin's size
 * class.
707 * Tree of non-full runs. This tree is used when looking for an
708 * existing run when runcur is no longer usable. We choose the
709 * non-full run that is lowest in memory; this policy tends to keep
710 * objects packed well, and it can also help reduce the number of
711 * almost-empty chunks.
713 arena_run_tree_t runs;
715 /* Size of regions in a run for this bin's size class. */
718 /* Total size of a run for this bin's size class. */
721 /* Total number of regions in a run for this bin's size class. */
724 /* Number of elements in a run's regs_mask for this bin's size class. */
725 uint32_t regs_mask_nelms;
727 /* Offset of first region in a run for this bin's size class. */
728 uint32_t reg0_offset;
731 /* Bin statistics. */
732 malloc_bin_stats_t stats;
737 typedef struct tcache_s tcache_t;
743 # define ARENA_MAGIC 0x947d3d24
746 /* All operations on this arena require that lock be locked. */
747 pthread_mutex_t lock;
751 # ifdef MALLOC_TCACHE
753 * List of tcaches for extant threads associated with this arena.
754 * Stats from these are merged incrementally, and at exit.
756 ql_head(tcache_t) tcache_ql;
760 /* Tree of dirty-page-containing chunks this arena manages. */
761 arena_chunk_tree_t chunks_dirty;
764 * In order to avoid rapid chunk allocation/deallocation when an arena
765 * oscillates right on the cusp of needing a new chunk, cache the most
766 * recently freed chunk. The spare is left in the arena's chunk trees
767 * until it is deleted.
769 * There is one spare chunk per arena, rather than one spare total, in
770 * order to avoid interactions between multiple threads that could make
771 * a single spare inadequate.
773 arena_chunk_t *spare;
775 /* Number of pages in active runs. */
779 * Current count of pages within unused runs that are potentially
780 * dirty, and for which madvise(... MADV_FREE) has not been called. By
781 * tracking this, we can institute a limit on how much dirty unused
782 * memory is mapped for each arena.
787 * Size/address-ordered tree of this arena's available runs. This tree
788 * is used for first-best-fit run allocation.
790 arena_avail_tree_t runs_avail;
793 * bins is used to store trees of free regions of the following sizes,
 * assuming a 16-byte quantum, 4 KiB page size, and default
 * MALLOC_OPTIONS.
835 arena_bin_t bins[1]; /* Dynamically sized. */
838 /******************************************************************************/
840 * Thread cache data structures.
844 typedef struct tcache_bin_s tcache_bin_t;
845 struct tcache_bin_s {
847 tcache_bin_stats_t tstats;
849 unsigned low_water; /* Min # cached since last GC. */
850 unsigned high_water; /* Max # cached since last GC. */
851 unsigned ncached; /* # of cached objects. */
852 void *slots[1]; /* Dynamically sized. */
857 ql_elm(tcache_t) link; /* Used for aggregating stats. */
859 arena_t *arena; /* This thread's arena. */
860 unsigned ev_cnt; /* Event count since incremental GC. */
861 unsigned next_gc_bin; /* Next bin to GC. */
862 tcache_bin_t *tbins[1]; /* Dynamically sized. */
866 /******************************************************************************/
871 /* Number of CPUs. */
872 static unsigned ncpus;
874 /* Various bin-related settings. */
875 #ifdef MALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
876 # define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN))
880 static unsigned nqbins; /* Number of quantum-spaced bins. */
881 static unsigned ncbins; /* Number of cacheline-spaced bins. */
882 static unsigned nsbins; /* Number of subpage-spaced bins. */
883 static unsigned nmbins; /* Number of medium bins. */
884 static unsigned nbins;
885 static unsigned mbin0; /* mbin offset (nbins - nmbins). */
887 # define tspace_max ((size_t)(QUANTUM >> 1))
889 #define qspace_min QUANTUM
890 static size_t qspace_max;
891 static size_t cspace_min;
892 static size_t cspace_max;
893 static size_t sspace_min;
894 static size_t sspace_max;
895 #define small_maxclass sspace_max
896 #define medium_min PAGE_SIZE
897 static size_t medium_max;
898 #define bin_maxclass medium_max
901 * Soft limit on the number of medium size classes. Spacing between medium
 * size classes never exceeds pagesize, which can force more than NMBINS_MAX
903 * medium size classes.
905 #define NMBINS_MAX 16
906 /* Spacing between medium size classes. */
907 static size_t lg_mspace;
908 static size_t mspace_mask;
910 static uint8_t const *small_size2bin;
912 * const_small_size2bin is a static constant lookup table that in the common
913 * case can be used as-is for small_size2bin. For dynamically linked programs,
914 * this avoids a page of memory overhead per process.
917 #define S2B_2(i) S2B_1(i) S2B_1(i)
918 #define S2B_4(i) S2B_2(i) S2B_2(i)
919 #define S2B_8(i) S2B_4(i) S2B_4(i)
920 #define S2B_16(i) S2B_8(i) S2B_8(i)
921 #define S2B_32(i) S2B_16(i) S2B_16(i)
922 #define S2B_64(i) S2B_32(i) S2B_32(i)
923 #define S2B_128(i) S2B_64(i) S2B_64(i)
924 #define S2B_256(i) S2B_128(i) S2B_128(i)
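/*
 * Assuming S2B_1(i) emits a single table entry ("i,"), each S2B_n(i) macro
 * above expands to n copies of it, so S2B_16(S2B_QMIN + 1), for example,
 * fills sixteen consecutive lookup slots that all map to the bin for the
 * 32 byte size class.
 */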
925 static const uint8_t const_small_size2bin[PAGE_SIZE - 255] = {
927 #if (LG_QUANTUM == 4)
928 /* 64-bit system ************************/
939 S2B_16(S2B_QMIN + 1) /* 32 */
940 S2B_16(S2B_QMIN + 2) /* 48 */
941 S2B_16(S2B_QMIN + 3) /* 64 */
942 S2B_16(S2B_QMIN + 4) /* 80 */
943 S2B_16(S2B_QMIN + 5) /* 96 */
944 S2B_16(S2B_QMIN + 6) /* 112 */
945 S2B_16(S2B_QMIN + 7) /* 128 */
946 # define S2B_CMIN (S2B_QMIN + 8)
948 /* 32-bit system ************************/
958 S2B_8(S2B_QMIN + 1) /* 16 */
959 S2B_8(S2B_QMIN + 2) /* 24 */
960 S2B_8(S2B_QMIN + 3) /* 32 */
961 S2B_8(S2B_QMIN + 4) /* 40 */
962 S2B_8(S2B_QMIN + 5) /* 48 */
963 S2B_8(S2B_QMIN + 6) /* 56 */
964 S2B_8(S2B_QMIN + 7) /* 64 */
965 S2B_8(S2B_QMIN + 8) /* 72 */
966 S2B_8(S2B_QMIN + 9) /* 80 */
967 S2B_8(S2B_QMIN + 10) /* 88 */
968 S2B_8(S2B_QMIN + 11) /* 96 */
969 S2B_8(S2B_QMIN + 12) /* 104 */
970 S2B_8(S2B_QMIN + 13) /* 112 */
971 S2B_8(S2B_QMIN + 14) /* 120 */
972 S2B_8(S2B_QMIN + 15) /* 128 */
973 # define S2B_CMIN (S2B_QMIN + 16)
975 /****************************************/
976 S2B_64(S2B_CMIN + 0) /* 192 */
977 S2B_64(S2B_CMIN + 1) /* 256 */
978 S2B_64(S2B_CMIN + 2) /* 320 */
979 S2B_64(S2B_CMIN + 3) /* 384 */
980 S2B_64(S2B_CMIN + 4) /* 448 */
981 S2B_64(S2B_CMIN + 5) /* 512 */
982 # define S2B_SMIN (S2B_CMIN + 6)
983 S2B_256(S2B_SMIN + 0) /* 768 */
984 S2B_256(S2B_SMIN + 1) /* 1024 */
985 S2B_256(S2B_SMIN + 2) /* 1280 */
986 S2B_256(S2B_SMIN + 3) /* 1536 */
987 S2B_256(S2B_SMIN + 4) /* 1792 */
988 S2B_256(S2B_SMIN + 5) /* 2048 */
989 S2B_256(S2B_SMIN + 6) /* 2304 */
990 S2B_256(S2B_SMIN + 7) /* 2560 */
991 S2B_256(S2B_SMIN + 8) /* 2816 */
992 S2B_256(S2B_SMIN + 9) /* 3072 */
993 S2B_256(S2B_SMIN + 10) /* 3328 */
994 S2B_256(S2B_SMIN + 11) /* 3584 */
995 S2B_256(S2B_SMIN + 12) /* 3840 */
996 #if (PAGE_SHIFT == 13)
997 S2B_256(S2B_SMIN + 13) /* 4096 */
998 S2B_256(S2B_SMIN + 14) /* 4352 */
999 S2B_256(S2B_SMIN + 15) /* 4608 */
1000 S2B_256(S2B_SMIN + 16) /* 4864 */
1001 S2B_256(S2B_SMIN + 17) /* 5120 */
1002 S2B_256(S2B_SMIN + 18) /* 5376 */
1003 S2B_256(S2B_SMIN + 19) /* 5632 */
1004 S2B_256(S2B_SMIN + 20) /* 5888 */
1005 S2B_256(S2B_SMIN + 21) /* 6144 */
1006 S2B_256(S2B_SMIN + 22) /* 6400 */
1007 S2B_256(S2B_SMIN + 23) /* 6656 */
1008 S2B_256(S2B_SMIN + 24) /* 6912 */
1009 S2B_256(S2B_SMIN + 25) /* 7168 */
1010 S2B_256(S2B_SMIN + 26) /* 7424 */
1011 S2B_256(S2B_SMIN + 27) /* 7680 */
1012 S2B_256(S2B_SMIN + 28) /* 7936 */
1028 /* Various chunk-related settings. */
1029 static size_t chunksize;
1030 static size_t chunksize_mask; /* (chunksize - 1). */
1031 static size_t chunk_npages;
1032 static size_t arena_chunk_header_npages;
1033 static size_t arena_maxclass; /* Max size class for arenas. */
1040 /* Protects chunk-related data structures. */
1041 static malloc_mutex_t huge_mtx;
1043 /* Tree of chunks that are stand-alone huge allocations. */
1044 static extent_tree_t huge;
1048 * Protects sbrk() calls. This avoids malloc races among threads, though it
1049 * does not protect against races with threads that call sbrk() directly.
1051 static malloc_mutex_t dss_mtx;
1052 /* Base address of the DSS. */
1053 static void *dss_base;
1054 /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
1055 static void *dss_prev;
1056 /* Current upper limit on DSS addresses. */
1057 static void *dss_max;
1060 * Trees of chunks that were previously allocated (trees differ only in node
1061 * ordering). These are used when allocating chunks, in an attempt to re-use
1062 * address space. Depending on function, different tree orderings are needed,
1063 * which is why there are two trees with the same contents.
1065 static extent_tree_t dss_chunks_szad;
1066 static extent_tree_t dss_chunks_ad;
1070 /* Huge allocation statistics. */
1071 static uint64_t huge_nmalloc;
1072 static uint64_t huge_ndalloc;
1073 static size_t huge_allocated;
1076 /****************************/
1078 * base (internal allocation).
1082 * Current pages that are being used for internal memory allocations. These
1083 * pages are carved up in cacheline-size quanta, so that there is no chance of
1084 * false cache line sharing.
1086 static void *base_pages;
1087 static void *base_next_addr;
1088 static void *base_past_addr; /* Addr immediately past base_pages. */
1089 static extent_node_t *base_nodes;
1090 static malloc_mutex_t base_mtx;
1092 static size_t base_mapped;
1101 * Arenas that are used to service external requests. Not all elements of the
1102 * arenas array are necessarily used; arenas are created lazily as needed.
1104 static arena_t **arenas;
1105 static unsigned narenas;
1107 static unsigned next_arena;
1109 static pthread_mutex_t arenas_lock; /* Protects arenas initialization. */
 * Map of _pthread_self() --> arenas[???], used for selecting an arena to use
 * for allocations.
1116 static __thread arena_t *arenas_map TLS_MODEL;
1119 #ifdef MALLOC_TCACHE
1120 /* Map of thread-specific caches. */
1121 static __thread tcache_t *tcache_tls TLS_MODEL;
 * Number of cache slots for each bin in the thread cache, or 0 if tcache is
 * disabled.
1127 size_t tcache_nslots;
1129 /* Number of tcache allocation/deallocation events between incremental GCs. */
1130 unsigned tcache_gc_incr;
1134 * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
1135 * potentially avoid some system calls. We can get away without TLS here,
 * since the state of mmap_unaligned only affects performance, rather than
 * correctness.
1140 static __thread bool mmap_unaligned TLS_MODEL;
1142 static bool mmap_unaligned;
1146 static malloc_mutex_t chunks_mtx;
1147 /* Chunk statistics. */
1148 static chunk_stats_t stats_chunks;
1151 /*******************************/
1153 * Runtime configuration options.
1155 const char *_malloc_options;
1157 #ifndef MALLOC_PRODUCTION
1158 static bool opt_abort = true;
1159 static bool opt_junk = true;
1161 static bool opt_abort = false;
1162 static bool opt_junk = false;
1164 #ifdef MALLOC_TCACHE
1165 static size_t opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
1166 static ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
1169 static bool opt_dss = true;
1170 static bool opt_mmap = true;
1172 static ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
1173 static bool opt_stats_print = false;
1174 static size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
1175 static size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
1176 static size_t opt_lg_medium_max = LG_MEDIUM_MAX_DEFAULT;
1177 static size_t opt_lg_chunk = LG_CHUNK_DEFAULT;
1178 static bool opt_utrace = false;
1179 static bool opt_sysv = false;
1180 static bool opt_xmalloc = false;
1181 static bool opt_zero = false;
1182 static int opt_narenas_lshift = 0;
1190 #define UTRACE(a, b, c) \
1192 malloc_utrace_t ut; \
1196 utrace(&ut, sizeof(ut)); \
1199 /******************************************************************************/
1201 * Begin function prototypes for non-inline static functions.
1204 static void malloc_mutex_init(malloc_mutex_t *mutex);
1205 static bool malloc_spin_init(pthread_mutex_t *lock);
1207 static size_t pow2_ceil(size_t x);
static void	wrtmessage(const char *p1, const char *p2, const char *p3,
    const char *p4);
1212 static void malloc_printf(const char *format, ...);
1214 static char *umax2s(uintmax_t x, unsigned base, char *s);
1216 static bool base_pages_alloc_dss(size_t minsize);
1218 static bool base_pages_alloc_mmap(size_t minsize);
1219 static bool base_pages_alloc(size_t minsize);
1220 static void *base_alloc(size_t size);
1221 static void *base_calloc(size_t number, size_t size);
1222 static extent_node_t *base_node_alloc(void);
1223 static void base_node_dealloc(extent_node_t *node);
1224 static void *pages_map(void *addr, size_t size);
1225 static void pages_unmap(void *addr, size_t size);
1227 static void *chunk_alloc_dss(size_t size, bool *zero);
1228 static void *chunk_recycle_dss(size_t size, bool *zero);
1230 static void *chunk_alloc_mmap_slow(size_t size, bool unaligned);
1231 static void *chunk_alloc_mmap(size_t size);
1232 static void *chunk_alloc(size_t size, bool *zero);
1234 static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
1235 static bool chunk_dealloc_dss(void *chunk, size_t size);
1237 static void chunk_dealloc_mmap(void *chunk, size_t size);
1238 static void chunk_dealloc(void *chunk, size_t size);
1240 static arena_t *choose_arena_hard(void);
1242 static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
1243 bool large, bool zero);
1244 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
1245 static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
1246 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
1248 static void arena_purge(arena_t *arena);
1249 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
1250 static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
1251 arena_run_t *run, size_t oldsize, size_t newsize);
1252 static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
1253 arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
1254 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
1255 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
1256 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
1257 #ifdef MALLOC_TCACHE
1258 static void tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin,
1260 static void *tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin,
1263 static void *arena_malloc_medium(arena_t *arena, size_t size, bool zero);
1264 static void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
1265 static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
1267 static bool arena_is_large(const void *ptr);
1268 static size_t arena_salloc(const void *ptr);
static void	arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk,
    arena_run_t *run, arena_bin_t *bin);
1273 static void arena_stats_print(arena_t *arena);
1275 static void stats_print_atexit(void);
1276 #ifdef MALLOC_TCACHE
1277 static void tcache_bin_flush(tcache_bin_t *tbin, size_t binind,
1280 static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
1282 #ifdef MALLOC_TCACHE
1283 static void arena_dalloc_hard(arena_t *arena, arena_chunk_t *chunk,
1284 void *ptr, arena_chunk_map_t *mapelm, tcache_t *tcache);
1286 static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
1287 void *ptr, size_t size, size_t oldsize);
1288 static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
1289 void *ptr, size_t size, size_t oldsize);
1290 static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
1291 static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
1292 static bool arena_new(arena_t *arena, unsigned ind);
1293 static arena_t *arenas_extend(unsigned ind);
1294 #ifdef MALLOC_TCACHE
1295 static tcache_bin_t *tcache_bin_create(arena_t *arena);
1296 static void tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin,
1298 # ifdef MALLOC_STATS
1299 static void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
1301 static tcache_t *tcache_create(arena_t *arena);
1302 static void tcache_destroy(tcache_t *tcache);
1304 static void *huge_malloc(size_t size, bool zero);
1305 static void *huge_palloc(size_t alignment, size_t size);
1306 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
1307 static void huge_dalloc(void *ptr);
1308 static void malloc_stats_print(void);
1310 static void small_size2bin_validate(void);
1312 static bool small_size2bin_init(void);
1313 static bool small_size2bin_init_hard(void);
1314 static unsigned malloc_ncpus(void);
1315 static bool malloc_init_hard(void);
1318 * End function prototypes.
1320 /******************************************************************************/
1323 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1326 if (_write(STDERR_FILENO, p1, strlen(p1)) < 0
1327 || _write(STDERR_FILENO, p2, strlen(p2)) < 0
1328 || _write(STDERR_FILENO, p3, strlen(p3)) < 0
1329 || _write(STDERR_FILENO, p4, strlen(p4)) < 0)
1333 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
1334 const char *p4) = wrtmessage;
1337 * We don't want to depend on vsnprintf() for production builds, since that can
1338 * cause unnecessary bloat for static binaries. umax2s() provides minimal
1339 * integer printing functionality, so that malloc_printf() use can be limited to
1340 * MALLOC_STATS code.
1342 #define UMAX2S_BUFSIZE 65
1344 umax2s(uintmax_t x, unsigned base, char *s)
1348 i = UMAX2S_BUFSIZE - 1;
1354 s[i] = "0123456789"[x % 10];
1361 s[i] = "0123456789abcdef"[x & 0xf];
1368 s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base];
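
/*
 * Illustrative use: digits are written backward from the end of the caller's
 * buffer, so the returned pointer (into that buffer), rather than the buffer's
 * start, is what should be printed, e.g.:
 *
 *	char buf[UMAX2S_BUFSIZE];
 *	_malloc_message(umax2s(1234, 10, buf), "\n", "", "");
 */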
1377 * Define a custom assert() in order to reduce the chances of deadlock during
1378 * assertion failure.
1381 # define assert(e) do { \
1383 char line_buf[UMAX2S_BUFSIZE]; \
1384 _malloc_message(_getprogname(), ": (malloc) ", \
1386 _malloc_message(umax2s(__LINE__, 10, line_buf), \
1387 ": Failed assertion: ", "\"", #e); \
1388 _malloc_message("\"\n", "", "", ""); \
1398 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1401 malloc_printf(const char *format, ...)
1406 va_start(ap, format);
1407 vsnprintf(buf, sizeof(buf), format, ap);
1409 _malloc_message(buf, "", "", "");
1413 /******************************************************************************/
1415 * Begin mutex. We can't use normal pthread mutexes in all places, because
1416 * they require malloc()ed memory, which causes bootstrapping issues in some
1421 malloc_mutex_init(malloc_mutex_t *mutex)
1423 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
1429 malloc_mutex_lock(malloc_mutex_t *mutex)
1433 _SPINLOCK(&mutex->lock);
1437 malloc_mutex_unlock(malloc_mutex_t *mutex)
1441 _SPINUNLOCK(&mutex->lock);
1447 /******************************************************************************/
1449 * Begin spin lock. Spin locks here are actually adaptive mutexes that block
1450 * after a period of spinning, because unbounded spinning would allow for
1451 * priority inversion.
1455 * We use an unpublished interface to initialize pthread mutexes with an
1456 * allocation callback, in order to avoid infinite recursion.
1458 int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
1459 void *(calloc_cb)(size_t, size_t));
1461 __weak_reference(_pthread_mutex_init_calloc_cb_stub,
1462 _pthread_mutex_init_calloc_cb);
1465 _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
1466 void *(calloc_cb)(size_t, size_t))
1473 malloc_spin_init(pthread_mutex_t *lock)
1476 if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
1483 malloc_spin_lock(pthread_mutex_t *lock)
1487 if (_pthread_mutex_trylock(lock) != 0) {
1488 /* Exponentially back off if there are multiple CPUs. */
1491 volatile unsigned j;
1493 for (i = 1; i <= LG_SPIN_LIMIT; i++) {
1494 for (j = 0; j < (1U << i); j++) {
1498 if (_pthread_mutex_trylock(lock) == 0)
1504 * Spinning failed. Block until the lock becomes
1505 * available, in order to avoid indefinite priority
1508 _pthread_mutex_lock(lock);
1514 malloc_spin_unlock(pthread_mutex_t *lock)
1518 _pthread_mutex_unlock(lock);
1524 /******************************************************************************/
1526 * Begin Utility functions/macros.
1529 /* Return the chunk address for allocation address a. */
1530 #define CHUNK_ADDR2BASE(a) \
1531 ((void *)((uintptr_t)(a) & ~chunksize_mask))
1533 /* Return the chunk offset of address a. */
1534 #define CHUNK_ADDR2OFFSET(a) \
1535 ((size_t)((uintptr_t)(a) & chunksize_mask))
1537 /* Return the smallest chunk multiple that is >= s. */
1538 #define CHUNK_CEILING(s) \
1539 (((s) + chunksize_mask) & ~chunksize_mask)
1541 /* Return the smallest quantum multiple that is >= a. */
1542 #define QUANTUM_CEILING(a) \
1543 (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
1545 /* Return the smallest cacheline multiple that is >= s. */
1546 #define CACHELINE_CEILING(s) \
1547 (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
1549 /* Return the smallest subpage multiple that is >= s. */
1550 #define SUBPAGE_CEILING(s) \
1551 (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
1553 /* Return the smallest medium size class that is >= s. */
1554 #define MEDIUM_CEILING(s) \
1555 (((s) + mspace_mask) & ~mspace_mask)
1557 /* Return the smallest pagesize multiple that is >= s. */
1558 #define PAGE_CEILING(s) \
1559 (((s) + PAGE_MASK) & ~PAGE_MASK)
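
/*
 * Worked examples with the runtime defaults: QUANTUM_CEILING(17) == 32,
 * CACHELINE_CEILING(100) == 128, PAGE_CEILING(5000) == 8192 (4 KiB pages),
 * and CHUNK_CEILING(5 MiB) == 8 MiB (4 MiB chunks).
 */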
1562 /* Compute the smallest power of 2 that is >= x. */
1573 #if (SIZEOF_PTR == 8)
1581 /******************************************************************************/
1585 base_pages_alloc_dss(size_t minsize)
 * Do special DSS allocation here, since base allocations don't need to
 * be chunk-aligned.
1592 malloc_mutex_lock(&dss_mtx);
1593 if (dss_prev != (void *)-1) {
1595 size_t csize = CHUNK_CEILING(minsize);
1598 /* Get the current end of the DSS. */
1602 * Calculate how much padding is necessary to
1603 * chunk-align the end of the DSS. Don't worry about
1604 * dss_max not being chunk-aligned though.
1606 incr = (intptr_t)chunksize
1607 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1609 if ((size_t)incr < minsize)
1612 dss_prev = sbrk(incr);
1613 if (dss_prev == dss_max) {
1615 dss_max = (void *)((intptr_t)dss_prev + incr);
1616 base_pages = dss_prev;
1617 base_next_addr = base_pages;
1618 base_past_addr = dss_max;
1620 base_mapped += incr;
1622 malloc_mutex_unlock(&dss_mtx);
1625 } while (dss_prev != (void *)-1);
1627 malloc_mutex_unlock(&dss_mtx);
1634 base_pages_alloc_mmap(size_t minsize)
1638 assert(minsize != 0);
1639 csize = PAGE_CEILING(minsize);
1640 base_pages = pages_map(NULL, csize);
1641 if (base_pages == NULL)
1643 base_next_addr = base_pages;
1644 base_past_addr = (void *)((uintptr_t)base_pages + csize);
1646 base_mapped += csize;
1653 base_pages_alloc(size_t minsize)
1657 if (opt_mmap && minsize != 0)
1660 if (base_pages_alloc_mmap(minsize) == false)
1666 if (base_pages_alloc_dss(minsize) == false)
1676 base_alloc(size_t size)
1681 /* Round size up to nearest multiple of the cacheline size. */
1682 csize = CACHELINE_CEILING(size);
1684 malloc_mutex_lock(&base_mtx);
1685 /* Make sure there's enough space for the allocation. */
1686 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1687 if (base_pages_alloc(csize)) {
1688 malloc_mutex_unlock(&base_mtx);
1693 ret = base_next_addr;
1694 base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1695 malloc_mutex_unlock(&base_mtx);
1701 base_calloc(size_t number, size_t size)
1705 ret = base_alloc(number * size);
1707 memset(ret, 0, number * size);
1712 static extent_node_t *
1713 base_node_alloc(void)
1717 malloc_mutex_lock(&base_mtx);
1718 if (base_nodes != NULL) {
1720 base_nodes = *(extent_node_t **)ret;
1721 malloc_mutex_unlock(&base_mtx);
1723 malloc_mutex_unlock(&base_mtx);
1724 ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
1731 base_node_dealloc(extent_node_t *node)
1734 malloc_mutex_lock(&base_mtx);
1735 *(extent_node_t **)node = base_nodes;
1737 malloc_mutex_unlock(&base_mtx);
1741 * End Utility functions/macros.
1743 /******************************************************************************/
1745 * Begin extent tree code.
1750 extent_szad_comp(extent_node_t *a, extent_node_t *b)
1753 size_t a_size = a->size;
1754 size_t b_size = b->size;
1756 ret = (a_size > b_size) - (a_size < b_size);
1758 uintptr_t a_addr = (uintptr_t)a->addr;
1759 uintptr_t b_addr = (uintptr_t)b->addr;
1761 ret = (a_addr > b_addr) - (a_addr < b_addr);
1767 /* Wrap red-black tree macros in functions. */
1768 rb_gen(__unused static, extent_tree_szad_, extent_tree_t, extent_node_t,
1769 link_szad, extent_szad_comp)
1773 extent_ad_comp(extent_node_t *a, extent_node_t *b)
1775 uintptr_t a_addr = (uintptr_t)a->addr;
1776 uintptr_t b_addr = (uintptr_t)b->addr;
1778 return ((a_addr > b_addr) - (a_addr < b_addr));
1781 /* Wrap red-black tree macros in functions. */
1782 rb_gen(__unused static, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
1786 * End extent tree code.
1788 /******************************************************************************/
1790 * Begin chunk management functions.
1794 pages_map(void *addr, size_t size)
1799 * We don't use MAP_FIXED here, because it can cause the *replacement*
1800 * of existing mappings, and we only want to create new mappings.
	ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
	    -1, 0);
1804 assert(ret != NULL);
1806 if (ret == MAP_FAILED)
1808 else if (addr != NULL && ret != addr) {
1810 * We succeeded in mapping memory, but not in the right place.
1812 if (munmap(ret, size) == -1) {
1813 char buf[STRERROR_BUF];
1815 strerror_r(errno, buf, sizeof(buf));
1816 _malloc_message(_getprogname(),
1817 ": (malloc) Error in munmap(): ", buf, "\n");
1824 assert(ret == NULL || (addr == NULL && ret != addr)
1825 || (addr != NULL && ret == addr));
1830 pages_unmap(void *addr, size_t size)
1833 if (munmap(addr, size) == -1) {
1834 char buf[STRERROR_BUF];
1836 strerror_r(errno, buf, sizeof(buf));
1837 _malloc_message(_getprogname(),
1838 ": (malloc) Error in munmap(): ", buf, "\n");
1846 chunk_alloc_dss(size_t size, bool *zero)
1850 ret = chunk_recycle_dss(size, zero);
1855 * sbrk() uses a signed increment argument, so take care not to
1856 * interpret a huge allocation request as a negative increment.
1858 if ((intptr_t)size < 0)
1861 malloc_mutex_lock(&dss_mtx);
1862 if (dss_prev != (void *)-1) {
1866 * The loop is necessary to recover from races with other
 * threads that are using the DSS for something other than malloc.
1871 /* Get the current end of the DSS. */
1875 * Calculate how much padding is necessary to
1876 * chunk-align the end of the DSS.
1878 incr = (intptr_t)size
1879 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1880 if (incr == (intptr_t)size)
1883 ret = (void *)((intptr_t)dss_max + incr);
1887 dss_prev = sbrk(incr);
1888 if (dss_prev == dss_max) {
1890 dss_max = (void *)((intptr_t)dss_prev + incr);
1891 malloc_mutex_unlock(&dss_mtx);
1895 } while (dss_prev != (void *)-1);
1897 malloc_mutex_unlock(&dss_mtx);
1903 chunk_recycle_dss(size_t size, bool *zero)
1905 extent_node_t *node, key;
1909 malloc_mutex_lock(&dss_mtx);
1910 node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
1912 void *ret = node->addr;
1914 /* Remove node from the tree. */
1915 extent_tree_szad_remove(&dss_chunks_szad, node);
1916 if (node->size == size) {
1917 extent_tree_ad_remove(&dss_chunks_ad, node);
1918 base_node_dealloc(node);
1921 * Insert the remainder of node's address range as a
 * smaller chunk.  Its position within dss_chunks_ad does not change.
1925 assert(node->size > size);
1926 node->addr = (void *)((uintptr_t)node->addr + size);
1928 extent_tree_szad_insert(&dss_chunks_szad, node);
1930 malloc_mutex_unlock(&dss_mtx);
1933 memset(ret, 0, size);
1936 malloc_mutex_unlock(&dss_mtx);
1943 chunk_alloc_mmap_slow(size_t size, bool unaligned)
1948 /* Beware size_t wrap-around. */
1949 if (size + chunksize <= size)
1952 ret = pages_map(NULL, size + chunksize);
1956 /* Clean up unneeded leading/trailing space. */
1957 offset = CHUNK_ADDR2OFFSET(ret);
1959 /* Note that mmap() returned an unaligned mapping. */
1962 /* Leading space. */
1963 pages_unmap(ret, chunksize - offset);
1965 ret = (void *)((uintptr_t)ret +
1966 (chunksize - offset));
1968 /* Trailing space. */
1969 pages_unmap((void *)((uintptr_t)ret + size),
1972 /* Trailing space only. */
1973 pages_unmap((void *)((uintptr_t)ret + size),
1978 * If mmap() returned an aligned mapping, reset mmap_unaligned so that
 * the next chunk_alloc_mmap() execution tries the fast allocation method.
1982 if (unaligned == false)
1983 mmap_unaligned = false;
1989 chunk_alloc_mmap(size_t size)
1994 * Ideally, there would be a way to specify alignment to mmap() (like
1995 * NetBSD has), but in the absence of such a feature, we have to work
1996 * hard to efficiently create aligned mappings. The reliable, but
1997 * slow method is to create a mapping that is over-sized, then trim the
 * excess.  However, that always results in at least one call to
 * pages_unmap().
2001 * A more optimistic approach is to try mapping precisely the right
2002 * amount, then try to append another mapping if alignment is off. In
2003 * practice, this works out well as long as the application is not
2004 * interleaving mappings via direct mmap() calls. If we do run into a
2005 * situation where there is an interleaved mapping and we are unable to
2006 * extend an unaligned mapping, our best option is to switch to the
2007 * slow method until mmap() returns another aligned mapping. This will
2008 * tend to leave a gap in the memory map that is too small to cause
2009 * later problems for the optimistic method.
2011 * Another possible confounding factor is address space layout
2012 * randomization (ASLR), which causes mmap(2) to disregard the
2013 * requested address. mmap_unaligned tracks whether the previous
2014 * chunk_alloc_mmap() execution received any unaligned or relocated
2015 * mappings, and if so, the current execution will immediately fall
2016 * back to the slow method. However, we keep track of whether the fast
2017 * method would have succeeded, and if so, we make a note to try the
2018 * fast method next time.
2021 if (mmap_unaligned == false) {
2024 ret = pages_map(NULL, size);
2028 offset = CHUNK_ADDR2OFFSET(ret);
2030 mmap_unaligned = true;
2031 /* Try to extend chunk boundary. */
2032 if (pages_map((void *)((uintptr_t)ret + size),
2033 chunksize - offset) == NULL) {
2035 * Extension failed. Clean up, then revert to
2036 * the reliable-but-expensive method.
2038 pages_unmap(ret, size);
2039 ret = chunk_alloc_mmap_slow(size, true);
2041 /* Clean up unneeded leading space. */
2042 pages_unmap(ret, chunksize - offset);
2043 ret = (void *)((uintptr_t)ret + (chunksize -
2048 ret = chunk_alloc_mmap_slow(size, false);
2054 * If the caller specifies (*zero == false), it is still possible to receive
2055 * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc()
2056 * takes advantage of this to avoid demanding zeroed chunks, but taking
2057 * advantage of them if they are returned.
2060 chunk_alloc(size_t size, bool *zero)
2065 assert((size & chunksize_mask) == 0);
2071 ret = chunk_alloc_mmap(size);
2080 ret = chunk_alloc_dss(size, zero);
2086 /* All strategies for allocation failed. */
2091 malloc_mutex_lock(&chunks_mtx);
2092 stats_chunks.nchunks += (size / chunksize);
2093 stats_chunks.curchunks += (size / chunksize);
2094 if (stats_chunks.curchunks > stats_chunks.highchunks)
2095 stats_chunks.highchunks = stats_chunks.curchunks;
2096 malloc_mutex_unlock(&chunks_mtx);
2100 assert(CHUNK_ADDR2BASE(ret) == ret);
2105 static extent_node_t *
2106 chunk_dealloc_dss_record(void *chunk, size_t size)
2108 extent_node_t *node, *prev, key;
2110 key.addr = (void *)((uintptr_t)chunk + size);
2111 node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
2112 /* Try to coalesce forward. */
2113 if (node != NULL && node->addr == key.addr) {
2115 * Coalesce chunk with the following address range. This does
2116 * not change the position within dss_chunks_ad, so only
2117 * remove/insert from/into dss_chunks_szad.
2119 extent_tree_szad_remove(&dss_chunks_szad, node);
2122 extent_tree_szad_insert(&dss_chunks_szad, node);
2125 * Coalescing forward failed, so insert a new node. Drop
2126 * dss_mtx during node allocation, since it is possible that a
2127 * new base chunk will be allocated.
2129 malloc_mutex_unlock(&dss_mtx);
2130 node = base_node_alloc();
2131 malloc_mutex_lock(&dss_mtx);
2136 extent_tree_ad_insert(&dss_chunks_ad, node);
2137 extent_tree_szad_insert(&dss_chunks_szad, node);
2140 /* Try to coalesce backward. */
2141 prev = extent_tree_ad_prev(&dss_chunks_ad, node);
2142 if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
2145 * Coalesce chunk with the previous address range. This does
2146 * not change the position within dss_chunks_ad, so only
2147 * remove/insert node from/into dss_chunks_szad.
2149 extent_tree_szad_remove(&dss_chunks_szad, prev);
2150 extent_tree_ad_remove(&dss_chunks_ad, prev);
2152 extent_tree_szad_remove(&dss_chunks_szad, node);
2153 node->addr = prev->addr;
2154 node->size += prev->size;
2155 extent_tree_szad_insert(&dss_chunks_szad, node);
2157 base_node_dealloc(prev);
2164 chunk_dealloc_dss(void *chunk, size_t size)
2168 malloc_mutex_lock(&dss_mtx);
2169 if ((uintptr_t)chunk >= (uintptr_t)dss_base
2170 && (uintptr_t)chunk < (uintptr_t)dss_max) {
2171 extent_node_t *node;
2173 /* Try to coalesce with other unused chunks. */
2174 node = chunk_dealloc_dss_record(chunk, size);
2180 /* Get the current end of the DSS. */
2184 * Try to shrink the DSS if this chunk is at the end of the
2185 * DSS. The sbrk() call here is subject to a race condition
2186 * with threads that use brk(2) or sbrk(2) directly, but the
2187 * alternative would be to leak memory for the sake of poorly
2188 * designed multi-threaded programs.
2190 if ((void *)((uintptr_t)chunk + size) == dss_max
2191 && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
2193 dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
2196 extent_tree_szad_remove(&dss_chunks_szad, node);
2197 extent_tree_ad_remove(&dss_chunks_ad, node);
2198 base_node_dealloc(node);
2201 madvise(chunk, size, MADV_FREE);
2209 malloc_mutex_unlock(&dss_mtx);
2215 chunk_dealloc_mmap(void *chunk, size_t size)
2218 pages_unmap(chunk, size);
2222 chunk_dealloc(void *chunk, size_t size)
2225 assert(chunk != NULL);
2226 assert(CHUNK_ADDR2BASE(chunk) == chunk);
2228 assert((size & chunksize_mask) == 0);
2231 malloc_mutex_lock(&chunks_mtx);
2232 stats_chunks.curchunks -= (size / chunksize);
2233 malloc_mutex_unlock(&chunks_mtx);
2238 if (chunk_dealloc_dss(chunk, size) == false)
2244 chunk_dealloc_mmap(chunk, size);
2248 * End chunk management functions.
2250 /******************************************************************************/
2256 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
2257 * code if necessary).
2259 static inline arena_t *
2265 * We can only use TLS if this is a PIC library, since for the static
2266 * library version, libc's malloc is used by TLS allocation, which
2267 * introduces a bootstrapping issue.
2270 if (__isthreaded == false) {
2271 /* Avoid the overhead of TLS for single-threaded operation. */
2277 ret = choose_arena_hard();
2278 assert(ret != NULL);
2281 if (__isthreaded && narenas > 1) {
2285 * Hash _pthread_self() to one of the arenas. There is a prime
2286 * number of arenas, so this has a reasonable chance of
2287 * working. Even so, the hashing can be easily thwarted by
2288 * inconvenient _pthread_self() values. Without specific
2289 * knowledge of how _pthread_self() calculates values, we can't
2290 * easily do much better than this.
2292 ind = (unsigned long) _pthread_self() % narenas;
2295 * Optimistially assume that arenas[ind] has been initialized.
2296 * At worst, we find out that some other thread has already
2297 * done so, after acquiring the lock in preparation. Note that
2298 * this lazy locking also has the effect of lazily forcing
2299 * cache coherency; without the lock acquisition, there's no
2300 * guarantee that modification of arenas[ind] by another thread
2301 * would be seen on this CPU for an arbitrary amount of time.
2303 * In general, this approach to modifying a synchronized value
2304 * isn't a good idea, but in this case we only ever modify the
2305 * value once, so things work out well.
2310 * Avoid races with another thread that may have already
2311 * initialized arenas[ind].
2313 malloc_spin_lock(&arenas_lock);
2314 if (arenas[ind] == NULL)
2315 ret = arenas_extend((unsigned)ind);
2318 malloc_spin_unlock(&arenas_lock);
2324 assert(ret != NULL);
2330 * Choose an arena based on a per-thread value (slow-path code only, called
2331 * only by choose_arena()).
2334 choose_arena_hard(void)
2338 assert(__isthreaded);
2341 malloc_spin_lock(&arenas_lock);
2342 if ((ret = arenas[next_arena]) == NULL)
2343 ret = arenas_extend(next_arena);
2344 next_arena = (next_arena + 1) % narenas;
2345 malloc_spin_unlock(&arenas_lock);
2356 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
2358 uintptr_t a_chunk = (uintptr_t)a;
2359 uintptr_t b_chunk = (uintptr_t)b;
2364 return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
2367 /* Wrap red-black tree macros in functions. */
2368 rb_gen(__unused static, arena_chunk_tree_dirty_, arena_chunk_tree_t,
2369 arena_chunk_t, link_dirty, arena_chunk_comp)
2372 arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2374 uintptr_t a_mapelm = (uintptr_t)a;
2375 uintptr_t b_mapelm = (uintptr_t)b;
2380 return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
2383 /* Wrap red-black tree macros in functions. */
2384 rb_gen(__unused static, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
2385 link, arena_run_comp)
2388 arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2391 size_t a_size = a->bits & ~PAGE_MASK;
2392 size_t b_size = b->bits & ~PAGE_MASK;
2394 ret = (a_size > b_size) - (a_size < b_size);
2396 uintptr_t a_mapelm, b_mapelm;
2398 if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY)
2399 a_mapelm = (uintptr_t)a;
 * Treat keys as though they are lower than anything else.
2407 b_mapelm = (uintptr_t)b;
2409 ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
2415 /* Wrap red-black tree macros in functions. */
2416 rb_gen(__unused static, arena_avail_tree_, arena_avail_tree_t,
2417 arena_chunk_map_t, link, arena_avail_comp)
2420 arena_run_rc_incr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
2422 arena_chunk_t *chunk;
2424 size_t pagebeg, pageend, i;
2426 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2427 arena = chunk->arena;
2428 pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
2429 pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
2430 (uintptr_t)chunk) >> PAGE_SHIFT;
2432 for (i = pagebeg; i <= pageend; i++) {
2433 size_t mapbits = chunk->map[i].bits;
2435 if (mapbits & CHUNK_MAP_DIRTY) {
2436 assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
2439 mapbits ^= CHUNK_MAP_DIRTY;
2441 assert((mapbits & CHUNK_MAP_RC_MASK) != CHUNK_MAP_RC_MASK);
2442 mapbits += CHUNK_MAP_RC_ONE;
2443 chunk->map[i].bits = mapbits;
2448 arena_run_rc_decr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
2450 arena_chunk_t *chunk;
2452 size_t pagebeg, pageend, mapbits, i;
2453 bool dirtier = false;
2455 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2456 arena = chunk->arena;
2457 pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
2458 pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
2459 (uintptr_t)chunk) >> PAGE_SHIFT;
2462 mapbits = chunk->map[pagebeg].bits;
2463 mapbits -= CHUNK_MAP_RC_ONE;
2464 if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
2466 assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2467 mapbits |= CHUNK_MAP_DIRTY;
2471 chunk->map[pagebeg].bits = mapbits;
2473 if (pageend - pagebeg >= 1) {
2475 * Interior pages are completely consumed by the object being
2476 * deallocated, which means that the pages can be
2477 * unconditionally marked dirty.
2479 for (i = pagebeg + 1; i < pageend; i++) {
2480 mapbits = chunk->map[i].bits;
2481 mapbits -= CHUNK_MAP_RC_ONE;
2482 assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
2484 assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2485 mapbits |= CHUNK_MAP_DIRTY;
2488 chunk->map[i].bits = mapbits;
2492 mapbits = chunk->map[pageend].bits;
2493 mapbits -= CHUNK_MAP_RC_ONE;
2494 if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
2496 assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2497 mapbits |= CHUNK_MAP_DIRTY;
2501 chunk->map[pageend].bits = mapbits;
2505 if (chunk->dirtied == false) {
2506 arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
2508 chunk->dirtied = true;
2511 /* Enforce opt_lg_dirty_mult. */
2512 if (opt_lg_dirty_mult >= 0 && (arena->nactive >>
2513 opt_lg_dirty_mult) < arena->ndirty)
2518 static inline void *
2519 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
2522 unsigned i, mask, bit, regind;
2524 assert(run->magic == ARENA_RUN_MAGIC);
2525 assert(run->regs_minelm < bin->regs_mask_nelms);
2528 * Move the first check outside the loop, so that run->regs_minelm can
2529 * be updated unconditionally, without the possibility of updating it multiple times.
2532 i = run->regs_minelm;
2533 mask = run->regs_mask[i];
2535 /* Usable allocation found. */
2536 bit = ffs((int)mask) - 1;
2538 regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
2539 assert(regind < bin->nregs);
2540 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2541 + (bin->reg_size * regind));
2544 mask ^= (1U << bit);
2545 run->regs_mask[i] = mask;
2547 arena_run_rc_incr(run, bin, ret);
2552 for (i++; i < bin->regs_mask_nelms; i++) {
2553 mask = run->regs_mask[i];
2555 /* Usable allocation found. */
2556 bit = ffs((int)mask) - 1;
2558 regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
2559 assert(regind < bin->nregs);
2560 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2561 + (bin->reg_size * regind));
2564 mask ^= (1U << bit);
2565 run->regs_mask[i] = mask;
2568 * Make a note that nothing before this element
2569 * contains a free region.
2571 run->regs_minelm = i; /* Low payoff: + (mask == 0); */
2573 arena_run_rc_incr(run, bin, ret);
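/*
 * Worked example of the bitmap scan above (hypothetical values): assume
 * 32-bit masks (LG_SIZEOF_INT == 2), reg_size == 64, and reg0_offset == 64.
 * If regs_mask[1] == 0x00000008, then ffs() returns 4, so bit == 3 and
 *
 *   regind = (1 << 5) + 3 = 35
 *   ret    = (uintptr_t)run + 64 + 64 * 35 = (uintptr_t)run + 2304
 *
 * i.e. the 36th region of the run.  Clearing bit 3 then marks that region
 * as allocated; set bits always denote free regions.
 */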
2584 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
2586 unsigned shift, diff, regind, elm, bit;
2588 assert(run->magic == ARENA_RUN_MAGIC);
2591 * Avoid doing division with a variable divisor if possible. Using
2592 * actual division here can reduce allocator throughput by over 20%!
2594 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
2596 /* Rescale (factor powers of 2 out of the numerator and denominator). */
2597 shift = ffs(size) - 1;
2602 /* The divisor was a power of 2. */
2606 * To divide by a number D that is not a power of two we
2607 * multiply by (2^21 / D) and then right shift by 21 positions.
2613 * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
2615 * We can omit the first three elements, because we never
2616 * divide by 0, and 1 and 2 are both powers of two, which are handled above.
2619 #define SIZE_INV_SHIFT 21
2620 #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
2621 static const unsigned size_invs[] = {
2623 SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
2624 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
2625 SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
2626 SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
2627 SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
2628 SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
2629 SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
2632 if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
2633 regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
2635 regind = diff / size;
2637 #undef SIZE_INV_SHIFT
2639 assert(diff == regind * size);
2640 assert(regind < bin->nregs);
2642 elm = regind >> (LG_SIZEOF_INT + 3);
2643 if (elm < run->regs_minelm)
2644 run->regs_minelm = elm;
2645 bit = regind - (elm << (LG_SIZEOF_INT + 3));
2646 assert((run->regs_mask[elm] & (1U << bit)) == 0);
2647 run->regs_mask[elm] |= (1U << bit);
2649 arena_run_rc_decr(run, bin, ptr);
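/*
 * Worked example of the reciprocal-multiplication division above
 * (hypothetical values): suppose reg_size == 48 and the region pointer lies
 * 480 bytes past region 0.  ffs(48) - 1 == 4, so both operands are rescaled:
 * diff = 480 >> 4 = 30 and size = 48 >> 4 = 3.  Then
 *
 *   SIZE_INV(3) = (2^21 / 3) + 1 = 699051
 *   regind = (30 * 699051) >> 21 = 10
 *
 * which matches 480 / 48 exactly, without a hardware divide.
 */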
2653 arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
2656 arena_chunk_t *chunk;
2657 size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
2659 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2660 old_ndirty = chunk->ndirty;
2661 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2663 total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
2665 need_pages = (size >> PAGE_SHIFT);
2666 assert(need_pages > 0);
2667 assert(need_pages <= total_pages);
2668 rem_pages = total_pages - need_pages;
2670 arena_avail_tree_remove(&arena->runs_avail, &chunk->map[run_ind]);
2671 arena->nactive += need_pages;
2673 /* Keep track of trailing unused pages for later use. */
2674 if (rem_pages > 0) {
2675 chunk->map[run_ind+need_pages].bits = (rem_pages <<
2676 PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
2677 CHUNK_MAP_FLAGS_MASK);
2678 chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
2679 PAGE_SHIFT) | (chunk->map[run_ind+total_pages-1].bits &
2680 CHUNK_MAP_FLAGS_MASK);
2681 arena_avail_tree_insert(&arena->runs_avail,
2682 &chunk->map[run_ind+need_pages]);
2685 for (i = 0; i < need_pages; i++) {
2686 /* Zero if necessary. */
2688 if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED)
2690 memset((void *)((uintptr_t)chunk + ((run_ind
2691 + i) << PAGE_SHIFT)), 0, PAGE_SIZE);
2692 /* CHUNK_MAP_ZEROED is cleared below. */
2696 /* Update dirty page accounting. */
2697 if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
2700 /* CHUNK_MAP_DIRTY is cleared below. */
2703 /* Initialize the chunk map. */
2705 chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE
2706 | CHUNK_MAP_ALLOCATED;
2708 chunk->map[run_ind + i].bits = (i << CHUNK_MAP_PG_SHIFT)
2709 | CHUNK_MAP_ALLOCATED;
2715 * Set the run size only in the first element for large runs.
2716 * This is primarily a debugging aid, since the lack of size
2717 * info for trailing pages only matters if the application
2718 * tries to operate on an interior pointer.
2720 chunk->map[run_ind].bits |= size;
2723 * Initialize the first page's refcount to 1, so that the run
2724 * header is protected from dirty page purging.
2726 chunk->map[run_ind].bits += CHUNK_MAP_RC_ONE;
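/*
 * Example of the small-run page map encoding above (hypothetical values):
 * if a small run starts at page 7 of its chunk and spans 3 pages, the map
 * entries for pages 7, 8, and 9 record offsets 0, 1, and 2 in their
 * CHUNK_MAP_PG bits.  Given any interior pointer, the run header can then
 * be recovered as
 *
 *   run = chunk + ((pageind - pg_offset) << PAGE_SHIFT)
 *
 * which is the lookup that arena_salloc() and arena_dalloc_bin() perform.
 */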
2730 static arena_chunk_t *
2731 arena_chunk_alloc(arena_t *arena)
2733 arena_chunk_t *chunk;
2736 if (arena->spare != NULL) {
2737 chunk = arena->spare;
2738 arena->spare = NULL;
2744 chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero);
2748 arena->stats.mapped += chunksize;
2751 chunk->arena = arena;
2752 chunk->dirtied = false;
2755 * Claim that no pages are in use, since the header is merely overhead.
2761 * Initialize the map to contain one maximal free untouched run.
2762 * Mark the pages as zeroed iff chunk_alloc() returned a zeroed chunk.
2765 zeroed = zero ? CHUNK_MAP_ZEROED : 0;
2766 for (i = 0; i < arena_chunk_header_npages; i++)
2767 chunk->map[i].bits = 0;
2768 chunk->map[i].bits = arena_maxclass | zeroed;
2769 for (i++; i < chunk_npages-1; i++)
2770 chunk->map[i].bits = zeroed;
2771 chunk->map[chunk_npages-1].bits = arena_maxclass | zeroed;
2774 /* Insert the run into the runs_avail tree. */
2775 arena_avail_tree_insert(&arena->runs_avail,
2776 &chunk->map[arena_chunk_header_npages]);
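/*
 * Map layout example (hypothetical sizes): with 4 KiB pages, a 1 MiB chunk
 * (chunk_npages == 256), and a header that happens to occupy 3 pages, the
 * loop above leaves map[0..2] zeroed (header pages), records the full run
 * size (arena_maxclass, here 253 pages' worth of bytes) in map[3] and
 * map[255], and leaves the interior entries holding only the
 * CHUNK_MAP_ZEROED flag.  Recording the size at both ends is what lets
 * arena_run_dalloc() coalesce neighbors without walking interior pages.
 */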
2782 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
2785 if (arena->spare != NULL) {
2786 if (arena->spare->dirtied) {
2787 arena_chunk_tree_dirty_remove(
2788 &chunk->arena->chunks_dirty, arena->spare);
2789 arena->ndirty -= arena->spare->ndirty;
2791 chunk_dealloc((void *)arena->spare, chunksize);
2793 arena->stats.mapped -= chunksize;
2798 * Remove run from runs_avail, regardless of whether this chunk
2799 * will be cached, so that the arena does not use it. Dirty page
2800 * flushing only uses the chunks_dirty tree, so leaving this chunk in
2801 * the chunks_* trees is sufficient for that purpose.
2803 arena_avail_tree_remove(&arena->runs_avail,
2804 &chunk->map[arena_chunk_header_npages]);
2806 arena->spare = chunk;
2809 static arena_run_t *
2810 arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
2812 arena_chunk_t *chunk;
2814 arena_chunk_map_t *mapelm, key;
2816 assert(size <= arena_maxclass);
2817 assert((size & PAGE_MASK) == 0);
2819 /* Search the arena's chunks for the lowest best fit. */
2820 key.bits = size | CHUNK_MAP_KEY;
2821 mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
2822 if (mapelm != NULL) {
2823 arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
2824 size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
2825 / sizeof(arena_chunk_map_t);
2827 run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
2829 arena_run_split(arena, run, size, large, zero);
2834 * No usable runs. Create a new chunk from which to allocate the run.
2836 chunk = arena_chunk_alloc(arena);
2839 run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2841 /* Update page map. */
2842 arena_run_split(arena, run, size, large, zero);
2847 static arena_chunk_t *
2848 chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg)
2850 size_t *ndirty = (size_t *)arg;
2852 assert(chunk->dirtied);
2853 *ndirty += chunk->ndirty;
2859 arena_purge(arena_t *arena)
2861 arena_chunk_t *chunk;
2866 arena_chunk_tree_dirty_iter(&arena->chunks_dirty, NULL,
2867 chunks_dirty_iter_cb, (void *)&ndirty);
2868 assert(ndirty == arena->ndirty);
2870 assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty);
2873 arena->stats.npurge++;
2877 * Iterate downward through chunks until enough dirty memory has been
2878 * purged. Terminate as soon as possible in order to minimize the
2879 * number of system calls, even if a chunk has only been partially purged.
2883 while ((arena->nactive >> (opt_lg_dirty_mult + 1)) < arena->ndirty) {
2884 chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
2885 assert(chunk != NULL);
2887 for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
2888 assert(i >= arena_chunk_header_npages);
2889 if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
2890 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2891 /* Find adjacent dirty run(s). */
2892 for (npages = 1; i > arena_chunk_header_npages
2893 && (chunk->map[i - 1].bits &
2894 CHUNK_MAP_DIRTY); npages++) {
2896 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2898 chunk->ndirty -= npages;
2899 arena->ndirty -= npages;
2901 madvise((void *)((uintptr_t)chunk + (i <<
2902 PAGE_SHIFT)), (npages << PAGE_SHIFT),
2905 arena->stats.nmadvise++;
2906 arena->stats.purged += npages;
2908 if ((arena->nactive >> (opt_lg_dirty_mult + 1))
2914 if (chunk->ndirty == 0) {
2915 arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
2917 chunk->dirtied = false;
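/*
 * Threshold example (hypothetical setting): with opt_lg_dirty_mult == 3 and
 * nactive == 8192 pages, purging is triggered once ndirty exceeds
 * 8192 >> 3 == 1024 pages, and the loop above keeps madvise()ing runs of
 * dirty pages until ndirty drops to 8192 >> 4 == 512, i.e. to half the
 * trigger level, so that purging is not re-entered immediately.
 */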
2923 arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
2925 arena_chunk_t *chunk;
2926 size_t size, run_ind, run_pages;
2928 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2929 run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
2931 assert(run_ind >= arena_chunk_header_npages);
2932 assert(run_ind < chunk_npages);
2933 if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
2934 size = chunk->map[run_ind].bits & ~PAGE_MASK;
2936 size = run->bin->run_size;
2937 run_pages = (size >> PAGE_SHIFT);
2938 arena->nactive -= run_pages;
2940 /* Mark pages as unallocated in the chunk map. */
2944 for (i = 0; i < run_pages; i++) {
2946 * When (dirty == true), *all* pages within the run
2947 * need to have their dirty bits set, because only
2948 * small runs can create a mixture of clean/dirty
2949 * pages, but such runs are passed to this function
2950 * with (dirty == false).
2952 assert((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY)
2956 chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
2961 for (i = 0; i < run_pages; i++) {
2962 chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE |
2963 CHUNK_MAP_ALLOCATED);
2966 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2967 CHUNK_MAP_FLAGS_MASK);
2968 chunk->map[run_ind+run_pages-1].bits = size |
2969 (chunk->map[run_ind+run_pages-1].bits & CHUNK_MAP_FLAGS_MASK);
2971 /* Try to coalesce forward. */
2972 if (run_ind + run_pages < chunk_npages &&
2973 (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
2974 size_t nrun_size = chunk->map[run_ind+run_pages].bits &
2978 * Remove successor from runs_avail; the coalesced run is inserted later.
2981 arena_avail_tree_remove(&arena->runs_avail,
2982 &chunk->map[run_ind+run_pages]);
2985 run_pages = size >> PAGE_SHIFT;
2987 assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
2989 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2990 CHUNK_MAP_FLAGS_MASK);
2991 chunk->map[run_ind+run_pages-1].bits = size |
2992 (chunk->map[run_ind+run_pages-1].bits &
2993 CHUNK_MAP_FLAGS_MASK);
2996 /* Try to coalesce backward. */
2997 if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
2998 CHUNK_MAP_ALLOCATED) == 0) {
2999 size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;
3001 run_ind -= prun_size >> PAGE_SHIFT;
3004 * Remove predecessor from runs_avail; the coalesced run is inserted later.
3007 arena_avail_tree_remove(&arena->runs_avail,
3008 &chunk->map[run_ind]);
3011 run_pages = size >> PAGE_SHIFT;
3013 assert((chunk->map[run_ind].bits & ~PAGE_MASK) == prun_size);
3014 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
3015 CHUNK_MAP_FLAGS_MASK);
3016 chunk->map[run_ind+run_pages-1].bits = size |
3017 (chunk->map[run_ind+run_pages-1].bits &
3018 CHUNK_MAP_FLAGS_MASK);
3021 /* Insert into runs_avail, now that coalescing is complete. */
3022 arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
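/*
 * Coalescing example: if a 3-page run is freed and the 5 pages that
 * immediately follow it are already unallocated, the forward pass above
 * removes the 5-page neighbor from runs_avail, grows size to 8 pages, and
 * rewrites the size in the first and last map entries of the combined
 * range; the backward pass does the same with an unallocated predecessor.
 * Only the single coalesced run is then inserted into runs_avail.
 */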
3025 * Deallocate chunk if it is now completely unused. The bit
3026 * manipulation checks whether the first run is unallocated and extends
3027 * to the end of the chunk.
3029 if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
3030 CHUNK_MAP_ALLOCATED)) == arena_maxclass)
3031 arena_chunk_dealloc(arena, chunk);
3034 * It is okay to do dirty page processing even if the chunk was
3035 * deallocated above, since in that case it is the spare. Waiting
3036 * until after possible chunk deallocation to do dirty processing
3037 * allows for an old spare to be fully deallocated, thus decreasing the
3038 * chances of spuriously crossing the dirty page purging threshold.
3041 if (chunk->dirtied == false) {
3042 arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
3044 chunk->dirtied = true;
3047 /* Enforce opt_lg_dirty_mult. */
3048 if (opt_lg_dirty_mult >= 0 && (arena->nactive >>
3049 opt_lg_dirty_mult) < arena->ndirty)
3055 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3056 size_t oldsize, size_t newsize)
3058 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
3059 size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
3061 assert(oldsize > newsize);
3064 * Update the chunk map so that arena_run_dalloc() can treat the
3065 * leading run as separately allocated.
3067 assert((chunk->map[pageind].bits & CHUNK_MAP_DIRTY) == 0);
3068 chunk->map[pageind].bits = (oldsize - newsize) | CHUNK_MAP_LARGE |
3069 CHUNK_MAP_ALLOCATED;
3070 assert((chunk->map[pageind+head_npages].bits & CHUNK_MAP_DIRTY) == 0);
3071 chunk->map[pageind+head_npages].bits = newsize | CHUNK_MAP_LARGE |
3072 CHUNK_MAP_ALLOCATED;
3074 arena_run_dalloc(arena, run, false);
3078 arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3079 size_t oldsize, size_t newsize, bool dirty)
3081 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
3082 size_t npages = newsize >> PAGE_SHIFT;
3084 assert(oldsize > newsize);
3087 * Update the chunk map so that arena_run_dalloc() can treat the
3088 * trailing run as separately allocated.
3090 assert((chunk->map[pageind].bits & CHUNK_MAP_DIRTY) == 0);
3091 chunk->map[pageind].bits = newsize | CHUNK_MAP_LARGE |
3092 CHUNK_MAP_ALLOCATED;
3093 assert((chunk->map[pageind+npages].bits & CHUNK_MAP_DIRTY) == 0);
3094 chunk->map[pageind+npages].bits = (oldsize - newsize) | CHUNK_MAP_LARGE
3095 | CHUNK_MAP_ALLOCATED;
3097 arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
3101 static arena_run_t *
3102 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
3104 arena_chunk_map_t *mapelm;
3106 unsigned i, remainder;
3108 /* Look for a usable run. */
3109 mapelm = arena_run_tree_first(&bin->runs);
3110 if (mapelm != NULL) {
3111 arena_chunk_t *chunk;
3114 /* run is guaranteed to have available space. */
3115 arena_run_tree_remove(&bin->runs, mapelm);
3117 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
3118 pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
3119 sizeof(arena_chunk_map_t));
3120 run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
3121 ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT))
3124 bin->stats.reruns++;
3128 /* No existing runs have any space available. */
3130 /* Allocate a new run. */
3131 run = arena_run_alloc(arena, bin->run_size, false, false);
3135 /* Initialize run internals. */
3138 for (i = 0; i < bin->regs_mask_nelms - 1; i++)
3139 run->regs_mask[i] = UINT_MAX;
3140 remainder = bin->nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1);
3142 run->regs_mask[i] = UINT_MAX;
3144 /* The last element has spare bits that need to be unset. */
3145 run->regs_mask[i] = (UINT_MAX >> ((1U << (LG_SIZEOF_INT + 3))
3149 run->regs_minelm = 0;
3151 run->nfree = bin->nregs;
3153 run->magic = ARENA_RUN_MAGIC;
3158 bin->stats.curruns++;
3159 if (bin->stats.curruns > bin->stats.highruns)
3160 bin->stats.highruns = bin->stats.curruns;
3165 /* bin->runcur must have space available before this function is called. */
3166 static inline void *
3167 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
3171 assert(run->magic == ARENA_RUN_MAGIC);
3172 assert(run->nfree > 0);
3174 ret = arena_run_reg_alloc(run, bin);
3175 assert(ret != NULL);
3181 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
3183 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
3186 bin->runcur = arena_bin_nonfull_run_get(arena, bin);
3187 if (bin->runcur == NULL)
3189 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
3190 assert(bin->runcur->nfree > 0);
3192 return (arena_bin_malloc_easy(arena, bin, bin->runcur));
3196 * Calculate bin->run_size such that it meets the following constraints:
3198 * *) bin->run_size >= min_run_size
3199 * *) bin->run_size <= arena_maxclass
3200 * *) bin->run_size <= RUN_MAX_SMALL
3201 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
3202 * *) run header size < PAGE_SIZE
3204 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
3205 * also calculated here, since these settings are all interdependent.
3208 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
3210 size_t try_run_size, good_run_size;
3211 unsigned good_nregs, good_mask_nelms, good_reg0_offset;
3212 unsigned try_nregs, try_mask_nelms, try_reg0_offset;
3214 assert(min_run_size >= PAGE_SIZE);
3215 assert(min_run_size <= arena_maxclass);
3216 assert(min_run_size <= RUN_MAX_SMALL);
3219 * Calculate known-valid settings before entering the run_size
3220 * expansion loop, so that the first part of the loop always copies valid settings.
3223 * The do..while loop iteratively reduces the number of regions until
3224 * the run header and the regions no longer overlap. A closed formula
3225 * would be quite messy, since there is an interdependency between the
3226 * header's mask length and the number of regions.
3228 try_run_size = min_run_size;
3229 try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
3230 + 1; /* Counter-act try_nregs-- in loop. */
3233 try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
3234 ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ? 1 : 0);
3235 try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
3236 } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
3239 /* run_size expansion loop. */
3242 * Copy valid settings before trying more aggressive settings.
3244 good_run_size = try_run_size;
3245 good_nregs = try_nregs;
3246 good_mask_nelms = try_mask_nelms;
3247 good_reg0_offset = try_reg0_offset;
3249 /* Try more aggressive settings. */
3250 try_run_size += PAGE_SIZE;
3251 try_nregs = ((try_run_size - sizeof(arena_run_t)) /
3252 bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
3255 try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
3256 ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ?
3258 try_reg0_offset = try_run_size - (try_nregs *
3260 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
3261 (try_mask_nelms - 1)) > try_reg0_offset);
3262 } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
3263 && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
3264 && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
3265 && (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1)))
3268 assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
3269 <= good_reg0_offset);
3270 assert((good_mask_nelms << (LG_SIZEOF_INT + 3)) >= good_nregs);
3272 /* Copy final settings. */
3273 bin->run_size = good_run_size;
3274 bin->nregs = good_nregs;
3275 bin->regs_mask_nelms = good_mask_nelms;
3276 bin->reg0_offset = good_reg0_offset;
3278 return (good_run_size);
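/*
 * Worked example (hypothetical header size): assume PAGE_SIZE == 4096,
 * reg_size == 64, sizeof(arena_run_t) == 40, and sizeof(unsigned) == 4.
 * Starting from try_run_size == 4096, the inner loop settles on
 *
 *   try_nregs       = 63
 *   try_mask_nelms  = 2                (ceiling of 63 / 32)
 *   try_reg0_offset = 4096 - 63 * 64   = 64
 *
 * since the header needs 40 + 4 * (2 - 1) == 44 bytes, which fits in the
 * 64 bytes preceding region 0.  With 64 regions the header would overlap
 * region 0, so the loop backs off by one region.
 */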
3281 #ifdef MALLOC_TCACHE
3283 tcache_event(tcache_t *tcache)
3286 if (tcache_gc_incr == 0)
3290 assert(tcache->ev_cnt <= tcache_gc_incr);
3291 if (tcache->ev_cnt >= tcache_gc_incr) {
3292 size_t binind = tcache->next_gc_bin;
3293 tcache_bin_t *tbin = tcache->tbins[binind];
3296 if (tbin->high_water == 0) {
3298 * This bin went completely unused for an
3299 * entire GC cycle, so throw away the tbin.
3301 assert(tbin->ncached == 0);
3302 tcache_bin_destroy(tcache, tbin, binind);
3303 tcache->tbins[binind] = NULL;
3305 if (tbin->low_water > 0) {
3307 * Flush (ceiling) half of the objects
3308 * below the low water mark.
3310 tcache_bin_flush(tbin, binind,
3311 tbin->ncached - (tbin->low_water >>
3312 1) - (tbin->low_water & 1));
3314 tbin->low_water = tbin->ncached;
3315 tbin->high_water = tbin->ncached;
3319 tcache->next_gc_bin++;
3320 if (tcache->next_gc_bin == nbins)
3321 tcache->next_gc_bin = 0;
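/*
 * GC example: suppose a bin currently caches 20 objects and its low water
 * mark over the last GC interval was 7.  The flush above keeps
 *
 *   20 - (7 >> 1) - (7 & 1) = 16
 *
 * objects, i.e. it returns ceil(7 / 2) == 4 of the objects that sat unused
 * for the whole interval back to the arena, then resets both water marks.
 */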
3326 static inline void *
3327 tcache_bin_alloc(tcache_bin_t *tbin)
3330 if (tbin->ncached == 0)
3333 if (tbin->ncached < tbin->low_water)
3334 tbin->low_water = tbin->ncached;
3335 return (tbin->slots[tbin->ncached]);
3339 tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
3347 assert(tbin->ncached == 0);
3349 arena = tcache->arena;
3350 bin = &arena->bins[binind];
3351 malloc_spin_lock(&arena->lock);
3352 for (i = 0; i < (tcache_nslots >> 1); i++) {
3353 if ((run = bin->runcur) != NULL && run->nfree > 0)
3354 ptr = arena_bin_malloc_easy(arena, bin, run);
3356 ptr = arena_bin_malloc_hard(arena, bin);
3360 * Fill tbin such that the objects lowest in memory are used first.
3363 tbin->slots[(tcache_nslots >> 1) - 1 - i] = ptr;
3366 bin->stats.nfills++;
3367 bin->stats.nrequests += tbin->tstats.nrequests;
3368 if (bin->reg_size <= small_maxclass) {
3369 arena->stats.nmalloc_small += (i - tbin->ncached);
3370 arena->stats.allocated_small += (i - tbin->ncached) *
3372 arena->stats.nmalloc_small += tbin->tstats.nrequests;
3374 arena->stats.nmalloc_medium += (i - tbin->ncached);
3375 arena->stats.allocated_medium += (i - tbin->ncached) *
3377 arena->stats.nmalloc_medium += tbin->tstats.nrequests;
3379 tbin->tstats.nrequests = 0;
3381 malloc_spin_unlock(&arena->lock);
3383 if (tbin->ncached > tbin->high_water)
3384 tbin->high_water = tbin->ncached;
3387 static inline void *
3388 tcache_alloc(tcache_t *tcache, size_t size, bool zero)
3394 if (size <= small_maxclass)
3395 binind = small_size2bin[size];
3397 binind = mbin0 + ((MEDIUM_CEILING(size) - medium_min) >>
3400 assert(binind < nbins);
3401 tbin = tcache->tbins[binind];
3403 tbin = tcache_bin_create(tcache->arena);
3406 tcache->tbins[binind] = tbin;
3409 ret = tcache_bin_alloc(tbin);
3411 ret = tcache_alloc_hard(tcache, tbin, binind);
3416 if (zero == false) {
3418 memset(ret, 0xa5, size);
3420 memset(ret, 0, size);
3422 memset(ret, 0, size);
3425 tbin->tstats.nrequests++;
3427 tcache_event(tcache);
3432 tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
3436 tcache_bin_fill(tcache, tbin, binind);
3437 ret = tcache_bin_alloc(tbin);
3443 static inline void *
3444 arena_malloc_small(arena_t *arena, size_t size, bool zero)
3451 binind = small_size2bin[size];
3452 assert(binind < mbin0);
3453 bin = &arena->bins[binind];
3454 size = bin->reg_size;
3456 malloc_spin_lock(&arena->lock);
3457 if ((run = bin->runcur) != NULL && run->nfree > 0)
3458 ret = arena_bin_malloc_easy(arena, bin, run);
3460 ret = arena_bin_malloc_hard(arena, bin);
3463 malloc_spin_unlock(&arena->lock);
3468 # ifdef MALLOC_TCACHE
3469 if (__isthreaded == false) {
3471 bin->stats.nrequests++;
3472 arena->stats.nmalloc_small++;
3473 # ifdef MALLOC_TCACHE
3476 arena->stats.allocated_small += size;
3478 malloc_spin_unlock(&arena->lock);
3480 if (zero == false) {
3482 memset(ret, 0xa5, size);
3484 memset(ret, 0, size);
3486 memset(ret, 0, size);
3492 arena_malloc_medium(arena_t *arena, size_t size, bool zero)
3499 size = MEDIUM_CEILING(size);
3500 binind = mbin0 + ((size - medium_min) >> lg_mspace);
3501 assert(binind < nbins);
3502 bin = &arena->bins[binind];
3503 assert(bin->reg_size == size);
3505 malloc_spin_lock(&arena->lock);
3506 if ((run = bin->runcur) != NULL && run->nfree > 0)
3507 ret = arena_bin_malloc_easy(arena, bin, run);
3509 ret = arena_bin_malloc_hard(arena, bin);
3512 malloc_spin_unlock(&arena->lock);
3517 # ifdef MALLOC_TCACHE
3518 if (__isthreaded == false) {
3520 bin->stats.nrequests++;
3521 arena->stats.nmalloc_medium++;
3522 # ifdef MALLOC_TCACHE
3525 arena->stats.allocated_medium += size;
3527 malloc_spin_unlock(&arena->lock);
3529 if (zero == false) {
3531 memset(ret, 0xa5, size);
3533 memset(ret, 0, size);
3535 memset(ret, 0, size);
3541 arena_malloc_large(arena_t *arena, size_t size, bool zero)
3545 /* Large allocation. */
3546 size = PAGE_CEILING(size);
3547 malloc_spin_lock(&arena->lock);
3548 ret = (void *)arena_run_alloc(arena, size, true, zero);
3550 malloc_spin_unlock(&arena->lock);
3554 arena->stats.nmalloc_large++;
3555 arena->stats.allocated_large += size;
3556 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
3557 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
3558 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
3559 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
3560 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
3561 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
3564 malloc_spin_unlock(&arena->lock);
3566 if (zero == false) {
3568 memset(ret, 0xa5, size);
3570 memset(ret, 0, size);
3576 static inline void *
3577 arena_malloc(size_t size, bool zero)
3581 assert(QUANTUM_CEILING(size) <= arena_maxclass);
3583 if (size <= bin_maxclass) {
3584 #ifdef MALLOC_TCACHE
3585 if (__isthreaded && tcache_nslots) {
3586 tcache_t *tcache = tcache_tls;
3587 if ((uintptr_t)tcache > (uintptr_t)1)
3588 return (tcache_alloc(tcache, size, zero));
3589 else if (tcache == NULL) {
3590 tcache = tcache_create(choose_arena());
3593 return (tcache_alloc(tcache, size, zero));
3597 if (size <= small_maxclass) {
3598 return (arena_malloc_small(choose_arena(), size,
3601 return (arena_malloc_medium(choose_arena(),
3605 return (arena_malloc_large(choose_arena(), size, zero));
3608 static inline void *
3609 imalloc(size_t size)
3614 if (size <= arena_maxclass)
3615 return (arena_malloc(size, false));
3617 return (huge_malloc(size, false));
3620 static inline void *
3621 icalloc(size_t size)
3624 if (size <= arena_maxclass)
3625 return (arena_malloc(size, true));
3627 return (huge_malloc(size, true));
3630 /* Only handles large allocations that require more than page alignment. */
3632 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
3636 arena_chunk_t *chunk;
3638 assert((size & PAGE_MASK) == 0);
3639 assert((alignment & PAGE_MASK) == 0);
3641 malloc_spin_lock(&arena->lock);
3642 ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
3644 malloc_spin_unlock(&arena->lock);
3648 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
3650 offset = (uintptr_t)ret & (alignment - 1);
3651 assert((offset & PAGE_MASK) == 0);
3652 assert(offset < alloc_size);
3654 arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
3656 size_t leadsize, trailsize;
3658 leadsize = alignment - offset;
3660 arena_run_trim_head(arena, chunk, ret, alloc_size,
3661 alloc_size - leadsize);
3662 ret = (void *)((uintptr_t)ret + leadsize);
3665 trailsize = alloc_size - leadsize - size;
3666 if (trailsize != 0) {
3667 /* Trim trailing space. */
3668 assert(trailsize < alloc_size);
3669 arena_run_trim_tail(arena, chunk, ret, size + trailsize,
3675 arena->stats.nmalloc_large++;
3676 arena->stats.allocated_large += size;
3677 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
3678 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
3679 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
3680 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
3681 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
3682 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
3685 malloc_spin_unlock(&arena->lock);
3688 memset(ret, 0xa5, size);
3690 memset(ret, 0, size);
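/*
 * Trimming example (hypothetical values): assume PAGE_SIZE == 4096,
 * alignment == 16384, size == 8192, and alloc_size == 20480 (5 pages, as
 * computed by ipalloc()).  If the run returned above starts 4096 bytes past
 * a 16 KiB boundary, then offset == 4096, leadsize == 12288, so 3 leading
 * pages are trimmed and ret advances to an aligned address; trailsize is
 * then 0 and no tail trim is needed.  If offset == 0, the head is kept and
 * the 3 trailing pages are trimmed instead.
 */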
3694 static inline void *
3695 ipalloc(size_t alignment, size_t size)
3701 * Round size up to the nearest multiple of alignment.
3703 * This done, we can take advantage of the fact that for each small
3704 * size class, every object is aligned at the smallest power of two
3705 * that is non-zero in the base two representation of the size. For example:
3708 * Size | Base 2 | Minimum alignment
3709 * -----+----------+------------------
3711 * 144 | 10100000 | 32
3712 * 192 | 11000000 | 64
3714 * Depending on runtime settings, it is possible that arena_malloc()
3715 * will further round up to a power of two, but that never causes
3716 * correctness issues.
3718 ceil_size = (size + (alignment - 1)) & (-alignment);
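/*
 * For example, size == 100 with alignment == 32 gives
 * ceil_size = (100 + 31) & -32 = 128; assuming 128 is a small size class at
 * runtime, every object in that class is at least 128-byte aligned, which
 * satisfies the requested 32-byte alignment per the table above.
 */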
3720 * (ceil_size < size) protects against the combination of maximal
3721 * alignment and size greater than maximal alignment.
3723 if (ceil_size < size) {
3724 /* size_t overflow. */
3728 if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
3729 && ceil_size <= arena_maxclass))
3730 ret = arena_malloc(ceil_size, false);
3735 * We can't achieve subpage alignment, so round up alignment
3736 * permanently; it makes later calculations simpler.
3738 alignment = PAGE_CEILING(alignment);
3739 ceil_size = PAGE_CEILING(size);
3741 * (ceil_size < size) protects against very large sizes within
3742 * PAGE_SIZE of SIZE_T_MAX.
3744 * (ceil_size + alignment < ceil_size) protects against the
3745 * combination of maximal alignment and ceil_size large enough
3746 * to cause overflow. This is similar to the first overflow
3747 * check above, but it needs to be repeated due to the new
3748 * ceil_size value, which may now be *equal* to maximal
3749 * alignment, whereas before we only detected overflow if the
3750 * original size was *greater* than maximal alignment.
3752 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3753 /* size_t overflow. */
3758 * Calculate the size of the over-size run that arena_palloc()
3759 * would need to allocate in order to guarantee the alignment.
3761 if (ceil_size >= alignment)
3762 run_size = ceil_size + alignment - PAGE_SIZE;
3765 * It is possible that (alignment << 1) will cause
3766 * overflow, but it doesn't matter because we also
3767 * subtract PAGE_SIZE, which in the case of overflow
3768 * leaves us with a very large run_size. That causes
3769 * the first conditional below to fail, which means
3770 * that the bogus run_size value never gets used for
3771 * anything important.
3773 run_size = (alignment << 1) - PAGE_SIZE;
3776 if (run_size <= arena_maxclass) {
3777 ret = arena_palloc(choose_arena(), alignment, ceil_size,
3779 } else if (alignment <= chunksize)
3780 ret = huge_malloc(ceil_size, false);
3782 ret = huge_palloc(alignment, ceil_size);
3785 assert(((uintptr_t)ret & (alignment - 1)) == 0);
3790 arena_is_large(const void *ptr)
3792 arena_chunk_t *chunk;
3793 size_t pageind, mapbits;
3795 assert(ptr != NULL);
3796 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3798 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3799 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3800 mapbits = chunk->map[pageind].bits;
3801 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3802 return ((mapbits & CHUNK_MAP_LARGE) != 0);
3805 /* Return the size of the allocation pointed to by ptr. */
3807 arena_salloc(const void *ptr)
3810 arena_chunk_t *chunk;
3811 size_t pageind, mapbits;
3813 assert(ptr != NULL);
3814 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3816 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3817 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3818 mapbits = chunk->map[pageind].bits;
3819 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3820 if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3821 arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
3822 (uintptr_t)((pageind - ((mapbits & CHUNK_MAP_PG_MASK) >>
3823 CHUNK_MAP_PG_SHIFT)) << PAGE_SHIFT));
3824 assert(run->magic == ARENA_RUN_MAGIC);
3825 ret = run->bin->reg_size;
3827 ret = mapbits & ~PAGE_MASK;
3834 static inline size_t
3835 isalloc(const void *ptr)
3838 arena_chunk_t *chunk;
3840 assert(ptr != NULL);
3842 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3845 assert(chunk->arena->magic == ARENA_MAGIC);
3847 ret = arena_salloc(ptr);
3849 extent_node_t *node, key;
3851 /* Chunk (huge allocation). */
3853 malloc_mutex_lock(&huge_mtx);
3855 /* Extract from tree of huge allocations. */
3856 key.addr = __DECONST(void *, ptr);
3857 node = extent_tree_ad_search(&huge, &key);
3858 assert(node != NULL);
3862 malloc_mutex_unlock(&huge_mtx);
3869 arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3870 arena_chunk_map_t *mapelm)
3877 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3878 run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
3879 ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT)) <<
3881 assert(run->magic == ARENA_RUN_MAGIC);
3883 size = bin->reg_size;
3886 memset(ptr, 0x5a, size);
3888 arena_run_reg_dalloc(run, bin, ptr, size);
3891 if (run->nfree == bin->nregs)
3892 arena_dalloc_bin_run(arena, chunk, run, bin);
3893 else if (run->nfree == 1 && run != bin->runcur) {
3895 * Make sure that bin->runcur always refers to the lowest
3896 * non-full run, if one exists.
3898 if (bin->runcur == NULL)
3900 else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
3901 /* Switch runcur. */
3902 if (bin->runcur->nfree > 0) {
3903 arena_chunk_t *runcur_chunk =
3904 CHUNK_ADDR2BASE(bin->runcur);
3905 size_t runcur_pageind =
3906 (((uintptr_t)bin->runcur -
3907 (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
3908 arena_chunk_map_t *runcur_mapelm =
3909 &runcur_chunk->map[runcur_pageind];
3911 /* Insert runcur. */
3912 arena_run_tree_insert(&bin->runs,
3917 size_t run_pageind = (((uintptr_t)run -
3918 (uintptr_t)chunk)) >> PAGE_SHIFT;
3919 arena_chunk_map_t *run_mapelm =
3920 &chunk->map[run_pageind];
3922 assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
3924 arena_run_tree_insert(&bin->runs, run_mapelm);
3929 if (size <= small_maxclass) {
3930 arena->stats.allocated_small -= size;
3931 arena->stats.ndalloc_small++;
3933 arena->stats.allocated_medium -= size;
3934 arena->stats.ndalloc_medium++;
3940 arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3945 /* Deallocate run. */
3946 if (run == bin->runcur)
3948 else if (bin->nregs != 1) {
3949 size_t run_pageind = (((uintptr_t)run -
3950 (uintptr_t)chunk)) >> PAGE_SHIFT;
3951 arena_chunk_map_t *run_mapelm =
3952 &chunk->map[run_pageind];
3954 * This block's conditional is necessary because if the
3955 * run only contains one region, then it never gets
3956 * inserted into the non-full runs tree.
3958 arena_run_tree_remove(&bin->runs, run_mapelm);
3961 * Mark the first page as dirty. The dirty bit for every other page in
3962 * the run is already properly set, which means we can call
3963 * arena_run_dalloc(..., false), thus potentially avoiding the needless
3964 * creation of many dirty pages.
3966 run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
3967 assert((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0);
3968 chunk->map[run_ind].bits |= CHUNK_MAP_DIRTY;
3975 arena_run_dalloc(arena, run, false);
3977 bin->stats.curruns--;
3980 if (chunk->dirtied == false) {
3981 arena_chunk_tree_dirty_insert(&arena->chunks_dirty, chunk);
3982 chunk->dirtied = true;
3984 /* Enforce opt_lg_dirty_mult. */
3985 if (opt_lg_dirty_mult >= 0 && (arena->nactive >> opt_lg_dirty_mult) <
3992 arena_stats_print(arena_t *arena)
3995 malloc_printf("dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s,"
3996 " %"PRIu64" madvise%s, %"PRIu64" purged\n",
3997 arena->nactive, arena->ndirty,
3998 arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
3999 arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
4000 arena->stats.purged);
4002 malloc_printf(" allocated nmalloc ndalloc\n");
4003 malloc_printf("small: %12zu %12"PRIu64" %12"PRIu64"\n",
4004 arena->stats.allocated_small, arena->stats.nmalloc_small,
4005 arena->stats.ndalloc_small);
4006 malloc_printf("medium: %12zu %12"PRIu64" %12"PRIu64"\n",
4007 arena->stats.allocated_medium, arena->stats.nmalloc_medium,
4008 arena->stats.ndalloc_medium);
4009 malloc_printf("large: %12zu %12"PRIu64" %12"PRIu64"\n",
4010 arena->stats.allocated_large, arena->stats.nmalloc_large,
4011 arena->stats.ndalloc_large);
4012 malloc_printf("total: %12zu %12"PRIu64" %12"PRIu64"\n",
4013 arena->stats.allocated_small + arena->stats.allocated_medium +
4014 arena->stats.allocated_large, arena->stats.nmalloc_small +
4015 arena->stats.nmalloc_medium + arena->stats.nmalloc_large,
4016 arena->stats.ndalloc_small + arena->stats.ndalloc_medium +
4017 arena->stats.ndalloc_large);
4018 malloc_printf("mapped: %12zu\n", arena->stats.mapped);
4020 if (arena->stats.nmalloc_small + arena->stats.nmalloc_medium > 0) {
4021 unsigned i, gap_start;
4022 #ifdef MALLOC_TCACHE
4023 malloc_printf("bins: bin size regs pgs requests "
4024 "nfills nflushes newruns reruns maxruns curruns\n");
4026 malloc_printf("bins: bin size regs pgs requests "
4027 "newruns reruns maxruns curruns\n");
4029 for (i = 0, gap_start = UINT_MAX; i < nbins; i++) {
4030 if (arena->bins[i].stats.nruns == 0) {
4031 if (gap_start == UINT_MAX)
4034 if (gap_start != UINT_MAX) {
4035 if (i > gap_start + 1) {
4037 * Gap of more than one size class.
4040 malloc_printf("[%u..%u]\n",
4043 /* Gap of one size class. */
4044 malloc_printf("[%u]\n",
4047 gap_start = UINT_MAX;
4050 "%13u %1s %5u %4u %3u %9"PRIu64" %9"PRIu64
4051 #ifdef MALLOC_TCACHE
4052 " %9"PRIu64" %9"PRIu64
4054 " %9"PRIu64" %7zu %7zu\n",
4056 i < ntbins ? "T" : i < ntbins + nqbins ?
4057 "Q" : i < ntbins + nqbins + ncbins ? "C" :
4058 i < ntbins + nqbins + ncbins + nsbins ? "S"
4060 arena->bins[i].reg_size,
4061 arena->bins[i].nregs,
4062 arena->bins[i].run_size >> PAGE_SHIFT,
4063 arena->bins[i].stats.nrequests,
4064 #ifdef MALLOC_TCACHE
4065 arena->bins[i].stats.nfills,
4066 arena->bins[i].stats.nflushes,
4068 arena->bins[i].stats.nruns,
4069 arena->bins[i].stats.reruns,
4070 arena->bins[i].stats.highruns,
4071 arena->bins[i].stats.curruns);
4074 if (gap_start != UINT_MAX) {
4075 if (i > gap_start + 1) {
4076 /* Gap of more than one size class. */
4077 malloc_printf("[%u..%u]\n", gap_start, i - 1);
4079 /* Gap of one size class. */
4080 malloc_printf("[%u]\n", gap_start);
4085 if (arena->stats.nmalloc_large > 0) {
4088 size_t nlclasses = (chunksize - PAGE_SIZE) >> PAGE_SHIFT;
4091 "large: size pages nrequests maxruns curruns\n");
4093 for (i = 0, gap_start = -1; i < nlclasses; i++) {
4094 if (arena->stats.lstats[i].nrequests == 0) {
4095 if (gap_start == -1)
4098 if (gap_start != -1) {
4099 malloc_printf("[%zu]\n", i - gap_start);
4103 "%13zu %5zu %9"PRIu64" %9zu %9zu\n",
4104 (i+1) << PAGE_SHIFT, i+1,
4105 arena->stats.lstats[i].nrequests,
4106 arena->stats.lstats[i].highruns,
4107 arena->stats.lstats[i].curruns);
4110 if (gap_start != -1)
4111 malloc_printf("[%zu]\n", i - gap_start);
4117 stats_print_atexit(void)
4120 #if (defined(MALLOC_TCACHE) && defined(MALLOC_STATS))
4124 * Merge stats from extant threads. This is racy, since individual
4125 * threads do not lock when recording tcache stats events. As a
4126 * consequence, the final stats may be slightly out of date by the time
4127 * they are reported, if other threads continue to allocate.
4129 for (i = 0; i < narenas; i++) {
4130 arena_t *arena = arenas[i];
4131 if (arena != NULL) {
4134 malloc_spin_lock(&arena->lock);
4135 ql_foreach(tcache, &arena->tcache_ql, link) {
4136 tcache_stats_merge(tcache, arena);
4138 malloc_spin_unlock(&arena->lock);
4142 malloc_stats_print();
4145 #ifdef MALLOC_TCACHE
4147 tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
4149 arena_chunk_t *chunk;
4152 unsigned i, ndeferred, ncached;
4154 for (ndeferred = tbin->ncached - rem; ndeferred > 0;) {
4155 ncached = ndeferred;
4156 /* Lock the arena associated with the first object. */
4157 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(tbin->slots[0]);
4158 arena = chunk->arena;
4159 malloc_spin_lock(&arena->lock);
4160 /* Deallocate every object that belongs to the locked arena. */
4161 for (i = ndeferred = 0; i < ncached; i++) {
4162 ptr = tbin->slots[i];
4163 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4164 if (chunk->arena == arena) {
4165 size_t pageind = (((uintptr_t)ptr -
4166 (uintptr_t)chunk) >> PAGE_SHIFT);
4167 arena_chunk_map_t *mapelm =
4168 &chunk->map[pageind];
4169 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4172 * This object was allocated via a different
4173 * arena than the one that is currently locked.
4174 * Stash the object, so that it can be handled in a future pass.
4177 tbin->slots[ndeferred] = ptr;
4182 arena->bins[binind].stats.nflushes++;
4184 arena_bin_t *bin = &arena->bins[binind];
4185 bin->stats.nrequests += tbin->tstats.nrequests;
4186 if (bin->reg_size <= small_maxclass) {
4187 arena->stats.nmalloc_small +=
4188 tbin->tstats.nrequests;
4190 arena->stats.nmalloc_medium +=
4191 tbin->tstats.nrequests;
4193 tbin->tstats.nrequests = 0;
4196 malloc_spin_unlock(&arena->lock);
4201 * Shift the remaining valid pointers to the base of the slots array.
4204 memmove(&tbin->slots[0], &tbin->slots[tbin->ncached - rem],
4205 rem * sizeof(void *));
4207 tbin->ncached = rem;
4211 tcache_dalloc(tcache_t *tcache, void *ptr)
4214 arena_chunk_t *chunk;
4218 size_t pageind, binind;
4219 arena_chunk_map_t *mapelm;
4221 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4222 arena = chunk->arena;
4223 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
4224 mapelm = &chunk->map[pageind];
4225 run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
4226 ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT)) <<
4228 assert(run->magic == ARENA_RUN_MAGIC);
4230 binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
4231 sizeof(arena_bin_t);
4232 assert(binind < nbins);
4235 memset(ptr, 0x5a, arena->bins[binind].reg_size);
4237 tbin = tcache->tbins[binind];
4239 tbin = tcache_bin_create(choose_arena());
4241 malloc_spin_lock(&arena->lock);
4242 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4243 malloc_spin_unlock(&arena->lock);
4246 tcache->tbins[binind] = tbin;
4249 if (tbin->ncached == tcache_nslots)
4250 tcache_bin_flush(tbin, binind, (tcache_nslots >> 1));
4251 assert(tbin->ncached < tcache_nslots);
4252 tbin->slots[tbin->ncached] = ptr;
4254 if (tbin->ncached > tbin->high_water)
4255 tbin->high_water = tbin->ncached;
4257 tcache_event(tcache);
4262 arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
4265 /* Large allocation. */
4266 malloc_spin_lock(&arena->lock);
4268 #ifndef MALLOC_STATS
4272 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
4274 size_t size = chunk->map[pageind].bits & ~PAGE_MASK;
4279 memset(ptr, 0x5a, size);
4281 arena->stats.ndalloc_large++;
4282 arena->stats.allocated_large -= size;
4283 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--;
4287 arena_run_dalloc(arena, (arena_run_t *)ptr, true);
4288 malloc_spin_unlock(&arena->lock);
4292 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
4295 arena_chunk_map_t *mapelm;
4297 assert(arena != NULL);
4298 assert(arena->magic == ARENA_MAGIC);
4299 assert(chunk->arena == arena);
4300 assert(ptr != NULL);
4301 assert(CHUNK_ADDR2BASE(ptr) != ptr);
4303 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
4304 mapelm = &chunk->map[pageind];
4305 assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
4306 if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
4307 /* Small allocation. */
4308 #ifdef MALLOC_TCACHE
4309 if (__isthreaded && tcache_nslots) {
4310 tcache_t *tcache = tcache_tls;
4311 if ((uintptr_t)tcache > (uintptr_t)1)
4312 tcache_dalloc(tcache, ptr);
4314 arena_dalloc_hard(arena, chunk, ptr, mapelm,
4319 malloc_spin_lock(&arena->lock);
4320 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4321 malloc_spin_unlock(&arena->lock);
4322 #ifdef MALLOC_TCACHE
4326 arena_dalloc_large(arena, chunk, ptr);
4329 #ifdef MALLOC_TCACHE
4331 arena_dalloc_hard(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4332 arena_chunk_map_t *mapelm, tcache_t *tcache)
4335 if (tcache == NULL) {
4336 tcache = tcache_create(arena);
4337 if (tcache == NULL) {
4338 malloc_spin_lock(&arena->lock);
4339 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4340 malloc_spin_unlock(&arena->lock);
4342 tcache_dalloc(tcache, ptr);
4344 /* This thread is currently exiting, so directly deallocate. */
4345 assert(tcache == (void *)(uintptr_t)1);
4346 malloc_spin_lock(&arena->lock);
4347 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4348 malloc_spin_unlock(&arena->lock);
4356 arena_chunk_t *chunk;
4358 assert(ptr != NULL);
4360 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4362 arena_dalloc(chunk->arena, chunk, ptr);
4368 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4369 size_t size, size_t oldsize)
4372 assert(size < oldsize);
4375 * Shrink the run, and make trailing pages available for other allocations.
4378 malloc_spin_lock(&arena->lock);
4379 arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
4382 arena->stats.ndalloc_large++;
4383 arena->stats.allocated_large -= oldsize;
4384 arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
4386 arena->stats.nmalloc_large++;
4387 arena->stats.allocated_large += size;
4388 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
4389 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
4390 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
4391 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
4392 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
4393 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
4396 malloc_spin_unlock(&arena->lock);
4400 arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4401 size_t size, size_t oldsize)
4403 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
4404 size_t npages = oldsize >> PAGE_SHIFT;
4406 assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));
4408 /* Try to extend the run. */
4409 assert(size > oldsize);
4410 malloc_spin_lock(&arena->lock);
4411 if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
4412 & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
4413 ~PAGE_MASK) >= size - oldsize) {
4415 * The next run is available and sufficiently large. Split the
4416 * following run, then merge the first part with the existing allocation.
4419 arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
4420 ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
4423 chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
4424 CHUNK_MAP_ALLOCATED;
4425 chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
4426 CHUNK_MAP_ALLOCATED;
4429 arena->stats.ndalloc_large++;
4430 arena->stats.allocated_large -= oldsize;
4431 arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
4433 arena->stats.nmalloc_large++;
4434 arena->stats.allocated_large += size;
4435 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
4436 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
4437 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
4438 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
4439 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
4440 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
4443 malloc_spin_unlock(&arena->lock);
4446 malloc_spin_unlock(&arena->lock);
4452 * Try to resize a large allocation, in order to avoid copying. This will
4453 * always fail when growing an object if the following run is already in use.
4456 arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
4460 psize = PAGE_CEILING(size);
4461 if (psize == oldsize) {
4462 /* Same size class. */
4463 if (opt_junk && size < oldsize) {
4464 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
4469 arena_chunk_t *chunk;
4472 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4473 arena = chunk->arena;
4474 assert(arena->magic == ARENA_MAGIC);
4476 if (psize < oldsize) {
4477 /* Fill before shrinking in order to avoid a race. */
4479 memset((void *)((uintptr_t)ptr + size), 0x5a,
4482 arena_ralloc_large_shrink(arena, chunk, ptr, psize,
4486 bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
4488 if (ret == false && opt_zero) {
4489 memset((void *)((uintptr_t)ptr + oldsize), 0,
4498 arena_ralloc(void *ptr, size_t size, size_t oldsize)
4504 * Try to avoid moving the allocation.
4506 * posix_memalign() can cause allocation of "large" objects that are
4507 * smaller than bin_maxclass (in order to meet alignment requirements).
4508 * Therefore, do not assume that (oldsize <= bin_maxclass) indicates
4509 * ptr refers to a bin-allocated object.
4511 if (oldsize <= arena_maxclass) {
4512 if (arena_is_large(ptr) == false) {
4513 if (size <= small_maxclass) {
4514 if (oldsize <= small_maxclass &&
4515 small_size2bin[size] ==
4516 small_size2bin[oldsize])
4518 } else if (size <= bin_maxclass) {
4519 if (small_maxclass < oldsize && oldsize <=
4520 bin_maxclass && MEDIUM_CEILING(size) ==
4521 MEDIUM_CEILING(oldsize))
4525 assert(size <= arena_maxclass);
4526 if (size > bin_maxclass) {
4527 if (arena_ralloc_large(ptr, size, oldsize) ==
4534 /* Try to avoid moving the allocation. */
4535 if (size <= small_maxclass) {
4536 if (oldsize <= small_maxclass && small_size2bin[size] ==
4537 small_size2bin[oldsize])
4539 } else if (size <= bin_maxclass) {
4540 if (small_maxclass < oldsize && oldsize <= bin_maxclass &&
4541 MEDIUM_CEILING(size) == MEDIUM_CEILING(oldsize))
4544 if (bin_maxclass < oldsize && oldsize <= arena_maxclass) {
4545 assert(size > bin_maxclass);
4546 if (arena_ralloc_large(ptr, size, oldsize) == false)
4552 * If we get here, then size and oldsize are different enough that we
4553 * need to move the object. In that case, fall back to allocating new
4554 * space and copying.
4556 ret = arena_malloc(size, false);
4560 /* Junk/zero-filling were already done by arena_malloc(). */
4561 copysize = (size < oldsize) ? size : oldsize;
4562 memcpy(ret, ptr, copysize);
4566 if (opt_junk && size < oldsize)
4567 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
4568 else if (opt_zero && size > oldsize)
4569 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
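/*
 * In-place example (hypothetical sizes, assuming a 16 byte quantum): a
 * 100-byte allocation resized to 110 bytes maps to the same 112-byte
 * quantum-spaced bin, so small_size2bin[size] == small_size2bin[oldsize]
 * and the original pointer is returned unchanged above.  Only requests that
 * cross a size-class boundary fall through to the allocate/copy/free path.
 */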
4573 static inline void *
4574 iralloc(void *ptr, size_t size)
4578 assert(ptr != NULL);
4581 oldsize = isalloc(ptr);
4583 if (size <= arena_maxclass)
4584 return (arena_ralloc(ptr, size, oldsize));
4586 return (huge_ralloc(ptr, size, oldsize));
4590 arena_new(arena_t *arena, unsigned ind)
4594 size_t prev_run_size;
4596 if (malloc_spin_init(&arena->lock))
4600 memset(&arena->stats, 0, sizeof(arena_stats_t));
4601 arena->stats.lstats = (malloc_large_stats_t *)base_alloc(
4602 sizeof(malloc_large_stats_t) * ((chunksize - PAGE_SIZE) >>
4604 if (arena->stats.lstats == NULL)
4606 memset(arena->stats.lstats, 0, sizeof(malloc_large_stats_t) *
4607 ((chunksize - PAGE_SIZE) >> PAGE_SHIFT));
4608 # ifdef MALLOC_TCACHE
4609 ql_new(&arena->tcache_ql);
4613 /* Initialize chunks. */
4614 arena_chunk_tree_dirty_new(&arena->chunks_dirty);
4615 arena->spare = NULL;
4620 arena_avail_tree_new(&arena->runs_avail);
4622 /* Initialize bins. */
4623 prev_run_size = PAGE_SIZE;
4627 /* (2^n)-spaced tiny bins. */
4628 for (; i < ntbins; i++) {
4629 bin = &arena->bins[i];
4631 arena_run_tree_new(&bin->runs);
4633 bin->reg_size = (1U << (LG_TINY_MIN + i));
4635 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4638 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4643 /* Quantum-spaced bins. */
4644 for (; i < ntbins + nqbins; i++) {
4645 bin = &arena->bins[i];
4647 arena_run_tree_new(&bin->runs);
4649 bin->reg_size = (i - ntbins + 1) << LG_QUANTUM;
4651 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4654 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4658 /* Cacheline-spaced bins. */
4659 for (; i < ntbins + nqbins + ncbins; i++) {
4660 bin = &arena->bins[i];
4662 arena_run_tree_new(&bin->runs);
4664 bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
4667 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4670 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4674 /* Subpage-spaced bins. */
4675 for (; i < ntbins + nqbins + ncbins + nsbins; i++) {
4676 bin = &arena->bins[i];
4678 arena_run_tree_new(&bin->runs);
4680 bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
4683 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4686 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4691 for (; i < nbins; i++) {
4692 bin = &arena->bins[i];
4694 arena_run_tree_new(&bin->runs);
4696 bin->reg_size = medium_min + ((i - (ntbins + nqbins + ncbins +
4697 nsbins)) << lg_mspace);
4699 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4702 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4707 arena->magic = ARENA_MAGIC;
4713 /* Create a new arena and insert it into the arenas array at index ind. */
4715 arenas_extend(unsigned ind)
4719 /* Allocate enough space for trailing bins. */
4720 ret = (arena_t *)base_alloc(sizeof(arena_t)
4721 + (sizeof(arena_bin_t) * (nbins - 1)));
4722 if (ret != NULL && arena_new(ret, ind) == false) {
4726 /* Only reached if there is an OOM error. */
4729 * OOM here is quite inconvenient to propagate, since dealing with it
4730 * would require a check for failure in the fast path. Instead, punt
4731 * by using arenas[0]. In practice, this is an extremely unlikely failure.
4734 _malloc_message(_getprogname(),
4735 ": (malloc) Error initializing arena\n", "", "");
4742 #ifdef MALLOC_TCACHE
4743 static tcache_bin_t *
4744 tcache_bin_create(arena_t *arena)
4749 tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
4750 if (tsize <= small_maxclass)
4751 ret = (tcache_bin_t *)arena_malloc_small(arena, tsize, false);
4752 else if (tsize <= bin_maxclass)
4753 ret = (tcache_bin_t *)arena_malloc_medium(arena, tsize, false);
4755 ret = (tcache_bin_t *)imalloc(tsize);
4759 memset(&ret->tstats, 0, sizeof(tcache_bin_stats_t));
4762 ret->high_water = 0;
4769 tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned binind)
4772 arena_chunk_t *chunk;
4773 size_t pageind, tsize;
4774 arena_chunk_map_t *mapelm;
4776 chunk = CHUNK_ADDR2BASE(tbin);
4777 arena = chunk->arena;
4778 pageind = (((uintptr_t)tbin - (uintptr_t)chunk) >> PAGE_SHIFT);
4779 mapelm = &chunk->map[pageind];
4782 if (tbin->tstats.nrequests != 0) {
4783 arena_t *arena = tcache->arena;
4784 arena_bin_t *bin = &arena->bins[binind];
4785 malloc_spin_lock(&arena->lock);
4786 bin->stats.nrequests += tbin->tstats.nrequests;
4787 if (bin->reg_size <= small_maxclass)
4788 arena->stats.nmalloc_small += tbin->tstats.nrequests;
4790 arena->stats.nmalloc_medium += tbin->tstats.nrequests;
4791 malloc_spin_unlock(&arena->lock);
4795 assert(tbin->ncached == 0);
4796 tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
4797 if (tsize <= bin_maxclass) {
4798 malloc_spin_lock(&arena->lock);
4799 arena_dalloc_bin(arena, chunk, tbin, mapelm);
4800 malloc_spin_unlock(&arena->lock);
4807 tcache_stats_merge(tcache_t *tcache, arena_t *arena)
4811 /* Merge and reset tcache stats. */
4812 for (i = 0; i < mbin0; i++) {
4813 arena_bin_t *bin = &arena->bins[i];
4814 tcache_bin_t *tbin = tcache->tbins[i];
4816 bin->stats.nrequests += tbin->tstats.nrequests;
4817 arena->stats.nmalloc_small += tbin->tstats.nrequests;
4818 tbin->tstats.nrequests = 0;
4821 for (; i < nbins; i++) {
4822 arena_bin_t *bin = &arena->bins[i];
4823 tcache_bin_t *tbin = tcache->tbins[i];
4825 bin->stats.nrequests += tbin->tstats.nrequests;
4826 arena->stats.nmalloc_medium += tbin->tstats.nrequests;
4827 tbin->tstats.nrequests = 0;
4834 tcache_create(arena_t *arena)
4838 if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
4840 tcache = (tcache_t *)arena_malloc_small(arena, sizeof(tcache_t)
4841 + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
4842 } else if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
4844 tcache = (tcache_t *)arena_malloc_medium(arena, sizeof(tcache_t)
4845 + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
4847 tcache = (tcache_t *)icalloc(sizeof(tcache_t) +
4848 (sizeof(tcache_bin_t *) * (nbins - 1)));
4855 /* Link into list of extant tcaches. */
4856 malloc_spin_lock(&arena->lock);
4857 ql_elm_new(tcache, link);
4858 ql_tail_insert(&arena->tcache_ql, tcache, link);
4859 malloc_spin_unlock(&arena->lock);
4862 tcache->arena = arena;
4864 tcache_tls = tcache;
4870 tcache_destroy(tcache_t *tcache)
4875 /* Unlink from list of extant tcaches. */
4876 malloc_spin_lock(&tcache->arena->lock);
4877 ql_remove(&tcache->arena->tcache_ql, tcache, link);
4878 tcache_stats_merge(tcache, tcache->arena);
4879 malloc_spin_unlock(&tcache->arena->lock);
4882 for (i = 0; i < nbins; i++) {
4883 tcache_bin_t *tbin = tcache->tbins[i];
4885 tcache_bin_flush(tbin, i, 0);
4886 tcache_bin_destroy(tcache, tbin, i);
4890 if (arena_salloc(tcache) <= bin_maxclass) {
4891 arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
4892 arena_t *arena = chunk->arena;
4893 size_t pageind = (((uintptr_t)tcache - (uintptr_t)chunk) >>
4895 arena_chunk_map_t *mapelm = &chunk->map[pageind];
4897 malloc_spin_lock(&arena->lock);
4898 arena_dalloc_bin(arena, chunk, tcache, mapelm);
4899 malloc_spin_unlock(&arena->lock);
4908 /******************************************************************************/
4910 * Begin general internal functions.
4914 huge_malloc(size_t size, bool zero)
4918 extent_node_t *node;
4920 /* Allocate one or more contiguous chunks for this request. */
4922 csize = CHUNK_CEILING(size);
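/*
 * Illustrative only: CHUNK_CEILING() is the usual power-of-two round-up,
 * roughly (size + chunksize_mask) & ~chunksize_mask.  Assuming 1 MiB
 * chunks, a 1.5 MiB request yields csize == 2 MiB.
 */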
4924 /* size is large enough to cause size_t wrap-around. */
4928 /* Allocate an extent node with which to track the chunk. */
4929 node = base_node_alloc();
4933 ret = chunk_alloc(csize, &zero);
4935 base_node_dealloc(node);
4939 /* Insert node into huge. */
4943 malloc_mutex_lock(&huge_mtx);
4944 extent_tree_ad_insert(&huge, node);
4947 huge_allocated += csize;
4949 malloc_mutex_unlock(&huge_mtx);
4951 if (zero == false) {
4953 memset(ret, 0xa5, csize);
4955 memset(ret, 0, csize);
4961 /* Only handles large allocations that require more than chunk alignment. */
4963 huge_palloc(size_t alignment, size_t size)
4966 size_t alloc_size, chunk_size, offset;
4967 extent_node_t *node;
4971 * This allocation requires alignment that is even larger than chunk
4972 * alignment. This means that huge_malloc() isn't good enough.
4974 * Allocate almost twice as many chunks as are demanded by the size or
4975 * alignment, in order to assure the alignment can be achieved, then
4976 * unmap leading and trailing chunks.
4978 assert(alignment >= chunksize);
4980 chunk_size = CHUNK_CEILING(size);
4982 if (size >= alignment)
4983 alloc_size = chunk_size + alignment - chunksize;
4985 alloc_size = (alignment << 1) - chunksize;
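/*
 * Worked example (sizes illustrative, assuming 1 MiB chunks): for a 1 MiB
 * request with 4 MiB alignment, size < alignment, so
 * alloc_size == (4 MiB << 1) - 1 MiB == 7 MiB.  Any chunk-aligned 7 MiB
 * region necessarily contains a 4 MiB-aligned span of chunk_size bytes;
 * the surplus leading and trailing chunks are unmapped below.
 */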
4987 /* Allocate an extent node with which to track the chunk. */
4988 node = base_node_alloc();
4993 ret = chunk_alloc(alloc_size, &zero);
4995 base_node_dealloc(node);
4999 offset = (uintptr_t)ret & (alignment - 1);
5000 assert((offset & chunksize_mask) == 0);
5001 assert(offset < alloc_size);
5003 /* Trim trailing space. */
5004 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
5009 /* Trim leading space. */
5010 chunk_dealloc(ret, alignment - offset);
5012 ret = (void *)((uintptr_t)ret + (alignment - offset));
5014 trailsize = alloc_size - (alignment - offset) - chunk_size;
5015 if (trailsize != 0) {
5016 /* Trim trailing space. */
5017 assert(trailsize < alloc_size);
5018 chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
5023 /* Insert node into huge. */
5025 node->size = chunk_size;
5027 malloc_mutex_lock(&huge_mtx);
5028 extent_tree_ad_insert(&huge, node);
5031 huge_allocated += chunk_size;
5033 malloc_mutex_unlock(&huge_mtx);
5036 memset(ret, 0xa5, chunk_size);
5038 memset(ret, 0, chunk_size);
5044 huge_ralloc(void *ptr, size_t size, size_t oldsize)
5049 /* Avoid moving the allocation if the size class would not change. */
5050 if (oldsize > arena_maxclass &&
5051 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
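/*
 * A hedged illustration, assuming 1 MiB chunks: growing a 1.2 MiB huge
 * allocation to 1.9 MiB leaves both sizes with a 2 MiB chunk ceiling, so
 * the existing chunks are reused and only the junk/zero fill of the size
 * delta below is needed.
 */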
5052 if (opt_junk && size < oldsize) {
5053 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
5055 } else if (opt_zero && size > oldsize) {
5056 memset((void *)((uintptr_t)ptr + oldsize), 0, size
5063 * If we get here, then size and oldsize are different enough that we
5064 * need to use a different size class. In that case, fall back to
5065 * allocating new space and copying.
5067 ret = huge_malloc(size, false);
5071 copysize = (size < oldsize) ? size : oldsize;
5072 memcpy(ret, ptr, copysize);
5078 huge_dalloc(void *ptr)
5080 extent_node_t *node, key;
5082 malloc_mutex_lock(&huge_mtx);
5084 /* Extract from tree of huge allocations. */
5086 node = extent_tree_ad_search(&huge, &key);
5087 assert(node != NULL);
5088 assert(node->addr == ptr);
5089 extent_tree_ad_remove(&huge, node);
5093 huge_allocated -= node->size;
5096 malloc_mutex_unlock(&huge_mtx);
5100 if (opt_dss && opt_junk)
5101 memset(node->addr, 0x5a, node->size);
5103 chunk_dealloc(node->addr, node->size);
5105 base_node_dealloc(node);
5109 malloc_stats_print(void)
5111 char s[UMAX2S_BUFSIZE];
5113 _malloc_message("___ Begin malloc statistics ___\n", "", "", "");
5114 _malloc_message("Assertions ",
5121 _malloc_message("Boolean MALLOC_OPTIONS: ", opt_abort ? "A" : "a", "", "");
5123 _malloc_message(opt_dss ? "D" : "d", "", "", "");
5125 _malloc_message(opt_junk ? "J" : "j", "", "", "");
5127 _malloc_message(opt_mmap ? "M" : "m", "", "", "");
5129 _malloc_message("P", "", "", "");
5130 _malloc_message(opt_utrace ? "U" : "u", "", "", "");
5131 _malloc_message(opt_sysv ? "V" : "v", "", "", "");
5132 _malloc_message(opt_xmalloc ? "X" : "x", "", "", "");
5133 _malloc_message(opt_zero ? "Z" : "z", "", "", "");
5134 _malloc_message("\n", "", "", "");
5136 _malloc_message("CPUs: ", umax2s(ncpus, 10, s), "\n", "");
5137 _malloc_message("Max arenas: ", umax2s(narenas, 10, s), "\n", "");
5138 _malloc_message("Pointer size: ", umax2s(sizeof(void *), 10, s), "\n", "");
5139 _malloc_message("Quantum size: ", umax2s(QUANTUM, 10, s), "\n", "");
5140 _malloc_message("Cacheline size (assumed): ",
5141 umax2s(CACHELINE, 10, s), "\n", "");
5142 _malloc_message("Subpage spacing: ", umax2s(SUBPAGE, 10, s), "\n", "");
5143 _malloc_message("Medium spacing: ", umax2s((1U << lg_mspace), 10, s), "\n",
5146 _malloc_message("Tiny 2^n-spaced sizes: [", umax2s((1U << LG_TINY_MIN), 10,
5148 _malloc_message(umax2s((qspace_min >> 1), 10, s), "]\n", "", "");
5150 _malloc_message("Quantum-spaced sizes: [", umax2s(qspace_min, 10, s), "..",
5152 _malloc_message(umax2s(qspace_max, 10, s), "]\n", "", "");
5153 _malloc_message("Cacheline-spaced sizes: [",
5154 umax2s(cspace_min, 10, s), "..", "");
5155 _malloc_message(umax2s(cspace_max, 10, s), "]\n", "", "");
5156 _malloc_message("Subpage-spaced sizes: [", umax2s(sspace_min, 10, s), "..",
5158 _malloc_message(umax2s(sspace_max, 10, s), "]\n", "", "");
5159 _malloc_message("Medium sizes: [", umax2s(medium_min, 10, s), "..", "");
5160 _malloc_message(umax2s(medium_max, 10, s), "]\n", "", "");
5161 if (opt_lg_dirty_mult >= 0) {
5162 _malloc_message("Min active:dirty page ratio per arena: ",
5163 umax2s((1U << opt_lg_dirty_mult), 10, s), ":1\n", "");
5165 _malloc_message("Min active:dirty page ratio per arena: N/A\n", "",
5168 #ifdef MALLOC_TCACHE
5169 _malloc_message("Thread cache slots per size class: ",
5170 tcache_nslots ? umax2s(tcache_nslots, 10, s) : "N/A", "\n", "");
5171 _malloc_message("Thread cache GC sweep interval: ",
5172 (tcache_nslots && tcache_gc_incr > 0) ?
5173 umax2s((1U << opt_lg_tcache_gc_sweep), 10, s) : "N/A", "", "");
5174 _malloc_message(" (increment interval: ",
5175 (tcache_nslots && tcache_gc_incr > 0) ? umax2s(tcache_gc_incr, 10, s)
5176 : "N/A", ")\n", "");
5178 _malloc_message("Chunk size: ", umax2s(chunksize, 10, s), "", "");
5179 _malloc_message(" (2^", umax2s(opt_lg_chunk, 10, s), ")\n", "");
5183 size_t allocated, mapped;
5187 /* Calculate and print allocated/mapped stats. */
5190 for (i = 0, allocated = 0; i < narenas; i++) {
5191 if (arenas[i] != NULL) {
5192 malloc_spin_lock(&arenas[i]->lock);
5193 allocated += arenas[i]->stats.allocated_small;
5194 allocated += arenas[i]->stats.allocated_large;
5195 malloc_spin_unlock(&arenas[i]->lock);
5200 malloc_mutex_lock(&huge_mtx);
5201 allocated += huge_allocated;
5202 mapped = stats_chunks.curchunks * chunksize;
5203 malloc_mutex_unlock(&huge_mtx);
5205 malloc_mutex_lock(&base_mtx);
5206 mapped += base_mapped;
5207 malloc_mutex_unlock(&base_mtx);
5209 malloc_printf("Allocated: %zu, mapped: %zu\n", allocated,
5212 /* Print chunk stats. */
5214 chunk_stats_t chunks_stats;
5216 malloc_mutex_lock(&huge_mtx);
5217 chunks_stats = stats_chunks;
5218 malloc_mutex_unlock(&huge_mtx);
5220 malloc_printf("chunks: nchunks "
5221 "highchunks curchunks\n");
5222 malloc_printf(" %13"PRIu64"%13zu%13zu\n",
5223 chunks_stats.nchunks, chunks_stats.highchunks,
5224 chunks_stats.curchunks);
5227 /* Print huge stats. */
5229 "huge: nmalloc ndalloc allocated\n");
5230 malloc_printf(" %12"PRIu64" %12"PRIu64" %12zu\n", huge_nmalloc,
5231 huge_ndalloc, huge_allocated);
5233 /* Print stats for each arena. */
5234 for (i = 0; i < narenas; i++) {
5236 if (arena != NULL) {
5237 malloc_printf("\narenas[%u]:\n", i);
5238 malloc_spin_lock(&arena->lock);
5239 arena_stats_print(arena);
5240 malloc_spin_unlock(&arena->lock);
5244 #endif /* #ifdef MALLOC_STATS */
5245 _malloc_message("--- End malloc statistics ---\n", "", "", "");
5250 small_size2bin_validate(void)
5252 size_t i, size, binind;
5254 assert(small_size2bin[0] == 0xffU);
5258 for (; i < (1U << LG_TINY_MIN); i++) {
5259 size = pow2_ceil(1U << LG_TINY_MIN);
5260 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5261 assert(small_size2bin[i] == binind);
5263 for (; i < qspace_min; i++) {
5264 size = pow2_ceil(i);
5265 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5266 assert(small_size2bin[i] == binind);
5269 /* Quantum-spaced. */
5270 for (; i <= qspace_max; i++) {
5271 size = QUANTUM_CEILING(i);
5272 binind = ntbins + (size >> LG_QUANTUM) - 1;
5273 assert(small_size2bin[i] == binind);
5275 /* Cacheline-spaced. */
5276 for (; i <= cspace_max; i++) {
5277 size = CACHELINE_CEILING(i);
5278 binind = ntbins + nqbins + ((size - cspace_min) >>
5280 assert(small_size2bin[i] == binind);
5283 for (; i <= sspace_max; i++) {
5284 size = SUBPAGE_CEILING(i);
5285 binind = ntbins + nqbins + ncbins + ((size - sspace_min)
5287 assert(small_size2bin[i] == binind);
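/*
 * Worked example of the mapping validated above (illustrative defaults:
 * 16-byte quantum, 64-byte cachelines, cspace_min == 192): a 100-byte
 * request rounds up to 112 and maps to bin ntbins + (112 >> 4) - 1 ==
 * ntbins + 6, while a 200-byte request rounds up to 256 and maps to bin
 * ntbins + nqbins + ((256 - 192) >> 6) == ntbins + nqbins + 1.
 */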
5293 small_size2bin_init(void)
5296 if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
5297 || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
5298 || sizeof(const_small_size2bin) != small_maxclass + 1)
5299 return (small_size2bin_init_hard());
5301 small_size2bin = const_small_size2bin;
5303 assert(sizeof(const_small_size2bin) == small_maxclass + 1);
5304 small_size2bin_validate();
5310 small_size2bin_init_hard(void)
5312 size_t i, size, binind;
5313 uint8_t *custom_small_size2bin;
5315 assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
5316 || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
5317 || sizeof(const_small_size2bin) != small_maxclass + 1);
5319 custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1);
5320 if (custom_small_size2bin == NULL)
5323 custom_small_size2bin[0] = 0xffU;
5327 for (; i < (1U << LG_TINY_MIN); i++) {
5328 size = pow2_ceil(1U << LG_TINY_MIN);
5329 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5330 custom_small_size2bin[i] = binind;
5332 for (; i < qspace_min; i++) {
5333 size = pow2_ceil(i);
5334 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5335 custom_small_size2bin[i] = binind;
5338 /* Quantum-spaced. */
5339 for (; i <= qspace_max; i++) {
5340 size = QUANTUM_CEILING(i);
5341 binind = ntbins + (size >> LG_QUANTUM) - 1;
5342 custom_small_size2bin[i] = binind;
5344 /* Cacheline-spaced. */
5345 for (; i <= cspace_max; i++) {
5346 size = CACHELINE_CEILING(i);
5347 binind = ntbins + nqbins + ((size - cspace_min) >>
5349 custom_small_size2bin[i] = binind;
5352 for (; i <= sspace_max; i++) {
5353 size = SUBPAGE_CEILING(i);
5354 binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
5356 custom_small_size2bin[i] = binind;
5359 small_size2bin = custom_small_size2bin;
5361 small_size2bin_validate();
5374 error = _elf_aux_info(AT_NCPUS, &ret, sizeof(ret));
5375 if (error != 0 || ret == 0) {
5379 if (sysctl(mib, 2, &ret, &len, (void *)NULL, 0) == -1) {
5389 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
5390 * implementation has to take pains to avoid infinite recursion during
5397 if (malloc_initialized == false)
5398 return (malloc_init_hard());
5404 malloc_init_hard(void)
5408 char buf[PATH_MAX + 1];
5411 malloc_mutex_lock(&init_lock);
5412 if (malloc_initialized) {
5414 * Another thread initialized the allocator before this one
5415 * acquired init_lock.
5417 malloc_mutex_unlock(&init_lock);
5421 /* Get number of CPUs. */
5422 ncpus = malloc_ncpus();
5425 * Increase the chunk size to the largest page size that is greater
5426 * than the default chunk size and less than or equal to 4MB.
5429 size_t pagesizes[MAXPAGESIZES];
5432 nsizes = getpagesizes(pagesizes, MAXPAGESIZES);
5433 for (k = 0; k < nsizes; k++)
5434 if (pagesizes[k] <= (1LU << 22))
5435 while ((1LU << opt_lg_chunk) < pagesizes[k])
5439 for (i = 0; i < 3; i++) {
5442 /* Get runtime configuration. */
5445 if ((linklen = readlink("/etc/malloc.conf", buf,
5446 sizeof(buf) - 1)) != -1) {
5448 * Use the contents of the "/etc/malloc.conf"
5449 * symbolic link's name.
5451 buf[linklen] = '\0';
5454 /* No configuration specified. */
5460 if (issetugid() == 0 && (opts =
5461 getenv("MALLOC_OPTIONS")) != NULL) {
5463 * Do nothing; opts is already initialized to
5464 * the value of the MALLOC_OPTIONS environment
5468 /* No configuration specified. */
5474 if (_malloc_options != NULL) {
5476 * Use options that were compiled into the
5479 opts = _malloc_options;
5481 /* No configuration specified. */
5493 for (j = 0; opts[j] != '\0'; j++) {
5497 /* Parse repetition count, if any. */
5498 for (nreps = 0, nseen = false;; j++, nseen = true) {
5500 case '0': case '1': case '2': case '3':
5501 case '4': case '5': case '6': case '7':
5504 nreps += opts[j] - '0';
5514 for (k = 0; k < nreps; k++) {
5523 if (opt_lg_cspace_max - 1 >
5524 opt_lg_qspace_max &&
5527 opt_lg_cspace_max--;
5530 if (opt_lg_cspace_max < PAGE_SHIFT
5532 opt_lg_cspace_max++;
5545 if (opt_lg_medium_max > PAGE_SHIFT)
5546 opt_lg_medium_max--;
5549 if (opt_lg_medium_max + 1 <
5551 opt_lg_medium_max++;
5554 if (opt_lg_dirty_mult + 1 <
5555 (sizeof(size_t) << 3))
5556 opt_lg_dirty_mult++;
5559 if (opt_lg_dirty_mult >= 0)
5560 opt_lg_dirty_mult--;
5562 #ifdef MALLOC_TCACHE
5564 if (opt_lg_tcache_gc_sweep >= 0)
5565 opt_lg_tcache_gc_sweep--;
5568 if (opt_lg_tcache_gc_sweep + 1 <
5569 (sizeof(size_t) << 3))
5570 opt_lg_tcache_gc_sweep++;
5573 if (opt_lg_tcache_nslots > 0)
5574 opt_lg_tcache_nslots--;
5577 if (opt_lg_tcache_nslots + 1 <
5578 (sizeof(size_t) << 3))
5579 opt_lg_tcache_nslots++;
5590 * Chunks always require at least one
5591 * header page, plus enough room to
5592 * hold a run for the largest medium
5593 * size class (one page more than the
5596 if ((1U << (opt_lg_chunk - 1)) >=
5597 (2U << PAGE_SHIFT) + (1U <<
5602 if (opt_lg_chunk + 1 <
5603 (sizeof(size_t) << 3))
5617 opt_narenas_lshift--;
5620 opt_narenas_lshift++;
5623 opt_stats_print = false;
5626 opt_stats_print = true;
5629 if (opt_lg_qspace_max > LG_QUANTUM)
5630 opt_lg_qspace_max--;
5633 if (opt_lg_qspace_max + 1 <
5635 opt_lg_qspace_max++;
5650 opt_xmalloc = false;
5666 _malloc_message(_getprogname(),
5667 ": (malloc) Unsupported character "
5668 "in malloc options: '", cbuf,
5677 /* Make sure that there is some method for acquiring memory. */
5678 if (opt_dss == false && opt_mmap == false)
5681 if (opt_stats_print) {
5682 /* Print statistics at exit. */
5683 atexit(stats_print_atexit);
5687 /* Set variables according to the value of opt_lg_[qc]space_max. */
5688 qspace_max = (1U << opt_lg_qspace_max);
5689 cspace_min = CACHELINE_CEILING(qspace_max);
5690 if (cspace_min == qspace_max)
5691 cspace_min += CACHELINE;
5692 cspace_max = (1U << opt_lg_cspace_max);
5693 sspace_min = SUBPAGE_CEILING(cspace_max);
5694 if (sspace_min == cspace_max)
5695 sspace_min += SUBPAGE;
5696 assert(sspace_min < PAGE_SIZE);
5697 sspace_max = PAGE_SIZE - SUBPAGE;
5698 medium_max = (1U << opt_lg_medium_max);
5701 assert(LG_QUANTUM >= LG_TINY_MIN);
5703 assert(ntbins <= LG_QUANTUM);
5704 nqbins = qspace_max >> LG_QUANTUM;
5705 ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1;
5706 nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1;
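/*
 * For orientation only, assuming the compiled-in defaults (16-byte
 * quantum, 64-byte cachelines, 128-byte qspace_max): CACHELINE_CEILING(128)
 * collides with qspace_max, so cspace_min is bumped by one cacheline to
 * 192, and nqbins == 128 >> 4 == 8.  The actual values depend on the
 * MALLOC_OPTIONS in effect.
 */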
5709 * Compute medium size class spacing and the number of medium size
5710 * classes. Limit spacing to no more than pagesize, but if possible
5711 * use the smallest spacing that yields no more than NMBINS_MAX medium size
5714 lg_mspace = LG_SUBPAGE;
5715 nmbins = ((medium_max - medium_min) >> lg_mspace) + 1;
5716 while (lg_mspace < PAGE_SHIFT && nmbins > NMBINS_MAX) {
5717 lg_mspace = lg_mspace + 1;
5718 nmbins = ((medium_max - medium_min) >> lg_mspace) + 1;
5720 mspace_mask = (1U << lg_mspace) - 1U;
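/*
 * Illustration: each doubling of lg_mspace in the loop above roughly
 * halves nmbins; e.g. if 256-byte spacing would produce more than
 * NMBINS_MAX medium classes, 512-byte spacing is tried next, and so on up
 * to one-page spacing.
 */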
5722 mbin0 = ntbins + nqbins + ncbins + nsbins;
5723 nbins = mbin0 + nmbins;
5725 * The small_size2bin lookup table uses uint8_t to encode each bin
5726 * index, so we cannot support more than 256 small size classes. This
5727 * limit is difficult to exceed (impossible with the default 16-byte
5728 * quantum and 4 KiB pages), and configurations that could exceed it are
5729 * impractical, but we nonetheless protect against this case in order to
5730 * avoid undefined behavior.
5733 char line_buf[UMAX2S_BUFSIZE];
5734 _malloc_message(_getprogname(),
5735 ": (malloc) Too many small size classes (",
5736 umax2s(mbin0, 10, line_buf), " > max 256)\n");
5740 if (small_size2bin_init()) {
5741 malloc_mutex_unlock(&init_lock);
5745 #ifdef MALLOC_TCACHE
5746 if (opt_lg_tcache_nslots > 0) {
5747 tcache_nslots = (1U << opt_lg_tcache_nslots);
5749 /* Compute incremental GC event threshold. */
5750 if (opt_lg_tcache_gc_sweep >= 0) {
5751 tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
5752 nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
5760 /* Set variables according to the value of opt_lg_chunk. */
5761 chunksize = (1LU << opt_lg_chunk);
5762 chunksize_mask = chunksize - 1;
5763 chunk_npages = (chunksize >> PAGE_SHIFT);
5768 * Compute the header size such that it is large enough to
5769 * contain the page map.
5771 header_size = sizeof(arena_chunk_t) +
5772 (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
5773 arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
5774 ((header_size & PAGE_MASK) != 0);
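/*
 * A worked example under assumed defaults (4 KiB pages, 1 MiB chunks):
 * chunk_npages == 256, so the header must hold roughly 256
 * arena_chunk_map_t entries; if each entry is on the order of 16-32 bytes,
 * header_size is a few KiB and arena_chunk_header_npages rounds that up to
 * one or two pages, which are then excluded from arena_maxclass below.
 */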
5776 arena_maxclass = chunksize - (arena_chunk_header_npages <<
5779 UTRACE((void *)(intptr_t)(-1), 0, 0);
5782 malloc_mutex_init(&chunks_mtx);
5783 memset(&stats_chunks, 0, sizeof(chunk_stats_t));
5786 /* Various sanity checks that regard configuration. */
5787 assert(chunksize >= PAGE_SIZE);
5789 /* Initialize chunks data. */
5790 malloc_mutex_init(&huge_mtx);
5791 extent_tree_ad_new(&huge);
5793 malloc_mutex_init(&dss_mtx);
5795 i = (uintptr_t)dss_base & QUANTUM_MASK;
5797 dss_base = sbrk(QUANTUM - i);
5798 dss_prev = dss_base;
5800 extent_tree_szad_new(&dss_chunks_szad);
5801 extent_tree_ad_new(&dss_chunks_ad);
5809 /* Initialize base allocation data structures. */
5815 * Allocate a base chunk here, since it doesn't actually have to be
5816 * chunk-aligned. Doing this before allocating any other chunks allows
5817 * the use of space that would otherwise be wasted.
5820 base_pages_alloc(0);
5823 malloc_mutex_init(&base_mtx);
5827 * For SMP systems, create more than one arena per CPU by
5830 #ifdef MALLOC_TCACHE
5831 if (tcache_nslots) {
5833 * Only large object allocation/deallocation is
5834 * guaranteed to acquire an arena mutex, so we can get
5835 * away with fewer arenas than without thread caching.
5837 opt_narenas_lshift += 1;
5841 * All allocations must acquire an arena mutex, so use
5844 opt_narenas_lshift += 2;
5845 #ifdef MALLOC_TCACHE
5850 /* Determine how many arenas to use. */
5852 if (opt_narenas_lshift > 0) {
5853 if ((narenas << opt_narenas_lshift) > narenas)
5854 narenas <<= opt_narenas_lshift;
5856 * Make sure not to exceed the limits of what base_alloc() can
5859 if (narenas * sizeof(arena_t *) > chunksize)
5860 narenas = chunksize / sizeof(arena_t *);
5861 } else if (opt_narenas_lshift < 0) {
5862 if ((narenas >> -opt_narenas_lshift) < narenas)
5863 narenas >>= -opt_narenas_lshift;
5864 /* Make sure there is at least one arena. */
5871 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
5872 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
5873 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
5874 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
5875 223, 227, 229, 233, 239, 241, 251, 257, 263};
5876 unsigned nprimes, parenas;
5879 * Pick a prime number of hash arenas that is more than narenas
5880 * so that direct hashing of pthread_self() pointers tends to
5881 * spread allocations evenly among the arenas.
5883 assert((narenas & 1) == 0); /* narenas must be even. */
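/*
 * Example: if narenas came out as 16, the loop below selects 17, the
 * first prime in the table larger than narenas.
 */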
5884 nprimes = (sizeof(primes) >> LG_SIZEOF_INT);
5885 parenas = primes[nprimes - 1]; /* In case not enough primes. */
5886 for (i = 1; i < nprimes; i++) {
5887 if (primes[i] > narenas) {
5888 parenas = primes[i];
5900 /* Allocate and initialize arenas. */
5901 arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
5902 if (arenas == NULL) {
5903 malloc_mutex_unlock(&init_lock);
5907 * Zero the array. In practice, this should always be pre-zeroed,
5908 * since it was just mmap()ed, but let's be sure.
5910 memset(arenas, 0, sizeof(arena_t *) * narenas);
5913 * Initialize one arena here. The rest are lazily created in
5914 * choose_arena_hard().
5917 if (arenas[0] == NULL) {
5918 malloc_mutex_unlock(&init_lock);
5923 * Assign the initial arena to the initial thread, in order to avoid
5924 * spurious creation of an extra arena if the application switches to
5927 arenas_map = arenas[0];
5929 malloc_spin_init(&arenas_lock);
5931 malloc_initialized = true;
5932 malloc_mutex_unlock(&init_lock);
5937 * End general internal functions.
5939 /******************************************************************************/
5941 * Begin malloc(3)-compatible functions.
5949 if (malloc_init()) {
5955 if (opt_sysv == false)
5959 _malloc_message(_getprogname(),
5960 ": (malloc) Error in malloc(): "
5961 "invalid size 0\n", "", "");
5969 ret = imalloc(size);
5974 _malloc_message(_getprogname(),
5975 ": (malloc) Error in malloc(): out of memory\n", "",
5983 UTRACE(0, size, ret);
5988 posix_memalign(void **memptr, size_t alignment, size_t size)
5997 if (opt_sysv == false)
6001 _malloc_message(_getprogname(),
6002 ": (malloc) Error in "
6003 "posix_memalign(): invalid "
6004 "size 0\n", "", "");
6014 /* Make sure that alignment is a large enough power of 2. */
6015 if (((alignment - 1) & alignment) != 0
6016 || alignment < sizeof(void *)) {
6018 _malloc_message(_getprogname(),
6019 ": (malloc) Error in posix_memalign(): "
6020 "invalid alignment\n", "", "");
6028 result = ipalloc(alignment, size);
6031 if (result == NULL) {
6033 _malloc_message(_getprogname(),
6034 ": (malloc) Error in posix_memalign(): out of memory\n",
6046 UTRACE(0, size, result);
6051 aligned_alloc(size_t alignment, size_t size)
6056 ret = posix_memalign(&memptr, alignment, size);
6065 calloc(size_t num, size_t size)
6070 if (malloc_init()) {
6076 num_size = num * size;
6077 if (num_size == 0) {
6078 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
6085 * Try to avoid division here. We know that it isn't possible to
6086 * overflow during multiplication if neither operand uses any of the
6087 * most significant half of the bits in a size_t.
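 *
 * Concretely (illustrative, 32-bit size_t): the mask below is 0xffff0000;
 * if neither num nor size has any of those bits set, both are below 2^16,
 * so their product fits in 32 bits and cannot wrap.  Only when that cheap
 * test fails do we fall back to the division-based check.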
6089 } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
6090 && (num_size / size != num)) {
6091 /* size_t overflow. */
6096 ret = icalloc(num_size);
6101 _malloc_message(_getprogname(),
6102 ": (malloc) Error in calloc(): out of memory\n", "",
6109 UTRACE(0, num_size, ret);
6114 realloc(void *ptr, size_t size)
6119 if (opt_sysv == false)
6130 assert(malloc_initialized);
6132 ret = iralloc(ptr, size);
6136 _malloc_message(_getprogname(),
6137 ": (malloc) Error in realloc(): out of "
6138 "memory\n", "", "");
6147 ret = imalloc(size);
6151 _malloc_message(_getprogname(),
6152 ": (malloc) Error in realloc(): out of "
6153 "memory\n", "", "");
6161 UTRACE(ptr, size, ret);
6171 assert(malloc_initialized);
6178 * End malloc(3)-compatible functions.
6180 /******************************************************************************/
6182 * Begin non-standard functions.
6186 malloc_usable_size(const void *ptr)
6189 assert(ptr != NULL);
6191 return (isalloc(ptr));
6195 * End non-standard functions.
6197 /******************************************************************************/
6199 * Begin library-private functions.
6203 * We provide an unpublished interface in order to receive notifications from
6204 * the pthreads library whenever a thread exits. This allows us to clean up
6208 _malloc_thread_cleanup(void)
6211 #ifdef MALLOC_TCACHE
6212 tcache_t *tcache = tcache_tls;
6214 if (tcache != NULL) {
6215 assert(tcache != (void *)(uintptr_t)1);
6216 tcache_destroy(tcache);
6217 tcache_tls = (void *)(uintptr_t)1;
6223 * The following functions are used by threading libraries for protection of
6224 * malloc during fork(). These functions are only called if the program is
6225 * running in threaded mode, so there is no need to check whether the program
6230 _malloc_prefork(void)
6234 /* Acquire all mutexes in a safe order. */
6235 malloc_spin_lock(&arenas_lock);
6236 for (i = 0; i < narenas; i++) {
6237 if (arenas[i] != NULL)
6238 malloc_spin_lock(&arenas[i]->lock);
6241 malloc_mutex_lock(&base_mtx);
6243 malloc_mutex_lock(&huge_mtx);
6246 malloc_mutex_lock(&dss_mtx);
6251 _malloc_postfork(void)
6255 /* Release all mutexes, now that fork() has completed. */
6258 malloc_mutex_unlock(&dss_mtx);
6261 malloc_mutex_unlock(&huge_mtx);
6263 malloc_mutex_unlock(&base_mtx);
6265 for (i = 0; i < narenas; i++) {
6266 if (arenas[i] != NULL)
6267 malloc_spin_unlock(&arenas[i]->lock);
6269 malloc_spin_unlock(&arenas_lock);
6273 * End library-private functions.
6275 /******************************************************************************/