2 * Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems. The following
33 * features are included for this purpose:
35 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 * contention and cache sloshing.
38 * + Thread-specific caching is used if there are multiple threads, which
39 * reduces the amount of locking.
41 * + Cache line sharing between arenas is avoided for internal data
44 * + Memory is managed in chunks and runs (chunks can be split into runs),
45 * rather than as individual pages. This provides a constant-time
46 * mechanism for associating allocations with particular arenas.
48 * Allocation requests are rounded up to the nearest size class, and no record
49 * of the original request size is maintained. Allocations are broken into
50 * categories according to size class. Assuming runtime defaults, 4 kB pages
51 * and a 16 byte quantum on a 32-bit system, the size classes in each category
 * are as follows:
 *
 * |=======================================|
 * | Category | Subcategory      |    Size |
 * |=======================================|
 * | Small    | Tiny             |       2 |
 * |          |                  |       4 |
 * |          |                  |       8 |
 * |          |------------------+---------|
 * |          | Quantum-spaced   |      16 |
 * |          |                  |      32 |
 * |          |                  |     ... |
 * |          |                  |     128 |
 * |          |------------------+---------|
 * |          | Cacheline-spaced |     192 |
 * |          |                  |     256 |
 * |          |                  |     ... |
 * |          |                  |     512 |
 * |          |------------------+---------|
 * |          | Sub-page         |     760 |
 * |          |                  |    1024 |
 * |          |                  |     ... |
 * |          |                  |    3840 |
 * |=======================================|
 * | Large                       |    4 kB |
 * |                             |    8 kB |
 * |                             |     ... |
 * |                             | 1020 kB |
 * |=======================================|
 * | Huge                        |    1 MB |
 * |                             |    2 MB |
 * |                             |    3 MB |
 * |                             |     ... |
 * |=======================================|
98 * A different mechanism is used for each category:
100 * Small : Each size class is segregated into its own set of runs. Each run
101 * maintains a bitmap of which regions are free/allocated.
103 * Large : Each allocation is backed by a dedicated run. Metadata are stored
104 * in the associated arena chunk header maps.
106 * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
107 * Metadata are stored in a separate red-black tree.
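 *
 * For illustration (a sketch assuming the runtime defaults above, i.e. a 16
 * byte quantum, 4 kB pages, and the default 1 MB chunk size; not normative):
 *
 *   malloc(100)             -> small, quantum-spaced class, rounded to 112 bytes
 *   malloc(5000)            -> large, rounded up to 8192 bytes (a two page run)
 *   malloc(2 * 1024 * 1024) -> huge, rounded up to a chunk multiple (2 MB)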
109 *******************************************************************************
113 * MALLOC_PRODUCTION disables assertions and statistics gathering. It also
114 * defaults the A and J runtime options to off. These settings are appropriate
115 * for production systems.
117 #ifndef MALLOC_PRODUCTION
118 #define MALLOC_PRODUCTION
121 #ifndef MALLOC_PRODUCTION
123 * MALLOC_DEBUG enables assertions and other sanity checks, and disables
126 # define MALLOC_DEBUG
128 /* MALLOC_STATS enables statistics calculation. */
129 # define MALLOC_STATS
133 * MALLOC_TINY enables support for tiny objects, which are smaller than one
139 * MALLOC_MAG enables a magazine-based thread-specific caching layer for small
140 * objects. This makes it possible to allocate/deallocate objects without any
141 * locking when the cache is in the steady state.
146 * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
147 * re-balances arena load if exponentially averaged contention exceeds a
150 #define MALLOC_BALANCE
153 * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
154 * segment (DSS). In an ideal world, this functionality would be completely
155 * unnecessary, but we are burdened by history and the lack of resource limits
156 * for anonymous mapped memory.
160 #include <sys/cdefs.h>
161 __FBSDID("$FreeBSD$");
163 #include "libc_private.h"
167 #include "spinlock.h"
168 #include "namespace.h"
169 #include <sys/mman.h>
170 #include <sys/param.h>
171 #include <sys/stddef.h>
172 #include <sys/time.h>
173 #include <sys/types.h>
174 #include <sys/sysctl.h>
176 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
178 #include <machine/cpufunc.h>
179 #include <machine/param.h>
180 #include <machine/vmparam.h>
195 #include "un-namespace.h"
211 /* Disable inlining to make debugging easier. */
215 /* Size of stack-allocated buffer passed to strerror_r(). */
216 #define STRERROR_BUF 64
219 * Minimum alignment of allocations is 2^QUANTUM_2POW bytes.
222 # define QUANTUM_2POW 4
223 # define SIZEOF_PTR_2POW 2
224 # define CPU_SPINWAIT __asm__ volatile("pause")
227 # define QUANTUM_2POW 4
228 # define SIZEOF_PTR_2POW 3
231 # define QUANTUM_2POW 4
232 # define SIZEOF_PTR_2POW 3
236 # define QUANTUM_2POW 4
237 # define SIZEOF_PTR_2POW 3
241 # define QUANTUM_2POW 4
242 # define SIZEOF_PTR_2POW 3
243 # define CPU_SPINWAIT __asm__ volatile("pause")
246 # define QUANTUM_2POW 3
247 # define SIZEOF_PTR_2POW 2
251 # define QUANTUM_2POW 3
252 # define SIZEOF_PTR_2POW 2
256 # define QUANTUM_2POW 4
257 # define SIZEOF_PTR_2POW 2
260 #define QUANTUM ((size_t)(1U << QUANTUM_2POW))
261 #define QUANTUM_MASK (QUANTUM - 1)
263 #define SIZEOF_PTR (1U << SIZEOF_PTR_2POW)
265 /* sizeof(int) == (1U << SIZEOF_INT_2POW). */
266 #ifndef SIZEOF_INT_2POW
267 # define SIZEOF_INT_2POW 2
270 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
271 #if (!defined(PIC) && !defined(NO_TLS))
276 /* MALLOC_MAG requires TLS. */
280 /* MALLOC_BALANCE requires TLS. */
281 # ifdef MALLOC_BALANCE
282 # undef MALLOC_BALANCE
287 * Size and alignment of memory chunks that are allocated by the OS's virtual
290 #define CHUNK_2POW_DEFAULT 20
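
/*
 * Illustrative note (a sketch, not part of the original code): because chunks
 * are chunksize bytes long and chunksize-aligned, the chunk that owns a small
 * or large allocation can be found in constant time by masking the pointer,
 * e.g.:
 *
 *	arena_chunk_t *chunk = (arena_chunk_t *)((uintptr_t)ptr & ~chunksize_mask);
 *	arena_t *arena = chunk->arena;
 *
 * This is what the CHUNK_ADDR2BASE() macro below does.
 */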
292 /* Maximum number of dirty pages per arena. */
293 #define DIRTY_MAX_DEFAULT (1U << 9)
296 * Maximum size of L1 cache line. This is used to avoid cache line aliasing.
297 * In addition, this controls the spacing of cacheline-spaced size classes.
299 #define CACHELINE_2POW 6
300 #define CACHELINE ((size_t)(1U << CACHELINE_2POW))
301 #define CACHELINE_MASK (CACHELINE - 1)
304 * Subpages are an artificially designated partitioning of pages. Their only
305 * purpose is to support subpage-spaced size classes.
307 * There must be at least 4 subpages per page, due to the way size classes are
310 #define SUBPAGE_2POW 8
311 #define SUBPAGE ((size_t)(1U << SUBPAGE_2POW))
312 #define SUBPAGE_MASK (SUBPAGE - 1)
315 /* Smallest size class to support. */
316 # define TINY_MIN_2POW 1
320 * Maximum size class that is a multiple of the quantum, but not (necessarily)
321 * a power of 2. Above this size, allocations are rounded up to the nearest
324 #define QSPACE_MAX_2POW_DEFAULT 7
327 * Maximum size class that is a multiple of the cacheline, but not (necessarily)
328 * a power of 2. Above this size, allocations are rounded up to the nearest
331 #define CSPACE_MAX_2POW_DEFAULT 9
334 * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
335 * as small as possible such that this setting is still honored, without
336 * violating other constraints. The goal is to make runs as small as possible
337 * without exceeding a per run external fragmentation threshold.
339 * We use binary fixed point math for overhead computations, where the binary
340 * point is implicitly RUN_BFP bits to the left.
342 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
343 * honored for some/all object sizes, since there is one bit of header overhead
344 * per object (plus a constant). This constraint is relaxed (ignored) for runs
345 * that are so small that the per-region overhead is greater than:
347 * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
350 /* \/ Implicit binary fixed point. */
351 #define RUN_MAX_OVRHD 0x0000003dU
352 #define RUN_MAX_OVRHD_RELAX 0x00001800U
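
/*
 * Worked example (informal; assumes the default RUN_BFP of 12): overhead
 * ratios are fractions scaled by 2^RUN_BFP, so RUN_MAX_OVRHD == 0x3d == 61
 * means a target of at most 61/4096, i.e. roughly 1.5% of a run lost to
 * header overhead.  For a one page (4096 byte) run that allows about 61
 * bytes of header before the run size must grow.
 */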
354 /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
355 #define RUN_MAX_SMALL (12 * PAGE_SIZE)
358 * Hyper-threaded CPUs may need a special instruction inside spin loops in
359 * order to yield to another virtual CPU. If no such instruction is defined
360 * above, make CPU_SPINWAIT a no-op.
363 # define CPU_SPINWAIT
367 * Adaptive spinning must eventually switch to blocking, in order to avoid the
368 * potential for priority inversion deadlock. Backing off past a certain point
369 * can actually waste time.
371 #define SPIN_LIMIT_2POW 11
374 * Conversion from spinning to blocking is expensive; we use (1U <<
375 * BLOCK_COST_2POW) to estimate how many more times costly blocking is than
376 * worst-case spinning.
378 #define BLOCK_COST_2POW 4
382 * Default magazine size, in bytes. max_rounds is calculated to make
383 * optimal use of the space, leaving just enough room for the magazine
386 # define MAG_SIZE_2POW_DEFAULT 9
389 #ifdef MALLOC_BALANCE
391 * We use an exponential moving average to track recent lock contention,
392 * where the size of the history window is N, and alpha=2/(N+1).
394 * Due to integer math rounding, very small values here can cause
395 * substantial degradation in accuracy, thus making the moving average decay
396 * faster than it would with precise calculation.
398 # define BALANCE_ALPHA_INV_2POW 9
401 * Threshold value for the exponential moving contention average at which to
402 * re-assign a thread.
404 # define BALANCE_THRESHOLD_DEFAULT (1U << (SPIN_LIMIT_2POW-4))
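
/*
 * Illustrative numbers (a sketch, not from the original comments): with
 * BALANCE_ALPHA_INV_2POW == 9, alpha == 1/512, which by alpha == 2/(N+1)
 * corresponds to a history window of N == 1023 lock acquisitions.  Each
 * acquisition folds its spin count into the average roughly as:
 *
 *	contention = (contention * 511 + spins) / 512;
 */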
407 /******************************************************************************/
410 * Mutexes based on spinlocks. We can't use normal pthread spinlocks in all
411 * places, because they require malloc()ed memory, which causes bootstrapping
412 * issues in some cases.
418 /* Set to true once the allocator has been initialized. */
419 static bool malloc_initialized = false;
421 /* Used to avoid initialization races. */
422 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
424 /******************************************************************************/
426 * Statistics data structures.
431 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
432 struct malloc_bin_stats_s {
434 * Number of allocation requests that corresponded to the size of this
440 /* Number of magazine reloads from this bin. */
444 /* Total number of runs created for this bin's size class. */
448 * Total number of runs reused by extracting them from the runs tree for
449 * this bin's size class.
453 /* High-water mark for this bin. */
454 unsigned long highruns;
456 /* Current number of runs in this bin. */
457 unsigned long curruns;
460 typedef struct arena_stats_s arena_stats_t;
461 struct arena_stats_s {
462 /* Number of bytes currently mapped. */
466 * Total number of purge sweeps, total number of madvise calls made,
467 * and total pages purged in order to keep dirty unused memory under
474 /* Per-size-category statistics. */
475 size_t allocated_small;
476 uint64_t nmalloc_small;
477 uint64_t ndalloc_small;
479 size_t allocated_large;
480 uint64_t nmalloc_large;
481 uint64_t ndalloc_large;
483 #ifdef MALLOC_BALANCE
484 /* Number of times this arena reassigned a thread due to contention. */
489 typedef struct chunk_stats_s chunk_stats_t;
490 struct chunk_stats_s {
491 /* Number of chunks that were allocated. */
494 /* High-water mark for number of chunks allocated. */
495 unsigned long highchunks;
498 * Current number of chunks allocated. This value isn't maintained for
499 * any other purpose, so keep track of it in order to be able to set
502 unsigned long curchunks;
505 #endif /* #ifdef MALLOC_STATS */
507 /******************************************************************************/
509 * Extent data structures.
512 /* Tree of extents. */
513 typedef struct extent_node_s extent_node_t;
514 struct extent_node_s {
516 /* Linkage for the size/address-ordered tree. */
517 rb_node(extent_node_t) link_szad;
520 /* Linkage for the address-ordered tree. */
521 rb_node(extent_node_t) link_ad;
523 /* Pointer to the extent that this tree node is responsible for. */
526 /* Total region size. */
529 typedef rb_tree(extent_node_t) extent_tree_t;
531 /******************************************************************************/
533 * Arena data structures.
536 typedef struct arena_s arena_t;
537 typedef struct arena_bin_s arena_bin_t;
539 /* Each element of the chunk map corresponds to one page within the chunk. */
540 typedef struct arena_chunk_map_s arena_chunk_map_t;
541 struct arena_chunk_map_s {
543 * Linkage for run trees. There are two disjoint uses:
545 * 1) arena_t's runs_avail tree.
546 * 2) arena_run_t conceptually uses this linkage for in-use non-full
547 * runs, rather than directly embedding linkage.
549 rb_node(arena_chunk_map_t) link;
552 * Run address (or size) and various flags are stored together. The bit
553 * layout looks like (assuming 32-bit system):
555 * ???????? ???????? ????---- ---kdzla
557 * ? : Unallocated: Run address for first/last pages, unset for internal
559 * Small: Run address.
560 * Large: Run size for first page, unset for trailing pages.
568 * Following are example bit patterns for the three types of runs.
577 * ssssssss ssssssss ssss---- --------
578 * xxxxxxxx xxxxxxxx xxxx---- ----d---
579 * ssssssss ssssssss ssss---- -----z--
582 * rrrrrrrr rrrrrrrr rrrr---- -------a
583 * rrrrrrrr rrrrrrrr rrrr---- -------a
584 * rrrrrrrr rrrrrrrr rrrr---- -------a
587 * ssssssss ssssssss ssss---- ------la
588 * -------- -------- -------- ------la
589 * -------- -------- -------- ------la
592 #define CHUNK_MAP_KEY ((size_t)0x10U)
593 #define CHUNK_MAP_DIRTY ((size_t)0x08U)
594 #define CHUNK_MAP_ZEROED ((size_t)0x04U)
595 #define CHUNK_MAP_LARGE ((size_t)0x02U)
596 #define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
598 typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
599 typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
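
/*
 * Decoding sketch (illustrative only): for the map element mapelm of a run's
 * first page, the page-aligned bits carry the run size (or address) and the
 * low bits carry the flags, e.g.:
 *
 *	size_t size    = mapelm->bits & ~PAGE_MASK;
 *	bool   large   = (mapelm->bits & CHUNK_MAP_LARGE) != 0;
 *	bool   alloced = (mapelm->bits & CHUNK_MAP_ALLOCATED) != 0;
 */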
601 /* Arena chunk header. */
602 typedef struct arena_chunk_s arena_chunk_t;
603 struct arena_chunk_s {
604 /* Arena that owns the chunk. */
607 /* Linkage for the arena's chunks_dirty tree. */
608 rb_node(arena_chunk_t) link_dirty;
610 /* Number of dirty pages. */
613 /* Map of pages within chunk that keeps track of free/large/small. */
614 arena_chunk_map_t map[1]; /* Dynamically sized. */
616 typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
618 typedef struct arena_run_s arena_run_t;
622 # define ARENA_RUN_MAGIC 0x384adf93
625 /* Bin this run is associated with. */
628 /* Index of first element that might have a free region. */
629 unsigned regs_minelm;
631 /* Number of free regions in run. */
634 /* Bitmask of in-use regions (0: in use, 1: free). */
635 unsigned regs_mask[1]; /* Dynamically sized. */
640 * Current run being used to service allocations of this bin's size
646 * Tree of non-full runs. This tree is used when looking for an
647 * existing run when runcur is no longer usable. We choose the
648 * non-full run that is lowest in memory; this policy tends to keep
649 * objects packed well, and it can also help reduce the number of
650 * almost-empty chunks.
652 arena_run_tree_t runs;
654 /* Size of regions in a run for this bin's size class. */
657 /* Total size of a run for this bin's size class. */
660 /* Total number of regions in a run for this bin's size class. */
663 /* Number of elements in a run's regs_mask for this bin's size class. */
664 uint32_t regs_mask_nelms;
666 /* Offset of first region in a run for this bin's size class. */
667 uint32_t reg0_offset;
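
/*
 * Layout sketch (illustrative, not normative): region regind of a run for
 * this bin lives at
 *
 *	(uintptr_t)run + reg0_offset + regind * reg_size
 *
 * so e.g. a hypothetical bin with reg_size == 64 and reg0_offset == 64 would
 * place its regions at run+64, run+128, run+192, ...
 */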
670 /* Bin statistics. */
671 malloc_bin_stats_t stats;
678 # define ARENA_MAGIC 0x947d3d24
681 /* All operations on this arena require that lock be locked. */
682 pthread_mutex_t lock;
688 /* Tree of dirty-page-containing chunks this arena manages. */
689 arena_chunk_tree_t chunks_dirty;
692 * In order to avoid rapid chunk allocation/deallocation when an arena
693 * oscillates right on the cusp of needing a new chunk, cache the most
694 * recently freed chunk. The spare is left in the arena's chunk trees
695 * until it is deleted.
697 * There is one spare chunk per arena, rather than one spare total, in
698 * order to avoid interactions between multiple threads that could make
699 * a single spare inadequate.
701 arena_chunk_t *spare;
704 * Current count of pages within unused runs that are potentially
705 * dirty, and for which madvise(... MADV_FREE) has not been called. By
706 * tracking this, we can institute a limit on how much dirty unused
707 * memory is mapped for each arena.
712 * Size/address-ordered tree of this arena's available runs. This tree
713 * is used for first-best-fit run allocation.
715 arena_avail_tree_t runs_avail;
717 #ifdef MALLOC_BALANCE
719 * The arena load balancing machinery needs to keep track of how much
720 * lock contention there is. This value is exponentially averaged.
726 * bins is used to store rings of free regions of the following sizes,
727 * assuming a 16-byte quantum, 4kB page size, and default
749 arena_bin_t bins[1]; /* Dynamically sized. */
752 /******************************************************************************/
754 * Magazine data structures.
758 typedef struct mag_s mag_t;
760 size_t binind; /* Index of associated bin. */
762 void *rounds[1]; /* Dynamically sized. */
766 * Magazines are lazily allocated, but once created, they remain until the
767 * associated mag_rack is destroyed.
769 typedef struct bin_mags_s bin_mags_t;
775 typedef struct mag_rack_s mag_rack_t;
777 bin_mags_t bin_mags[1]; /* Dynamically sized. */
781 /******************************************************************************/
786 /* Number of CPUs. */
787 static unsigned ncpus;
789 /* Various bin-related settings. */
790 #ifdef MALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
791 # define ntbins ((unsigned)(QUANTUM_2POW - TINY_MIN_2POW))
795 static unsigned nqbins; /* Number of quantum-spaced bins. */
796 static unsigned ncbins; /* Number of cacheline-spaced bins. */
797 static unsigned nsbins; /* Number of subpage-spaced bins. */
798 static unsigned nbins;
800 # define tspace_max ((size_t)(QUANTUM >> 1))
802 #define qspace_min QUANTUM
803 static size_t qspace_max;
804 static size_t cspace_min;
805 static size_t cspace_max;
806 static size_t sspace_min;
807 static size_t sspace_max;
808 #define bin_maxclass sspace_max
810 static uint8_t const *size2bin;
812 * const_size2bin is a static constant lookup table that in the common case can
813 * be used as-is for size2bin. For dynamically linked programs, this avoids
814 * a page of memory overhead per process.
817 #define S2B_2(i) S2B_1(i) S2B_1(i)
818 #define S2B_4(i) S2B_2(i) S2B_2(i)
819 #define S2B_8(i) S2B_4(i) S2B_4(i)
820 #define S2B_16(i) S2B_8(i) S2B_8(i)
821 #define S2B_32(i) S2B_16(i) S2B_16(i)
822 #define S2B_64(i) S2B_32(i) S2B_32(i)
823 #define S2B_128(i) S2B_64(i) S2B_64(i)
824 #define S2B_256(i) S2B_128(i) S2B_128(i)
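
/*
 * Expansion sketch (assuming S2B_1(i) simply expands to "i,"): each S2B_n(i)
 * macro emits n copies of the bin index i, so e.g. S2B_4(3) expands to
 *
 *	3, 3, 3, 3,
 *
 * which maps four consecutive sizes to bin 3 in the lookup table below.
 */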
825 static const uint8_t const_size2bin[PAGE_SIZE - 255] = {
827 #if (QUANTUM_2POW == 4)
828 /* 64-bit system ************************/
839 S2B_16(S2B_QMIN + 1) /* 32 */
840 S2B_16(S2B_QMIN + 2) /* 48 */
841 S2B_16(S2B_QMIN + 3) /* 64 */
842 S2B_16(S2B_QMIN + 4) /* 80 */
843 S2B_16(S2B_QMIN + 5) /* 96 */
844 S2B_16(S2B_QMIN + 6) /* 112 */
845 S2B_16(S2B_QMIN + 7) /* 128 */
846 # define S2B_CMIN (S2B_QMIN + 8)
848 /* 32-bit system ************************/
858 S2B_8(S2B_QMIN + 1) /* 16 */
859 S2B_8(S2B_QMIN + 2) /* 24 */
860 S2B_8(S2B_QMIN + 3) /* 32 */
861 S2B_8(S2B_QMIN + 4) /* 40 */
862 S2B_8(S2B_QMIN + 5) /* 48 */
863 S2B_8(S2B_QMIN + 6) /* 56 */
864 S2B_8(S2B_QMIN + 7) /* 64 */
865 S2B_8(S2B_QMIN + 8) /* 72 */
866 S2B_8(S2B_QMIN + 9) /* 80 */
867 S2B_8(S2B_QMIN + 10) /* 88 */
868 S2B_8(S2B_QMIN + 11) /* 96 */
869 S2B_8(S2B_QMIN + 12) /* 104 */
870 S2B_8(S2B_QMIN + 13) /* 112 */
871 S2B_8(S2B_QMIN + 14) /* 120 */
872 S2B_8(S2B_QMIN + 15) /* 128 */
873 # define S2B_CMIN (S2B_QMIN + 16)
875 /****************************************/
876 S2B_64(S2B_CMIN + 0) /* 192 */
877 S2B_64(S2B_CMIN + 1) /* 256 */
878 S2B_64(S2B_CMIN + 2) /* 320 */
879 S2B_64(S2B_CMIN + 3) /* 384 */
880 S2B_64(S2B_CMIN + 4) /* 448 */
881 S2B_64(S2B_CMIN + 5) /* 512 */
882 # define S2B_SMIN (S2B_CMIN + 6)
883 S2B_256(S2B_SMIN + 0) /* 768 */
884 S2B_256(S2B_SMIN + 1) /* 1024 */
885 S2B_256(S2B_SMIN + 2) /* 1280 */
886 S2B_256(S2B_SMIN + 3) /* 1536 */
887 S2B_256(S2B_SMIN + 4) /* 1792 */
888 S2B_256(S2B_SMIN + 5) /* 2048 */
889 S2B_256(S2B_SMIN + 6) /* 2304 */
890 S2B_256(S2B_SMIN + 7) /* 2560 */
891 S2B_256(S2B_SMIN + 8) /* 2816 */
892 S2B_256(S2B_SMIN + 9) /* 3072 */
893 S2B_256(S2B_SMIN + 10) /* 3328 */
894 S2B_256(S2B_SMIN + 11) /* 3584 */
895 S2B_256(S2B_SMIN + 12) /* 3840 */
896 #if (PAGE_SHIFT == 13)
897 S2B_256(S2B_SMIN + 13) /* 4096 */
898 S2B_256(S2B_SMIN + 14) /* 4352 */
899 S2B_256(S2B_SMIN + 15) /* 4608 */
900 S2B_256(S2B_SMIN + 16) /* 4864 */
901 S2B_256(S2B_SMIN + 17) /* 5120 */
902 S2B_256(S2B_SMIN + 18) /* 5376 */
903 S2B_256(S2B_SMIN + 19) /* 5632 */
904 S2B_256(S2B_SMIN + 20) /* 5888 */
905 S2B_256(S2B_SMIN + 21) /* 6144 */
906 S2B_256(S2B_SMIN + 22) /* 6400 */
907 S2B_256(S2B_SMIN + 23) /* 6656 */
908 S2B_256(S2B_SMIN + 24) /* 6912 */
909 S2B_256(S2B_SMIN + 25) /* 7168 */
910 S2B_256(S2B_SMIN + 26) /* 7424 */
911 S2B_256(S2B_SMIN + 27) /* 7680 */
912 S2B_256(S2B_SMIN + 28) /* 7936 */
929 static size_t max_rounds;
932 /* Various chunk-related settings. */
933 static size_t chunksize;
934 static size_t chunksize_mask; /* (chunksize - 1). */
935 static size_t chunk_npages;
936 static size_t arena_chunk_header_npages;
937 static size_t arena_maxclass; /* Max size class for arenas. */
944 /* Protects chunk-related data structures. */
945 static malloc_mutex_t huge_mtx;
947 /* Tree of chunks that are stand-alone huge allocations. */
948 static extent_tree_t huge;
952 * Protects sbrk() calls. This avoids malloc races among threads, though it
953 * does not protect against races with threads that call sbrk() directly.
955 static malloc_mutex_t dss_mtx;
956 /* Base address of the DSS. */
957 static void *dss_base;
958 /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
959 static void *dss_prev;
960 /* Current upper limit on DSS addresses. */
961 static void *dss_max;
964 * Trees of chunks that were previously allocated (trees differ only in node
965 * ordering). These are used when allocating chunks, in an attempt to re-use
966 * address space. Depending on function, different tree orderings are needed,
967 * which is why there are two trees with the same contents.
969 static extent_tree_t dss_chunks_szad;
970 static extent_tree_t dss_chunks_ad;
974 /* Huge allocation statistics. */
975 static uint64_t huge_nmalloc;
976 static uint64_t huge_ndalloc;
977 static size_t huge_allocated;
980 /****************************/
982 * base (internal allocation).
986 * Current pages that are being used for internal memory allocations. These
987 * pages are carved up in cacheline-size quanta, so that there is no chance of
988 * false cache line sharing.
990 static void *base_pages;
991 static void *base_next_addr;
992 static void *base_past_addr; /* Addr immediately past base_pages. */
993 static extent_node_t *base_nodes;
994 static malloc_mutex_t base_mtx;
996 static size_t base_mapped;
1005 * Arenas that are used to service external requests. Not all elements of the
1006 * arenas array are necessarily used; arenas are created lazily as needed.
1008 static arena_t **arenas;
1009 static unsigned narenas;
1011 # ifdef MALLOC_BALANCE
1012 static unsigned narenas_2pow;
1014 static unsigned next_arena;
1017 static pthread_mutex_t arenas_lock; /* Protects arenas initialization. */
1021 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
1024 static __thread arena_t *arenas_map;
1029 * Map of thread-specific magazine racks, used for thread-specific object
1032 static __thread mag_rack_t *mag_rack;
1036 /* Chunk statistics. */
1037 static chunk_stats_t stats_chunks;
1040 /*******************************/
1042 * Runtime configuration options.
1044 const char *_malloc_options;
1046 #ifndef MALLOC_PRODUCTION
1047 static bool opt_abort = true;
1048 static bool opt_junk = true;
1050 static bool opt_abort = false;
1051 static bool opt_junk = false;
1054 static bool opt_dss = true;
1055 static bool opt_mmap = true;
1058 static bool opt_mag = true;
1059 static size_t opt_mag_size_2pow = MAG_SIZE_2POW_DEFAULT;
1061 static size_t opt_dirty_max = DIRTY_MAX_DEFAULT;
1062 #ifdef MALLOC_BALANCE
1063 static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
1065 static bool opt_print_stats = false;
1066 static size_t opt_qspace_max_2pow = QSPACE_MAX_2POW_DEFAULT;
1067 static size_t opt_cspace_max_2pow = CSPACE_MAX_2POW_DEFAULT;
1068 static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT;
1069 static bool opt_utrace = false;
1070 static bool opt_sysv = false;
1071 static bool opt_xmalloc = false;
1072 static bool opt_zero = false;
1073 static int opt_narenas_lshift = 0;
1081 #define UTRACE(a, b, c) \
1083 malloc_utrace_t ut; \
1087 utrace(&ut, sizeof(ut)); \
1090 /******************************************************************************/
1092 * Begin function prototypes for non-inline static functions.
1095 static void malloc_mutex_init(malloc_mutex_t *mutex);
1096 static bool malloc_spin_init(pthread_mutex_t *lock);
1097 static void wrtmessage(const char *p1, const char *p2, const char *p3,
1100 static void malloc_printf(const char *format, ...);
1102 static char *umax2s(uintmax_t x, char *s);
1104 static bool base_pages_alloc_dss(size_t minsize);
1106 static bool base_pages_alloc_mmap(size_t minsize);
1107 static bool base_pages_alloc(size_t minsize);
1108 static void *base_alloc(size_t size);
1109 static void *base_calloc(size_t number, size_t size);
1110 static extent_node_t *base_node_alloc(void);
1111 static void base_node_dealloc(extent_node_t *node);
1113 static void stats_print(arena_t *arena);
1115 static void *pages_map(void *addr, size_t size);
1116 static void pages_unmap(void *addr, size_t size);
1118 static void *chunk_alloc_dss(size_t size);
1119 static void *chunk_recycle_dss(size_t size, bool zero);
1121 static void *chunk_alloc_mmap(size_t size);
1122 static void *chunk_alloc(size_t size, bool zero);
1124 static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
1125 static bool chunk_dealloc_dss(void *chunk, size_t size);
1127 static void chunk_dealloc_mmap(void *chunk, size_t size);
1128 static void chunk_dealloc(void *chunk, size_t size);
1130 static arena_t *choose_arena_hard(void);
1132 static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
1133 bool large, bool zero);
1134 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
1135 static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
1136 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
1138 static void arena_purge(arena_t *arena);
1139 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
1140 static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
1141 arena_run_t *run, size_t oldsize, size_t newsize);
1142 static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
1143 arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
1144 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
1145 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
1146 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
1147 #ifdef MALLOC_BALANCE
1148 static void arena_lock_balance_hard(arena_t *arena);
1151 static void mag_load(mag_t *mag);
1153 static void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
1154 static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
1156 static size_t arena_salloc(const void *ptr);
1158 static void mag_unload(mag_t *mag);
1160 static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
1162 static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
1163 void *ptr, size_t size, size_t oldsize);
1164 static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
1165 void *ptr, size_t size, size_t oldsize);
1166 static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
1167 static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
1168 static bool arena_new(arena_t *arena);
1169 static arena_t *arenas_extend(unsigned ind);
1171 static mag_t *mag_create(arena_t *arena, size_t binind);
1172 static void mag_destroy(mag_t *mag);
1173 static mag_rack_t *mag_rack_create(arena_t *arena);
1174 static void mag_rack_destroy(mag_rack_t *rack);
1176 static void *huge_malloc(size_t size, bool zero);
1177 static void *huge_palloc(size_t alignment, size_t size);
1178 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
1179 static void huge_dalloc(void *ptr);
1180 static void malloc_print_stats(void);
1182 static void size2bin_validate(void);
1184 static bool size2bin_init(void);
1185 static bool size2bin_init_hard(void);
1186 static bool malloc_init_hard(void);
1189 * End function prototypes.
1191 /******************************************************************************/
1193 * Begin mutex. We can't use normal pthread mutexes in all places, because
1194 * they require malloc()ed memory, which causes bootstrapping issues in some
1199 malloc_mutex_init(malloc_mutex_t *mutex)
1201 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
1207 malloc_mutex_lock(malloc_mutex_t *mutex)
1211 _SPINLOCK(&mutex->lock);
1215 malloc_mutex_unlock(malloc_mutex_t *mutex)
1219 _SPINUNLOCK(&mutex->lock);
1225 /******************************************************************************/
1227 * Begin spin lock. Spin locks here are actually adaptive mutexes that block
1228 * after a period of spinning, because unbounded spinning would allow for
1229 * priority inversion.
1233 * We use an unpublished interface to initialize pthread mutexes with an
1234 * allocation callback, in order to avoid infinite recursion.
1236 int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
1237 void *(calloc_cb)(size_t, size_t));
1239 __weak_reference(_pthread_mutex_init_calloc_cb_stub,
1240 _pthread_mutex_init_calloc_cb);
1243 _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
1244 void *(calloc_cb)(size_t, size_t))
1251 malloc_spin_init(pthread_mutex_t *lock)
1254 if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
1260 static inline unsigned
1261 malloc_spin_lock(pthread_mutex_t *lock)
1266 if (_pthread_mutex_trylock(lock) != 0) {
1267 /* Exponentially back off if there are multiple CPUs. */
1270 volatile unsigned j;
1272 for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
1273 for (j = 0; j < (1U << i); j++) {
1278 if (_pthread_mutex_trylock(lock) == 0)
1284 * Spinning failed. Block until the lock becomes
1285 * available, in order to avoid indefinite priority
1288 _pthread_mutex_lock(lock);
1289 assert((ret << BLOCK_COST_2POW) != 0 || ncpus == 1);
1290 return (ret << BLOCK_COST_2POW);
1298 malloc_spin_unlock(pthread_mutex_t *lock)
1302 _pthread_mutex_unlock(lock);
1308 /******************************************************************************/
1310 * Begin Utility functions/macros.
1313 /* Return the chunk address for allocation address a. */
1314 #define CHUNK_ADDR2BASE(a) \
1315 ((void *)((uintptr_t)(a) & ~chunksize_mask))
1317 /* Return the chunk offset of address a. */
1318 #define CHUNK_ADDR2OFFSET(a) \
1319 ((size_t)((uintptr_t)(a) & chunksize_mask))
1321 /* Return the smallest chunk multiple that is >= s. */
1322 #define CHUNK_CEILING(s) \
1323 (((s) + chunksize_mask) & ~chunksize_mask)
1325 /* Return the smallest quantum multiple that is >= a. */
1326 #define QUANTUM_CEILING(a) \
1327 (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
1329 /* Return the smallest cacheline multiple that is >= s. */
1330 #define CACHELINE_CEILING(s) \
1331 (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
1333 /* Return the smallest subpage multiple that is >= s. */
1334 #define SUBPAGE_CEILING(s) \
1335 (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
1337 /* Return the smallest PAGE_SIZE multiple that is >= s. */
1338 #define PAGE_CEILING(s) \
1339 (((s) + PAGE_MASK) & ~PAGE_MASK)
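
/*
 * Usage sketch (assuming the default 16 byte quantum, 64 byte cachelines,
 * and 4 kB pages):
 *
 *	QUANTUM_CEILING(17)   == 32
 *	CACHELINE_CEILING(65) == 128
 *	PAGE_CEILING(5000)    == 8192
 *	CHUNK_CEILING(1)      == chunksize (1 MB by default)
 */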
1342 /* Compute the smallest power of 2 that is >= x. */
1343 static inline size_t
1353 #if (SIZEOF_PTR == 8)
1361 #ifdef MALLOC_BALANCE
1363 * Use a simple linear congruential pseudo-random number generator:
1365 * prn(x) = (a*x + c) % m
1367 * where the following constants ensure maximal period:
1369 * a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
1370 * c == Odd number (relatively prime to 2^n).
1373 * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
1375 * This choice of m has the disadvantage that the quality of the bits is
1376 * proportional to bit position. For example, the lowest bit has a cycle of 2,
1377 * the next has a cycle of 4, etc. For this reason, we prefer to use the upper
1380 # define PRN_DEFINE(suffix, var, a, c) \
1381 static inline void \
1382 sprn_##suffix(uint32_t seed) \
1387 static inline uint32_t \
1388 prn_##suffix(uint32_t lg_range) \
1392 assert(lg_range > 0); \
1393 assert(lg_range <= 32); \
1395 x = (var * (a)) + (c); \
1397 ret = x >> (32 - lg_range); \
1401 # define SPRN(suffix, seed) sprn_##suffix(seed)
1402 # define PRN(suffix, lg_range) prn_##suffix(lg_range)
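
/*
 * Usage sketch: prn_*() returns the upper lg_range bits of the 32-bit state,
 * so PRN(balance, narenas_2pow) yields a value in [0, narenas) when narenas
 * == (1U << narenas_2pow), which is how arena assignment uses it below.
 */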
1405 #ifdef MALLOC_BALANCE
1406 /* Define the PRNG used for arena assignment. */
1407 static __thread uint32_t balance_x;
1408 PRN_DEFINE(balance, balance_x, 1297, 1301)
1412 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1415 _write(STDERR_FILENO, p1, strlen(p1));
1416 _write(STDERR_FILENO, p2, strlen(p2));
1417 _write(STDERR_FILENO, p3, strlen(p3));
1418 _write(STDERR_FILENO, p4, strlen(p4));
1421 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
1422 const char *p4) = wrtmessage;
1426 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1429 malloc_printf(const char *format, ...)
1434 va_start(ap, format);
1435 vsnprintf(buf, sizeof(buf), format, ap);
1437 _malloc_message(buf, "", "", "");
1442 * We don't want to depend on vsnprintf() for production builds, since that can
1443 * cause unnecessary bloat for static binaries. umax2s() provides minimal
1444 * integer printing functionality, so that malloc_printf() use can be limited to
1445 * MALLOC_STATS code.
1447 #define UMAX2S_BUFSIZE 21
1449 umax2s(uintmax_t x, char *s)
1453 /* Make sure UMAX2S_BUFSIZE is large enough. */
1454 assert(sizeof(uintmax_t) <= 8);
1456 i = UMAX2S_BUFSIZE - 1;
1460 s[i] = "0123456789"[x % 10];
1467 /******************************************************************************/
1471 base_pages_alloc_dss(size_t minsize)
1475 * Do special DSS allocation here, since base allocations don't need to
1478 malloc_mutex_lock(&dss_mtx);
1479 if (dss_prev != (void *)-1) {
1481 size_t csize = CHUNK_CEILING(minsize);
1484 /* Get the current end of the DSS. */
1488 * Calculate how much padding is necessary to
1489 * chunk-align the end of the DSS. Don't worry about
1490 * dss_max not being chunk-aligned though.
1492 incr = (intptr_t)chunksize
1493 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1495 if ((size_t)incr < minsize)
1498 dss_prev = sbrk(incr);
1499 if (dss_prev == dss_max) {
1501 dss_max = (void *)((intptr_t)dss_prev + incr);
1502 base_pages = dss_prev;
1503 base_next_addr = base_pages;
1504 base_past_addr = dss_max;
1506 base_mapped += incr;
1508 malloc_mutex_unlock(&dss_mtx);
1511 } while (dss_prev != (void *)-1);
1513 malloc_mutex_unlock(&dss_mtx);
1520 base_pages_alloc_mmap(size_t minsize)
1524 assert(minsize != 0);
1525 csize = PAGE_CEILING(minsize);
1526 base_pages = pages_map(NULL, csize);
1527 if (base_pages == NULL)
1529 base_next_addr = base_pages;
1530 base_past_addr = (void *)((uintptr_t)base_pages + csize);
1532 base_mapped += csize;
1539 base_pages_alloc(size_t minsize)
1543 if (opt_mmap && minsize != 0)
1546 if (base_pages_alloc_mmap(minsize) == false)
1552 if (base_pages_alloc_dss(minsize) == false)
1562 base_alloc(size_t size)
1567 /* Round size up to nearest multiple of the cacheline size. */
1568 csize = CACHELINE_CEILING(size);
1570 malloc_mutex_lock(&base_mtx);
1571 /* Make sure there's enough space for the allocation. */
1572 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1573 if (base_pages_alloc(csize)) {
1574 malloc_mutex_unlock(&base_mtx);
1579 ret = base_next_addr;
1580 base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1581 malloc_mutex_unlock(&base_mtx);
1587 base_calloc(size_t number, size_t size)
1591 ret = base_alloc(number * size);
1592 memset(ret, 0, number * size);
1597 static extent_node_t *
1598 base_node_alloc(void)
1602 malloc_mutex_lock(&base_mtx);
1603 if (base_nodes != NULL) {
1605 base_nodes = *(extent_node_t **)ret;
1606 malloc_mutex_unlock(&base_mtx);
1608 malloc_mutex_unlock(&base_mtx);
1609 ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
1616 base_node_dealloc(extent_node_t *node)
1619 malloc_mutex_lock(&base_mtx);
1620 *(extent_node_t **)node = base_nodes;
1622 malloc_mutex_unlock(&base_mtx);
1625 /******************************************************************************/
1629 stats_print(arena_t *arena)
1631 unsigned i, gap_start;
1633 malloc_printf("dirty: %zu page%s dirty, %llu sweep%s,"
1634 " %llu madvise%s, %llu page%s purged\n",
1635 arena->ndirty, arena->ndirty == 1 ? "" : "s",
1636 arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
1637 arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
1638 arena->stats.purged, arena->stats.purged == 1 ? "" : "s");
1640 malloc_printf(" allocated nmalloc ndalloc\n");
1641 malloc_printf("small: %12zu %12llu %12llu\n",
1642 arena->stats.allocated_small, arena->stats.nmalloc_small,
1643 arena->stats.ndalloc_small);
1644 malloc_printf("large: %12zu %12llu %12llu\n",
1645 arena->stats.allocated_large, arena->stats.nmalloc_large,
1646 arena->stats.ndalloc_large);
1647 malloc_printf("total: %12zu %12llu %12llu\n",
1648 arena->stats.allocated_small + arena->stats.allocated_large,
1649 arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1650 arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1651 malloc_printf("mapped: %12zu\n", arena->stats.mapped);
1654 if (__isthreaded && opt_mag) {
1655 malloc_printf("bins: bin size regs pgs mags "
1656 "newruns reruns maxruns curruns\n");
1659 malloc_printf("bins: bin size regs pgs requests "
1660 "newruns reruns maxruns curruns\n");
1664 for (i = 0, gap_start = UINT_MAX; i < nbins; i++) {
1665 if (arena->bins[i].stats.nruns == 0) {
1666 if (gap_start == UINT_MAX)
1669 if (gap_start != UINT_MAX) {
1670 if (i > gap_start + 1) {
1671 /* Gap of more than one size class. */
1672 malloc_printf("[%u..%u]\n",
1675 /* Gap of one size class. */
1676 malloc_printf("[%u]\n", gap_start);
1678 gap_start = UINT_MAX;
1681 "%13u %1s %4u %4u %3u %9llu %9llu"
1682 " %9llu %7lu %7lu\n",
1684 i < ntbins ? "T" : i < ntbins + nqbins ? "Q" :
1685 i < ntbins + nqbins + ncbins ? "C" : "S",
1686 arena->bins[i].reg_size,
1687 arena->bins[i].nregs,
1688 arena->bins[i].run_size >> PAGE_SHIFT,
1690 (__isthreaded && opt_mag) ?
1691 arena->bins[i].stats.nmags :
1693 arena->bins[i].stats.nrequests,
1694 arena->bins[i].stats.nruns,
1695 arena->bins[i].stats.reruns,
1696 arena->bins[i].stats.highruns,
1697 arena->bins[i].stats.curruns);
1700 if (gap_start != UINT_MAX) {
1701 if (i > gap_start + 1) {
1702 /* Gap of more than one size class. */
1703 malloc_printf("[%u..%u]\n", gap_start, i - 1);
1705 /* Gap of one size class. */
1706 malloc_printf("[%u]\n", gap_start);
1713 * End Utility functions/macros.
1715 /******************************************************************************/
1717 * Begin extent tree code.
1722 extent_szad_comp(extent_node_t *a, extent_node_t *b)
1725 size_t a_size = a->size;
1726 size_t b_size = b->size;
1728 ret = (a_size > b_size) - (a_size < b_size);
1730 uintptr_t a_addr = (uintptr_t)a->addr;
1731 uintptr_t b_addr = (uintptr_t)b->addr;
1733 ret = (a_addr > b_addr) - (a_addr < b_addr);
1739 /* Wrap red-black tree macros in functions. */
1740 rb_wrap(__unused static, extent_tree_szad_, extent_tree_t, extent_node_t,
1741 link_szad, extent_szad_comp)
1745 extent_ad_comp(extent_node_t *a, extent_node_t *b)
1747 uintptr_t a_addr = (uintptr_t)a->addr;
1748 uintptr_t b_addr = (uintptr_t)b->addr;
1750 return ((a_addr > b_addr) - (a_addr < b_addr));
1753 /* Wrap red-black tree macros in functions. */
1754 rb_wrap(__unused static, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
1758 * End extent tree code.
1760 /******************************************************************************/
1762 * Begin chunk management functions.
1766 pages_map(void *addr, size_t size)
1771 * We don't use MAP_FIXED here, because it can cause the *replacement*
1772 * of existing mappings, and we only want to create new mappings.
1774 ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1776 assert(ret != NULL);
1778 if (ret == MAP_FAILED)
1780 else if (addr != NULL && ret != addr) {
1782 * We succeeded in mapping memory, but not in the right place.
1784 if (munmap(ret, size) == -1) {
1785 char buf[STRERROR_BUF];
1787 strerror_r(errno, buf, sizeof(buf));
1788 _malloc_message(_getprogname(),
1789 ": (malloc) Error in munmap(): ", buf, "\n");
1796 assert(ret == NULL || (addr == NULL && ret != addr)
1797 || (addr != NULL && ret == addr));
1802 pages_unmap(void *addr, size_t size)
1805 if (munmap(addr, size) == -1) {
1806 char buf[STRERROR_BUF];
1808 strerror_r(errno, buf, sizeof(buf));
1809 _malloc_message(_getprogname(),
1810 ": (malloc) Error in munmap(): ", buf, "\n");
1818 chunk_alloc_dss(size_t size)
1822 * sbrk() uses a signed increment argument, so take care not to
1823 * interpret a huge allocation request as a negative increment.
1825 if ((intptr_t)size < 0)
1828 malloc_mutex_lock(&dss_mtx);
1829 if (dss_prev != (void *)-1) {
1833 * The loop is necessary to recover from races with other
1834 * threads that are using the DSS for something other than
1840 /* Get the current end of the DSS. */
1844 * Calculate how much padding is necessary to
1845 * chunk-align the end of the DSS.
1847 incr = (intptr_t)size
1848 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1849 if (incr == (intptr_t)size)
1852 ret = (void *)((intptr_t)dss_max + incr);
1856 dss_prev = sbrk(incr);
1857 if (dss_prev == dss_max) {
1859 dss_max = (void *)((intptr_t)dss_prev + incr);
1860 malloc_mutex_unlock(&dss_mtx);
1863 } while (dss_prev != (void *)-1);
1865 malloc_mutex_unlock(&dss_mtx);
1871 chunk_recycle_dss(size_t size, bool zero)
1873 extent_node_t *node, key;
1877 malloc_mutex_lock(&dss_mtx);
1878 node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
1880 void *ret = node->addr;
1882 /* Remove node from the tree. */
1883 extent_tree_szad_remove(&dss_chunks_szad, node);
1884 if (node->size == size) {
1885 extent_tree_ad_remove(&dss_chunks_ad, node);
1886 base_node_dealloc(node);
1889 * Insert the remainder of node's address range as a
1890 * smaller chunk. Its position within dss_chunks_ad
1893 assert(node->size > size);
1894 node->addr = (void *)((uintptr_t)node->addr + size);
1896 extent_tree_szad_insert(&dss_chunks_szad, node);
1898 malloc_mutex_unlock(&dss_mtx);
1901 memset(ret, 0, size);
1904 malloc_mutex_unlock(&dss_mtx);
1911 chunk_alloc_mmap(size_t size)
1917 * Ideally, there would be a way to specify alignment to mmap() (like
1918 * NetBSD has), but in the absence of such a feature, we have to work
1919 * hard to efficiently create aligned mappings. The reliable, but
1920 * expensive method is to create a mapping that is over-sized, then
1921 * trim the excess. However, that always results in at least one call
1924 * A more optimistic approach is to try mapping precisely the right
1925 * amount, then try to append another mapping if alignment is off. In
1926 * practice, this works out well as long as the application is not
1927 * interleaving mappings via direct mmap() calls. If we do run into a
1928 * situation where there is an interleaved mapping and we are unable to
1929 * extend an unaligned mapping, our best option is to momentarily
1930 * revert to the reliable-but-expensive method. This will tend to
1931 * leave a gap in the memory map that is too small to cause later
1932 * problems for the optimistic method.
1935 ret = pages_map(NULL, size);
1939 offset = CHUNK_ADDR2OFFSET(ret);
1941 /* Try to extend chunk boundary. */
1942 if (pages_map((void *)((uintptr_t)ret + size),
1943 chunksize - offset) == NULL) {
1945 * Extension failed. Clean up, then revert to the
1946 * reliable-but-expensive method.
1948 pages_unmap(ret, size);
1950 /* Beware size_t wrap-around. */
1951 if (size + chunksize <= size)
1954 ret = pages_map(NULL, size + chunksize);
1958 /* Clean up unneeded leading/trailing space. */
1959 offset = CHUNK_ADDR2OFFSET(ret);
1961 /* Leading space. */
1962 pages_unmap(ret, chunksize - offset);
1964 ret = (void *)((uintptr_t)ret +
1965 (chunksize - offset));
1967 /* Trailing space. */
1968 pages_unmap((void *)((uintptr_t)ret + size),
1971 /* Trailing space only. */
1972 pages_unmap((void *)((uintptr_t)ret + size),
1976 /* Clean up unneeded leading space. */
1977 pages_unmap(ret, chunksize - offset);
1978 ret = (void *)((uintptr_t)ret + (chunksize - offset));
1986 chunk_alloc(size_t size, bool zero)
1991 assert((size & chunksize_mask) == 0);
1997 ret = chunk_alloc_mmap(size);
2004 ret = chunk_recycle_dss(size, zero);
2009 ret = chunk_alloc_dss(size);
2015 /* All strategies for allocation failed. */
2020 stats_chunks.nchunks += (size / chunksize);
2021 stats_chunks.curchunks += (size / chunksize);
2023 if (stats_chunks.curchunks > stats_chunks.highchunks)
2024 stats_chunks.highchunks = stats_chunks.curchunks;
2027 assert(CHUNK_ADDR2BASE(ret) == ret);
2032 static extent_node_t *
2033 chunk_dealloc_dss_record(void *chunk, size_t size)
2035 extent_node_t *node, *prev, key;
2037 key.addr = (void *)((uintptr_t)chunk + size);
2038 node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
2039 /* Try to coalesce forward. */
2040 if (node != NULL && node->addr == key.addr) {
2042 * Coalesce chunk with the following address range. This does
2043 * not change the position within dss_chunks_ad, so only
2044 * remove/insert from/into dss_chunks_szad.
2046 extent_tree_szad_remove(&dss_chunks_szad, node);
2049 extent_tree_szad_insert(&dss_chunks_szad, node);
2052 * Coalescing forward failed, so insert a new node. Drop
2053 * dss_mtx during node allocation, since it is possible that a
2054 * new base chunk will be allocated.
2056 malloc_mutex_unlock(&dss_mtx);
2057 node = base_node_alloc();
2058 malloc_mutex_lock(&dss_mtx);
2063 extent_tree_ad_insert(&dss_chunks_ad, node);
2064 extent_tree_szad_insert(&dss_chunks_szad, node);
2067 /* Try to coalesce backward. */
2068 prev = extent_tree_ad_prev(&dss_chunks_ad, node);
2069 if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
2072 * Coalesce chunk with the previous address range. This does
2073 * not change the position within dss_chunks_ad, so only
2074 * remove/insert node from/into dss_chunks_szad.
2076 extent_tree_szad_remove(&dss_chunks_szad, prev);
2077 extent_tree_ad_remove(&dss_chunks_ad, prev);
2079 extent_tree_szad_remove(&dss_chunks_szad, node);
2080 node->addr = prev->addr;
2081 node->size += prev->size;
2082 extent_tree_szad_insert(&dss_chunks_szad, node);
2084 base_node_dealloc(prev);
2091 chunk_dealloc_dss(void *chunk, size_t size)
2094 malloc_mutex_lock(&dss_mtx);
2095 if ((uintptr_t)chunk >= (uintptr_t)dss_base
2096 && (uintptr_t)chunk < (uintptr_t)dss_max) {
2097 extent_node_t *node;
2099 /* Try to coalesce with other unused chunks. */
2100 node = chunk_dealloc_dss_record(chunk, size);
2106 /* Get the current end of the DSS. */
2110 * Try to shrink the DSS if this chunk is at the end of the
2111 * DSS. The sbrk() call here is subject to a race condition
2112 * with threads that use brk(2) or sbrk(2) directly, but the
2113 * alternative would be to leak memory for the sake of poorly
2114 * designed multi-threaded programs.
2116 if ((void *)((uintptr_t)chunk + size) == dss_max
2117 && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
2119 dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
2122 extent_tree_szad_remove(&dss_chunks_szad, node);
2123 extent_tree_ad_remove(&dss_chunks_ad, node);
2124 base_node_dealloc(node);
2126 malloc_mutex_unlock(&dss_mtx);
2128 malloc_mutex_unlock(&dss_mtx);
2129 madvise(chunk, size, MADV_FREE);
2134 malloc_mutex_unlock(&dss_mtx);
2141 chunk_dealloc_mmap(void *chunk, size_t size)
2144 pages_unmap(chunk, size);
2148 chunk_dealloc(void *chunk, size_t size)
2151 assert(chunk != NULL);
2152 assert(CHUNK_ADDR2BASE(chunk) == chunk);
2154 assert((size & chunksize_mask) == 0);
2157 stats_chunks.curchunks -= (size / chunksize);
2162 if (chunk_dealloc_dss(chunk, size) == false)
2168 chunk_dealloc_mmap(chunk, size);
2172 * End chunk management functions.
2174 /******************************************************************************/
2180 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
2181 * code if necessary).
2183 static inline arena_t *
2189 * We can only use TLS if this is a PIC library, since for the static
2190 * library version, libc's malloc is used by TLS allocation, which
2191 * introduces a bootstrapping issue.
2194 if (__isthreaded == false) {
2195 /* Avoid the overhead of TLS for single-threaded operation. */
2201 ret = choose_arena_hard();
2202 assert(ret != NULL);
2205 if (__isthreaded && narenas > 1) {
2209 * Hash _pthread_self() to one of the arenas. There is a prime
2210 * number of arenas, so this has a reasonable chance of
2211 * working. Even so, the hashing can be easily thwarted by
2212 * inconvenient _pthread_self() values. Without specific
2213 * knowledge of how _pthread_self() calculates values, we can't
2214 * easily do much better than this.
2216 ind = (unsigned long) _pthread_self() % narenas;
2219 * Optimistically assume that arenas[ind] has been initialized.
2220 * At worst, we find out that some other thread has already
2221 * done so, after acquiring the lock in preparation. Note that
2222 * this lazy locking also has the effect of lazily forcing
2223 * cache coherency; without the lock acquisition, there's no
2224 * guarantee that modification of arenas[ind] by another thread
2225 * would be seen on this CPU for an arbitrary amount of time.
2227 * In general, this approach to modifying a synchronized value
2228 * isn't a good idea, but in this case we only ever modify the
2229 * value once, so things work out well.
2234 * Avoid races with another thread that may have already
2235 * initialized arenas[ind].
2237 malloc_spin_lock(&arenas_lock);
2238 if (arenas[ind] == NULL)
2239 ret = arenas_extend((unsigned)ind);
2242 malloc_spin_unlock(&arenas_lock);
2248 assert(ret != NULL);
2254 * Choose an arena based on a per-thread value (slow-path code only, called
2255 * only by choose_arena()).
2258 choose_arena_hard(void)
2262 assert(__isthreaded);
2264 #ifdef MALLOC_BALANCE
2265 /* Seed the PRNG used for arena load balancing. */
2266 SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self()));
2270 #ifdef MALLOC_BALANCE
2273 ind = PRN(balance, narenas_2pow);
2274 if ((ret = arenas[ind]) == NULL) {
2275 malloc_spin_lock(&arenas_lock);
2276 if ((ret = arenas[ind]) == NULL)
2277 ret = arenas_extend(ind);
2278 malloc_spin_unlock(&arenas_lock);
2281 malloc_spin_lock(&arenas_lock);
2282 if ((ret = arenas[next_arena]) == NULL)
2283 ret = arenas_extend(next_arena);
2284 next_arena = (next_arena + 1) % narenas;
2285 malloc_spin_unlock(&arenas_lock);
2297 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
2299 uintptr_t a_chunk = (uintptr_t)a;
2300 uintptr_t b_chunk = (uintptr_t)b;
2305 return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
2308 /* Wrap red-black tree macros in functions. */
2309 rb_wrap(__unused static, arena_chunk_tree_dirty_, arena_chunk_tree_t,
2310 arena_chunk_t, link_dirty, arena_chunk_comp)
2313 arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2315 uintptr_t a_mapelm = (uintptr_t)a;
2316 uintptr_t b_mapelm = (uintptr_t)b;
2321 return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
2324 /* Wrap red-black tree macros in functions. */
2325 rb_wrap(__unused static, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
2326 link, arena_run_comp)
2329 arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2332 size_t a_size = a->bits & ~PAGE_MASK;
2333 size_t b_size = b->bits & ~PAGE_MASK;
2335 ret = (a_size > b_size) - (a_size < b_size);
2337 uintptr_t a_mapelm, b_mapelm;
2339 if ((a->bits & CHUNK_MAP_KEY) == 0)
2340 a_mapelm = (uintptr_t)a;
2343 * Treat keys as though they are lower than anything
2348 b_mapelm = (uintptr_t)b;
2350 ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
2356 /* Wrap red-black tree macros in functions. */
2357 rb_wrap(__unused static, arena_avail_tree_, arena_avail_tree_t,
2358 arena_chunk_map_t, link, arena_avail_comp)
2360 static inline void *
2361 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
2364 unsigned i, mask, bit, regind;
2366 assert(run->magic == ARENA_RUN_MAGIC);
2367 assert(run->regs_minelm < bin->regs_mask_nelms);
2370 * Move the first check outside the loop, so that run->regs_minelm can
2371 * be updated unconditionally, without the possibility of updating it
2374 i = run->regs_minelm;
2375 mask = run->regs_mask[i];
2377 /* Usable allocation found. */
2378 bit = ffs((int)mask) - 1;
2380 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
2381 assert(regind < bin->nregs);
2382 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2383 + (bin->reg_size * regind));
2386 mask ^= (1U << bit);
2387 run->regs_mask[i] = mask;
2392 for (i++; i < bin->regs_mask_nelms; i++) {
2393 mask = run->regs_mask[i];
2395 /* Usable allocation found. */
2396 bit = ffs((int)mask) - 1;
2398 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
2399 assert(regind < bin->nregs);
2400 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2401 + (bin->reg_size * regind));
2404 mask ^= (1U << bit);
2405 run->regs_mask[i] = mask;
2408 * Make a note that nothing before this element
2409 * contains a free region.
2411 run->regs_minelm = i; /* Low payoff: + (mask == 0); */
2422 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
2424 unsigned diff, regind, elm, bit;
2426 assert(run->magic == ARENA_RUN_MAGIC);
2429 * Avoid doing division with a variable divisor if possible. Using
2430 * actual division here can reduce allocator throughput by over 20%!
2432 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
2433 if ((size & (size - 1)) == 0) {
2435 * log2_table allows fast division of a power of two in the
2438 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
2440 static const unsigned char log2_table[] = {
2441 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
2442 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
2443 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2444 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
2445 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2446 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2447 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2448 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
2452 regind = (diff >> log2_table[size - 1]);
2453 else if (size <= 32768)
2454 regind = diff >> (8 + log2_table[(size >> 8) - 1]);
2456 regind = diff / size;
2457 } else if (size < qspace_max) {
2459 * To divide by a number D that is not a power of two we
2460 * multiply by (2^21 / D) and then right shift by 21 positions.
2466 * (X * qsize_invs[(D >> QUANTUM_2POW) - 3]) >> SIZE_INV_SHIFT
2469 * We can omit the first three elements, because we never
2470 * divide by 0, and QUANTUM and 2*QUANTUM are both powers of
2471 * two, which are handled above.
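 *
 * Worked example (a sketch; assumes QUANTUM_2POW == 4, i.e. a 16 byte
 * quantum): for D == 48, qsize_invs[(48 >> 4) - 3] == (2^21 / 48) + 1 ==
 * 43691, so for X == 96 the product is 96 * 43691 == 4194336, and
 * 4194336 >> 21 == 2, which matches 96 / 48.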
2473 #define SIZE_INV_SHIFT 21
2474 #define QSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW)) + 1)
2475 static const unsigned qsize_invs[] = {
2477 QSIZE_INV(4), QSIZE_INV(5), QSIZE_INV(6), QSIZE_INV(7)
2478 #if (QUANTUM_2POW < 4)
2480 QSIZE_INV(8), QSIZE_INV(9), QSIZE_INV(10), QSIZE_INV(11),
2481 QSIZE_INV(12),QSIZE_INV(13), QSIZE_INV(14), QSIZE_INV(15)
2484 assert(QUANTUM * (((sizeof(qsize_invs)) / sizeof(unsigned)) + 3)
2485 >= (1U << QSPACE_MAX_2POW_DEFAULT));
2487 if (size <= (((sizeof(qsize_invs) / sizeof(unsigned)) + 2) <<
2489 regind = qsize_invs[(size >> QUANTUM_2POW) - 3] * diff;
2490 regind >>= SIZE_INV_SHIFT;
2492 regind = diff / size;
2494 } else if (size < cspace_max) {
2495 #define CSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << CACHELINE_2POW)) + 1)
2496 static const unsigned csize_invs[] = {
2498 CSIZE_INV(4), CSIZE_INV(5), CSIZE_INV(6), CSIZE_INV(7)
2500 assert(CACHELINE * (((sizeof(csize_invs)) / sizeof(unsigned)) +
2501 3) >= (1U << CSPACE_MAX_2POW_DEFAULT));
2503 if (size <= (((sizeof(csize_invs) / sizeof(unsigned)) + 2) <<
2505 regind = csize_invs[(size >> CACHELINE_2POW) - 3] *
2507 regind >>= SIZE_INV_SHIFT;
2509 regind = diff / size;
2512 #define SSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << SUBPAGE_2POW)) + 1)
2513 static const unsigned ssize_invs[] = {
2515 SSIZE_INV(4), SSIZE_INV(5), SSIZE_INV(6), SSIZE_INV(7),
2516 SSIZE_INV(8), SSIZE_INV(9), SSIZE_INV(10), SSIZE_INV(11),
2517 SSIZE_INV(12), SSIZE_INV(13), SSIZE_INV(14), SSIZE_INV(15)
2518 #if (PAGE_SHIFT == 13)
2520 SSIZE_INV(16), SSIZE_INV(17), SSIZE_INV(18), SSIZE_INV(19),
2521 SSIZE_INV(20), SSIZE_INV(21), SSIZE_INV(22), SSIZE_INV(23),
2522 SSIZE_INV(24), SSIZE_INV(25), SSIZE_INV(26), SSIZE_INV(27),
2523 SSIZE_INV(28), SSIZE_INV(29), SSIZE_INV(30), SSIZE_INV(31)
2526 assert(SUBPAGE * (((sizeof(ssize_invs)) / sizeof(unsigned)) + 3)
2529 if (size < (((sizeof(ssize_invs) / sizeof(unsigned)) + 2) <<
2531 regind = ssize_invs[(size >> SUBPAGE_2POW) - 3] * diff;
2532 regind >>= SIZE_INV_SHIFT;
2534 regind = diff / size;
2537 #undef SIZE_INV_SHIFT
2538 assert(diff == regind * size);
2539 assert(regind < bin->nregs);
2541 elm = regind >> (SIZEOF_INT_2POW + 3);
2542 if (elm < run->regs_minelm)
2543 run->regs_minelm = elm;
2544 bit = regind - (elm << (SIZEOF_INT_2POW + 3));
2545 assert((run->regs_mask[elm] & (1U << bit)) == 0);
2546 run->regs_mask[elm] |= (1U << bit);
2550 arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large, bool zero)
2553 arena_chunk_t *chunk;
2554 size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
2556 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2557 old_ndirty = chunk->ndirty;
2558 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2560 total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
2562 need_pages = (size >> PAGE_SHIFT);
2563 assert(need_pages > 0);
2564 assert(need_pages <= total_pages);
2565 rem_pages = total_pages - need_pages;
2567 arena_avail_tree_remove(&arena->runs_avail, &chunk->map[run_ind]);
2569 /* Keep track of trailing unused pages for later use. */
2570 if (rem_pages > 0) {
2571 chunk->map[run_ind+need_pages].bits = (rem_pages <<
2572 PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
2574 chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
2575 PAGE_SHIFT) | (chunk->map[run_ind+total_pages-1].bits &
2577 arena_avail_tree_insert(&arena->runs_avail,
2578 &chunk->map[run_ind+need_pages]);
2581 for (i = 0; i < need_pages; i++) {
2582 /* Zero if necessary. */
2584 if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED)
2586 memset((void *)((uintptr_t)chunk + ((run_ind
2587 + i) << PAGE_SHIFT)), 0, PAGE_SIZE);
2588 /* CHUNK_MAP_ZEROED is cleared below. */
2592 /* Update dirty page accounting. */
2593 if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
2596 /* CHUNK_MAP_DIRTY is cleared below. */
2599 /* Initialize the chunk map. */
2601 chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE
2602 | CHUNK_MAP_ALLOCATED;
2604 chunk->map[run_ind + i].bits = (size_t)run
2605 | CHUNK_MAP_ALLOCATED;
2610 * Set the run size only in the first element for large runs. This is
2611 * primarily a debugging aid, since the lack of size info for trailing
2612 * pages only matters if the application tries to operate on an interior pointer.
if (large)
	chunk->map[run_ind].bits |= size;
2618 if (chunk->ndirty == 0 && old_ndirty > 0)
2619 arena_chunk_tree_dirty_remove(&arena->chunks_dirty, chunk);
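/*
 * Summary of the per-page map encoding used above: the low bits of
 * map[i].bits (those within PAGE_MASK) hold the flag bits
 * (CHUNK_MAP_KEY/DIRTY/ZEROED/LARGE/ALLOCATED), while the remaining high
 * bits hold either the run size in bytes (for free runs and the first page
 * of large runs) or, for pages belonging to small runs, the address of the
 * owning arena_run_t.
 */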
2622 static arena_chunk_t *
2623 arena_chunk_alloc(arena_t *arena)
2625 arena_chunk_t *chunk;
2628 if (arena->spare != NULL) {
2629 chunk = arena->spare;
2630 arena->spare = NULL;
2632 chunk = (arena_chunk_t *)chunk_alloc(chunksize, true);
2636 arena->stats.mapped += chunksize;
2639 chunk->arena = arena;
2642 * Claim that no pages are in use, since the header is merely overhead.
2648 * Initialize the map to contain one maximal free untouched run.
2650 for (i = 0; i < arena_chunk_header_npages; i++)
2651 chunk->map[i].bits = 0;
2652 chunk->map[i].bits = arena_maxclass | CHUNK_MAP_ZEROED;
2653 for (i++; i < chunk_npages-1; i++) {
2654 chunk->map[i].bits = CHUNK_MAP_ZEROED;
2656 chunk->map[chunk_npages-1].bits = arena_maxclass |
2660 /* Insert the run into the runs_avail tree. */
2661 arena_avail_tree_insert(&arena->runs_avail,
2662 &chunk->map[arena_chunk_header_npages]);
2668 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
2671 if (arena->spare != NULL) {
2672 if (arena->spare->ndirty > 0) {
2673 arena_chunk_tree_dirty_remove(
2674 &chunk->arena->chunks_dirty, arena->spare);
2675 arena->ndirty -= arena->spare->ndirty;
2677 chunk_dealloc((void *)arena->spare, chunksize);
2679 arena->stats.mapped -= chunksize;
2684 * Remove run from runs_avail, regardless of whether this chunk
2685 * will be cached, so that the arena does not use it. Dirty page
2686 * flushing only uses the chunks_dirty tree, so leaving this chunk in
2687 * the chunks_* trees is sufficient for that purpose.
2689 arena_avail_tree_remove(&arena->runs_avail,
2690 &chunk->map[arena_chunk_header_npages]);
2692 arena->spare = chunk;
2695 static arena_run_t *
2696 arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
2698 arena_chunk_t *chunk;
2700 arena_chunk_map_t *mapelm, key;
2702 assert(size <= arena_maxclass);
2703 assert((size & PAGE_MASK) == 0);
2705 /* Search the arena's chunks for the lowest best fit. */
2706 key.bits = size | CHUNK_MAP_KEY;
2707 mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
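/*
 * Because CHUNK_MAP_KEY elements sort below real map elements of equal
 * size (see arena_avail_comp()), this nsearch returns the smallest free
 * run that can hold 'size', breaking ties in favor of the lowest address.
 * For example, with free runs of 3, 4 and 8 pages, a 4-page request maps
 * to the 4-page run rather than the 8-page one.
 */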
2708 if (mapelm != NULL) {
2709 arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
2710 size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
2711 / sizeof(arena_chunk_map_t);
2713 run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
2715 arena_run_split(arena, run, size, large, zero);
2720 * No usable runs. Create a new chunk from which to allocate the run.
2722 chunk = arena_chunk_alloc(arena);
2725 run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2727 /* Update page map. */
2728 arena_run_split(arena, run, size, large, zero);
2733 arena_purge(arena_t *arena)
2735 arena_chunk_t *chunk;
2740 rb_foreach_begin(arena_chunk_t, link_dirty, &arena->chunks_dirty,
2742 ndirty += chunk->ndirty;
2743 } rb_foreach_end(arena_chunk_t, link_dirty, &arena->chunks_dirty, chunk)
2744 assert(ndirty == arena->ndirty);
2746 assert(arena->ndirty > opt_dirty_max);
2749 arena->stats.npurge++;
2753 * Iterate downward through chunks until enough dirty memory has been
2754 * purged. Terminate as soon as possible in order to minimize the
2755 * number of system calls, even if a chunk has only been partially purged.
2758 while (arena->ndirty > (opt_dirty_max >> 1)) {
2759 chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
2760 assert(chunk != NULL);
2762 for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
2763 assert(i >= arena_chunk_header_npages);
2765 if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
2766 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2767 /* Find adjacent dirty run(s). */
2768 for (npages = 1; i > arena_chunk_header_npages
2769 && (chunk->map[i - 1].bits &
2770 CHUNK_MAP_DIRTY); npages++) {
2772 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2774 chunk->ndirty -= npages;
2775 arena->ndirty -= npages;
2777 madvise((void *)((uintptr_t)chunk + (i <<
2778 PAGE_SHIFT)), (npages << PAGE_SHIFT),
2781 arena->stats.nmadvise++;
2782 arena->stats.purged += npages;
2784 if (arena->ndirty <= (opt_dirty_max >> 1))
2789 if (chunk->ndirty == 0) {
2790 arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
2797 arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
2799 arena_chunk_t *chunk;
2800 size_t size, run_ind, run_pages;
2802 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2803 run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
2805 assert(run_ind >= arena_chunk_header_npages);
2806 assert(run_ind < chunk_npages);
2807 if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
2808 size = chunk->map[run_ind].bits & ~PAGE_MASK;
2810 size = run->bin->run_size;
2811 run_pages = (size >> PAGE_SHIFT);
2813 /* Mark pages as unallocated in the chunk map. */
2817 for (i = 0; i < run_pages; i++) {
2818 assert((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY)
2820 chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
2823 if (chunk->ndirty == 0) {
2824 arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
2827 chunk->ndirty += run_pages;
2828 arena->ndirty += run_pages;
2832 for (i = 0; i < run_pages; i++) {
2833 chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE |
2834 CHUNK_MAP_ALLOCATED);
2837 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2839 chunk->map[run_ind+run_pages-1].bits = size |
2840 (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2842 /* Try to coalesce forward. */
2843 if (run_ind + run_pages < chunk_npages &&
2844 (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
2845 size_t nrun_size = chunk->map[run_ind+run_pages].bits &
2849 * Remove successor from runs_avail; the coalesced run is inserted later.
2852 arena_avail_tree_remove(&arena->runs_avail,
2853 &chunk->map[run_ind+run_pages]);
2856 run_pages = size >> PAGE_SHIFT;
2858 assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
2860 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2862 chunk->map[run_ind+run_pages-1].bits = size |
2863 (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2866 /* Try to coalesce backward. */
2867 if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
2868 CHUNK_MAP_ALLOCATED) == 0) {
2869 size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;
2871 run_ind -= prun_size >> PAGE_SHIFT;
2874 * Remove predecessor from runs_avail; the coalesced run is inserted later.
2877 arena_avail_tree_remove(&arena->runs_avail,
2878 &chunk->map[run_ind]);
2881 run_pages = size >> PAGE_SHIFT;
2883 assert((chunk->map[run_ind].bits & ~PAGE_MASK) ==
2885 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2887 chunk->map[run_ind+run_pages-1].bits = size |
2888 (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2891 /* Insert into runs_avail, now that coalescing is complete. */
2892 arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
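/*
 * Coalescing example (illustrative): freeing a 2-page run that sits between
 * a free 3-page successor and a free 1-page predecessor leaves a single
 * 6-page free run; only the first and last map elements of the merged run
 * carry its size, which is all the comparator and any future coalescing
 * need.
 */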
2894 /* Deallocate chunk if it is now completely unused. */
2895 if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
2896 CHUNK_MAP_ALLOCATED)) == arena_maxclass)
2897 arena_chunk_dealloc(arena, chunk);
2899 /* Enforce opt_dirty_max. */
2900 if (arena->ndirty > opt_dirty_max)
2905 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
2906 size_t oldsize, size_t newsize)
2908 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
2909 size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
2911 assert(oldsize > newsize);
2914 * Update the chunk map so that arena_run_dalloc() can treat the
2915 * leading run as separately allocated.
2917 chunk->map[pageind].bits = (oldsize - newsize) | CHUNK_MAP_LARGE |
2918 CHUNK_MAP_ALLOCATED;
2919 chunk->map[pageind+head_npages].bits = newsize | CHUNK_MAP_LARGE |
2920 CHUNK_MAP_ALLOCATED;
2922 arena_run_dalloc(arena, run, false);
2926 arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
2927 size_t oldsize, size_t newsize, bool dirty)
2929 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
2930 size_t npages = newsize >> PAGE_SHIFT;
2932 assert(oldsize > newsize);
2935 * Update the chunk map so that arena_run_dalloc() can treat the
2936 * trailing run as separately allocated.
2938 chunk->map[pageind].bits = newsize | CHUNK_MAP_LARGE |
2939 CHUNK_MAP_ALLOCATED;
2940 chunk->map[pageind+npages].bits = (oldsize - newsize) | CHUNK_MAP_LARGE
2941 | CHUNK_MAP_ALLOCATED;
2943 arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
2947 static arena_run_t *
2948 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2950 arena_chunk_map_t *mapelm;
2952 unsigned i, remainder;
2954 /* Look for a usable run. */
2955 mapelm = arena_run_tree_first(&bin->runs);
2956 if (mapelm != NULL) {
2957 /* run is guaranteed to have available space. */
2958 arena_run_tree_remove(&bin->runs, mapelm);
2959 run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
2961 bin->stats.reruns++;
2965 /* No existing runs have any space available. */
2967 /* Allocate a new run. */
2968 run = arena_run_alloc(arena, bin->run_size, false, false);
2972 /* Initialize run internals. */
2975 for (i = 0; i < bin->regs_mask_nelms - 1; i++)
2976 run->regs_mask[i] = UINT_MAX;
2977 remainder = bin->nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1);
2979 run->regs_mask[i] = UINT_MAX;
2981 /* The last element has spare bits that need to be unset. */
2982 run->regs_mask[i] = (UINT_MAX >> ((1U << (SIZEOF_INT_2POW + 3)) - remainder));
2986 run->regs_minelm = 0;
2988 run->nfree = bin->nregs;
2990 run->magic = ARENA_RUN_MAGIC;
2995 bin->stats.curruns++;
2996 if (bin->stats.curruns > bin->stats.highruns)
2997 bin->stats.highruns = bin->stats.curruns;
3002 /* bin->runcur must have space available before this function is called. */
3003 static inline void *
3004 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
3008 assert(run->magic == ARENA_RUN_MAGIC);
3009 assert(run->nfree > 0);
3011 ret = arena_run_reg_alloc(run, bin);
3012 assert(ret != NULL);
3018 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
3020 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
3023 bin->runcur = arena_bin_nonfull_run_get(arena, bin);
3024 if (bin->runcur == NULL)
3026 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
3027 assert(bin->runcur->nfree > 0);
3029 return (arena_bin_malloc_easy(arena, bin, bin->runcur));
3033 * Calculate bin->run_size such that it meets the following constraints:
3035 * *) bin->run_size >= min_run_size
3036 * *) bin->run_size <= arena_maxclass
3037 * *) bin->run_size <= RUN_MAX_SMALL
3038 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
3040 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
3041 * also calculated here, since these settings are all interdependent.
3044 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
3046 size_t try_run_size, good_run_size;
3047 unsigned good_nregs, good_mask_nelms, good_reg0_offset;
3048 unsigned try_nregs, try_mask_nelms, try_reg0_offset;
3050 assert(min_run_size >= PAGE_SIZE);
3051 assert(min_run_size <= arena_maxclass);
3052 assert(min_run_size <= RUN_MAX_SMALL);
3055 * Calculate known-valid settings before entering the run_size
3056 * expansion loop, so that the first part of the loop always copies valid settings.
3059 * The do..while loop iteratively reduces the number of regions until
3060 * the run header and the regions no longer overlap. A closed formula
3061 * would be quite messy, since there is an interdependency between the
3062 * header's mask length and the number of regions.
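/*
 * Worked example (illustrative only; the run header size is an assumed
 * value, not the real sizeof(arena_run_t)): with 4 kB pages, reg_size == 32
 * and a 40 byte header, the first pass tries (4096 - 40) / 32 + 1 == 127
 * regions, and the do..while loop settles on 126: the header then needs
 * 40 + 4 * 3 == 52 bytes (three additional 32-bit mask words beyond the one
 * counted in the header), which fits in the reg0_offset of
 * 4096 - 126 * 32 == 64 bytes.
 */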
3064 try_run_size = min_run_size;
3065 try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
3066 + 1; /* Counter-act try_nregs-- in loop. */
3069 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
3070 ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
3071 try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
3072 } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
3075 /* run_size expansion loop. */
3078 * Copy valid settings before trying more aggressive settings.
3080 good_run_size = try_run_size;
3081 good_nregs = try_nregs;
3082 good_mask_nelms = try_mask_nelms;
3083 good_reg0_offset = try_reg0_offset;
3085 /* Try more aggressive settings. */
3086 try_run_size += PAGE_SIZE;
3087 try_nregs = ((try_run_size - sizeof(arena_run_t)) /
3088 bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
3091 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
3092 ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ?
3094 try_reg0_offset = try_run_size - (try_nregs *
3096 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
3097 (try_mask_nelms - 1)) > try_reg0_offset);
3098 } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
3099 && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
3100 && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
3102 assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
3103 <= good_reg0_offset);
3104 assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
3106 /* Copy final settings. */
3107 bin->run_size = good_run_size;
3108 bin->nregs = good_nregs;
3109 bin->regs_mask_nelms = good_mask_nelms;
3110 bin->reg0_offset = good_reg0_offset;
3112 return (good_run_size);
3115 #ifdef MALLOC_BALANCE
3117 arena_lock_balance(arena_t *arena)
3119 unsigned contention;
3121 contention = malloc_spin_lock(&arena->lock);
3124 * Calculate the exponentially averaged contention for this
3125 * arena. Due to integer math always rounding down, this value
3126 * decays somewhat faster than normal.
3128 arena->contention = (((uint64_t)arena->contention
3129 * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1))
3130 + (uint64_t)contention) >> BALANCE_ALPHA_INV_2POW;
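/*
 * Equivalently, with alpha == 1/2^BALANCE_ALPHA_INV_2POW the update is
 * avg <- avg * (1 - alpha) + contention * alpha, evaluated in integer
 * arithmetic, so each lock acquisition nudges the decayed contention
 * estimate toward the most recent spin count.
 */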
3131 if (arena->contention >= opt_balance_threshold)
3132 arena_lock_balance_hard(arena);
3137 arena_lock_balance_hard(arena_t *arena)
3141 arena->contention = 0;
3143 arena->stats.nbalance++;
3145 ind = PRN(balance, narenas_2pow);
3146 if (arenas[ind] != NULL)
3147 arenas_map = arenas[ind];
3149 malloc_spin_lock(&arenas_lock);
3150 if (arenas[ind] != NULL)
3151 arenas_map = arenas[ind];
3153 arenas_map = arenas_extend(ind);
3154 malloc_spin_unlock(&arenas_lock);
3160 static inline void *
3161 mag_alloc(mag_t *mag)
if (mag->nrounds == 0)
	return (NULL);
mag->nrounds--;
return (mag->rounds[mag->nrounds]);
3172 mag_load(mag_t *mag)
3180 arena = choose_arena();
3181 bin = &arena->bins[mag->binind];
3182 #ifdef MALLOC_BALANCE
3183 arena_lock_balance(arena);
3185 malloc_spin_lock(&arena->lock);
3187 for (i = mag->nrounds; i < max_rounds; i++) {
3188 if ((run = bin->runcur) != NULL && run->nfree > 0)
3189 round = arena_bin_malloc_easy(arena, bin, run);
3191 round = arena_bin_malloc_hard(arena, bin);
3194 mag->rounds[i] = round;
3198 arena->stats.nmalloc_small += (i - mag->nrounds);
3199 arena->stats.allocated_small += (i - mag->nrounds) * bin->reg_size;
3201 malloc_spin_unlock(&arena->lock);
3205 static inline void *
3206 mag_rack_alloc(mag_rack_t *rack, size_t size, bool zero)
3209 bin_mags_t *bin_mags;
3213 binind = size2bin[size];
3214 assert(binind < nbins);
3215 bin_mags = &rack->bin_mags[binind];
3217 mag = bin_mags->curmag;
3219 /* Create an initial magazine for this size class. */
3220 assert(bin_mags->sparemag == NULL);
3221 mag = mag_create(choose_arena(), binind);
3224 bin_mags->curmag = mag;
3228 ret = mag_alloc(mag);
3230 if (bin_mags->sparemag != NULL) {
3231 if (bin_mags->sparemag->nrounds > 0) {
3232 /* Swap magazines. */
3233 bin_mags->curmag = bin_mags->sparemag;
3234 bin_mags->sparemag = mag;
3235 mag = bin_mags->curmag;
3237 /* Reload the current magazine. */
3241 /* Create a second magazine. */
3242 mag = mag_create(choose_arena(), binind);
3246 bin_mags->sparemag = bin_mags->curmag;
3247 bin_mags->curmag = mag;
3249 ret = mag_alloc(mag);
3254 if (zero == false) {
3256 memset(ret, 0xa5, size);
3258 memset(ret, 0, size);
3260 memset(ret, 0, size);
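/*
 * Descriptive note on the magazine layer: each thread owns a rack with a
 * current and a spare magazine per small size class.  Allocation pops
 * rounds off the current magazine; when it runs dry, the spare is swapped
 * in if it still holds rounds, and otherwise the empty magazine is refilled
 * from the arena in a single locked pass (mag_load()).  Deallocation is
 * symmetric via mag_unload().
 */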
3266 static inline void *
3267 arena_malloc_small(arena_t *arena, size_t size, bool zero)
3274 binind = size2bin[size];
3275 assert(binind < nbins);
3276 bin = &arena->bins[binind];
3277 size = bin->reg_size;
3279 #ifdef MALLOC_BALANCE
3280 arena_lock_balance(arena);
3282 malloc_spin_lock(&arena->lock);
3284 if ((run = bin->runcur) != NULL && run->nfree > 0)
3285 ret = arena_bin_malloc_easy(arena, bin, run);
3287 ret = arena_bin_malloc_hard(arena, bin);
3290 malloc_spin_unlock(&arena->lock);
3295 bin->stats.nrequests++;
3296 arena->stats.nmalloc_small++;
3297 arena->stats.allocated_small += size;
3299 malloc_spin_unlock(&arena->lock);
3301 if (zero == false) {
3303 memset(ret, 0xa5, size);
3305 memset(ret, 0, size);
3307 memset(ret, 0, size);
3313 arena_malloc_large(arena_t *arena, size_t size, bool zero)
3317 /* Large allocation. */
3318 size = PAGE_CEILING(size);
3319 #ifdef MALLOC_BALANCE
3320 arena_lock_balance(arena);
3322 malloc_spin_lock(&arena->lock);
3324 ret = (void *)arena_run_alloc(arena, size, true, zero);
3326 malloc_spin_unlock(&arena->lock);
3330 arena->stats.nmalloc_large++;
3331 arena->stats.allocated_large += size;
3333 malloc_spin_unlock(&arena->lock);
3335 if (zero == false) {
3337 memset(ret, 0xa5, size);
3339 memset(ret, 0, size);
3345 static inline void *
3346 arena_malloc(arena_t *arena, size_t size, bool zero)
3349 assert(arena != NULL);
3350 assert(arena->magic == ARENA_MAGIC);
3352 assert(QUANTUM_CEILING(size) <= arena_maxclass);
3354 if (size <= bin_maxclass) {
3356 if (__isthreaded && opt_mag) {
3357 mag_rack_t *rack = mag_rack;
3359 rack = mag_rack_create(arena);
3364 return (mag_rack_alloc(rack, size, zero));
3367 return (arena_malloc_small(arena, size, zero));
3369 return (arena_malloc_large(arena, size, zero));
3372 static inline void *
3373 imalloc(size_t size)
3378 if (size <= arena_maxclass)
3379 return (arena_malloc(choose_arena(), size, false));
3381 return (huge_malloc(size, false));
3384 static inline void *
3385 icalloc(size_t size)
3388 if (size <= arena_maxclass)
3389 return (arena_malloc(choose_arena(), size, true));
3391 return (huge_malloc(size, true));
3394 /* Only handles large allocations that require more than page alignment. */
3396 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
3400 arena_chunk_t *chunk;
3402 assert((size & PAGE_MASK) == 0);
3403 assert((alignment & PAGE_MASK) == 0);
3405 #ifdef MALLOC_BALANCE
3406 arena_lock_balance(arena);
3408 malloc_spin_lock(&arena->lock);
3410 ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
3412 malloc_spin_unlock(&arena->lock);
3416 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
3418 offset = (uintptr_t)ret & (alignment - 1);
3419 assert((offset & PAGE_MASK) == 0);
3420 assert(offset < alloc_size);
3422 arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
3424 size_t leadsize, trailsize;
3426 leadsize = alignment - offset;
3428 arena_run_trim_head(arena, chunk, ret, alloc_size,
3429 alloc_size - leadsize);
3430 ret = (void *)((uintptr_t)ret + leadsize);
3433 trailsize = alloc_size - leadsize - size;
3434 if (trailsize != 0) {
3435 /* Trim trailing space. */
3436 assert(trailsize < alloc_size);
3437 arena_run_trim_tail(arena, chunk, ret, size + trailsize,
3443 arena->stats.nmalloc_large++;
3444 arena->stats.allocated_large += size;
3446 malloc_spin_unlock(&arena->lock);
3449 memset(ret, 0xa5, size);
3451 memset(ret, 0, size);
3455 static inline void *
3456 ipalloc(size_t alignment, size_t size)
3462 * Round size up to the nearest multiple of alignment.
3464 * This done, we can take advantage of the fact that for each small
3465 * size class, every object is aligned at the smallest power of two
3466 * that is non-zero in the base two representation of the size. For example:
3469 * Size | Base 2 | Minimum alignment
3470 * -----+----------+------------------
3472 * 144 | 10010000 | 16
3473 * 192 | 11000000 | 64
3475 * Depending on runtime settings, it is possible that arena_malloc()
3476 * will further round up to a power of two, but that never causes
3477 * correctness issues.
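/*
 * For instance (assuming the default 16 byte quantum), a request for 90
 * bytes with 32 byte alignment rounds up to ceil_size == 96; every object
 * in the 96 byte class is 32-byte aligned because the lowest set bit of 96
 * is 32, so the ordinary small-allocation path suffices.
 */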
3479 ceil_size = (size + (alignment - 1)) & (-alignment);
3481 * (ceil_size < size) protects against the combination of maximal
3482 * alignment and size greater than maximal alignment.
3484 if (ceil_size < size) {
3485 /* size_t overflow. */
3489 if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
3490 && ceil_size <= arena_maxclass))
3491 ret = arena_malloc(choose_arena(), ceil_size, false);
3496 * We can't achieve subpage alignment, so round up alignment
3497 * permanently; it makes later calculations simpler.
3499 alignment = PAGE_CEILING(alignment);
3500 ceil_size = PAGE_CEILING(size);
3502 * (ceil_size < size) protects against very large sizes within
3503 * PAGE_SIZE of SIZE_T_MAX.
3505 * (ceil_size + alignment < ceil_size) protects against the
3506 * combination of maximal alignment and ceil_size large enough
3507 * to cause overflow. This is similar to the first overflow
3508 * check above, but it needs to be repeated due to the new
3509 * ceil_size value, which may now be *equal* to maximal
3510 * alignment, whereas before we only detected overflow if the
3511 * original size was *greater* than maximal alignment.
3513 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3514 /* size_t overflow. */
3519 * Calculate the size of the over-size run that arena_palloc()
3520 * would need to allocate in order to guarantee the alignment.
3522 if (ceil_size >= alignment)
3523 run_size = ceil_size + alignment - PAGE_SIZE;
3526 * It is possible that (alignment << 1) will cause
3527 * overflow, but it doesn't matter because we also
3528 * subtract PAGE_SIZE, which in the case of overflow
3529 * leaves us with a very large run_size. That causes
3530 * the first conditional below to fail, which means
3531 * that the bogus run_size value never gets used for
3532 * anything important.
3534 run_size = (alignment << 1) - PAGE_SIZE;
3537 if (run_size <= arena_maxclass) {
3538 ret = arena_palloc(choose_arena(), alignment, ceil_size,
3540 } else if (alignment <= chunksize)
3541 ret = huge_malloc(ceil_size, false);
3543 ret = huge_palloc(alignment, ceil_size);
3546 assert(((uintptr_t)ret & (alignment - 1)) == 0);
3550 /* Return the size of the allocation pointed to by ptr. */
3552 arena_salloc(const void *ptr)
3555 arena_chunk_t *chunk;
3556 size_t pageind, mapbits;
3558 assert(ptr != NULL);
3559 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3561 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3562 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3563 mapbits = chunk->map[pageind].bits;
3564 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3565 if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3566 arena_run_t *run = (arena_run_t *)(mapbits & ~PAGE_MASK);
3567 assert(run->magic == ARENA_RUN_MAGIC);
3568 ret = run->bin->reg_size;
3570 ret = mapbits & ~PAGE_MASK;
3577 static inline size_t
3578 isalloc(const void *ptr)
3581 arena_chunk_t *chunk;
3583 assert(ptr != NULL);
3585 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3588 assert(chunk->arena->magic == ARENA_MAGIC);
3590 ret = arena_salloc(ptr);
3592 extent_node_t *node, key;
3594 /* Chunk (huge allocation). */
3596 malloc_mutex_lock(&huge_mtx);
3598 /* Extract from tree of huge allocations. */
3599 key.addr = __DECONST(void *, ptr);
3600 node = extent_tree_ad_search(&huge, &key);
3601 assert(node != NULL);
3605 malloc_mutex_unlock(&huge_mtx);
3612 arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3613 arena_chunk_map_t *mapelm)
3619 run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
3620 assert(run->magic == ARENA_RUN_MAGIC);
3622 size = bin->reg_size;
3625 memset(ptr, 0x5a, size);
3627 arena_run_reg_dalloc(run, bin, ptr, size);
3630 if (run->nfree == bin->nregs) {
3631 /* Deallocate run. */
3632 if (run == bin->runcur)
3634 else if (bin->nregs != 1) {
3635 size_t run_pageind = (((uintptr_t)run -
3636 (uintptr_t)chunk)) >> PAGE_SHIFT;
3637 arena_chunk_map_t *run_mapelm =
3638 &chunk->map[run_pageind];
3640 * This block's conditional is necessary because if the
3641 * run only contains one region, then it never gets
3642 * inserted into the non-full runs tree.
3644 arena_run_tree_remove(&bin->runs, run_mapelm);
3649 arena_run_dalloc(arena, run, true);
3651 bin->stats.curruns--;
3653 } else if (run->nfree == 1 && run != bin->runcur) {
3655 * Make sure that bin->runcur always refers to the lowest
3656 * non-full run, if one exists.
3658 if (bin->runcur == NULL)
3660 else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
3661 /* Switch runcur. */
3662 if (bin->runcur->nfree > 0) {
3663 arena_chunk_t *runcur_chunk =
3664 CHUNK_ADDR2BASE(bin->runcur);
3665 size_t runcur_pageind =
3666 (((uintptr_t)bin->runcur -
3667 (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
3668 arena_chunk_map_t *runcur_mapelm =
3669 &runcur_chunk->map[runcur_pageind];
3671 /* Insert runcur. */
3672 arena_run_tree_insert(&bin->runs,
3677 size_t run_pageind = (((uintptr_t)run -
3678 (uintptr_t)chunk)) >> PAGE_SHIFT;
3679 arena_chunk_map_t *run_mapelm =
3680 &chunk->map[run_pageind];
3682 assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
3684 arena_run_tree_insert(&bin->runs, run_mapelm);
3688 arena->stats.allocated_small -= size;
3689 arena->stats.ndalloc_small++;
3695 mag_unload(mag_t *mag)
3697 arena_chunk_t *chunk;
3700 size_t i, ndeferred, nrounds;
3702 for (ndeferred = mag->nrounds; ndeferred > 0;) {
3703 nrounds = ndeferred;
3704 /* Lock the arena associated with the first round. */
3705 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mag->rounds[0]);
3706 arena = chunk->arena;
3707 #ifdef MALLOC_BALANCE
3708 arena_lock_balance(arena);
3710 malloc_spin_lock(&arena->lock);
3712 /* Deallocate every round that belongs to the locked arena. */
3713 for (i = ndeferred = 0; i < nrounds; i++) {
3714 round = mag->rounds[i];
3715 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(round);
3716 if (chunk->arena == arena) {
3717 size_t pageind = (((uintptr_t)round -
3718 (uintptr_t)chunk) >> PAGE_SHIFT);
3719 arena_chunk_map_t *mapelm =
3720 &chunk->map[pageind];
3721 arena_dalloc_small(arena, chunk, round, mapelm);
3724 * This round was allocated via a different
3725 * arena than the one that is currently locked.
3726 * Stash the round, so that it can be handled in a future pass.
3729 mag->rounds[ndeferred] = round;
3733 malloc_spin_unlock(&arena->lock);
3740 mag_rack_dalloc(mag_rack_t *rack, void *ptr)
3743 arena_chunk_t *chunk;
3746 bin_mags_t *bin_mags;
3748 size_t pageind, binind;
3749 arena_chunk_map_t *mapelm;
3751 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3752 arena = chunk->arena;
3753 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3754 mapelm = &chunk->map[pageind];
3755 run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
3756 assert(run->magic == ARENA_RUN_MAGIC);
3758 binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
3759 sizeof(arena_bin_t);
3760 assert(binind < nbins);
3763 memset(ptr, 0x5a, arena->bins[binind].reg_size);
3765 bin_mags = &rack->bin_mags[binind];
3766 mag = bin_mags->curmag;
3768 /* Create an initial magazine for this size class. */
3769 assert(bin_mags->sparemag == NULL);
3770 mag = mag_create(choose_arena(), binind);
3772 malloc_spin_lock(&arena->lock);
3773 arena_dalloc_small(arena, chunk, ptr, mapelm);
3774 malloc_spin_unlock(&arena->lock);
3777 bin_mags->curmag = mag;
3780 if (mag->nrounds == max_rounds) {
3781 if (bin_mags->sparemag != NULL) {
3782 if (bin_mags->sparemag->nrounds < max_rounds) {
3783 /* Swap magazines. */
3784 bin_mags->curmag = bin_mags->sparemag;
3785 bin_mags->sparemag = mag;
3786 mag = bin_mags->curmag;
3788 /* Unload the current magazine. */
3792 /* Create a second magazine. */
3793 mag = mag_create(choose_arena(), binind);
3795 mag = rack->bin_mags[binind].curmag;
3798 bin_mags->sparemag = bin_mags->curmag;
3799 bin_mags->curmag = mag;
3802 assert(mag->nrounds < max_rounds);
3804 mag->rounds[mag->nrounds] = ptr;
3810 arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
3812 /* Large allocation. */
3813 malloc_spin_lock(&arena->lock);
3815 #ifndef MALLOC_STATS
3819 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
3821 size_t size = chunk->map[pageind].bits & ~PAGE_MASK;
3826 memset(ptr, 0x5a, size);
3828 arena->stats.allocated_large -= size;
3832 arena->stats.ndalloc_large++;
3835 arena_run_dalloc(arena, (arena_run_t *)ptr, true);
3836 malloc_spin_unlock(&arena->lock);
3840 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
3843 arena_chunk_map_t *mapelm;
3845 assert(arena != NULL);
3846 assert(arena->magic == ARENA_MAGIC);
3847 assert(chunk->arena == arena);
3848 assert(ptr != NULL);
3849 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3851 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3852 mapelm = &chunk->map[pageind];
3853 assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
3854 if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
3855 /* Small allocation. */
3857 if (__isthreaded && opt_mag) {
3858 mag_rack_t *rack = mag_rack;
3860 rack = mag_rack_create(arena);
3862 malloc_spin_lock(&arena->lock);
3863 arena_dalloc_small(arena, chunk, ptr,
3865 malloc_spin_unlock(&arena->lock);
3870 mag_rack_dalloc(rack, ptr);
3873 malloc_spin_lock(&arena->lock);
3874 arena_dalloc_small(arena, chunk, ptr, mapelm);
3875 malloc_spin_unlock(&arena->lock);
3880 arena_dalloc_large(arena, chunk, ptr);
3886 arena_chunk_t *chunk;
3888 assert(ptr != NULL);
3890 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3892 arena_dalloc(chunk->arena, chunk, ptr);
3898 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3899 size_t size, size_t oldsize)
3902 assert(size < oldsize);
3905 * Shrink the run, and make trailing pages available for other allocations.
3908 #ifdef MALLOC_BALANCE
3909 arena_lock_balance(arena);
3911 malloc_spin_lock(&arena->lock);
3913 arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
3916 arena->stats.allocated_large -= oldsize - size;
3918 malloc_spin_unlock(&arena->lock);
3922 arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3923 size_t size, size_t oldsize)
3925 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
3926 size_t npages = oldsize >> PAGE_SHIFT;
3928 assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));
3930 /* Try to extend the run. */
3931 assert(size > oldsize);
3932 #ifdef MALLOC_BALANCE
3933 arena_lock_balance(arena);
3935 malloc_spin_lock(&arena->lock);
3937 if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
3938 & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
3939 ~PAGE_MASK) >= size - oldsize) {
3941 * The next run is available and sufficiently large. Split the
3942 * following run, then merge the first part with the existing allocation.
3945 arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
3946 ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
3949 chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
3950 CHUNK_MAP_ALLOCATED;
3951 chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
3952 CHUNK_MAP_ALLOCATED;
3955 arena->stats.allocated_large += size - oldsize;
3957 malloc_spin_unlock(&arena->lock);
3960 malloc_spin_unlock(&arena->lock);
3966 * Try to resize a large allocation, in order to avoid copying. This will
3967 * always fail if growing an object and the following run is already in use.
3970 arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
3974 psize = PAGE_CEILING(size);
3975 if (psize == oldsize) {
3976 /* Same size class. */
3977 if (opt_junk && size < oldsize) {
3978 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
3983 arena_chunk_t *chunk;
3986 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3987 arena = chunk->arena;
3988 assert(arena->magic == ARENA_MAGIC);
3990 if (psize < oldsize) {
3991 /* Fill before shrinking in order to avoid a race. */
3993 memset((void *)((uintptr_t)ptr + size), 0x5a,
3996 arena_ralloc_large_shrink(arena, chunk, ptr, psize,
4000 bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
4002 if (ret == false && opt_zero) {
4003 memset((void *)((uintptr_t)ptr + oldsize), 0,
4012 arena_ralloc(void *ptr, size_t size, size_t oldsize)
4017 /* Try to avoid moving the allocation. */
4018 if (size <= bin_maxclass) {
4019 if (oldsize <= bin_maxclass && size2bin[size] ==
4023 if (oldsize > bin_maxclass && oldsize <= arena_maxclass) {
4024 assert(size > bin_maxclass);
4025 if (arena_ralloc_large(ptr, size, oldsize) == false)
4031 * If we get here, then size and oldsize are different enough that we
4032 * need to move the object. In that case, fall back to allocating new
4033 * space and copying.
4035 ret = arena_malloc(choose_arena(), size, false);
4039 /* Junk/zero-filling were already done by arena_malloc(). */
4040 copysize = (size < oldsize) ? size : oldsize;
4041 memcpy(ret, ptr, copysize);
4045 if (opt_junk && size < oldsize)
4046 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
4047 else if (opt_zero && size > oldsize)
4048 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
4052 static inline void *
4053 iralloc(void *ptr, size_t size)
4057 assert(ptr != NULL);
4060 oldsize = isalloc(ptr);
4062 if (size <= arena_maxclass)
4063 return (arena_ralloc(ptr, size, oldsize));
4065 return (huge_ralloc(ptr, size, oldsize));
4069 arena_new(arena_t *arena)
4073 size_t prev_run_size;
4075 if (malloc_spin_init(&arena->lock))
4079 memset(&arena->stats, 0, sizeof(arena_stats_t));
4082 /* Initialize chunks. */
4083 arena_chunk_tree_dirty_new(&arena->chunks_dirty);
4084 arena->spare = NULL;
4088 arena_avail_tree_new(&arena->runs_avail);
4090 #ifdef MALLOC_BALANCE
4091 arena->contention = 0;
4094 /* Initialize bins. */
4095 prev_run_size = PAGE_SIZE;
4099 /* (2^n)-spaced tiny bins. */
4100 for (; i < ntbins; i++) {
4101 bin = &arena->bins[i];
4103 arena_run_tree_new(&bin->runs);
4105 bin->reg_size = (1U << (TINY_MIN_2POW + i));
4107 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4110 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4115 /* Quantum-spaced bins. */
4116 for (; i < ntbins + nqbins; i++) {
4117 bin = &arena->bins[i];
4119 arena_run_tree_new(&bin->runs);
4121 bin->reg_size = (i - ntbins + 1) << QUANTUM_2POW;
4123 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4126 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4130 /* Cacheline-spaced bins. */
4131 for (; i < ntbins + nqbins + ncbins; i++) {
4132 bin = &arena->bins[i];
4134 arena_run_tree_new(&bin->runs);
4136 bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
4139 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4142 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4146 /* Subpage-spaced bins. */
4147 for (; i < nbins; i++) {
4148 bin = &arena->bins[i];
4150 arena_run_tree_new(&bin->runs);
4152 bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
4155 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4158 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4163 arena->magic = ARENA_MAGIC;
4169 /* Create a new arena and insert it into the arenas array at index ind. */
4171 arenas_extend(unsigned ind)
4175 /* Allocate enough space for trailing bins. */
4176 ret = (arena_t *)base_alloc(sizeof(arena_t)
4177 + (sizeof(arena_bin_t) * (nbins - 1)));
4178 if (ret != NULL && arena_new(ret) == false) {
4182 /* Only reached if there is an OOM error. */
4185 * OOM here is quite inconvenient to propagate, since dealing with it
4186 * would require a check for failure in the fast path. Instead, punt
4187 * by using arenas[0]. In practice, this is an extremely unlikely failure.
4190 _malloc_message(_getprogname(),
4191 ": (malloc) Error initializing arena\n", "", "");
4200 mag_create(arena_t *arena, size_t binind)
4204 if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <=
4206 ret = arena_malloc_small(arena, sizeof(mag_t) + (sizeof(void *)
4207 * (max_rounds - 1)), false);
4209 ret = imalloc(sizeof(mag_t) + (sizeof(void *) * (max_rounds -
4214 ret->binind = binind;
4221 mag_destroy(mag_t *mag)
4224 arena_chunk_t *chunk;
4226 arena_chunk_map_t *mapelm;
4228 chunk = CHUNK_ADDR2BASE(mag);
4229 arena = chunk->arena;
4230 pageind = (((uintptr_t)mag - (uintptr_t)chunk) >> PAGE_SHIFT);
4231 mapelm = &chunk->map[pageind];
4233 assert(mag->nrounds == 0);
4234 if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <=
4236 malloc_spin_lock(&arena->lock);
4237 arena_dalloc_small(arena, chunk, mag, mapelm);
4238 malloc_spin_unlock(&arena->lock);
4244 mag_rack_create(arena_t *arena)
4247 assert(sizeof(mag_rack_t) + (sizeof(bin_mags_t) * (nbins - 1)) <=
4249 return (arena_malloc_small(arena, sizeof(mag_rack_t) +
4250 (sizeof(bin_mags_t) * (nbins - 1)), true));
4254 mag_rack_destroy(mag_rack_t *rack)
4257 arena_chunk_t *chunk;
4258 bin_mags_t *bin_mags;
4260 arena_chunk_map_t *mapelm;
4262 for (i = 0; i < nbins; i++) {
4263 bin_mags = &rack->bin_mags[i];
4264 if (bin_mags->curmag != NULL) {
4265 assert(bin_mags->curmag->binind == i);
4266 mag_unload(bin_mags->curmag);
4267 mag_destroy(bin_mags->curmag);
4269 if (bin_mags->sparemag != NULL) {
4270 assert(bin_mags->sparemag->binind == i);
4271 mag_unload(bin_mags->sparemag);
4272 mag_destroy(bin_mags->sparemag);
4276 chunk = CHUNK_ADDR2BASE(rack);
4277 arena = chunk->arena;
4278 pageind = (((uintptr_t)rack - (uintptr_t)chunk) >> PAGE_SHIFT);
4279 mapelm = &chunk->map[pageind];
4281 malloc_spin_lock(&arena->lock);
4282 arena_dalloc_small(arena, chunk, rack, mapelm);
4283 malloc_spin_unlock(&arena->lock);
4290 /******************************************************************************/
4292 * Begin general internal functions.
4296 huge_malloc(size_t size, bool zero)
4300 extent_node_t *node;
4302 /* Allocate one or more contiguous chunks for this request. */
4304 csize = CHUNK_CEILING(size);
4306 /* size is large enough to cause size_t wrap-around. */
4310 /* Allocate an extent node with which to track the chunk. */
4311 node = base_node_alloc();
4315 ret = chunk_alloc(csize, zero);
4317 base_node_dealloc(node);
4321 /* Insert node into huge. */
4325 malloc_mutex_lock(&huge_mtx);
4326 extent_tree_ad_insert(&huge, node);
4329 huge_allocated += csize;
4331 malloc_mutex_unlock(&huge_mtx);
4333 if (zero == false) {
4335 memset(ret, 0xa5, csize);
4337 memset(ret, 0, csize);
4343 /* Only handles large allocations that require more than chunk alignment. */
4345 huge_palloc(size_t alignment, size_t size)
4348 size_t alloc_size, chunk_size, offset;
4349 extent_node_t *node;
4352 * This allocation requires alignment that is even larger than chunk
4353 * alignment. This means that huge_malloc() isn't good enough.
4355 * Allocate almost twice as many chunks as are demanded by the size or
4356 * alignment, in order to assure the alignment can be achieved, then
4357 * unmap leading and trailing chunks.
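/*
 * Example (illustrative, assuming 1 MB chunks): a 1 MB request with 2 MB
 * alignment over-allocates alloc_size == 2 * 2 MB - 1 MB == 3 MB of chunks,
 * then trims whatever lies before the first 2 MB boundary and everything
 * past the first aligned megabyte, leaving exactly one aligned chunk-sized
 * region.
 */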
4359 assert(alignment >= chunksize);
4361 chunk_size = CHUNK_CEILING(size);
4363 if (size >= alignment)
4364 alloc_size = chunk_size + alignment - chunksize;
4366 alloc_size = (alignment << 1) - chunksize;
4368 /* Allocate an extent node with which to track the chunk. */
4369 node = base_node_alloc();
4373 ret = chunk_alloc(alloc_size, false);
4375 base_node_dealloc(node);
4379 offset = (uintptr_t)ret & (alignment - 1);
4380 assert((offset & chunksize_mask) == 0);
4381 assert(offset < alloc_size);
4383 /* Trim trailing space. */
4384 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
4389 /* Trim leading space. */
4390 chunk_dealloc(ret, alignment - offset);
4392 ret = (void *)((uintptr_t)ret + (alignment - offset));
4394 trailsize = alloc_size - (alignment - offset) - chunk_size;
4395 if (trailsize != 0) {
4396 /* Trim trailing space. */
4397 assert(trailsize < alloc_size);
4398 chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
4403 /* Insert node into huge. */
4405 node->size = chunk_size;
4407 malloc_mutex_lock(&huge_mtx);
4408 extent_tree_ad_insert(&huge, node);
4411 huge_allocated += chunk_size;
4413 malloc_mutex_unlock(&huge_mtx);
4416 memset(ret, 0xa5, chunk_size);
4418 memset(ret, 0, chunk_size);
4424 huge_ralloc(void *ptr, size_t size, size_t oldsize)
4429 /* Avoid moving the allocation if the size class would not change. */
4430 if (oldsize > arena_maxclass &&
4431 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
4432 if (opt_junk && size < oldsize) {
4433 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
4435 } else if (opt_zero && size > oldsize) {
4436 memset((void *)((uintptr_t)ptr + oldsize), 0, size
4443 * If we get here, then size and oldsize are different enough that we
4444 * need to use a different size class. In that case, fall back to
4445 * allocating new space and copying.
4447 ret = huge_malloc(size, false);
4451 copysize = (size < oldsize) ? size : oldsize;
4452 memcpy(ret, ptr, copysize);
4458 huge_dalloc(void *ptr)
4460 extent_node_t *node, key;
4462 malloc_mutex_lock(&huge_mtx);
4464 /* Extract from tree of huge allocations. */
4466 node = extent_tree_ad_search(&huge, &key);
4467 assert(node != NULL);
4468 assert(node->addr == ptr);
4469 extent_tree_ad_remove(&huge, node);
4473 huge_allocated -= node->size;
4476 malloc_mutex_unlock(&huge_mtx);
4480 if (opt_dss && opt_junk)
4481 memset(node->addr, 0x5a, node->size);
4483 chunk_dealloc(node->addr, node->size);
4485 base_node_dealloc(node);
4489 malloc_print_stats(void)
4492 if (opt_print_stats) {
4493 char s[UMAX2S_BUFSIZE];
4494 _malloc_message("___ Begin malloc statistics ___\n", "", "",
4496 _malloc_message("Assertions ",
4503 _malloc_message("Boolean MALLOC_OPTIONS: ",
4504 opt_abort ? "A" : "a", "", "");
4506 _malloc_message(opt_dss ? "D" : "d", "", "", "");
4509 _malloc_message(opt_mag ? "G" : "g", "", "", "");
4511 _malloc_message(opt_junk ? "J" : "j", "", "", "");
4513 _malloc_message(opt_mmap ? "M" : "m", "", "", "");
4515 _malloc_message(opt_utrace ? "PU" : "Pu",
4516 opt_sysv ? "V" : "v",
4517 opt_xmalloc ? "X" : "x",
4518 opt_zero ? "Z\n" : "z\n");
4520 _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
4521 _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
4522 #ifdef MALLOC_BALANCE
4523 _malloc_message("Arena balance threshold: ",
4524 umax2s(opt_balance_threshold, s), "\n", "");
4526 _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
4528 _malloc_message("Quantum size: ", umax2s(QUANTUM, s), "\n", "");
4529 _malloc_message("Cacheline size (assumed): ", umax2s(CACHELINE,
4532 _malloc_message("Tiny 2^n-spaced sizes: [", umax2s((1U <<
4533 TINY_MIN_2POW), s), "..", "");
4534 _malloc_message(umax2s((qspace_min >> 1), s), "]\n", "", "");
4536 _malloc_message("Quantum-spaced sizes: [", umax2s(qspace_min,
4538 _malloc_message(umax2s(qspace_max, s), "]\n", "", "");
4539 _malloc_message("Cacheline-spaced sizes: [", umax2s(cspace_min,
4541 _malloc_message(umax2s(cspace_max, s), "]\n", "", "");
4542 _malloc_message("Subpage-spaced sizes: [", umax2s(sspace_min,
4544 _malloc_message(umax2s(sspace_max, s), "]\n", "", "");
4546 _malloc_message("Rounds per magazine: ", umax2s(max_rounds, s),
4549 _malloc_message("Max dirty pages per arena: ",
4550 umax2s(opt_dirty_max, s), "\n", "");
4552 _malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
4553 _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
4557 size_t allocated, mapped;
4558 #ifdef MALLOC_BALANCE
4559 uint64_t nbalance = 0;
4564 /* Calculate and print allocated/mapped stats. */
4567 for (i = 0, allocated = 0; i < narenas; i++) {
4568 if (arenas[i] != NULL) {
4569 malloc_spin_lock(&arenas[i]->lock);
4571 arenas[i]->stats.allocated_small;
4573 arenas[i]->stats.allocated_large;
4574 #ifdef MALLOC_BALANCE
4575 nbalance += arenas[i]->stats.nbalance;
4577 malloc_spin_unlock(&arenas[i]->lock);
4582 malloc_mutex_lock(&huge_mtx);
4583 allocated += huge_allocated;
4584 mapped = stats_chunks.curchunks * chunksize;
4585 malloc_mutex_unlock(&huge_mtx);
4587 malloc_mutex_lock(&base_mtx);
4588 mapped += base_mapped;
4589 malloc_mutex_unlock(&base_mtx);
4591 malloc_printf("Allocated: %zu, mapped: %zu\n",
4594 #ifdef MALLOC_BALANCE
4595 malloc_printf("Arena balance reassignments: %llu\n",
4599 /* Print chunk stats. */
4601 chunk_stats_t chunks_stats;
4603 malloc_mutex_lock(&huge_mtx);
4604 chunks_stats = stats_chunks;
4605 malloc_mutex_unlock(&huge_mtx);
4607 malloc_printf("chunks: nchunks "
4608 "highchunks curchunks\n");
4609 malloc_printf(" %13llu%13lu%13lu\n",
4610 chunks_stats.nchunks,
4611 chunks_stats.highchunks,
4612 chunks_stats.curchunks);
4615 /* Print huge allocation stats. */
4617 "huge: nmalloc ndalloc allocated\n");
4618 malloc_printf(" %12llu %12llu %12zu\n",
4619 huge_nmalloc, huge_ndalloc, huge_allocated);
4621 /* Print stats for each arena. */
4622 for (i = 0; i < narenas; i++) {
4624 if (arena != NULL) {
4626 "\narenas[%u]:\n", i);
4627 malloc_spin_lock(&arena->lock);
4629 malloc_spin_unlock(&arena->lock);
4633 #endif /* #ifdef MALLOC_STATS */
4634 _malloc_message("--- End malloc statistics ---\n", "", "", "");
4640 size2bin_validate(void)
4642 size_t i, size, binind;
4644 assert(size2bin[0] == 0xffU);
4648 for (; i < (1U << TINY_MIN_2POW); i++) {
4649 size = pow2_ceil(1U << TINY_MIN_2POW);
4650 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4651 assert(size2bin[i] == binind);
4653 for (; i < qspace_min; i++) {
4654 size = pow2_ceil(i);
4655 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4656 assert(size2bin[i] == binind);
4659 /* Quantum-spaced. */
4660 for (; i <= qspace_max; i++) {
4661 size = QUANTUM_CEILING(i);
4662 binind = ntbins + (size >> QUANTUM_2POW) - 1;
4663 assert(size2bin[i] == binind);
4665 /* Cacheline-spaced. */
4666 for (; i <= cspace_max; i++) {
4667 size = CACHELINE_CEILING(i);
4668 binind = ntbins + nqbins + ((size - cspace_min) >>
4670 assert(size2bin[i] == binind);
4673 for (; i <= sspace_max; i++) {
4674 size = SUBPAGE_CEILING(i);
4675 binind = ntbins + nqbins + ncbins + ((size - sspace_min)
4677 assert(size2bin[i] == binind);
4686 if (opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT
4687 || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT)
4688 return (size2bin_init_hard());
4690 size2bin = const_size2bin;
4692 assert(sizeof(const_size2bin) == bin_maxclass + 1);
4693 size2bin_validate();
4699 size2bin_init_hard(void)
4701 size_t i, size, binind;
4702 uint8_t *custom_size2bin;
4704 assert(opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT
4705 || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT);
4707 custom_size2bin = (uint8_t *)base_alloc(bin_maxclass + 1);
4708 if (custom_size2bin == NULL)
4711 custom_size2bin[0] = 0xffU;
4715 for (; i < (1U << TINY_MIN_2POW); i++) {
4716 size = pow2_ceil(1U << TINY_MIN_2POW);
4717 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4718 custom_size2bin[i] = binind;
4720 for (; i < qspace_min; i++) {
4721 size = pow2_ceil(i);
4722 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4723 custom_size2bin[i] = binind;
4726 /* Quantum-spaced. */
4727 for (; i <= qspace_max; i++) {
4728 size = QUANTUM_CEILING(i);
4729 binind = ntbins + (size >> QUANTUM_2POW) - 1;
4730 custom_size2bin[i] = binind;
4732 /* Cacheline-spaced. */
4733 for (; i <= cspace_max; i++) {
4734 size = CACHELINE_CEILING(i);
4735 binind = ntbins + nqbins + ((size - cspace_min) >>
4737 custom_size2bin[i] = binind;
4740 for (; i <= sspace_max; i++) {
4741 size = SUBPAGE_CEILING(i);
4742 binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
4744 custom_size2bin[i] = binind;
4747 size2bin = custom_size2bin;
4749 size2bin_validate();
4755 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
4756 * implementation has to take pains to avoid infinite recursion during initialization.
4763 if (malloc_initialized == false)
4764 return (malloc_init_hard());
4770 malloc_init_hard(void)
4774 char buf[PATH_MAX + 1];
4777 malloc_mutex_lock(&init_lock);
4778 if (malloc_initialized) {
4780 * Another thread initialized the allocator before this one
4781 * acquired init_lock.
4783 malloc_mutex_unlock(&init_lock);
4787 /* Get number of CPUs. */
4794 len = sizeof(ncpus);
4795 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
4802 * Increase the chunk size to the largest page size that is greater
4803 * than the default chunk size and less than or equal to 4MB.
4806 size_t pagesizes[MAXPAGESIZES];
4809 nsizes = getpagesizes(pagesizes, MAXPAGESIZES);
4810 for (k = 0; k < nsizes; k++)
4811 if (pagesizes[k] <= (1LU << 22))
4812 while ((1LU << opt_chunk_2pow) < pagesizes[k])
4816 for (i = 0; i < 3; i++) {
4819 /* Get runtime configuration. */
4822 if ((linklen = readlink("/etc/malloc.conf", buf,
4823 sizeof(buf) - 1)) != -1) {
4825 * Use the contents of the "/etc/malloc.conf"
4826 * symbolic link's name.
4828 buf[linklen] = '\0';
4831 /* No configuration specified. */
4837 if (issetugid() == 0 && (opts =
4838 getenv("MALLOC_OPTIONS")) != NULL) {
4840 * Do nothing; opts is already initialized to
4841 * the value of the MALLOC_OPTIONS environment variable.
4845 /* No configuration specified. */
4851 if (_malloc_options != NULL) {
4853 * Use options that were compiled into the program.
4856 opts = _malloc_options;
4858 /* No configuration specified. */
4868 for (j = 0; opts[j] != '\0'; j++) {
4872 /* Parse repetition count, if any. */
4873 for (nreps = 0, nseen = false;; j++, nseen = true) {
4875 case '0': case '1': case '2': case '3':
4876 case '4': case '5': case '6': case '7':
4879 nreps += opts[j] - '0';
4889 for (k = 0; k < nreps; k++) {
4898 #ifdef MALLOC_BALANCE
4899 opt_balance_threshold >>= 1;
4903 #ifdef MALLOC_BALANCE
4904 if (opt_balance_threshold == 0)
4905 opt_balance_threshold = 1;
4906 else if ((opt_balance_threshold << 1)
4907 > opt_balance_threshold)
4908 opt_balance_threshold <<= 1;
4912 if (opt_cspace_max_2pow - 1 >
4913 opt_qspace_max_2pow &&
4914 opt_cspace_max_2pow >
4916 opt_cspace_max_2pow--;
4919 if (opt_cspace_max_2pow < PAGE_SHIFT
4921 opt_cspace_max_2pow++;
4934 opt_dirty_max >>= 1;
4937 if (opt_dirty_max == 0)
4939 else if ((opt_dirty_max << 1) != 0)
4940 opt_dirty_max <<= 1;
4958 * Chunks always require at least one
4959 * header page, so chunks can never be
4960 * smaller than two pages.
4962 if (opt_chunk_2pow > PAGE_SHIFT + 1)
4966 if (opt_chunk_2pow + 1 <
4967 (sizeof(size_t) << 3))
4981 opt_narenas_lshift--;
4984 opt_narenas_lshift++;
4987 opt_print_stats = false;
4990 opt_print_stats = true;
4993 if (opt_qspace_max_2pow > QUANTUM_2POW)
4994 opt_qspace_max_2pow--;
4997 if (opt_qspace_max_2pow + 1 <
4998 opt_cspace_max_2pow)
4999 opt_qspace_max_2pow++;
5003 if (opt_mag_size_2pow + 1 < (8U <<
5005 opt_mag_size_2pow++;
5009 * Make sure there's always at least
5010 * one round per magazine.
5012 if ((1U << (opt_mag_size_2pow-1)) >=
5014 opt_mag_size_2pow--;
5030 opt_xmalloc = false;
5046 _malloc_message(_getprogname(),
5047 ": (malloc) Unsupported character "
5048 "in malloc options: '", cbuf,
5057 /* Make sure that there is some method for acquiring memory. */
5058 if (opt_dss == false && opt_mmap == false)
5062 /* Take care to call atexit() only once. */
5063 if (opt_print_stats) {
5064 /* Print statistics at exit. */
5065 atexit(malloc_print_stats);
5070 * Calculate the actual number of rounds per magazine, taking into
5071 * account header overhead.
5073 max_rounds = (1LLU << (opt_mag_size_2pow - SIZEOF_PTR_2POW)) -
5074 (sizeof(mag_t) >> SIZEOF_PTR_2POW) + 1;
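/*
 * For example (all sizes assumed purely for illustration): with 8 byte
 * pointers and a 512 byte magazine, there are 512 / 8 == 64 pointer-sized
 * slots; if the mag_t header occupies three such slots (binind, nrounds and
 * the first rounds element), then max_rounds == 64 - 3 + 1 == 62, since the
 * formula adds back the rounds slot already counted inside sizeof(mag_t).
 */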
5077 /* Set variables according to the value of opt_[qc]space_max_2pow. */
5078 qspace_max = (1U << opt_qspace_max_2pow);
5079 cspace_min = CACHELINE_CEILING(qspace_max);
5080 if (cspace_min == qspace_max)
5081 cspace_min += CACHELINE;
5082 cspace_max = (1U << opt_cspace_max_2pow);
5083 sspace_min = SUBPAGE_CEILING(cspace_max);
5084 if (sspace_min == cspace_max)
5085 sspace_min += SUBPAGE;
5086 assert(sspace_min < PAGE_SIZE);
5087 sspace_max = PAGE_SIZE - SUBPAGE;
5090 assert(QUANTUM_2POW >= TINY_MIN_2POW);
5092 assert(ntbins <= QUANTUM_2POW);
5093 nqbins = qspace_max >> QUANTUM_2POW;
5094 ncbins = ((cspace_max - cspace_min) >> CACHELINE_2POW) + 1;
5095 nsbins = ((sspace_max - sspace_min) >> SUBPAGE_2POW) + 1;
5096 nbins = ntbins + nqbins + ncbins + nsbins;
5098 if (size2bin_init()) {
5099 malloc_mutex_unlock(&init_lock);
5103 /* Set variables according to the value of opt_chunk_2pow. */
5104 chunksize = (1LU << opt_chunk_2pow);
5105 chunksize_mask = chunksize - 1;
5106 chunk_npages = (chunksize >> PAGE_SHIFT);
5111 * Compute the header size such that it is large enough to
5112 * contain the page map.
5114 header_size = sizeof(arena_chunk_t) +
5115 (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
5116 arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
5117 ((header_size & PAGE_MASK) != 0);
5119 arena_maxclass = chunksize - (arena_chunk_header_npages <<
	memset(&stats_chunks, 0, sizeof(chunk_stats_t));

	/* Various sanity checks that regard configuration. */
	assert(chunksize >= PAGE_SIZE);

	/* Initialize chunks data. */
	malloc_mutex_init(&huge_mtx);
	extent_tree_ad_new(&huge);
	malloc_mutex_init(&dss_mtx);
	dss_base = sbrk(0);
	dss_prev = dss_base;
	dss_max = dss_base;
	extent_tree_szad_new(&dss_chunks_szad);
	extent_tree_ad_new(&dss_chunks_ad);
	/* Initialize base allocation data structures. */

	/*
	 * Allocate a base chunk here, since it doesn't actually have to be
	 * chunk-aligned. Doing this before allocating any other chunks allows
	 * the use of space that would otherwise be wasted.
	 */
	base_pages_alloc(0);

	malloc_mutex_init(&base_mtx);
	/*
	 * For SMP systems, create twice as many arenas as there are
	 * CPUs by default.
	 */
	opt_narenas_lshift++;
	/* Determine how many arenas to use. */
	narenas = ncpus;
	if (opt_narenas_lshift > 0) {
		if ((narenas << opt_narenas_lshift) > narenas)
			narenas <<= opt_narenas_lshift;
		/*
		 * Make sure not to exceed the limits of what base_alloc() can
		 * handle.
		 */
		if (narenas * sizeof(arena_t *) > chunksize)
			narenas = chunksize / sizeof(arena_t *);
	} else if (opt_narenas_lshift < 0) {
		if ((narenas >> -opt_narenas_lshift) < narenas)
			narenas >>= -opt_narenas_lshift;
		/* Make sure there is at least one arena. */
		if (narenas == 0)
			narenas = 1;
	}
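	/*
	 * For example (illustrative only): with 4 CPUs and
	 * opt_narenas_lshift == 1 (the SMP doubling described above), narenas
	 * becomes 4 << 1 == 8; the opt_narenas_lshift--/++ option handlers
	 * earlier in this function halve or double that count.
	 */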
#ifdef MALLOC_BALANCE
	assert(narenas != 0);
	for (narenas_2pow = 0;
	     (narenas >> (narenas_2pow + 1)) != 0;
	     narenas_2pow++);
#endif
		static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
		    23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
		    89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
		    151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
		    223, 227, 229, 233, 239, 241, 251, 257, 263};
		unsigned nprimes, parenas;

		/*
		 * Pick a prime number of hash arenas that is more than narenas
		 * so that direct hashing of pthread_self() pointers tends to
		 * spread allocations evenly among the arenas.
		 */
		assert((narenas & 1) == 0); /* narenas must be even. */
		nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
		parenas = primes[nprimes - 1]; /* In case not enough primes. */
		for (i = 1; i < nprimes; i++) {
			if (primes[i] > narenas) {
				parenas = primes[i];
				break;
			}
		}
		narenas = parenas;
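		/*
		 * Example (illustrative): if narenas is 8 at this point, the
		 * first prime in the table above that exceeds 8 is 11, so 11
		 * hash arenas end up being used.
		 */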
#  ifndef MALLOC_BALANCE
	/* Allocate and initialize arenas. */
	arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
	if (arenas == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}
	/*
	 * Zero the array. In practice, this should always be pre-zeroed,
	 * since it was just mmap()ed, but let's be sure.
	 */
	memset(arenas, 0, sizeof(arena_t *) * narenas);
	/*
	 * Initialize one arena here. The rest are lazily created in
	 * choose_arena_hard().
	 */
	arenas_extend(0);
	if (arenas[0] == NULL) {
		malloc_mutex_unlock(&init_lock);
		return (true);
	}

	/*
	 * Assign the initial arena to the initial thread, in order to avoid
	 * spurious creation of an extra arena if the application switches to
	 * threaded mode.
	 */
	arenas_map = arenas[0];
	/*
	 * Seed here for the initial thread, since choose_arena_hard() is only
	 * called for other threads. The seed value doesn't really matter.
	 */
#ifdef MALLOC_BALANCE
	malloc_spin_init(&arenas_lock);

	malloc_initialized = true;
	malloc_mutex_unlock(&init_lock);
	return (false);
}
/*
 * End general internal functions.
 */
/******************************************************************************/
/*
 * Begin malloc(3)-compatible functions.
 */
void *
malloc(size_t size)
{

	if (malloc_init()) {
		ret = NULL;
		goto RETURN;
	}

	if (opt_sysv == false)
		size = 1;

	ret = imalloc(size);

	if (ret == NULL) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in malloc(): out of memory\n", "",
		    "");
	}

	UTRACE(0, size, ret);
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{

	/* Make sure that alignment is a large enough power of 2. */
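	/*
	 * Aside (illustrative note, not in the original): (alignment - 1) &
	 * alignment clears to zero for powers of two, e.g. 16 gives
	 * 0x0f & 0x10 == 0, whereas a non-power such as 24 gives
	 * 0x17 & 0x18 == 0x10, so 24 is rejected by the test below.
	 */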
	if (((alignment - 1) & alignment) != 0
	    || alignment < sizeof(void *)) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in posix_memalign(): "
		    "invalid alignment\n", "", "");
	if (opt_sysv == false)
		size = 1;

	result = ipalloc(alignment, size);

	if (result == NULL) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in posix_memalign(): out of memory\n",
		    "", "");
	}

	UTRACE(0, size, result);
void *
calloc(size_t num, size_t size)
{

	if (malloc_init()) {
		num_size = 0;
		ret = NULL;
		goto RETURN;
	}

	num_size = num * size;
	if (num_size == 0) {
		if ((opt_sysv == false) && ((num == 0) || (size == 0)))
			num_size = 1;
	/*
	 * Try to avoid division here. We know that it isn't possible to
	 * overflow during multiplication if neither operand uses any of the
	 * most significant half of the bits in a size_t.
	 */
	} else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
	    && (num_size / size != num)) {
		/* size_t overflow. */
		ret = NULL;
		goto RETURN;
	}
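	/*
	 * Worked illustration of the check above (not from the original
	 * source): on a 32-bit system, SIZE_T_MAX << 16 masks the upper half
	 * of a size_t. If neither num nor size has any of those bits set,
	 * both are less than 2^16, so num * size is less than 2^32 and cannot
	 * overflow; only when one operand is "large" is the division test
	 * needed to confirm that num_size did not wrap.
	 */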
	ret = icalloc(num_size);

	if (ret == NULL) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in calloc(): out of memory\n", "",
		    "");
	}

	UTRACE(0, num_size, ret);
void *
realloc(void *ptr, size_t size)
{

	if (opt_sysv == false)
		size = 1;

	assert(malloc_initialized);

	ret = iralloc(ptr, size);

	if (ret == NULL) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in realloc(): out of "
		    "memory\n", "", "");
	}

	ret = imalloc(size);

	if (ret == NULL) {
		_malloc_message(_getprogname(),
		    ": (malloc) Error in realloc(): out of "
		    "memory\n", "", "");
	}

	UTRACE(ptr, size, ret);
void
free(void *ptr)
{

	assert(malloc_initialized);

	idalloc(ptr);
}
/*
 * End malloc(3)-compatible functions.
 */
/******************************************************************************/
/*
 * Begin non-standard functions.
 */
size_t
malloc_usable_size(const void *ptr)
{

	assert(ptr != NULL);

	return (isalloc(ptr));
}
/*
 * End non-standard functions.
 */
/******************************************************************************/
/*
 * Begin library-private functions.
 */

/******************************************************************************/
/*
 * Begin thread cache.
 */
/*
 * We provide an unpublished interface in order to receive notifications from
 * the pthreads library whenever a thread exits. This allows us to clean up
 * thread caches.
 */
void
_malloc_thread_cleanup(void)
{

	if (mag_rack != NULL) {
		assert(mag_rack != (void *)-1);
		mag_rack_destroy(mag_rack);

		mag_rack = (void *)-1;
	}
}
/*
 * The following functions are used by threading libraries for protection of
 * malloc during fork(). These functions are only called if the program is
 * running in threaded mode, so there is no need to check whether the program
 * is threaded here.
 */

void
_malloc_prefork(void)
{
	arena_t *larenas[narenas], *tarenas[narenas];

	/* Acquire all mutexes in a safe order. */
	/*
	 * arenas_lock must be acquired after all of the arena mutexes, in
	 * order to avoid potential deadlock with arena_lock_balance[_hard]().
	 * Since arenas_lock protects the arenas array, the following code has
	 * to race with arenas_extend() callers until it succeeds in locking
	 * all arenas before locking arenas_lock.
	 */
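	/*
	 * Descriptive note (not in the original): larenas[] records which
	 * arena locks have already been acquired; arenas[] is re-read under
	 * arenas_lock, and any arenas created in the meantime are locked
	 * before arenas_lock itself is finally taken, so that arenas_lock is
	 * always the last lock acquired.
	 */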
	memset(larenas, 0, sizeof(arena_t *) * narenas);
	malloc_spin_lock(&arenas_lock);
	for (i = 0; i < narenas; i++) {
		if (arenas[i] != larenas[i]) {
			memcpy(tarenas, arenas, sizeof(arena_t *) *
			    narenas);
			malloc_spin_unlock(&arenas_lock);
			for (j = 0; j < narenas; j++) {
				if (larenas[j] != tarenas[j]) {
					larenas[j] = tarenas[j];
					malloc_spin_lock(
					    &larenas[j]->lock);
				}
			}
		}
	}
	malloc_mutex_lock(&base_mtx);

	malloc_mutex_lock(&huge_mtx);

	malloc_mutex_lock(&dss_mtx);
}
void
_malloc_postfork(void)
{
	arena_t *larenas[narenas];

	/* Release all mutexes, now that fork() has completed. */
	malloc_mutex_unlock(&dss_mtx);

	malloc_mutex_unlock(&huge_mtx);

	malloc_mutex_unlock(&base_mtx);
	memcpy(larenas, arenas, sizeof(arena_t *) * narenas);
	malloc_spin_unlock(&arenas_lock);
	for (i = 0; i < narenas; i++) {
		if (larenas[i] != NULL)
			malloc_spin_unlock(&larenas[i]->lock);
	}
}
/*
 * End library-private functions.
 */
/******************************************************************************/