lib/libc/stdlib/malloc.c

   1 /*-
   2  * Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice(s), this list of conditions and the following disclaimer as
  10  *    the first lines of this file unmodified other than the possible
  11  *    addition of one or more copyright notices.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice(s), this list of conditions and the following disclaimer in
  14  *    the documentation and/or other materials provided with the
  15  *    distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  26  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  *
  29  *******************************************************************************
  30  *
  31  * This allocator implementation is designed to provide scalable performance
  32  * for multi-threaded programs on multi-processor systems.  The following
  33  * features are included for this purpose:
  34  *
  35  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
  36  *     contention and cache sloshing.
  37  *
  38  *   + Thread-specific caching is used if there are multiple threads, which
  39  *     reduces the amount of locking.
  40  *
  41  *   + Cache line sharing between arenas is avoided for internal data
  42  *     structures.
  43  *
  44  *   + Memory is managed in chunks and runs (chunks can be split into runs),
  45  *     rather than as individual pages.  This provides a constant-time
  46  *     mechanism for associating allocations with particular arenas.
  47  *
  48  * Allocation requests are rounded up to the nearest size class, and no record
  49  * of the original request size is maintained.  Allocations are broken into
  50  * categories according to size class.  Assuming runtime defaults, 4 kB pages
  51  * and a 16 byte quantum on a 32-bit system, the size classes in each category
  52  * are as follows:
  53  *
  54  *   |=======================================|
  55  *   | Category | Subcategory      |    Size |
  56  *   |=======================================|
  57  *   | Small    | Tiny             |       2 |
  58  *   |          |                  |       4 |
  59  *   |          |                  |       8 |
  60  *   |          |------------------+---------|
  61  *   |          | Quantum-spaced   |      16 |
  62  *   |          |                  |      32 |
  63  *   |          |                  |      48 |
  64  *   |          |                  |     ... |
  65  *   |          |                  |      96 |
  66  *   |          |                  |     112 |
  67  *   |          |                  |     128 |
  68  *   |          |------------------+---------|
  69  *   |          | Cacheline-spaced |     192 |
  70  *   |          |                  |     256 |
  71  *   |          |                  |     320 |
  72  *   |          |                  |     384 |
  73  *   |          |                  |     448 |
  74  *   |          |                  |     512 |
  75  *   |          |------------------+---------|
  76  *   |          | Sub-page         |     760 |
  77  *   |          |                  |    1024 |
  78  *   |          |                  |    1280 |
  79  *   |          |                  |     ... |
  80  *   |          |                  |    3328 |
  81  *   |          |                  |    3584 |
  82  *   |          |                  |    3840 |
  83  *   |=======================================|
  84  *   | Large                       |    4 kB |
  85  *   |                             |    8 kB |
  86  *   |                             |   12 kB |
  87  *   |                             |     ... |
  88  *   |                             | 1012 kB |
  89  *   |                             | 1016 kB |
  90  *   |                             | 1020 kB |
  91  *   |=======================================|
  92  *   | Huge                        |    1 MB |
  93  *   |                             |    2 MB |
  94  *   |                             |    3 MB |
  95  *   |                             |     ... |
  96  *   |=======================================|
  97  *
  98  * A different mechanism is used for each category:
  99  *
 100  *   Small : Each size class is segregated into its own set of runs.  Each run
 101  *           maintains a bitmap of which regions are free/allocated.
 102  *
 103  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 104  *           in the associated arena chunk header maps.
 105  *
 106  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 107  *          Metadata are stored in a separate red-black tree.
 108  *
 109  *******************************************************************************
 110  */
 111
 112 /*
 113  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
 114  * defaults the A and J runtime options to off.  These settings are appropriate
 115  * for production systems.
 116  */
 117 #define MALLOC_PRODUCTION
 118
 119 #ifndef MALLOC_PRODUCTION
 120    /*
 121     * MALLOC_DEBUG enables assertions and other sanity checks, and disables
 122     * inline functions.
 123     */
 124 #  define MALLOC_DEBUG
 125
 126    /* MALLOC_STATS enables statistics calculation. */
 127 #  define MALLOC_STATS
 128 #endif
 129
 130 /*
 131  * MALLOC_TINY enables support for tiny objects, which are smaller than one
 132  * quantum.
 133  */
 134 #define MALLOC_TINY
 135
 136 /*
 137  * MALLOC_MAG enables a magazine-based thread-specific caching layer for small
 138  * objects.  This makes it possible to allocate/deallocate objects without any
 139  * locking when the cache is in the steady state.
 140  */
 141 #define MALLOC_MAG
 142
 143 /*
 144  * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
 145  * re-balances arena load if exponentially averaged contention exceeds a
 146  * certain threshold.
 147  */
 148 #define MALLOC_BALANCE
 149
 150 /*
 151  * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
 152  * segment (DSS).  In an ideal world, this functionality would be completely
 153  * unnecessary, but we are burdened by history and the lack of resource limits
 154  * for anonymous mapped memory.
 155  */
 156 #define MALLOC_DSS
 157
 158 #include <sys/cdefs.h>
 159 __FBSDID("$FreeBSD$");
 160
 161 #include "libc_private.h"
 162 #ifdef MALLOC_DEBUG
 163 #  define _LOCK_DEBUG
 164 #endif
 165 #include "spinlock.h"
 166 #include "namespace.h"
 167 #include <sys/mman.h>
 168 #include <sys/param.h>
 169 #include <sys/stddef.h>
 170 #include <sys/time.h>
 171 #include <sys/types.h>
 172 #include <sys/sysctl.h>
 173 #include <sys/uio.h>
 174 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
 175
 176 #include <machine/cpufunc.h>
 177 #include <machine/param.h>
 178 #include <machine/vmparam.h>
 179
 180 #include <errno.h>
 181 #include <limits.h>
 182 #include <pthread.h>
 183 #include <sched.h>
 184 #include <stdarg.h>
 185 #include <stdbool.h>
 186 #include <stdio.h>
 187 #include <stdint.h>
 188 #include <stdlib.h>
 189 #include <string.h>
 190 #include <strings.h>
 191 #include <unistd.h>
 192
 193 #include "un-namespace.h"
 194
 195 #ifdef MALLOC_DEBUG
 196 #  ifdef NDEBUG
 197 #    undef NDEBUG
 198 #  endif
 199 #else
 200 #  ifndef NDEBUG
 201 #    define NDEBUG
 202 #  endif
 203 #endif
 204 #include <assert.h>
 205
 206 #include "rb.h"
 207
 208 #ifdef MALLOC_DEBUG
 209    /* Disable inlining to make debugging easier. */
 210 #  define inline
 211 #endif
 212
 213 /* Size of stack-allocated buffer passed to strerror_r(). */
 214 #define STRERROR_BUF            64
 215
 216 /*
 217  * Minimum alignment of allocations is 2^QUANTUM_2POW bytes.
 218  */
 219 #ifdef __i386__
 220 #  define QUANTUM_2POW          4
 221 #  define SIZEOF_PTR_2POW       2
 222 #  define CPU_SPINWAIT          __asm__ volatile("pause")
 223 #endif
 224 #ifdef __ia64__
 225 #  define QUANTUM_2POW          4
 226 #  define SIZEOF_PTR_2POW       3
 227 #endif
 228 #ifdef __alpha__
 229 #  define QUANTUM_2POW          4
 230 #  define SIZEOF_PTR_2POW       3
 231 #  define NO_TLS
 232 #endif
 233 #ifdef __sparc64__
 234 #  define QUANTUM_2POW          4
 235 #  define SIZEOF_PTR_2POW       3
 236 #  define NO_TLS
 237 #endif
 238 #ifdef __amd64__
 239 #  define QUANTUM_2POW          4
 240 #  define SIZEOF_PTR_2POW       3
 241 #  define CPU_SPINWAIT          __asm__ volatile("pause")
 242 #endif
 243 #ifdef __arm__
 244 #  define QUANTUM_2POW          3
 245 #  define SIZEOF_PTR_2POW       2
 246 #  define NO_TLS
 247 #endif
 248 #ifdef __mips__
 249 #  define QUANTUM_2POW          3
 250 #  define SIZEOF_PTR_2POW       2
 251 #  define NO_TLS
 252 #endif
 253 #ifdef __powerpc__
 254 #  define QUANTUM_2POW          4
 255 #  define SIZEOF_PTR_2POW       2
 256 #endif
 257
 258 #define QUANTUM                 ((size_t)(1U << QUANTUM_2POW))
 259 #define QUANTUM_MASK            (QUANTUM - 1)
 260
 261 #define SIZEOF_PTR              (1U << SIZEOF_PTR_2POW)
 262
 263 /* sizeof(int) == (1U << SIZEOF_INT_2POW). */
 264 #ifndef SIZEOF_INT_2POW
 265 #  define SIZEOF_INT_2POW       2
 266 #endif
 267
 268 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
 269 #if (!defined(PIC) && !defined(NO_TLS))
 270 #  define NO_TLS
 271 #endif
 272
 273 #ifdef NO_TLS
 274    /* MALLOC_MAG requires TLS. */
 275 #  ifdef MALLOC_MAG
 276 #    undef MALLOC_MAG
 277 #  endif
 278    /* MALLOC_BALANCE requires TLS. */
 279 #  ifdef MALLOC_BALANCE
 280 #    undef MALLOC_BALANCE
 281 #  endif
 282 #endif
 283
 284 /*
 285  * Size and alignment of memory chunks that are allocated by the OS's virtual
 286  * memory system.
 287  */
 288 #define CHUNK_2POW_DEFAULT      20
 289
 290 /* Maximum number of dirty pages per arena. */
 291 #define DIRTY_MAX_DEFAULT       (1U << 9)
 292
 293 /*
 294  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing.
 295  * In addition, this controls the spacing of cacheline-spaced size classes.
 296  */
 297 #define CACHELINE_2POW          6
 298 #define CACHELINE               ((size_t)(1U << CACHELINE_2POW))
 299 #define CACHELINE_MASK          (CACHELINE - 1)
 300
 301 /*
 302  * Subpages are an artificially designated partitioning of pages.  Their only
 303  * purpose is to support subpage-spaced size classes.
 304  *
 305  * There must be at least 4 subpages per page, due to the way size classes are
 306  * handled.
 307  */
 308 #define SUBPAGE_2POW            8
 309 #define SUBPAGE                 ((size_t)(1U << SUBPAGE_2POW))
 310 #define SUBPAGE_MASK            (SUBPAGE - 1)
 311
 312 #ifdef MALLOC_TINY
 313    /* Smallest size class to support. */
 314 #  define TINY_MIN_2POW         1
 315 #endif
 316
 317 /*
 318  * Maximum size class that is a multiple of the quantum, but not (necessarily)
 319  * a power of 2.  Above this size, allocations are rounded up to the nearest
 320  * power of 2.
 321  */
 322 #define QSPACE_MAX_2POW_DEFAULT 7
 323
 324 /*
 325  * Maximum size class that is a multiple of the cacheline, but not (necessarily)
 326  * a power of 2.  Above this size, allocations are rounded up to the nearest
 327  * power of 2.
 328  */
 329 #define CSPACE_MAX_2POW_DEFAULT 9
 330
 331 /*
 332  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 333  * as small as possible such that this setting is still honored, without
 334  * violating other constraints.  The goal is to make runs as small as possible
 335  * without exceeding a per run external fragmentation threshold.
 336  *
 337  * We use binary fixed point math for overhead computations, where the binary
 338  * point is implicitly RUN_BFP bits to the left.
 339  *
 340  * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 341  * honored for some/all object sizes, since there is one bit of header overhead
 342  * per object (plus a constant).  This constraint is relaxed (ignored) for runs
 343  * that are so small that the per-region overhead is greater than:
 344  *
 345  *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
 346  */
 347 #define RUN_BFP                 12
 348 /*                                    \/   Implicit binary fixed point. */
 349 #define RUN_MAX_OVRHD           0x0000003dU
 350 #define RUN_MAX_OVRHD_RELAX     0x00001800U
 351
 352 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
 353 #define RUN_MAX_SMALL   (12 * PAGE_SIZE)
 354
 355 /*
 356  * Hyper-threaded CPUs may need a special instruction inside spin loops in
 357  * order to yield to another virtual CPU.  If no such instruction is defined
 358  * above, make CPU_SPINWAIT a no-op.
 359  */
 360 #ifndef CPU_SPINWAIT
 361 #  define CPU_SPINWAIT
 362 #endif
 363
 364 /*
 365  * Adaptive spinning must eventually switch to blocking, in order to avoid the
 366  * potential for priority inversion deadlock.  Backing off past a certain point
 367  * can actually waste time.
 368  */
 369 #define SPIN_LIMIT_2POW         11
 370
 371 /*
 372  * Conversion from spinning to blocking is expensive; we use (1U <<
 373  * BLOCK_COST_2POW) to estimate how many more times costly blocking is than
 374  * worst-case spinning.
 375  */
 376 #define BLOCK_COST_2POW         4
 377
 378 #ifdef MALLOC_MAG
 379    /*
 380     * Default magazine size, in bytes.  max_rounds is calculated to make
 381     * optimal use of the space, leaving just enough room for the magazine
 382     * header.
 383     */
 384 #  define MAG_SIZE_2POW_DEFAULT 9
 385 #endif
 386
 387 #ifdef MALLOC_BALANCE
 388    /*
 389     * We use an exponential moving average to track recent lock contention,
 390     * where the size of the history window is N, and alpha=2/(N+1).
 391     *
 392     * Due to integer math rounding, very small values here can cause
 393     * substantial degradation in accuracy, thus making the moving average decay
 394     * faster than it would with precise calculation.
 395     */
 396 #  define BALANCE_ALPHA_INV_2POW        9
 397
 398    /*
 399     * Threshold value for the exponential moving contention average at which to
 400     * re-assign a thread.
 401     */
 402 #  define BALANCE_THRESHOLD_DEFAULT     (1U << (SPIN_LIMIT_2POW-4))
 403 #endif
 404
 405 /******************************************************************************/
 406
 407 /*
 408  * Mutexes based on spinlocks.  We can't use normal pthread spinlocks in all
 409  * places, because they require malloc()ed memory, which causes bootstrapping
 410  * issues in some cases.
 411  */
 412 typedef struct {
 413         spinlock_t      lock;
 414 } malloc_mutex_t;
 415
 416 /* Set to true once the allocator has been initialized. */
 417 static bool malloc_initialized = false;
 418
 419 /* Used to avoid initialization races. */
 420 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
 421
 422 /******************************************************************************/
 423 /*
 424  * Statistics data structures.
 425  */
 426
 427 #ifdef MALLOC_STATS
 428
 429 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
 430 struct malloc_bin_stats_s {
 431         /*
 432          * Number of allocation requests that corresponded to the size of this
 433          * bin.
 434          */
 435         uint64_t        nrequests;
 436
 437 #ifdef MALLOC_MAG
 438         /* Number of magazine reloads from this bin. */
 439         uint64_t        nmags;
 440 #endif
 441
 442         /* Total number of runs created for this bin's size class. */
 443         uint64_t        nruns;
 444
 445         /*
 446          * Total number of runs reused by extracting them from the runs tree for
 447          * this bin's size class.
 448          */
 449         uint64_t        reruns;
 450
 451         /* High-water mark for this bin. */
 452         unsigned long   highruns;
 453
 454         /* Current number of runs in this bin. */
 455         unsigned long   curruns;
 456 };
 457
 458 typedef struct arena_stats_s arena_stats_t;
 459 struct arena_stats_s {
 460         /* Number of bytes currently mapped. */
 461         size_t          mapped;
 462
 463         /*
 464          * Total number of purge sweeps, total number of madvise calls made,
 465          * and total pages purged in order to keep dirty unused memory under
 466          * control.
 467          */
 468         uint64_t        npurge;
 469         uint64_t        nmadvise;
 470         uint64_t        purged;
 471
 472         /* Per-size-category statistics. */
 473         size_t          allocated_small;
 474         uint64_t        nmalloc_small;
 475         uint64_t        ndalloc_small;
 476
 477         size_t          allocated_large;
 478         uint64_t        nmalloc_large;
 479         uint64_t        ndalloc_large;
 480
 481 #ifdef MALLOC_BALANCE
 482         /* Number of times this arena reassigned a thread due to contention. */
 483         uint64_t        nbalance;
 484 #endif
 485 };
 486
 487 typedef struct chunk_stats_s chunk_stats_t;
 488 struct chunk_stats_s {
 489         /* Number of chunks that were allocated. */
 490         uint64_t        nchunks;
 491
 492         /* High-water mark for number of chunks allocated. */
 493         unsigned long   highchunks;
 494
 495         /*
 496          * Current number of chunks allocated.  This value isn't maintained for
 497          * any other purpose, so keep track of it in order to be able to set
 498          * highchunks.
 499          */
 500         unsigned long   curchunks;
 501 };
 502
 503 #endif /* #ifdef MALLOC_STATS */
 504
 505 /******************************************************************************/
 506 /*
 507  * Extent data structures.
 508  */
 509
 510 /* Tree of extents. */
 511 typedef struct extent_node_s extent_node_t;
 512 struct extent_node_s {
 513 #ifdef MALLOC_DSS
 514         /* Linkage for the size/address-ordered tree. */
 515         rb_node(extent_node_t) link_szad;
 516 #endif
 517
 518         /* Linkage for the address-ordered tree. */
 519         rb_node(extent_node_t) link_ad;
 520
 521         /* Pointer to the extent that this tree node is responsible for. */
 522         void    *addr;
 523
 524         /* Total region size. */
 525         size_t  size;
 526 };
 527 typedef rb_tree(extent_node_t) extent_tree_t;
 528
 529 /******************************************************************************/
 530 /*
 531  * Arena data structures.
 532  */
 533
 534 typedef struct arena_s arena_t;
 535 typedef struct arena_bin_s arena_bin_t;
 536
 537 /* Each element of the chunk map corresponds to one page within the chunk. */
 538 typedef struct arena_chunk_map_s arena_chunk_map_t;
 539 struct arena_chunk_map_s {
 540         /*
 541          * Linkage for run trees.  There are two disjoint uses:
 542          *
 543          * 1) arena_t's runs_avail tree.
 544          * 2) arena_run_t conceptually uses this linkage for in-use non-full
 545          *    runs, rather than directly embedding linkage.
 546          */
 547         rb_node(arena_chunk_map_t)      link;
 548
 549         /*
 550          * Run address (or size) and various flags are stored together.  The bit
 551          * layout looks like (assuming 32-bit system):
 552          *
 553          *   ???????? ???????? ????---- ---kdzla
 554          *
 555          * ? : Unallocated: Run address for first/last pages, unset for internal
 556          *                  pages.
 557          *     Small: Run address.
 558          *     Large: Run size for first page, unset for trailing pages.
 559          * - : Unused.
 560          * k : key?
 561          * d : dirty?
 562          * z : zeroed?
 563          * l : large?
 564          * a : allocated?
 565          *
 566          * Following are example bit patterns for the three types of runs.
 567          *
 568          * r : run address
 569          * s : run size
 570          * x : don't care
 571          * - : 0
 572          * [dzla] : bit set
 573          *
 574          *   Unallocated:
 575          *     ssssssss ssssssss ssss---- --------
 576          *     xxxxxxxx xxxxxxxx xxxx---- ----d---
 577          *     ssssssss ssssssss ssss---- -----z--
 578          *
 579          *   Small:
 580          *     rrrrrrrr rrrrrrrr rrrr---- -------a
 581          *     rrrrrrrr rrrrrrrr rrrr---- -------a
 582          *     rrrrrrrr rrrrrrrr rrrr---- -------a
 583          *
 584          *   Large:
 585          *     ssssssss ssssssss ssss---- ------la
 586          *     -------- -------- -------- ------la
 587          *     -------- -------- -------- ------la
 588          */
 589         size_t                          bits;
 590 #define CHUNK_MAP_KEY           ((size_t)0x10U)
 591 #define CHUNK_MAP_DIRTY         ((size_t)0x08U)
 592 #define CHUNK_MAP_ZEROED        ((size_t)0x04U)
 593 #define CHUNK_MAP_LARGE         ((size_t)0x02U)
 594 #define CHUNK_MAP_ALLOCATED     ((size_t)0x01U)
 595 };
 596 typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
 597 typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
 598
 599 /* Arena chunk header. */
 600 typedef struct arena_chunk_s arena_chunk_t;
 601 struct arena_chunk_s {
 602         /* Arena that owns the chunk. */
 603         arena_t         *arena;
 604
 605         /* Linkage for the arena's chunks_dirty tree. */
 606         rb_node(arena_chunk_t) link_dirty;
 607
 608         /* Number of dirty pages. */
 609         size_t          ndirty;
 610
 611         /* Map of pages within chunk that keeps track of free/large/small. */
 612         arena_chunk_map_t map[1]; /* Dynamically sized. */
 613 };
 614 typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
 615
 616 typedef struct arena_run_s arena_run_t;
 617 struct arena_run_s {
 618 #ifdef MALLOC_DEBUG
 619         uint32_t        magic;
 620 #  define ARENA_RUN_MAGIC 0x384adf93
 621 #endif
 622
 623         /* Bin this run is associated with. */
 624         arena_bin_t     *bin;
 625
 626         /* Index of first element that might have a free region. */
 627         unsigned        regs_minelm;
 628
 629         /* Number of free regions in run. */
 630         unsigned        nfree;
 631
 632         /* Bitmask of in-use regions (0: in use, 1: free). */
 633         unsigned        regs_mask[1]; /* Dynamically sized. */
 634 };
 635
 636 struct arena_bin_s {
 637         /*
 638          * Current run being used to service allocations of this bin's size
 639          * class.
 640          */
 641         arena_run_t     *runcur;
 642
 643         /*
 644          * Tree of non-full runs.  This tree is used when looking for an
 645          * existing run when runcur is no longer usable.  We choose the
 646          * non-full run that is lowest in memory; this policy tends to keep
 647          * objects packed well, and it can also help reduce the number of
 648          * almost-empty chunks.
 649          */
 650         arena_run_tree_t runs;
 651
 652         /* Size of regions in a run for this bin's size class. */
 653         size_t          reg_size;
 654
 655         /* Total size of a run for this bin's size class. */
 656         size_t          run_size;
 657
 658         /* Total number of regions in a run for this bin's size class. */
 659         uint32_t        nregs;
 660
 661         /* Number of elements in a run's regs_mask for this bin's size class. */
 662         uint32_t        regs_mask_nelms;
 663
 664         /* Offset of first region in a run for this bin's size class. */
 665         uint32_t        reg0_offset;
 666
 667 #ifdef MALLOC_STATS
 668         /* Bin statistics. */
 669         malloc_bin_stats_t stats;
 670 #endif
 671 };
 672
 673 struct arena_s {
 674 #ifdef MALLOC_DEBUG
 675         uint32_t                magic;
 676 #  define ARENA_MAGIC 0x947d3d24
 677 #endif
 678
 679         /* All operations on this arena require that lock be locked. */
 680         pthread_mutex_t         lock;
 681
 682 #ifdef MALLOC_STATS
 683         arena_stats_t           stats;
 684 #endif
 685
 686         /* Tree of dirty-page-containing chunks this arena manages. */
 687         arena_chunk_tree_t      chunks_dirty;
 688
 689         /*
 690          * In order to avoid rapid chunk allocation/deallocation when an arena
 691          * oscillates right on the cusp of needing a new chunk, cache the most
 692          * recently freed chunk.  The spare is left in the arena's chunk trees
 693          * until it is deleted.
 694          *
 695          * There is one spare chunk per arena, rather than one spare total, in
 696          * order to avoid interactions between multiple threads that could make
 697          * a single spare inadequate.
 698          */
 699         arena_chunk_t           *spare;
 700
 701         /*
 702          * Current count of pages within unused runs that are potentially
 703          * dirty, and for which madvise(... MADV_FREE) has not been called.  By
 704          * tracking this, we can institute a limit on how much dirty unused
 705          * memory is mapped for each arena.
 706          */
 707         size_t                  ndirty;
 708
 709         /*
 710          * Size/address-ordered tree of this arena's available runs.  This tree
 711          * is used for first-best-fit run allocation.
 712          */
 713         arena_avail_tree_t      runs_avail;
 714
 715 #ifdef MALLOC_BALANCE
 716         /*
 717          * The arena load balancing machinery needs to keep track of how much
 718          * lock contention there is.  This value is exponentially averaged.
 719          */
 720         uint32_t                contention;
 721 #endif
 722
 723         /*
 724          * bins is used to store rings of free regions of the following sizes,
 725          * assuming a 16-byte quantum, 4kB page size, and default
 726          * MALLOC_OPTIONS.
 727          *
 728          *   bins[i] | size |
 729          *   --------+------+
 730          *        0  |    2 |
 731          *        1  |    4 |
 732          *        2  |    8 |
 733          *   --------+------+
 734          *        3  |   16 |
 735          *        4  |   32 |
 736          *        5  |   48 |
 737          *        6  |   64 |
 738          *           :      :
 739          *           :      :
 740          *       33  |  496 |
 741          *       34  |  512 |
 742          *   --------+------+
 743          *       35  | 1024 |
 744          *       36  | 2048 |
 745          *   --------+------+
 746          */
 747         arena_bin_t             bins[1]; /* Dynamically sized. */
 748 };
 749
 750 /******************************************************************************/
 751 /*
 752  * Magazine data structures.
 753  */
 754
 755 #ifdef MALLOC_MAG
 756 typedef struct mag_s mag_t;
 757 struct mag_s {
 758         size_t          binind; /* Index of associated bin. */
 759         size_t          nrounds;
 760         void            *rounds[1]; /* Dynamically sized. */
 761 };
 762
 763 /*
 764  * Magazines are lazily allocated, but once created, they remain until the
 765  * associated mag_rack is destroyed.
 766  */
 767 typedef struct bin_mags_s bin_mags_t;
 768 struct bin_mags_s {
 769         mag_t   *curmag;
 770         mag_t   *sparemag;
 771 };
 772
 773 typedef struct mag_rack_s mag_rack_t;
 774 struct mag_rack_s {
 775         bin_mags_t      bin_mags[1]; /* Dynamically sized. */
 776 };
 777 #endif
 778
 779 /******************************************************************************/
 780 /*
 781  * Data.
 782  */
 783
 784 /* Number of CPUs. */
 785 static unsigned         ncpus;
 786
 787 /* Various bin-related settings. */
 788 #ifdef MALLOC_TINY              /* Number of (2^n)-spaced tiny bins. */
 789 #  define               ntbins  ((unsigned)(QUANTUM_2POW - TINY_MIN_2POW))
 790 #else
 791 #  define               ntbins  0
 792 #endif
 793 static unsigned         nqbins; /* Number of quantum-spaced bins. */
 794 static unsigned         ncbins; /* Number of cacheline-spaced bins. */
 795 static unsigned         nsbins; /* Number of subpage-spaced bins. */
 796 static unsigned         nbins;
 797 #ifdef MALLOC_TINY
 798 #  define               tspace_max      ((size_t)(QUANTUM >> 1))
 799 #endif
 800 #define                 qspace_min      QUANTUM
 801 static size_t           qspace_max;
 802 static size_t           cspace_min;
 803 static size_t           cspace_max;
 804 static size_t           sspace_min;
 805 static size_t           sspace_max;
 806 #define                 bin_maxclass    sspace_max
 807
 808 static uint8_t const    *size2bin;
 809 /*
 810  * const_size2bin is a static constant lookup table that in the common case can
 811  * be used as-is for size2bin.  For dynamically linked programs, this avoids
 812  * a page of memory overhead per process.
 813  */
 814 #define S2B_1(i)        i,
 815 #define S2B_2(i)        S2B_1(i) S2B_1(i)
 816 #define S2B_4(i)        S2B_2(i) S2B_2(i)
 817 #define S2B_8(i)        S2B_4(i) S2B_4(i)
 818 #define S2B_16(i)       S2B_8(i) S2B_8(i)
 819 #define S2B_32(i)       S2B_16(i) S2B_16(i)
 820 #define S2B_64(i)       S2B_32(i) S2B_32(i)
 821 #define S2B_128(i)      S2B_64(i) S2B_64(i)
 822 #define S2B_256(i)      S2B_128(i) S2B_128(i)
 823 static const uint8_t    const_size2bin[PAGE_SIZE - 255] = {
 824         S2B_1(0xffU)            /*    0 */
 825 #if (QUANTUM_2POW == 4)
 826 /* 64-bit system ************************/
 827 #  ifdef MALLOC_TINY
 828         S2B_2(0)                /*    2 */
 829         S2B_2(1)                /*    4 */
 830         S2B_4(2)                /*    8 */
 831         S2B_8(3)                /*   16 */
 832 #    define S2B_QMIN 3
 833 #  else
 834         S2B_16(0)               /*   16 */
 835 #    define S2B_QMIN 0
 836 #  endif
 837         S2B_16(S2B_QMIN + 1)    /*   32 */
 838         S2B_16(S2B_QMIN + 2)    /*   48 */
 839         S2B_16(S2B_QMIN + 3)    /*   64 */
 840         S2B_16(S2B_QMIN + 4)    /*   80 */
 841         S2B_16(S2B_QMIN + 5)    /*   96 */
 842         S2B_16(S2B_QMIN + 6)    /*  112 */
 843         S2B_16(S2B_QMIN + 7)    /*  128 */
 844 #  define S2B_CMIN (S2B_QMIN + 8)
 845 #else
 846 /* 32-bit system ************************/
 847 #  ifdef MALLOC_TINY
 848         S2B_2(0)                /*    2 */
 849         S2B_2(1)                /*    4 */
 850         S2B_4(2)                /*    8 */
 851 #    define S2B_QMIN 2
 852 #  else
 853         S2B_8(0)                /*    8 */
 854 #    define S2B_QMIN 0
 855 #  endif
 856         S2B_8(S2B_QMIN + 1)     /*   16 */
 857         S2B_8(S2B_QMIN + 2)     /*   24 */
 858         S2B_8(S2B_QMIN + 3)     /*   32 */
 859         S2B_8(S2B_QMIN + 4)     /*   40 */
 860         S2B_8(S2B_QMIN + 5)     /*   48 */
 861         S2B_8(S2B_QMIN + 6)     /*   56 */
 862         S2B_8(S2B_QMIN + 7)     /*   64 */
 863         S2B_8(S2B_QMIN + 8)     /*   72 */
 864         S2B_8(S2B_QMIN + 9)     /*   80 */
 865         S2B_8(S2B_QMIN + 10)    /*   88 */
 866         S2B_8(S2B_QMIN + 11)    /*   96 */
 867         S2B_8(S2B_QMIN + 12)    /*  104 */
 868         S2B_8(S2B_QMIN + 13)    /*  112 */
 869         S2B_8(S2B_QMIN + 14)    /*  120 */
 870         S2B_8(S2B_QMIN + 15)    /*  128 */
 871 #  define S2B_CMIN (S2B_QMIN + 16)
 872 #endif
 873 /****************************************/
 874         S2B_64(S2B_CMIN + 0)    /*  192 */
 875         S2B_64(S2B_CMIN + 1)    /*  256 */
 876         S2B_64(S2B_CMIN + 2)    /*  320 */
 877         S2B_64(S2B_CMIN + 3)    /*  384 */
 878         S2B_64(S2B_CMIN + 4)    /*  448 */
 879         S2B_64(S2B_CMIN + 5)    /*  512 */
 880 #  define S2B_SMIN (S2B_CMIN + 6)
 881         S2B_256(S2B_SMIN + 0)   /*  768 */
 882         S2B_256(S2B_SMIN + 1)   /* 1024 */
 883         S2B_256(S2B_SMIN + 2)   /* 1280 */
 884         S2B_256(S2B_SMIN + 3)   /* 1536 */
 885         S2B_256(S2B_SMIN + 4)   /* 1792 */
 886         S2B_256(S2B_SMIN + 5)   /* 2048 */
 887         S2B_256(S2B_SMIN + 6)   /* 2304 */
 888         S2B_256(S2B_SMIN + 7)   /* 2560 */
 889         S2B_256(S2B_SMIN + 8)   /* 2816 */
 890         S2B_256(S2B_SMIN + 9)   /* 3072 */
 891         S2B_256(S2B_SMIN + 10)  /* 3328 */
 892         S2B_256(S2B_SMIN + 11)  /* 3584 */
 893         S2B_256(S2B_SMIN + 12)  /* 3840 */
 894 #if (PAGE_SHIFT == 13)
 895         S2B_256(S2B_SMIN + 13)  /* 4096 */
 896         S2B_256(S2B_SMIN + 14)  /* 4352 */
 897         S2B_256(S2B_SMIN + 15)  /* 4608 */
 898         S2B_256(S2B_SMIN + 16)  /* 4864 */
 899         S2B_256(S2B_SMIN + 17)  /* 5120 */
 900         S2B_256(S2B_SMIN + 18)  /* 5376 */
 901         S2B_256(S2B_SMIN + 19)  /* 5632 */
 902         S2B_256(S2B_SMIN + 20)  /* 5888 */
 903         S2B_256(S2B_SMIN + 21)  /* 6144 */
 904         S2B_256(S2B_SMIN + 22)  /* 6400 */
 905         S2B_256(S2B_SMIN + 23)  /* 6656 */
 906         S2B_256(S2B_SMIN + 24)  /* 6912 */
 907         S2B_256(S2B_SMIN + 25)  /* 7168 */
 908         S2B_256(S2B_SMIN + 26)  /* 7424 */
 909         S2B_256(S2B_SMIN + 27)  /* 7680 */
 910         S2B_256(S2B_SMIN + 28)  /* 7936 */
 911 #endif
 912 };
 913 #undef S2B_1
 914 #undef S2B_2
 915 #undef S2B_4
 916 #undef S2B_8
 917 #undef S2B_16
 918 #undef S2B_32
 919 #undef S2B_64
 920 #undef S2B_128
 921 #undef S2B_256
 922 #undef S2B_QMIN
 923 #undef S2B_CMIN
 924 #undef S2B_SMIN
 925
 926 #ifdef MALLOC_MAG
 927 static size_t           max_rounds;
 928 #endif
 929
 930 /* Various chunk-related settings. */
 931 static size_t           chunksize;
 932 static size_t           chunksize_mask; /* (chunksize - 1). */
 933 static size_t           chunk_npages;
 934 static size_t           arena_chunk_header_npages;
 935 static size_t           arena_maxclass; /* Max size class for arenas. */
 936
 937 /********/
 938 /*
 939  * Chunks.
 940  */
 941
 942 /* Protects chunk-related data structures. */
 943 static malloc_mutex_t   huge_mtx;
 944
 945 /* Tree of chunks that are stand-alone huge allocations. */
 946 static extent_tree_t    huge;
 947
 948 #ifdef MALLOC_DSS
 949 /*
 950  * Protects sbrk() calls.  This avoids malloc races among threads, though it
 951  * does not protect against races with threads that call sbrk() directly.
 952  */
 953 static malloc_mutex_t   dss_mtx;
 954 /* Base address of the DSS. */
 955 static void             *dss_base;
 956 /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
 957 static void             *dss_prev;
 958 /* Current upper limit on DSS addresses. */
 959 static void             *dss_max;
 960
 961 /*
 962  * Trees of chunks that were previously allocated (trees differ only in node
 963  * ordering).  These are used when allocating chunks, in an attempt to re-use
 964  * address space.  Depending on function, different tree orderings are needed,
 965  * which is why there are two trees with the same contents.
 966  */
 967 static extent_tree_t    dss_chunks_szad;
 968 static extent_tree_t    dss_chunks_ad;
 969 #endif
 970
 971 #ifdef MALLOC_STATS
 972 /* Huge allocation statistics. */
 973 static uint64_t         huge_nmalloc;
 974 static uint64_t         huge_ndalloc;
 975 static size_t           huge_allocated;
 976 #endif
 977
 978 /****************************/
 979 /*
 980  * base (internal allocation).
 981  */
 982
 983 /*
 984  * Current pages that are being used for internal memory allocations.  These
 985  * pages are carved up in cacheline-size quanta, so that there is no chance of
 986  * false cache line sharing.
 987  */
 988 static void             *base_pages;
 989 static void             *base_next_addr;
 990 static void             *base_past_addr; /* Addr immediately past base_pages. */
 991 static extent_node_t    *base_nodes;
 992 static malloc_mutex_t   base_mtx;
 993 #ifdef MALLOC_STATS
 994 static size_t           base_mapped;
 995 #endif
 996
 997 /********/
 998 /*
 999  * Arenas.
1000  */
1001
1002 /*
1003  * Arenas that are used to service external requests.  Not all elements of the
1004  * arenas array are necessarily used; arenas are created lazily as needed.
1005  */
1006 static arena_t          **arenas;
1007 static unsigned         narenas;
1008 #ifndef NO_TLS
1009 #  ifdef MALLOC_BALANCE
1010 static unsigned         narenas_2pow;
1011 #  else
1012 static unsigned         next_arena;
1013 #  endif
1014 #endif
1015 static pthread_mutex_t  arenas_lock; /* Protects arenas initialization. */
1016
1017 #ifndef NO_TLS
1018 /*
1019  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
1020  * for allocations.
1021  */
1022 static __thread arena_t *arenas_map;
1023 #endif
1024
1025 #ifdef MALLOC_MAG
1026 /*
1027  * Map of thread-specific magazine racks, used for thread-specific object
1028  * caching.
1029  */
1030 static __thread mag_rack_t      *mag_rack;
1031 #endif
1032
1033 #ifdef MALLOC_STATS
1034 /* Chunk statistics. */
1035 static chunk_stats_t    stats_chunks;
1036 #endif
1037
1038 /*******************************/
1039 /*
1040  * Runtime configuration options.
1041  */
1042 const char      *_malloc_options;
1043
1044 #ifndef MALLOC_PRODUCTION
1045 static bool     opt_abort = true;
1046 static bool     opt_junk = true;
1047 #else
1048 static bool     opt_abort = false;
1049 static bool     opt_junk = false;
1050 #endif
1051 #ifdef MALLOC_DSS
1052 static bool     opt_dss = true;
1053 static bool     opt_mmap = true;
1054 #endif
1055 #ifdef MALLOC_MAG
1056 static bool     opt_mag = true;
1057 static size_t   opt_mag_size_2pow = MAG_SIZE_2POW_DEFAULT;
1058 #endif
1059 static size_t   opt_dirty_max = DIRTY_MAX_DEFAULT;
1060 #ifdef MALLOC_BALANCE
1061 static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
1062 #endif
1063 static bool     opt_print_stats = false;
1064 static size_t   opt_qspace_max_2pow = QSPACE_MAX_2POW_DEFAULT;
1065 static size_t   opt_cspace_max_2pow = CSPACE_MAX_2POW_DEFAULT;
1066 static size_t   opt_chunk_2pow = CHUNK_2POW_DEFAULT;
1067 static bool     opt_utrace = false;
1068 static bool     opt_sysv = false;
1069 static bool     opt_xmalloc = false;
1070 static bool     opt_zero = false;
1071 static int      opt_narenas_lshift = 0;
1072
1073 typedef struct {
1074         void    *p;
1075         size_t  s;
1076         void    *r;
1077 } malloc_utrace_t;
1078
1079 #define UTRACE(a, b, c)                                                 \
1080         if (opt_utrace) {                                               \
1081                 malloc_utrace_t ut;                                     \
1082                 ut.p = (a);                                             \
1083                 ut.s = (b);                                             \
1084                 ut.r = (c);                                             \
1085                 utrace(&ut, sizeof(ut));                                \
1086         }
1087
1088 /******************************************************************************/
1089 /*
1090  * Begin function prototypes for non-inline static functions.
1091  */
1092
1093 static void     malloc_mutex_init(malloc_mutex_t *mutex);
1094 static bool     malloc_spin_init(pthread_mutex_t *lock);
1095 static void     wrtmessage(const char *p1, const char *p2, const char *p3,
1096                 const char *p4);
1097 #ifdef MALLOC_STATS
1098 static void     malloc_printf(const char *format, ...);
1099 #endif
1100 static char     *umax2s(uintmax_t x, char *s);
1101 #ifdef MALLOC_DSS
1102 static bool     base_pages_alloc_dss(size_t minsize);
1103 #endif
1104 static bool     base_pages_alloc_mmap(size_t minsize);
1105 static bool     base_pages_alloc(size_t minsize);
1106 static void     *base_alloc(size_t size);
1107 static void     *base_calloc(size_t number, size_t size);
1108 static extent_node_t *base_node_alloc(void);
1109 static void     base_node_dealloc(extent_node_t *node);
1110 #ifdef MALLOC_STATS
1111 static void     stats_print(arena_t *arena);
1112 #endif
1113 static void     *pages_map(void *addr, size_t size);
1114 static void     pages_unmap(void *addr, size_t size);
1115 #ifdef MALLOC_DSS
1116 static void     *chunk_alloc_dss(size_t size);
1117 static void     *chunk_recycle_dss(size_t size, bool zero);
1118 #endif
1119 static void     *chunk_alloc_mmap(size_t size);
1120 static void     *chunk_alloc(size_t size, bool zero);
1121 #ifdef MALLOC_DSS
1122 static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
1123 static bool     chunk_dealloc_dss(void *chunk, size_t size);
1124 #endif
1125 static void     chunk_dealloc_mmap(void *chunk, size_t size);
1126 static void     chunk_dealloc(void *chunk, size_t size);
1127 #ifndef NO_TLS
1128 static arena_t  *choose_arena_hard(void);
1129 #endif
1130 static void     arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
1131     bool large, bool zero);
1132 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
1133 static void     arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
1134 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
1135     bool zero);
1136 static void     arena_purge(arena_t *arena);
1137 static void     arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
1138 static void     arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
1139     arena_run_t *run, size_t oldsize, size_t newsize);
1140 static void     arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
1141     arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
1142 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
1143 static void     *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
1144 static size_t   arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
1145 #ifdef MALLOC_BALANCE
1146 static void     arena_lock_balance_hard(arena_t *arena);
1147 #endif
1148 #ifdef MALLOC_MAG
1149 static void     mag_load(mag_t *mag);
1150 #endif
1151 static void     *arena_malloc_large(arena_t *arena, size_t size, bool zero);
1152 static void     *arena_palloc(arena_t *arena, size_t alignment, size_t size,
1153     size_t alloc_size);
1154 static size_t   arena_salloc(const void *ptr);
1155 #ifdef MALLOC_MAG
1156 static void     mag_unload(mag_t *mag);
1157 #endif
1158 static void     arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
1159     void *ptr);
1160 static void     arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
1161     void *ptr, size_t size, size_t oldsize);
1162 static bool     arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
1163     void *ptr, size_t size, size_t oldsize);
1164 static bool     arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
1165 static void     *arena_ralloc(void *ptr, size_t size, size_t oldsize);
1166 static bool     arena_new(arena_t *arena);
1167 static arena_t  *arenas_extend(unsigned ind);
1168 #ifdef MALLOC_MAG
1169 static mag_t    *mag_create(arena_t *arena, size_t binind);
1170 static void     mag_destroy(mag_t *mag);
1171 static mag_rack_t *mag_rack_create(arena_t *arena);
1172 static void     mag_rack_destroy(mag_rack_t *rack);
1173 #endif
1174 static void     *huge_malloc(size_t size, bool zero);
1175 static void     *huge_palloc(size_t alignment, size_t size);
1176 static void     *huge_ralloc(void *ptr, size_t size, size_t oldsize);
1177 static void     huge_dalloc(void *ptr);
1178 static void     malloc_print_stats(void);
1179 #ifdef MALLOC_DEBUG
1180 static void     size2bin_validate(void);
1181 #endif
1182 static bool     size2bin_init(void);
1183 static bool     size2bin_init_hard(void);
1184 static bool     malloc_init_hard(void);
1185
1186 /*
1187  * End function prototypes.
1188  */
1189 /******************************************************************************/
1190 /*
1191  * Begin mutex.  We can't use normal pthread mutexes in all places, because
1192  * they require malloc()ed memory, which causes bootstrapping issues in some
1193  * cases.
1194  */
1195
1196 static void
1197 malloc_mutex_init(malloc_mutex_t *mutex)
1198 {
1199         static const spinlock_t lock = _SPINLOCK_INITIALIZER;
1200
1201         mutex->lock = lock;
1202 }
1203
1204 static inline void
1205 malloc_mutex_lock(malloc_mutex_t *mutex)
1206 {
1207
1208         if (__isthreaded)
1209                 _SPINLOCK(&mutex->lock);
1210 }
1211
1212 static inline void
1213 malloc_mutex_unlock(malloc_mutex_t *mutex)
1214 {
1215
1216         if (__isthreaded)
1217                 _SPINUNLOCK(&mutex->lock);
1218 }
1219
1220 /*
1221  * End mutex.
1222  */
1223 /******************************************************************************/
1224 /*
1225  * Begin spin lock.  Spin locks here are actually adaptive mutexes that block
1226  * after a period of spinning, because unbounded spinning would allow for
1227  * priority inversion.
1228  */
1229
1230 /*
1231  * We use an unpublished interface to initialize pthread mutexes with an
1232  * allocation callback, in order to avoid infinite recursion.
1233  */
1234 int     _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
1235     void *(calloc_cb)(size_t, size_t));
1236
1237 __weak_reference(_pthread_mutex_init_calloc_cb_stub,
1238     _pthread_mutex_init_calloc_cb);
1239
1240 int
1241 _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
1242     void *(calloc_cb)(size_t, size_t))
1243 {
1244
1245         return (0);
1246 }
1247
1248 static bool
1249 malloc_spin_init(pthread_mutex_t *lock)
1250 {
1251
1252         if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
1253                 return (true);
1254
1255         return (false);
1256 }
1257
1258 static inline unsigned
1259 malloc_spin_lock(pthread_mutex_t *lock)
1260 {
1261         unsigned ret = 0;
1262
1263         if (__isthreaded) {
1264                 if (_pthread_mutex_trylock(lock) != 0) {
1265                         /* Exponentially back off if there are multiple CPUs. */
1266                         if (ncpus > 1) {
1267                                 unsigned i;
1268                                 volatile unsigned j;
1269
1270                                 for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
1271                                         for (j = 0; j < (1U << i); j++) {
1272                                                 ret++;
1273                                                 CPU_SPINWAIT;
1274                                         }
1275
1276                                         if (_pthread_mutex_trylock(lock) == 0)
1277                                                 return (ret);
1278                                 }
1279                         }
1280
1281                         /*
1282                          * Spinning failed.  Block until the lock becomes
1283                          * available, in order to avoid indefinite priority
1284                          * inversion.
1285                          */
1286                         _pthread_mutex_lock(lock);
1287                         assert((ret << BLOCK_COST_2POW) != 0 || ncpus == 1);
1288                         return (ret << BLOCK_COST_2POW);
1289                 }
1290         }
1291
1292         return (ret);
1293 }
1294
1295 static inline void
1296 malloc_spin_unlock(pthread_mutex_t *lock)
1297 {
1298
1299         if (__isthreaded)
1300                 _pthread_mutex_unlock(lock);
1301 }
1302
1303 /*
1304  * End spin lock.
1305  */
1306 /******************************************************************************/
1307 /*
1308  * Begin Utility functions/macros.
1309  */
1310
1311 /* Return the chunk address for allocation address a. */
1312 #define CHUNK_ADDR2BASE(a)                                              \
1313         ((void *)((uintptr_t)(a) & ~chunksize_mask))
1314
1315 /* Return the chunk offset of address a. */
1316 #define CHUNK_ADDR2OFFSET(a)                                            \
1317         ((size_t)((uintptr_t)(a) & chunksize_mask))
1318
1319 /* Return the smallest chunk multiple that is >= s. */
1320 #define CHUNK_CEILING(s)                                                \
1321         (((s) + chunksize_mask) & ~chunksize_mask)
1322
1323 /* Return the smallest quantum multiple that is >= a. */
1324 #define QUANTUM_CEILING(a)                                              \
1325         (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
1326
1327 /* Return the smallest cacheline multiple that is >= s. */
1328 #define CACHELINE_CEILING(s)                                            \
1329         (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
1330
1331 /* Return the smallest subpage multiple that is >= s. */
1332 #define SUBPAGE_CEILING(s)                                              \
1333         (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
1334
1335 /* Return the smallest PAGE_SIZE multiple that is >= s. */
1336 #define PAGE_CEILING(s)                                                 \
1337         (((s) + PAGE_MASK) & ~PAGE_MASK)
1338
1339 #ifdef MALLOC_TINY
1340 /* Compute the smallest power of 2 that is >= x. */
1341 static inline size_t
1342 pow2_ceil(size_t x)
1343 {
1344
1345         x--;
1346         x |= x >> 1;
1347         x |= x >> 2;
1348         x |= x >> 4;
1349         x |= x >> 8;
1350         x |= x >> 16;
1351 #if (SIZEOF_PTR == 8)
1352         x |= x >> 32;
1353 #endif
1354         x++;
1355         return (x);
1356 }
1357 #endif
1358
1359 #ifdef MALLOC_BALANCE
1360 /*
1361  * Use a simple linear congruential pseudo-random number generator:
1362  *
1363  *   prn(y) = (a*x + c) % m
1364  *
1365  * where the following constants ensure maximal period:
1366  *
1367  *   a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
1368  *   c == Odd number (relatively prime to 2^n).
1369  *   m == 2^32
1370  *
1371  * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
1372  *
1373  * This choice of m has the disadvantage that the quality of the bits is
1374  * proportional to bit position.  For example. the lowest bit has a cycle of 2,
1375  * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
1376  * bits.
1377  */
1378 #  define PRN_DEFINE(suffix, var, a, c)                                 \
1379 static inline void                                                      \
1380 sprn_##suffix(uint32_t seed)                                            \
1381 {                                                                       \
1382         var = seed;                                                     \
1383 }                                                                       \
1384                                                                         \
1385 static inline uint32_t                                                  \
1386 prn_##suffix(uint32_t lg_range)                                         \
1387 {                                                                       \
1388         uint32_t ret, x;                                                \
1389                                                                         \
1390         assert(lg_range > 0);                                           \
1391         assert(lg_range <= 32);                                         \
1392                                                                         \
1393         x = (var * (a)) + (c);                                          \
1394         var = x;                                                        \
1395         ret = x >> (32 - lg_range);                                     \
1396                                                                         \
1397         return (ret);                                                   \
1398 }
1399 #  define SPRN(suffix, seed)    sprn_##suffix(seed)
1400 #  define PRN(suffix, lg_range) prn_##suffix(lg_range)
1401 #endif
1402
1403 #ifdef MALLOC_BALANCE
1404 /* Define the PRNG used for arena assignment. */
1405 static __thread uint32_t balance_x;
1406 PRN_DEFINE(balance, balance_x, 1297, 1301)
1407 #endif
1408
1409 static void
1410 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1411 {
1412
1413         _write(STDERR_FILENO, p1, strlen(p1));
1414         _write(STDERR_FILENO, p2, strlen(p2));
1415         _write(STDERR_FILENO, p3, strlen(p3));
1416         _write(STDERR_FILENO, p4, strlen(p4));
1417 }
1418
1419 void    (*_malloc_message)(const char *p1, const char *p2, const char *p3,
1420             const char *p4) = wrtmessage;
1421
1422 #ifdef MALLOC_STATS
1423 /*
1424  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1425  */
1426 static void
1427 malloc_printf(const char *format, ...)
1428 {
1429         char buf[4096];
1430         va_list ap;
1431
1432         va_start(ap, format);
1433         vsnprintf(buf, sizeof(buf), format, ap);
1434         va_end(ap);
1435         _malloc_message(buf, "", "", "");
1436 }
1437 #endif
1438
1439 /*
1440  * We don't want to depend on vsnprintf() for production builds, since that can
1441  * cause unnecessary bloat for static binaries.  umax2s() provides minimal
1442  * integer printing functionality, so that malloc_printf() use can be limited to
1443  * MALLOC_STATS code.
1444  */
1445 #define UMAX2S_BUFSIZE  21
1446 static char *
1447 umax2s(uintmax_t x, char *s)
1448 {
1449         unsigned i;
1450
1451         /* Make sure UMAX2S_BUFSIZE is large enough. */
1452         assert(sizeof(uintmax_t) <= 8);
1453
1454         i = UMAX2S_BUFSIZE - 1;
1455         s[i] = '\0';
1456         do {
1457                 i--;
1458                 s[i] = "0123456789"[x % 10];
1459                 x /= 10;
1460         } while (x > 0);
1461
1462         return (&s[i]);
1463 }
1464
1465 /******************************************************************************/
1466
1467 #ifdef MALLOC_DSS
1468 static bool
1469 base_pages_alloc_dss(size_t minsize)
1470 {
1471
1472         /*
1473          * Do special DSS allocation here, since base allocations don't need to
1474          * be chunk-aligned.
1475          */
1476         malloc_mutex_lock(&dss_mtx);
1477         if (dss_prev != (void *)-1) {
1478                 intptr_t incr;
1479                 size_t csize = CHUNK_CEILING(minsize);
1480
1481                 do {
1482                         /* Get the current end of the DSS. */
1483                         dss_max = sbrk(0);
1484
1485                         /*
1486                          * Calculate how much padding is necessary to
1487                          * chunk-align the end of the DSS.  Don't worry about
1488                          * dss_max not being chunk-aligned though.
1489                          */
1490                         incr = (intptr_t)chunksize
1491                             - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1492                         assert(incr >= 0);
1493                         if ((size_t)incr < minsize)
1494                                 incr += csize;
1495
1496                         dss_prev = sbrk(incr);
1497                         if (dss_prev == dss_max) {
1498                                 /* Success. */
1499                                 dss_max = (void *)((intptr_t)dss_prev + incr);
1500                                 base_pages = dss_prev;
1501                                 base_next_addr = base_pages;
1502                                 base_past_addr = dss_max;
1503 #ifdef MALLOC_STATS
1504                                 base_mapped += incr;
1505 #endif
1506                                 malloc_mutex_unlock(&dss_mtx);
1507                                 return (false);
1508                         }
1509                 } while (dss_prev != (void *)-1);
1510         }
1511         malloc_mutex_unlock(&dss_mtx);
1512
1513         return (true);
1514 }
1515 #endif
1516
1517 static bool
1518 base_pages_alloc_mmap(size_t minsize)
1519 {
1520         size_t csize;
1521
1522         assert(minsize != 0);
1523         csize = PAGE_CEILING(minsize);
1524         base_pages = pages_map(NULL, csize);
1525         if (base_pages == NULL)
1526                 return (true);
1527         base_next_addr = base_pages;
1528         base_past_addr = (void *)((uintptr_t)base_pages + csize);
1529 #ifdef MALLOC_STATS
1530         base_mapped += csize;
1531 #endif
1532
1533         return (false);
1534 }
1535
1536 static bool
1537 base_pages_alloc(size_t minsize)
1538 {
1539
1540 #ifdef MALLOC_DSS
1541         if (opt_mmap && minsize != 0)
1542 #endif
1543         {
1544                 if (base_pages_alloc_mmap(minsize) == false)
1545                         return (false);
1546         }
1547
1548 #ifdef MALLOC_DSS
1549         if (opt_dss) {
1550                 if (base_pages_alloc_dss(minsize) == false)
1551                         return (false);
1552         }
1553
1554 #endif
1555
1556         return (true);
1557 }
1558
1559 static void *
1560 base_alloc(size_t size)
1561 {
1562         void *ret;
1563         size_t csize;
1564
1565         /* Round size up to nearest multiple of the cacheline size. */
1566         csize = CACHELINE_CEILING(size);
1567
1568         malloc_mutex_lock(&base_mtx);
1569         /* Make sure there's enough space for the allocation. */
1570         if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1571                 if (base_pages_alloc(csize)) {
1572                         malloc_mutex_unlock(&base_mtx);
1573                         return (NULL);
1574                 }
1575         }
1576         /* Allocate. */
1577         ret = base_next_addr;
1578         base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1579         malloc_mutex_unlock(&base_mtx);
1580
1581         return (ret);
1582 }
1583
1584 static void *
1585 base_calloc(size_t number, size_t size)
1586 {
1587         void *ret;
1588
1589         ret = base_alloc(number * size);
1590         memset(ret, 0, number * size);
1591
1592         return (ret);
1593 }
1594
1595 static extent_node_t *
1596 base_node_alloc(void)
1597 {
1598         extent_node_t *ret;
1599
1600         malloc_mutex_lock(&base_mtx);
1601         if (base_nodes != NULL) {
1602                 ret = base_nodes;
1603                 base_nodes = *(extent_node_t **)ret;
1604                 malloc_mutex_unlock(&base_mtx);
1605         } else {
1606                 malloc_mutex_unlock(&base_mtx);
1607                 ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
1608         }
1609
1610         return (ret);
1611 }
1612
1613 static void
1614 base_node_dealloc(extent_node_t *node)
1615 {
1616
1617         malloc_mutex_lock(&base_mtx);
1618         *(extent_node_t **)node = base_nodes;
1619         base_nodes = node;
1620         malloc_mutex_unlock(&base_mtx);
1621 }
1622
1623 /******************************************************************************/
1624
1625 #ifdef MALLOC_STATS
1626 static void
1627 stats_print(arena_t *arena)
1628 {
1629         unsigned i, gap_start;
1630
1631         malloc_printf("dirty: %zu page%s dirty, %llu sweep%s,"
1632             " %llu madvise%s, %llu page%s purged\n",
1633             arena->ndirty, arena->ndirty == 1 ? "" : "s",
1634             arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
1635             arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
1636             arena->stats.purged, arena->stats.purged == 1 ? "" : "s");
1637
1638         malloc_printf("            allocated      nmalloc      ndalloc\n");
1639         malloc_printf("small:   %12zu %12llu %12llu\n",
1640             arena->stats.allocated_small, arena->stats.nmalloc_small,
1641             arena->stats.ndalloc_small);
1642         malloc_printf("large:   %12zu %12llu %12llu\n",
1643             arena->stats.allocated_large, arena->stats.nmalloc_large,
1644             arena->stats.ndalloc_large);
1645         malloc_printf("total:   %12zu %12llu %12llu\n",
1646             arena->stats.allocated_small + arena->stats.allocated_large,
1647             arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1648             arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1649         malloc_printf("mapped:  %12zu\n", arena->stats.mapped);
1650
1651 #ifdef MALLOC_MAG
1652         if (__isthreaded && opt_mag) {
1653                 malloc_printf("bins:     bin   size regs pgs      mags   "
1654                     "newruns    reruns maxruns curruns\n");
1655         } else {
1656 #endif
1657                 malloc_printf("bins:     bin   size regs pgs  requests   "
1658                     "newruns    reruns maxruns curruns\n");
1659 #ifdef MALLOC_MAG
1660         }
1661 #endif
1662         for (i = 0, gap_start = UINT_MAX; i < nbins; i++) {
1663                 if (arena->bins[i].stats.nruns == 0) {
1664                         if (gap_start == UINT_MAX)
1665                                 gap_start = i;
1666                 } else {
1667                         if (gap_start != UINT_MAX) {
1668                                 if (i > gap_start + 1) {
1669                                         /* Gap of more than one size class. */
1670                                         malloc_printf("[%u..%u]\n",
1671                                             gap_start, i - 1);
1672                                 } else {
1673                                         /* Gap of one size class. */
1674                                         malloc_printf("[%u]\n", gap_start);
1675                                 }
1676                                 gap_start = UINT_MAX;
1677                         }
1678                         malloc_printf(
1679                             "%13u %1s %4u %4u %3u %9llu %9llu"
1680                             " %9llu %7lu %7lu\n",
1681                             i,
1682                             i < ntbins ? "T" : i < ntbins + nqbins ? "Q" :
1683                             i < ntbins + nqbins + ncbins ? "C" : "S",
1684                             arena->bins[i].reg_size,
1685                             arena->bins[i].nregs,
1686                             arena->bins[i].run_size >> PAGE_SHIFT,
1687 #ifdef MALLOC_MAG
1688                             (__isthreaded && opt_mag) ?
1689                             arena->bins[i].stats.nmags :
1690 #endif
1691                             arena->bins[i].stats.nrequests,
1692                             arena->bins[i].stats.nruns,
1693                             arena->bins[i].stats.reruns,
1694                             arena->bins[i].stats.highruns,
1695                             arena->bins[i].stats.curruns);
1696                 }
1697         }
1698         if (gap_start != UINT_MAX) {
1699                 if (i > gap_start + 1) {
1700                         /* Gap of more than one size class. */
1701                         malloc_printf("[%u..%u]\n", gap_start, i - 1);
1702                 } else {
1703                         /* Gap of one size class. */
1704                         malloc_printf("[%u]\n", gap_start);
1705                 }
1706         }
1707 }
1708 #endif
1709
1710 /*
1711  * End Utility functions/macros.
1712  */
1713 /******************************************************************************/
1714 /*
1715  * Begin extent tree code.
1716  */
1717
1718 #ifdef MALLOC_DSS
1719 static inline int
1720 extent_szad_comp(extent_node_t *a, extent_node_t *b)
1721 {
1722         int ret;
1723         size_t a_size = a->size;
1724         size_t b_size = b->size;
1725
1726         ret = (a_size > b_size) - (a_size < b_size);
1727         if (ret == 0) {
1728                 uintptr_t a_addr = (uintptr_t)a->addr;
1729                 uintptr_t b_addr = (uintptr_t)b->addr;
1730
1731                 ret = (a_addr > b_addr) - (a_addr < b_addr);
1732         }
1733
1734         return (ret);
1735 }
1736
1737 /* Wrap red-black tree macros in functions. */
1738 rb_wrap(__unused static, extent_tree_szad_, extent_tree_t, extent_node_t,
1739     link_szad, extent_szad_comp)
1740 #endif
1741
1742 static inline int
1743 extent_ad_comp(extent_node_t *a, extent_node_t *b)
1744 {
1745         uintptr_t a_addr = (uintptr_t)a->addr;
1746         uintptr_t b_addr = (uintptr_t)b->addr;
1747
1748         return ((a_addr > b_addr) - (a_addr < b_addr));
1749 }
1750
1751 /* Wrap red-black tree macros in functions. */
1752 rb_wrap(__unused static, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
1753     extent_ad_comp)
1754
1755 /*
1756  * End extent tree code.
1757  */
1758 /******************************************************************************/
1759 /*
1760  * Begin chunk management functions.
1761  */
1762
1763 static void *
1764 pages_map(void *addr, size_t size)
1765 {
1766         void *ret;
1767
1768         /*
1769          * We don't use MAP_FIXED here, because it can cause the *replacement*
1770          * of existing mappings, and we only want to create new mappings.
1771          */
1772         ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1773             -1, 0);
1774         assert(ret != NULL);
1775
1776         if (ret == MAP_FAILED)
1777                 ret = NULL;
1778         else if (addr != NULL && ret != addr) {
1779                 /*
1780                  * We succeeded in mapping memory, but not in the right place.
1781                  */
1782                 if (munmap(ret, size) == -1) {
1783                         char buf[STRERROR_BUF];
1784
1785                         strerror_r(errno, buf, sizeof(buf));
1786                         _malloc_message(_getprogname(),
1787                             ": (malloc) Error in munmap(): ", buf, "\n");
1788                         if (opt_abort)
1789                                 abort();
1790                 }
1791                 ret = NULL;
1792         }
1793
1794         assert(ret == NULL || (addr == NULL && ret != addr)
1795             || (addr != NULL && ret == addr));
1796         return (ret);
1797 }
1798
1799 static void
1800 pages_unmap(void *addr, size_t size)
1801 {
1802
1803         if (munmap(addr, size) == -1) {
1804                 char buf[STRERROR_BUF];
1805
1806                 strerror_r(errno, buf, sizeof(buf));
1807                 _malloc_message(_getprogname(),
1808                     ": (malloc) Error in munmap(): ", buf, "\n");
1809                 if (opt_abort)
1810                         abort();
1811         }
1812 }
1813
1814 #ifdef MALLOC_DSS
1815 static void *
1816 chunk_alloc_dss(size_t size)
1817 {
1818
1819         /*
1820          * sbrk() uses a signed increment argument, so take care not to
1821          * interpret a huge allocation request as a negative increment.
1822          */
1823         if ((intptr_t)size < 0)
1824                 return (NULL);
1825
1826         malloc_mutex_lock(&dss_mtx);
1827         if (dss_prev != (void *)-1) {
1828                 intptr_t incr;
1829
1830                 /*
1831                  * The loop is necessary to recover from races with other
1832                  * threads that are using the DSS for something other than
1833                  * malloc.
1834                  */
1835                 do {
1836                         void *ret;
1837
1838                         /* Get the current end of the DSS. */
1839                         dss_max = sbrk(0);
1840
1841                         /*
1842                          * Calculate how much padding is necessary to
1843                          * chunk-align the end of the DSS.
1844                          */
1845                         incr = (intptr_t)size
1846                             - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1847                         if (incr == (intptr_t)size)
1848                                 ret = dss_max;
1849                         else {
1850                                 ret = (void *)((intptr_t)dss_max + incr);
1851                                 incr += size;
1852                         }
1853
1854                         dss_prev = sbrk(incr);
1855                         if (dss_prev == dss_max) {
1856                                 /* Success. */
1857                                 dss_max = (void *)((intptr_t)dss_prev + incr);
1858                                 malloc_mutex_unlock(&dss_mtx);
1859                                 return (ret);
1860                         }
1861                 } while (dss_prev != (void *)-1);
1862         }
1863         malloc_mutex_unlock(&dss_mtx);
1864
1865         return (NULL);
1866 }
1867
1868 static void *
1869 chunk_recycle_dss(size_t size, bool zero)
1870 {
1871         extent_node_t *node, key;
1872
1873         key.addr = NULL;
1874         key.size = size;
1875         malloc_mutex_lock(&dss_mtx);
1876         node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
1877         if (node != NULL) {
1878                 void *ret = node->addr;
1879
1880                 /* Remove node from the tree. */
1881                 extent_tree_szad_remove(&dss_chunks_szad, node);
1882                 if (node->size == size) {
1883                         extent_tree_ad_remove(&dss_chunks_ad, node);
1884                         base_node_dealloc(node);
1885                 } else {
1886                         /*
1887                          * Insert the remainder of node's address range as a
1888                          * smaller chunk.  Its position within dss_chunks_ad
1889                          * does not change.
1890                          */
1891                         assert(node->size > size);
1892                         node->addr = (void *)((uintptr_t)node->addr + size);
1893                         node->size -= size;
1894                         extent_tree_szad_insert(&dss_chunks_szad, node);
1895                 }
1896                 malloc_mutex_unlock(&dss_mtx);
1897
1898                 if (zero)
1899                         memset(ret, 0, size);
1900                 return (ret);
1901         }
1902         malloc_mutex_unlock(&dss_mtx);
1903
1904         return (NULL);
1905 }
1906 #endif
1907
1908 static void *
1909 chunk_alloc_mmap(size_t size)
1910 {
1911         void *ret;
1912         size_t offset;
1913
1914         /*
1915          * Ideally, there would be a way to specify alignment to mmap() (like
1916          * NetBSD has), but in the absence of such a feature, we have to work
1917          * hard to efficiently create aligned mappings.  The reliable, but
1918          * expensive method is to create a mapping that is over-sized, then
1919          * trim the excess.  However, that always results in at least one call
1920          * to pages_unmap().
1921          *
1922          * A more optimistic approach is to try mapping precisely the right
1923          * amount, then try to append another mapping if alignment is off.  In
1924          * practice, this works out well as long as the application is not
1925          * interleaving mappings via direct mmap() calls.  If we do run into a
1926          * situation where there is an interleaved mapping and we are unable to
1927          * extend an unaligned mapping, our best option is to momentarily
1928          * revert to the reliable-but-expensive method.  This will tend to
1929          * leave a gap in the memory map that is too small to cause later
1930          * problems for the optimistic method.
1931          */
1932
1933         ret = pages_map(NULL, size);
1934         if (ret == NULL)
1935                 return (NULL);
1936
1937         offset = CHUNK_ADDR2OFFSET(ret);
1938         if (offset != 0) {
1939                 /* Try to extend chunk boundary. */
1940                 if (pages_map((void *)((uintptr_t)ret + size),
1941                     chunksize - offset) == NULL) {
1942                         /*
1943                          * Extension failed.  Clean up, then revert to the
1944                          * reliable-but-expensive method.
1945                          */
1946                         pages_unmap(ret, size);
1947
1948                         /* Beware size_t wrap-around. */
1949                         if (size + chunksize <= size)
1950                                 return NULL;
1951
1952                         ret = pages_map(NULL, size + chunksize);
1953                         if (ret == NULL)
1954                                 return (NULL);
1955
1956                         /* Clean up unneeded leading/trailing space. */
1957                         offset = CHUNK_ADDR2OFFSET(ret);
1958                         if (offset != 0) {
1959                                 /* Leading space. */
1960                                 pages_unmap(ret, chunksize - offset);
1961
1962                                 ret = (void *)((uintptr_t)ret +
1963                                     (chunksize - offset));
1964
1965                                 /* Trailing space. */
1966                                 pages_unmap((void *)((uintptr_t)ret + size),
1967                                     offset);
1968                         } else {
1969                                 /* Trailing space only. */
1970                                 pages_unmap((void *)((uintptr_t)ret + size),
1971                                     chunksize);
1972                         }
1973                 } else {
1974                         /* Clean up unneeded leading space. */
1975                         pages_unmap(ret, chunksize - offset);
1976                         ret = (void *)((uintptr_t)ret + (chunksize - offset));
1977                 }
1978         }
1979
1980         return (ret);
1981 }
1982
1983 static void *
1984 chunk_alloc(size_t size, bool zero)
1985 {
1986         void *ret;
1987
1988         assert(size != 0);
1989         assert((size & chunksize_mask) == 0);
1990
1991 #ifdef MALLOC_DSS
1992         if (opt_mmap)
1993 #endif
1994         {
1995                 ret = chunk_alloc_mmap(size);
1996                 if (ret != NULL)
1997                         goto RETURN;
1998         }
1999
2000 #ifdef MALLOC_DSS
2001         if (opt_dss) {
2002                 ret = chunk_recycle_dss(size, zero);
2003                 if (ret != NULL) {
2004                         goto RETURN;
2005                 }
2006
2007                 ret = chunk_alloc_dss(size);
2008                 if (ret != NULL)
2009                         goto RETURN;
2010         }
2011 #endif
2012
2013         /* All strategies for allocation failed. */
2014         ret = NULL;
2015 RETURN:
2016 #ifdef MALLOC_STATS
2017         if (ret != NULL) {
2018                 stats_chunks.nchunks += (size / chunksize);
2019                 stats_chunks.curchunks += (size / chunksize);
2020         }
2021         if (stats_chunks.curchunks > stats_chunks.highchunks)
2022                 stats_chunks.highchunks = stats_chunks.curchunks;
2023 #endif
2024
2025         assert(CHUNK_ADDR2BASE(ret) == ret);
2026         return (ret);
2027 }
2028
2029 #ifdef MALLOC_DSS
2030 static extent_node_t *
2031 chunk_dealloc_dss_record(void *chunk, size_t size)
2032 {
2033         extent_node_t *node, *prev, key;
2034
2035         key.addr = (void *)((uintptr_t)chunk + size);
2036         node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
2037         /* Try to coalesce forward. */
2038         if (node != NULL && node->addr == key.addr) {
2039                 /*
2040                  * Coalesce chunk with the following address range.  This does
2041                  * not change the position within dss_chunks_ad, so only
2042                  * remove/insert from/into dss_chunks_szad.
2043                  */
2044                 extent_tree_szad_remove(&dss_chunks_szad, node);
2045                 node->addr = chunk;
2046                 node->size += size;
2047                 extent_tree_szad_insert(&dss_chunks_szad, node);
2048         } else {
2049                 /*
2050                  * Coalescing forward failed, so insert a new node.  Drop
2051                  * dss_mtx during node allocation, since it is possible that a
2052                  * new base chunk will be allocated.
2053                  */
2054                 malloc_mutex_unlock(&dss_mtx);
2055                 node = base_node_alloc();
2056                 malloc_mutex_lock(&dss_mtx);
2057                 if (node == NULL)
2058                         return (NULL);
2059                 node->addr = chunk;
2060                 node->size = size;
2061                 extent_tree_ad_insert(&dss_chunks_ad, node);
2062                 extent_tree_szad_insert(&dss_chunks_szad, node);
2063         }
2064
2065         /* Try to coalesce backward. */
2066         prev = extent_tree_ad_prev(&dss_chunks_ad, node);
2067         if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
2068             chunk) {
2069                 /*
2070                  * Coalesce chunk with the previous address range.  This does
2071                  * not change the position within dss_chunks_ad, so only
2072                  * remove/insert node from/into dss_chunks_szad.
2073                  */
2074                 extent_tree_szad_remove(&dss_chunks_szad, prev);
2075                 extent_tree_ad_remove(&dss_chunks_ad, prev);
2076
2077                 extent_tree_szad_remove(&dss_chunks_szad, node);
2078                 node->addr = prev->addr;
2079                 node->size += prev->size;
2080                 extent_tree_szad_insert(&dss_chunks_szad, node);
2081
2082                 base_node_dealloc(prev);
2083         }
2084
2085         return (node);
2086 }
2087
2088 static bool
2089 chunk_dealloc_dss(void *chunk, size_t size)
2090 {
2091
2092         malloc_mutex_lock(&dss_mtx);
2093         if ((uintptr_t)chunk >= (uintptr_t)dss_base
2094             && (uintptr_t)chunk < (uintptr_t)dss_max) {
2095                 extent_node_t *node;
2096
2097                 /* Try to coalesce with other unused chunks. */
2098                 node = chunk_dealloc_dss_record(chunk, size);
2099                 if (node != NULL) {
2100                         chunk = node->addr;
2101                         size = node->size;
2102                 }
2103
2104                 /* Get the current end of the DSS. */
2105                 dss_max = sbrk(0);
2106
2107                 /*
2108                  * Try to shrink the DSS if this chunk is at the end of the
2109                  * DSS.  The sbrk() call here is subject to a race condition
2110                  * with threads that use brk(2) or sbrk(2) directly, but the
2111                  * alternative would be to leak memory for the sake of poorly
2112                  * designed multi-threaded programs.
2113                  */
2114                 if ((void *)((uintptr_t)chunk + size) == dss_max
2115                     && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
2116                         /* Success. */
2117                         dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
2118
2119                         if (node != NULL) {
2120                                 extent_tree_szad_remove(&dss_chunks_szad, node);
2121                                 extent_tree_ad_remove(&dss_chunks_ad, node);
2122                                 base_node_dealloc(node);
2123                         }
2124                         malloc_mutex_unlock(&dss_mtx);
2125                 } else {
2126                         malloc_mutex_unlock(&dss_mtx);
2127                         madvise(chunk, size, MADV_FREE);
2128                 }
2129
2130                 return (false);
2131         }
2132         malloc_mutex_unlock(&dss_mtx);
2133
2134         return (true);
2135 }
2136 #endif
2137
2138 static void
2139 chunk_dealloc_mmap(void *chunk, size_t size)
2140 {
2141
2142         pages_unmap(chunk, size);
2143 }
2144
2145 static void
2146 chunk_dealloc(void *chunk, size_t size)
2147 {
2148
2149         assert(chunk != NULL);
2150         assert(CHUNK_ADDR2BASE(chunk) == chunk);
2151         assert(size != 0);
2152         assert((size & chunksize_mask) == 0);
2153
2154 #ifdef MALLOC_STATS
2155         stats_chunks.curchunks -= (size / chunksize);
2156 #endif
2157
2158 #ifdef MALLOC_DSS
2159         if (opt_dss) {
2160                 if (chunk_dealloc_dss(chunk, size) == false)
2161                         return;
2162         }
2163
2164         if (opt_mmap)
2165 #endif
2166                 chunk_dealloc_mmap(chunk, size);
2167 }
2168
2169 /*
2170  * End chunk management functions.
2171  */
2172 /******************************************************************************/
2173 /*
2174  * Begin arena.
2175  */
2176
2177 /*
2178  * Choose an arena based on a per-thread value (fast-path code, calls slow-path
2179  * code if necessary).
2180  */
2181 static inline arena_t *
2182 choose_arena(void)
2183 {
2184         arena_t *ret;
2185
2186         /*
2187          * We can only use TLS if this is a PIC library, since for the static
2188          * library version, libc's malloc is used by TLS allocation, which
2189          * introduces a bootstrapping issue.
2190          */
2191 #ifndef NO_TLS
2192         if (__isthreaded == false) {
2193             /* Avoid the overhead of TLS for single-threaded operation. */
2194             return (arenas[0]);
2195         }
2196
2197         ret = arenas_map;
2198         if (ret == NULL) {
2199                 ret = choose_arena_hard();
2200                 assert(ret != NULL);
2201         }
2202 #else
2203         if (__isthreaded && narenas > 1) {
2204                 unsigned long ind;
2205
2206                 /*
2207                  * Hash _pthread_self() to one of the arenas.  There is a prime
2208                  * number of arenas, so this has a reasonable chance of
2209                  * working.  Even so, the hashing can be easily thwarted by
2210                  * inconvenient _pthread_self() values.  Without specific
2211                  * knowledge of how _pthread_self() calculates values, we can't
2212                  * easily do much better than this.
2213                  */
2214                 ind = (unsigned long) _pthread_self() % narenas;
2215
2216                 /*
2217                  * Optimistially assume that arenas[ind] has been initialized.
2218                  * At worst, we find out that some other thread has already
2219                  * done so, after acquiring the lock in preparation.  Note that
2220                  * this lazy locking also has the effect of lazily forcing
2221                  * cache coherency; without the lock acquisition, there's no
2222                  * guarantee that modification of arenas[ind] by another thread
2223                  * would be seen on this CPU for an arbitrary amount of time.
2224                  *
2225                  * In general, this approach to modifying a synchronized value
2226                  * isn't a good idea, but in this case we only ever modify the
2227                  * value once, so things work out well.
2228                  */
2229                 ret = arenas[ind];
2230                 if (ret == NULL) {
2231                         /*
2232                          * Avoid races with another thread that may have already
2233                          * initialized arenas[ind].
2234                          */
2235                         malloc_spin_lock(&arenas_lock);
2236                         if (arenas[ind] == NULL)
2237                                 ret = arenas_extend((unsigned)ind);
2238                         else
2239                                 ret = arenas[ind];
2240                         malloc_spin_unlock(&arenas_lock);
2241                 }
2242         } else
2243                 ret = arenas[0];
2244 #endif
2245
2246         assert(ret != NULL);
2247         return (ret);
2248 }
2249
2250 #ifndef NO_TLS
2251 /*
2252  * Choose an arena based on a per-thread value (slow-path code only, called
2253  * only by choose_arena()).
2254  */
2255 static arena_t *
2256 choose_arena_hard(void)
2257 {
2258         arena_t *ret;
2259
2260         assert(__isthreaded);
2261
2262 #ifdef MALLOC_BALANCE
2263         /* Seed the PRNG used for arena load balancing. */
2264         SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self()));
2265 #endif
2266
2267         if (narenas > 1) {
2268 #ifdef MALLOC_BALANCE
2269                 unsigned ind;
2270
2271                 ind = PRN(balance, narenas_2pow);
2272                 if ((ret = arenas[ind]) == NULL) {
2273                         malloc_spin_lock(&arenas_lock);
2274                         if ((ret = arenas[ind]) == NULL)
2275                                 ret = arenas_extend(ind);
2276                         malloc_spin_unlock(&arenas_lock);
2277                 }
2278 #else
2279                 malloc_spin_lock(&arenas_lock);
2280                 if ((ret = arenas[next_arena]) == NULL)
2281                         ret = arenas_extend(next_arena);
2282                 next_arena = (next_arena + 1) % narenas;
2283                 malloc_spin_unlock(&arenas_lock);
2284 #endif
2285         } else
2286                 ret = arenas[0];
2287
2288         arenas_map = ret;
2289
2290         return (ret);
2291 }
2292 #endif
2293
2294 static inline int
2295 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
2296 {
2297         uintptr_t a_chunk = (uintptr_t)a;
2298         uintptr_t b_chunk = (uintptr_t)b;
2299
2300         assert(a != NULL);
2301         assert(b != NULL);
2302
2303         return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
2304 }
2305
2306 /* Wrap red-black tree macros in functions. */
2307 rb_wrap(__unused static, arena_chunk_tree_dirty_, arena_chunk_tree_t,
2308     arena_chunk_t, link_dirty, arena_chunk_comp)
2309
2310 static inline int
2311 arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2312 {
2313         uintptr_t a_mapelm = (uintptr_t)a;
2314         uintptr_t b_mapelm = (uintptr_t)b;
2315
2316         assert(a != NULL);
2317         assert(b != NULL);
2318
2319         return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
2320 }
2321
2322 /* Wrap red-black tree macros in functions. */
2323 rb_wrap(__unused static, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
2324     link, arena_run_comp)
2325
2326 static inline int
2327 arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2328 {
2329         int ret;
2330         size_t a_size = a->bits & ~PAGE_MASK;
2331         size_t b_size = b->bits & ~PAGE_MASK;
2332
2333         ret = (a_size > b_size) - (a_size < b_size);
2334         if (ret == 0) {
2335                 uintptr_t a_mapelm, b_mapelm;
2336
2337                 if ((a->bits & CHUNK_MAP_KEY) == 0)
2338                         a_mapelm = (uintptr_t)a;
2339                 else {
2340                         /*
2341                          * Treat keys as though they are lower than anything
2342                          * else.
2343                          */
2344                         a_mapelm = 0;
2345                 }
2346                 b_mapelm = (uintptr_t)b;
2347
2348                 ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
2349         }
2350
2351         return (ret);
2352 }
2353
2354 /* Wrap red-black tree macros in functions. */
2355 rb_wrap(__unused static, arena_avail_tree_, arena_avail_tree_t,
2356     arena_chunk_map_t, link, arena_avail_comp)
2357
2358 static inline void *
2359 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
2360 {
2361         void *ret;
2362         unsigned i, mask, bit, regind;
2363
2364         assert(run->magic == ARENA_RUN_MAGIC);
2365         assert(run->regs_minelm < bin->regs_mask_nelms);
2366
2367         /*
2368          * Move the first check outside the loop, so that run->regs_minelm can
2369          * be updated unconditionally, without the possibility of updating it
2370          * multiple times.
2371          */
2372         i = run->regs_minelm;
2373         mask = run->regs_mask[i];
2374         if (mask != 0) {
2375                 /* Usable allocation found. */
2376                 bit = ffs((int)mask) - 1;
2377
2378                 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
2379                 assert(regind < bin->nregs);
2380                 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2381                     + (bin->reg_size * regind));
2382
2383                 /* Clear bit. */
2384                 mask ^= (1U << bit);
2385                 run->regs_mask[i] = mask;
2386
2387                 return (ret);
2388         }
2389
2390         for (i++; i < bin->regs_mask_nelms; i++) {
2391                 mask = run->regs_mask[i];
2392                 if (mask != 0) {
2393                         /* Usable allocation found. */
2394                         bit = ffs((int)mask) - 1;
2395
2396                         regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
2397                         assert(regind < bin->nregs);
2398                         ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2399                             + (bin->reg_size * regind));
2400
2401                         /* Clear bit. */
2402                         mask ^= (1U << bit);
2403                         run->regs_mask[i] = mask;
2404
2405                         /*
2406                          * Make a note that nothing before this element
2407                          * contains a free region.
2408                          */
2409                         run->regs_minelm = i; /* Low payoff: + (mask == 0); */
2410
2411                         return (ret);
2412                 }
2413         }
2414         /* Not reached. */
2415         assert(0);
2416         return (NULL);
2417 }
2418
2419 static inline void
2420 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
2421 {
2422         unsigned diff, regind, elm, bit;
2423
2424         assert(run->magic == ARENA_RUN_MAGIC);
2425
2426         /*
2427          * Avoid doing division with a variable divisor if possible.  Using
2428          * actual division here can reduce allocator throughput by over 20%!
2429          */
2430         diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
2431         if ((size & (size - 1)) == 0) {
2432                 /*
2433                  * log2_table allows fast division of a power of two in the
2434                  * [1..128] range.
2435                  *
2436                  * (x / divisor) becomes (x >> log2_table[divisor - 1]).
2437                  */
2438                 static const unsigned char log2_table[] = {
2439                     0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
2440                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
2441                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2442                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
2443                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2444                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2445                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2446                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
2447                 };
2448
2449                 if (size <= 128)
2450                         regind = (diff >> log2_table[size - 1]);
2451                 else if (size <= 32768)
2452                         regind = diff >> (8 + log2_table[(size >> 8) - 1]);
2453                 else
2454                         regind = diff / size;
2455         } else if (size < qspace_max) {
2456                 /*
2457                  * To divide by a number D that is not a power of two we
2458                  * multiply by (2^21 / D) and then right shift by 21 positions.
2459                  *
2460                  *   X / D
2461                  *
2462                  * becomes
2463                  *
2464                  *   (X * qsize_invs[(D >> QUANTUM_2POW) - 3])
2465                  *       >> SIZE_INV_SHIFT
2466                  *
2467                  * We can omit the first three elements, because we never
2468                  * divide by 0, and QUANTUM and 2*QUANTUM are both powers of
2469                  * two, which are handled above.
2470                  */
2471 #define SIZE_INV_SHIFT 21
2472 #define QSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW)) + 1)
2473                 static const unsigned qsize_invs[] = {
2474                     QSIZE_INV(3),
2475                     QSIZE_INV(4), QSIZE_INV(5), QSIZE_INV(6), QSIZE_INV(7)
2476 #if (QUANTUM_2POW < 4)
2477                     ,
2478                     QSIZE_INV(8), QSIZE_INV(9), QSIZE_INV(10), QSIZE_INV(11),
2479                     QSIZE_INV(12),QSIZE_INV(13), QSIZE_INV(14), QSIZE_INV(15)
2480 #endif
2481                 };
2482                 assert(QUANTUM * (((sizeof(qsize_invs)) / sizeof(unsigned)) + 3)
2483                     >= (1U << QSPACE_MAX_2POW_DEFAULT));
2484
2485                 if (size <= (((sizeof(qsize_invs) / sizeof(unsigned)) + 2) <<
2486                     QUANTUM_2POW)) {
2487                         regind = qsize_invs[(size >> QUANTUM_2POW) - 3] * diff;
2488                         regind >>= SIZE_INV_SHIFT;
2489                 } else
2490                         regind = diff / size;
2491 #undef QSIZE_INV
2492         } else if (size < cspace_max) {
2493 #define CSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << CACHELINE_2POW)) + 1)
2494                 static const unsigned csize_invs[] = {
2495                     CSIZE_INV(3),
2496                     CSIZE_INV(4), CSIZE_INV(5), CSIZE_INV(6), CSIZE_INV(7)
2497                 };
2498                 assert(CACHELINE * (((sizeof(csize_invs)) / sizeof(unsigned)) +
2499                     3) >= (1U << CSPACE_MAX_2POW_DEFAULT));
2500
2501                 if (size <= (((sizeof(csize_invs) / sizeof(unsigned)) + 2) <<
2502                     CACHELINE_2POW)) {
2503                         regind = csize_invs[(size >> CACHELINE_2POW) - 3] *
2504                             diff;
2505                         regind >>= SIZE_INV_SHIFT;
2506                 } else
2507                         regind = diff / size;
2508 #undef CSIZE_INV
2509         } else {
2510 #define SSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << SUBPAGE_2POW)) + 1)
2511                 static const unsigned ssize_invs[] = {
2512                     SSIZE_INV(3),
2513                     SSIZE_INV(4), SSIZE_INV(5), SSIZE_INV(6), SSIZE_INV(7),
2514                     SSIZE_INV(8), SSIZE_INV(9), SSIZE_INV(10), SSIZE_INV(11),
2515                     SSIZE_INV(12), SSIZE_INV(13), SSIZE_INV(14), SSIZE_INV(15)
2516 #if (PAGE_SHIFT == 13)
2517                     ,
2518                     SSIZE_INV(16), SSIZE_INV(17), SSIZE_INV(18), SSIZE_INV(19),
2519                     SSIZE_INV(20), SSIZE_INV(21), SSIZE_INV(22), SSIZE_INV(23),
2520                     SSIZE_INV(24), SSIZE_INV(25), SSIZE_INV(26), SSIZE_INV(27),
2521                     SSIZE_INV(28), SSIZE_INV(29), SSIZE_INV(29), SSIZE_INV(30)
2522 #endif
2523                 };
2524                 assert(SUBPAGE * (((sizeof(ssize_invs)) / sizeof(unsigned)) + 3)
2525                     >= PAGE_SIZE);
2526
2527                 if (size < (((sizeof(ssize_invs) / sizeof(unsigned)) + 2) <<
2528                     SUBPAGE_2POW)) {
2529                         regind = ssize_invs[(size >> SUBPAGE_2POW) - 3] * diff;
2530                         regind >>= SIZE_INV_SHIFT;
2531                 } else
2532                         regind = diff / size;
2533 #undef SSIZE_INV
2534         }
2535 #undef SIZE_INV_SHIFT
2536         assert(diff == regind * size);
2537         assert(regind < bin->nregs);
2538
2539         elm = regind >> (SIZEOF_INT_2POW + 3);
2540         if (elm < run->regs_minelm)
2541                 run->regs_minelm = elm;
2542         bit = regind - (elm << (SIZEOF_INT_2POW + 3));
2543         assert((run->regs_mask[elm] & (1U << bit)) == 0);
2544         run->regs_mask[elm] |= (1U << bit);
2545 }
2546
2547 static void
2548 arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
2549     bool zero)
2550 {
2551         arena_chunk_t *chunk;
2552         size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
2553
2554         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2555         old_ndirty = chunk->ndirty;
2556         run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2557             >> PAGE_SHIFT);
2558         total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
2559             PAGE_SHIFT;
2560         need_pages = (size >> PAGE_SHIFT);
2561         assert(need_pages > 0);
2562         assert(need_pages <= total_pages);
2563         rem_pages = total_pages - need_pages;
2564
2565         arena_avail_tree_remove(&arena->runs_avail, &chunk->map[run_ind]);
2566
2567         /* Keep track of trailing unused pages for later use. */
2568         if (rem_pages > 0) {
2569                 chunk->map[run_ind+need_pages].bits = (rem_pages <<
2570                     PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
2571                     PAGE_MASK);
2572                 chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
2573                     PAGE_SHIFT) | (chunk->map[run_ind+total_pages-1].bits &
2574                     PAGE_MASK);
2575                 arena_avail_tree_insert(&arena->runs_avail,
2576                     &chunk->map[run_ind+need_pages]);
2577         }
2578
2579         for (i = 0; i < need_pages; i++) {
2580                 /* Zero if necessary. */
2581                 if (zero) {
2582                         if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED)
2583                             == 0) {
2584                                 memset((void *)((uintptr_t)chunk + ((run_ind
2585                                     + i) << PAGE_SHIFT)), 0, PAGE_SIZE);
2586                                 /* CHUNK_MAP_ZEROED is cleared below. */
2587                         }
2588                 }
2589
2590                 /* Update dirty page accounting. */
2591                 if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
2592                         chunk->ndirty--;
2593                         arena->ndirty--;
2594                         /* CHUNK_MAP_DIRTY is cleared below. */
2595                 }
2596
2597                 /* Initialize the chunk map. */
2598                 if (large) {
2599                         chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE
2600                             | CHUNK_MAP_ALLOCATED;
2601                 } else {
2602                         chunk->map[run_ind + i].bits = (size_t)run
2603                             | CHUNK_MAP_ALLOCATED;
2604                 }
2605         }
2606
2607         /*
2608          * Set the run size only in the first element for large runs.  This is
2609          * primarily a debugging aid, since the lack of size info for trailing
2610          * pages only matters if the application tries to operate on an
2611          * interior pointer.
2612          */
2613         if (large)
2614                 chunk->map[run_ind].bits |= size;
2615
2616         if (chunk->ndirty == 0 && old_ndirty > 0)
2617                 arena_chunk_tree_dirty_remove(&arena->chunks_dirty, chunk);
2618 }
2619
2620 static arena_chunk_t *
2621 arena_chunk_alloc(arena_t *arena)
2622 {
2623         arena_chunk_t *chunk;
2624         size_t i;
2625
2626         if (arena->spare != NULL) {
2627                 chunk = arena->spare;
2628                 arena->spare = NULL;
2629         } else {
2630                 chunk = (arena_chunk_t *)chunk_alloc(chunksize, true);
2631                 if (chunk == NULL)
2632                         return (NULL);
2633 #ifdef MALLOC_STATS
2634                 arena->stats.mapped += chunksize;
2635 #endif
2636
2637                 chunk->arena = arena;
2638
2639                 /*
2640                  * Claim that no pages are in use, since the header is merely
2641                  * overhead.
2642                  */
2643                 chunk->ndirty = 0;
2644
2645                 /*
2646                  * Initialize the map to contain one maximal free untouched run.
2647                  */
2648                 for (i = 0; i < arena_chunk_header_npages; i++)
2649                         chunk->map[i].bits = 0;
2650                 chunk->map[i].bits = arena_maxclass | CHUNK_MAP_ZEROED;
2651                 for (i++; i < chunk_npages-1; i++) {
2652                         chunk->map[i].bits = CHUNK_MAP_ZEROED;
2653                 }
2654                 chunk->map[chunk_npages-1].bits = arena_maxclass |
2655                     CHUNK_MAP_ZEROED;
2656         }
2657
2658         /* Insert the run into the runs_avail tree. */
2659         arena_avail_tree_insert(&arena->runs_avail,
2660             &chunk->map[arena_chunk_header_npages]);
2661
2662         return (chunk);
2663 }
2664
2665 static void
2666 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
2667 {
2668
2669         if (arena->spare != NULL) {
2670                 if (arena->spare->ndirty > 0) {
2671                         arena_chunk_tree_dirty_remove(
2672                             &chunk->arena->chunks_dirty, arena->spare);
2673                         arena->ndirty -= arena->spare->ndirty;
2674                 }
2675                 chunk_dealloc((void *)arena->spare, chunksize);
2676 #ifdef MALLOC_STATS
2677                 arena->stats.mapped -= chunksize;
2678 #endif
2679         }
2680
2681         /*
2682          * Remove run from runs_avail, regardless of whether this chunk
2683          * will be cached, so that the arena does not use it.  Dirty page
2684          * flushing only uses the chunks_dirty tree, so leaving this chunk in
2685          * the chunks_* trees is sufficient for that purpose.
2686          */
2687         arena_avail_tree_remove(&arena->runs_avail,
2688             &chunk->map[arena_chunk_header_npages]);
2689
2690         arena->spare = chunk;
2691 }
2692
2693 static arena_run_t *
2694 arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
2695 {
2696         arena_chunk_t *chunk;
2697         arena_run_t *run;
2698         arena_chunk_map_t *mapelm, key;
2699
2700         assert(size <= arena_maxclass);
2701         assert((size & PAGE_MASK) == 0);
2702
2703         /* Search the arena's chunks for the lowest best fit. */
2704         key.bits = size | CHUNK_MAP_KEY;
2705         mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
2706         if (mapelm != NULL) {
2707                 arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
2708                 size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
2709                     / sizeof(arena_chunk_map_t);
2710
2711                 run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
2712                     << PAGE_SHIFT));
2713                 arena_run_split(arena, run, size, large, zero);
2714                 return (run);
2715         }
2716
2717         /*
2718          * No usable runs.  Create a new chunk from which to allocate the run.
2719          */
2720         chunk = arena_chunk_alloc(arena);
2721         if (chunk == NULL)
2722                 return (NULL);
2723         run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2724             PAGE_SHIFT));
2725         /* Update page map. */
2726         arena_run_split(arena, run, size, large, zero);
2727         return (run);
2728 }
2729
2730 static void
2731 arena_purge(arena_t *arena)
2732 {
2733         arena_chunk_t *chunk;
2734         size_t i, npages;
2735 #ifdef MALLOC_DEBUG
2736         size_t ndirty = 0;
2737
2738         rb_foreach_begin(arena_chunk_t, link_dirty, &arena->chunks_dirty,
2739             chunk) {
2740                 ndirty += chunk->ndirty;
2741         } rb_foreach_end(arena_chunk_t, link_dirty, &arena->chunks_dirty, chunk)
2742         assert(ndirty == arena->ndirty);
2743 #endif
2744         assert(arena->ndirty > opt_dirty_max);
2745
2746 #ifdef MALLOC_STATS
2747         arena->stats.npurge++;
2748 #endif
2749
2750         /*
2751          * Iterate downward through chunks until enough dirty memory has been
2752          * purged.  Terminate as soon as possible in order to minimize the
2753          * number of system calls, even if a chunk has only been partially
2754          * purged.
2755          */
2756         while (arena->ndirty > (opt_dirty_max >> 1)) {
2757                 chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
2758                 assert(chunk != NULL);
2759
2760                 for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
2761                         assert(i >= arena_chunk_header_npages);
2762
2763                         if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
2764                                 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2765                                 /* Find adjacent dirty run(s). */
2766                                 for (npages = 1; i > arena_chunk_header_npages
2767                                     && (chunk->map[i - 1].bits &
2768                                     CHUNK_MAP_DIRTY); npages++) {
2769                                         i--;
2770                                         chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2771                                 }
2772                                 chunk->ndirty -= npages;
2773                                 arena->ndirty -= npages;
2774
2775                                 madvise((void *)((uintptr_t)chunk + (i <<
2776                                     PAGE_SHIFT)), (npages << PAGE_SHIFT),
2777                                     MADV_FREE);
2778 #ifdef MALLOC_STATS
2779                                 arena->stats.nmadvise++;
2780                                 arena->stats.purged += npages;
2781 #endif
2782                                 if (arena->ndirty <= (opt_dirty_max >> 1))
2783                                         break;
2784                         }
2785                 }
2786
2787                 if (chunk->ndirty == 0) {
2788                         arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
2789                             chunk);
2790                 }
2791         }
2792 }
2793
2794 static void
2795 arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
2796 {
2797         arena_chunk_t *chunk;
2798         size_t size, run_ind, run_pages;
2799
2800         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2801         run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
2802             >> PAGE_SHIFT);
2803         assert(run_ind >= arena_chunk_header_npages);
2804         assert(run_ind < chunk_npages);
2805         if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
2806                 size = chunk->map[run_ind].bits & ~PAGE_MASK;
2807         else
2808                 size = run->bin->run_size;
2809         run_pages = (size >> PAGE_SHIFT);
2810
2811         /* Mark pages as unallocated in the chunk map. */
2812         if (dirty) {
2813                 size_t i;
2814
2815                 for (i = 0; i < run_pages; i++) {
2816                         assert((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY)
2817                             == 0);
2818                         chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
2819                 }
2820
2821                 if (chunk->ndirty == 0) {
2822                         arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
2823                             chunk);
2824                 }
2825                 chunk->ndirty += run_pages;
2826                 arena->ndirty += run_pages;
2827         } else {
2828                 size_t i;
2829
2830                 for (i = 0; i < run_pages; i++) {
2831                         chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE |
2832                             CHUNK_MAP_ALLOCATED);
2833                 }
2834         }
2835         chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2836             PAGE_MASK);
2837         chunk->map[run_ind+run_pages-1].bits = size |
2838             (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2839
2840         /* Try to coalesce forward. */
2841         if (run_ind + run_pages < chunk_npages &&
2842             (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
2843                 size_t nrun_size = chunk->map[run_ind+run_pages].bits &
2844                     ~PAGE_MASK;
2845
2846                 /*
2847                  * Remove successor from runs_avail; the coalesced run is
2848                  * inserted later.
2849                  */
2850                 arena_avail_tree_remove(&arena->runs_avail,
2851                     &chunk->map[run_ind+run_pages]);
2852
2853                 size += nrun_size;
2854                 run_pages = size >> PAGE_SHIFT;
2855
2856                 assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
2857                     == nrun_size);
2858                 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2859                     PAGE_MASK);
2860                 chunk->map[run_ind+run_pages-1].bits = size |
2861                     (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2862         }
2863
2864         /* Try to coalesce backward. */
2865         if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
2866             CHUNK_MAP_ALLOCATED) == 0) {
2867                 size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;
2868
2869                 run_ind -= prun_size >> PAGE_SHIFT;
2870
2871                 /*
2872                  * Remove predecessor from runs_avail; the coalesced run is
2873                  * inserted later.
2874                  */
2875                 arena_avail_tree_remove(&arena->runs_avail,
2876                     &chunk->map[run_ind]);
2877
2878                 size += prun_size;
2879                 run_pages = size >> PAGE_SHIFT;
2880
2881                 assert((chunk->map[run_ind].bits & ~PAGE_MASK) ==
2882                     prun_size);
2883                 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2884                     PAGE_MASK);
2885                 chunk->map[run_ind+run_pages-1].bits = size |
2886                     (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2887         }
2888
2889         /* Insert into runs_avail, now that coalescing is complete. */
2890         arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
2891
2892         /* Deallocate chunk if it is now completely unused. */
2893         if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
2894             CHUNK_MAP_ALLOCATED)) == arena_maxclass)
2895                 arena_chunk_dealloc(arena, chunk);
2896
2897         /* Enforce opt_dirty_max. */
2898         if (arena->ndirty > opt_dirty_max)
2899                 arena_purge(arena);
2900 }
2901
2902 static void
2903 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
2904     size_t oldsize, size_t newsize)
2905 {
2906         size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
2907         size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
2908
2909         assert(oldsize > newsize);
2910
2911         /*
2912          * Update the chunk map so that arena_run_dalloc() can treat the
2913          * leading run as separately allocated.
2914          */
2915         chunk->map[pageind].bits = (oldsize - newsize) | CHUNK_MAP_LARGE |
2916             CHUNK_MAP_ALLOCATED;
2917         chunk->map[pageind+head_npages].bits = newsize | CHUNK_MAP_LARGE |
2918             CHUNK_MAP_ALLOCATED;
2919
2920         arena_run_dalloc(arena, run, false);
2921 }
2922
2923 static void
2924 arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
2925     size_t oldsize, size_t newsize, bool dirty)
2926 {
2927         size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
2928         size_t npages = newsize >> PAGE_SHIFT;
2929
2930         assert(oldsize > newsize);
2931
2932         /*
2933          * Update the chunk map so that arena_run_dalloc() can treat the
2934          * trailing run as separately allocated.
2935          */
2936         chunk->map[pageind].bits = newsize | CHUNK_MAP_LARGE |
2937             CHUNK_MAP_ALLOCATED;
2938         chunk->map[pageind+npages].bits = (oldsize - newsize) | CHUNK_MAP_LARGE
2939             | CHUNK_MAP_ALLOCATED;
2940
2941         arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
2942             dirty);
2943 }
2944
2945 static arena_run_t *
2946 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2947 {
2948         arena_chunk_map_t *mapelm;
2949         arena_run_t *run;
2950         unsigned i, remainder;
2951
2952         /* Look for a usable run. */
2953         mapelm = arena_run_tree_first(&bin->runs);
2954         if (mapelm != NULL) {
2955                 /* run is guaranteed to have available space. */
2956                 arena_run_tree_remove(&bin->runs, mapelm);
2957                 run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
2958 #ifdef MALLOC_STATS
2959                 bin->stats.reruns++;
2960 #endif
2961                 return (run);
2962         }
2963         /* No existing runs have any space available. */
2964
2965         /* Allocate a new run. */
2966         run = arena_run_alloc(arena, bin->run_size, false, false);
2967         if (run == NULL)
2968                 return (NULL);
2969
2970         /* Initialize run internals. */
2971         run->bin = bin;
2972
2973         for (i = 0; i < bin->regs_mask_nelms - 1; i++)
2974                 run->regs_mask[i] = UINT_MAX;
2975         remainder = bin->nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1);
2976         if (remainder == 0)
2977                 run->regs_mask[i] = UINT_MAX;
2978         else {
2979                 /* The last element has spare bits that need to be unset. */
2980                 run->regs_mask[i] = (UINT_MAX >> ((1U << (SIZEOF_INT_2POW + 3))
2981                     - remainder));
2982         }
2983
2984         run->regs_minelm = 0;
2985
2986         run->nfree = bin->nregs;
2987 #ifdef MALLOC_DEBUG
2988         run->magic = ARENA_RUN_MAGIC;
2989 #endif
2990
2991 #ifdef MALLOC_STATS
2992         bin->stats.nruns++;
2993         bin->stats.curruns++;
2994         if (bin->stats.curruns > bin->stats.highruns)
2995                 bin->stats.highruns = bin->stats.curruns;
2996 #endif
2997         return (run);
2998 }
2999
3000 /* bin->runcur must have space available before this function is called. */
3001 static inline void *
3002 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
3003 {
3004         void *ret;
3005
3006         assert(run->magic == ARENA_RUN_MAGIC);
3007         assert(run->nfree > 0);
3008
3009         ret = arena_run_reg_alloc(run, bin);
3010         assert(ret != NULL);
3011         run->nfree--;
3012
3013         return (ret);
3014 }
3015
3016 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
3017 static void *
3018 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
3019 {
3020
3021         bin->runcur = arena_bin_nonfull_run_get(arena, bin);
3022         if (bin->runcur == NULL)
3023                 return (NULL);
3024         assert(bin->runcur->magic == ARENA_RUN_MAGIC);
3025         assert(bin->runcur->nfree > 0);
3026
3027         return (arena_bin_malloc_easy(arena, bin, bin->runcur));
3028 }
3029
3030 /*
3031  * Calculate bin->run_size such that it meets the following constraints:
3032  *
3033  *   *) bin->run_size >= min_run_size
3034  *   *) bin->run_size <= arena_maxclass
3035  *   *) bin->run_size <= RUN_MAX_SMALL
3036  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
3037  *
3038  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
3039  * also calculated here, since these settings are all interdependent.
3040  */
3041 static size_t
3042 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
3043 {
3044         size_t try_run_size, good_run_size;
3045         unsigned good_nregs, good_mask_nelms, good_reg0_offset;
3046         unsigned try_nregs, try_mask_nelms, try_reg0_offset;
3047
3048         assert(min_run_size >= PAGE_SIZE);
3049         assert(min_run_size <= arena_maxclass);
3050         assert(min_run_size <= RUN_MAX_SMALL);
3051
3052         /*
3053          * Calculate known-valid settings before entering the run_size
3054          * expansion loop, so that the first part of the loop always copies
3055          * valid settings.
3056          *
3057          * The do..while loop iteratively reduces the number of regions until
3058          * the run header and the regions no longer overlap.  A closed formula
3059          * would be quite messy, since there is an interdependency between the
3060          * header's mask length and the number of regions.
3061          */
3062         try_run_size = min_run_size;
3063         try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
3064             + 1; /* Counter-act try_nregs-- in loop. */
3065         do {
3066                 try_nregs--;
3067                 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
3068                     ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
3069                 try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
3070         } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
3071             > try_reg0_offset);
3072
3073         /* run_size expansion loop. */
3074         do {
3075                 /*
3076                  * Copy valid settings before trying more aggressive settings.
3077                  */
3078                 good_run_size = try_run_size;
3079                 good_nregs = try_nregs;
3080                 good_mask_nelms = try_mask_nelms;
3081                 good_reg0_offset = try_reg0_offset;
3082
3083                 /* Try more aggressive settings. */
3084                 try_run_size += PAGE_SIZE;
3085                 try_nregs = ((try_run_size - sizeof(arena_run_t)) /
3086                     bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
3087                 do {
3088                         try_nregs--;
3089                         try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
3090                             ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ?
3091                             1 : 0);
3092                         try_reg0_offset = try_run_size - (try_nregs *
3093                             bin->reg_size);
3094                 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
3095                     (try_mask_nelms - 1)) > try_reg0_offset);
3096         } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
3097             && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
3098             && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
3099
3100         assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
3101             <= good_reg0_offset);
3102         assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
3103
3104         /* Copy final settings. */
3105         bin->run_size = good_run_size;
3106         bin->nregs = good_nregs;
3107         bin->regs_mask_nelms = good_mask_nelms;
3108         bin->reg0_offset = good_reg0_offset;
3109
3110         return (good_run_size);
3111 }
3112
3113 #ifdef MALLOC_BALANCE
3114 static inline void
3115 arena_lock_balance(arena_t *arena)
3116 {
3117         unsigned contention;
3118
3119         contention = malloc_spin_lock(&arena->lock);
3120         if (narenas > 1) {
3121                 /*
3122                  * Calculate the exponentially averaged contention for this
3123                  * arena.  Due to integer math always rounding down, this value
3124                  * decays somewhat faster than normal.
3125                  */
3126                 arena->contention = (((uint64_t)arena->contention
3127                     * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1))
3128                     + (uint64_t)contention) >> BALANCE_ALPHA_INV_2POW;
3129                 if (arena->contention >= opt_balance_threshold)
3130                         arena_lock_balance_hard(arena);
3131         }
3132 }
3133
3134 static void
3135 arena_lock_balance_hard(arena_t *arena)
3136 {
3137         uint32_t ind;
3138
3139         arena->contention = 0;
3140 #ifdef MALLOC_STATS
3141         arena->stats.nbalance++;
3142 #endif
3143         ind = PRN(balance, narenas_2pow);
3144         if (arenas[ind] != NULL)
3145                 arenas_map = arenas[ind];
3146         else {
3147                 malloc_spin_lock(&arenas_lock);
3148                 if (arenas[ind] != NULL)
3149                         arenas_map = arenas[ind];
3150                 else
3151                         arenas_map = arenas_extend(ind);
3152                 malloc_spin_unlock(&arenas_lock);
3153         }
3154 }
3155 #endif
3156
3157 #ifdef MALLOC_MAG
3158 static inline void *
3159 mag_alloc(mag_t *mag)
3160 {
3161
3162         if (mag->nrounds == 0)
3163                 return (NULL);
3164         mag->nrounds--;
3165
3166         return (mag->rounds[mag->nrounds]);
3167 }
3168
3169 static void
3170 mag_load(mag_t *mag)
3171 {
3172         arena_t *arena;
3173         arena_bin_t *bin;
3174         arena_run_t *run;
3175         void *round;
3176         size_t i;
3177
3178         arena = choose_arena();
3179         bin = &arena->bins[mag->binind];
3180 #ifdef MALLOC_BALANCE
3181         arena_lock_balance(arena);
3182 #else
3183         malloc_spin_lock(&arena->lock);
3184 #endif
3185         for (i = mag->nrounds; i < max_rounds; i++) {
3186                 if ((run = bin->runcur) != NULL && run->nfree > 0)
3187                         round = arena_bin_malloc_easy(arena, bin, run);
3188                 else
3189                         round = arena_bin_malloc_hard(arena, bin);
3190                 if (round == NULL)
3191                         break;
3192                 mag->rounds[i] = round;
3193         }
3194 #ifdef MALLOC_STATS
3195         bin->stats.nmags++;
3196         arena->stats.nmalloc_small += (i - mag->nrounds);
3197         arena->stats.allocated_small += (i - mag->nrounds) * bin->reg_size;
3198 #endif
3199         malloc_spin_unlock(&arena->lock);
3200         mag->nrounds = i;
3201 }
3202
3203 static inline void *
3204 mag_rack_alloc(mag_rack_t *rack, size_t size, bool zero)
3205 {
3206         void *ret;
3207         bin_mags_t *bin_mags;
3208         mag_t *mag;
3209         size_t binind;
3210
3211         binind = size2bin[size];
3212         assert(binind < nbins);
3213         bin_mags = &rack->bin_mags[binind];
3214
3215         mag = bin_mags->curmag;
3216         if (mag == NULL) {
3217                 /* Create an initial magazine for this size class. */
3218                 assert(bin_mags->sparemag == NULL);
3219                 mag = mag_create(choose_arena(), binind);
3220                 if (mag == NULL)
3221                         return (NULL);
3222                 bin_mags->curmag = mag;
3223                 mag_load(mag);
3224         }
3225
3226         ret = mag_alloc(mag);
3227         if (ret == NULL) {
3228                 if (bin_mags->sparemag != NULL) {
3229                         if (bin_mags->sparemag->nrounds > 0) {
3230                                 /* Swap magazines. */
3231                                 bin_mags->curmag = bin_mags->sparemag;
3232                                 bin_mags->sparemag = mag;
3233                                 mag = bin_mags->curmag;
3234                         } else {
3235                                 /* Reload the current magazine. */
3236                                 mag_load(mag);
3237                         }
3238                 } else {
3239                         /* Create a second magazine. */
3240                         mag = mag_create(choose_arena(), binind);
3241                         if (mag == NULL)
3242                                 return (NULL);
3243                         mag_load(mag);
3244                         bin_mags->sparemag = bin_mags->curmag;
3245                         bin_mags->curmag = mag;
3246                 }
3247                 ret = mag_alloc(mag);
3248                 if (ret == NULL)
3249                         return (NULL);
3250         }
3251
3252         if (zero == false) {
3253                 if (opt_junk)
3254                         memset(ret, 0xa5, size);
3255                 else if (opt_zero)
3256                         memset(ret, 0, size);
3257         } else
3258                 memset(ret, 0, size);
3259
3260         return (ret);
3261 }
3262 #endif
3263
3264 static inline void *
3265 arena_malloc_small(arena_t *arena, size_t size, bool zero)
3266 {
3267         void *ret;
3268         arena_bin_t *bin;
3269         arena_run_t *run;
3270         size_t binind;
3271
3272         binind = size2bin[size];
3273         assert(binind < nbins);
3274         bin = &arena->bins[binind];
3275         size = bin->reg_size;
3276
3277 #ifdef MALLOC_BALANCE
3278         arena_lock_balance(arena);
3279 #else
3280         malloc_spin_lock(&arena->lock);
3281 #endif
3282         if ((run = bin->runcur) != NULL && run->nfree > 0)
3283                 ret = arena_bin_malloc_easy(arena, bin, run);
3284         else
3285                 ret = arena_bin_malloc_hard(arena, bin);
3286
3287         if (ret == NULL) {
3288                 malloc_spin_unlock(&arena->lock);
3289                 return (NULL);
3290         }
3291
3292 #ifdef MALLOC_STATS
3293         bin->stats.nrequests++;
3294         arena->stats.nmalloc_small++;
3295         arena->stats.allocated_small += size;
3296 #endif
3297         malloc_spin_unlock(&arena->lock);
3298
3299         if (zero == false) {
3300                 if (opt_junk)
3301                         memset(ret, 0xa5, size);
3302                 else if (opt_zero)
3303                         memset(ret, 0, size);
3304         } else
3305                 memset(ret, 0, size);
3306
3307         return (ret);
3308 }
3309
3310 static void *
3311 arena_malloc_large(arena_t *arena, size_t size, bool zero)
3312 {
3313         void *ret;
3314
3315         /* Large allocation. */
3316         size = PAGE_CEILING(size);
3317 #ifdef MALLOC_BALANCE
3318         arena_lock_balance(arena);
3319 #else
3320         malloc_spin_lock(&arena->lock);
3321 #endif
3322         ret = (void *)arena_run_alloc(arena, size, true, zero);
3323         if (ret == NULL) {
3324                 malloc_spin_unlock(&arena->lock);
3325                 return (NULL);
3326         }
3327 #ifdef MALLOC_STATS
3328         arena->stats.nmalloc_large++;
3329         arena->stats.allocated_large += size;
3330 #endif
3331         malloc_spin_unlock(&arena->lock);
3332
3333         if (zero == false) {
3334                 if (opt_junk)
3335                         memset(ret, 0xa5, size);
3336                 else if (opt_zero)
3337                         memset(ret, 0, size);
3338         }
3339
3340         return (ret);
3341 }
3342
3343 static inline void *
3344 arena_malloc(arena_t *arena, size_t size, bool zero)
3345 {
3346
3347         assert(arena != NULL);
3348         assert(arena->magic == ARENA_MAGIC);
3349         assert(size != 0);
3350         assert(QUANTUM_CEILING(size) <= arena_maxclass);
3351
3352         if (size <= bin_maxclass) {
3353 #ifdef MALLOC_MAG
3354                 if (__isthreaded && opt_mag) {
3355                         mag_rack_t *rack = mag_rack;
3356                         if (rack == NULL) {
3357                                 rack = mag_rack_create(arena);
3358                                 if (rack == NULL)
3359                                         return (NULL);
3360                                 mag_rack = rack;
3361                         }
3362                         return (mag_rack_alloc(rack, size, zero));
3363                 } else
3364 #endif
3365                         return (arena_malloc_small(arena, size, zero));
3366         } else
3367                 return (arena_malloc_large(arena, size, zero));
3368 }
3369
3370 static inline void *
3371 imalloc(size_t size)
3372 {
3373
3374         assert(size != 0);
3375
3376         if (size <= arena_maxclass)
3377                 return (arena_malloc(choose_arena(), size, false));
3378         else
3379                 return (huge_malloc(size, false));
3380 }
3381
3382 static inline void *
3383 icalloc(size_t size)
3384 {
3385
3386         if (size <= arena_maxclass)
3387                 return (arena_malloc(choose_arena(), size, true));
3388         else
3389                 return (huge_malloc(size, true));
3390 }
3391
3392 /* Only handles large allocations that require more than page alignment. */
3393 static void *
3394 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
3395 {
3396         void *ret;
3397         size_t offset;
3398         arena_chunk_t *chunk;
3399
3400         assert((size & PAGE_MASK) == 0);
3401         assert((alignment & PAGE_MASK) == 0);
3402
3403 #ifdef MALLOC_BALANCE
3404         arena_lock_balance(arena);
3405 #else
3406         malloc_spin_lock(&arena->lock);
3407 #endif
3408         ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
3409         if (ret == NULL) {
3410                 malloc_spin_unlock(&arena->lock);
3411                 return (NULL);
3412         }
3413
3414         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
3415
3416         offset = (uintptr_t)ret & (alignment - 1);
3417         assert((offset & PAGE_MASK) == 0);
3418         assert(offset < alloc_size);
3419         if (offset == 0)
3420                 arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
3421         else {
3422                 size_t leadsize, trailsize;
3423
3424                 leadsize = alignment - offset;
3425                 if (leadsize > 0) {
3426                         arena_run_trim_head(arena, chunk, ret, alloc_size,
3427                             alloc_size - leadsize);
3428                         ret = (void *)((uintptr_t)ret + leadsize);
3429                 }
3430
3431                 trailsize = alloc_size - leadsize - size;
3432                 if (trailsize != 0) {
3433                         /* Trim trailing space. */
3434                         assert(trailsize < alloc_size);
3435                         arena_run_trim_tail(arena, chunk, ret, size + trailsize,
3436                             size, false);
3437                 }
3438         }
3439
3440 #ifdef MALLOC_STATS
3441         arena->stats.nmalloc_large++;
3442         arena->stats.allocated_large += size;
3443 #endif
3444         malloc_spin_unlock(&arena->lock);
3445
3446         if (opt_junk)
3447                 memset(ret, 0xa5, size);
3448         else if (opt_zero)
3449                 memset(ret, 0, size);
3450         return (ret);
3451 }
3452
3453 static inline void *
3454 ipalloc(size_t alignment, size_t size)
3455 {
3456         void *ret;
3457         size_t ceil_size;
3458
3459         /*
3460          * Round size up to the nearest multiple of alignment.
3461          *
3462          * This done, we can take advantage of the fact that for each small
3463          * size class, every object is aligned at the smallest power of two
3464          * that is non-zero in the base two representation of the size.  For
3465          * example:
3466          *
3467          *   Size |   Base 2 | Minimum alignment
3468          *   -----+----------+------------------
3469          *     96 |  1100000 |  32
3470          *    144 | 10100000 |  32
3471          *    192 | 11000000 |  64
3472          *
3473          * Depending on runtime settings, it is possible that arena_malloc()
3474          * will further round up to a power of two, but that never causes
3475          * correctness issues.
3476          */
3477         ceil_size = (size + (alignment - 1)) & (-alignment);
3478         /*
3479          * (ceil_size < size) protects against the combination of maximal
3480          * alignment and size greater than maximal alignment.
3481          */
3482         if (ceil_size < size) {
3483                 /* size_t overflow. */
3484                 return (NULL);
3485         }
3486
3487         if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
3488             && ceil_size <= arena_maxclass))
3489                 ret = arena_malloc(choose_arena(), ceil_size, false);
3490         else {
3491                 size_t run_size;
3492
3493                 /*
3494                  * We can't achieve subpage alignment, so round up alignment
3495                  * permanently; it makes later calculations simpler.
3496                  */
3497                 alignment = PAGE_CEILING(alignment);
3498                 ceil_size = PAGE_CEILING(size);
3499                 /*
3500                  * (ceil_size < size) protects against very large sizes within
3501                  * PAGE_SIZE of SIZE_T_MAX.
3502                  *
3503                  * (ceil_size + alignment < ceil_size) protects against the
3504                  * combination of maximal alignment and ceil_size large enough
3505                  * to cause overflow.  This is similar to the first overflow
3506                  * check above, but it needs to be repeated due to the new
3507                  * ceil_size value, which may now be *equal* to maximal
3508                  * alignment, whereas before we only detected overflow if the
3509                  * original size was *greater* than maximal alignment.
3510                  */
3511                 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3512                         /* size_t overflow. */
3513                         return (NULL);
3514                 }
3515
3516                 /*
3517                  * Calculate the size of the over-size run that arena_palloc()
3518                  * would need to allocate in order to guarantee the alignment.
3519                  */
3520                 if (ceil_size >= alignment)
3521                         run_size = ceil_size + alignment - PAGE_SIZE;
3522                 else {
3523                         /*
3524                          * It is possible that (alignment << 1) will cause
3525                          * overflow, but it doesn't matter because we also
3526                          * subtract PAGE_SIZE, which in the case of overflow
3527                          * leaves us with a very large run_size.  That causes
3528                          * the first conditional below to fail, which means
3529                          * that the bogus run_size value never gets used for
3530                          * anything important.
3531                          */
3532                         run_size = (alignment << 1) - PAGE_SIZE;
3533                 }
3534
3535                 if (run_size <= arena_maxclass) {
3536                         ret = arena_palloc(choose_arena(), alignment, ceil_size,
3537                             run_size);
3538                 } else if (alignment <= chunksize)
3539                         ret = huge_malloc(ceil_size, false);
3540                 else
3541                         ret = huge_palloc(alignment, ceil_size);
3542         }
3543
3544         assert(((uintptr_t)ret & (alignment - 1)) == 0);
3545         return (ret);
3546 }
3547
3548 /* Return the size of the allocation pointed to by ptr. */
3549 static size_t
3550 arena_salloc(const void *ptr)
3551 {
3552         size_t ret;
3553         arena_chunk_t *chunk;
3554         size_t pageind, mapbits;
3555
3556         assert(ptr != NULL);
3557         assert(CHUNK_ADDR2BASE(ptr) != ptr);
3558
3559         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3560         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3561         mapbits = chunk->map[pageind].bits;
3562         assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3563         if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3564                 arena_run_t *run = (arena_run_t *)(mapbits & ~PAGE_MASK);
3565                 assert(run->magic == ARENA_RUN_MAGIC);
3566                 ret = run->bin->reg_size;
3567         } else {
3568                 ret = mapbits & ~PAGE_MASK;
3569                 assert(ret != 0);
3570         }
3571
3572         return (ret);
3573 }
3574
3575 static inline size_t
3576 isalloc(const void *ptr)
3577 {
3578         size_t ret;
3579         arena_chunk_t *chunk;
3580
3581         assert(ptr != NULL);
3582
3583         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3584         if (chunk != ptr) {
3585                 /* Region. */
3586                 assert(chunk->arena->magic == ARENA_MAGIC);
3587
3588                 ret = arena_salloc(ptr);
3589         } else {
3590                 extent_node_t *node, key;
3591
3592                 /* Chunk (huge allocation). */
3593
3594                 malloc_mutex_lock(&huge_mtx);
3595
3596                 /* Extract from tree of huge allocations. */
3597                 key.addr = __DECONST(void *, ptr);
3598                 node = extent_tree_ad_search(&huge, &key);
3599                 assert(node != NULL);
3600
3601                 ret = node->size;
3602
3603                 malloc_mutex_unlock(&huge_mtx);
3604         }
3605
3606         return (ret);
3607 }
3608
3609 static inline void
3610 arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3611     arena_chunk_map_t *mapelm)
3612 {
3613         arena_run_t *run;
3614         arena_bin_t *bin;
3615         size_t size;
3616
3617         run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
3618         assert(run->magic == ARENA_RUN_MAGIC);
3619         bin = run->bin;
3620         size = bin->reg_size;
3621
3622         if (opt_junk)
3623                 memset(ptr, 0x5a, size);
3624
3625         arena_run_reg_dalloc(run, bin, ptr, size);
3626         run->nfree++;
3627
3628         if (run->nfree == bin->nregs) {
3629                 /* Deallocate run. */
3630                 if (run == bin->runcur)
3631                         bin->runcur = NULL;
3632                 else if (bin->nregs != 1) {
3633                         size_t run_pageind = (((uintptr_t)run -
3634                             (uintptr_t)chunk)) >> PAGE_SHIFT;
3635                         arena_chunk_map_t *run_mapelm =
3636                             &chunk->map[run_pageind];
3637                         /*
3638                          * This block's conditional is necessary because if the
3639                          * run only contains one region, then it never gets
3640                          * inserted into the non-full runs tree.
3641                          */
3642                         arena_run_tree_remove(&bin->runs, run_mapelm);
3643                 }
3644 #ifdef MALLOC_DEBUG
3645                 run->magic = 0;
3646 #endif
3647                 arena_run_dalloc(arena, run, true);
3648 #ifdef MALLOC_STATS
3649                 bin->stats.curruns--;
3650 #endif
3651         } else if (run->nfree == 1 && run != bin->runcur) {
3652                 /*
3653                  * Make sure that bin->runcur always refers to the lowest
3654                  * non-full run, if one exists.
3655                  */
3656                 if (bin->runcur == NULL)
3657                         bin->runcur = run;
3658                 else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
3659                         /* Switch runcur. */
3660                         if (bin->runcur->nfree > 0) {
3661                                 arena_chunk_t *runcur_chunk =
3662                                     CHUNK_ADDR2BASE(bin->runcur);
3663                                 size_t runcur_pageind =
3664                                     (((uintptr_t)bin->runcur -
3665                                     (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
3666                                 arena_chunk_map_t *runcur_mapelm =
3667                                     &runcur_chunk->map[runcur_pageind];
3668
3669                                 /* Insert runcur. */
3670                                 arena_run_tree_insert(&bin->runs,
3671                                     runcur_mapelm);
3672                         }
3673                         bin->runcur = run;
3674                 } else {
3675                         size_t run_pageind = (((uintptr_t)run -
3676                             (uintptr_t)chunk)) >> PAGE_SHIFT;
3677                         arena_chunk_map_t *run_mapelm =
3678                             &chunk->map[run_pageind];
3679
3680                         assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
3681                             NULL);
3682                         arena_run_tree_insert(&bin->runs, run_mapelm);
3683                 }
3684         }
3685 #ifdef MALLOC_STATS
3686         arena->stats.allocated_small -= size;
3687         arena->stats.ndalloc_small++;
3688 #endif
3689 }
3690
3691 #ifdef MALLOC_MAG
3692 static void
3693 mag_unload(mag_t *mag)
3694 {
3695         arena_chunk_t *chunk;
3696         arena_t *arena;
3697         void *round;
3698         size_t i, ndeferred, nrounds;
3699
3700         for (ndeferred = mag->nrounds; ndeferred > 0;) {
3701                 nrounds = ndeferred;
3702                 /* Lock the arena associated with the first round. */
3703                 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mag->rounds[0]);
3704                 arena = chunk->arena;
3705 #ifdef MALLOC_BALANCE
3706                 arena_lock_balance(arena);
3707 #else
3708                 malloc_spin_lock(&arena->lock);
3709 #endif
3710                 /* Deallocate every round that belongs to the locked arena. */
3711                 for (i = ndeferred = 0; i < nrounds; i++) {
3712                         round = mag->rounds[i];
3713                         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(round);
3714                         if (chunk->arena == arena) {
3715                                 size_t pageind = (((uintptr_t)round -
3716                                     (uintptr_t)chunk) >> PAGE_SHIFT);
3717                                 arena_chunk_map_t *mapelm =
3718                                     &chunk->map[pageind];
3719                                 arena_dalloc_small(arena, chunk, round, mapelm);
3720                         } else {
3721                                 /*
3722                                  * This round was allocated via a different
3723                                  * arena than the one that is currently locked.
3724                                  * Stash the round, so that it can be handled
3725                                  * in a future pass.
3726                                  */
3727                                 mag->rounds[ndeferred] = round;
3728                                 ndeferred++;
3729                         }
3730                 }
3731                 malloc_spin_unlock(&arena->lock);
3732         }
3733
3734         mag->nrounds = 0;
3735 }
3736
3737 static inline void
3738 mag_rack_dalloc(mag_rack_t *rack, void *ptr)
3739 {
3740         arena_t *arena;
3741         arena_chunk_t *chunk;
3742         arena_run_t *run;
3743         arena_bin_t *bin;
3744         bin_mags_t *bin_mags;
3745         mag_t *mag;
3746         size_t pageind, binind;
3747         arena_chunk_map_t *mapelm;
3748
3749         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3750         arena = chunk->arena;
3751         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3752         mapelm = &chunk->map[pageind];
3753         run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
3754         assert(run->magic == ARENA_RUN_MAGIC);
3755         bin = run->bin;
3756         binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
3757             sizeof(arena_bin_t);
3758         assert(binind < nbins);
3759
3760         if (opt_junk)
3761                 memset(ptr, 0x5a, arena->bins[binind].reg_size);
3762
3763         bin_mags = &rack->bin_mags[binind];
3764         mag = bin_mags->curmag;
3765         if (mag == NULL) {
3766                 /* Create an initial magazine for this size class. */
3767                 assert(bin_mags->sparemag == NULL);
3768                 mag = mag_create(choose_arena(), binind);
3769                 if (mag == NULL) {
3770                         malloc_spin_lock(&arena->lock);
3771                         arena_dalloc_small(arena, chunk, ptr, mapelm);
3772                         malloc_spin_unlock(&arena->lock);
3773                         return;
3774                 }
3775                 bin_mags->curmag = mag;
3776         }
3777
3778         if (mag->nrounds == max_rounds) {
3779                 if (bin_mags->sparemag != NULL) {
3780                         if (bin_mags->sparemag->nrounds < max_rounds) {
3781                                 /* Swap magazines. */
3782                                 bin_mags->curmag = bin_mags->sparemag;
3783                                 bin_mags->sparemag = mag;
3784                                 mag = bin_mags->curmag;
3785                         } else {
3786                                 /* Unload the current magazine. */
3787                                 mag_unload(mag);
3788                         }
3789                 } else {
3790                         /* Create a second magazine. */
3791                         mag = mag_create(choose_arena(), binind);
3792                         if (mag == NULL) {
3793                                 mag = rack->bin_mags[binind].curmag;
3794                                 mag_unload(mag);
3795                         } else {
3796                                 bin_mags->sparemag = bin_mags->curmag;
3797                                 bin_mags->curmag = mag;
3798                         }
3799                 }
3800                 assert(mag->nrounds < max_rounds);
3801         }
3802         mag->rounds[mag->nrounds] = ptr;
3803         mag->nrounds++;
3804 }
3805 #endif
3806
3807 static void
3808 arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
3809 {
3810         /* Large allocation. */
3811         malloc_spin_lock(&arena->lock);
3812
3813 #ifndef MALLOC_STATS
3814         if (opt_junk)
3815 #endif
3816         {
3817                 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
3818                     PAGE_SHIFT;
3819                 size_t size = chunk->map[pageind].bits & ~PAGE_MASK;
3820
3821 #ifdef MALLOC_STATS
3822                 if (opt_junk)
3823 #endif
3824                         memset(ptr, 0x5a, size);
3825 #ifdef MALLOC_STATS
3826                 arena->stats.allocated_large -= size;
3827 #endif
3828         }
3829 #ifdef MALLOC_STATS
3830         arena->stats.ndalloc_large++;
3831 #endif
3832
3833         arena_run_dalloc(arena, (arena_run_t *)ptr, true);
3834         malloc_spin_unlock(&arena->lock);
3835 }
3836
3837 static inline void
3838 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
3839 {
3840         size_t pageind;
3841         arena_chunk_map_t *mapelm;
3842
3843         assert(arena != NULL);
3844         assert(arena->magic == ARENA_MAGIC);
3845         assert(chunk->arena == arena);
3846         assert(ptr != NULL);
3847         assert(CHUNK_ADDR2BASE(ptr) != ptr);
3848
3849         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3850         mapelm = &chunk->map[pageind];
3851         assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
3852         if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
3853                 /* Small allocation. */
3854 #ifdef MALLOC_MAG
3855                 if (__isthreaded && opt_mag) {
3856                         mag_rack_t *rack = mag_rack;
3857                         if (rack == NULL) {
3858                                 rack = mag_rack_create(arena);
3859                                 if (rack == NULL) {
3860                                         malloc_spin_lock(&arena->lock);
3861                                         arena_dalloc_small(arena, chunk, ptr,
3862                                             mapelm);
3863                                         malloc_spin_unlock(&arena->lock);
3864                                         return;
3865                                 }
3866                                 mag_rack = rack;
3867                         }
3868                         mag_rack_dalloc(rack, ptr);
3869                 } else {
3870 #endif
3871                         malloc_spin_lock(&arena->lock);
3872                         arena_dalloc_small(arena, chunk, ptr, mapelm);
3873                         malloc_spin_unlock(&arena->lock);
3874 #ifdef MALLOC_MAG
3875                 }
3876 #endif
3877         } else
3878                 arena_dalloc_large(arena, chunk, ptr);
3879 }
3880
3881 static inline void
3882 idalloc(void *ptr)
3883 {
3884         arena_chunk_t *chunk;
3885
3886         assert(ptr != NULL);
3887
3888         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3889         if (chunk != ptr)
3890                 arena_dalloc(chunk->arena, chunk, ptr);
3891         else
3892                 huge_dalloc(ptr);
3893 }
3894
3895 static void
3896 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3897     size_t size, size_t oldsize)
3898 {
3899
3900         assert(size < oldsize);
3901
3902         /*
3903          * Shrink the run, and make trailing pages available for other
3904          * allocations.
3905          */
3906 #ifdef MALLOC_BALANCE
3907         arena_lock_balance(arena);
3908 #else
3909         malloc_spin_lock(&arena->lock);
3910 #endif
3911         arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
3912             true);
3913 #ifdef MALLOC_STATS
3914         arena->stats.allocated_large -= oldsize - size;
3915 #endif
3916         malloc_spin_unlock(&arena->lock);
3917 }
3918
3919 static bool
3920 arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3921     size_t size, size_t oldsize)
3922 {
3923         size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
3924         size_t npages = oldsize >> PAGE_SHIFT;
3925
3926         assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));
3927
3928         /* Try to extend the run. */
3929         assert(size > oldsize);
3930 #ifdef MALLOC_BALANCE
3931         arena_lock_balance(arena);
3932 #else
3933         malloc_spin_lock(&arena->lock);
3934 #endif
3935         if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
3936             & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
3937             ~PAGE_MASK) >= size - oldsize) {
3938                 /*
3939                  * The next run is available and sufficiently large.  Split the
3940                  * following run, then merge the first part with the existing
3941                  * allocation.
3942                  */
3943                 arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
3944                     ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
3945                     false);
3946
3947                 chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
3948                     CHUNK_MAP_ALLOCATED;
3949                 chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
3950                     CHUNK_MAP_ALLOCATED;
3951
3952 #ifdef MALLOC_STATS
3953                 arena->stats.allocated_large += size - oldsize;
3954 #endif
3955                 malloc_spin_unlock(&arena->lock);
3956                 return (false);
3957         }
3958         malloc_spin_unlock(&arena->lock);
3959
3960         return (true);
3961 }
3962
3963 /*
3964  * Try to resize a large allocation, in order to avoid copying.  This will
3965  * always fail if growing an object, and the following run is already in use.
3966  */
3967 static bool
3968 arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
3969 {
3970         size_t psize;
3971
3972         psize = PAGE_CEILING(size);
3973         if (psize == oldsize) {
3974                 /* Same size class. */
3975                 if (opt_junk && size < oldsize) {
3976                         memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
3977                             size);
3978                 }
3979                 return (false);
3980         } else {
3981                 arena_chunk_t *chunk;
3982                 arena_t *arena;
3983
3984                 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3985                 arena = chunk->arena;
3986                 assert(arena->magic == ARENA_MAGIC);
3987
3988                 if (psize < oldsize) {
3989                         /* Fill before shrinking in order avoid a race. */
3990                         if (opt_junk) {
3991                                 memset((void *)((uintptr_t)ptr + size), 0x5a,
3992                                     oldsize - size);
3993                         }
3994                         arena_ralloc_large_shrink(arena, chunk, ptr, psize,
3995                             oldsize);
3996                         return (false);
3997                 } else {
3998                         bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
3999                             psize, oldsize);
4000                         if (ret == false && opt_zero) {
4001                                 memset((void *)((uintptr_t)ptr + oldsize), 0,
4002                                     size - oldsize);
4003                         }
4004                         return (ret);
4005                 }
4006         }
4007 }
4008
4009 static void *
4010 arena_ralloc(void *ptr, size_t size, size_t oldsize)
4011 {
4012         void *ret;
4013         size_t copysize;
4014
4015         /* Try to avoid moving the allocation. */
4016         if (size <= bin_maxclass) {
4017                 if (oldsize <= bin_maxclass && size2bin[size] ==
4018                     size2bin[oldsize])
4019                         goto IN_PLACE;
4020         } else {
4021                 if (oldsize > bin_maxclass && oldsize <= arena_maxclass) {
4022                         assert(size > bin_maxclass);
4023                         if (arena_ralloc_large(ptr, size, oldsize) == false)
4024                                 return (ptr);
4025                 }
4026         }
4027
4028         /*
4029          * If we get here, then size and oldsize are different enough that we
4030          * need to move the object.  In that case, fall back to allocating new
4031          * space and copying.
4032          */
4033         ret = arena_malloc(choose_arena(), size, false);
4034         if (ret == NULL)
4035                 return (NULL);
4036
4037         /* Junk/zero-filling were already done by arena_malloc(). */
4038         copysize = (size < oldsize) ? size : oldsize;
4039         memcpy(ret, ptr, copysize);
4040         idalloc(ptr);
4041         return (ret);
4042 IN_PLACE:
4043         if (opt_junk && size < oldsize)
4044                 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
4045         else if (opt_zero && size > oldsize)
4046                 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
4047         return (ptr);
4048 }
4049
4050 static inline void *
4051 iralloc(void *ptr, size_t size)
4052 {
4053         size_t oldsize;
4054
4055         assert(ptr != NULL);
4056         assert(size != 0);
4057
4058         oldsize = isalloc(ptr);
4059
4060         if (size <= arena_maxclass)
4061                 return (arena_ralloc(ptr, size, oldsize));
4062         else
4063                 return (huge_ralloc(ptr, size, oldsize));
4064 }
4065
4066 static bool
4067 arena_new(arena_t *arena)
4068 {
4069         unsigned i;
4070         arena_bin_t *bin;
4071         size_t prev_run_size;
4072
4073         if (malloc_spin_init(&arena->lock))
4074                 return (true);
4075
4076 #ifdef MALLOC_STATS
4077         memset(&arena->stats, 0, sizeof(arena_stats_t));
4078 #endif
4079
4080         /* Initialize chunks. */
4081         arena_chunk_tree_dirty_new(&arena->chunks_dirty);
4082         arena->spare = NULL;
4083
4084         arena->ndirty = 0;
4085
4086         arena_avail_tree_new(&arena->runs_avail);
4087
4088 #ifdef MALLOC_BALANCE
4089         arena->contention = 0;
4090 #endif
4091
4092         /* Initialize bins. */
4093         prev_run_size = PAGE_SIZE;
4094
4095         i = 0;
4096 #ifdef MALLOC_TINY
4097         /* (2^n)-spaced tiny bins. */
4098         for (; i < ntbins; i++) {
4099                 bin = &arena->bins[i];
4100                 bin->runcur = NULL;
4101                 arena_run_tree_new(&bin->runs);
4102
4103                 bin->reg_size = (1U << (TINY_MIN_2POW + i));
4104
4105                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4106
4107 #ifdef MALLOC_STATS
4108                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4109 #endif
4110         }
4111 #endif
4112
4113         /* Quantum-spaced bins. */
4114         for (; i < ntbins + nqbins; i++) {
4115                 bin = &arena->bins[i];
4116                 bin->runcur = NULL;
4117                 arena_run_tree_new(&bin->runs);
4118
4119                 bin->reg_size = (i - ntbins + 1) << QUANTUM_2POW;
4120
4121                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4122
4123 #ifdef MALLOC_STATS
4124                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4125 #endif
4126         }
4127
4128         /* Cacheline-spaced bins. */
4129         for (; i < ntbins + nqbins + ncbins; i++) {
4130                 bin = &arena->bins[i];
4131                 bin->runcur = NULL;
4132                 arena_run_tree_new(&bin->runs);
4133
4134                 bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
4135                     CACHELINE_2POW);
4136
4137                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4138
4139 #ifdef MALLOC_STATS
4140                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4141 #endif
4142         }
4143
4144         /* Subpage-spaced bins. */
4145         for (; i < nbins; i++) {
4146                 bin = &arena->bins[i];
4147                 bin->runcur = NULL;
4148                 arena_run_tree_new(&bin->runs);
4149
4150                 bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
4151                     << SUBPAGE_2POW);
4152
4153                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4154
4155 #ifdef MALLOC_STATS
4156                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4157 #endif
4158         }
4159
4160 #ifdef MALLOC_DEBUG
4161         arena->magic = ARENA_MAGIC;
4162 #endif
4163
4164         return (false);
4165 }
4166
4167 /* Create a new arena and insert it into the arenas array at index ind. */
4168 static arena_t *
4169 arenas_extend(unsigned ind)
4170 {
4171         arena_t *ret;
4172
4173         /* Allocate enough space for trailing bins. */
4174         ret = (arena_t *)base_alloc(sizeof(arena_t)
4175             + (sizeof(arena_bin_t) * (nbins - 1)));
4176         if (ret != NULL && arena_new(ret) == false) {
4177                 arenas[ind] = ret;
4178                 return (ret);
4179         }
4180         /* Only reached if there is an OOM error. */
4181
4182         /*
4183          * OOM here is quite inconvenient to propagate, since dealing with it
4184          * would require a check for failure in the fast path.  Instead, punt
4185          * by using arenas[0].  In practice, this is an extremely unlikely
4186          * failure.
4187          */
4188         _malloc_message(_getprogname(),
4189             ": (malloc) Error initializing arena\n", "", "");
4190         if (opt_abort)
4191                 abort();
4192
4193         return (arenas[0]);
4194 }
4195
4196 #ifdef MALLOC_MAG
4197 static mag_t *
4198 mag_create(arena_t *arena, size_t binind)
4199 {
4200         mag_t *ret;
4201
4202         if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <=
4203             bin_maxclass) {
4204                 ret = arena_malloc_small(arena, sizeof(mag_t) + (sizeof(void *)
4205                     * (max_rounds - 1)), false);
4206         } else {
4207                 ret = imalloc(sizeof(mag_t) + (sizeof(void *) * (max_rounds -
4208                     1)));
4209         }
4210         if (ret == NULL)
4211                 return (NULL);
4212         ret->binind = binind;
4213         ret->nrounds = 0;
4214
4215         return (ret);
4216 }
4217
4218 static void
4219 mag_destroy(mag_t *mag)
4220 {
4221         arena_t *arena;
4222         arena_chunk_t *chunk;
4223         size_t pageind;
4224         arena_chunk_map_t *mapelm;
4225
4226         chunk = CHUNK_ADDR2BASE(mag);
4227         arena = chunk->arena;
4228         pageind = (((uintptr_t)mag - (uintptr_t)chunk) >> PAGE_SHIFT);
4229         mapelm = &chunk->map[pageind];
4230
4231         assert(mag->nrounds == 0);
4232         if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <=
4233             bin_maxclass) {
4234                 malloc_spin_lock(&arena->lock);
4235                 arena_dalloc_small(arena, chunk, mag, mapelm);
4236                 malloc_spin_unlock(&arena->lock);
4237         } else
4238                 idalloc(mag);
4239 }
4240
4241 static mag_rack_t *
4242 mag_rack_create(arena_t *arena)
4243 {
4244
4245         assert(sizeof(mag_rack_t) + (sizeof(bin_mags_t *) * (nbins - 1)) <=
4246             bin_maxclass);
4247         return (arena_malloc_small(arena, sizeof(mag_rack_t) +
4248             (sizeof(bin_mags_t) * (nbins - 1)), true));
4249 }
4250
4251 static void
4252 mag_rack_destroy(mag_rack_t *rack)
4253 {
4254         arena_t *arena;
4255         arena_chunk_t *chunk;
4256         bin_mags_t *bin_mags;
4257         size_t i, pageind;
4258         arena_chunk_map_t *mapelm;
4259
4260         for (i = 0; i < nbins; i++) {
4261                 bin_mags = &rack->bin_mags[i];
4262                 if (bin_mags->curmag != NULL) {
4263                         assert(bin_mags->curmag->binind == i);
4264                         mag_unload(bin_mags->curmag);
4265                         mag_destroy(bin_mags->curmag);
4266                 }
4267                 if (bin_mags->sparemag != NULL) {
4268                         assert(bin_mags->sparemag->binind == i);
4269                         mag_unload(bin_mags->sparemag);
4270                         mag_destroy(bin_mags->sparemag);
4271                 }
4272         }
4273
4274         chunk = CHUNK_ADDR2BASE(rack);
4275         arena = chunk->arena;
4276         pageind = (((uintptr_t)rack - (uintptr_t)chunk) >> PAGE_SHIFT);
4277         mapelm = &chunk->map[pageind];
4278
4279         malloc_spin_lock(&arena->lock);
4280         arena_dalloc_small(arena, chunk, rack, mapelm);
4281         malloc_spin_unlock(&arena->lock);
4282 }
4283 #endif
4284
4285 /*
4286  * End arena.
4287  */
4288 /******************************************************************************/
4289 /*
4290  * Begin general internal functions.
4291  */
4292
4293 static void *
4294 huge_malloc(size_t size, bool zero)
4295 {
4296         void *ret;
4297         size_t csize;
4298         extent_node_t *node;
4299
4300         /* Allocate one or more contiguous chunks for this request. */
4301
4302         csize = CHUNK_CEILING(size);
4303         if (csize == 0) {
4304                 /* size is large enough to cause size_t wrap-around. */
4305                 return (NULL);
4306         }
4307
4308         /* Allocate an extent node with which to track the chunk. */
4309         node = base_node_alloc();
4310         if (node == NULL)
4311                 return (NULL);
4312
4313         ret = chunk_alloc(csize, zero);
4314         if (ret == NULL) {
4315                 base_node_dealloc(node);
4316                 return (NULL);
4317         }
4318
4319         /* Insert node into huge. */
4320         node->addr = ret;
4321         node->size = csize;
4322
4323         malloc_mutex_lock(&huge_mtx);
4324         extent_tree_ad_insert(&huge, node);
4325 #ifdef MALLOC_STATS
4326         huge_nmalloc++;
4327         huge_allocated += csize;
4328 #endif
4329         malloc_mutex_unlock(&huge_mtx);
4330
4331         if (zero == false) {
4332                 if (opt_junk)
4333                         memset(ret, 0xa5, csize);
4334                 else if (opt_zero)
4335                         memset(ret, 0, csize);
4336         }
4337
4338         return (ret);
4339 }
4340
4341 /* Only handles large allocations that require more than chunk alignment. */
4342 static void *
4343 huge_palloc(size_t alignment, size_t size)
4344 {
4345         void *ret;
4346         size_t alloc_size, chunk_size, offset;
4347         extent_node_t *node;
4348
4349         /*
4350          * This allocation requires alignment that is even larger than chunk
4351          * alignment.  This means that huge_malloc() isn't good enough.
4352          *
4353          * Allocate almost twice as many chunks as are demanded by the size or
4354          * alignment, in order to assure the alignment can be achieved, then
4355          * unmap leading and trailing chunks.
4356          */
4357         assert(alignment >= chunksize);
4358
4359         chunk_size = CHUNK_CEILING(size);
4360
4361         if (size >= alignment)
4362                 alloc_size = chunk_size + alignment - chunksize;
4363         else
4364                 alloc_size = (alignment << 1) - chunksize;
4365
4366         /* Allocate an extent node with which to track the chunk. */
4367         node = base_node_alloc();
4368         if (node == NULL)
4369                 return (NULL);
4370
4371         ret = chunk_alloc(alloc_size, false);
4372         if (ret == NULL) {
4373                 base_node_dealloc(node);
4374                 return (NULL);
4375         }
4376
4377         offset = (uintptr_t)ret & (alignment - 1);
4378         assert((offset & chunksize_mask) == 0);
4379         assert(offset < alloc_size);
4380         if (offset == 0) {
4381                 /* Trim trailing space. */
4382                 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
4383                     - chunk_size);
4384         } else {
4385                 size_t trailsize;
4386
4387                 /* Trim leading space. */
4388                 chunk_dealloc(ret, alignment - offset);
4389
4390                 ret = (void *)((uintptr_t)ret + (alignment - offset));
4391
4392                 trailsize = alloc_size - (alignment - offset) - chunk_size;
4393                 if (trailsize != 0) {
4394                     /* Trim trailing space. */
4395                     assert(trailsize < alloc_size);
4396                     chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
4397                         trailsize);
4398                 }
4399         }
4400
4401         /* Insert node into huge. */
4402         node->addr = ret;
4403         node->size = chunk_size;
4404
4405         malloc_mutex_lock(&huge_mtx);
4406         extent_tree_ad_insert(&huge, node);
4407 #ifdef MALLOC_STATS
4408         huge_nmalloc++;
4409         huge_allocated += chunk_size;
4410 #endif
4411         malloc_mutex_unlock(&huge_mtx);
4412
4413         if (opt_junk)
4414                 memset(ret, 0xa5, chunk_size);
4415         else if (opt_zero)
4416                 memset(ret, 0, chunk_size);
4417
4418         return (ret);
4419 }
4420
4421 static void *
4422 huge_ralloc(void *ptr, size_t size, size_t oldsize)
4423 {
4424         void *ret;
4425         size_t copysize;
4426
4427         /* Avoid moving the allocation if the size class would not change. */
4428         if (oldsize > arena_maxclass &&
4429             CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
4430                 if (opt_junk && size < oldsize) {
4431                         memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
4432                             - size);
4433                 } else if (opt_zero && size > oldsize) {
4434                         memset((void *)((uintptr_t)ptr + oldsize), 0, size
4435                             - oldsize);
4436                 }
4437                 return (ptr);
4438         }
4439
4440         /*
4441          * If we get here, then size and oldsize are different enough that we
4442          * need to use a different size class.  In that case, fall back to
4443          * allocating new space and copying.
4444          */
4445         ret = huge_malloc(size, false);
4446         if (ret == NULL)
4447                 return (NULL);
4448
4449         copysize = (size < oldsize) ? size : oldsize;
4450         memcpy(ret, ptr, copysize);
4451         idalloc(ptr);
4452         return (ret);
4453 }
4454
4455 static void
4456 huge_dalloc(void *ptr)
4457 {
4458         extent_node_t *node, key;
4459
4460         malloc_mutex_lock(&huge_mtx);
4461
4462         /* Extract from tree of huge allocations. */
4463         key.addr = ptr;
4464         node = extent_tree_ad_search(&huge, &key);
4465         assert(node != NULL);
4466         assert(node->addr == ptr);
4467         extent_tree_ad_remove(&huge, node);
4468
4469 #ifdef MALLOC_STATS
4470         huge_ndalloc++;
4471         huge_allocated -= node->size;
4472 #endif
4473
4474         malloc_mutex_unlock(&huge_mtx);
4475
4476         /* Unmap chunk. */
4477 #ifdef MALLOC_DSS
4478         if (opt_dss && opt_junk)
4479                 memset(node->addr, 0x5a, node->size);
4480 #endif
4481         chunk_dealloc(node->addr, node->size);
4482
4483         base_node_dealloc(node);
4484 }
4485
4486 static void
4487 malloc_print_stats(void)
4488 {
4489
4490         if (opt_print_stats) {
4491                 char s[UMAX2S_BUFSIZE];
4492                 _malloc_message("___ Begin malloc statistics ___\n", "", "",
4493                     "");
4494                 _malloc_message("Assertions ",
4495 #ifdef NDEBUG
4496                     "disabled",
4497 #else
4498                     "enabled",
4499 #endif
4500                     "\n", "");
4501                 _malloc_message("Boolean MALLOC_OPTIONS: ",
4502                     opt_abort ? "A" : "a", "", "");
4503 #ifdef MALLOC_DSS
4504                 _malloc_message(opt_dss ? "D" : "d", "", "", "");
4505 #endif
4506 #ifdef MALLOC_MAG
4507                 _malloc_message(opt_mag ? "G" : "g", "", "", "");
4508 #endif
4509                 _malloc_message(opt_junk ? "J" : "j", "", "", "");
4510 #ifdef MALLOC_DSS
4511                 _malloc_message(opt_mmap ? "M" : "m", "", "", "");
4512 #endif
4513                 _malloc_message(opt_utrace ? "PU" : "Pu",
4514                     opt_sysv ? "V" : "v",
4515                     opt_xmalloc ? "X" : "x",
4516                     opt_zero ? "Z\n" : "z\n");
4517
4518                 _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
4519                 _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
4520 #ifdef MALLOC_BALANCE
4521                 _malloc_message("Arena balance threshold: ",
4522                     umax2s(opt_balance_threshold, s), "\n", "");
4523 #endif
4524                 _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
4525                     "\n", "");
4526                 _malloc_message("Quantum size: ", umax2s(QUANTUM, s), "\n", "");
4527                 _malloc_message("Cacheline size (assumed): ", umax2s(CACHELINE,
4528                     s), "\n", "");
4529 #ifdef MALLOC_TINY
4530                 _malloc_message("Tiny 2^n-spaced sizes: [", umax2s((1U <<
4531                     TINY_MIN_2POW), s), "..", "");
4532                 _malloc_message(umax2s((qspace_min >> 1), s), "]\n", "", "");
4533 #endif
4534                 _malloc_message("Quantum-spaced sizes: [", umax2s(qspace_min,
4535                     s), "..", "");
4536                 _malloc_message(umax2s(qspace_max, s), "]\n", "", "");
4537                 _malloc_message("Cacheline-spaced sizes: [", umax2s(cspace_min,
4538                     s), "..", "");
4539                 _malloc_message(umax2s(cspace_max, s), "]\n", "", "");
4540                 _malloc_message("Subpage-spaced sizes: [", umax2s(sspace_min,
4541                     s), "..", "");
4542                 _malloc_message(umax2s(sspace_max, s), "]\n", "", "");
4543 #ifdef MALLOC_MAG
4544                 _malloc_message("Rounds per magazine: ", umax2s(max_rounds, s),
4545                     "\n", "");
4546 #endif
4547                 _malloc_message("Max dirty pages per arena: ",
4548                     umax2s(opt_dirty_max, s), "\n", "");
4549
4550                 _malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
4551                 _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
4552
4553 #ifdef MALLOC_STATS
4554                 {
4555                         size_t allocated, mapped;
4556 #ifdef MALLOC_BALANCE
4557                         uint64_t nbalance = 0;
4558 #endif
4559                         unsigned i;
4560                         arena_t *arena;
4561
4562                         /* Calculate and print allocated/mapped stats. */
4563
4564                         /* arenas. */
4565                         for (i = 0, allocated = 0; i < narenas; i++) {
4566                                 if (arenas[i] != NULL) {
4567                                         malloc_spin_lock(&arenas[i]->lock);
4568                                         allocated +=
4569                                             arenas[i]->stats.allocated_small;
4570                                         allocated +=
4571                                             arenas[i]->stats.allocated_large;
4572 #ifdef MALLOC_BALANCE
4573                                         nbalance += arenas[i]->stats.nbalance;
4574 #endif
4575                                         malloc_spin_unlock(&arenas[i]->lock);
4576                                 }
4577                         }
4578
4579                         /* huge/base. */
4580                         malloc_mutex_lock(&huge_mtx);
4581                         allocated += huge_allocated;
4582                         mapped = stats_chunks.curchunks * chunksize;
4583                         malloc_mutex_unlock(&huge_mtx);
4584
4585                         malloc_mutex_lock(&base_mtx);
4586                         mapped += base_mapped;
4587                         malloc_mutex_unlock(&base_mtx);
4588
4589                         malloc_printf("Allocated: %zu, mapped: %zu\n",
4590                             allocated, mapped);
4591
4592 #ifdef MALLOC_BALANCE
4593                         malloc_printf("Arena balance reassignments: %llu\n",
4594                             nbalance);
4595 #endif
4596
4597                         /* Print chunk stats. */
4598                         {
4599                                 chunk_stats_t chunks_stats;
4600
4601                                 malloc_mutex_lock(&huge_mtx);
4602                                 chunks_stats = stats_chunks;
4603                                 malloc_mutex_unlock(&huge_mtx);
4604
4605                                 malloc_printf("chunks: nchunks   "
4606                                     "highchunks    curchunks\n");
4607                                 malloc_printf("  %13llu%13lu%13lu\n",
4608                                     chunks_stats.nchunks,
4609                                     chunks_stats.highchunks,
4610                                     chunks_stats.curchunks);
4611                         }
4612
4613                         /* Print chunk stats. */
4614                         malloc_printf(
4615                             "huge: nmalloc      ndalloc    allocated\n");
4616                         malloc_printf(" %12llu %12llu %12zu\n",
4617                             huge_nmalloc, huge_ndalloc, huge_allocated);
4618
4619                         /* Print stats for each arena. */
4620                         for (i = 0; i < narenas; i++) {
4621                                 arena = arenas[i];
4622                                 if (arena != NULL) {
4623                                         malloc_printf(
4624                                             "\narenas[%u]:\n", i);
4625                                         malloc_spin_lock(&arena->lock);
4626                                         stats_print(arena);
4627                                         malloc_spin_unlock(&arena->lock);
4628                                 }
4629                         }
4630                 }
4631 #endif /* #ifdef MALLOC_STATS */
4632                 _malloc_message("--- End malloc statistics ---\n", "", "", "");
4633         }
4634 }
4635
4636 #ifdef MALLOC_DEBUG
4637 static void
4638 size2bin_validate(void)
4639 {
4640         size_t i, size, binind;
4641
4642         assert(size2bin[0] == 0xffU);
4643         i = 1;
4644 #  ifdef MALLOC_TINY
4645         /* Tiny. */
4646         for (; i < (1U << TINY_MIN_2POW); i++) {
4647                 size = pow2_ceil(1U << TINY_MIN_2POW);
4648                 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4649                 assert(size2bin[i] == binind);
4650         }
4651         for (; i < qspace_min; i++) {
4652                 size = pow2_ceil(i);
4653                 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4654                 assert(size2bin[i] == binind);
4655         }
4656 #  endif
4657         /* Quantum-spaced. */
4658         for (; i <= qspace_max; i++) {
4659                 size = QUANTUM_CEILING(i);
4660                 binind = ntbins + (size >> QUANTUM_2POW) - 1;
4661                 assert(size2bin[i] == binind);
4662         }
4663         /* Cacheline-spaced. */
4664         for (; i <= cspace_max; i++) {
4665                 size = CACHELINE_CEILING(i);
4666                 binind = ntbins + nqbins + ((size - cspace_min) >>
4667                     CACHELINE_2POW);
4668                 assert(size2bin[i] == binind);
4669         }
4670         /* Sub-page. */
4671         for (; i <= sspace_max; i++) {
4672                 size = SUBPAGE_CEILING(i);
4673                 binind = ntbins + nqbins + ncbins + ((size - sspace_min)
4674                     >> SUBPAGE_2POW);
4675                 assert(size2bin[i] == binind);
4676         }
4677 }
4678 #endif
4679
4680 static bool
4681 size2bin_init(void)
4682 {
4683
4684         if (opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT
4685             || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT)
4686                 return (size2bin_init_hard());
4687
4688         size2bin = const_size2bin;
4689 #ifdef MALLOC_DEBUG
4690         assert(sizeof(const_size2bin) == bin_maxclass + 1);
4691         size2bin_validate();
4692 #endif
4693         return (false);
4694 }
4695
4696 static bool
4697 size2bin_init_hard(void)
4698 {
4699         size_t i, size, binind;
4700         uint8_t *custom_size2bin;
4701
4702         assert(opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT
4703             || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT);
4704
4705         custom_size2bin = (uint8_t *)base_alloc(bin_maxclass + 1);
4706         if (custom_size2bin == NULL)
4707                 return (true);
4708
4709         custom_size2bin[0] = 0xffU;
4710         i = 1;
4711 #ifdef MALLOC_TINY
4712         /* Tiny. */
4713         for (; i < (1U << TINY_MIN_2POW); i++) {
4714                 size = pow2_ceil(1U << TINY_MIN_2POW);
4715                 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4716                 custom_size2bin[i] = binind;
4717         }
4718         for (; i < qspace_min; i++) {
4719                 size = pow2_ceil(i);
4720                 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4721                 custom_size2bin[i] = binind;
4722         }
4723 #endif
4724         /* Quantum-spaced. */
4725         for (; i <= qspace_max; i++) {
4726                 size = QUANTUM_CEILING(i);
4727                 binind = ntbins + (size >> QUANTUM_2POW) - 1;
4728                 custom_size2bin[i] = binind;
4729         }
4730         /* Cacheline-spaced. */
4731         for (; i <= cspace_max; i++) {
4732                 size = CACHELINE_CEILING(i);
4733                 binind = ntbins + nqbins + ((size - cspace_min) >>
4734                     CACHELINE_2POW);
4735                 custom_size2bin[i] = binind;
4736         }
4737         /* Sub-page. */
4738         for (; i <= sspace_max; i++) {
4739                 size = SUBPAGE_CEILING(i);
4740                 binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
4741                     SUBPAGE_2POW);
4742                 custom_size2bin[i] = binind;
4743         }
4744
4745         size2bin = custom_size2bin;
4746 #ifdef MALLOC_DEBUG
4747         size2bin_validate();
4748 #endif
4749         return (false);
4750 }
4751
4752 /*
4753  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
4754  * implementation has to take pains to avoid infinite recursion during
4755  * initialization.
4756  */
4757 static inline bool
4758 malloc_init(void)
4759 {
4760
4761         if (malloc_initialized == false)
4762                 return (malloc_init_hard());
4763
4764         return (false);
4765 }
4766
4767 static bool
4768 malloc_init_hard(void)
4769 {
4770         unsigned i;
4771         int linklen;
4772         char buf[PATH_MAX + 1];
4773         const char *opts;
4774
4775         malloc_mutex_lock(&init_lock);
4776         if (malloc_initialized) {
4777                 /*
4778                  * Another thread initialized the allocator before this one
4779                  * acquired init_lock.
4780                  */
4781                 malloc_mutex_unlock(&init_lock);
4782                 return (false);
4783         }
4784
4785         /* Get number of CPUs. */
4786         {
4787                 int mib[2];
4788                 size_t len;
4789
4790                 mib[0] = CTL_HW;
4791                 mib[1] = HW_NCPU;
4792                 len = sizeof(ncpus);
4793                 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
4794                         /* Error. */
4795                         ncpus = 1;
4796                 }
4797         }
4798
4799         /*
4800          * Increase the chunk size to the largest page size that is greater
4801          * than the default chunk size and less than or equal to 4MB.
4802          */
4803         {
4804                 size_t pagesizes[MAXPAGESIZES];
4805                 int k, nsizes;
4806
4807                 nsizes = getpagesizes(pagesizes, MAXPAGESIZES);
4808                 for (k = 0; k < nsizes; k++)
4809                         if (pagesizes[k] <= (1LU << 22))
4810                                 while ((1LU << opt_chunk_2pow) < pagesizes[k])
4811                                         opt_chunk_2pow++;
4812         }
4813
4814         for (i = 0; i < 3; i++) {
4815                 unsigned j;
4816
4817                 /* Get runtime configuration. */
4818                 switch (i) {
4819                 case 0:
4820                         if ((linklen = readlink("/etc/malloc.conf", buf,
4821                                                 sizeof(buf) - 1)) != -1) {
4822                                 /*
4823                                  * Use the contents of the "/etc/malloc.conf"
4824                                  * symbolic link's name.
4825                                  */
4826                                 buf[linklen] = '\0';
4827                                 opts = buf;
4828                         } else {
4829                                 /* No configuration specified. */
4830                                 buf[0] = '\0';
4831                                 opts = buf;
4832                         }
4833                         break;
4834                 case 1:
4835                         if (issetugid() == 0 && (opts =
4836                             getenv("MALLOC_OPTIONS")) != NULL) {
4837                                 /*
4838                                  * Do nothing; opts is already initialized to
4839                                  * the value of the MALLOC_OPTIONS environment
4840                                  * variable.
4841                                  */
4842                         } else {
4843                                 /* No configuration specified. */
4844                                 buf[0] = '\0';
4845                                 opts = buf;
4846                         }
4847                         break;
4848                 case 2:
4849                         if (_malloc_options != NULL) {
4850                                 /*
4851                                  * Use options that were compiled into the
4852                                  * program.
4853                                  */
4854                                 opts = _malloc_options;
4855                         } else {
4856                                 /* No configuration specified. */
4857                                 buf[0] = '\0';
4858                                 opts = buf;
4859                         }
4860                         break;
4861                 default:
4862                         /* NOTREACHED */
4863                         assert(false);
4864                 }
4865
4866                 for (j = 0; opts[j] != '\0'; j++) {
4867                         unsigned k, nreps;
4868                         bool nseen;
4869
4870                         /* Parse repetition count, if any. */
4871                         for (nreps = 0, nseen = false;; j++, nseen = true) {
4872                                 switch (opts[j]) {
4873                                         case '0': case '1': case '2': case '3':
4874                                         case '4': case '5': case '6': case '7':
4875                                         case '8': case '9':
4876                                                 nreps *= 10;
4877                                                 nreps += opts[j] - '0';
4878                                                 break;
4879                                         default:
4880                                                 goto MALLOC_OUT;
4881                                 }
4882                         }
4883 MALLOC_OUT:
4884                         if (nseen == false)
4885                                 nreps = 1;
4886
4887                         for (k = 0; k < nreps; k++) {
4888                                 switch (opts[j]) {
4889                                 case 'a':
4890                                         opt_abort = false;
4891                                         break;
4892                                 case 'A':
4893                                         opt_abort = true;
4894                                         break;
4895                                 case 'b':
4896 #ifdef MALLOC_BALANCE
4897                                         opt_balance_threshold >>= 1;
4898 #endif
4899                                         break;
4900                                 case 'B':
4901 #ifdef MALLOC_BALANCE
4902                                         if (opt_balance_threshold == 0)
4903                                                 opt_balance_threshold = 1;
4904                                         else if ((opt_balance_threshold << 1)
4905                                             > opt_balance_threshold)
4906                                                 opt_balance_threshold <<= 1;
4907 #endif
4908                                         break;
4909                                 case 'c':
4910                                         if (opt_cspace_max_2pow - 1 >
4911                                             opt_qspace_max_2pow &&
4912                                             opt_cspace_max_2pow >
4913                                             CACHELINE_2POW)
4914                                                 opt_cspace_max_2pow--;
4915                                         break;
4916                                 case 'C':
4917                                         if (opt_cspace_max_2pow < PAGE_SHIFT
4918                                             - 1)
4919                                                 opt_cspace_max_2pow++;
4920                                         break;
4921                                 case 'd':
4922 #ifdef MALLOC_DSS
4923                                         opt_dss = false;
4924 #endif
4925                                         break;
4926                                 case 'D':
4927 #ifdef MALLOC_DSS
4928                                         opt_dss = true;
4929 #endif
4930                                         break;
4931                                 case 'f':
4932                                         opt_dirty_max >>= 1;
4933                                         break;
4934                                 case 'F':
4935                                         if (opt_dirty_max == 0)
4936                                                 opt_dirty_max = 1;
4937                                         else if ((opt_dirty_max << 1) != 0)
4938                                                 opt_dirty_max <<= 1;
4939                                         break;
4940 #ifdef MALLOC_MAG
4941                                 case 'g':
4942                                         opt_mag = false;
4943                                         break;
4944                                 case 'G':
4945                                         opt_mag = true;
4946                                         break;
4947 #endif
4948                                 case 'j':
4949                                         opt_junk = false;
4950                                         break;
4951                                 case 'J':
4952                                         opt_junk = true;
4953                                         break;
4954                                 case 'k':
4955                                         /*
4956                                          * Chunks always require at least one
4957                                          * header page, so chunks can never be
4958                                          * smaller than two pages.
4959                                          */
4960                                         if (opt_chunk_2pow > PAGE_SHIFT + 1)
4961                                                 opt_chunk_2pow--;
4962                                         break;
4963                                 case 'K':
4964                                         if (opt_chunk_2pow + 1 <
4965                                             (sizeof(size_t) << 3))
4966                                                 opt_chunk_2pow++;
4967                                         break;
4968                                 case 'm':
4969 #ifdef MALLOC_DSS
4970                                         opt_mmap = false;
4971 #endif
4972                                         break;
4973                                 case 'M':
4974 #ifdef MALLOC_DSS
4975                                         opt_mmap = true;
4976 #endif
4977                                         break;
4978                                 case 'n':
4979                                         opt_narenas_lshift--;
4980                                         break;
4981                                 case 'N':
4982                                         opt_narenas_lshift++;
4983                                         break;
4984                                 case 'p':
4985                                         opt_print_stats = false;
4986                                         break;
4987                                 case 'P':
4988                                         opt_print_stats = true;
4989                                         break;
4990                                 case 'q':
4991                                         if (opt_qspace_max_2pow > QUANTUM_2POW)
4992                                                 opt_qspace_max_2pow--;
4993                                         break;
4994                                 case 'Q':
4995                                         if (opt_qspace_max_2pow + 1 <
4996                                             opt_cspace_max_2pow)
4997                                                 opt_qspace_max_2pow++;
4998                                         break;
4999 #ifdef MALLOC_MAG
5000                                 case 'R':
5001                                         if (opt_mag_size_2pow + 1 < (8U <<
5002                                             SIZEOF_PTR_2POW))
5003                                                 opt_mag_size_2pow++;
5004                                         break;
5005                                 case 'r':
5006                                         /*
5007                                          * Make sure there's always at least
5008                                          * one round per magazine.
5009                                          */
5010                                         if ((1U << (opt_mag_size_2pow-1)) >=
5011                                             sizeof(mag_t))
5012                                                 opt_mag_size_2pow--;
5013                                         break;
5014 #endif
5015                                 case 'u':
5016                                         opt_utrace = false;
5017                                         break;
5018                                 case 'U':
5019                                         opt_utrace = true;
5020                                         break;
5021                                 case 'v':
5022                                         opt_sysv = false;
5023                                         break;
5024                                 case 'V':
5025                                         opt_sysv = true;
5026                                         break;
5027                                 case 'x':
5028                                         opt_xmalloc = false;
5029                                         break;
5030                                 case 'X':
5031                                         opt_xmalloc = true;
5032                                         break;
5033                                 case 'z':
5034                                         opt_zero = false;
5035                                         break;
5036                                 case 'Z':
5037                                         opt_zero = true;
5038                                         break;
5039                                 default: {
5040                                         char cbuf[2];
5041
5042                                         cbuf[0] = opts[j];
5043                                         cbuf[1] = '\0';
5044                                         _malloc_message(_getprogname(),
5045                                             ": (malloc) Unsupported character "
5046                                             "in malloc options: '", cbuf,
5047                                             "'\n");
5048                                 }
5049                                 }
5050                         }
5051                 }
5052         }
5053
5054 #ifdef MALLOC_DSS
5055         /* Make sure that there is some method for acquiring memory. */
5056         if (opt_dss == false && opt_mmap == false)
5057                 opt_mmap = true;
5058 #endif
5059
5060         /* Take care to call atexit() only once. */
5061         if (opt_print_stats) {
5062                 /* Print statistics at exit. */
5063                 atexit(malloc_print_stats);
5064         }
5065
5066 #ifdef MALLOC_MAG
5067         /*
5068          * Calculate the actual number of rounds per magazine, taking into
5069          * account header overhead.
5070          */
5071         max_rounds = (1LLU << (opt_mag_size_2pow - SIZEOF_PTR_2POW)) -
5072             (sizeof(mag_t) >> SIZEOF_PTR_2POW) + 1;
5073 #endif
5074
5075         /* Set variables according to the value of opt_[qc]space_max_2pow. */
5076         qspace_max = (1U << opt_qspace_max_2pow);
5077         cspace_min = CACHELINE_CEILING(qspace_max);
5078         if (cspace_min == qspace_max)
5079                 cspace_min += CACHELINE;
5080         cspace_max = (1U << opt_cspace_max_2pow);
5081         sspace_min = SUBPAGE_CEILING(cspace_max);
5082         if (sspace_min == cspace_max)
5083                 sspace_min += SUBPAGE;
5084         assert(sspace_min < PAGE_SIZE);
5085         sspace_max = PAGE_SIZE - SUBPAGE;
5086
5087 #ifdef MALLOC_TINY
5088         assert(QUANTUM_2POW >= TINY_MIN_2POW);
5089 #endif
5090         assert(ntbins <= QUANTUM_2POW);
5091         nqbins = qspace_max >> QUANTUM_2POW;
5092         ncbins = ((cspace_max - cspace_min) >> CACHELINE_2POW) + 1;
5093         nsbins = ((sspace_max - sspace_min) >> SUBPAGE_2POW) + 1;
5094         nbins = ntbins + nqbins + ncbins + nsbins;
5095
5096         if (size2bin_init()) {
5097                 malloc_mutex_unlock(&init_lock);
5098                 return (true);
5099         }
5100
5101         /* Set variables according to the value of opt_chunk_2pow. */
5102         chunksize = (1LU << opt_chunk_2pow);
5103         chunksize_mask = chunksize - 1;
5104         chunk_npages = (chunksize >> PAGE_SHIFT);
5105         {
5106                 size_t header_size;
5107
5108                 /*
5109                  * Compute the header size such that it is large enough to
5110                  * contain the page map.
5111                  */
5112                 header_size = sizeof(arena_chunk_t) +
5113                     (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
5114                 arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
5115                     ((header_size & PAGE_MASK) != 0);
5116         }
5117         arena_maxclass = chunksize - (arena_chunk_header_npages <<
5118             PAGE_SHIFT);
5119
5120         UTRACE(0, 0, 0);
5121
5122 #ifdef MALLOC_STATS
5123         memset(&stats_chunks, 0, sizeof(chunk_stats_t));
5124 #endif
5125
5126         /* Various sanity checks that regard configuration. */
5127         assert(chunksize >= PAGE_SIZE);
5128
5129         /* Initialize chunks data. */
5130         malloc_mutex_init(&huge_mtx);
5131         extent_tree_ad_new(&huge);
5132 #ifdef MALLOC_DSS
5133         malloc_mutex_init(&dss_mtx);
5134         dss_base = sbrk(0);
5135         dss_prev = dss_base;
5136         dss_max = dss_base;
5137         extent_tree_szad_new(&dss_chunks_szad);
5138         extent_tree_ad_new(&dss_chunks_ad);
5139 #endif
5140 #ifdef MALLOC_STATS
5141         huge_nmalloc = 0;
5142         huge_ndalloc = 0;
5143         huge_allocated = 0;
5144 #endif
5145
5146         /* Initialize base allocation data structures. */
5147 #ifdef MALLOC_STATS
5148         base_mapped = 0;
5149 #endif
5150 #ifdef MALLOC_DSS
5151         /*
5152          * Allocate a base chunk here, since it doesn't actually have to be
5153          * chunk-aligned.  Doing this before allocating any other chunks allows
5154          * the use of space that would otherwise be wasted.
5155          */
5156         if (opt_dss)
5157                 base_pages_alloc(0);
5158 #endif
5159         base_nodes = NULL;
5160         malloc_mutex_init(&base_mtx);
5161
5162         if (ncpus > 1) {
5163                 /*
5164                  * For SMP systems, create twice as many arenas as there are
5165                  * CPUs by default.
5166                  */
5167                 opt_narenas_lshift++;
5168         }
5169
5170         /* Determine how many arenas to use. */
5171         narenas = ncpus;
5172         if (opt_narenas_lshift > 0) {
5173                 if ((narenas << opt_narenas_lshift) > narenas)
5174                         narenas <<= opt_narenas_lshift;
5175                 /*
5176                  * Make sure not to exceed the limits of what base_alloc() can
5177                  * handle.
5178                  */
5179                 if (narenas * sizeof(arena_t *) > chunksize)
5180                         narenas = chunksize / sizeof(arena_t *);
5181         } else if (opt_narenas_lshift < 0) {
5182                 if ((narenas >> -opt_narenas_lshift) < narenas)
5183                         narenas >>= -opt_narenas_lshift;
5184                 /* Make sure there is at least one arena. */
5185                 if (narenas == 0)
5186                         narenas = 1;
5187         }
5188 #ifdef MALLOC_BALANCE
5189         assert(narenas != 0);
5190         for (narenas_2pow = 0;
5191              (narenas >> (narenas_2pow + 1)) != 0;
5192              narenas_2pow++);
5193 #endif
5194
5195 #ifdef NO_TLS
5196         if (narenas > 1) {
5197                 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
5198                     23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
5199                     89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
5200                     151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
5201                     223, 227, 229, 233, 239, 241, 251, 257, 263};
5202                 unsigned nprimes, parenas;
5203
5204                 /*
5205                  * Pick a prime number of hash arenas that is more than narenas
5206                  * so that direct hashing of pthread_self() pointers tends to
5207                  * spread allocations evenly among the arenas.
5208                  */
5209                 assert((narenas & 1) == 0); /* narenas must be even. */
5210                 nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
5211                 parenas = primes[nprimes - 1]; /* In case not enough primes. */
5212                 for (i = 1; i < nprimes; i++) {
5213                         if (primes[i] > narenas) {
5214                                 parenas = primes[i];
5215                                 break;
5216                         }
5217                 }
5218                 narenas = parenas;
5219         }
5220 #endif
5221
5222 #ifndef NO_TLS
5223 #  ifndef MALLOC_BALANCE
5224         next_arena = 0;
5225 #  endif
5226 #endif
5227
5228         /* Allocate and initialize arenas. */
5229         arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
5230         if (arenas == NULL) {
5231                 malloc_mutex_unlock(&init_lock);
5232                 return (true);
5233         }
5234         /*
5235          * Zero the array.  In practice, this should always be pre-zeroed,
5236          * since it was just mmap()ed, but let's be sure.
5237          */
5238         memset(arenas, 0, sizeof(arena_t *) * narenas);
5239
5240         /*
5241          * Initialize one arena here.  The rest are lazily created in
5242          * choose_arena_hard().
5243          */
5244         arenas_extend(0);
5245         if (arenas[0] == NULL) {
5246                 malloc_mutex_unlock(&init_lock);
5247                 return (true);
5248         }
5249 #ifndef NO_TLS
5250         /*
5251          * Assign the initial arena to the initial thread, in order to avoid
5252          * spurious creation of an extra arena if the application switches to
5253          * threaded mode.
5254          */
5255         arenas_map = arenas[0];
5256 #endif
5257         /*
5258          * Seed here for the initial thread, since choose_arena_hard() is only
5259          * called for other threads.  The seed value doesn't really matter.
5260          */
5261 #ifdef MALLOC_BALANCE
5262         SPRN(balance, 42);
5263 #endif
5264
5265         malloc_spin_init(&arenas_lock);
5266
5267         malloc_initialized = true;
5268         malloc_mutex_unlock(&init_lock);
5269         return (false);
5270 }
5271
5272 /*
5273  * End general internal functions.
5274  */
5275 /******************************************************************************/
5276 /*
5277  * Begin malloc(3)-compatible functions.
5278  */
5279
5280 void *
5281 malloc(size_t size)
5282 {
5283         void *ret;
5284
5285         if (malloc_init()) {
5286                 ret = NULL;
5287                 goto RETURN;
5288         }
5289
5290         if (size == 0) {
5291                 if (opt_sysv == false)
5292                         size = 1;
5293                 else {
5294                         ret = NULL;
5295                         goto RETURN;
5296                 }
5297         }
5298
5299         ret = imalloc(size);
5300
5301 RETURN:
5302         if (ret == NULL) {
5303                 if (opt_xmalloc) {
5304                         _malloc_message(_getprogname(),
5305                             ": (malloc) Error in malloc(): out of memory\n", "",
5306                             "");
5307                         abort();
5308                 }
5309                 errno = ENOMEM;
5310         }
5311
5312         UTRACE(0, size, ret);
5313         return (ret);
5314 }
5315
5316 int
5317 posix_memalign(void **memptr, size_t alignment, size_t size)
5318 {
5319         int ret;
5320         void *result;
5321
5322         if (malloc_init())
5323                 result = NULL;
5324         else {
5325                 /* Make sure that alignment is a large enough power of 2. */
5326                 if (((alignment - 1) & alignment) != 0
5327                     || alignment < sizeof(void *)) {
5328                         if (opt_xmalloc) {
5329                                 _malloc_message(_getprogname(),
5330                                     ": (malloc) Error in posix_memalign(): "
5331                                     "invalid alignment\n", "", "");
5332                                 abort();
5333                         }
5334                         result = NULL;
5335                         ret = EINVAL;
5336                         goto RETURN;
5337                 }
5338
5339                 if (size == 0) {
5340                         if (opt_sysv == false)
5341                                 size = 1;
5342                         else {
5343                                 result = NULL;
5344                                 ret = 0;
5345                                 goto RETURN;
5346                         }
5347                 }
5348                 result = ipalloc(alignment, size);
5349         }
5350
5351         if (result == NULL) {
5352                 if (opt_xmalloc) {
5353                         _malloc_message(_getprogname(),
5354                         ": (malloc) Error in posix_memalign(): out of memory\n",
5355                         "", "");
5356                         abort();
5357                 }
5358                 ret = ENOMEM;
5359                 goto RETURN;
5360         }
5361
5362         *memptr = result;
5363         ret = 0;
5364
5365 RETURN:
5366         UTRACE(0, size, result);
5367         return (ret);
5368 }
5369
5370 void *
5371 calloc(size_t num, size_t size)
5372 {
5373         void *ret;
5374         size_t num_size;
5375
5376         if (malloc_init()) {
5377                 num_size = 0;
5378                 ret = NULL;
5379                 goto RETURN;
5380         }
5381
5382         num_size = num * size;
5383         if (num_size == 0) {
5384                 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
5385                         num_size = 1;
5386                 else {
5387                         ret = NULL;
5388                         goto RETURN;
5389                 }
5390         /*
5391          * Try to avoid division here.  We know that it isn't possible to
5392          * overflow during multiplication if neither operand uses any of the
5393          * most significant half of the bits in a size_t.
5394          */
5395         } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
5396             && (num_size / size != num)) {
5397                 /* size_t overflow. */
5398                 ret = NULL;
5399                 goto RETURN;
5400         }
5401
5402         ret = icalloc(num_size);
5403
5404 RETURN:
5405         if (ret == NULL) {
5406                 if (opt_xmalloc) {
5407                         _malloc_message(_getprogname(),
5408                             ": (malloc) Error in calloc(): out of memory\n", "",
5409                             "");
5410                         abort();
5411                 }
5412                 errno = ENOMEM;
5413         }
5414
5415         UTRACE(0, num_size, ret);
5416         return (ret);
5417 }
5418
5419 void *
5420 realloc(void *ptr, size_t size)
5421 {
5422         void *ret;
5423
5424         if (size == 0) {
5425                 if (opt_sysv == false)
5426                         size = 1;
5427                 else {
5428                         if (ptr != NULL)
5429                                 idalloc(ptr);
5430                         ret = NULL;
5431                         goto RETURN;
5432                 }
5433         }
5434
5435         if (ptr != NULL) {
5436                 assert(malloc_initialized);
5437
5438                 ret = iralloc(ptr, size);
5439
5440                 if (ret == NULL) {
5441                         if (opt_xmalloc) {
5442                                 _malloc_message(_getprogname(),
5443                                     ": (malloc) Error in realloc(): out of "
5444                                     "memory\n", "", "");
5445                                 abort();
5446                         }
5447                         errno = ENOMEM;
5448                 }
5449         } else {
5450                 if (malloc_init())
5451                         ret = NULL;
5452                 else
5453                         ret = imalloc(size);
5454
5455                 if (ret == NULL) {
5456                         if (opt_xmalloc) {
5457                                 _malloc_message(_getprogname(),
5458                                     ": (malloc) Error in realloc(): out of "
5459                                     "memory\n", "", "");
5460                                 abort();
5461                         }
5462                         errno = ENOMEM;
5463                 }
5464         }
5465
5466 RETURN:
5467         UTRACE(ptr, size, ret);
5468         return (ret);
5469 }
5470
5471 void
5472 free(void *ptr)
5473 {
5474
5475         UTRACE(ptr, 0, 0);
5476         if (ptr != NULL) {
5477                 assert(malloc_initialized);
5478
5479                 idalloc(ptr);
5480         }
5481 }
5482
5483 /*
5484  * End malloc(3)-compatible functions.
5485  */
5486 /******************************************************************************/
5487 /*
5488  * Begin non-standard functions.
5489  */
5490
5491 size_t
5492 malloc_usable_size(const void *ptr)
5493 {
5494
5495         assert(ptr != NULL);
5496
5497         return (isalloc(ptr));
5498 }
5499
5500 /*
5501  * End non-standard functions.
5502  */
5503 /******************************************************************************/
5504 /*
5505  * Begin library-private functions.
5506  */
5507
5508 /******************************************************************************/
5509 /*
5510  * Begin thread cache.
5511  */
5512
5513 /*
5514  * We provide an unpublished interface in order to receive notifications from
5515  * the pthreads library whenever a thread exits.  This allows us to clean up
5516  * thread caches.
5517  */
5518 void
5519 _malloc_thread_cleanup(void)
5520 {
5521
5522 #ifdef MALLOC_MAG
5523         if (mag_rack != NULL) {
5524                 assert(mag_rack != (void *)-1);
5525                 mag_rack_destroy(mag_rack);
5526 #ifdef MALLOC_DEBUG
5527                 mag_rack = (void *)-1;
5528 #endif
5529         }
5530 #endif
5531 }
5532
5533 /*
5534  * The following functions are used by threading libraries for protection of
5535  * malloc during fork().  These functions are only called if the program is
5536  * running in threaded mode, so there is no need to check whether the program
5537  * is threaded here.
5538  */
5539
5540 void
5541 _malloc_prefork(void)
5542 {
5543         bool again;
5544         unsigned i, j;
5545         arena_t *larenas[narenas], *tarenas[narenas];
5546
5547         /* Acquire all mutexes in a safe order. */
5548
5549         /*
5550          * arenas_lock must be acquired after all of the arena mutexes, in
5551          * order to avoid potential deadlock with arena_lock_balance[_hard]().
5552          * Since arenas_lock protects the arenas array, the following code has
5553          * to race with arenas_extend() callers until it succeeds in locking
5554          * all arenas before locking arenas_lock.
5555          */
5556         memset(larenas, 0, sizeof(arena_t *) * narenas);
5557         do {
5558                 again = false;
5559
5560                 malloc_spin_lock(&arenas_lock);
5561                 for (i = 0; i < narenas; i++) {
5562                         if (arenas[i] != larenas[i]) {
5563                                 memcpy(tarenas, arenas, sizeof(arena_t *) *
5564                                     narenas);
5565                                 malloc_spin_unlock(&arenas_lock);
5566                                 for (j = 0; j < narenas; j++) {
5567                                         if (larenas[j] != tarenas[j]) {
5568                                                 larenas[j] = tarenas[j];
5569                                                 malloc_spin_lock(
5570                                                     &larenas[j]->lock);
5571                                         }
5572                                 }
5573                                 again = true;
5574                                 break;
5575                         }
5576                 }
5577         } while (again);
5578
5579         malloc_mutex_lock(&base_mtx);
5580
5581         malloc_mutex_lock(&huge_mtx);
5582
5583 #ifdef MALLOC_DSS
5584         malloc_mutex_lock(&dss_mtx);
5585 #endif
5586 }
5587
5588 void
5589 _malloc_postfork(void)
5590 {
5591         unsigned i;
5592         arena_t *larenas[narenas];
5593
5594         /* Release all mutexes, now that fork() has completed. */
5595
5596 #ifdef MALLOC_DSS
5597         malloc_mutex_unlock(&dss_mtx);
5598 #endif
5599
5600         malloc_mutex_unlock(&huge_mtx);
5601
5602         malloc_mutex_unlock(&base_mtx);
5603
5604         memcpy(larenas, arenas, sizeof(arena_t *) * narenas);
5605         malloc_spin_unlock(&arenas_lock);
5606         for (i = 0; i < narenas; i++) {
5607                 if (larenas[i] != NULL)
5608                         malloc_spin_unlock(&larenas[i]->lock);
5609         }
5610 }
5611
5612 /*
5613  * End library-private functions.
5614  */
5615 /******************************************************************************/