lib/libc/stdlib/malloc.c

   1 /*-
   2  * Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice(s), this list of conditions and the following disclaimer as
  10  *    the first lines of this file unmodified other than the possible
  11  *    addition of one or more copyright notices.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice(s), this list of conditions and the following disclaimer in
  14  *    the documentation and/or other materials provided with the
  15  *    distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  26  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  *
  29  *******************************************************************************
  30  *
  31  * This allocator implementation is designed to provide scalable performance
  32  * for multi-threaded programs on multi-processor systems.  The following
  33  * features are included for this purpose:
  34  *
  35  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
  36  *     contention and cache sloshing.
  37  *
  38  *   + Thread-specific caching is used if there are multiple threads, which
  39  *     reduces the amount of locking.
  40  *
  41  *   + Cache line sharing between arenas is avoided for internal data
  42  *     structures.
  43  *
  44  *   + Memory is managed in chunks and runs (chunks can be split into runs),
  45  *     rather than as individual pages.  This provides a constant-time
  46  *     mechanism for associating allocations with particular arenas.
  47  *
  48  * Allocation requests are rounded up to the nearest size class, and no record
  49  * of the original request size is maintained.  Allocations are broken into
  50  * categories according to size class.  Assuming runtime defaults, 4 kB pages
  51  * and a 16 byte quantum on a 32-bit system, the size classes in each category
  52  * are as follows:
  53  *
  54  *   |=======================================|
  55  *   | Category | Subcategory      |    Size |
  56  *   |=======================================|
  57  *   | Small    | Tiny             |       2 |
  58  *   |          |                  |       4 |
  59  *   |          |                  |       8 |
  60  *   |          |------------------+---------|
  61  *   |          | Quantum-spaced   |      16 |
  62  *   |          |                  |      32 |
  63  *   |          |                  |      48 |
  64  *   |          |                  |     ... |
  65  *   |          |                  |      96 |
  66  *   |          |                  |     112 |
  67  *   |          |                  |     128 |
  68  *   |          |------------------+---------|
  69  *   |          | Cacheline-spaced |     192 |
  70  *   |          |                  |     256 |
  71  *   |          |                  |     320 |
  72  *   |          |                  |     384 |
  73  *   |          |                  |     448 |
  74  *   |          |                  |     512 |
  75  *   |          |------------------+---------|
  76  *   |          | Sub-page         |     760 |
  77  *   |          |                  |    1024 |
  78  *   |          |                  |    1280 |
  79  *   |          |                  |     ... |
  80  *   |          |                  |    3328 |
  81  *   |          |                  |    3584 |
  82  *   |          |                  |    3840 |
  83  *   |=======================================|
  84  *   | Large                       |    4 kB |
  85  *   |                             |    8 kB |
  86  *   |                             |   12 kB |
  87  *   |                             |     ... |
  88  *   |                             | 1012 kB |
  89  *   |                             | 1016 kB |
  90  *   |                             | 1020 kB |
  91  *   |=======================================|
  92  *   | Huge                        |    1 MB |
  93  *   |                             |    2 MB |
  94  *   |                             |    3 MB |
  95  *   |                             |     ... |
  96  *   |=======================================|
  97  *
  98  * A different mechanism is used for each category:
  99  *
 100  *   Small : Each size class is segregated into its own set of runs.  Each run
 101  *           maintains a bitmap of which regions are free/allocated.
 102  *
 103  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
 104  *           in the associated arena chunk header maps.
 105  *
 106  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
 107  *          Metadata are stored in a separate red-black tree.
 108  *
 109  *******************************************************************************
 110  */
 111
 112 /*
 113  * MALLOC_PRODUCTION disables assertions and statistics gathering.  It also
 114  * defaults the A and J runtime options to off.  These settings are appropriate
 115  * for production systems.
 116  */
 117 #define MALLOC_PRODUCTION
 118
 119 #ifndef MALLOC_PRODUCTION
 120    /*
 121     * MALLOC_DEBUG enables assertions and other sanity checks, and disables
 122     * inline functions.
 123     */
 124 #  define MALLOC_DEBUG
 125
 126    /* MALLOC_STATS enables statistics calculation. */
 127 #  define MALLOC_STATS
 128 #endif
 129
 130 /*
 131  * MALLOC_TINY enables support for tiny objects, which are smaller than one
 132  * quantum.
 133  */
 134 #define MALLOC_TINY
 135
 136 /*
 137  * MALLOC_MAG enables a magazine-based thread-specific caching layer for small
 138  * objects.  This makes it possible to allocate/deallocate objects without any
 139  * locking when the cache is in the steady state.
 140  */
 141 #define MALLOC_MAG
 142
 143 /*
 144  * MALLOC_BALANCE enables monitoring of arena lock contention and dynamically
 145  * re-balances arena load if exponentially averaged contention exceeds a
 146  * certain threshold.
 147  */
 148 #define MALLOC_BALANCE
 149
 150 /*
 151  * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
 152  * segment (DSS).  In an ideal world, this functionality would be completely
 153  * unnecessary, but we are burdened by history and the lack of resource limits
 154  * for anonymous mapped memory.
 155  */
 156 #define MALLOC_DSS
 157
 158 #include <sys/cdefs.h>
 159 __FBSDID("$FreeBSD$");
 160
 161 #include "libc_private.h"
 162 #ifdef MALLOC_DEBUG
 163 #  define _LOCK_DEBUG
 164 #endif
 165 #include "spinlock.h"
 166 #include "namespace.h"
 167 #include <sys/mman.h>
 168 #include <sys/param.h>
 169 #include <sys/stddef.h>
 170 #include <sys/time.h>
 171 #include <sys/types.h>
 172 #include <sys/sysctl.h>
 173 #include <sys/uio.h>
 174 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
 175
 176 #include <machine/cpufunc.h>
 177 #include <machine/param.h>
 178 #include <machine/vmparam.h>
 179
 180 #include <errno.h>
 181 #include <limits.h>
 182 #include <pthread.h>
 183 #include <sched.h>
 184 #include <stdarg.h>
 185 #include <stdbool.h>
 186 #include <stdio.h>
 187 #include <stdint.h>
 188 #include <stdlib.h>
 189 #include <string.h>
 190 #include <strings.h>
 191 #include <unistd.h>
 192
 193 #include "un-namespace.h"
 194
 195 #ifdef MALLOC_DEBUG
 196 #  ifdef NDEBUG
 197 #    undef NDEBUG
 198 #  endif
 199 #else
 200 #  ifndef NDEBUG
 201 #    define NDEBUG
 202 #  endif
 203 #endif
 204 #include <assert.h>
 205
 206 #include "rb.h"
 207
 208 #ifdef MALLOC_DEBUG
 209    /* Disable inlining to make debugging easier. */
 210 #  define inline
 211 #endif
 212
 213 /* Size of stack-allocated buffer passed to strerror_r(). */
 214 #define STRERROR_BUF            64
 215
 216 /*
 217  * Minimum alignment of allocations is 2^QUANTUM_2POW bytes.
 218  */
 219 #ifdef __i386__
 220 #  define QUANTUM_2POW          4
 221 #  define SIZEOF_PTR_2POW       2
 222 #  define CPU_SPINWAIT          __asm__ volatile("pause")
 223 #endif
 224 #ifdef __ia64__
 225 #  define QUANTUM_2POW          4
 226 #  define SIZEOF_PTR_2POW       3
 227 #endif
 228 #ifdef __alpha__
 229 #  define QUANTUM_2POW          4
 230 #  define SIZEOF_PTR_2POW       3
 231 #  define NO_TLS
 232 #endif
 233 #ifdef __sparc64__
 234 #  define QUANTUM_2POW          4
 235 #  define SIZEOF_PTR_2POW       3
 236 #  define NO_TLS
 237 #endif
 238 #ifdef __amd64__
 239 #  define QUANTUM_2POW          4
 240 #  define SIZEOF_PTR_2POW       3
 241 #  define CPU_SPINWAIT          __asm__ volatile("pause")
 242 #endif
 243 #ifdef __arm__
 244 #  define QUANTUM_2POW          3
 245 #  define SIZEOF_PTR_2POW       2
 246 #  define NO_TLS
 247 #endif
 248 #ifdef __mips__
 249 #  define QUANTUM_2POW          3
 250 #  define SIZEOF_PTR_2POW       2
 251 #  define NO_TLS
 252 #endif
 253 #ifdef __powerpc__
 254 #  define QUANTUM_2POW          4
 255 #  define SIZEOF_PTR_2POW       2
 256 #endif
 257
 258 #define QUANTUM                 ((size_t)(1U << QUANTUM_2POW))
 259 #define QUANTUM_MASK            (QUANTUM - 1)
 260
 261 #define SIZEOF_PTR              (1U << SIZEOF_PTR_2POW)
 262
 263 /* sizeof(int) == (1U << SIZEOF_INT_2POW). */
 264 #ifndef SIZEOF_INT_2POW
 265 #  define SIZEOF_INT_2POW       2
 266 #endif
 267
 268 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
 269 #if (!defined(PIC) && !defined(NO_TLS))
 270 #  define NO_TLS
 271 #endif
 272
 273 #ifdef NO_TLS
 274    /* MALLOC_MAG requires TLS. */
 275 #  ifdef MALLOC_MAG
 276 #    undef MALLOC_MAG
 277 #  endif
 278    /* MALLOC_BALANCE requires TLS. */
 279 #  ifdef MALLOC_BALANCE
 280 #    undef MALLOC_BALANCE
 281 #  endif
 282 #endif
 283
 284 /*
 285  * Size and alignment of memory chunks that are allocated by the OS's virtual
 286  * memory system.
 287  */
 288 #define CHUNK_2POW_DEFAULT      20
 289
 290 /* Maximum number of dirty pages per arena. */
 291 #define DIRTY_MAX_DEFAULT       (1U << 9)
 292
 293 /*
 294  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing.
 295  * In addition, this controls the spacing of cacheline-spaced size classes.
 296  */
 297 #define CACHELINE_2POW          6
 298 #define CACHELINE               ((size_t)(1U << CACHELINE_2POW))
 299 #define CACHELINE_MASK          (CACHELINE - 1)
 300
 301 /*
 302  * Subpages are an artificially designated partitioning of pages.  Their only
 303  * purpose is to support subpage-spaced size classes.
 304  *
 305  * There must be at least 4 subpages per page, due to the way size classes are
 306  * handled.
 307  */
 308 #define SUBPAGE_2POW            8
 309 #define SUBPAGE                 ((size_t)(1U << SUBPAGE_2POW))
 310 #define SUBPAGE_MASK            (SUBPAGE - 1)
 311
 312 #ifdef MALLOC_TINY
 313    /* Smallest size class to support. */
 314 #  define TINY_MIN_2POW         1
 315 #endif
 316
 317 /*
 318  * Maximum size class that is a multiple of the quantum, but not (necessarily)
 319  * a power of 2.  Above this size, allocations are rounded up to the nearest
 320  * power of 2.
 321  */
 322 #define QSPACE_MAX_2POW_DEFAULT 7
 323
 324 /*
 325  * Maximum size class that is a multiple of the cacheline, but not (necessarily)
 326  * a power of 2.  Above this size, allocations are rounded up to the nearest
 327  * power of 2.
 328  */
 329 #define CSPACE_MAX_2POW_DEFAULT 9
 330
 331 /*
 332  * RUN_MAX_OVRHD indicates maximum desired run header overhead.  Runs are sized
 333  * as small as possible such that this setting is still honored, without
 334  * violating other constraints.  The goal is to make runs as small as possible
 335  * without exceeding a per run external fragmentation threshold.
 336  *
 337  * We use binary fixed point math for overhead computations, where the binary
 338  * point is implicitly RUN_BFP bits to the left.
 339  *
 340  * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
 341  * honored for some/all object sizes, since there is one bit of header overhead
 342  * per object (plus a constant).  This constraint is relaxed (ignored) for runs
 343  * that are so small that the per-region overhead is greater than:
 344  *
 345  *   (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP))
 346  */
 347 #define RUN_BFP                 12
 348 /*                                    \/   Implicit binary fixed point. */
 349 #define RUN_MAX_OVRHD           0x0000003dU
 350 #define RUN_MAX_OVRHD_RELAX     0x00001800U
 351
 352 /* Put a cap on small object run size.  This overrides RUN_MAX_OVRHD. */
 353 #define RUN_MAX_SMALL   (12 * PAGE_SIZE)
 354
 355 /*
 356  * Hyper-threaded CPUs may need a special instruction inside spin loops in
 357  * order to yield to another virtual CPU.  If no such instruction is defined
 358  * above, make CPU_SPINWAIT a no-op.
 359  */
 360 #ifndef CPU_SPINWAIT
 361 #  define CPU_SPINWAIT
 362 #endif
 363
 364 /*
 365  * Adaptive spinning must eventually switch to blocking, in order to avoid the
 366  * potential for priority inversion deadlock.  Backing off past a certain point
 367  * can actually waste time.
 368  */
 369 #define SPIN_LIMIT_2POW         11
 370
 371 /*
 372  * Conversion from spinning to blocking is expensive; we use (1U <<
 373  * BLOCK_COST_2POW) to estimate how many more times costly blocking is than
 374  * worst-case spinning.
 375  */
 376 #define BLOCK_COST_2POW         4
 377
 378 #ifdef MALLOC_MAG
 379    /*
 380     * Default magazine size, in bytes.  max_rounds is calculated to make
 381     * optimal use of the space, leaving just enough room for the magazine
 382     * header.
 383     */
 384 #  define MAG_SIZE_2POW_DEFAULT 9
 385 #endif
 386
 387 #ifdef MALLOC_BALANCE
 388    /*
 389     * We use an exponential moving average to track recent lock contention,
 390     * where the size of the history window is N, and alpha=2/(N+1).
 391     *
 392     * Due to integer math rounding, very small values here can cause
 393     * substantial degradation in accuracy, thus making the moving average decay
 394     * faster than it would with precise calculation.
 395     */
 396 #  define BALANCE_ALPHA_INV_2POW        9
 397
 398    /*
 399     * Threshold value for the exponential moving contention average at which to
 400     * re-assign a thread.
 401     */
 402 #  define BALANCE_THRESHOLD_DEFAULT     (1U << (SPIN_LIMIT_2POW-4))
 403 #endif
 404
 405 /******************************************************************************/
 406
 407 /*
 408  * Mutexes based on spinlocks.  We can't use normal pthread spinlocks in all
 409  * places, because they require malloc()ed memory, which causes bootstrapping
 410  * issues in some cases.
 411  */
 412 typedef struct {
 413         spinlock_t      lock;
 414 } malloc_mutex_t;
 415
 416 /* Set to true once the allocator has been initialized. */
 417 static bool malloc_initialized = false;
 418
 419 /* Used to avoid initialization races. */
 420 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
 421
 422 /******************************************************************************/
 423 /*
 424  * Statistics data structures.
 425  */
 426
 427 #ifdef MALLOC_STATS
 428
 429 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
 430 struct malloc_bin_stats_s {
 431         /*
 432          * Number of allocation requests that corresponded to the size of this
 433          * bin.
 434          */
 435         uint64_t        nrequests;
 436
 437 #ifdef MALLOC_MAG
 438         /* Number of magazine reloads from this bin. */
 439         uint64_t        nmags;
 440 #endif
 441
 442         /* Total number of runs created for this bin's size class. */
 443         uint64_t        nruns;
 444
 445         /*
 446          * Total number of runs reused by extracting them from the runs tree for
 447          * this bin's size class.
 448          */
 449         uint64_t        reruns;
 450
 451         /* High-water mark for this bin. */
 452         unsigned long   highruns;
 453
 454         /* Current number of runs in this bin. */
 455         unsigned long   curruns;
 456 };
 457
 458 typedef struct arena_stats_s arena_stats_t;
 459 struct arena_stats_s {
 460         /* Number of bytes currently mapped. */
 461         size_t          mapped;
 462
 463         /*
 464          * Total number of purge sweeps, total number of madvise calls made,
 465          * and total pages purged in order to keep dirty unused memory under
 466          * control.
 467          */
 468         uint64_t        npurge;
 469         uint64_t        nmadvise;
 470         uint64_t        purged;
 471
 472         /* Per-size-category statistics. */
 473         size_t          allocated_small;
 474         uint64_t        nmalloc_small;
 475         uint64_t        ndalloc_small;
 476
 477         size_t          allocated_large;
 478         uint64_t        nmalloc_large;
 479         uint64_t        ndalloc_large;
 480
 481 #ifdef MALLOC_BALANCE
 482         /* Number of times this arena reassigned a thread due to contention. */
 483         uint64_t        nbalance;
 484 #endif
 485 };
 486
 487 typedef struct chunk_stats_s chunk_stats_t;
 488 struct chunk_stats_s {
 489         /* Number of chunks that were allocated. */
 490         uint64_t        nchunks;
 491
 492         /* High-water mark for number of chunks allocated. */
 493         unsigned long   highchunks;
 494
 495         /*
 496          * Current number of chunks allocated.  This value isn't maintained for
 497          * any other purpose, so keep track of it in order to be able to set
 498          * highchunks.
 499          */
 500         unsigned long   curchunks;
 501 };
 502
 503 #endif /* #ifdef MALLOC_STATS */
 504
 505 /******************************************************************************/
 506 /*
 507  * Extent data structures.
 508  */
 509
 510 /* Tree of extents. */
 511 typedef struct extent_node_s extent_node_t;
 512 struct extent_node_s {
 513 #ifdef MALLOC_DSS
 514         /* Linkage for the size/address-ordered tree. */
 515         rb_node(extent_node_t) link_szad;
 516 #endif
 517
 518         /* Linkage for the address-ordered tree. */
 519         rb_node(extent_node_t) link_ad;
 520
 521         /* Pointer to the extent that this tree node is responsible for. */
 522         void    *addr;
 523
 524         /* Total region size. */
 525         size_t  size;
 526 };
 527 typedef rb_tree(extent_node_t) extent_tree_t;
 528
 529 /******************************************************************************/
 530 /*
 531  * Arena data structures.
 532  */
 533
 534 typedef struct arena_s arena_t;
 535 typedef struct arena_bin_s arena_bin_t;
 536
 537 /* Each element of the chunk map corresponds to one page within the chunk. */
 538 typedef struct arena_chunk_map_s arena_chunk_map_t;
 539 struct arena_chunk_map_s {
 540         /*
 541          * Linkage for run trees.  There are two disjoint uses:
 542          *
 543          * 1) arena_t's runs_avail tree.
 544          * 2) arena_run_t conceptually uses this linkage for in-use non-full
 545          *    runs, rather than directly embedding linkage.
 546          */
 547         rb_node(arena_chunk_map_t)      link;
 548
 549         /*
 550          * Run address (or size) and various flags are stored together.  The bit
 551          * layout looks like (assuming 32-bit system):
 552          *
 553          *   ???????? ???????? ????---- ---kdzla
 554          *
 555          * ? : Unallocated: Run address for first/last pages, unset for internal
 556          *                  pages.
 557          *     Small: Run address.
 558          *     Large: Run size for first page, unset for trailing pages.
 559          * - : Unused.
 560          * k : key?
 561          * d : dirty?
 562          * z : zeroed?
 563          * l : large?
 564          * a : allocated?
 565          *
 566          * Following are example bit patterns for the three types of runs.
 567          *
 568          * r : run address
 569          * s : run size
 570          * x : don't care
 571          * - : 0
 572          * [dzla] : bit set
 573          *
 574          *   Unallocated:
 575          *     ssssssss ssssssss ssss---- --------
 576          *     xxxxxxxx xxxxxxxx xxxx---- ----d---
 577          *     ssssssss ssssssss ssss---- -----z--
 578          *
 579          *   Small:
 580          *     rrrrrrrr rrrrrrrr rrrr---- -------a
 581          *     rrrrrrrr rrrrrrrr rrrr---- -------a
 582          *     rrrrrrrr rrrrrrrr rrrr---- -------a
 583          *
 584          *   Large:
 585          *     ssssssss ssssssss ssss---- ------la
 586          *     -------- -------- -------- ------la
 587          *     -------- -------- -------- ------la
 588          */
 589         size_t                          bits;
 590 #define CHUNK_MAP_KEY           ((size_t)0x10U)
 591 #define CHUNK_MAP_DIRTY         ((size_t)0x08U)
 592 #define CHUNK_MAP_ZEROED        ((size_t)0x04U)
 593 #define CHUNK_MAP_LARGE         ((size_t)0x02U)
 594 #define CHUNK_MAP_ALLOCATED     ((size_t)0x01U)
 595 };
 596 typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
 597 typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
 598
 599 /* Arena chunk header. */
 600 typedef struct arena_chunk_s arena_chunk_t;
 601 struct arena_chunk_s {
 602         /* Arena that owns the chunk. */
 603         arena_t         *arena;
 604
 605         /* Linkage for the arena's chunks_dirty tree. */
 606         rb_node(arena_chunk_t) link_dirty;
 607
 608         /* Number of dirty pages. */
 609         size_t          ndirty;
 610
 611         /* Map of pages within chunk that keeps track of free/large/small. */
 612         arena_chunk_map_t map[1]; /* Dynamically sized. */
 613 };
 614 typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
 615
 616 typedef struct arena_run_s arena_run_t;
 617 struct arena_run_s {
 618 #ifdef MALLOC_DEBUG
 619         uint32_t        magic;
 620 #  define ARENA_RUN_MAGIC 0x384adf93
 621 #endif
 622
 623         /* Bin this run is associated with. */
 624         arena_bin_t     *bin;
 625
 626         /* Index of first element that might have a free region. */
 627         unsigned        regs_minelm;
 628
 629         /* Number of free regions in run. */
 630         unsigned        nfree;
 631
 632         /* Bitmask of in-use regions (0: in use, 1: free). */
 633         unsigned        regs_mask[1]; /* Dynamically sized. */
 634 };
 635
 636 struct arena_bin_s {
 637         /*
 638          * Current run being used to service allocations of this bin's size
 639          * class.
 640          */
 641         arena_run_t     *runcur;
 642
 643         /*
 644          * Tree of non-full runs.  This tree is used when looking for an
 645          * existing run when runcur is no longer usable.  We choose the
 646          * non-full run that is lowest in memory; this policy tends to keep
 647          * objects packed well, and it can also help reduce the number of
 648          * almost-empty chunks.
 649          */
 650         arena_run_tree_t runs;
 651
 652         /* Size of regions in a run for this bin's size class. */
 653         size_t          reg_size;
 654
 655         /* Total size of a run for this bin's size class. */
 656         size_t          run_size;
 657
 658         /* Total number of regions in a run for this bin's size class. */
 659         uint32_t        nregs;
 660
 661         /* Number of elements in a run's regs_mask for this bin's size class. */
 662         uint32_t        regs_mask_nelms;
 663
 664         /* Offset of first region in a run for this bin's size class. */
 665         uint32_t        reg0_offset;
 666
 667 #ifdef MALLOC_STATS
 668         /* Bin statistics. */
 669         malloc_bin_stats_t stats;
 670 #endif
 671 };
 672
 673 struct arena_s {
 674 #ifdef MALLOC_DEBUG
 675         uint32_t                magic;
 676 #  define ARENA_MAGIC 0x947d3d24
 677 #endif
 678
 679         /* All operations on this arena require that lock be locked. */
 680         pthread_mutex_t         lock;
 681
 682 #ifdef MALLOC_STATS
 683         arena_stats_t           stats;
 684 #endif
 685
 686         /* Tree of dirty-page-containing chunks this arena manages. */
 687         arena_chunk_tree_t      chunks_dirty;
 688
 689         /*
 690          * In order to avoid rapid chunk allocation/deallocation when an arena
 691          * oscillates right on the cusp of needing a new chunk, cache the most
 692          * recently freed chunk.  The spare is left in the arena's chunk trees
 693          * until it is deleted.
 694          *
 695          * There is one spare chunk per arena, rather than one spare total, in
 696          * order to avoid interactions between multiple threads that could make
 697          * a single spare inadequate.
 698          */
 699         arena_chunk_t           *spare;
 700
 701         /*
 702          * Current count of pages within unused runs that are potentially
 703          * dirty, and for which madvise(... MADV_FREE) has not been called.  By
 704          * tracking this, we can institute a limit on how much dirty unused
 705          * memory is mapped for each arena.
 706          */
 707         size_t                  ndirty;
 708
 709         /*
 710          * Size/address-ordered tree of this arena's available runs.  This tree
 711          * is used for first-best-fit run allocation.
 712          */
 713         arena_avail_tree_t      runs_avail;
 714
 715 #ifdef MALLOC_BALANCE
 716         /*
 717          * The arena load balancing machinery needs to keep track of how much
 718          * lock contention there is.  This value is exponentially averaged.
 719          */
 720         uint32_t                contention;
 721 #endif
 722
 723         /*
 724          * bins is used to store rings of free regions of the following sizes,
 725          * assuming a 16-byte quantum, 4kB page size, and default
 726          * MALLOC_OPTIONS.
 727          *
 728          *   bins[i] | size |
 729          *   --------+------+
 730          *        0  |    2 |
 731          *        1  |    4 |
 732          *        2  |    8 |
 733          *   --------+------+
 734          *        3  |   16 |
 735          *        4  |   32 |
 736          *        5  |   48 |
 737          *        6  |   64 |
 738          *           :      :
 739          *           :      :
 740          *       33  |  496 |
 741          *       34  |  512 |
 742          *   --------+------+
 743          *       35  | 1024 |
 744          *       36  | 2048 |
 745          *   --------+------+
 746          */
 747         arena_bin_t             bins[1]; /* Dynamically sized. */
 748 };
 749
 750 /******************************************************************************/
 751 /*
 752  * Magazine data structures.
 753  */
 754
 755 #ifdef MALLOC_MAG
 756 typedef struct mag_s mag_t;
 757 struct mag_s {
 758         size_t          binind; /* Index of associated bin. */
 759         size_t          nrounds;
 760         void            *rounds[1]; /* Dynamically sized. */
 761 };
 762
 763 /*
 764  * Magazines are lazily allocated, but once created, they remain until the
 765  * associated mag_rack is destroyed.
 766  */
 767 typedef struct bin_mags_s bin_mags_t;
 768 struct bin_mags_s {
 769         mag_t   *curmag;
 770         mag_t   *sparemag;
 771 };
 772
 773 typedef struct mag_rack_s mag_rack_t;
 774 struct mag_rack_s {
 775         bin_mags_t      bin_mags[1]; /* Dynamically sized. */
 776 };
 777 #endif
 778
 779 /******************************************************************************/
 780 /*
 781  * Data.
 782  */
 783
 784 /* Number of CPUs. */
 785 static unsigned         ncpus;
 786
 787 /* Various bin-related settings. */
 788 #ifdef MALLOC_TINY              /* Number of (2^n)-spaced tiny bins. */
 789 #  define               ntbins  ((unsigned)(QUANTUM_2POW - TINY_MIN_2POW))
 790 #else
 791 #  define               ntbins  0
 792 #endif
 793 static unsigned         nqbins; /* Number of quantum-spaced bins. */
 794 static unsigned         ncbins; /* Number of cacheline-spaced bins. */
 795 static unsigned         nsbins; /* Number of subpage-spaced bins. */
 796 static unsigned         nbins;
 797 #ifdef MALLOC_TINY
 798 #  define               tspace_max      ((size_t)(QUANTUM >> 1))
 799 #endif
 800 #define                 qspace_min      QUANTUM
 801 static size_t           qspace_max;
 802 static size_t           cspace_min;
 803 static size_t           cspace_max;
 804 static size_t           sspace_min;
 805 static size_t           sspace_max;
 806 #define                 bin_maxclass    sspace_max
 807
 808 static uint8_t const    *size2bin;
 809 /*
 810  * const_size2bin is a static constant lookup table that in the common case can
 811  * be used as-is for size2bin.  For dynamically linked programs, this avoids
 812  * a page of memory overhead per process.
 813  */
 814 #define S2B_1(i)        i,
 815 #define S2B_2(i)        S2B_1(i) S2B_1(i)
 816 #define S2B_4(i)        S2B_2(i) S2B_2(i)
 817 #define S2B_8(i)        S2B_4(i) S2B_4(i)
 818 #define S2B_16(i)       S2B_8(i) S2B_8(i)
 819 #define S2B_32(i)       S2B_16(i) S2B_16(i)
 820 #define S2B_64(i)       S2B_32(i) S2B_32(i)
 821 #define S2B_128(i)      S2B_64(i) S2B_64(i)
 822 #define S2B_256(i)      S2B_128(i) S2B_128(i)
 823 static const uint8_t    const_size2bin[PAGE_SIZE - 255] = {
 824         S2B_1(0xffU)            /*    0 */
 825 #if (QUANTUM_2POW == 4)
 826 /* 64-bit system ************************/
 827 #  ifdef MALLOC_TINY
 828         S2B_2(0)                /*    2 */
 829         S2B_2(1)                /*    4 */
 830         S2B_4(2)                /*    8 */
 831         S2B_8(3)                /*   16 */
 832 #    define S2B_QMIN 3
 833 #  else
 834         S2B_16(0)               /*   16 */
 835 #    define S2B_QMIN 0
 836 #  endif
 837         S2B_16(S2B_QMIN + 1)    /*   32 */
 838         S2B_16(S2B_QMIN + 2)    /*   48 */
 839         S2B_16(S2B_QMIN + 3)    /*   64 */
 840         S2B_16(S2B_QMIN + 4)    /*   80 */
 841         S2B_16(S2B_QMIN + 5)    /*   96 */
 842         S2B_16(S2B_QMIN + 6)    /*  112 */
 843         S2B_16(S2B_QMIN + 7)    /*  128 */
 844 #  define S2B_CMIN (S2B_QMIN + 8)
 845 #else
 846 /* 32-bit system ************************/
 847 #  ifdef MALLOC_TINY
 848         S2B_2(0)                /*    2 */
 849         S2B_2(1)                /*    4 */
 850         S2B_4(2)                /*    8 */
 851 #    define S2B_QMIN 2
 852 #  else
 853         S2B_8(0)                /*    8 */
 854 #    define S2B_QMIN 0
 855 #  endif
 856         S2B_8(S2B_QMIN + 1)     /*   16 */
 857         S2B_8(S2B_QMIN + 2)     /*   24 */
 858         S2B_8(S2B_QMIN + 3)     /*   32 */
 859         S2B_8(S2B_QMIN + 4)     /*   40 */
 860         S2B_8(S2B_QMIN + 5)     /*   48 */
 861         S2B_8(S2B_QMIN + 6)     /*   56 */
 862         S2B_8(S2B_QMIN + 7)     /*   64 */
 863         S2B_8(S2B_QMIN + 8)     /*   72 */
 864         S2B_8(S2B_QMIN + 9)     /*   80 */
 865         S2B_8(S2B_QMIN + 10)    /*   88 */
 866         S2B_8(S2B_QMIN + 11)    /*   96 */
 867         S2B_8(S2B_QMIN + 12)    /*  104 */
 868         S2B_8(S2B_QMIN + 13)    /*  112 */
 869         S2B_8(S2B_QMIN + 14)    /*  120 */
 870         S2B_8(S2B_QMIN + 15)    /*  128 */
 871 #  define S2B_CMIN (S2B_QMIN + 16)
 872 #endif
 873 /****************************************/
 874         S2B_64(S2B_CMIN + 0)    /*  192 */
 875         S2B_64(S2B_CMIN + 1)    /*  256 */
 876         S2B_64(S2B_CMIN + 2)    /*  320 */
 877         S2B_64(S2B_CMIN + 3)    /*  384 */
 878         S2B_64(S2B_CMIN + 4)    /*  448 */
 879         S2B_64(S2B_CMIN + 5)    /*  512 */
 880 #  define S2B_SMIN (S2B_CMIN + 6)
 881         S2B_256(S2B_SMIN + 0)   /*  768 */
 882         S2B_256(S2B_SMIN + 1)   /* 1024 */
 883         S2B_256(S2B_SMIN + 2)   /* 1280 */
 884         S2B_256(S2B_SMIN + 3)   /* 1536 */
 885         S2B_256(S2B_SMIN + 4)   /* 1792 */
 886         S2B_256(S2B_SMIN + 5)   /* 2048 */
 887         S2B_256(S2B_SMIN + 6)   /* 2304 */
 888         S2B_256(S2B_SMIN + 7)   /* 2560 */
 889         S2B_256(S2B_SMIN + 8)   /* 2816 */
 890         S2B_256(S2B_SMIN + 9)   /* 3072 */
 891         S2B_256(S2B_SMIN + 10)  /* 3328 */
 892         S2B_256(S2B_SMIN + 11)  /* 3584 */
 893         S2B_256(S2B_SMIN + 12)  /* 3840 */
 894 #if (PAGE_SHIFT == 13)
 895         S2B_256(S2B_SMIN + 13)  /* 4096 */
 896         S2B_256(S2B_SMIN + 14)  /* 4352 */
 897         S2B_256(S2B_SMIN + 15)  /* 4608 */
 898         S2B_256(S2B_SMIN + 16)  /* 4864 */
 899         S2B_256(S2B_SMIN + 17)  /* 5120 */
 900         S2B_256(S2B_SMIN + 18)  /* 5376 */
 901         S2B_256(S2B_SMIN + 19)  /* 5632 */
 902         S2B_256(S2B_SMIN + 20)  /* 5888 */
 903         S2B_256(S2B_SMIN + 21)  /* 6144 */
 904         S2B_256(S2B_SMIN + 22)  /* 6400 */
 905         S2B_256(S2B_SMIN + 23)  /* 6656 */
 906         S2B_256(S2B_SMIN + 24)  /* 6912 */
 907         S2B_256(S2B_SMIN + 25)  /* 7168 */
 908         S2B_256(S2B_SMIN + 26)  /* 7424 */
 909         S2B_256(S2B_SMIN + 27)  /* 7680 */
 910         S2B_256(S2B_SMIN + 28)  /* 7936 */
 911 #endif
 912 };
 913 #undef S2B_1
 914 #undef S2B_2
 915 #undef S2B_4
 916 #undef S2B_8
 917 #undef S2B_16
 918 #undef S2B_32
 919 #undef S2B_64
 920 #undef S2B_128
 921 #undef S2B_256
 922 #undef S2B_QMIN
 923 #undef S2B_CMIN
 924 #undef S2B_SMIN
 925
 926 #ifdef MALLOC_MAG
 927 static size_t           max_rounds;
 928 #endif
 929
 930 /* Various chunk-related settings. */
 931 static size_t           chunksize;
 932 static size_t           chunksize_mask; /* (chunksize - 1). */
 933 static size_t           chunk_npages;
 934 static size_t           arena_chunk_header_npages;
 935 static size_t           arena_maxclass; /* Max size class for arenas. */
 936
 937 /********/
 938 /*
 939  * Chunks.
 940  */
 941
 942 /* Protects chunk-related data structures. */
 943 static malloc_mutex_t   huge_mtx;
 944
 945 /* Tree of chunks that are stand-alone huge allocations. */
 946 static extent_tree_t    huge;
 947
 948 #ifdef MALLOC_DSS
 949 /*
 950  * Protects sbrk() calls.  This avoids malloc races among threads, though it
 951  * does not protect against races with threads that call sbrk() directly.
 952  */
 953 static malloc_mutex_t   dss_mtx;
 954 /* Base address of the DSS. */
 955 static void             *dss_base;
 956 /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
 957 static void             *dss_prev;
 958 /* Current upper limit on DSS addresses. */
 959 static void             *dss_max;
 960
 961 /*
 962  * Trees of chunks that were previously allocated (trees differ only in node
 963  * ordering).  These are used when allocating chunks, in an attempt to re-use
 964  * address space.  Depending on function, different tree orderings are needed,
 965  * which is why there are two trees with the same contents.
 966  */
 967 static extent_tree_t    dss_chunks_szad;
 968 static extent_tree_t    dss_chunks_ad;
 969 #endif
 970
 971 #ifdef MALLOC_STATS
 972 /* Huge allocation statistics. */
 973 static uint64_t         huge_nmalloc;
 974 static uint64_t         huge_ndalloc;
 975 static size_t           huge_allocated;
 976 #endif
 977
 978 /****************************/
 979 /*
 980  * base (internal allocation).
 981  */
 982
 983 /*
 984  * Current pages that are being used for internal memory allocations.  These
 985  * pages are carved up in cacheline-size quanta, so that there is no chance of
 986  * false cache line sharing.
 987  */
 988 static void             *base_pages;
 989 static void             *base_next_addr;
 990 static void             *base_past_addr; /* Addr immediately past base_pages. */
 991 static extent_node_t    *base_nodes;
 992 static malloc_mutex_t   base_mtx;
 993 #ifdef MALLOC_STATS
 994 static size_t           base_mapped;
 995 #endif
 996
 997 /********/
 998 /*
 999  * Arenas.
1000  */
1001
1002 /*
1003  * Arenas that are used to service external requests.  Not all elements of the
1004  * arenas array are necessarily used; arenas are created lazily as needed.
1005  */
1006 static arena_t          **arenas;
1007 static unsigned         narenas;
1008 #ifndef NO_TLS
1009 #  ifdef MALLOC_BALANCE
1010 static unsigned         narenas_2pow;
1011 #  else
1012 static unsigned         next_arena;
1013 #  endif
1014 #endif
1015 static pthread_mutex_t  arenas_lock; /* Protects arenas initialization. */
1016
1017 #ifndef NO_TLS
1018 /*
1019  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
1020  * for allocations.
1021  */
1022 static __thread arena_t *arenas_map;
1023 #endif
1024
1025 #ifdef MALLOC_MAG
1026 /*
1027  * Map of thread-specific magazine racks, used for thread-specific object
1028  * caching.
1029  */
1030 static __thread mag_rack_t      *mag_rack;
1031 #endif
1032
1033 #ifdef MALLOC_STATS
1034 /* Chunk statistics. */
1035 static chunk_stats_t    stats_chunks;
1036 #endif
1037
1038 /*******************************/
1039 /*
1040  * Runtime configuration options.
1041  */
1042 const char      *_malloc_options;
1043
1044 #ifndef MALLOC_PRODUCTION
1045 static bool     opt_abort = true;
1046 static bool     opt_junk = true;
1047 #else
1048 static bool     opt_abort = false;
1049 static bool     opt_junk = false;
1050 #endif
1051 #ifdef MALLOC_DSS
1052 static bool     opt_dss = true;
1053 static bool     opt_mmap = true;
1054 #endif
1055 #ifdef MALLOC_MAG
1056 static bool     opt_mag = true;
1057 static size_t   opt_mag_size_2pow = MAG_SIZE_2POW_DEFAULT;
1058 #endif
1059 static size_t   opt_dirty_max = DIRTY_MAX_DEFAULT;
1060 #ifdef MALLOC_BALANCE
1061 static uint64_t opt_balance_threshold = BALANCE_THRESHOLD_DEFAULT;
1062 #endif
1063 static bool     opt_print_stats = false;
1064 static size_t   opt_qspace_max_2pow = QSPACE_MAX_2POW_DEFAULT;
1065 static size_t   opt_cspace_max_2pow = CSPACE_MAX_2POW_DEFAULT;
1066 static size_t   opt_chunk_2pow = CHUNK_2POW_DEFAULT;
1067 static bool     opt_utrace = false;
1068 static bool     opt_sysv = false;
1069 static bool     opt_xmalloc = false;
1070 static bool     opt_zero = false;
1071 static int      opt_narenas_lshift = 0;
1072
1073 typedef struct {
1074         void    *p;
1075         size_t  s;
1076         void    *r;
1077 } malloc_utrace_t;
1078
1079 #define UTRACE(a, b, c)                                                 \
1080         if (opt_utrace) {                                               \
1081                 malloc_utrace_t ut;                                     \
1082                 ut.p = (a);                                             \
1083                 ut.s = (b);                                             \
1084                 ut.r = (c);                                             \
1085                 utrace(&ut, sizeof(ut));                                \
1086         }
1087
1088 /******************************************************************************/
1089 /*
1090  * Begin function prototypes for non-inline static functions.
1091  */
1092
1093 static void     malloc_mutex_init(malloc_mutex_t *mutex);
1094 static bool     malloc_spin_init(pthread_mutex_t *lock);
1095 static void     wrtmessage(const char *p1, const char *p2, const char *p3,
1096                 const char *p4);
1097 #ifdef MALLOC_STATS
1098 static void     malloc_printf(const char *format, ...);
1099 #endif
1100 static char     *umax2s(uintmax_t x, char *s);
1101 #ifdef MALLOC_DSS
1102 static bool     base_pages_alloc_dss(size_t minsize);
1103 #endif
1104 static bool     base_pages_alloc_mmap(size_t minsize);
1105 static bool     base_pages_alloc(size_t minsize);
1106 static void     *base_alloc(size_t size);
1107 static void     *base_calloc(size_t number, size_t size);
1108 static extent_node_t *base_node_alloc(void);
1109 static void     base_node_dealloc(extent_node_t *node);
1110 #ifdef MALLOC_STATS
1111 static void     stats_print(arena_t *arena);
1112 #endif
1113 static void     *pages_map(void *addr, size_t size);
1114 static void     pages_unmap(void *addr, size_t size);
1115 #ifdef MALLOC_DSS
1116 static void     *chunk_alloc_dss(size_t size);
1117 static void     *chunk_recycle_dss(size_t size, bool zero);
1118 #endif
1119 static void     *chunk_alloc_mmap(size_t size);
1120 static void     *chunk_alloc(size_t size, bool zero);
1121 #ifdef MALLOC_DSS
1122 static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
1123 static bool     chunk_dealloc_dss(void *chunk, size_t size);
1124 #endif
1125 static void     chunk_dealloc_mmap(void *chunk, size_t size);
1126 static void     chunk_dealloc(void *chunk, size_t size);
1127 #ifndef NO_TLS
1128 static arena_t  *choose_arena_hard(void);
1129 #endif
1130 static void     arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
1131     bool large, bool zero);
1132 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
1133 static void     arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
1134 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
1135     bool zero);
1136 static void     arena_purge(arena_t *arena);
1137 static void     arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
1138 static void     arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
1139     arena_run_t *run, size_t oldsize, size_t newsize);
1140 static void     arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
1141     arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
1142 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
1143 static void     *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
1144 static size_t   arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
1145 #ifdef MALLOC_BALANCE
1146 static void     arena_lock_balance_hard(arena_t *arena);
1147 #endif
1148 #ifdef MALLOC_MAG
1149 static void     mag_load(mag_t *mag);
1150 #endif
1151 static void     *arena_malloc_large(arena_t *arena, size_t size, bool zero);
1152 static void     *arena_palloc(arena_t *arena, size_t alignment, size_t size,
1153     size_t alloc_size);
1154 static size_t   arena_salloc(const void *ptr);
1155 #ifdef MALLOC_MAG
1156 static void     mag_unload(mag_t *mag);
1157 #endif
1158 static void     arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
1159     void *ptr);
1160 static void     arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
1161     void *ptr, size_t size, size_t oldsize);
1162 static bool     arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
1163     void *ptr, size_t size, size_t oldsize);
1164 static bool     arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
1165 static void     *arena_ralloc(void *ptr, size_t size, size_t oldsize);
1166 static bool     arena_new(arena_t *arena);
1167 static arena_t  *arenas_extend(unsigned ind);
1168 #ifdef MALLOC_MAG
1169 static mag_t    *mag_create(arena_t *arena, size_t binind);
1170 static void     mag_destroy(mag_t *mag);
1171 static mag_rack_t *mag_rack_create(arena_t *arena);
1172 static void     mag_rack_destroy(mag_rack_t *rack);
1173 #endif
1174 static void     *huge_malloc(size_t size, bool zero);
1175 static void     *huge_palloc(size_t alignment, size_t size);
1176 static void     *huge_ralloc(void *ptr, size_t size, size_t oldsize);
1177 static void     huge_dalloc(void *ptr);
1178 static void     malloc_print_stats(void);
1179 #ifdef MALLOC_DEBUG
1180 static void     size2bin_validate(void);
1181 #endif
1182 static bool     size2bin_init(void);
1183 static bool     size2bin_init_hard(void);
1184 static bool     malloc_init_hard(void);
1185
1186 /*
1187  * End function prototypes.
1188  */
1189 /******************************************************************************/
1190 /*
1191  * Begin mutex.  We can't use normal pthread mutexes in all places, because
1192  * they require malloc()ed memory, which causes bootstrapping issues in some
1193  * cases.
1194  */
1195
1196 static void
1197 malloc_mutex_init(malloc_mutex_t *mutex)
1198 {
1199         static const spinlock_t lock = _SPINLOCK_INITIALIZER;
1200
1201         mutex->lock = lock;
1202 }
1203
1204 static inline void
1205 malloc_mutex_lock(malloc_mutex_t *mutex)
1206 {
1207
1208         if (__isthreaded)
1209                 _SPINLOCK(&mutex->lock);
1210 }
1211
1212 static inline void
1213 malloc_mutex_unlock(malloc_mutex_t *mutex)
1214 {
1215
1216         if (__isthreaded)
1217                 _SPINUNLOCK(&mutex->lock);
1218 }
1219
1220 /*
1221  * End mutex.
1222  */
1223 /******************************************************************************/
1224 /*
1225  * Begin spin lock.  Spin locks here are actually adaptive mutexes that block
1226  * after a period of spinning, because unbounded spinning would allow for
1227  * priority inversion.
1228  */
1229
1230 /*
1231  * We use an unpublished interface to initialize pthread mutexes with an
1232  * allocation callback, in order to avoid infinite recursion.
1233  */
1234 int     _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
1235     void *(calloc_cb)(size_t, size_t));
1236
1237 __weak_reference(_pthread_mutex_init_calloc_cb_stub,
1238     _pthread_mutex_init_calloc_cb);
1239
1240 int
1241 _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
1242     void *(calloc_cb)(size_t, size_t))
1243 {
1244
1245         return (0);
1246 }
1247
1248 static bool
1249 malloc_spin_init(pthread_mutex_t *lock)
1250 {
1251
1252         if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
1253                 return (true);
1254
1255         return (false);
1256 }
1257
1258 static inline unsigned
1259 malloc_spin_lock(pthread_mutex_t *lock)
1260 {
1261         unsigned ret = 0;
1262
1263         if (__isthreaded) {
1264                 if (_pthread_mutex_trylock(lock) != 0) {
1265                         /* Exponentially back off if there are multiple CPUs. */
1266                         if (ncpus > 1) {
1267                                 unsigned i;
1268                                 volatile unsigned j;
1269
1270                                 for (i = 1; i <= SPIN_LIMIT_2POW; i++) {
1271                                         for (j = 0; j < (1U << i); j++) {
1272                                                 ret++;
1273                                                 CPU_SPINWAIT;
1274                                         }
1275
1276                                         if (_pthread_mutex_trylock(lock) == 0)
1277                                                 return (ret);
1278                                 }
1279                         }
1280
1281                         /*
1282                          * Spinning failed.  Block until the lock becomes
1283                          * available, in order to avoid indefinite priority
1284                          * inversion.
1285                          */
1286                         _pthread_mutex_lock(lock);
1287                         assert((ret << BLOCK_COST_2POW) != 0 || ncpus == 1);
1288                         return (ret << BLOCK_COST_2POW);
1289                 }
1290         }
1291
1292         return (ret);
1293 }
1294
1295 static inline void
1296 malloc_spin_unlock(pthread_mutex_t *lock)
1297 {
1298
1299         if (__isthreaded)
1300                 _pthread_mutex_unlock(lock);
1301 }
1302
1303 /*
1304  * End spin lock.
1305  */
1306 /******************************************************************************/
1307 /*
1308  * Begin Utility functions/macros.
1309  */
1310
1311 /* Return the chunk address for allocation address a. */
1312 #define CHUNK_ADDR2BASE(a)                                              \
1313         ((void *)((uintptr_t)(a) & ~chunksize_mask))
1314
1315 /* Return the chunk offset of address a. */
1316 #define CHUNK_ADDR2OFFSET(a)                                            \
1317         ((size_t)((uintptr_t)(a) & chunksize_mask))
1318
1319 /* Return the smallest chunk multiple that is >= s. */
1320 #define CHUNK_CEILING(s)                                                \
1321         (((s) + chunksize_mask) & ~chunksize_mask)
1322
1323 /* Return the smallest quantum multiple that is >= a. */
1324 #define QUANTUM_CEILING(a)                                              \
1325         (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
1326
1327 /* Return the smallest cacheline multiple that is >= s. */
1328 #define CACHELINE_CEILING(s)                                            \
1329         (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
1330
1331 /* Return the smallest subpage multiple that is >= s. */
1332 #define SUBPAGE_CEILING(s)                                              \
1333         (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
1334
1335 /* Return the smallest PAGE_SIZE multiple that is >= s. */
1336 #define PAGE_CEILING(s)                                                 \
1337         (((s) + PAGE_MASK) & ~PAGE_MASK)
1338
1339 #ifdef MALLOC_TINY
1340 /* Compute the smallest power of 2 that is >= x. */
1341 static inline size_t
1342 pow2_ceil(size_t x)
1343 {
1344
1345         x--;
1346         x |= x >> 1;
1347         x |= x >> 2;
1348         x |= x >> 4;
1349         x |= x >> 8;
1350         x |= x >> 16;
1351 #if (SIZEOF_PTR == 8)
1352         x |= x >> 32;
1353 #endif
1354         x++;
1355         return (x);
1356 }
1357 #endif
1358
1359 #ifdef MALLOC_BALANCE
1360 /*
1361  * Use a simple linear congruential pseudo-random number generator:
1362  *
1363  *   prn(y) = (a*x + c) % m
1364  *
1365  * where the following constants ensure maximal period:
1366  *
1367  *   a == Odd number (relatively prime to 2^n), and (a-1) is a multiple of 4.
1368  *   c == Odd number (relatively prime to 2^n).
1369  *   m == 2^32
1370  *
1371  * See Knuth's TAOCP 3rd Ed., Vol. 2, pg. 17 for details on these constraints.
1372  *
1373  * This choice of m has the disadvantage that the quality of the bits is
1374  * proportional to bit position.  For example. the lowest bit has a cycle of 2,
1375  * the next has a cycle of 4, etc.  For this reason, we prefer to use the upper
1376  * bits.
1377  */
1378 #  define PRN_DEFINE(suffix, var, a, c)                                 \
1379 static inline void                                                      \
1380 sprn_##suffix(uint32_t seed)                                            \
1381 {                                                                       \
1382         var = seed;                                                     \
1383 }                                                                       \
1384                                                                         \
1385 static inline uint32_t                                                  \
1386 prn_##suffix(uint32_t lg_range)                                         \
1387 {                                                                       \
1388         uint32_t ret, x;                                                \
1389                                                                         \
1390         assert(lg_range > 0);                                           \
1391         assert(lg_range <= 32);                                         \
1392                                                                         \
1393         x = (var * (a)) + (c);                                          \
1394         var = x;                                                        \
1395         ret = x >> (32 - lg_range);                                     \
1396                                                                         \
1397         return (ret);                                                   \
1398 }
1399 #  define SPRN(suffix, seed)    sprn_##suffix(seed)
1400 #  define PRN(suffix, lg_range) prn_##suffix(lg_range)
1401 #endif
1402
1403 #ifdef MALLOC_BALANCE
1404 /* Define the PRNG used for arena assignment. */
1405 static __thread uint32_t balance_x;
1406 PRN_DEFINE(balance, balance_x, 1297, 1301)
1407 #endif
1408
1409 static void
1410 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1411 {
1412
1413         _write(STDERR_FILENO, p1, strlen(p1));
1414         _write(STDERR_FILENO, p2, strlen(p2));
1415         _write(STDERR_FILENO, p3, strlen(p3));
1416         _write(STDERR_FILENO, p4, strlen(p4));
1417 }
1418
1419 void    (*_malloc_message)(const char *p1, const char *p2, const char *p3,
1420             const char *p4) = wrtmessage;
1421
1422 #ifdef MALLOC_STATS
1423 /*
1424  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1425  */
1426 static void
1427 malloc_printf(const char *format, ...)
1428 {
1429         char buf[4096];
1430         va_list ap;
1431
1432         va_start(ap, format);
1433         vsnprintf(buf, sizeof(buf), format, ap);
1434         va_end(ap);
1435         _malloc_message(buf, "", "", "");
1436 }
1437 #endif
1438
1439 /*
1440  * We don't want to depend on vsnprintf() for production builds, since that can
1441  * cause unnecessary bloat for static binaries.  umax2s() provides minimal
1442  * integer printing functionality, so that malloc_printf() use can be limited to
1443  * MALLOC_STATS code.
1444  */
1445 #define UMAX2S_BUFSIZE  21
1446 static char *
1447 umax2s(uintmax_t x, char *s)
1448 {
1449         unsigned i;
1450
1451         /* Make sure UMAX2S_BUFSIZE is large enough. */
1452         assert(sizeof(uintmax_t) <= 8);
1453
1454         i = UMAX2S_BUFSIZE - 1;
1455         s[i] = '\0';
1456         do {
1457                 i--;
1458                 s[i] = "0123456789"[x % 10];
1459                 x /= 10;
1460         } while (x > 0);
1461
1462         return (&s[i]);
1463 }
1464
1465 /******************************************************************************/
1466
1467 #ifdef MALLOC_DSS
1468 static bool
1469 base_pages_alloc_dss(size_t minsize)
1470 {
1471
1472         /*
1473          * Do special DSS allocation here, since base allocations don't need to
1474          * be chunk-aligned.
1475          */
1476         malloc_mutex_lock(&dss_mtx);
1477         if (dss_prev != (void *)-1) {
1478                 intptr_t incr;
1479                 size_t csize = CHUNK_CEILING(minsize);
1480
1481                 do {
1482                         /* Get the current end of the DSS. */
1483                         dss_max = sbrk(0);
1484
1485                         /*
1486                          * Calculate how much padding is necessary to
1487                          * chunk-align the end of the DSS.  Don't worry about
1488                          * dss_max not being chunk-aligned though.
1489                          */
1490                         incr = (intptr_t)chunksize
1491                             - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1492                         assert(incr >= 0);
1493                         if ((size_t)incr < minsize)
1494                                 incr += csize;
1495
1496                         dss_prev = sbrk(incr);
1497                         if (dss_prev == dss_max) {
1498                                 /* Success. */
1499                                 dss_max = (void *)((intptr_t)dss_prev + incr);
1500                                 base_pages = dss_prev;
1501                                 base_next_addr = base_pages;
1502                                 base_past_addr = dss_max;
1503 #ifdef MALLOC_STATS
1504                                 base_mapped += incr;
1505 #endif
1506                                 malloc_mutex_unlock(&dss_mtx);
1507                                 return (false);
1508                         }
1509                 } while (dss_prev != (void *)-1);
1510         }
1511         malloc_mutex_unlock(&dss_mtx);
1512
1513         return (true);
1514 }
1515 #endif
1516
1517 static bool
1518 base_pages_alloc_mmap(size_t minsize)
1519 {
1520         size_t csize;
1521
1522         assert(minsize != 0);
1523         csize = PAGE_CEILING(minsize);
1524         base_pages = pages_map(NULL, csize);
1525         if (base_pages == NULL)
1526                 return (true);
1527         base_next_addr = base_pages;
1528         base_past_addr = (void *)((uintptr_t)base_pages + csize);
1529 #ifdef MALLOC_STATS
1530         base_mapped += csize;
1531 #endif
1532
1533         return (false);
1534 }
1535
1536 static bool
1537 base_pages_alloc(size_t minsize)
1538 {
1539
1540 #ifdef MALLOC_DSS
1541         if (opt_mmap && minsize != 0)
1542 #endif
1543         {
1544                 if (base_pages_alloc_mmap(minsize) == false)
1545                         return (false);
1546         }
1547
1548 #ifdef MALLOC_DSS
1549         if (opt_dss) {
1550                 if (base_pages_alloc_dss(minsize) == false)
1551                         return (false);
1552         }
1553
1554 #endif
1555
1556         return (true);
1557 }
1558
1559 static void *
1560 base_alloc(size_t size)
1561 {
1562         void *ret;
1563         size_t csize;
1564
1565         /* Round size up to nearest multiple of the cacheline size. */
1566         csize = CACHELINE_CEILING(size);
1567
1568         malloc_mutex_lock(&base_mtx);
1569         /* Make sure there's enough space for the allocation. */
1570         if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1571                 if (base_pages_alloc(csize)) {
1572                         malloc_mutex_unlock(&base_mtx);
1573                         return (NULL);
1574                 }
1575         }
1576         /* Allocate. */
1577         ret = base_next_addr;
1578         base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1579         malloc_mutex_unlock(&base_mtx);
1580
1581         return (ret);
1582 }
1583
1584 static void *
1585 base_calloc(size_t number, size_t size)
1586 {
1587         void *ret;
1588
1589         ret = base_alloc(number * size);
1590         memset(ret, 0, number * size);
1591
1592         return (ret);
1593 }
1594
1595 static extent_node_t *
1596 base_node_alloc(void)
1597 {
1598         extent_node_t *ret;
1599
1600         malloc_mutex_lock(&base_mtx);
1601         if (base_nodes != NULL) {
1602                 ret = base_nodes;
1603                 base_nodes = *(extent_node_t **)ret;
1604                 malloc_mutex_unlock(&base_mtx);
1605         } else {
1606                 malloc_mutex_unlock(&base_mtx);
1607                 ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
1608         }
1609
1610         return (ret);
1611 }
1612
1613 static void
1614 base_node_dealloc(extent_node_t *node)
1615 {
1616
1617         malloc_mutex_lock(&base_mtx);
1618         *(extent_node_t **)node = base_nodes;
1619         base_nodes = node;
1620         malloc_mutex_unlock(&base_mtx);
1621 }
1622
1623 /******************************************************************************/
1624
1625 #ifdef MALLOC_STATS
1626 static void
1627 stats_print(arena_t *arena)
1628 {
1629         unsigned i, gap_start;
1630
1631         malloc_printf("dirty: %zu page%s dirty, %llu sweep%s,"
1632             " %llu madvise%s, %llu page%s purged\n",
1633             arena->ndirty, arena->ndirty == 1 ? "" : "s",
1634             arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
1635             arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
1636             arena->stats.purged, arena->stats.purged == 1 ? "" : "s");
1637
1638         malloc_printf("            allocated      nmalloc      ndalloc\n");
1639         malloc_printf("small:   %12zu %12llu %12llu\n",
1640             arena->stats.allocated_small, arena->stats.nmalloc_small,
1641             arena->stats.ndalloc_small);
1642         malloc_printf("large:   %12zu %12llu %12llu\n",
1643             arena->stats.allocated_large, arena->stats.nmalloc_large,
1644             arena->stats.ndalloc_large);
1645         malloc_printf("total:   %12zu %12llu %12llu\n",
1646             arena->stats.allocated_small + arena->stats.allocated_large,
1647             arena->stats.nmalloc_small + arena->stats.nmalloc_large,
1648             arena->stats.ndalloc_small + arena->stats.ndalloc_large);
1649         malloc_printf("mapped:  %12zu\n", arena->stats.mapped);
1650
1651 #ifdef MALLOC_MAG
1652         if (__isthreaded && opt_mag) {
1653                 malloc_printf("bins:     bin   size regs pgs      mags   "
1654                     "newruns    reruns maxruns curruns\n");
1655         } else {
1656 #endif
1657                 malloc_printf("bins:     bin   size regs pgs  requests   "
1658                     "newruns    reruns maxruns curruns\n");
1659 #ifdef MALLOC_MAG
1660         }
1661 #endif
1662         for (i = 0, gap_start = UINT_MAX; i < nbins; i++) {
1663                 if (arena->bins[i].stats.nruns == 0) {
1664                         if (gap_start == UINT_MAX)
1665                                 gap_start = i;
1666                 } else {
1667                         if (gap_start != UINT_MAX) {
1668                                 if (i > gap_start + 1) {
1669                                         /* Gap of more than one size class. */
1670                                         malloc_printf("[%u..%u]\n",
1671                                             gap_start, i - 1);
1672                                 } else {
1673                                         /* Gap of one size class. */
1674                                         malloc_printf("[%u]\n", gap_start);
1675                                 }
1676                                 gap_start = UINT_MAX;
1677                         }
1678                         malloc_printf(
1679                             "%13u %1s %4u %4u %3u %9llu %9llu"
1680                             " %9llu %7lu %7lu\n",
1681                             i,
1682                             i < ntbins ? "T" : i < ntbins + nqbins ? "Q" :
1683                             i < ntbins + nqbins + ncbins ? "C" : "S",
1684                             arena->bins[i].reg_size,
1685                             arena->bins[i].nregs,
1686                             arena->bins[i].run_size >> PAGE_SHIFT,
1687 #ifdef MALLOC_MAG
1688                             (__isthreaded && opt_mag) ?
1689                             arena->bins[i].stats.nmags :
1690 #endif
1691                             arena->bins[i].stats.nrequests,
1692                             arena->bins[i].stats.nruns,
1693                             arena->bins[i].stats.reruns,
1694                             arena->bins[i].stats.highruns,
1695                             arena->bins[i].stats.curruns);
1696                 }
1697         }
1698         if (gap_start != UINT_MAX) {
1699                 if (i > gap_start + 1) {
1700                         /* Gap of more than one size class. */
1701                         malloc_printf("[%u..%u]\n", gap_start, i - 1);
1702                 } else {
1703                         /* Gap of one size class. */
1704                         malloc_printf("[%u]\n", gap_start);
1705                 }
1706         }
1707 }
1708 #endif
1709
1710 /*
1711  * End Utility functions/macros.
1712  */
1713 /******************************************************************************/
1714 /*
1715  * Begin extent tree code.
1716  */
1717
1718 #ifdef MALLOC_DSS
1719 static inline int
1720 extent_szad_comp(extent_node_t *a, extent_node_t *b)
1721 {
1722         int ret;
1723         size_t a_size = a->size;
1724         size_t b_size = b->size;
1725
1726         ret = (a_size > b_size) - (a_size < b_size);
1727         if (ret == 0) {
1728                 uintptr_t a_addr = (uintptr_t)a->addr;
1729                 uintptr_t b_addr = (uintptr_t)b->addr;
1730
1731                 ret = (a_addr > b_addr) - (a_addr < b_addr);
1732         }
1733
1734         return (ret);
1735 }
1736
1737 /* Wrap red-black tree macros in functions. */
1738 rb_wrap(__unused static, extent_tree_szad_, extent_tree_t, extent_node_t,
1739     link_szad, extent_szad_comp)
1740 #endif
1741
1742 static inline int
1743 extent_ad_comp(extent_node_t *a, extent_node_t *b)
1744 {
1745         uintptr_t a_addr = (uintptr_t)a->addr;
1746         uintptr_t b_addr = (uintptr_t)b->addr;
1747
1748         return ((a_addr > b_addr) - (a_addr < b_addr));
1749 }
1750
1751 /* Wrap red-black tree macros in functions. */
1752 rb_wrap(__unused static, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
1753     extent_ad_comp)
1754
1755 /*
1756  * End extent tree code.
1757  */
1758 /******************************************************************************/
1759 /*
1760  * Begin chunk management functions.
1761  */
1762
1763 static void *
1764 pages_map(void *addr, size_t size)
1765 {
1766         void *ret;
1767
1768         /*
1769          * We don't use MAP_FIXED here, because it can cause the *replacement*
1770          * of existing mappings, and we only want to create new mappings.
1771          */
1772         ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1773             -1, 0);
1774         assert(ret != NULL);
1775
1776         if (ret == MAP_FAILED)
1777                 ret = NULL;
1778         else if (addr != NULL && ret != addr) {
1779                 /*
1780                  * We succeeded in mapping memory, but not in the right place.
1781                  */
1782                 if (munmap(ret, size) == -1) {
1783                         char buf[STRERROR_BUF];
1784
1785                         strerror_r(errno, buf, sizeof(buf));
1786                         _malloc_message(_getprogname(),
1787                             ": (malloc) Error in munmap(): ", buf, "\n");
1788                         if (opt_abort)
1789                                 abort();
1790                 }
1791                 ret = NULL;
1792         }
1793
1794         assert(ret == NULL || (addr == NULL && ret != addr)
1795             || (addr != NULL && ret == addr));
1796         return (ret);
1797 }
1798
1799 static void
1800 pages_unmap(void *addr, size_t size)
1801 {
1802
1803         if (munmap(addr, size) == -1) {
1804                 char buf[STRERROR_BUF];
1805
1806                 strerror_r(errno, buf, sizeof(buf));
1807                 _malloc_message(_getprogname(),
1808                     ": (malloc) Error in munmap(): ", buf, "\n");
1809                 if (opt_abort)
1810                         abort();
1811         }
1812 }
1813
1814 #ifdef MALLOC_DSS
1815 static void *
1816 chunk_alloc_dss(size_t size)
1817 {
1818
1819         /*
1820          * sbrk() uses a signed increment argument, so take care not to
1821          * interpret a huge allocation request as a negative increment.
1822          */
1823         if ((intptr_t)size < 0)
1824                 return (NULL);
1825
1826         malloc_mutex_lock(&dss_mtx);
1827         if (dss_prev != (void *)-1) {
1828                 intptr_t incr;
1829
1830                 /*
1831                  * The loop is necessary to recover from races with other
1832                  * threads that are using the DSS for something other than
1833                  * malloc.
1834                  */
1835                 do {
1836                         void *ret;
1837
1838                         /* Get the current end of the DSS. */
1839                         dss_max = sbrk(0);
1840
1841                         /*
1842                          * Calculate how much padding is necessary to
1843                          * chunk-align the end of the DSS.
1844                          */
1845                         incr = (intptr_t)size
1846                             - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1847                         if (incr == (intptr_t)size)
1848                                 ret = dss_max;
1849                         else {
1850                                 ret = (void *)((intptr_t)dss_max + incr);
1851                                 incr += size;
1852                         }
1853
1854                         dss_prev = sbrk(incr);
1855                         if (dss_prev == dss_max) {
1856                                 /* Success. */
1857                                 dss_max = (void *)((intptr_t)dss_prev + incr);
1858                                 malloc_mutex_unlock(&dss_mtx);
1859                                 return (ret);
1860                         }
1861                 } while (dss_prev != (void *)-1);
1862         }
1863         malloc_mutex_unlock(&dss_mtx);
1864
1865         return (NULL);
1866 }
1867
1868 static void *
1869 chunk_recycle_dss(size_t size, bool zero)
1870 {
1871         extent_node_t *node, key;
1872
1873         key.addr = NULL;
1874         key.size = size;
1875         malloc_mutex_lock(&dss_mtx);
1876         node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
1877         if (node != NULL) {
1878                 void *ret = node->addr;
1879
1880                 /* Remove node from the tree. */
1881                 extent_tree_szad_remove(&dss_chunks_szad, node);
1882                 if (node->size == size) {
1883                         extent_tree_ad_remove(&dss_chunks_ad, node);
1884                         base_node_dealloc(node);
1885                 } else {
1886                         /*
1887                          * Insert the remainder of node's address range as a
1888                          * smaller chunk.  Its position within dss_chunks_ad
1889                          * does not change.
1890                          */
1891                         assert(node->size > size);
1892                         node->addr = (void *)((uintptr_t)node->addr + size);
1893                         node->size -= size;
1894                         extent_tree_szad_insert(&dss_chunks_szad, node);
1895                 }
1896                 malloc_mutex_unlock(&dss_mtx);
1897
1898                 if (zero)
1899                         memset(ret, 0, size);
1900                 return (ret);
1901         }
1902         malloc_mutex_unlock(&dss_mtx);
1903
1904         return (NULL);
1905 }
1906 #endif
1907
1908 static void *
1909 chunk_alloc_mmap(size_t size)
1910 {
1911         void *ret;
1912         size_t offset;
1913
1914         /*
1915          * Ideally, there would be a way to specify alignment to mmap() (like
1916          * NetBSD has), but in the absence of such a feature, we have to work
1917          * hard to efficiently create aligned mappings.  The reliable, but
1918          * expensive method is to create a mapping that is over-sized, then
1919          * trim the excess.  However, that always results in at least one call
1920          * to pages_unmap().
1921          *
1922          * A more optimistic approach is to try mapping precisely the right
1923          * amount, then try to append another mapping if alignment is off.  In
1924          * practice, this works out well as long as the application is not
1925          * interleaving mappings via direct mmap() calls.  If we do run into a
1926          * situation where there is an interleaved mapping and we are unable to
1927          * extend an unaligned mapping, our best option is to momentarily
1928          * revert to the reliable-but-expensive method.  This will tend to
1929          * leave a gap in the memory map that is too small to cause later
1930          * problems for the optimistic method.
1931          */
1932
1933         ret = pages_map(NULL, size);
1934         if (ret == NULL)
1935                 return (NULL);
1936
1937         offset = CHUNK_ADDR2OFFSET(ret);
1938         if (offset != 0) {
1939                 /* Try to extend chunk boundary. */
1940                 if (pages_map((void *)((uintptr_t)ret + size),
1941                     chunksize - offset) == NULL) {
1942                         /*
1943                          * Extension failed.  Clean up, then revert to the
1944                          * reliable-but-expensive method.
1945                          */
1946                         pages_unmap(ret, size);
1947
1948                         /* Beware size_t wrap-around. */
1949                         if (size + chunksize <= size)
1950                                 return NULL;
1951
1952                         ret = pages_map(NULL, size + chunksize);
1953                         if (ret == NULL)
1954                                 return (NULL);
1955
1956                         /* Clean up unneeded leading/trailing space. */
1957                         offset = CHUNK_ADDR2OFFSET(ret);
1958                         if (offset != 0) {
1959                                 /* Leading space. */
1960                                 pages_unmap(ret, chunksize - offset);
1961
1962                                 ret = (void *)((uintptr_t)ret +
1963                                     (chunksize - offset));
1964
1965                                 /* Trailing space. */
1966                                 pages_unmap((void *)((uintptr_t)ret + size),
1967                                     offset);
1968                         } else {
1969                                 /* Trailing space only. */
1970                                 pages_unmap((void *)((uintptr_t)ret + size),
1971                                     chunksize);
1972                         }
1973                 } else {
1974                         /* Clean up unneeded leading space. */
1975                         pages_unmap(ret, chunksize - offset);
1976                         ret = (void *)((uintptr_t)ret + (chunksize - offset));
1977                 }
1978         }
1979
1980         return (ret);
1981 }
1982
1983 static void *
1984 chunk_alloc(size_t size, bool zero)
1985 {
1986         void *ret;
1987
1988         assert(size != 0);
1989         assert((size & chunksize_mask) == 0);
1990
1991 #ifdef MALLOC_DSS
1992         if (opt_mmap)
1993 #endif
1994         {
1995                 ret = chunk_alloc_mmap(size);
1996                 if (ret != NULL)
1997                         goto RETURN;
1998         }
1999
2000 #ifdef MALLOC_DSS
2001         if (opt_dss) {
2002                 ret = chunk_recycle_dss(size, zero);
2003                 if (ret != NULL) {
2004                         goto RETURN;
2005                 }
2006
2007                 ret = chunk_alloc_dss(size);
2008                 if (ret != NULL)
2009                         goto RETURN;
2010         }
2011 #endif
2012
2013         /* All strategies for allocation failed. */
2014         ret = NULL;
2015 RETURN:
2016 #ifdef MALLOC_STATS
2017         if (ret != NULL) {
2018                 stats_chunks.nchunks += (size / chunksize);
2019                 stats_chunks.curchunks += (size / chunksize);
2020         }
2021         if (stats_chunks.curchunks > stats_chunks.highchunks)
2022                 stats_chunks.highchunks = stats_chunks.curchunks;
2023 #endif
2024
2025         assert(CHUNK_ADDR2BASE(ret) == ret);
2026         return (ret);
2027 }
2028
2029 #ifdef MALLOC_DSS
2030 static extent_node_t *
2031 chunk_dealloc_dss_record(void *chunk, size_t size)
2032 {
2033         extent_node_t *node, *prev, key;
2034
2035         key.addr = (void *)((uintptr_t)chunk + size);
2036         node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
2037         /* Try to coalesce forward. */
2038         if (node != NULL && node->addr == key.addr) {
2039                 /*
2040                  * Coalesce chunk with the following address range.  This does
2041                  * not change the position within dss_chunks_ad, so only
2042                  * remove/insert from/into dss_chunks_szad.
2043                  */
2044                 extent_tree_szad_remove(&dss_chunks_szad, node);
2045                 node->addr = chunk;
2046                 node->size += size;
2047                 extent_tree_szad_insert(&dss_chunks_szad, node);
2048         } else {
2049                 /*
2050                  * Coalescing forward failed, so insert a new node.  Drop
2051                  * dss_mtx during node allocation, since it is possible that a
2052                  * new base chunk will be allocated.
2053                  */
2054                 malloc_mutex_unlock(&dss_mtx);
2055                 node = base_node_alloc();
2056                 malloc_mutex_lock(&dss_mtx);
2057                 if (node == NULL)
2058                         return (NULL);
2059                 node->addr = chunk;
2060                 node->size = size;
2061                 extent_tree_ad_insert(&dss_chunks_ad, node);
2062                 extent_tree_szad_insert(&dss_chunks_szad, node);
2063         }
2064
2065         /* Try to coalesce backward. */
2066         prev = extent_tree_ad_prev(&dss_chunks_ad, node);
2067         if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
2068             chunk) {
2069                 /*
2070                  * Coalesce chunk with the previous address range.  This does
2071                  * not change the position within dss_chunks_ad, so only
2072                  * remove/insert node from/into dss_chunks_szad.
2073                  */
2074                 extent_tree_szad_remove(&dss_chunks_szad, prev);
2075                 extent_tree_ad_remove(&dss_chunks_ad, prev);
2076
2077                 extent_tree_szad_remove(&dss_chunks_szad, node);
2078                 node->addr = prev->addr;
2079                 node->size += prev->size;
2080                 extent_tree_szad_insert(&dss_chunks_szad, node);
2081
2082                 base_node_dealloc(prev);
2083         }
2084
2085         return (node);
2086 }
2087
2088 static bool
2089 chunk_dealloc_dss(void *chunk, size_t size)
2090 {
2091
2092         malloc_mutex_lock(&dss_mtx);
2093         if ((uintptr_t)chunk >= (uintptr_t)dss_base
2094             && (uintptr_t)chunk < (uintptr_t)dss_max) {
2095                 extent_node_t *node;
2096
2097                 /* Try to coalesce with other unused chunks. */
2098                 node = chunk_dealloc_dss_record(chunk, size);
2099                 if (node != NULL) {
2100                         chunk = node->addr;
2101                         size = node->size;
2102                 }
2103
2104                 /* Get the current end of the DSS. */
2105                 dss_max = sbrk(0);
2106
2107                 /*
2108                  * Try to shrink the DSS if this chunk is at the end of the
2109                  * DSS.  The sbrk() call here is subject to a race condition
2110                  * with threads that use brk(2) or sbrk(2) directly, but the
2111                  * alternative would be to leak memory for the sake of poorly
2112                  * designed multi-threaded programs.
2113                  */
2114                 if ((void *)((uintptr_t)chunk + size) == dss_max
2115                     && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
2116                         /* Success. */
2117                         dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
2118
2119                         if (node != NULL) {
2120                                 extent_tree_szad_remove(&dss_chunks_szad, node);
2121                                 extent_tree_ad_remove(&dss_chunks_ad, node);
2122                                 base_node_dealloc(node);
2123                         }
2124                         malloc_mutex_unlock(&dss_mtx);
2125                 } else {
2126                         malloc_mutex_unlock(&dss_mtx);
2127                         madvise(chunk, size, MADV_FREE);
2128                 }
2129
2130                 return (false);
2131         }
2132         malloc_mutex_unlock(&dss_mtx);
2133
2134         return (true);
2135 }
2136 #endif
2137
2138 static void
2139 chunk_dealloc_mmap(void *chunk, size_t size)
2140 {
2141
2142         pages_unmap(chunk, size);
2143 }
2144
2145 static void
2146 chunk_dealloc(void *chunk, size_t size)
2147 {
2148
2149         assert(chunk != NULL);
2150         assert(CHUNK_ADDR2BASE(chunk) == chunk);
2151         assert(size != 0);
2152         assert((size & chunksize_mask) == 0);
2153
2154 #ifdef MALLOC_STATS
2155         stats_chunks.curchunks -= (size / chunksize);
2156 #endif
2157
2158 #ifdef MALLOC_DSS
2159         if (opt_dss) {
2160                 if (chunk_dealloc_dss(chunk, size) == false)
2161                         return;
2162         }
2163
2164         if (opt_mmap)
2165 #endif
2166                 chunk_dealloc_mmap(chunk, size);
2167 }
2168
2169 /*
2170  * End chunk management functions.
2171  */
2172 /******************************************************************************/
2173 /*
2174  * Begin arena.
2175  */
2176
2177 /*
2178  * Choose an arena based on a per-thread value (fast-path code, calls slow-path
2179  * code if necessary).
2180  */
2181 static inline arena_t *
2182 choose_arena(void)
2183 {
2184         arena_t *ret;
2185
2186         /*
2187          * We can only use TLS if this is a PIC library, since for the static
2188          * library version, libc's malloc is used by TLS allocation, which
2189          * introduces a bootstrapping issue.
2190          */
2191 #ifndef NO_TLS
2192         if (__isthreaded == false) {
2193             /* Avoid the overhead of TLS for single-threaded operation. */
2194             return (arenas[0]);
2195         }
2196
2197         ret = arenas_map;
2198         if (ret == NULL) {
2199                 ret = choose_arena_hard();
2200                 assert(ret != NULL);
2201         }
2202 #else
2203         if (__isthreaded && narenas > 1) {
2204                 unsigned long ind;
2205
2206                 /*
2207                  * Hash _pthread_self() to one of the arenas.  There is a prime
2208                  * number of arenas, so this has a reasonable chance of
2209                  * working.  Even so, the hashing can be easily thwarted by
2210                  * inconvenient _pthread_self() values.  Without specific
2211                  * knowledge of how _pthread_self() calculates values, we can't
2212                  * easily do much better than this.
2213                  */
2214                 ind = (unsigned long) _pthread_self() % narenas;
2215
2216                 /*
2217                  * Optimistially assume that arenas[ind] has been initialized.
2218                  * At worst, we find out that some other thread has already
2219                  * done so, after acquiring the lock in preparation.  Note that
2220                  * this lazy locking also has the effect of lazily forcing
2221                  * cache coherency; without the lock acquisition, there's no
2222                  * guarantee that modification of arenas[ind] by another thread
2223                  * would be seen on this CPU for an arbitrary amount of time.
2224                  *
2225                  * In general, this approach to modifying a synchronized value
2226                  * isn't a good idea, but in this case we only ever modify the
2227                  * value once, so things work out well.
2228                  */
2229                 ret = arenas[ind];
2230                 if (ret == NULL) {
2231                         /*
2232                          * Avoid races with another thread that may have already
2233                          * initialized arenas[ind].
2234                          */
2235                         malloc_spin_lock(&arenas_lock);
2236                         if (arenas[ind] == NULL)
2237                                 ret = arenas_extend((unsigned)ind);
2238                         else
2239                                 ret = arenas[ind];
2240                         malloc_spin_unlock(&arenas_lock);
2241                 }
2242         } else
2243                 ret = arenas[0];
2244 #endif
2245
2246         assert(ret != NULL);
2247         return (ret);
2248 }
2249
2250 #ifndef NO_TLS
2251 /*
2252  * Choose an arena based on a per-thread value (slow-path code only, called
2253  * only by choose_arena()).
2254  */
2255 static arena_t *
2256 choose_arena_hard(void)
2257 {
2258         arena_t *ret;
2259
2260         assert(__isthreaded);
2261
2262 #ifdef MALLOC_BALANCE
2263         /* Seed the PRNG used for arena load balancing. */
2264         SPRN(balance, (uint32_t)(uintptr_t)(_pthread_self()));
2265 #endif
2266
2267         if (narenas > 1) {
2268 #ifdef MALLOC_BALANCE
2269                 unsigned ind;
2270
2271                 ind = PRN(balance, narenas_2pow);
2272                 if ((ret = arenas[ind]) == NULL) {
2273                         malloc_spin_lock(&arenas_lock);
2274                         if ((ret = arenas[ind]) == NULL)
2275                                 ret = arenas_extend(ind);
2276                         malloc_spin_unlock(&arenas_lock);
2277                 }
2278 #else
2279                 malloc_spin_lock(&arenas_lock);
2280                 if ((ret = arenas[next_arena]) == NULL)
2281                         ret = arenas_extend(next_arena);
2282                 next_arena = (next_arena + 1) % narenas;
2283                 malloc_spin_unlock(&arenas_lock);
2284 #endif
2285         } else
2286                 ret = arenas[0];
2287
2288         arenas_map = ret;
2289
2290         return (ret);
2291 }
2292 #endif
2293
2294 static inline int
2295 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
2296 {
2297         uintptr_t a_chunk = (uintptr_t)a;
2298         uintptr_t b_chunk = (uintptr_t)b;
2299
2300         assert(a != NULL);
2301         assert(b != NULL);
2302
2303         return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
2304 }
2305
2306 /* Wrap red-black tree macros in functions. */
2307 rb_wrap(__unused static, arena_chunk_tree_dirty_, arena_chunk_tree_t,
2308     arena_chunk_t, link_dirty, arena_chunk_comp)
2309
2310 static inline int
2311 arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2312 {
2313         uintptr_t a_mapelm = (uintptr_t)a;
2314         uintptr_t b_mapelm = (uintptr_t)b;
2315
2316         assert(a != NULL);
2317         assert(b != NULL);
2318
2319         return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
2320 }
2321
2322 /* Wrap red-black tree macros in functions. */
2323 rb_wrap(__unused static, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
2324     link, arena_run_comp)
2325
2326 static inline int
2327 arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2328 {
2329         int ret;
2330         size_t a_size = a->bits & ~PAGE_MASK;
2331         size_t b_size = b->bits & ~PAGE_MASK;
2332
2333         ret = (a_size > b_size) - (a_size < b_size);
2334         if (ret == 0) {
2335                 uintptr_t a_mapelm, b_mapelm;
2336
2337                 if ((a->bits & CHUNK_MAP_KEY) == 0)
2338                         a_mapelm = (uintptr_t)a;
2339                 else {
2340                         /*
2341                          * Treat keys as though they are lower than anything
2342                          * else.
2343                          */
2344                         a_mapelm = 0;
2345                 }
2346                 b_mapelm = (uintptr_t)b;
2347
2348                 ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
2349         }
2350
2351         return (ret);
2352 }
2353
2354 /* Wrap red-black tree macros in functions. */
2355 rb_wrap(__unused static, arena_avail_tree_, arena_avail_tree_t,
2356     arena_chunk_map_t, link, arena_avail_comp)
2357
2358 static inline void *
2359 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
2360 {
2361         void *ret;
2362         unsigned i, mask, bit, regind;
2363
2364         assert(run->magic == ARENA_RUN_MAGIC);
2365         assert(run->regs_minelm < bin->regs_mask_nelms);
2366
2367         /*
2368          * Move the first check outside the loop, so that run->regs_minelm can
2369          * be updated unconditionally, without the possibility of updating it
2370          * multiple times.
2371          */
2372         i = run->regs_minelm;
2373         mask = run->regs_mask[i];
2374         if (mask != 0) {
2375                 /* Usable allocation found. */
2376                 bit = ffs((int)mask) - 1;
2377
2378                 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
2379                 assert(regind < bin->nregs);
2380                 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2381                     + (bin->reg_size * regind));
2382
2383                 /* Clear bit. */
2384                 mask ^= (1U << bit);
2385                 run->regs_mask[i] = mask;
2386
2387                 return (ret);
2388         }
2389
2390         for (i++; i < bin->regs_mask_nelms; i++) {
2391                 mask = run->regs_mask[i];
2392                 if (mask != 0) {
2393                         /* Usable allocation found. */
2394                         bit = ffs((int)mask) - 1;
2395
2396                         regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
2397                         assert(regind < bin->nregs);
2398                         ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2399                             + (bin->reg_size * regind));
2400
2401                         /* Clear bit. */
2402                         mask ^= (1U << bit);
2403                         run->regs_mask[i] = mask;
2404
2405                         /*
2406                          * Make a note that nothing before this element
2407                          * contains a free region.
2408                          */
2409                         run->regs_minelm = i; /* Low payoff: + (mask == 0); */
2410
2411                         return (ret);
2412                 }
2413         }
2414         /* Not reached. */
2415         assert(0);
2416         return (NULL);
2417 }
2418
2419 static inline void
2420 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
2421 {
2422         unsigned diff, regind, elm, bit;
2423
2424         assert(run->magic == ARENA_RUN_MAGIC);
2425
2426         /*
2427          * Avoid doing division with a variable divisor if possible.  Using
2428          * actual division here can reduce allocator throughput by over 20%!
2429          */
2430         diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
2431         if ((size & (size - 1)) == 0) {
2432                 /*
2433                  * log2_table allows fast division of a power of two in the
2434                  * [1..128] range.
2435                  *
2436                  * (x / divisor) becomes (x >> log2_table[divisor - 1]).
2437                  */
2438                 static const unsigned char log2_table[] = {
2439                     0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
2440                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
2441                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2442                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
2443                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2444                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2445                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
2446                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
2447                 };
2448
2449                 if (size <= 128)
2450                         regind = (diff >> log2_table[size - 1]);
2451                 else if (size <= 32768)
2452                         regind = diff >> (8 + log2_table[(size >> 8) - 1]);
2453                 else
2454                         regind = diff / size;
2455         } else if (size < qspace_max) {
2456                 /*
2457                  * To divide by a number D that is not a power of two we
2458                  * multiply by (2^21 / D) and then right shift by 21 positions.
2459                  *
2460                  *   X / D
2461                  *
2462                  * becomes
2463                  *
2464                  *   (X * qsize_invs[(D >> QUANTUM_2POW) - 3])
2465                  *       >> SIZE_INV_SHIFT
2466                  *
2467                  * We can omit the first three elements, because we never
2468                  * divide by 0, and QUANTUM and 2*QUANTUM are both powers of
2469                  * two, which are handled above.
2470                  */
2471 #define SIZE_INV_SHIFT 21
2472 #define QSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << QUANTUM_2POW)) + 1)
2473                 static const unsigned qsize_invs[] = {
2474                     QSIZE_INV(3),
2475                     QSIZE_INV(4), QSIZE_INV(5), QSIZE_INV(6), QSIZE_INV(7)
2476 #if (QUANTUM_2POW < 4)
2477                     ,
2478                     QSIZE_INV(8), QSIZE_INV(9), QSIZE_INV(10), QSIZE_INV(11),
2479                     QSIZE_INV(12),QSIZE_INV(13), QSIZE_INV(14), QSIZE_INV(15)
2480 #endif
2481                 };
2482                 assert(QUANTUM * (((sizeof(qsize_invs)) / sizeof(unsigned)) + 3)
2483                     >= (1U << QSPACE_MAX_2POW_DEFAULT));
2484
2485                 if (size <= (((sizeof(qsize_invs) / sizeof(unsigned)) + 2) <<
2486                     QUANTUM_2POW)) {
2487                         regind = qsize_invs[(size >> QUANTUM_2POW) - 3] * diff;
2488                         regind >>= SIZE_INV_SHIFT;
2489                 } else
2490                         regind = diff / size;
2491 #undef QSIZE_INV
2492         } else if (size < cspace_max) {
2493 #define CSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << CACHELINE_2POW)) + 1)
2494                 static const unsigned csize_invs[] = {
2495                     CSIZE_INV(3),
2496                     CSIZE_INV(4), CSIZE_INV(5), CSIZE_INV(6), CSIZE_INV(7)
2497                 };
2498                 assert(CACHELINE * (((sizeof(csize_invs)) / sizeof(unsigned)) +
2499                     3) >= (1U << CSPACE_MAX_2POW_DEFAULT));
2500
2501                 if (size <= (((sizeof(csize_invs) / sizeof(unsigned)) + 2) <<
2502                     CACHELINE_2POW)) {
2503                         regind = csize_invs[(size >> CACHELINE_2POW) - 3] *
2504                             diff;
2505                         regind >>= SIZE_INV_SHIFT;
2506                 } else
2507                         regind = diff / size;
2508 #undef CSIZE_INV
2509         } else {
2510 #define SSIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s << SUBPAGE_2POW)) + 1)
2511                 static const unsigned ssize_invs[] = {
2512                     SSIZE_INV(3),
2513                     SSIZE_INV(4), SSIZE_INV(5), SSIZE_INV(6), SSIZE_INV(7),
2514                     SSIZE_INV(8), SSIZE_INV(9), SSIZE_INV(10), SSIZE_INV(11),
2515                     SSIZE_INV(12), SSIZE_INV(13), SSIZE_INV(14), SSIZE_INV(15)
2516 #if (PAGE_SHIFT == 13)
2517                     ,
2518                     SSIZE_INV(16), SSIZE_INV(17), SSIZE_INV(18), SSIZE_INV(19),
2519                     SSIZE_INV(20), SSIZE_INV(21), SSIZE_INV(22), SSIZE_INV(23),
2520                     SSIZE_INV(24), SSIZE_INV(25), SSIZE_INV(26), SSIZE_INV(27),
2521                     SSIZE_INV(28), SSIZE_INV(29), SSIZE_INV(29), SSIZE_INV(30)
2522 #endif
2523                 };
2524                 assert(SUBPAGE * (((sizeof(ssize_invs)) / sizeof(unsigned)) + 3)
2525                     >= PAGE_SIZE);
2526
2527                 if (size < (((sizeof(ssize_invs) / sizeof(unsigned)) + 2) <<
2528                     SUBPAGE_2POW)) {
2529                         regind = ssize_invs[(size >> SUBPAGE_2POW) - 3] * diff;
2530                         regind >>= SIZE_INV_SHIFT;
2531                 } else
2532                         regind = diff / size;
2533 #undef SSIZE_INV
2534         }
2535 #undef SIZE_INV_SHIFT
2536         assert(diff == regind * size);
2537         assert(regind < bin->nregs);
2538
2539         elm = regind >> (SIZEOF_INT_2POW + 3);
2540         if (elm < run->regs_minelm)
2541                 run->regs_minelm = elm;
2542         bit = regind - (elm << (SIZEOF_INT_2POW + 3));
2543         assert((run->regs_mask[elm] & (1U << bit)) == 0);
2544         run->regs_mask[elm] |= (1U << bit);
2545 }
2546
2547 static void
2548 arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
2549     bool zero)
2550 {
2551         arena_chunk_t *chunk;
2552         size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
2553
2554         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2555         old_ndirty = chunk->ndirty;
2556         run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2557             >> PAGE_SHIFT);
2558         total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
2559             PAGE_SHIFT;
2560         need_pages = (size >> PAGE_SHIFT);
2561         assert(need_pages > 0);
2562         assert(need_pages <= total_pages);
2563         rem_pages = total_pages - need_pages;
2564
2565         arena_avail_tree_remove(&arena->runs_avail, &chunk->map[run_ind]);
2566
2567         /* Keep track of trailing unused pages for later use. */
2568         if (rem_pages > 0) {
2569                 chunk->map[run_ind+need_pages].bits = (rem_pages <<
2570                     PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
2571                     PAGE_MASK);
2572                 chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
2573                     PAGE_SHIFT) | (chunk->map[run_ind+total_pages-1].bits &
2574                     PAGE_MASK);
2575                 arena_avail_tree_insert(&arena->runs_avail,
2576                     &chunk->map[run_ind+need_pages]);
2577         }
2578
2579         for (i = 0; i < need_pages; i++) {
2580                 /* Zero if necessary. */
2581                 if (zero) {
2582                         if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED)
2583                             == 0) {
2584                                 memset((void *)((uintptr_t)chunk + ((run_ind
2585                                     + i) << PAGE_SHIFT)), 0, PAGE_SIZE);
2586                                 /* CHUNK_MAP_ZEROED is cleared below. */
2587                         }
2588                 }
2589
2590                 /* Update dirty page accounting. */
2591                 if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
2592                         chunk->ndirty--;
2593                         arena->ndirty--;
2594                         /* CHUNK_MAP_DIRTY is cleared below. */
2595                 }
2596
2597                 /* Initialize the chunk map. */
2598                 if (large) {
2599                         chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE
2600                             | CHUNK_MAP_ALLOCATED;
2601                 } else {
2602                         chunk->map[run_ind + i].bits = (size_t)run
2603                             | CHUNK_MAP_ALLOCATED;
2604                 }
2605         }
2606
2607         /*
2608          * Set the run size only in the first element for large runs.  This is
2609          * primarily a debugging aid, since the lack of size info for trailing
2610          * pages only matters if the application tries to operate on an
2611          * interior pointer.
2612          */
2613         if (large)
2614                 chunk->map[run_ind].bits |= size;
2615
2616         if (chunk->ndirty == 0 && old_ndirty > 0)
2617                 arena_chunk_tree_dirty_remove(&arena->chunks_dirty, chunk);
2618 }
2619
2620 static arena_chunk_t *
2621 arena_chunk_alloc(arena_t *arena)
2622 {
2623         arena_chunk_t *chunk;
2624         size_t i;
2625
2626         if (arena->spare != NULL) {
2627                 chunk = arena->spare;
2628                 arena->spare = NULL;
2629         } else {
2630                 chunk = (arena_chunk_t *)chunk_alloc(chunksize, true);
2631                 if (chunk == NULL)
2632                         return (NULL);
2633 #ifdef MALLOC_STATS
2634                 arena->stats.mapped += chunksize;
2635 #endif
2636
2637                 chunk->arena = arena;
2638
2639                 /*
2640                  * Claim that no pages are in use, since the header is merely
2641                  * overhead.
2642                  */
2643                 chunk->ndirty = 0;
2644
2645                 /*
2646                  * Initialize the map to contain one maximal free untouched run.
2647                  */
2648                 for (i = 0; i < arena_chunk_header_npages; i++)
2649                         chunk->map[i].bits = 0;
2650                 chunk->map[i].bits = arena_maxclass | CHUNK_MAP_ZEROED;
2651                 for (i++; i < chunk_npages-1; i++) {
2652                         chunk->map[i].bits = CHUNK_MAP_ZEROED;
2653                 }
2654                 chunk->map[chunk_npages-1].bits = arena_maxclass |
2655                     CHUNK_MAP_ZEROED;
2656         }
2657
2658         /* Insert the run into the runs_avail tree. */
2659         arena_avail_tree_insert(&arena->runs_avail,
2660             &chunk->map[arena_chunk_header_npages]);
2661
2662         return (chunk);
2663 }
2664
2665 static void
2666 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
2667 {
2668
2669         if (arena->spare != NULL) {
2670                 if (arena->spare->ndirty > 0) {
2671                         arena_chunk_tree_dirty_remove(
2672                             &chunk->arena->chunks_dirty, arena->spare);
2673                         arena->ndirty -= arena->spare->ndirty;
2674                 }
2675                 chunk_dealloc((void *)arena->spare, chunksize);
2676 #ifdef MALLOC_STATS
2677                 arena->stats.mapped -= chunksize;
2678 #endif
2679         }
2680
2681         /*
2682          * Remove run from runs_avail, regardless of whether this chunk
2683          * will be cached, so that the arena does not use it.  Dirty page
2684          * flushing only uses the chunks_dirty tree, so leaving this chunk in
2685          * the chunks_* trees is sufficient for that purpose.
2686          */
2687         arena_avail_tree_remove(&arena->runs_avail,
2688             &chunk->map[arena_chunk_header_npages]);
2689
2690         arena->spare = chunk;
2691 }
2692
2693 static arena_run_t *
2694 arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
2695 {
2696         arena_chunk_t *chunk;
2697         arena_run_t *run;
2698         arena_chunk_map_t *mapelm, key;
2699
2700         assert(size <= arena_maxclass);
2701         assert((size & PAGE_MASK) == 0);
2702
2703         /* Search the arena's chunks for the lowest best fit. */
2704         key.bits = size | CHUNK_MAP_KEY;
2705         mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
2706         if (mapelm != NULL) {
2707                 arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
2708                 size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
2709                     / sizeof(arena_chunk_map_t);
2710
2711                 run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
2712                     << PAGE_SHIFT));
2713                 arena_run_split(arena, run, size, large, zero);
2714                 return (run);
2715         }
2716
2717         /*
2718          * No usable runs.  Create a new chunk from which to allocate the run.
2719          */
2720         chunk = arena_chunk_alloc(arena);
2721         if (chunk == NULL)
2722                 return (NULL);
2723         run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2724             PAGE_SHIFT));
2725         /* Update page map. */
2726         arena_run_split(arena, run, size, large, zero);
2727         return (run);
2728 }
2729
2730 static void
2731 arena_purge(arena_t *arena)
2732 {
2733         arena_chunk_t *chunk;
2734         size_t i, npages;
2735 #ifdef MALLOC_DEBUG
2736         size_t ndirty = 0;
2737
2738         rb_foreach_begin(arena_chunk_t, link_dirty, &arena->chunks_dirty,
2739             chunk) {
2740                 ndirty += chunk->ndirty;
2741         } rb_foreach_end(arena_chunk_t, link_dirty, &arena->chunks_dirty, chunk)
2742         assert(ndirty == arena->ndirty);
2743 #endif
2744         assert(arena->ndirty > opt_dirty_max);
2745
2746 #ifdef MALLOC_STATS
2747         arena->stats.npurge++;
2748 #endif
2749
2750         /*
2751          * Iterate downward through chunks until enough dirty memory has been
2752          * purged.  Terminate as soon as possible in order to minimize the
2753          * number of system calls, even if a chunk has only been partially
2754          * purged.
2755          */
2756         while (arena->ndirty > (opt_dirty_max >> 1)) {
2757                 chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
2758                 assert(chunk != NULL);
2759
2760                 for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
2761                         assert(i >= arena_chunk_header_npages);
2762
2763                         if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
2764                                 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2765                                 /* Find adjacent dirty run(s). */
2766                                 for (npages = 1; i > arena_chunk_header_npages
2767                                     && (chunk->map[i - 1].bits &
2768                                     CHUNK_MAP_DIRTY); npages++) {
2769                                         i--;
2770                                         chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2771                                 }
2772                                 chunk->ndirty -= npages;
2773                                 arena->ndirty -= npages;
2774
2775                                 madvise((void *)((uintptr_t)chunk + (i <<
2776                                     PAGE_SHIFT)), (npages << PAGE_SHIFT),
2777                                     MADV_FREE);
2778 #ifdef MALLOC_STATS
2779                                 arena->stats.nmadvise++;
2780                                 arena->stats.purged += npages;
2781 #endif
2782                                 if (arena->ndirty <= (opt_dirty_max >> 1))
2783                                         break;
2784                         }
2785                 }
2786
2787                 if (chunk->ndirty == 0) {
2788                         arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
2789                             chunk);
2790                 }
2791         }
2792 }
2793
2794 static void
2795 arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
2796 {
2797         arena_chunk_t *chunk;
2798         size_t size, run_ind, run_pages;
2799
2800         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2801         run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
2802             >> PAGE_SHIFT);
2803         assert(run_ind >= arena_chunk_header_npages);
2804         assert(run_ind < chunk_npages);
2805         if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
2806                 size = chunk->map[run_ind].bits & ~PAGE_MASK;
2807         else
2808                 size = run->bin->run_size;
2809         run_pages = (size >> PAGE_SHIFT);
2810
2811         /* Mark pages as unallocated in the chunk map. */
2812         if (dirty) {
2813                 size_t i;
2814
2815                 for (i = 0; i < run_pages; i++) {
2816                         assert((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY)
2817                             == 0);
2818                         chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
2819                 }
2820
2821                 if (chunk->ndirty == 0) {
2822                         arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
2823                             chunk);
2824                 }
2825                 chunk->ndirty += run_pages;
2826                 arena->ndirty += run_pages;
2827         } else {
2828                 size_t i;
2829
2830                 for (i = 0; i < run_pages; i++) {
2831                         chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE |
2832                             CHUNK_MAP_ALLOCATED);
2833                 }
2834         }
2835         chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2836             PAGE_MASK);
2837         chunk->map[run_ind+run_pages-1].bits = size |
2838             (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2839
2840         /* Try to coalesce forward. */
2841         if (run_ind + run_pages < chunk_npages &&
2842             (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
2843                 size_t nrun_size = chunk->map[run_ind+run_pages].bits &
2844                     ~PAGE_MASK;
2845
2846                 /*
2847                  * Remove successor from runs_avail; the coalesced run is
2848                  * inserted later.
2849                  */
2850                 arena_avail_tree_remove(&arena->runs_avail,
2851                     &chunk->map[run_ind+run_pages]);
2852
2853                 size += nrun_size;
2854                 run_pages = size >> PAGE_SHIFT;
2855
2856                 assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
2857                     == nrun_size);
2858                 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2859                     PAGE_MASK);
2860                 chunk->map[run_ind+run_pages-1].bits = size |
2861                     (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2862         }
2863
2864         /* Try to coalesce backward. */
2865         if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
2866             CHUNK_MAP_ALLOCATED) == 0) {
2867                 size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;
2868
2869                 run_ind -= prun_size >> PAGE_SHIFT;
2870
2871                 /*
2872                  * Remove predecessor from runs_avail; the coalesced run is
2873                  * inserted later.
2874                  */
2875                 arena_avail_tree_remove(&arena->runs_avail,
2876                     &chunk->map[run_ind]);
2877
2878                 size += prun_size;
2879                 run_pages = size >> PAGE_SHIFT;
2880
2881                 assert((chunk->map[run_ind].bits & ~PAGE_MASK) ==
2882                     prun_size);
2883                 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2884                     PAGE_MASK);
2885                 chunk->map[run_ind+run_pages-1].bits = size |
2886                     (chunk->map[run_ind+run_pages-1].bits & PAGE_MASK);
2887         }
2888
2889         /* Insert into runs_avail, now that coalescing is complete. */
2890         arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
2891
2892         /* Deallocate chunk if it is now completely unused. */
2893         if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
2894             CHUNK_MAP_ALLOCATED)) == arena_maxclass)
2895                 arena_chunk_dealloc(arena, chunk);
2896
2897         /* Enforce opt_dirty_max. */
2898         if (arena->ndirty > opt_dirty_max)
2899                 arena_purge(arena);
2900 }
2901
2902 static void
2903 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
2904     size_t oldsize, size_t newsize)
2905 {
2906         size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
2907         size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
2908
2909         assert(oldsize > newsize);
2910
2911         /*
2912          * Update the chunk map so that arena_run_dalloc() can treat the
2913          * leading run as separately allocated.
2914          */
2915         chunk->map[pageind].bits = (oldsize - newsize) | CHUNK_MAP_LARGE |
2916             CHUNK_MAP_ALLOCATED;
2917         chunk->map[pageind+head_npages].bits = newsize | CHUNK_MAP_LARGE |
2918             CHUNK_MAP_ALLOCATED;
2919
2920         arena_run_dalloc(arena, run, false);
2921 }
2922
2923 static void
2924 arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
2925     size_t oldsize, size_t newsize, bool dirty)
2926 {
2927         size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
2928         size_t npages = newsize >> PAGE_SHIFT;
2929
2930         assert(oldsize > newsize);
2931
2932         /*
2933          * Update the chunk map so that arena_run_dalloc() can treat the
2934          * trailing run as separately allocated.
2935          */
2936         chunk->map[pageind].bits = newsize | CHUNK_MAP_LARGE |
2937             CHUNK_MAP_ALLOCATED;
2938         chunk->map[pageind+npages].bits = (oldsize - newsize) | CHUNK_MAP_LARGE
2939             | CHUNK_MAP_ALLOCATED;
2940
2941         arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
2942             dirty);
2943 }
2944
2945 static arena_run_t *
2946 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2947 {
2948         arena_chunk_map_t *mapelm;
2949         arena_run_t *run;
2950         unsigned i, remainder;
2951
2952         /* Look for a usable run. */
2953         mapelm = arena_run_tree_first(&bin->runs);
2954         if (mapelm != NULL) {
2955                 /* run is guaranteed to have available space. */
2956                 arena_run_tree_remove(&bin->runs, mapelm);
2957                 run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
2958 #ifdef MALLOC_STATS
2959                 bin->stats.reruns++;
2960 #endif
2961                 return (run);
2962         }
2963         /* No existing runs have any space available. */
2964
2965         /* Allocate a new run. */
2966         run = arena_run_alloc(arena, bin->run_size, false, false);
2967         if (run == NULL)
2968                 return (NULL);
2969
2970         /* Initialize run internals. */
2971         run->bin = bin;
2972
2973         for (i = 0; i < bin->regs_mask_nelms - 1; i++)
2974                 run->regs_mask[i] = UINT_MAX;
2975         remainder = bin->nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1);
2976         if (remainder == 0)
2977                 run->regs_mask[i] = UINT_MAX;
2978         else {
2979                 /* The last element has spare bits that need to be unset. */
2980                 run->regs_mask[i] = (UINT_MAX >> ((1U << (SIZEOF_INT_2POW + 3))
2981                     - remainder));
2982         }
2983
2984         run->regs_minelm = 0;
2985
2986         run->nfree = bin->nregs;
2987 #ifdef MALLOC_DEBUG
2988         run->magic = ARENA_RUN_MAGIC;
2989 #endif
2990
2991 #ifdef MALLOC_STATS
2992         bin->stats.nruns++;
2993         bin->stats.curruns++;
2994         if (bin->stats.curruns > bin->stats.highruns)
2995                 bin->stats.highruns = bin->stats.curruns;
2996 #endif
2997         return (run);
2998 }
2999
3000 /* bin->runcur must have space available before this function is called. */
3001 static inline void *
3002 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
3003 {
3004         void *ret;
3005
3006         assert(run->magic == ARENA_RUN_MAGIC);
3007         assert(run->nfree > 0);
3008
3009         ret = arena_run_reg_alloc(run, bin);
3010         assert(ret != NULL);
3011         run->nfree--;
3012
3013         return (ret);
3014 }
3015
3016 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
3017 static void *
3018 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
3019 {
3020
3021         bin->runcur = arena_bin_nonfull_run_get(arena, bin);
3022         if (bin->runcur == NULL)
3023                 return (NULL);
3024         assert(bin->runcur->magic == ARENA_RUN_MAGIC);
3025         assert(bin->runcur->nfree > 0);
3026
3027         return (arena_bin_malloc_easy(arena, bin, bin->runcur));
3028 }
3029
3030 /*
3031  * Calculate bin->run_size such that it meets the following constraints:
3032  *
3033  *   *) bin->run_size >= min_run_size
3034  *   *) bin->run_size <= arena_maxclass
3035  *   *) bin->run_size <= RUN_MAX_SMALL
3036  *   *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
3037  *
3038  * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
3039  * also calculated here, since these settings are all interdependent.
3040  */
3041 static size_t
3042 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
3043 {
3044         size_t try_run_size, good_run_size;
3045         unsigned good_nregs, good_mask_nelms, good_reg0_offset;
3046         unsigned try_nregs, try_mask_nelms, try_reg0_offset;
3047
3048         assert(min_run_size >= PAGE_SIZE);
3049         assert(min_run_size <= arena_maxclass);
3050         assert(min_run_size <= RUN_MAX_SMALL);
3051
3052         /*
3053          * Calculate known-valid settings before entering the run_size
3054          * expansion loop, so that the first part of the loop always copies
3055          * valid settings.
3056          *
3057          * The do..while loop iteratively reduces the number of regions until
3058          * the run header and the regions no longer overlap.  A closed formula
3059          * would be quite messy, since there is an interdependency between the
3060          * header's mask length and the number of regions.
3061          */
3062         try_run_size = min_run_size;
3063         try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
3064             + 1; /* Counter-act try_nregs-- in loop. */
3065         do {
3066                 try_nregs--;
3067                 try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
3068                     ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ? 1 : 0);
3069                 try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
3070         } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
3071             > try_reg0_offset);
3072
3073         /* run_size expansion loop. */
3074         do {
3075                 /*
3076                  * Copy valid settings before trying more aggressive settings.
3077                  */
3078                 good_run_size = try_run_size;
3079                 good_nregs = try_nregs;
3080                 good_mask_nelms = try_mask_nelms;
3081                 good_reg0_offset = try_reg0_offset;
3082
3083                 /* Try more aggressive settings. */
3084                 try_run_size += PAGE_SIZE;
3085                 try_nregs = ((try_run_size - sizeof(arena_run_t)) /
3086                     bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
3087                 do {
3088                         try_nregs--;
3089                         try_mask_nelms = (try_nregs >> (SIZEOF_INT_2POW + 3)) +
3090                             ((try_nregs & ((1U << (SIZEOF_INT_2POW + 3)) - 1)) ?
3091                             1 : 0);
3092                         try_reg0_offset = try_run_size - (try_nregs *
3093                             bin->reg_size);
3094                 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
3095                     (try_mask_nelms - 1)) > try_reg0_offset);
3096         } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
3097             && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
3098             && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size);
3099
3100         assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
3101             <= good_reg0_offset);
3102         assert((good_mask_nelms << (SIZEOF_INT_2POW + 3)) >= good_nregs);
3103
3104         /* Copy final settings. */
3105         bin->run_size = good_run_size;
3106         bin->nregs = good_nregs;
3107         bin->regs_mask_nelms = good_mask_nelms;
3108         bin->reg0_offset = good_reg0_offset;
3109
3110         return (good_run_size);
3111 }
3112
3113 #ifdef MALLOC_BALANCE
3114 static inline void
3115 arena_lock_balance(arena_t *arena)
3116 {
3117         unsigned contention;
3118
3119         contention = malloc_spin_lock(&arena->lock);
3120         if (narenas > 1) {
3121                 /*
3122                  * Calculate the exponentially averaged contention for this
3123                  * arena.  Due to integer math always rounding down, this value
3124                  * decays somewhat faster than normal.
3125                  */
3126                 arena->contention = (((uint64_t)arena->contention
3127                     * (uint64_t)((1U << BALANCE_ALPHA_INV_2POW)-1))
3128                     + (uint64_t)contention) >> BALANCE_ALPHA_INV_2POW;
3129                 if (arena->contention >= opt_balance_threshold)
3130                         arena_lock_balance_hard(arena);
3131         }
3132 }
3133
3134 static void
3135 arena_lock_balance_hard(arena_t *arena)
3136 {
3137         uint32_t ind;
3138
3139         arena->contention = 0;
3140 #ifdef MALLOC_STATS
3141         arena->stats.nbalance++;
3142 #endif
3143         ind = PRN(balance, narenas_2pow);
3144         if (arenas[ind] != NULL)
3145                 arenas_map = arenas[ind];
3146         else {
3147                 malloc_spin_lock(&arenas_lock);
3148                 if (arenas[ind] != NULL)
3149                         arenas_map = arenas[ind];
3150                 else
3151                         arenas_map = arenas_extend(ind);
3152                 malloc_spin_unlock(&arenas_lock);
3153         }
3154 }
3155 #endif
3156
3157 #ifdef MALLOC_MAG
3158 static inline void *
3159 mag_alloc(mag_t *mag)
3160 {
3161
3162         if (mag->nrounds == 0)
3163                 return (NULL);
3164         mag->nrounds--;
3165
3166         return (mag->rounds[mag->nrounds]);
3167 }
3168
3169 static void
3170 mag_load(mag_t *mag)
3171 {
3172         arena_t *arena;
3173         arena_bin_t *bin;
3174         arena_run_t *run;
3175         void *round;
3176         size_t i;
3177
3178         arena = choose_arena();
3179         bin = &arena->bins[mag->binind];
3180 #ifdef MALLOC_BALANCE
3181         arena_lock_balance(arena);
3182 #else
3183         malloc_spin_lock(&arena->lock);
3184 #endif
3185         for (i = mag->nrounds; i < max_rounds; i++) {
3186                 if ((run = bin->runcur) != NULL && run->nfree > 0)
3187                         round = arena_bin_malloc_easy(arena, bin, run);
3188                 else
3189                         round = arena_bin_malloc_hard(arena, bin);
3190                 if (round == NULL)
3191                         break;
3192                 mag->rounds[i] = round;
3193         }
3194 #ifdef MALLOC_STATS
3195         bin->stats.nmags++;
3196         arena->stats.nmalloc_small += (i - mag->nrounds);
3197         arena->stats.allocated_small += (i - mag->nrounds) * bin->reg_size;
3198 #endif
3199         malloc_spin_unlock(&arena->lock);
3200         mag->nrounds = i;
3201 }
3202
3203 static inline void *
3204 mag_rack_alloc(mag_rack_t *rack, size_t size, bool zero)
3205 {
3206         void *ret;
3207         bin_mags_t *bin_mags;
3208         mag_t *mag;
3209         size_t binind;
3210
3211         binind = size2bin[size];
3212         assert(binind < nbins);
3213         bin_mags = &rack->bin_mags[binind];
3214
3215         mag = bin_mags->curmag;
3216         if (mag == NULL) {
3217                 /* Create an initial magazine for this size class. */
3218                 assert(bin_mags->sparemag == NULL);
3219                 mag = mag_create(choose_arena(), binind);
3220                 if (mag == NULL)
3221                         return (NULL);
3222                 bin_mags->curmag = mag;
3223                 mag_load(mag);
3224         }
3225
3226         ret = mag_alloc(mag);
3227         if (ret == NULL) {
3228                 if (bin_mags->sparemag != NULL) {
3229                         if (bin_mags->sparemag->nrounds > 0) {
3230                                 /* Swap magazines. */
3231                                 bin_mags->curmag = bin_mags->sparemag;
3232                                 bin_mags->sparemag = mag;
3233                                 mag = bin_mags->curmag;
3234                         } else {
3235                                 /* Reload the current magazine. */
3236                                 mag_load(mag);
3237                         }
3238                 } else {
3239                         /* Create a second magazine. */
3240                         mag = mag_create(choose_arena(), binind);
3241                         if (mag == NULL)
3242                                 return (NULL);
3243                         mag_load(mag);
3244                         bin_mags->sparemag = bin_mags->curmag;
3245                         bin_mags->curmag = mag;
3246                 }
3247                 ret = mag_alloc(mag);
3248                 if (ret == NULL)
3249                         return (NULL);
3250         }
3251
3252         if (zero == false) {
3253                 if (opt_junk)
3254                         memset(ret, 0xa5, size);
3255                 else if (opt_zero)
3256                         memset(ret, 0, size);
3257         } else
3258                 memset(ret, 0, size);
3259
3260         return (ret);
3261 }
3262 #endif
3263
3264 static inline void *
3265 arena_malloc_small(arena_t *arena, size_t size, bool zero)
3266 {
3267         void *ret;
3268         arena_bin_t *bin;
3269         arena_run_t *run;
3270         size_t binind;
3271
3272         binind = size2bin[size];
3273         assert(binind < nbins);
3274         bin = &arena->bins[binind];
3275         size = bin->reg_size;
3276
3277 #ifdef MALLOC_BALANCE
3278         arena_lock_balance(arena);
3279 #else
3280         malloc_spin_lock(&arena->lock);
3281 #endif
3282         if ((run = bin->runcur) != NULL && run->nfree > 0)
3283                 ret = arena_bin_malloc_easy(arena, bin, run);
3284         else
3285                 ret = arena_bin_malloc_hard(arena, bin);
3286
3287         if (ret == NULL) {
3288                 malloc_spin_unlock(&arena->lock);
3289                 return (NULL);
3290         }
3291
3292 #ifdef MALLOC_STATS
3293         bin->stats.nrequests++;
3294         arena->stats.nmalloc_small++;
3295         arena->stats.allocated_small += size;
3296 #endif
3297         malloc_spin_unlock(&arena->lock);
3298
3299         if (zero == false) {
3300                 if (opt_junk)
3301                         memset(ret, 0xa5, size);
3302                 else if (opt_zero)
3303                         memset(ret, 0, size);
3304         } else
3305                 memset(ret, 0, size);
3306
3307         return (ret);
3308 }
3309
3310 static void *
3311 arena_malloc_large(arena_t *arena, size_t size, bool zero)
3312 {
3313         void *ret;
3314
3315         /* Large allocation. */
3316         size = PAGE_CEILING(size);
3317 #ifdef MALLOC_BALANCE
3318         arena_lock_balance(arena);
3319 #else
3320         malloc_spin_lock(&arena->lock);
3321 #endif
3322         ret = (void *)arena_run_alloc(arena, size, true, zero);
3323         if (ret == NULL) {
3324                 malloc_spin_unlock(&arena->lock);
3325                 return (NULL);
3326         }
3327 #ifdef MALLOC_STATS
3328         arena->stats.nmalloc_large++;
3329         arena->stats.allocated_large += size;
3330 #endif
3331         malloc_spin_unlock(&arena->lock);
3332
3333         if (zero == false) {
3334                 if (opt_junk)
3335                         memset(ret, 0xa5, size);
3336                 else if (opt_zero)
3337                         memset(ret, 0, size);
3338         }
3339
3340         return (ret);
3341 }
3342
3343 static inline void *
3344 arena_malloc(arena_t *arena, size_t size, bool zero)
3345 {
3346
3347         assert(arena != NULL);
3348         assert(arena->magic == ARENA_MAGIC);
3349         assert(size != 0);
3350         assert(QUANTUM_CEILING(size) <= arena_maxclass);
3351
3352         if (size <= bin_maxclass) {
3353 #ifdef MALLOC_MAG
3354                 if (__isthreaded && opt_mag) {
3355                         mag_rack_t *rack = mag_rack;
3356                         if (rack == NULL) {
3357                                 rack = mag_rack_create(arena);
3358                                 if (rack == NULL)
3359                                         return (NULL);
3360                                 mag_rack = rack;
3361                         }
3362                         return (mag_rack_alloc(rack, size, zero));
3363                 } else
3364 #endif
3365                         return (arena_malloc_small(arena, size, zero));
3366         } else
3367                 return (arena_malloc_large(arena, size, zero));
3368 }
3369
3370 static inline void *
3371 imalloc(size_t size)
3372 {
3373
3374         assert(size != 0);
3375
3376         if (size <= arena_maxclass)
3377                 return (arena_malloc(choose_arena(), size, false));
3378         else
3379                 return (huge_malloc(size, false));
3380 }
3381
3382 static inline void *
3383 icalloc(size_t size)
3384 {
3385
3386         if (size <= arena_maxclass)
3387                 return (arena_malloc(choose_arena(), size, true));
3388         else
3389                 return (huge_malloc(size, true));
3390 }
3391
3392 /* Only handles large allocations that require more than page alignment. */
3393 static void *
3394 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
3395 {
3396         void *ret;
3397         size_t offset;
3398         arena_chunk_t *chunk;
3399
3400         assert((size & PAGE_MASK) == 0);
3401         assert((alignment & PAGE_MASK) == 0);
3402
3403 #ifdef MALLOC_BALANCE
3404         arena_lock_balance(arena);
3405 #else
3406         malloc_spin_lock(&arena->lock);
3407 #endif
3408         ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
3409         if (ret == NULL) {
3410                 malloc_spin_unlock(&arena->lock);
3411                 return (NULL);
3412         }
3413
3414         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
3415
3416         offset = (uintptr_t)ret & (alignment - 1);
3417         assert((offset & PAGE_MASK) == 0);
3418         assert(offset < alloc_size);
3419         if (offset == 0)
3420                 arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
3421         else {
3422                 size_t leadsize, trailsize;
3423
3424                 leadsize = alignment - offset;
3425                 if (leadsize > 0) {
3426                         arena_run_trim_head(arena, chunk, ret, alloc_size,
3427                             alloc_size - leadsize);
3428                         ret = (void *)((uintptr_t)ret + leadsize);
3429                 }
3430
3431                 trailsize = alloc_size - leadsize - size;
3432                 if (trailsize != 0) {
3433                         /* Trim trailing space. */
3434                         assert(trailsize < alloc_size);
3435                         arena_run_trim_tail(arena, chunk, ret, size + trailsize,
3436                             size, false);
3437                 }
3438         }
3439
3440 #ifdef MALLOC_STATS
3441         arena->stats.nmalloc_large++;
3442         arena->stats.allocated_large += size;
3443 #endif
3444         malloc_spin_unlock(&arena->lock);
3445
3446         if (opt_junk)
3447                 memset(ret, 0xa5, size);
3448         else if (opt_zero)
3449                 memset(ret, 0, size);
3450         return (ret);
3451 }
3452
3453 static inline void *
3454 ipalloc(size_t alignment, size_t size)
3455 {
3456         void *ret;
3457         size_t ceil_size;
3458
3459         /*
3460          * Round size up to the nearest multiple of alignment.
3461          *
3462          * This done, we can take advantage of the fact that for each small
3463          * size class, every object is aligned at the smallest power of two
3464          * that is non-zero in the base two representation of the size.  For
3465          * example:
3466          *
3467          *   Size |   Base 2 | Minimum alignment
3468          *   -----+----------+------------------
3469          *     96 |  1100000 |  32
3470          *    144 | 10100000 |  32
3471          *    192 | 11000000 |  64
3472          *
3473          * Depending on runtime settings, it is possible that arena_malloc()
3474          * will further round up to a power of two, but that never causes
3475          * correctness issues.
3476          */
3477         ceil_size = (size + (alignment - 1)) & (-alignment);
3478         /*
3479          * (ceil_size < size) protects against the combination of maximal
3480          * alignment and size greater than maximal alignment.
3481          */
3482         if (ceil_size < size) {
3483                 /* size_t overflow. */
3484                 return (NULL);
3485         }
3486
3487         if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
3488             && ceil_size <= arena_maxclass))
3489                 ret = arena_malloc(choose_arena(), ceil_size, false);
3490         else {
3491                 size_t run_size;
3492
3493                 /*
3494                  * We can't achieve subpage alignment, so round up alignment
3495                  * permanently; it makes later calculations simpler.
3496                  */
3497                 alignment = PAGE_CEILING(alignment);
3498                 ceil_size = PAGE_CEILING(size);
3499                 /*
3500                  * (ceil_size < size) protects against very large sizes within
3501                  * PAGE_SIZE of SIZE_T_MAX.
3502                  *
3503                  * (ceil_size + alignment < ceil_size) protects against the
3504                  * combination of maximal alignment and ceil_size large enough
3505                  * to cause overflow.  This is similar to the first overflow
3506                  * check above, but it needs to be repeated due to the new
3507                  * ceil_size value, which may now be *equal* to maximal
3508                  * alignment, whereas before we only detected overflow if the
3509                  * original size was *greater* than maximal alignment.
3510                  */
3511                 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3512                         /* size_t overflow. */
3513                         return (NULL);
3514                 }
3515
3516                 /*
3517                  * Calculate the size of the over-size run that arena_palloc()
3518                  * would need to allocate in order to guarantee the alignment.
3519                  */
3520                 if (ceil_size >= alignment)
3521                         run_size = ceil_size + alignment - PAGE_SIZE;
3522                 else {
3523                         /*
3524                          * It is possible that (alignment << 1) will cause
3525                          * overflow, but it doesn't matter because we also
3526                          * subtract PAGE_SIZE, which in the case of overflow
3527                          * leaves us with a very large run_size.  That causes
3528                          * the first conditional below to fail, which means
3529                          * that the bogus run_size value never gets used for
3530                          * anything important.
3531                          */
3532                         run_size = (alignment << 1) - PAGE_SIZE;
3533                 }
3534
3535                 if (run_size <= arena_maxclass) {
3536                         ret = arena_palloc(choose_arena(), alignment, ceil_size,
3537                             run_size);
3538                 } else if (alignment <= chunksize)
3539                         ret = huge_malloc(ceil_size, false);
3540                 else
3541                         ret = huge_palloc(alignment, ceil_size);
3542         }
3543
3544         assert(((uintptr_t)ret & (alignment - 1)) == 0);
3545         return (ret);
3546 }
3547
3548 /* Return the size of the allocation pointed to by ptr. */
3549 static size_t
3550 arena_salloc(const void *ptr)
3551 {
3552         size_t ret;
3553         arena_chunk_t *chunk;
3554         size_t pageind, mapbits;
3555
3556         assert(ptr != NULL);
3557         assert(CHUNK_ADDR2BASE(ptr) != ptr);
3558
3559         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3560         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3561         mapbits = chunk->map[pageind].bits;
3562         assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3563         if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3564                 arena_run_t *run = (arena_run_t *)(mapbits & ~PAGE_MASK);
3565                 assert(run->magic == ARENA_RUN_MAGIC);
3566                 ret = run->bin->reg_size;
3567         } else {
3568                 ret = mapbits & ~PAGE_MASK;
3569                 assert(ret != 0);
3570         }
3571
3572         return (ret);
3573 }
3574
3575 static inline size_t
3576 isalloc(const void *ptr)
3577 {
3578         size_t ret;
3579         arena_chunk_t *chunk;
3580
3581         assert(ptr != NULL);
3582
3583         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3584         if (chunk != ptr) {
3585                 /* Region. */
3586                 assert(chunk->arena->magic == ARENA_MAGIC);
3587
3588                 ret = arena_salloc(ptr);
3589         } else {
3590                 extent_node_t *node, key;
3591
3592                 /* Chunk (huge allocation). */
3593
3594                 malloc_mutex_lock(&huge_mtx);
3595
3596                 /* Extract from tree of huge allocations. */
3597                 key.addr = __DECONST(void *, ptr);
3598                 node = extent_tree_ad_search(&huge, &key);
3599                 assert(node != NULL);
3600
3601                 ret = node->size;
3602
3603                 malloc_mutex_unlock(&huge_mtx);
3604         }
3605
3606         return (ret);
3607 }
3608
3609 static inline void
3610 arena_dalloc_small(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3611     arena_chunk_map_t *mapelm)
3612 {
3613         arena_run_t *run;
3614         arena_bin_t *bin;
3615         size_t size;
3616
3617         run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
3618         assert(run->magic == ARENA_RUN_MAGIC);
3619         bin = run->bin;
3620         size = bin->reg_size;
3621
3622         if (opt_junk)
3623                 memset(ptr, 0x5a, size);
3624
3625         arena_run_reg_dalloc(run, bin, ptr, size);
3626         run->nfree++;
3627
3628         if (run->nfree == bin->nregs) {
3629                 /* Deallocate run. */
3630                 if (run == bin->runcur)
3631                         bin->runcur = NULL;
3632                 else if (bin->nregs != 1) {
3633                         size_t run_pageind = (((uintptr_t)run -
3634                             (uintptr_t)chunk)) >> PAGE_SHIFT;
3635                         arena_chunk_map_t *run_mapelm =
3636                             &chunk->map[run_pageind];
3637                         /*
3638                          * This block's conditional is necessary because if the
3639                          * run only contains one region, then it never gets
3640                          * inserted into the non-full runs tree.
3641                          */
3642                         arena_run_tree_remove(&bin->runs, run_mapelm);
3643                 }
3644 #ifdef MALLOC_DEBUG
3645                 run->magic = 0;
3646 #endif
3647                 arena_run_dalloc(arena, run, true);
3648 #ifdef MALLOC_STATS
3649                 bin->stats.curruns--;
3650 #endif
3651         } else if (run->nfree == 1 && run != bin->runcur) {
3652                 /*
3653                  * Make sure that bin->runcur always refers to the lowest
3654                  * non-full run, if one exists.
3655                  */
3656                 if (bin->runcur == NULL)
3657                         bin->runcur = run;
3658                 else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
3659                         /* Switch runcur. */
3660                         if (bin->runcur->nfree > 0) {
3661                                 arena_chunk_t *runcur_chunk =
3662                                     CHUNK_ADDR2BASE(bin->runcur);
3663                                 size_t runcur_pageind =
3664                                     (((uintptr_t)bin->runcur -
3665                                     (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
3666                                 arena_chunk_map_t *runcur_mapelm =
3667                                     &runcur_chunk->map[runcur_pageind];
3668
3669                                 /* Insert runcur. */
3670                                 arena_run_tree_insert(&bin->runs,
3671                                     runcur_mapelm);
3672                         }
3673                         bin->runcur = run;
3674                 } else {
3675                         size_t run_pageind = (((uintptr_t)run -
3676                             (uintptr_t)chunk)) >> PAGE_SHIFT;
3677                         arena_chunk_map_t *run_mapelm =
3678                             &chunk->map[run_pageind];
3679
3680                         assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
3681                             NULL);
3682                         arena_run_tree_insert(&bin->runs, run_mapelm);
3683                 }
3684         }
3685 #ifdef MALLOC_STATS
3686         arena->stats.allocated_small -= size;
3687         arena->stats.ndalloc_small++;
3688 #endif
3689 }
3690
3691 #ifdef MALLOC_MAG
3692 static void
3693 mag_unload(mag_t *mag)
3694 {
3695         arena_chunk_t *chunk;
3696         arena_t *arena;
3697         void *round;
3698         size_t i, ndeferred, nrounds;
3699
3700         for (ndeferred = mag->nrounds; ndeferred > 0;) {
3701                 nrounds = ndeferred;
3702                 /* Lock the arena associated with the first round. */
3703                 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mag->rounds[0]);
3704                 arena = chunk->arena;
3705 #ifdef MALLOC_BALANCE
3706                 arena_lock_balance(arena);
3707 #else
3708                 malloc_spin_lock(&arena->lock);
3709 #endif
3710                 /* Deallocate every round that belongs to the locked arena. */
3711                 for (i = ndeferred = 0; i < nrounds; i++) {
3712                         round = mag->rounds[i];
3713                         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(round);
3714                         if (chunk->arena == arena) {
3715                                 size_t pageind = (((uintptr_t)round -
3716                                     (uintptr_t)chunk) >> PAGE_SHIFT);
3717                                 arena_chunk_map_t *mapelm =
3718                                     &chunk->map[pageind];
3719                                 arena_dalloc_small(arena, chunk, round, mapelm);
3720                         } else {
3721                                 /*
3722                                  * This round was allocated via a different
3723                                  * arena than the one that is currently locked.
3724                                  * Stash the round, so that it can be handled
3725                                  * in a future pass.
3726                                  */
3727                                 mag->rounds[ndeferred] = round;
3728                                 ndeferred++;
3729                         }
3730                 }
3731                 malloc_spin_unlock(&arena->lock);
3732         }
3733
3734         mag->nrounds = 0;
3735 }
3736
3737 static inline void
3738 mag_rack_dalloc(mag_rack_t *rack, void *ptr)
3739 {
3740         arena_t *arena;
3741         arena_chunk_t *chunk;
3742         arena_run_t *run;
3743         arena_bin_t *bin;
3744         bin_mags_t *bin_mags;
3745         mag_t *mag;
3746         size_t pageind, binind;
3747         arena_chunk_map_t *mapelm;
3748
3749         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3750         arena = chunk->arena;
3751         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3752         mapelm = &chunk->map[pageind];
3753         run = (arena_run_t *)(mapelm->bits & ~PAGE_MASK);
3754         assert(run->magic == ARENA_RUN_MAGIC);
3755         bin = run->bin;
3756         binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
3757             sizeof(arena_bin_t);
3758         assert(binind < nbins);
3759
3760         if (opt_junk)
3761                 memset(ptr, 0x5a, arena->bins[binind].reg_size);
3762
3763         bin_mags = &rack->bin_mags[binind];
3764         mag = bin_mags->curmag;
3765         if (mag == NULL) {
3766                 /* Create an initial magazine for this size class. */
3767                 assert(bin_mags->sparemag == NULL);
3768                 mag = mag_create(choose_arena(), binind);
3769                 if (mag == NULL) {
3770                         malloc_spin_lock(&arena->lock);
3771                         arena_dalloc_small(arena, chunk, ptr, mapelm);
3772                         malloc_spin_unlock(&arena->lock);
3773                         return;
3774                 }
3775                 bin_mags->curmag = mag;
3776         }
3777
3778         if (mag->nrounds == max_rounds) {
3779                 if (bin_mags->sparemag != NULL) {
3780                         if (bin_mags->sparemag->nrounds < max_rounds) {
3781                                 /* Swap magazines. */
3782                                 bin_mags->curmag = bin_mags->sparemag;
3783                                 bin_mags->sparemag = mag;
3784                                 mag = bin_mags->curmag;
3785                         } else {
3786                                 /* Unload the current magazine. */
3787                                 mag_unload(mag);
3788                         }
3789                 } else {
3790                         /* Create a second magazine. */
3791                         mag = mag_create(choose_arena(), binind);
3792                         if (mag == NULL) {
3793                                 mag = rack->bin_mags[binind].curmag;
3794                                 mag_unload(mag);
3795                         } else {
3796                                 bin_mags->sparemag = bin_mags->curmag;
3797                                 bin_mags->curmag = mag;
3798                         }
3799                 }
3800                 assert(mag->nrounds < max_rounds);
3801         }
3802         mag->rounds[mag->nrounds] = ptr;
3803         mag->nrounds++;
3804 }
3805 #endif
3806
3807 static void
3808 arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
3809 {
3810         /* Large allocation. */
3811         malloc_spin_lock(&arena->lock);
3812
3813 #ifndef MALLOC_STATS
3814         if (opt_junk)
3815 #endif
3816         {
3817                 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
3818                     PAGE_SHIFT;
3819                 size_t size = chunk->map[pageind].bits & ~PAGE_MASK;
3820
3821 #ifdef MALLOC_STATS
3822                 if (opt_junk)
3823 #endif
3824                         memset(ptr, 0x5a, size);
3825 #ifdef MALLOC_STATS
3826                 arena->stats.allocated_large -= size;
3827 #endif
3828         }
3829 #ifdef MALLOC_STATS
3830         arena->stats.ndalloc_large++;
3831 #endif
3832
3833         arena_run_dalloc(arena, (arena_run_t *)ptr, true);
3834         malloc_spin_unlock(&arena->lock);
3835 }
3836
3837 static inline void
3838 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
3839 {
3840         size_t pageind;
3841         arena_chunk_map_t *mapelm;
3842
3843         assert(arena != NULL);
3844         assert(arena->magic == ARENA_MAGIC);
3845         assert(chunk->arena == arena);
3846         assert(ptr != NULL);
3847         assert(CHUNK_ADDR2BASE(ptr) != ptr);
3848
3849         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3850         mapelm = &chunk->map[pageind];
3851         assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
3852         if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
3853                 /* Small allocation. */
3854 #ifdef MALLOC_MAG
3855                 if (__isthreaded && opt_mag) {
3856                         mag_rack_t *rack = mag_rack;
3857                         if (rack == NULL) {
3858                                 rack = mag_rack_create(arena);
3859                                 if (rack == NULL) {
3860                                         malloc_spin_lock(&arena->lock);
3861                                         arena_dalloc_small(arena, chunk, ptr,
3862                                             mapelm);
3863                                         malloc_spin_unlock(&arena->lock);
3864                                 }
3865                                 mag_rack = rack;
3866                         }
3867                         mag_rack_dalloc(rack, ptr);
3868                 } else {
3869 #endif
3870                         malloc_spin_lock(&arena->lock);
3871                         arena_dalloc_small(arena, chunk, ptr, mapelm);
3872                         malloc_spin_unlock(&arena->lock);
3873 #ifdef MALLOC_MAG
3874                 }
3875 #endif
3876         } else
3877                 arena_dalloc_large(arena, chunk, ptr);
3878 }
3879
3880 static inline void
3881 idalloc(void *ptr)
3882 {
3883         arena_chunk_t *chunk;
3884
3885         assert(ptr != NULL);
3886
3887         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3888         if (chunk != ptr)
3889                 arena_dalloc(chunk->arena, chunk, ptr);
3890         else
3891                 huge_dalloc(ptr);
3892 }
3893
3894 static void
3895 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3896     size_t size, size_t oldsize)
3897 {
3898
3899         assert(size < oldsize);
3900
3901         /*
3902          * Shrink the run, and make trailing pages available for other
3903          * allocations.
3904          */
3905 #ifdef MALLOC_BALANCE
3906         arena_lock_balance(arena);
3907 #else
3908         malloc_spin_lock(&arena->lock);
3909 #endif
3910         arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
3911             true);
3912 #ifdef MALLOC_STATS
3913         arena->stats.allocated_large -= oldsize - size;
3914 #endif
3915         malloc_spin_unlock(&arena->lock);
3916 }
3917
3918 static bool
3919 arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3920     size_t size, size_t oldsize)
3921 {
3922         size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
3923         size_t npages = oldsize >> PAGE_SHIFT;
3924
3925         assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));
3926
3927         /* Try to extend the run. */
3928         assert(size > oldsize);
3929 #ifdef MALLOC_BALANCE
3930         arena_lock_balance(arena);
3931 #else
3932         malloc_spin_lock(&arena->lock);
3933 #endif
3934         if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
3935             & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
3936             ~PAGE_MASK) >= size - oldsize) {
3937                 /*
3938                  * The next run is available and sufficiently large.  Split the
3939                  * following run, then merge the first part with the existing
3940                  * allocation.
3941                  */
3942                 arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
3943                     ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
3944                     false);
3945
3946                 chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
3947                     CHUNK_MAP_ALLOCATED;
3948                 chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
3949                     CHUNK_MAP_ALLOCATED;
3950
3951 #ifdef MALLOC_STATS
3952                 arena->stats.allocated_large += size - oldsize;
3953 #endif
3954                 malloc_spin_unlock(&arena->lock);
3955                 return (false);
3956         }
3957         malloc_spin_unlock(&arena->lock);
3958
3959         return (true);
3960 }
3961
3962 /*
3963  * Try to resize a large allocation, in order to avoid copying.  This will
3964  * always fail if growing an object, and the following run is already in use.
3965  */
3966 static bool
3967 arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
3968 {
3969         size_t psize;
3970
3971         psize = PAGE_CEILING(size);
3972         if (psize == oldsize) {
3973                 /* Same size class. */
3974                 if (opt_junk && size < oldsize) {
3975                         memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
3976                             size);
3977                 }
3978                 return (false);
3979         } else {
3980                 arena_chunk_t *chunk;
3981                 arena_t *arena;
3982
3983                 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3984                 arena = chunk->arena;
3985                 assert(arena->magic == ARENA_MAGIC);
3986
3987                 if (psize < oldsize) {
3988                         /* Fill before shrinking in order avoid a race. */
3989                         if (opt_junk) {
3990                                 memset((void *)((uintptr_t)ptr + size), 0x5a,
3991                                     oldsize - size);
3992                         }
3993                         arena_ralloc_large_shrink(arena, chunk, ptr, psize,
3994                             oldsize);
3995                         return (false);
3996                 } else {
3997                         bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
3998                             psize, oldsize);
3999                         if (ret == false && opt_zero) {
4000                                 memset((void *)((uintptr_t)ptr + oldsize), 0,
4001                                     size - oldsize);
4002                         }
4003                         return (ret);
4004                 }
4005         }
4006 }
4007
4008 static void *
4009 arena_ralloc(void *ptr, size_t size, size_t oldsize)
4010 {
4011         void *ret;
4012         size_t copysize;
4013
4014         /* Try to avoid moving the allocation. */
4015         if (size <= bin_maxclass) {
4016                 if (oldsize <= bin_maxclass && size2bin[size] ==
4017                     size2bin[oldsize])
4018                         goto IN_PLACE;
4019         } else {
4020                 if (oldsize > bin_maxclass && oldsize <= arena_maxclass) {
4021                         assert(size > bin_maxclass);
4022                         if (arena_ralloc_large(ptr, size, oldsize) == false)
4023                                 return (ptr);
4024                 }
4025         }
4026
4027         /*
4028          * If we get here, then size and oldsize are different enough that we
4029          * need to move the object.  In that case, fall back to allocating new
4030          * space and copying.
4031          */
4032         ret = arena_malloc(choose_arena(), size, false);
4033         if (ret == NULL)
4034                 return (NULL);
4035
4036         /* Junk/zero-filling were already done by arena_malloc(). */
4037         copysize = (size < oldsize) ? size : oldsize;
4038         memcpy(ret, ptr, copysize);
4039         idalloc(ptr);
4040         return (ret);
4041 IN_PLACE:
4042         if (opt_junk && size < oldsize)
4043                 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
4044         else if (opt_zero && size > oldsize)
4045                 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
4046         return (ptr);
4047 }
4048
4049 static inline void *
4050 iralloc(void *ptr, size_t size)
4051 {
4052         size_t oldsize;
4053
4054         assert(ptr != NULL);
4055         assert(size != 0);
4056
4057         oldsize = isalloc(ptr);
4058
4059         if (size <= arena_maxclass)
4060                 return (arena_ralloc(ptr, size, oldsize));
4061         else
4062                 return (huge_ralloc(ptr, size, oldsize));
4063 }
4064
4065 static bool
4066 arena_new(arena_t *arena)
4067 {
4068         unsigned i;
4069         arena_bin_t *bin;
4070         size_t prev_run_size;
4071
4072         if (malloc_spin_init(&arena->lock))
4073                 return (true);
4074
4075 #ifdef MALLOC_STATS
4076         memset(&arena->stats, 0, sizeof(arena_stats_t));
4077 #endif
4078
4079         /* Initialize chunks. */
4080         arena_chunk_tree_dirty_new(&arena->chunks_dirty);
4081         arena->spare = NULL;
4082
4083         arena->ndirty = 0;
4084
4085         arena_avail_tree_new(&arena->runs_avail);
4086
4087 #ifdef MALLOC_BALANCE
4088         arena->contention = 0;
4089 #endif
4090
4091         /* Initialize bins. */
4092         prev_run_size = PAGE_SIZE;
4093
4094         i = 0;
4095 #ifdef MALLOC_TINY
4096         /* (2^n)-spaced tiny bins. */
4097         for (; i < ntbins; i++) {
4098                 bin = &arena->bins[i];
4099                 bin->runcur = NULL;
4100                 arena_run_tree_new(&bin->runs);
4101
4102                 bin->reg_size = (1U << (TINY_MIN_2POW + i));
4103
4104                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4105
4106 #ifdef MALLOC_STATS
4107                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4108 #endif
4109         }
4110 #endif
4111
4112         /* Quantum-spaced bins. */
4113         for (; i < ntbins + nqbins; i++) {
4114                 bin = &arena->bins[i];
4115                 bin->runcur = NULL;
4116                 arena_run_tree_new(&bin->runs);
4117
4118                 bin->reg_size = (i - ntbins + 1) << QUANTUM_2POW;
4119
4120                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4121
4122 #ifdef MALLOC_STATS
4123                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4124 #endif
4125         }
4126
4127         /* Cacheline-spaced bins. */
4128         for (; i < ntbins + nqbins + ncbins; i++) {
4129                 bin = &arena->bins[i];
4130                 bin->runcur = NULL;
4131                 arena_run_tree_new(&bin->runs);
4132
4133                 bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
4134                     CACHELINE_2POW);
4135
4136                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4137
4138 #ifdef MALLOC_STATS
4139                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4140 #endif
4141         }
4142
4143         /* Subpage-spaced bins. */
4144         for (; i < nbins; i++) {
4145                 bin = &arena->bins[i];
4146                 bin->runcur = NULL;
4147                 arena_run_tree_new(&bin->runs);
4148
4149                 bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
4150                     << SUBPAGE_2POW);
4151
4152                 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4153
4154 #ifdef MALLOC_STATS
4155                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4156 #endif
4157         }
4158
4159 #ifdef MALLOC_DEBUG
4160         arena->magic = ARENA_MAGIC;
4161 #endif
4162
4163         return (false);
4164 }
4165
4166 /* Create a new arena and insert it into the arenas array at index ind. */
4167 static arena_t *
4168 arenas_extend(unsigned ind)
4169 {
4170         arena_t *ret;
4171
4172         /* Allocate enough space for trailing bins. */
4173         ret = (arena_t *)base_alloc(sizeof(arena_t)
4174             + (sizeof(arena_bin_t) * (nbins - 1)));
4175         if (ret != NULL && arena_new(ret) == false) {
4176                 arenas[ind] = ret;
4177                 return (ret);
4178         }
4179         /* Only reached if there is an OOM error. */
4180
4181         /*
4182          * OOM here is quite inconvenient to propagate, since dealing with it
4183          * would require a check for failure in the fast path.  Instead, punt
4184          * by using arenas[0].  In practice, this is an extremely unlikely
4185          * failure.
4186          */
4187         _malloc_message(_getprogname(),
4188             ": (malloc) Error initializing arena\n", "", "");
4189         if (opt_abort)
4190                 abort();
4191
4192         return (arenas[0]);
4193 }
4194
4195 #ifdef MALLOC_MAG
4196 static mag_t *
4197 mag_create(arena_t *arena, size_t binind)
4198 {
4199         mag_t *ret;
4200
4201         if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <=
4202             bin_maxclass) {
4203                 ret = arena_malloc_small(arena, sizeof(mag_t) + (sizeof(void *)
4204                     * (max_rounds - 1)), false);
4205         } else {
4206                 ret = imalloc(sizeof(mag_t) + (sizeof(void *) * (max_rounds -
4207                     1)));
4208         }
4209         if (ret == NULL)
4210                 return (NULL);
4211         ret->binind = binind;
4212         ret->nrounds = 0;
4213
4214         return (ret);
4215 }
4216
4217 static void
4218 mag_destroy(mag_t *mag)
4219 {
4220         arena_t *arena;
4221         arena_chunk_t *chunk;
4222         size_t pageind;
4223         arena_chunk_map_t *mapelm;
4224
4225         chunk = CHUNK_ADDR2BASE(mag);
4226         arena = chunk->arena;
4227         pageind = (((uintptr_t)mag - (uintptr_t)chunk) >> PAGE_SHIFT);
4228         mapelm = &chunk->map[pageind];
4229
4230         assert(mag->nrounds == 0);
4231         if (sizeof(mag_t) + (sizeof(void *) * (max_rounds - 1)) <=
4232             bin_maxclass) {
4233                 malloc_spin_lock(&arena->lock);
4234                 arena_dalloc_small(arena, chunk, mag, mapelm);
4235                 malloc_spin_unlock(&arena->lock);
4236         } else
4237                 idalloc(mag);
4238 }
4239
4240 static mag_rack_t *
4241 mag_rack_create(arena_t *arena)
4242 {
4243
4244         assert(sizeof(mag_rack_t) + (sizeof(bin_mags_t *) * (nbins - 1)) <=
4245             bin_maxclass);
4246         return (arena_malloc_small(arena, sizeof(mag_rack_t) +
4247             (sizeof(bin_mags_t) * (nbins - 1)), true));
4248 }
4249
4250 static void
4251 mag_rack_destroy(mag_rack_t *rack)
4252 {
4253         arena_t *arena;
4254         arena_chunk_t *chunk;
4255         bin_mags_t *bin_mags;
4256         size_t i, pageind;
4257         arena_chunk_map_t *mapelm;
4258
4259         for (i = 0; i < nbins; i++) {
4260                 bin_mags = &rack->bin_mags[i];
4261                 if (bin_mags->curmag != NULL) {
4262                         assert(bin_mags->curmag->binind == i);
4263                         mag_unload(bin_mags->curmag);
4264                         mag_destroy(bin_mags->curmag);
4265                 }
4266                 if (bin_mags->sparemag != NULL) {
4267                         assert(bin_mags->sparemag->binind == i);
4268                         mag_unload(bin_mags->sparemag);
4269                         mag_destroy(bin_mags->sparemag);
4270                 }
4271         }
4272
4273         chunk = CHUNK_ADDR2BASE(rack);
4274         arena = chunk->arena;
4275         pageind = (((uintptr_t)rack - (uintptr_t)chunk) >> PAGE_SHIFT);
4276         mapelm = &chunk->map[pageind];
4277
4278         malloc_spin_lock(&arena->lock);
4279         arena_dalloc_small(arena, chunk, rack, mapelm);
4280         malloc_spin_unlock(&arena->lock);
4281 }
4282 #endif
4283
4284 /*
4285  * End arena.
4286  */
4287 /******************************************************************************/
4288 /*
4289  * Begin general internal functions.
4290  */
4291
4292 static void *
4293 huge_malloc(size_t size, bool zero)
4294 {
4295         void *ret;
4296         size_t csize;
4297         extent_node_t *node;
4298
4299         /* Allocate one or more contiguous chunks for this request. */
4300
4301         csize = CHUNK_CEILING(size);
4302         if (csize == 0) {
4303                 /* size is large enough to cause size_t wrap-around. */
4304                 return (NULL);
4305         }
4306
4307         /* Allocate an extent node with which to track the chunk. */
4308         node = base_node_alloc();
4309         if (node == NULL)
4310                 return (NULL);
4311
4312         ret = chunk_alloc(csize, zero);
4313         if (ret == NULL) {
4314                 base_node_dealloc(node);
4315                 return (NULL);
4316         }
4317
4318         /* Insert node into huge. */
4319         node->addr = ret;
4320         node->size = csize;
4321
4322         malloc_mutex_lock(&huge_mtx);
4323         extent_tree_ad_insert(&huge, node);
4324 #ifdef MALLOC_STATS
4325         huge_nmalloc++;
4326         huge_allocated += csize;
4327 #endif
4328         malloc_mutex_unlock(&huge_mtx);
4329
4330         if (zero == false) {
4331                 if (opt_junk)
4332                         memset(ret, 0xa5, csize);
4333                 else if (opt_zero)
4334                         memset(ret, 0, csize);
4335         }
4336
4337         return (ret);
4338 }
4339
4340 /* Only handles large allocations that require more than chunk alignment. */
4341 static void *
4342 huge_palloc(size_t alignment, size_t size)
4343 {
4344         void *ret;
4345         size_t alloc_size, chunk_size, offset;
4346         extent_node_t *node;
4347
4348         /*
4349          * This allocation requires alignment that is even larger than chunk
4350          * alignment.  This means that huge_malloc() isn't good enough.
4351          *
4352          * Allocate almost twice as many chunks as are demanded by the size or
4353          * alignment, in order to assure the alignment can be achieved, then
4354          * unmap leading and trailing chunks.
4355          */
4356         assert(alignment >= chunksize);
4357
4358         chunk_size = CHUNK_CEILING(size);
4359
4360         if (size >= alignment)
4361                 alloc_size = chunk_size + alignment - chunksize;
4362         else
4363                 alloc_size = (alignment << 1) - chunksize;
4364
4365         /* Allocate an extent node with which to track the chunk. */
4366         node = base_node_alloc();
4367         if (node == NULL)
4368                 return (NULL);
4369
4370         ret = chunk_alloc(alloc_size, false);
4371         if (ret == NULL) {
4372                 base_node_dealloc(node);
4373                 return (NULL);
4374         }
4375
4376         offset = (uintptr_t)ret & (alignment - 1);
4377         assert((offset & chunksize_mask) == 0);
4378         assert(offset < alloc_size);
4379         if (offset == 0) {
4380                 /* Trim trailing space. */
4381                 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
4382                     - chunk_size);
4383         } else {
4384                 size_t trailsize;
4385
4386                 /* Trim leading space. */
4387                 chunk_dealloc(ret, alignment - offset);
4388
4389                 ret = (void *)((uintptr_t)ret + (alignment - offset));
4390
4391                 trailsize = alloc_size - (alignment - offset) - chunk_size;
4392                 if (trailsize != 0) {
4393                     /* Trim trailing space. */
4394                     assert(trailsize < alloc_size);
4395                     chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
4396                         trailsize);
4397                 }
4398         }
4399
4400         /* Insert node into huge. */
4401         node->addr = ret;
4402         node->size = chunk_size;
4403
4404         malloc_mutex_lock(&huge_mtx);
4405         extent_tree_ad_insert(&huge, node);
4406 #ifdef MALLOC_STATS
4407         huge_nmalloc++;
4408         huge_allocated += chunk_size;
4409 #endif
4410         malloc_mutex_unlock(&huge_mtx);
4411
4412         if (opt_junk)
4413                 memset(ret, 0xa5, chunk_size);
4414         else if (opt_zero)
4415                 memset(ret, 0, chunk_size);
4416
4417         return (ret);
4418 }
4419
4420 static void *
4421 huge_ralloc(void *ptr, size_t size, size_t oldsize)
4422 {
4423         void *ret;
4424         size_t copysize;
4425
4426         /* Avoid moving the allocation if the size class would not change. */
4427         if (oldsize > arena_maxclass &&
4428             CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
4429                 if (opt_junk && size < oldsize) {
4430                         memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
4431                             - size);
4432                 } else if (opt_zero && size > oldsize) {
4433                         memset((void *)((uintptr_t)ptr + oldsize), 0, size
4434                             - oldsize);
4435                 }
4436                 return (ptr);
4437         }
4438
4439         /*
4440          * If we get here, then size and oldsize are different enough that we
4441          * need to use a different size class.  In that case, fall back to
4442          * allocating new space and copying.
4443          */
4444         ret = huge_malloc(size, false);
4445         if (ret == NULL)
4446                 return (NULL);
4447
4448         copysize = (size < oldsize) ? size : oldsize;
4449         memcpy(ret, ptr, copysize);
4450         idalloc(ptr);
4451         return (ret);
4452 }
4453
4454 static void
4455 huge_dalloc(void *ptr)
4456 {
4457         extent_node_t *node, key;
4458
4459         malloc_mutex_lock(&huge_mtx);
4460
4461         /* Extract from tree of huge allocations. */
4462         key.addr = ptr;
4463         node = extent_tree_ad_search(&huge, &key);
4464         assert(node != NULL);
4465         assert(node->addr == ptr);
4466         extent_tree_ad_remove(&huge, node);
4467
4468 #ifdef MALLOC_STATS
4469         huge_ndalloc++;
4470         huge_allocated -= node->size;
4471 #endif
4472
4473         malloc_mutex_unlock(&huge_mtx);
4474
4475         /* Unmap chunk. */
4476 #ifdef MALLOC_DSS
4477         if (opt_dss && opt_junk)
4478                 memset(node->addr, 0x5a, node->size);
4479 #endif
4480         chunk_dealloc(node->addr, node->size);
4481
4482         base_node_dealloc(node);
4483 }
4484
4485 static void
4486 malloc_print_stats(void)
4487 {
4488
4489         if (opt_print_stats) {
4490                 char s[UMAX2S_BUFSIZE];
4491                 _malloc_message("___ Begin malloc statistics ___\n", "", "",
4492                     "");
4493                 _malloc_message("Assertions ",
4494 #ifdef NDEBUG
4495                     "disabled",
4496 #else
4497                     "enabled",
4498 #endif
4499                     "\n", "");
4500                 _malloc_message("Boolean MALLOC_OPTIONS: ",
4501                     opt_abort ? "A" : "a", "", "");
4502 #ifdef MALLOC_DSS
4503                 _malloc_message(opt_dss ? "D" : "d", "", "", "");
4504 #endif
4505 #ifdef MALLOC_MAG
4506                 _malloc_message(opt_mag ? "G" : "g", "", "", "");
4507 #endif
4508                 _malloc_message(opt_junk ? "J" : "j", "", "", "");
4509 #ifdef MALLOC_DSS
4510                 _malloc_message(opt_mmap ? "M" : "m", "", "", "");
4511 #endif
4512                 _malloc_message(opt_utrace ? "PU" : "Pu",
4513                     opt_sysv ? "V" : "v",
4514                     opt_xmalloc ? "X" : "x",
4515                     opt_zero ? "Z\n" : "z\n");
4516
4517                 _malloc_message("CPUs: ", umax2s(ncpus, s), "\n", "");
4518                 _malloc_message("Max arenas: ", umax2s(narenas, s), "\n", "");
4519 #ifdef MALLOC_BALANCE
4520                 _malloc_message("Arena balance threshold: ",
4521                     umax2s(opt_balance_threshold, s), "\n", "");
4522 #endif
4523                 _malloc_message("Pointer size: ", umax2s(sizeof(void *), s),
4524                     "\n", "");
4525                 _malloc_message("Quantum size: ", umax2s(QUANTUM, s), "\n", "");
4526                 _malloc_message("Cacheline size (assumed): ", umax2s(CACHELINE,
4527                     s), "\n", "");
4528 #ifdef MALLOC_TINY
4529                 _malloc_message("Tiny 2^n-spaced sizes: [", umax2s((1U <<
4530                     TINY_MIN_2POW), s), "..", "");
4531                 _malloc_message(umax2s((qspace_min >> 1), s), "]\n", "", "");
4532 #endif
4533                 _malloc_message("Quantum-spaced sizes: [", umax2s(qspace_min,
4534                     s), "..", "");
4535                 _malloc_message(umax2s(qspace_max, s), "]\n", "", "");
4536                 _malloc_message("Cacheline-spaced sizes: [", umax2s(cspace_min,
4537                     s), "..", "");
4538                 _malloc_message(umax2s(cspace_max, s), "]\n", "", "");
4539                 _malloc_message("Subpage-spaced sizes: [", umax2s(sspace_min,
4540                     s), "..", "");
4541                 _malloc_message(umax2s(sspace_max, s), "]\n", "", "");
4542 #ifdef MALLOC_MAG
4543                 _malloc_message("Rounds per magazine: ", umax2s(max_rounds, s),
4544                     "\n", "");
4545 #endif
4546                 _malloc_message("Max dirty pages per arena: ",
4547                     umax2s(opt_dirty_max, s), "\n", "");
4548
4549                 _malloc_message("Chunk size: ", umax2s(chunksize, s), "", "");
4550                 _malloc_message(" (2^", umax2s(opt_chunk_2pow, s), ")\n", "");
4551
4552 #ifdef MALLOC_STATS
4553                 {
4554                         size_t allocated, mapped;
4555 #ifdef MALLOC_BALANCE
4556                         uint64_t nbalance = 0;
4557 #endif
4558                         unsigned i;
4559                         arena_t *arena;
4560
4561                         /* Calculate and print allocated/mapped stats. */
4562
4563                         /* arenas. */
4564                         for (i = 0, allocated = 0; i < narenas; i++) {
4565                                 if (arenas[i] != NULL) {
4566                                         malloc_spin_lock(&arenas[i]->lock);
4567                                         allocated +=
4568                                             arenas[i]->stats.allocated_small;
4569                                         allocated +=
4570                                             arenas[i]->stats.allocated_large;
4571 #ifdef MALLOC_BALANCE
4572                                         nbalance += arenas[i]->stats.nbalance;
4573 #endif
4574                                         malloc_spin_unlock(&arenas[i]->lock);
4575                                 }
4576                         }
4577
4578                         /* huge/base. */
4579                         malloc_mutex_lock(&huge_mtx);
4580                         allocated += huge_allocated;
4581                         mapped = stats_chunks.curchunks * chunksize;
4582                         malloc_mutex_unlock(&huge_mtx);
4583
4584                         malloc_mutex_lock(&base_mtx);
4585                         mapped += base_mapped;
4586                         malloc_mutex_unlock(&base_mtx);
4587
4588                         malloc_printf("Allocated: %zu, mapped: %zu\n",
4589                             allocated, mapped);
4590
4591 #ifdef MALLOC_BALANCE
4592                         malloc_printf("Arena balance reassignments: %llu\n",
4593                             nbalance);
4594 #endif
4595
4596                         /* Print chunk stats. */
4597                         {
4598                                 chunk_stats_t chunks_stats;
4599
4600                                 malloc_mutex_lock(&huge_mtx);
4601                                 chunks_stats = stats_chunks;
4602                                 malloc_mutex_unlock(&huge_mtx);
4603
4604                                 malloc_printf("chunks: nchunks   "
4605                                     "highchunks    curchunks\n");
4606                                 malloc_printf("  %13llu%13lu%13lu\n",
4607                                     chunks_stats.nchunks,
4608                                     chunks_stats.highchunks,
4609                                     chunks_stats.curchunks);
4610                         }
4611
4612                         /* Print chunk stats. */
4613                         malloc_printf(
4614                             "huge: nmalloc      ndalloc    allocated\n");
4615                         malloc_printf(" %12llu %12llu %12zu\n",
4616                             huge_nmalloc, huge_ndalloc, huge_allocated);
4617
4618                         /* Print stats for each arena. */
4619                         for (i = 0; i < narenas; i++) {
4620                                 arena = arenas[i];
4621                                 if (arena != NULL) {
4622                                         malloc_printf(
4623                                             "\narenas[%u]:\n", i);
4624                                         malloc_spin_lock(&arena->lock);
4625                                         stats_print(arena);
4626                                         malloc_spin_unlock(&arena->lock);
4627                                 }
4628                         }
4629                 }
4630 #endif /* #ifdef MALLOC_STATS */
4631                 _malloc_message("--- End malloc statistics ---\n", "", "", "");
4632         }
4633 }
4634
4635 #ifdef MALLOC_DEBUG
4636 static void
4637 size2bin_validate(void)
4638 {
4639         size_t i, size, binind;
4640
4641         assert(size2bin[0] == 0xffU);
4642         i = 1;
4643 #  ifdef MALLOC_TINY
4644         /* Tiny. */
4645         for (; i < (1U << TINY_MIN_2POW); i++) {
4646                 size = pow2_ceil(1U << TINY_MIN_2POW);
4647                 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4648                 assert(size2bin[i] == binind);
4649         }
4650         for (; i < qspace_min; i++) {
4651                 size = pow2_ceil(i);
4652                 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4653                 assert(size2bin[i] == binind);
4654         }
4655 #  endif
4656         /* Quantum-spaced. */
4657         for (; i <= qspace_max; i++) {
4658                 size = QUANTUM_CEILING(i);
4659                 binind = ntbins + (size >> QUANTUM_2POW) - 1;
4660                 assert(size2bin[i] == binind);
4661         }
4662         /* Cacheline-spaced. */
4663         for (; i <= cspace_max; i++) {
4664                 size = CACHELINE_CEILING(i);
4665                 binind = ntbins + nqbins + ((size - cspace_min) >>
4666                     CACHELINE_2POW);
4667                 assert(size2bin[i] == binind);
4668         }
4669         /* Sub-page. */
4670         for (; i <= sspace_max; i++) {
4671                 size = SUBPAGE_CEILING(i);
4672                 binind = ntbins + nqbins + ncbins + ((size - sspace_min)
4673                     >> SUBPAGE_2POW);
4674                 assert(size2bin[i] == binind);
4675         }
4676 }
4677 #endif
4678
4679 static bool
4680 size2bin_init(void)
4681 {
4682
4683         if (opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT
4684             || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT)
4685                 return (size2bin_init_hard());
4686
4687         size2bin = const_size2bin;
4688 #ifdef MALLOC_DEBUG
4689         assert(sizeof(const_size2bin) == bin_maxclass + 1);
4690         size2bin_validate();
4691 #endif
4692         return (false);
4693 }
4694
4695 static bool
4696 size2bin_init_hard(void)
4697 {
4698         size_t i, size, binind;
4699         uint8_t *custom_size2bin;
4700
4701         assert(opt_qspace_max_2pow != QSPACE_MAX_2POW_DEFAULT
4702             || opt_cspace_max_2pow != CSPACE_MAX_2POW_DEFAULT);
4703
4704         custom_size2bin = (uint8_t *)base_alloc(bin_maxclass + 1);
4705         if (custom_size2bin == NULL)
4706                 return (true);
4707
4708         custom_size2bin[0] = 0xffU;
4709         i = 1;
4710 #ifdef MALLOC_TINY
4711         /* Tiny. */
4712         for (; i < (1U << TINY_MIN_2POW); i++) {
4713                 size = pow2_ceil(1U << TINY_MIN_2POW);
4714                 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4715                 custom_size2bin[i] = binind;
4716         }
4717         for (; i < qspace_min; i++) {
4718                 size = pow2_ceil(i);
4719                 binind = ffs((int)(size >> (TINY_MIN_2POW + 1)));
4720                 custom_size2bin[i] = binind;
4721         }
4722 #endif
4723         /* Quantum-spaced. */
4724         for (; i <= qspace_max; i++) {
4725                 size = QUANTUM_CEILING(i);
4726                 binind = ntbins + (size >> QUANTUM_2POW) - 1;
4727                 custom_size2bin[i] = binind;
4728         }
4729         /* Cacheline-spaced. */
4730         for (; i <= cspace_max; i++) {
4731                 size = CACHELINE_CEILING(i);
4732                 binind = ntbins + nqbins + ((size - cspace_min) >>
4733                     CACHELINE_2POW);
4734                 custom_size2bin[i] = binind;
4735         }
4736         /* Sub-page. */
4737         for (; i <= sspace_max; i++) {
4738                 size = SUBPAGE_CEILING(i);
4739                 binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
4740                     SUBPAGE_2POW);
4741                 custom_size2bin[i] = binind;
4742         }
4743
4744         size2bin = custom_size2bin;
4745 #ifdef MALLOC_DEBUG
4746         size2bin_validate();
4747 #endif
4748         return (false);
4749 }
4750
4751 /*
4752  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
4753  * implementation has to take pains to avoid infinite recursion during
4754  * initialization.
4755  */
4756 static inline bool
4757 malloc_init(void)
4758 {
4759
4760         if (malloc_initialized == false)
4761                 return (malloc_init_hard());
4762
4763         return (false);
4764 }
4765
4766 static bool
4767 malloc_init_hard(void)
4768 {
4769         unsigned i;
4770         int linklen;
4771         char buf[PATH_MAX + 1];
4772         const char *opts;
4773
4774         malloc_mutex_lock(&init_lock);
4775         if (malloc_initialized) {
4776                 /*
4777                  * Another thread initialized the allocator before this one
4778                  * acquired init_lock.
4779                  */
4780                 malloc_mutex_unlock(&init_lock);
4781                 return (false);
4782         }
4783
4784         /* Get number of CPUs. */
4785         {
4786                 int mib[2];
4787                 size_t len;
4788
4789                 mib[0] = CTL_HW;
4790                 mib[1] = HW_NCPU;
4791                 len = sizeof(ncpus);
4792                 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
4793                         /* Error. */
4794                         ncpus = 1;
4795                 }
4796         }
4797
4798         for (i = 0; i < 3; i++) {
4799                 unsigned j;
4800
4801                 /* Get runtime configuration. */
4802                 switch (i) {
4803                 case 0:
4804                         if ((linklen = readlink("/etc/malloc.conf", buf,
4805                                                 sizeof(buf) - 1)) != -1) {
4806                                 /*
4807                                  * Use the contents of the "/etc/malloc.conf"
4808                                  * symbolic link's name.
4809                                  */
4810                                 buf[linklen] = '\0';
4811                                 opts = buf;
4812                         } else {
4813                                 /* No configuration specified. */
4814                                 buf[0] = '\0';
4815                                 opts = buf;
4816                         }
4817                         break;
4818                 case 1:
4819                         if (issetugid() == 0 && (opts =
4820                             getenv("MALLOC_OPTIONS")) != NULL) {
4821                                 /*
4822                                  * Do nothing; opts is already initialized to
4823                                  * the value of the MALLOC_OPTIONS environment
4824                                  * variable.
4825                                  */
4826                         } else {
4827                                 /* No configuration specified. */
4828                                 buf[0] = '\0';
4829                                 opts = buf;
4830                         }
4831                         break;
4832                 case 2:
4833                         if (_malloc_options != NULL) {
4834                                 /*
4835                                  * Use options that were compiled into the
4836                                  * program.
4837                                  */
4838                                 opts = _malloc_options;
4839                         } else {
4840                                 /* No configuration specified. */
4841                                 buf[0] = '\0';
4842                                 opts = buf;
4843                         }
4844                         break;
4845                 default:
4846                         /* NOTREACHED */
4847                         assert(false);
4848                 }
4849
4850                 for (j = 0; opts[j] != '\0'; j++) {
4851                         unsigned k, nreps;
4852                         bool nseen;
4853
4854                         /* Parse repetition count, if any. */
4855                         for (nreps = 0, nseen = false;; j++, nseen = true) {
4856                                 switch (opts[j]) {
4857                                         case '0': case '1': case '2': case '3':
4858                                         case '4': case '5': case '6': case '7':
4859                                         case '8': case '9':
4860                                                 nreps *= 10;
4861                                                 nreps += opts[j] - '0';
4862                                                 break;
4863                                         default:
4864                                                 goto MALLOC_OUT;
4865                                 }
4866                         }
4867 MALLOC_OUT:
4868                         if (nseen == false)
4869                                 nreps = 1;
4870
4871                         for (k = 0; k < nreps; k++) {
4872                                 switch (opts[j]) {
4873                                 case 'a':
4874                                         opt_abort = false;
4875                                         break;
4876                                 case 'A':
4877                                         opt_abort = true;
4878                                         break;
4879                                 case 'b':
4880 #ifdef MALLOC_BALANCE
4881                                         opt_balance_threshold >>= 1;
4882 #endif
4883                                         break;
4884                                 case 'B':
4885 #ifdef MALLOC_BALANCE
4886                                         if (opt_balance_threshold == 0)
4887                                                 opt_balance_threshold = 1;
4888                                         else if ((opt_balance_threshold << 1)
4889                                             > opt_balance_threshold)
4890                                                 opt_balance_threshold <<= 1;
4891 #endif
4892                                         break;
4893                                 case 'c':
4894                                         if (opt_cspace_max_2pow - 1 >
4895                                             opt_qspace_max_2pow &&
4896                                             opt_cspace_max_2pow >
4897                                             CACHELINE_2POW)
4898                                                 opt_cspace_max_2pow--;
4899                                         break;
4900                                 case 'C':
4901                                         if (opt_cspace_max_2pow < PAGE_SHIFT
4902                                             - 1)
4903                                                 opt_cspace_max_2pow++;
4904                                         break;
4905                                 case 'd':
4906 #ifdef MALLOC_DSS
4907                                         opt_dss = false;
4908 #endif
4909                                         break;
4910                                 case 'D':
4911 #ifdef MALLOC_DSS
4912                                         opt_dss = true;
4913 #endif
4914                                         break;
4915                                 case 'f':
4916                                         opt_dirty_max >>= 1;
4917                                         break;
4918                                 case 'F':
4919                                         if (opt_dirty_max == 0)
4920                                                 opt_dirty_max = 1;
4921                                         else if ((opt_dirty_max << 1) != 0)
4922                                                 opt_dirty_max <<= 1;
4923                                         break;
4924 #ifdef MALLOC_MAG
4925                                 case 'g':
4926                                         opt_mag = false;
4927                                         break;
4928                                 case 'G':
4929                                         opt_mag = true;
4930                                         break;
4931 #endif
4932                                 case 'j':
4933                                         opt_junk = false;
4934                                         break;
4935                                 case 'J':
4936                                         opt_junk = true;
4937                                         break;
4938                                 case 'k':
4939                                         /*
4940                                          * Chunks always require at least one
4941                                          * header page, so chunks can never be
4942                                          * smaller than two pages.
4943                                          */
4944                                         if (opt_chunk_2pow > PAGE_SHIFT + 1)
4945                                                 opt_chunk_2pow--;
4946                                         break;
4947                                 case 'K':
4948                                         if (opt_chunk_2pow + 1 <
4949                                             (sizeof(size_t) << 3))
4950                                                 opt_chunk_2pow++;
4951                                         break;
4952                                 case 'm':
4953 #ifdef MALLOC_DSS
4954                                         opt_mmap = false;
4955 #endif
4956                                         break;
4957                                 case 'M':
4958 #ifdef MALLOC_DSS
4959                                         opt_mmap = true;
4960 #endif
4961                                         break;
4962                                 case 'n':
4963                                         opt_narenas_lshift--;
4964                                         break;
4965                                 case 'N':
4966                                         opt_narenas_lshift++;
4967                                         break;
4968                                 case 'p':
4969                                         opt_print_stats = false;
4970                                         break;
4971                                 case 'P':
4972                                         opt_print_stats = true;
4973                                         break;
4974                                 case 'q':
4975                                         if (opt_qspace_max_2pow > QUANTUM_2POW)
4976                                                 opt_qspace_max_2pow--;
4977                                         break;
4978                                 case 'Q':
4979                                         if (opt_qspace_max_2pow + 1 <
4980                                             opt_cspace_max_2pow)
4981                                                 opt_qspace_max_2pow++;
4982                                         break;
4983 #ifdef MALLOC_MAG
4984                                 case 'R':
4985                                         if (opt_mag_size_2pow + 1 < (8U <<
4986                                             SIZEOF_PTR_2POW))
4987                                                 opt_mag_size_2pow++;
4988                                         break;
4989                                 case 'r':
4990                                         /*
4991                                          * Make sure there's always at least
4992                                          * one round per magazine.
4993                                          */
4994                                         if ((1U << (opt_mag_size_2pow-1)) >=
4995                                             sizeof(mag_t))
4996                                                 opt_mag_size_2pow--;
4997                                         break;
4998 #endif
4999                                 case 'u':
5000                                         opt_utrace = false;
5001                                         break;
5002                                 case 'U':
5003                                         opt_utrace = true;
5004                                         break;
5005                                 case 'v':
5006                                         opt_sysv = false;
5007                                         break;
5008                                 case 'V':
5009                                         opt_sysv = true;
5010                                         break;
5011                                 case 'x':
5012                                         opt_xmalloc = false;
5013                                         break;
5014                                 case 'X':
5015                                         opt_xmalloc = true;
5016                                         break;
5017                                 case 'z':
5018                                         opt_zero = false;
5019                                         break;
5020                                 case 'Z':
5021                                         opt_zero = true;
5022                                         break;
5023                                 default: {
5024                                         char cbuf[2];
5025
5026                                         cbuf[0] = opts[j];
5027                                         cbuf[1] = '\0';
5028                                         _malloc_message(_getprogname(),
5029                                             ": (malloc) Unsupported character "
5030                                             "in malloc options: '", cbuf,
5031                                             "'\n");
5032                                 }
5033                                 }
5034                         }
5035                 }
5036         }
5037
5038 #ifdef MALLOC_DSS
5039         /* Make sure that there is some method for acquiring memory. */
5040         if (opt_dss == false && opt_mmap == false)
5041                 opt_mmap = true;
5042 #endif
5043
5044         /* Take care to call atexit() only once. */
5045         if (opt_print_stats) {
5046                 /* Print statistics at exit. */
5047                 atexit(malloc_print_stats);
5048         }
5049
5050 #ifdef MALLOC_MAG
5051         /*
5052          * Calculate the actual number of rounds per magazine, taking into
5053          * account header overhead.
5054          */
5055         max_rounds = (1LLU << (opt_mag_size_2pow - SIZEOF_PTR_2POW)) -
5056             (sizeof(mag_t) >> SIZEOF_PTR_2POW) + 1;
5057 #endif
5058
5059         /* Set variables according to the value of opt_[qc]space_max_2pow. */
5060         qspace_max = (1U << opt_qspace_max_2pow);
5061         cspace_min = CACHELINE_CEILING(qspace_max);
5062         if (cspace_min == qspace_max)
5063                 cspace_min += CACHELINE;
5064         cspace_max = (1U << opt_cspace_max_2pow);
5065         sspace_min = SUBPAGE_CEILING(cspace_max);
5066         if (sspace_min == cspace_max)
5067                 sspace_min += SUBPAGE;
5068         assert(sspace_min < PAGE_SIZE);
5069         sspace_max = PAGE_SIZE - SUBPAGE;
5070
5071 #ifdef MALLOC_TINY
5072         assert(QUANTUM_2POW >= TINY_MIN_2POW);
5073 #endif
5074         assert(ntbins <= QUANTUM_2POW);
5075         nqbins = qspace_max >> QUANTUM_2POW;
5076         ncbins = ((cspace_max - cspace_min) >> CACHELINE_2POW) + 1;
5077         nsbins = ((sspace_max - sspace_min) >> SUBPAGE_2POW) + 1;
5078         nbins = ntbins + nqbins + ncbins + nsbins;
5079
5080         if (size2bin_init()) {
5081                 malloc_mutex_unlock(&init_lock);
5082                 return (true);
5083         }
5084
5085         /* Set variables according to the value of opt_chunk_2pow. */
5086         chunksize = (1LU << opt_chunk_2pow);
5087         chunksize_mask = chunksize - 1;
5088         chunk_npages = (chunksize >> PAGE_SHIFT);
5089         {
5090                 size_t header_size;
5091
5092                 /*
5093                  * Compute the header size such that it is large enough to
5094                  * contain the page map.
5095                  */
5096                 header_size = sizeof(arena_chunk_t) +
5097                     (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
5098                 arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
5099                     ((header_size & PAGE_MASK) != 0);
5100         }
5101         arena_maxclass = chunksize - (arena_chunk_header_npages <<
5102             PAGE_SHIFT);
5103
5104         UTRACE(0, 0, 0);
5105
5106 #ifdef MALLOC_STATS
5107         memset(&stats_chunks, 0, sizeof(chunk_stats_t));
5108 #endif
5109
5110         /* Various sanity checks that regard configuration. */
5111         assert(chunksize >= PAGE_SIZE);
5112
5113         /* Initialize chunks data. */
5114         malloc_mutex_init(&huge_mtx);
5115         extent_tree_ad_new(&huge);
5116 #ifdef MALLOC_DSS
5117         malloc_mutex_init(&dss_mtx);
5118         dss_base = sbrk(0);
5119         dss_prev = dss_base;
5120         dss_max = dss_base;
5121         extent_tree_szad_new(&dss_chunks_szad);
5122         extent_tree_ad_new(&dss_chunks_ad);
5123 #endif
5124 #ifdef MALLOC_STATS
5125         huge_nmalloc = 0;
5126         huge_ndalloc = 0;
5127         huge_allocated = 0;
5128 #endif
5129
5130         /* Initialize base allocation data structures. */
5131 #ifdef MALLOC_STATS
5132         base_mapped = 0;
5133 #endif
5134 #ifdef MALLOC_DSS
5135         /*
5136          * Allocate a base chunk here, since it doesn't actually have to be
5137          * chunk-aligned.  Doing this before allocating any other chunks allows
5138          * the use of space that would otherwise be wasted.
5139          */
5140         if (opt_dss)
5141                 base_pages_alloc(0);
5142 #endif
5143         base_nodes = NULL;
5144         malloc_mutex_init(&base_mtx);
5145
5146         if (ncpus > 1) {
5147                 /*
5148                  * For SMP systems, create twice as many arenas as there are
5149                  * CPUs by default.
5150                  */
5151                 opt_narenas_lshift++;
5152         }
5153
5154         /* Determine how many arenas to use. */
5155         narenas = ncpus;
5156         if (opt_narenas_lshift > 0) {
5157                 if ((narenas << opt_narenas_lshift) > narenas)
5158                         narenas <<= opt_narenas_lshift;
5159                 /*
5160                  * Make sure not to exceed the limits of what base_alloc() can
5161                  * handle.
5162                  */
5163                 if (narenas * sizeof(arena_t *) > chunksize)
5164                         narenas = chunksize / sizeof(arena_t *);
5165         } else if (opt_narenas_lshift < 0) {
5166                 if ((narenas >> -opt_narenas_lshift) < narenas)
5167                         narenas >>= -opt_narenas_lshift;
5168                 /* Make sure there is at least one arena. */
5169                 if (narenas == 0)
5170                         narenas = 1;
5171         }
5172 #ifdef MALLOC_BALANCE
5173         assert(narenas != 0);
5174         for (narenas_2pow = 0;
5175              (narenas >> (narenas_2pow + 1)) != 0;
5176              narenas_2pow++);
5177 #endif
5178
5179 #ifdef NO_TLS
5180         if (narenas > 1) {
5181                 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
5182                     23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
5183                     89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
5184                     151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
5185                     223, 227, 229, 233, 239, 241, 251, 257, 263};
5186                 unsigned nprimes, parenas;
5187
5188                 /*
5189                  * Pick a prime number of hash arenas that is more than narenas
5190                  * so that direct hashing of pthread_self() pointers tends to
5191                  * spread allocations evenly among the arenas.
5192                  */
5193                 assert((narenas & 1) == 0); /* narenas must be even. */
5194                 nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
5195                 parenas = primes[nprimes - 1]; /* In case not enough primes. */
5196                 for (i = 1; i < nprimes; i++) {
5197                         if (primes[i] > narenas) {
5198                                 parenas = primes[i];
5199                                 break;
5200                         }
5201                 }
5202                 narenas = parenas;
5203         }
5204 #endif
5205
5206 #ifndef NO_TLS
5207 #  ifndef MALLOC_BALANCE
5208         next_arena = 0;
5209 #  endif
5210 #endif
5211
5212         /* Allocate and initialize arenas. */
5213         arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
5214         if (arenas == NULL) {
5215                 malloc_mutex_unlock(&init_lock);
5216                 return (true);
5217         }
5218         /*
5219          * Zero the array.  In practice, this should always be pre-zeroed,
5220          * since it was just mmap()ed, but let's be sure.
5221          */
5222         memset(arenas, 0, sizeof(arena_t *) * narenas);
5223
5224         /*
5225          * Initialize one arena here.  The rest are lazily created in
5226          * choose_arena_hard().
5227          */
5228         arenas_extend(0);
5229         if (arenas[0] == NULL) {
5230                 malloc_mutex_unlock(&init_lock);
5231                 return (true);
5232         }
5233 #ifndef NO_TLS
5234         /*
5235          * Assign the initial arena to the initial thread, in order to avoid
5236          * spurious creation of an extra arena if the application switches to
5237          * threaded mode.
5238          */
5239         arenas_map = arenas[0];
5240 #endif
5241         /*
5242          * Seed here for the initial thread, since choose_arena_hard() is only
5243          * called for other threads.  The seed value doesn't really matter.
5244          */
5245 #ifdef MALLOC_BALANCE
5246         SPRN(balance, 42);
5247 #endif
5248
5249         malloc_spin_init(&arenas_lock);
5250
5251         malloc_initialized = true;
5252         malloc_mutex_unlock(&init_lock);
5253         return (false);
5254 }
5255
5256 /*
5257  * End general internal functions.
5258  */
5259 /******************************************************************************/
5260 /*
5261  * Begin malloc(3)-compatible functions.
5262  */
5263
5264 void *
5265 malloc(size_t size)
5266 {
5267         void *ret;
5268
5269         if (malloc_init()) {
5270                 ret = NULL;
5271                 goto RETURN;
5272         }
5273
5274         if (size == 0) {
5275                 if (opt_sysv == false)
5276                         size = 1;
5277                 else {
5278                         ret = NULL;
5279                         goto RETURN;
5280                 }
5281         }
5282
5283         ret = imalloc(size);
5284
5285 RETURN:
5286         if (ret == NULL) {
5287                 if (opt_xmalloc) {
5288                         _malloc_message(_getprogname(),
5289                             ": (malloc) Error in malloc(): out of memory\n", "",
5290                             "");
5291                         abort();
5292                 }
5293                 errno = ENOMEM;
5294         }
5295
5296         UTRACE(0, size, ret);
5297         return (ret);
5298 }
5299
5300 int
5301 posix_memalign(void **memptr, size_t alignment, size_t size)
5302 {
5303         int ret;
5304         void *result;
5305
5306         if (malloc_init())
5307                 result = NULL;
5308         else {
5309                 /* Make sure that alignment is a large enough power of 2. */
5310                 if (((alignment - 1) & alignment) != 0
5311                     || alignment < sizeof(void *)) {
5312                         if (opt_xmalloc) {
5313                                 _malloc_message(_getprogname(),
5314                                     ": (malloc) Error in posix_memalign(): "
5315                                     "invalid alignment\n", "", "");
5316                                 abort();
5317                         }
5318                         result = NULL;
5319                         ret = EINVAL;
5320                         goto RETURN;
5321                 }
5322
5323                 if (size == 0) {
5324                         if (opt_sysv == false)
5325                                 size = 1;
5326                         else {
5327                                 result = NULL;
5328                                 ret = 0;
5329                                 goto RETURN;
5330                         }
5331                 }
5332                 result = ipalloc(alignment, size);
5333         }
5334
5335         if (result == NULL) {
5336                 if (opt_xmalloc) {
5337                         _malloc_message(_getprogname(),
5338                         ": (malloc) Error in posix_memalign(): out of memory\n",
5339                         "", "");
5340                         abort();
5341                 }
5342                 ret = ENOMEM;
5343                 goto RETURN;
5344         }
5345
5346         *memptr = result;
5347         ret = 0;
5348
5349 RETURN:
5350         UTRACE(0, size, result);
5351         return (ret);
5352 }
5353
5354 void *
5355 calloc(size_t num, size_t size)
5356 {
5357         void *ret;
5358         size_t num_size;
5359
5360         if (malloc_init()) {
5361                 num_size = 0;
5362                 ret = NULL;
5363                 goto RETURN;
5364         }
5365
5366         num_size = num * size;
5367         if (num_size == 0) {
5368                 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
5369                         num_size = 1;
5370                 else {
5371                         ret = NULL;
5372                         goto RETURN;
5373                 }
5374         /*
5375          * Try to avoid division here.  We know that it isn't possible to
5376          * overflow during multiplication if neither operand uses any of the
5377          * most significant half of the bits in a size_t.
5378          */
5379         } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
5380             && (num_size / size != num)) {
5381                 /* size_t overflow. */
5382                 ret = NULL;
5383                 goto RETURN;
5384         }
5385
5386         ret = icalloc(num_size);
5387
5388 RETURN:
5389         if (ret == NULL) {
5390                 if (opt_xmalloc) {
5391                         _malloc_message(_getprogname(),
5392                             ": (malloc) Error in calloc(): out of memory\n", "",
5393                             "");
5394                         abort();
5395                 }
5396                 errno = ENOMEM;
5397         }
5398
5399         UTRACE(0, num_size, ret);
5400         return (ret);
5401 }
5402
5403 void *
5404 realloc(void *ptr, size_t size)
5405 {
5406         void *ret;
5407
5408         if (size == 0) {
5409                 if (opt_sysv == false)
5410                         size = 1;
5411                 else {
5412                         if (ptr != NULL)
5413                                 idalloc(ptr);
5414                         ret = NULL;
5415                         goto RETURN;
5416                 }
5417         }
5418
5419         if (ptr != NULL) {
5420                 assert(malloc_initialized);
5421
5422                 ret = iralloc(ptr, size);
5423
5424                 if (ret == NULL) {
5425                         if (opt_xmalloc) {
5426                                 _malloc_message(_getprogname(),
5427                                     ": (malloc) Error in realloc(): out of "
5428                                     "memory\n", "", "");
5429                                 abort();
5430                         }
5431                         errno = ENOMEM;
5432                 }
5433         } else {
5434                 if (malloc_init())
5435                         ret = NULL;
5436                 else
5437                         ret = imalloc(size);
5438
5439                 if (ret == NULL) {
5440                         if (opt_xmalloc) {
5441                                 _malloc_message(_getprogname(),
5442                                     ": (malloc) Error in realloc(): out of "
5443                                     "memory\n", "", "");
5444                                 abort();
5445                         }
5446                         errno = ENOMEM;
5447                 }
5448         }
5449
5450 RETURN:
5451         UTRACE(ptr, size, ret);
5452         return (ret);
5453 }
5454
5455 void
5456 free(void *ptr)
5457 {
5458
5459         UTRACE(ptr, 0, 0);
5460         if (ptr != NULL) {
5461                 assert(malloc_initialized);
5462
5463                 idalloc(ptr);
5464         }
5465 }
5466
5467 /*
5468  * End malloc(3)-compatible functions.
5469  */
5470 /******************************************************************************/
5471 /*
5472  * Begin non-standard functions.
5473  */
5474
5475 size_t
5476 malloc_usable_size(const void *ptr)
5477 {
5478
5479         assert(ptr != NULL);
5480
5481         return (isalloc(ptr));
5482 }
5483
5484 /*
5485  * End non-standard functions.
5486  */
5487 /******************************************************************************/
5488 /*
5489  * Begin library-private functions.
5490  */
5491
5492 /******************************************************************************/
5493 /*
5494  * Begin thread cache.
5495  */
5496
5497 /*
5498  * We provide an unpublished interface in order to receive notifications from
5499  * the pthreads library whenever a thread exits.  This allows us to clean up
5500  * thread caches.
5501  */
5502 void
5503 _malloc_thread_cleanup(void)
5504 {
5505
5506 #ifdef MALLOC_MAG
5507         if (mag_rack != NULL) {
5508                 assert(mag_rack != (void *)-1);
5509                 mag_rack_destroy(mag_rack);
5510 #ifdef MALLOC_DEBUG
5511                 mag_rack = (void *)-1;
5512 #endif
5513         }
5514 #endif
5515 }
5516
5517 /*
5518  * The following functions are used by threading libraries for protection of
5519  * malloc during fork().  These functions are only called if the program is
5520  * running in threaded mode, so there is no need to check whether the program
5521  * is threaded here.
5522  */
5523
5524 void
5525 _malloc_prefork(void)
5526 {
5527         bool again;
5528         unsigned i, j;
5529         arena_t *larenas[narenas], *tarenas[narenas];
5530
5531         /* Acquire all mutexes in a safe order. */
5532
5533         /*
5534          * arenas_lock must be acquired after all of the arena mutexes, in
5535          * order to avoid potential deadlock with arena_lock_balance[_hard]().
5536          * Since arenas_lock protects the arenas array, the following code has
5537          * to race with arenas_extend() callers until it succeeds in locking
5538          * all arenas before locking arenas_lock.
5539          */
5540         memset(larenas, 0, sizeof(arena_t *) * narenas);
5541         do {
5542                 again = false;
5543
5544                 malloc_spin_lock(&arenas_lock);
5545                 for (i = 0; i < narenas; i++) {
5546                         if (arenas[i] != larenas[i]) {
5547                                 memcpy(tarenas, arenas, sizeof(arena_t *) *
5548                                     narenas);
5549                                 malloc_spin_unlock(&arenas_lock);
5550                                 for (j = 0; j < narenas; j++) {
5551                                         if (larenas[j] != tarenas[j]) {
5552                                                 larenas[j] = tarenas[j];
5553                                                 malloc_spin_lock(
5554                                                     &larenas[j]->lock);
5555                                         }
5556                                 }
5557                                 again = true;
5558                                 break;
5559                         }
5560                 }
5561         } while (again);
5562
5563         malloc_mutex_lock(&base_mtx);
5564
5565         malloc_mutex_lock(&huge_mtx);
5566
5567 #ifdef MALLOC_DSS
5568         malloc_mutex_lock(&dss_mtx);
5569 #endif
5570 }
5571
5572 void
5573 _malloc_postfork(void)
5574 {
5575         unsigned i;
5576         arena_t *larenas[narenas];
5577
5578         /* Release all mutexes, now that fork() has completed. */
5579
5580 #ifdef MALLOC_DSS
5581         malloc_mutex_unlock(&dss_mtx);
5582 #endif
5583
5584         malloc_mutex_unlock(&huge_mtx);
5585
5586         malloc_mutex_unlock(&base_mtx);
5587
5588         memcpy(larenas, arenas, sizeof(arena_t *) * narenas);
5589         malloc_spin_unlock(&arenas_lock);
5590         for (i = 0; i < narenas; i++) {
5591                 if (larenas[i] != NULL)
5592                         malloc_spin_unlock(&larenas[i]->lock);
5593         }
5594 }
5595
5596 /*
5597  * End library-private functions.
5598  */
5599 /******************************************************************************/