/*-
 * Copyright (C) 2006 Jason Evans <jasone@FreeBSD.org>.
 * All rights reserved.
 *
5 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems. The following
33 * features are included for this purpose:
35 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 * contention and cache sloshing.
38 * + Cache line sharing between arenas is avoided for internal data
41 * + Memory is managed in chunks and runs (chunks can be split into runs using
42 * a binary buddy scheme), rather than as individual pages. This provides
43 * a constant-time mechanism for associating allocations with particular
46 * Allocation requests are rounded up to the nearest size class, and no record
47 * of the original request size is maintained. Allocations are broken into
48 * categories according to size class. Assuming runtime defaults, 4 kB pages
49 * and a 16 byte quantum, the size classes in each category are as follows:
 * |====================================|
 * | Category | Subcategory    |   Size |
 * |====================================|
 * | Small    | Tiny           |      2 |
 * |          |                |      4 |
 * |          |                |      8 |
 * |          |----------------+--------|
 * |          | Quantum-spaced |     16 |
 * |          |                |     32 |
 * |          |                |     48 |
 * |          |                |    ... |
 * |          |                |    496 |
 * |          |                |    512 |
 * |          |----------------+--------|
 * |          | Sub-page       |   1 kB |
 * |          |                |   2 kB |
 * |====================================|
 * | Large                    |    4 kB |
 * |                           |   8 kB |
 * |                           |  16 kB |
 * |                           |    ... |
 * |                           |   1 MB |
 * |====================================|
 * | Huge                     |    2 MB |
 * |                           |   4 MB |
 * |                           |   6 MB |
 * |                           |    ... |
 * |====================================|
83 * A different mechanism is used for each category:
85 * Small : Each size class is segregated into its own set of runs. Each run
86 * maintains a bitmap of which regions are free/allocated.
88 * Large : Each allocation is backed by a dedicated run. Metadata are stored
89 * in the associated arena chunk header maps.
91 * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
92 * Metadata are stored in a separate red-black tree.
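 *
 * For example, assuming the runtime defaults above (4 kB pages, 16 byte
 * quantum, 2 MB chunks): a request for 100 bytes is quantum-spaced and is
 * rounded up to 112 bytes; a request for 5000 bytes is large and is backed by
 * a dedicated 8 kB run; a request for 3 MB is huge and is rounded up to two
 * 2 MB chunks.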
94 *******************************************************************************
98 *******************************************************************************
 *******************************************************************************
 */
105 /* Ring definitions. */
#define qr(a_type) struct {						\
	a_type	*qre_next;	/* Link to next element. */		\
	a_type	*qre_prev;	/* Link to previous element. */		\
}
111 #define qr_initializer {NULL, NULL}
113 /* Ring functions. */
114 #define qr_new(a_qr, a_field) do { \
115 (a_qr)->a_field.qre_next = (a_qr); \
	(a_qr)->a_field.qre_prev = (a_qr);				\
} while (0)
119 #define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
121 #define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
123 #define qr_before_insert(a_qrelm, a_qr, a_field) do { \
124 (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \
125 (a_qr)->a_field.qre_next = (a_qrelm); \
126 (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \
	(a_qrelm)->a_field.qre_prev = (a_qr);				\
} while (0)
130 #define qr_after_insert(a_qrelm, a_qr, a_field) do { \
131 (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \
132 (a_qr)->a_field.qre_prev = (a_qrelm); \
133 (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \
	(a_qrelm)->a_field.qre_next = (a_qr);				\
} while (0)
#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do {			\
	a_type *t;							\
	(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b);	\
	(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a);	\
	t = (a_qr_a)->a_field.qre_prev;					\
	(a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev;	\
	(a_qr_b)->a_field.qre_prev = t;					\
} while (0)
147 * qr_meld() and qr_split() are functionally equivalent, so there's no need to
148 * have two copies of the code.
150 #define qr_split(a_qr_a, a_qr_b, a_type, a_field) \
151 qr_meld((a_qr_a), (a_qr_b), a_type, a_field)
153 #define qr_remove(a_qr, a_field) do { \
154 (a_qr)->a_field.qre_prev->a_field.qre_next \
155 = (a_qr)->a_field.qre_next; \
156 (a_qr)->a_field.qre_next->a_field.qre_prev \
157 = (a_qr)->a_field.qre_prev; \
158 (a_qr)->a_field.qre_next = (a_qr); \
	(a_qr)->a_field.qre_prev = (a_qr);				\
} while (0)
#define qr_foreach(var, a_qr, a_field)					\
	for ((var) = (a_qr);						\
	    (var) != NULL;						\
	    (var) = (((var)->a_field.qre_next != (a_qr))		\
	    ? (var)->a_field.qre_next : NULL))
#define qr_reverse_foreach(var, a_qr, a_field)				\
	for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL;	\
	    (var) != NULL;						\
	    (var) = (((var) != (a_qr))					\
	    ? (var)->a_field.qre_prev : NULL))
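/*
 * Example usage of the ring macros (hypothetical element type, for
 * illustration only):
 *
 *	typedef struct widget_s widget_t;
 *	struct widget_s {
 *		int		data;
 *		qr(widget_t)	link;
 *	};
 *
 *	widget_t a, b;
 *	qr_new(&a, link);
 *	qr_new(&b, link);
 *	qr_after_insert(&a, &b, link);	// Ring is now a <-> b.
 *	qr_remove(&b, link);		// a and b are singleton rings again.
 */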
174 /******************************************************************************/
/*
 * In order to disable various extra features that may have negative
 * performance impacts (assertions, expanded statistics), define
 * NO_MALLOC_EXTRAS.
 */
181 /* #define NO_MALLOC_EXTRAS */
183 #ifndef NO_MALLOC_EXTRAS
#  define MALLOC_DEBUG
#endif
187 #include <sys/cdefs.h>
188 __FBSDID("$FreeBSD$");
190 #include "libc_private.h"
194 #include "spinlock.h"
195 #include "namespace.h"
196 #include <sys/mman.h>
197 #include <sys/param.h>
198 #include <sys/stddef.h>
199 #include <sys/time.h>
200 #include <sys/types.h>
201 #include <sys/sysctl.h>
202 #include <sys/tree.h>
204 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
206 #include <machine/atomic.h>
207 #include <machine/cpufunc.h>
208 #include <machine/vmparam.h>
222 #include "un-namespace.h"
/*
 * Calculate statistics that can be used to get an idea of how well caching is
 * working.
 */
228 #ifndef NO_MALLOC_EXTRAS
#  define MALLOC_STATS
#endif
240 /* Disable inlining to make debugging easier. */
244 /* Size of stack-allocated buffer passed to strerror_r(). */
245 #define STRERROR_BUF 64
247 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
#ifdef __i386__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		4
#endif
#ifdef __ia64__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		8
#endif
#ifdef __alpha__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		8
#endif
#ifdef __sparc64__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		8
#endif
#ifdef __amd64__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		8
#endif
#ifdef __arm__
#  define QUANTUM_2POW_MIN	3
#  define SIZEOF_PTR		4
#endif
#ifdef __powerpc__
#  define QUANTUM_2POW_MIN	4
#  define SIZEOF_PTR		4
#endif
283 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
284 #ifndef SIZEOF_INT_2POW
#  define SIZEOF_INT_2POW	2
#endif
288 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
#if (!defined(PIC) && !defined(NO_TLS))
#  define NO_TLS
#endif
/*
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.
 *
 * chunk_size must satisfy:
 *
 *   2^(pagesize_2pow - 1 + RUN_MIN_REGS_2POW) <= chunk_size <= 2^28
 */
301 #define CHUNK_2POW_DEFAULT 21
302 #define CHUNK_2POW_MAX 28
/*
 * Maximum size of L1 cache line. This is used to avoid cache line aliasing,
 * so over-estimates are okay (up to a point), but under-estimates will
 * negatively affect performance.
 */
309 #define CACHELINE_2POW 6
310 #define CACHELINE ((size_t)(1 << CACHELINE_2POW))
/*
 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2. Above this size, allocations are rounded up to the nearest
 * power of 2.
 */
317 #define SMALL_MAX_2POW_DEFAULT 9
318 #define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT)
/*
 * Minimum number of regions that must fit into a run that serves quantum-size
 * allocations.
 *
 * Note that if this is set too low, space will be wasted if there are size
 * classes that are small enough that RUN_MIN_REGS regions don't fill a page.
 * If this is set too high, then the overhead of searching through the bitmap
 * that tracks region usage will become excessive.
 */
329 #define RUN_MIN_REGS_2POW 10
330 #define RUN_MIN_REGS (1 << RUN_MIN_REGS_2POW)
/*
 * Maximum number of pages for a run that is used for bin allocations.
 *
 * Note that if this is set too low, then fragmentation for the largest bin
 * size classes will be high. If this is set too high, then even small
 * programs will often have to allocate more than two chunks early on.
 */
339 #define RUN_MAX_PAGES_2POW 4
340 #define RUN_MAX_PAGES (1 << RUN_MAX_PAGES_2POW)
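/*
 * For example, with the defaults above and a 16 byte size class, a run must
 * hold at least RUN_MIN_REGS (1024) regions and therefore be at least 16 kB,
 * while a bin run is never larger than RUN_MAX_PAGES (16) pages (64 kB with
 * 4 kB pages); arena_new() clamps each bin's run_size to this range.
 */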
342 /******************************************************************************/
/*
 * Mutexes based on spinlocks. We can't use normal pthread mutexes, because
 * they require malloc()ed memory.
 */
352 /* Set to true once the allocator has been initialized. */
353 static bool malloc_initialized = false;
355 /* Used to avoid initialization races. */
356 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
358 /******************************************************************************/
/*
 * Statistics data structures.
 */
#ifdef MALLOC_STATS

365 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
366 struct malloc_bin_stats_s {
	/*
	 * Number of allocation requests that corresponded to the size of this
	 * bin.
	 */
	uint64_t	nrequests;
	/* Total number of runs created for this bin's size class. */
	uint64_t	nruns;
	/*
	 * Total number of run promotions/demotions for this bin's size class.
	 */
	uint64_t	npromote;
	uint64_t	ndemote;
	/* High-water mark for this bin. */
	unsigned long	highruns;
	/* Current number of runs in this bin. */
	unsigned long	curruns;
};
389 typedef struct arena_stats_s arena_stats_t;
390 struct arena_stats_s {
	/* Total byte count of allocated memory, not including overhead. */
	size_t		allocated;
	/* Number of times each function was called. */
	uint64_t	nmalloc;
	uint64_t	ndalloc;
	uint64_t	nmadvise;
	/* Number of large allocation requests. */
	uint64_t	large_nrequests;
};
403 typedef struct chunk_stats_s chunk_stats_t;
404 struct chunk_stats_s {
	/* Number of chunks that were allocated. */
	uint64_t	nchunks;
	/* High-water mark for number of chunks allocated. */
	unsigned long	highchunks;
	/*
	 * Current number of chunks allocated. This value isn't maintained for
	 * any other purpose, so keep track of it in order to be able to set
	 * highchunks.
	 */
	unsigned long	curchunks;
};
419 #endif /* #ifdef MALLOC_STATS */
421 /******************************************************************************/
/*
 * Chunk data structures.
 */
426 /* Tree of chunks. */
427 typedef struct chunk_node_s chunk_node_t;
struct chunk_node_s {
	/* Linkage for the chunk tree. */
	RB_ENTRY(chunk_node_s) link;
	/*
	 * Pointer to the chunk that this tree node is responsible for. In some
	 * (but certainly not all) cases, this data structure is placed at the
	 * beginning of the corresponding chunk, so this field may point to this
	 * node.
	 */
	void	*chunk;
	/* Total chunk size. */
	size_t	size;
};
443 typedef struct chunk_tree_s chunk_tree_t;
444 RB_HEAD(chunk_tree_s, chunk_node_s);
446 /******************************************************************************/
/*
 * Arena data structures.
 */
451 typedef struct arena_s arena_t;
452 typedef struct arena_bin_s arena_bin_t;
454 typedef struct arena_chunk_map_s arena_chunk_map_t;
struct arena_chunk_map_s {
	bool		free:1;
	bool		large:1;
	unsigned	npages:15;	/* Limiting factor for CHUNK_2POW_MAX. */
	unsigned	pos:15;
};
462 /* Arena chunk header. */
463 typedef struct arena_chunk_s arena_chunk_t;
464 struct arena_chunk_s {
	/* Arena that owns the chunk. */
	arena_t		*arena;

468 /* Linkage for the arena's chunk tree. */
469 RB_ENTRY(arena_chunk_s) link;
	/*
	 * Number of pages in use. This is maintained in order to make
	 * detection of empty chunks fast.
	 */
	uint32_t	pages_used;

	/*
478 * Array of counters that keeps track of how many free runs of each
479 * size are available in this chunk. This table is sized at compile
480 * time, which is wasteful. However, due to unrelated rounding, this
481 * doesn't actually waste any otherwise useful space.
493 uint32_t nfree_runs[CHUNK_2POW_MAX/* - PAGE_SHIFT */];
495 /* Map of pages within chunk that keeps track of free/large/small. */
	arena_chunk_map_t map[1];	/* Dynamically sized. */
};
498 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
499 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
typedef struct arena_run_s arena_run_t;
struct arena_run_s {
	/* Linkage for run rings. */
	qr(arena_run_t) link;

#ifdef MALLOC_DEBUG
	uint32_t	magic;
#  define ARENA_RUN_MAGIC 0x384adf93
#endif

	/* Bin this run is associated with. */
	arena_bin_t	*bin;

	/* Bitmask of in-use regions (0: in use, 1: free). */
#define REGS_MASK_NELMS							\
	(1 << (RUN_MIN_REGS_2POW - SIZEOF_INT_2POW - 2))
	unsigned	regs_mask[REGS_MASK_NELMS];

	/* Index of first element that might have a free region. */
	unsigned	regs_minelm;

	/* Number of free regions in run. */
	unsigned	nfree;

	/*
	 * Current quartile for this run, one of: {RUN_QINIT, RUN_Q0, RUN_Q25,
	 * RUN_Q50, RUN_Q75, RUN_Q100}.
	 */
	unsigned	quartile;

	/*
	 * Limits on the number of free regions for the fullness quartile this
	 * run is currently in. If nfree goes outside these limits, the run
	 * is moved to a different fullness quartile.
	 */
	unsigned	free_max;
	unsigned	free_min;
};
546 /* Used for run ring headers, where the run isn't actually used. */
typedef struct arena_run_link_s arena_run_link_t;
struct arena_run_link_s {
	/* Linkage for run rings. */
	qr(arena_run_t) link;
};

struct arena_bin_s {
	/*
	 * Current run being used to service allocations of this bin's size
	 * class.
	 */
	arena_run_t	*runcur;

	/*
561 * Links into rings of runs, of various fullnesses (names indicate
562 * approximate lower bounds). A new run conceptually starts off in
563 * runsinit, and it isn't inserted into the runs0 ring until it
564 * reaches 25% full (hysteresis mechanism). For the run to be moved
565 * again, it must become either empty or 50% full. Thus, each ring
566 * contains runs that are within 50% above the advertised fullness for
567 * the ring. This provides a low-overhead mechanism for segregating
568 * runs into approximate fullness classes.
570 * Conceptually, there is a runs100 that contains completely full runs.
571 * Since we don't need to search for these runs though, no runs100 ring
572 * is actually maintained.
574 * These rings are useful when looking for an existing run to use when
	 * runcur is no longer usable. We look for usable runs in the
	 * following order: runs50, runs25, runs0, and finally runs75.
	 *
583 * runs75 isn't a good place to look, because it contains runs that may
584 * be nearly completely full. Still, we look there as a last resort in
	 * order to avoid allocating a new run if at all possible.
	 */
587 /* arena_run_link_t runsinit; 0% <= fullness < 25% */
588 arena_run_link_t runs0; /* 0% < fullness < 50% */
589 arena_run_link_t runs25; /* 25% < fullness < 75% */
590 arena_run_link_t runs50; /* 50% < fullness < 100% */
591 arena_run_link_t runs75; /* 75% < fullness < 100% */
592 /* arena_run_link_t runs100; fullness == 100% */
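	/*
	 * For example, a run with nregs == 100 starts in RUN_QINIT with
	 * free_min == 76; once fewer than 76 regions are free it is promoted
	 * and inserted into runs0, with new limits free_max == 99 and
	 * free_min == 51.
	 */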
	/* Size of regions in a run for this bin's size class. */
	size_t		reg_size;

	/* Total size of a run for this bin's size class. */
	size_t		run_size;

	/* Total number of regions in a run for this bin's size class. */
	uint32_t	nregs;

	/* Offset of first region in a run for this bin's size class. */
	uint32_t	reg0_offset;
#ifdef MALLOC_STATS
	/* Bin statistics. */
	malloc_bin_stats_t stats;
#endif
};

struct arena_s {
#ifdef MALLOC_DEBUG
	uint32_t		magic;
#  define ARENA_MAGIC 0x947d3d24
#endif

	/* All operations on this arena require that mtx be locked. */
	malloc_mutex_t		mtx;

#ifdef MALLOC_STATS
	arena_stats_t		stats;
#endif

	/*
	 * Tree of chunks this arena manages.
	 */
	arena_chunk_tree_t	chunks;

	/*
	 * bins is used to store rings of free regions of the following sizes,
	 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS:
	 * tiny (2, 4, 8), quantum-spaced (16, 32, ..., 512), and sub-page
	 * (1 kB, 2 kB).
	 */
	arena_bin_t		bins[1];	/* Dynamically sized. */
};
656 /******************************************************************************/
661 /* Number of CPUs. */
662 static unsigned ncpus;
665 static unsigned pagesize;
666 static unsigned pagesize_2pow;
668 /* Various bin-related settings. */
669 static size_t bin_maxclass; /* Max size class for bins. */
670 static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */
671 static unsigned nqbins; /* Number of quantum-spaced bins. */
672 static unsigned nsbins; /* Number of (2^n)-spaced sub-page bins. */
673 static size_t small_min;
674 static size_t small_max;
675 static unsigned tiny_min_2pow;
677 /* Various quantum-related settings. */
678 static size_t quantum;
679 static size_t quantum_mask; /* (quantum - 1). */
681 /* Various chunk-related settings. */
682 static size_t chunk_size;
683 static size_t chunk_size_mask; /* (chunk_size - 1). */
684 static size_t arena_maxclass; /* Max size class for arenas. */
685 static unsigned arena_chunk_maplen;
692 /* Protects chunk-related data structures. */
693 static malloc_mutex_t chunks_mtx;
695 /* Tree of chunks that are stand-alone huge allocations. */
696 static chunk_tree_t huge;
700 * Try to use brk for chunk-size allocations, due to address space constraints.
703 * Protects sbrk() calls. This must be separate from chunks_mtx, since
704 * base_chunk_alloc() also uses sbrk(), but cannot lock chunks_mtx (doing so
705 * could cause recursive lock acquisition).
707 static malloc_mutex_t brk_mtx;
708 /* Result of first sbrk(0) call. */
709 static void *brk_base;
710 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
711 static void *brk_prev;
712 /* Current upper limit on brk addresses. */
713 static void *brk_max;
718 * Byte counters for allocated/total space used by the chunks in the huge
721 static uint64_t huge_nmalloc;
722 static uint64_t huge_ndalloc;
723 static size_t huge_allocated;
/*
 * Tree of chunks that were previously allocated. This is used when allocating
 * chunks, in an attempt to re-use address space.
 */
730 static chunk_tree_t old_chunks;
732 /****************************/
/*
 * base (internal allocation).
 */
/*
 * Current chunk that is being used for internal memory allocations. This
 * chunk is carved up in cacheline-size quanta, so that there is no chance of
 * false cache line sharing.
 */
742 static void *base_chunk;
743 static void *base_next_addr;
744 static void *base_past_addr; /* Addr immediately past base_chunk. */
745 static chunk_node_t *base_chunk_nodes; /* LIFO cache of chunk nodes. */
746 static malloc_mutex_t base_mtx;
748 static uint64_t base_total;
/*
 * Arenas that are used to service external requests. Not all elements of the
 * arenas array are necessarily used; arenas are created lazily as needed.
 */
760 static arena_t **arenas;
761 static unsigned narenas;
763 static unsigned next_arena;
765 static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */
#ifndef NO_TLS
/*
 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 * for allocations.
 */
static __thread arena_t	*arenas_map;
#endif
776 /* Chunk statistics. */
777 static chunk_stats_t stats_chunks;
780 /*******************************/
/*
 * Runtime configuration options.
 */
784 const char *_malloc_options;
786 #ifndef NO_MALLOC_EXTRAS
787 static bool opt_abort = true;
static bool	opt_junk = true;
#else
static bool	opt_abort = false;
static bool	opt_junk = false;
#endif
793 static bool opt_hint = false;
794 static bool opt_print_stats = false;
795 static size_t opt_quantum_2pow = QUANTUM_2POW_MIN;
796 static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
797 static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT;
798 static bool opt_utrace = false;
799 static bool opt_sysv = false;
800 static bool opt_xmalloc = false;
801 static bool opt_zero = false;
802 static int32_t opt_narenas_lshift = 0;
#define UTRACE(a, b, c)							\
	if (opt_utrace) {						\
		malloc_utrace_t ut = {a, b, c};				\
		utrace(&ut, sizeof(ut));				\
	}
816 /******************************************************************************/
/*
 * Begin function prototypes for non-inline static functions.
 */
821 static void malloc_mutex_init(malloc_mutex_t *a_mutex);
static void	wrtmessage(const char *p1, const char *p2, const char *p3,
    const char *p4);
824 static void malloc_printf(const char *format, ...);
825 static bool base_chunk_alloc(size_t minsize);
826 static void *base_alloc(size_t size);
827 static chunk_node_t *base_chunk_node_alloc(void);
828 static void base_chunk_node_dealloc(chunk_node_t *node);
830 static void stats_print(arena_t *arena);
832 static void *pages_map(void *addr, size_t size);
833 static void pages_unmap(void *addr, size_t size);
834 static void *chunk_alloc(size_t size);
835 static void chunk_dealloc(void *chunk, size_t size);
837 static arena_t *choose_arena_hard(void);
static void	arena_run_split(arena_t *arena, arena_run_t *run, bool large,
    size_t size);
841 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
842 static void arena_chunk_dealloc(arena_chunk_t *chunk);
static void	arena_bin_run_promote(arena_t *arena, arena_bin_t *bin,
    arena_run_t *run);
static void	arena_bin_run_demote(arena_t *arena, arena_bin_t *bin,
    arena_run_t *run);
847 static arena_run_t *arena_run_alloc(arena_t *arena, bool large, size_t size);
848 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
849 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
850 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
851 static void *arena_malloc(arena_t *arena, size_t size);
852 static size_t arena_salloc(const void *ptr);
853 static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
854 static void arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
855 static bool arena_new(arena_t *arena);
856 static arena_t *arenas_extend(unsigned ind);
857 static void *huge_malloc(size_t size);
858 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
859 static void huge_dalloc(void *ptr);
860 static void *imalloc(size_t size);
861 static void *ipalloc(size_t alignment, size_t size);
862 static void *icalloc(size_t size);
863 static size_t isalloc(const void *ptr);
864 static void *iralloc(void *ptr, size_t size);
865 static void idalloc(void *ptr);
866 static void malloc_print_stats(void);
867 static bool malloc_init_hard(void);
/*
 * End function prototypes.
 */
872 /******************************************************************************/
878 malloc_mutex_init(malloc_mutex_t *a_mutex)
880 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
882 a_mutex->lock = lock;
886 malloc_mutex_lock(malloc_mutex_t *a_mutex)
890 _SPINLOCK(&a_mutex->lock);
894 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
898 _SPINUNLOCK(&a_mutex->lock);
904 /******************************************************************************/
906 * Begin Utility functions/macros.
909 /* Return the chunk address for allocation address a. */
910 #define CHUNK_ADDR2BASE(a) \
911 ((void *)((uintptr_t)(a) & ~chunk_size_mask))
913 /* Return the chunk offset of address a. */
914 #define CHUNK_ADDR2OFFSET(a) \
915 ((size_t)((uintptr_t)(a) & chunk_size_mask))
917 /* Return the smallest chunk multiple that is >= s. */
918 #define CHUNK_CEILING(s) \
919 (((s) + chunk_size_mask) & ~chunk_size_mask)
921 /* Return the smallest cacheline multiple that is >= s. */
922 #define CACHELINE_CEILING(s) \
923 (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
925 /* Return the smallest quantum multiple that is >= a. */
926 #define QUANTUM_CEILING(a) \
927 (((a) + quantum_mask) & ~quantum_mask)
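/*
 * For example, with 2 MB chunks (chunk_size_mask == 0x1fffff) and a 16 byte
 * quantum (quantum_mask == 0xf):
 *
 *   CHUNK_ADDR2BASE(0x50123456)   == (void *)0x50000000
 *   CHUNK_ADDR2OFFSET(0x50123456) == 0x123456
 *   QUANTUM_CEILING(100)          == 112
 */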
/* Compute the smallest power of 2 that is >= x. */
static inline size_t
pow2_ceil(size_t x)
{

	x--;
	x |= x >> 1;
	x |= x >> 2;
	x |= x >> 4;
	x |= x >> 8;
	x |= x >> 16;
#if (SIZEOF_PTR == 8)
	x |= x >> 32;
#endif
	x++;
	return (x);
}
948 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
951 _write(STDERR_FILENO, p1, strlen(p1));
952 _write(STDERR_FILENO, p2, strlen(p2));
953 _write(STDERR_FILENO, p3, strlen(p3));
954 _write(STDERR_FILENO, p4, strlen(p4));
957 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
958 const char *p4) = wrtmessage;
961 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
964 malloc_printf(const char *format, ...)
969 va_start(ap, format);
970 vsnprintf(buf, sizeof(buf), format, ap);
972 _malloc_message(buf, "", "", "");
975 /******************************************************************************/
978 base_chunk_alloc(size_t minsize)
981 assert(minsize <= chunk_size);
985 * Do special brk allocation here, since the base chunk doesn't really
986 * need to be chunk-aligned.
988 if (brk_prev != (void *)-1) {
992 malloc_mutex_lock(&brk_mtx);
994 /* Get the current end of brk. */
998 * Calculate how much padding is necessary to
999 * chunk-align the end of brk. Don't worry about
1000 * brk_cur not being chunk-aligned though.
1002 incr = (intptr_t)chunk_size
1003 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1007 brk_prev = sbrk(incr);
1008 if (brk_prev == brk_cur) {
1010 malloc_mutex_unlock(&brk_mtx);
1011 base_chunk = brk_cur;
1012 base_next_addr = base_chunk;
1013 base_past_addr = (void *)((uintptr_t)base_chunk +
1020 } while (brk_prev != (void *)-1);
1021 malloc_mutex_unlock(&brk_mtx);
1026 * Don't worry about chunk alignment here, since base_chunk doesn't really
1027 * need to be aligned.
1029 base_chunk = pages_map(NULL, chunk_size);
1030 if (base_chunk == NULL)
1032 base_next_addr = base_chunk;
1033 base_past_addr = (void *)((uintptr_t)base_chunk + chunk_size);
1035 base_total += chunk_size;
1041 base_alloc(size_t size)
1046 /* Round size up to nearest multiple of the cacheline size. */
1047 csize = CACHELINE_CEILING(size);
1049 malloc_mutex_lock(&base_mtx);
1051 /* Make sure there's enough space for the allocation. */
1052 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1053 if (base_chunk_alloc(csize)) {
1060 ret = base_next_addr;
1061 base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1064 malloc_mutex_unlock(&base_mtx);
1068 static chunk_node_t *
1069 base_chunk_node_alloc(void)
1073 malloc_mutex_lock(&base_mtx);
1074 if (base_chunk_nodes != NULL) {
1075 ret = base_chunk_nodes;
1076 base_chunk_nodes = *(chunk_node_t **)ret;
1077 malloc_mutex_unlock(&base_mtx);
1079 malloc_mutex_unlock(&base_mtx);
1080 ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1087 base_chunk_node_dealloc(chunk_node_t *node)
1090 malloc_mutex_lock(&base_mtx);
1091 *(chunk_node_t **)node = base_chunk_nodes;
1092 base_chunk_nodes = node;
1093 malloc_mutex_unlock(&base_mtx);
1096 /******************************************************************************/
1100 stats_print(arena_t *arena)
1105 malloc_printf("allocated: %zu\n", arena->stats.allocated);
1107 malloc_printf("calls:\n");
	malloc_printf(" %12s %12s %12s\n", "nmalloc", "ndalloc", "nmadvise");
1109 malloc_printf(" %12llu %12llu %12llu\n",
1110 arena->stats.nmalloc, arena->stats.ndalloc, arena->stats.nmadvise);
1112 malloc_printf("large requests: %llu\n", arena->stats.large_nrequests);
1114 malloc_printf("bins:\n");
1115 malloc_printf("%13s %1s %4s %5s %6s %9s %5s %6s %7s %6s %6s\n",
1116 "bin", "", "size", "nregs", "run_sz", "nrequests", "nruns",
1117 "hiruns", "curruns", "npromo", "ndemo");
1118 for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1119 if (arena->bins[i].stats.nrequests == 0) {
1120 if (gap_start == -1)
1123 if (gap_start != -1) {
1124 if (i > gap_start + 1) {
1125 /* Gap of more than one size class. */
1126 malloc_printf("[%u..%u]\n",
1129 /* Gap of one size class. */
1130 malloc_printf("[%u]\n", gap_start);
1135 "%13u %1s %4u %5u %6u %9llu %5llu"
1136 " %6lu %7lu %6llu %6llu\n",
1138 i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1139 arena->bins[i].reg_size,
1140 arena->bins[i].nregs,
1141 arena->bins[i].run_size,
1142 arena->bins[i].stats.nrequests,
1143 arena->bins[i].stats.nruns,
1144 arena->bins[i].stats.highruns,
1145 arena->bins[i].stats.curruns,
1146 arena->bins[i].stats.npromote,
1147 arena->bins[i].stats.ndemote);
1150 if (gap_start != -1) {
1151 if (i > gap_start + 1) {
1152 /* Gap of more than one size class. */
1153 malloc_printf("[%u..%u]\n", gap_start, i - 1);
1155 /* Gap of one size class. */
1156 malloc_printf("[%u]\n", gap_start);
1163 * End Utility functions/macros.
1165 /******************************************************************************/
1167 * Begin chunk management functions.
1171 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1177 if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1179 else if (a->chunk == b->chunk)
1185 /* Generate red-black tree code for chunks. */
1186 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1189 pages_map(void *addr, size_t size)
1194 * We don't use MAP_FIXED here, because it can cause the *replacement*
1195 * of existing mappings, and we only want to create new mappings.
1197 ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1199 assert(ret != NULL);
1201 if (ret == MAP_FAILED)
1203 else if (addr != NULL && ret != addr) {
1205 * We succeeded in mapping memory, but not in the right place.
1207 if (munmap(ret, size) == -1) {
1208 char buf[STRERROR_BUF];
1210 strerror_r(errno, buf, sizeof(buf));
1211 malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1212 _getprogname(), buf);
1219 assert(ret == NULL || (addr == NULL && ret != addr)
1220 || (addr != NULL && ret == addr));
1225 pages_unmap(void *addr, size_t size)
1228 if (munmap(addr, size) == -1) {
1229 char buf[STRERROR_BUF];
1231 strerror_r(errno, buf, sizeof(buf));
1232 malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1233 _getprogname(), buf);
1240 chunk_alloc(size_t size)
1243 chunk_node_t *tchunk, *delchunk;
1246 assert(size % chunk_size == 0);
1248 malloc_mutex_lock(&chunks_mtx);
1250 if (size == chunk_size) {
1252 * Check for address ranges that were previously chunks and try
1256 tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1257 while (tchunk != NULL) {
1258 /* Found an address range. Try to recycle it. */
1260 chunk = tchunk->chunk;
1262 tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1264 /* Remove delchunk from the tree. */
1265 RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1266 base_chunk_node_dealloc(delchunk);
1269 if ((uintptr_t)chunk >= (uintptr_t)brk_base
1270 && (uintptr_t)chunk < (uintptr_t)brk_max) {
1271 /* Re-use a previously freed brk chunk. */
1276 if ((ret = pages_map(chunk, size)) != NULL) {
1285 * Try to create allocations in brk, in order to make full use of
1286 * limited address space.
1288 if (brk_prev != (void *)-1) {
1293 * The loop is necessary to recover from races with other
1294 * threads that are using brk for something other than malloc.
1296 malloc_mutex_lock(&brk_mtx);
1298 /* Get the current end of brk. */
1302 * Calculate how much padding is necessary to
1303 * chunk-align the end of brk.
1305 incr = (intptr_t)size
1306 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1310 ret = (void *)(intptr_t)brk_cur + incr;
1314 brk_prev = sbrk(incr);
1315 if (brk_prev == brk_cur) {
1317 malloc_mutex_unlock(&brk_mtx);
1318 brk_max = (void *)(intptr_t)ret + size;
1321 } while (brk_prev != (void *)-1);
1322 malloc_mutex_unlock(&brk_mtx);
	/*
	 * Try to over-allocate, but allow the OS to place the allocation
	 * anywhere. Beware of size_t wrap-around.
	 */
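	/*
	 * For example, to get one 2 MB chunk, 4 MB is mapped; if mmap()
	 * returns 0x500aa000, the leading 0x156000 bytes and the trailing
	 * 0xaa000 bytes are unmapped, leaving a chunk-aligned region at
	 * 0x50200000.
	 */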
1330 if (size + chunk_size > size) {
1331 if ((ret = pages_map(NULL, size + chunk_size)) != NULL) {
1332 size_t offset = CHUNK_ADDR2OFFSET(ret);
1335 * Success. Clean up unneeded leading/trailing space.
1338 /* Leading space. */
1339 pages_unmap(ret, chunk_size - offset);
1341 ret = (void *)((uintptr_t)ret + (chunk_size -
1344 /* Trailing space. */
1345 pages_unmap((void *)((uintptr_t)ret + size),
1348 /* Trailing space only. */
1349 pages_unmap((void *)((uintptr_t)ret + size),
1356 /* All strategies for allocation failed. */
1361 stats_chunks.nchunks += (size / chunk_size);
1362 stats_chunks.curchunks += (size / chunk_size);
1364 if (stats_chunks.curchunks > stats_chunks.highchunks)
1365 stats_chunks.highchunks = stats_chunks.curchunks;
1367 malloc_mutex_unlock(&chunks_mtx);
1369 assert(CHUNK_ADDR2BASE(ret) == ret);
1374 chunk_dealloc(void *chunk, size_t size)
1380 assert(chunk != NULL);
1381 assert(CHUNK_ADDR2BASE(chunk) == chunk);
1383 assert(size % chunk_size == 0);
1385 malloc_mutex_lock(&chunks_mtx);
1388 if ((uintptr_t)chunk >= (uintptr_t)brk_base
1389 && (uintptr_t)chunk < (uintptr_t)brk_max) {
1392 malloc_mutex_lock(&brk_mtx);
1393 /* Get the current end of brk. */
1397 * Try to shrink the data segment if this chunk is at the end
1398 * of the data segment. The sbrk() call here is subject to a
1399 * race condition with threads that use brk(2) or sbrk(2)
1400 * directly, but the alternative would be to leak memory for
1401 * the sake of poorly designed multi-threaded programs.
1403 if (brk_cur == brk_max
1404 && (void *)(uintptr_t)chunk + size == brk_max
1405 && sbrk(-(intptr_t)size) == brk_max) {
1406 malloc_mutex_unlock(&brk_mtx);
1407 if (brk_prev == brk_max) {
1409 brk_prev = (void *)(intptr_t)brk_max
1415 malloc_mutex_unlock(&brk_mtx);
1416 madvise(chunk, size, MADV_FREE);
1419 pages_unmap(chunk, size);
1422 * Iteratively create records of each chunk-sized memory region that
1423 * 'chunk' is comprised of, so that the address range can be recycled
1424 * if memory usage increases later on.
1426 for (offset = 0; offset < size; offset += chunk_size) {
1428 * It is possible for chunk to overlap existing entries in
1429 * old_chunks if it is a huge allocation, so take care to not
1432 key.chunk = (void *)((uintptr_t)chunk + (uintptr_t)offset);
1433 if (RB_FIND(chunk_tree_s, &old_chunks, &key) == NULL) {
1434 node = base_chunk_node_alloc();
1438 node->chunk = key.chunk;
1439 node->size = chunk_size;
1440 RB_INSERT(chunk_tree_s, &old_chunks, node);
1448 stats_chunks.curchunks -= (size / chunk_size);
1450 malloc_mutex_unlock(&chunks_mtx);
1454 * End chunk management functions.
1456 /******************************************************************************/
/*
 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
 * code if necessary).
 */
1465 static inline arena_t *
1471 * We can only use TLS if this is a PIC library, since for the static
1472 * library version, libc's malloc is used by TLS allocation, which
1473 * introduces a bootstrapping issue.
1476 if (__isthreaded == false) {
1478 * Avoid the overhead of TLS for single-threaded operation. If the
1479 * app switches to threaded mode, the initial thread may end up
1480 * being assigned to some other arena, but this one-time switch
1481 * shouldn't cause significant issues.
1488 ret = choose_arena_hard();
1494 * Hash _pthread_self() to one of the arenas. There is a prime
1495 * number of arenas, so this has a reasonable chance of
1496 * working. Even so, the hashing can be easily thwarted by
1497 * inconvenient _pthread_self() values. Without specific
1498 * knowledge of how _pthread_self() calculates values, we can't
1499 * easily do much better than this.
1501 ind = (unsigned long) _pthread_self() % narenas;
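		/*
		 * For example, if narenas were 7 and _pthread_self() returned
		 * 0x1234 (4660), this thread would be mapped to
		 * arenas[4660 % 7] == arenas[5].
		 */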
1504 * Optimistially assume that arenas[ind] has been initialized.
1505 * At worst, we find out that some other thread has already
1506 * done so, after acquiring the lock in preparation. Note that
1507 * this lazy locking also has the effect of lazily forcing
1508 * cache coherency; without the lock acquisition, there's no
1509 * guarantee that modification of arenas[ind] by another thread
1510 * would be seen on this CPU for an arbitrary amount of time.
1512 * In general, this approach to modifying a synchronized value
1513 * isn't a good idea, but in this case we only ever modify the
1514 * value once, so things work out well.
1519 * Avoid races with another thread that may have already
1520 * initialized arenas[ind].
1522 malloc_mutex_lock(&arenas_mtx);
1523 if (arenas[ind] == NULL)
1524 ret = arenas_extend((unsigned)ind);
1527 malloc_mutex_unlock(&arenas_mtx);
1533 assert(ret != NULL);
1539 * Choose an arena based on a per-thread value (slow-path code only, called
1540 * only by choose_arena()).
1543 choose_arena_hard(void)
1547 assert(__isthreaded);
1549 /* Assign one of the arenas to this thread, in a round-robin fashion. */
1550 malloc_mutex_lock(&arenas_mtx);
1551 ret = arenas[next_arena];
1553 ret = arenas_extend(next_arena);
1556 * Make sure that this function never returns NULL, so that
1557 * choose_arena() doesn't have to check for a NULL return
1562 next_arena = (next_arena + 1) % narenas;
1563 malloc_mutex_unlock(&arenas_mtx);
1571 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1577 if ((uintptr_t)a < (uintptr_t)b)
1585 /* Generate red-black tree code for arena chunks. */
1586 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1588 static inline void *
1589 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1592 unsigned i, mask, bit, regind;
1594 assert(run->magic == ARENA_RUN_MAGIC);
1596 for (i = run->regs_minelm; i < REGS_MASK_NELMS; i++) {
1597 mask = run->regs_mask[i];
1599 /* Usable allocation found. */
1600 bit = ffs(mask) - 1;
1602 regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1603 ret = (void *)&((char *)run)[bin->reg0_offset
1604 + (bin->reg_size * regind)];
1608 run->regs_mask[i] = mask;
1613 * Make a note that nothing before this element
1614 * contains a free region.
1616 run->regs_minelm = i + 1;
1625 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
	/*
	 * To divide by a number D that is not a power of two we multiply
	 * by (2^21 / D) and then right shift by 21 positions.
	 *
	 *   X / D
	 *
	 * becomes
	 *
	 *   (X * size_invs[(D >> QUANTUM_2POW_MIN) - 3]) >> SIZE_INV_SHIFT
	 */
1637 #define SIZE_INV_SHIFT 21
1638 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
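	/*
	 * For example, with a 16 byte quantum and region size D == 48:
	 * size_invs[0] == SIZE_INV(3) == ((1 << 21) / 48) + 1 == 43691, so a
	 * region at byte offset diff == 96 yields
	 * regind == (96 * 43691) >> 21 == 2, matching 96 / 48.
	 */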
	static const unsigned size_invs[] = {
	    SIZE_INV(3),
	    SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1642 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1643 SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1644 SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1645 SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1646 SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1647 SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1648 #if (QUANTUM_2POW_MIN < 4)
1650 SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1651 SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1652 SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1653 SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1654 SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1655 SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1656 SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1657 SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1660 unsigned diff, regind, elm, bit;
1662 assert(run->magic == ARENA_RUN_MAGIC);
1663 assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1664 >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1667 * Avoid doing division with a variable divisor if possible. Using
1668 * actual division here can reduce allocator throughput by over 20%!
1670 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1671 if ((size & (size - 1)) == 0) {
1673 * log2_table allows fast division of a power of two in the
1676 * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1678 static const unsigned char log2_table[] = {
1679 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1680 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1681 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1682 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1683 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1684 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1685 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
		    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
		};

		if (size <= 128)
			regind = (diff >> log2_table[size - 1]);
1691 else if (size <= 32768)
1692 regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1695 * The page size is too large for us to use the lookup
1696 * table. Use real division.
1698 regind = diff / size;
1700 } else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1701 << QUANTUM_2POW_MIN) + 2) {
1702 regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1703 regind >>= SIZE_INV_SHIFT;
1706 * size_invs isn't large enough to handle this size class, so
1707 * calculate regind using actual division. This only happens
1708 * if the user increases small_max via the 'S' runtime
1709 * configuration option.
1711 regind = diff / size;
1713 assert(diff == regind * size);
1714 assert(regind < bin->nregs);
1716 elm = regind >> (SIZEOF_INT_2POW + 3);
1717 if (elm < run->regs_minelm)
1718 run->regs_minelm = elm;
1719 bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1720 assert((run->regs_mask[elm] & (1 << bit)) == 0);
1721 run->regs_mask[elm] |= (1 << bit);
1723 #undef SIZE_INV_SHIFT
1727 arena_run_split(arena_t *arena, arena_run_t *run, bool large, size_t size)
1729 arena_chunk_t *chunk;
1730 unsigned run_ind, map_offset, total_pages, need_pages;
1731 unsigned i, log2_run_pages, run_pages;
1733 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1734 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1736 assert(chunk->map[run_ind].free);
1737 total_pages = chunk->map[run_ind].npages;
1738 need_pages = (size >> pagesize_2pow);
1740 assert(chunk->map[run_ind].free);
1741 assert(chunk->map[run_ind].large == false);
1742 assert(chunk->map[run_ind].npages == total_pages);
1744 /* Split enough pages from the front of run to fit allocation size. */
1745 map_offset = run_ind;
1746 for (i = 0; i < need_pages; i++) {
1747 chunk->map[map_offset + i].free = false;
1748 chunk->map[map_offset + i].large = large;
1749 chunk->map[map_offset + i].npages = need_pages;
1750 chunk->map[map_offset + i].pos = i;
1753 /* Update map for trailing pages. */
1754 map_offset += need_pages;
1755 while (map_offset < run_ind + total_pages) {
1756 log2_run_pages = ffs(map_offset) - 1;
1757 run_pages = (1 << log2_run_pages);
1759 chunk->map[map_offset].free = true;
1760 chunk->map[map_offset].large = false;
1761 chunk->map[map_offset].npages = run_pages;
1763 chunk->nfree_runs[log2_run_pages]++;
1765 map_offset += run_pages;
1768 chunk->pages_used += (size >> pagesize_2pow);
1771 static arena_chunk_t *
1772 arena_chunk_alloc(arena_t *arena)
1774 arena_chunk_t *chunk;
1775 unsigned i, j, header_npages, pow2_header_npages, map_offset;
1776 unsigned log2_run_pages, run_pages;
1779 chunk = (arena_chunk_t *)chunk_alloc(chunk_size);
1783 chunk->arena = arena;
1785 RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1788 * Claim that no pages are in use, since the header is merely overhead.
1790 chunk->pages_used = 0;
1792 memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs));
1794 header_size = (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen]
1795 - (uintptr_t)chunk);
1796 if (header_size % pagesize != 0) {
1797 /* Round up to the nearest page boundary. */
1798 header_size += pagesize - (header_size % pagesize);
1801 header_npages = header_size >> pagesize_2pow;
1802 pow2_header_npages = pow2_ceil(header_npages);
	/*
	 * Iteratively mark runs as in use, until we've spoken for the entire
	 * header.
	 */
	map_offset = 0;
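	/*
	 * For example, a (hypothetical) 13 page header would be marked as
	 * buddy-aligned runs of 8, 4, and 1 pages, since pow2_ceil(13) == 16
	 * and the loop below takes the largest power-of-two piece that still
	 * fits on each iteration.
	 */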
1809 for (i = 0; header_npages > 0; i++) {
1810 if ((pow2_header_npages >> i) <= header_npages) {
1811 for (j = 0; j < (pow2_header_npages >> i); j++) {
1812 chunk->map[map_offset + j].free = false;
1813 chunk->map[map_offset + j].large = false;
1814 chunk->map[map_offset + j].npages =
1815 (pow2_header_npages >> i);
1816 chunk->map[map_offset + j].pos = j;
1818 header_npages -= (pow2_header_npages >> i);
1819 map_offset += (pow2_header_npages >> i);
1824 * Finish initializing map. The chunk header takes up some space at
1825 * the beginning of the chunk, which we just took care of by
1826 * "allocating" the leading pages.
1828 while (map_offset < (chunk_size >> pagesize_2pow)) {
1829 log2_run_pages = ffs(map_offset) - 1;
1830 run_pages = (1 << log2_run_pages);
1832 chunk->map[map_offset].free = true;
1833 chunk->map[map_offset].large = false;
1834 chunk->map[map_offset].npages = run_pages;
1836 chunk->nfree_runs[log2_run_pages]++;
1838 map_offset += run_pages;
1845 arena_chunk_dealloc(arena_chunk_t *chunk)
1848 RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1850 chunk_dealloc((void *)chunk, chunk_size);
1854 arena_bin_run_promote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
1857 assert(bin == run->bin);
1860 assert(run->free_min > run->nfree);
1861 assert(run->quartile < RUN_Q100);
1864 bin->stats.npromote++;
1868 switch (run->quartile) {
1873 qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1874 run->free_max = bin->nregs - 1;
1875 run->free_min = (bin->nregs >> 1) + 1;
1876 assert(run->nfree <= run->free_max);
1877 assert(run->nfree >= run->free_min);
1880 qr_remove(run, link);
1881 qr_before_insert((arena_run_t *)&bin->runs25, run,
1883 run->free_max = ((bin->nregs >> 2) * 3) - 1;
1884 run->free_min = (bin->nregs >> 2) + 1;
1885 assert(run->nfree <= run->free_max);
1886 assert(run->nfree >= run->free_min);
1889 qr_remove(run, link);
1890 qr_before_insert((arena_run_t *)&bin->runs50, run,
1892 run->free_max = (bin->nregs >> 1) - 1;
1894 assert(run->nfree <= run->free_max);
1895 assert(run->nfree >= run->free_min);
1899 * Skip RUN_Q75 during promotion from RUN_Q50.
1900 * Separate handling of RUN_Q75 and RUN_Q100 allows us
1901 * to keep completely full runs in RUN_Q100, thus
1902 * guaranteeing that runs in RUN_Q75 are only mostly
1903 * full. This provides a method for avoiding a linear
1904 * search for non-full runs, which avoids some
1905 * pathological edge cases.
1910 qr_remove(run, link);
1911 assert(bin->runcur == run);
1915 assert(run->nfree <= run->free_max);
1916 assert(run->nfree >= run->free_min);
1925 arena_bin_run_demote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
1928 assert(bin == run->bin);
1931 assert(run->free_max < run->nfree);
1932 assert(run->quartile > RUN_QINIT);
1935 bin->stats.ndemote++;
1939 switch (run->quartile) {
1941 qr_remove(run, link);
1943 bin->stats.curruns--;
1945 if (bin->runcur == run)
1950 arena_run_dalloc(arena, run, bin->run_size);
1953 qr_remove(run, link);
1954 qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1955 run->free_max = bin->nregs - 1;
1956 run->free_min = (bin->nregs >> 1) + 1;
1957 assert(run->nfree <= run->free_max);
1958 assert(run->nfree >= run->free_min);
1961 qr_remove(run, link);
1962 qr_before_insert((arena_run_t *)&bin->runs25, run,
1964 run->free_max = ((bin->nregs >> 2) * 3) - 1;
1965 run->free_min = (bin->nregs >> 2) + 1;
1966 assert(run->nfree <= run->free_max);
1967 assert(run->nfree >= run->free_min);
1970 qr_remove(run, link);
1971 qr_before_insert((arena_run_t *)&bin->runs50, run,
1973 run->free_max = (bin->nregs >> 1) - 1;
1975 assert(run->nfree <= run->free_max);
1976 assert(run->nfree >= run->free_min);
1979 qr_before_insert((arena_run_t *)&bin->runs75, run,
1981 run->free_max = (bin->nregs >> 2) - 1;
1983 assert(run->nfree <= run->free_max);
1984 assert(run->nfree >= run->free_min);
1993 static arena_run_t *
1994 arena_run_alloc(arena_t *arena, bool large, size_t size)
1997 unsigned min_ind, i, j;
1998 arena_chunk_t *chunk;
2003 assert(size <= arena_maxclass);
2012 * Search through arena's chunks in address order for a run that is
2013 * large enough. Look for a precise fit, but do not pass up a chunk
2014 * that has a run which is large enough to split.
2016 min_ind = ffs(size >> pagesize_2pow) - 1;
2017 RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
2019 i < (opt_chunk_2pow - pagesize_2pow);
2021 if (chunk->nfree_runs[i] > 0) {
2022 arena_chunk_map_t *map = chunk->map;
2024 /* Scan chunk's map for free run. */
2026 j < arena_chunk_maplen;
2027 j += map[j].npages) {
2029 && map[j].npages == (1 << i))
				{
2031 run = (arena_run_t *)&((char *)chunk)[j
2034 assert(chunk->nfree_runs[i] > 0);
2035 chunk->nfree_runs[i]--;
2037 /* Update page map. */
2038 arena_run_split(arena, run, large, size);
				}
2049 /* No usable runs. Allocate a new chunk, then try again. */
2050 if (arena_chunk_alloc(arena) == NULL)
2056 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
2058 arena_chunk_t *chunk;
2059 unsigned run_ind, buddy_ind, base_run_ind, run_pages, log2_run_pages;
2061 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2062 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2064 run_pages = (size >> pagesize_2pow);
2065 log2_run_pages = ffs(run_pages) - 1;
2066 assert(run_pages > 0);
2068 /* Subtract pages from count of pages used in chunk. */
2069 chunk->pages_used -= run_pages;
2071 /* Mark run as deallocated. */
2072 chunk->map[run_ind].free = true;
2073 chunk->map[run_ind].large = false;
2074 chunk->map[run_ind].npages = run_pages;
2077 * Tell the kernel that we don't need the data in this run, but only if
2078 * requested via runtime configuration.
2081 madvise(run, size, MADV_FREE);
2083 arena->stats.nmadvise += (size >> pagesize_2pow);
2088 * Iteratively coalesce with buddies. Conceptually, the buddy scheme
2089 * induces a tree on the set of pages. If we know the number of pages
2090 * in the subtree rooted at the current node, we can quickly determine
	 * whether a run is the left or right buddy, and then calculate the
	 * buddy's index.
	 */
	for (;
	    (run_pages = (1 << log2_run_pages)) < arena_chunk_maplen;
	    log2_run_pages++) {
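		/*
		 * For example, a free 4 page run at page index 12 has
		 * (12 >> 2) & 1 == 1, so it is the right buddy of the run at
		 * index 8; if that run is also free and 4 pages long, the two
		 * coalesce into an 8 page run at index 8.
		 */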
2097 if (((run_ind >> log2_run_pages) & 1) == 0) {
2098 /* Current run precedes its buddy. */
2099 buddy_ind = run_ind + run_pages;
2100 base_run_ind = run_ind;
2102 /* Current run follows its buddy. */
2103 buddy_ind = run_ind - run_pages;
2104 base_run_ind = buddy_ind;
2107 if (chunk->map[buddy_ind].free == false
2108 || chunk->map[buddy_ind].npages != run_pages)
2111 assert(chunk->nfree_runs[log2_run_pages] > 0);
2112 chunk->nfree_runs[log2_run_pages]--;
2115 chunk->map[base_run_ind].npages = (run_pages << 1);
2117 /* Update run_ind to be the beginning of the coalesced run. */
2118 run_ind = base_run_ind;
2121 chunk->nfree_runs[log2_run_pages]++;
2123 /* Free pages, to the extent possible. */
2124 if (chunk->pages_used == 0) {
2125 /* This chunk is completely unused now, so deallocate it. */
2126 arena_chunk_dealloc(chunk);
2130 static arena_run_t *
2131 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2134 unsigned i, remainder;
2136 /* Look for a usable run. */
2137 if ((run = qr_next((arena_run_t *)&bin->runs50, link))
2138 != (arena_run_t *)&bin->runs50
2139 || (run = qr_next((arena_run_t *)&bin->runs25, link))
2140 != (arena_run_t *)&bin->runs25
2141 || (run = qr_next((arena_run_t *)&bin->runs0, link))
2142 != (arena_run_t *)&bin->runs0
2143 || (run = qr_next((arena_run_t *)&bin->runs75, link))
2144 != (arena_run_t *)&bin->runs75) {
2145 /* run is guaranteed to have available space. */
2146 qr_remove(run, link);
2149 /* No existing runs have any space available. */
2151 /* Allocate a new run. */
2152 run = arena_run_alloc(arena, false, bin->run_size);
2156 /* Initialize run internals. */
2160 for (i = 0; i < (bin->nregs >> (SIZEOF_INT_2POW + 3)); i++)
2161 run->regs_mask[i] = UINT_MAX;
2162 remainder = bin->nregs % (1 << (SIZEOF_INT_2POW + 3));
2163 if (remainder != 0) {
2164 run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2168 for (; i < REGS_MASK_NELMS; i++)
2169 run->regs_mask[i] = 0;
2171 run->regs_minelm = 0;
2173 run->nfree = bin->nregs;
2174 run->quartile = RUN_QINIT;
2175 run->free_max = bin->nregs;
2176 run->free_min = ((bin->nregs >> 2) * 3) + 1;
2178 run->magic = ARENA_RUN_MAGIC;
2183 bin->stats.curruns++;
2184 if (bin->stats.curruns > bin->stats.highruns)
2185 bin->stats.highruns = bin->stats.curruns;
2190 /* bin->runcur must have space available before this function is called. */
2191 static inline void *
2192 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2196 assert(run->magic == ARENA_RUN_MAGIC);
2197 assert(run->nfree > 0);
2199 ret = arena_run_reg_alloc(run, bin);
2200 assert(ret != NULL);
2202 if (run->nfree < run->free_min) {
2203 /* Promote run to higher fullness quartile. */
2204 arena_bin_run_promote(arena, bin, run);
2210 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2212 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2215 assert(bin->runcur == NULL || bin->runcur->quartile == RUN_Q100);
2217 bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2218 if (bin->runcur == NULL)
2220 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2221 assert(bin->runcur->nfree > 0);
2223 return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2227 arena_malloc(arena_t *arena, size_t size)
2231 assert(arena != NULL);
2232 assert(arena->magic == ARENA_MAGIC);
2234 assert(QUANTUM_CEILING(size) <= arena_maxclass);
2236 if (size <= bin_maxclass) {
2240 /* Small allocation. */
2242 if (size < small_min) {
2244 size = pow2_ceil(size);
2245 bin = &arena->bins[ffs(size >> (tiny_min_2pow + 1))];
2246 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2248 * Bin calculation is always correct, but we may need
2249 * to fix size for the purposes of assertions and/or
2252 if (size < (1 << tiny_min_2pow))
				size = (1 << tiny_min_2pow);
#endif
2255 } else if (size <= small_max) {
2256 /* Quantum-spaced. */
2257 size = QUANTUM_CEILING(size);
2258 bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2262 size = pow2_ceil(size);
2263 bin = &arena->bins[ntbins + nqbins
2264 + (ffs(size >> opt_small_max_2pow) - 2)];
2266 assert(size == bin->reg_size);
2268 malloc_mutex_lock(&arena->mtx);
2269 if ((run = bin->runcur) != NULL)
2270 ret = arena_bin_malloc_easy(arena, bin, run);
2272 ret = arena_bin_malloc_hard(arena, bin);
2275 bin->stats.nrequests++;
2278 /* Medium allocation. */
2279 size = pow2_ceil(size);
2280 malloc_mutex_lock(&arena->mtx);
2281 ret = (void *)arena_run_alloc(arena, true, size);
2283 arena->stats.large_nrequests++;
2288 arena->stats.nmalloc++;
2290 arena->stats.allocated += size;
2293 malloc_mutex_unlock(&arena->mtx);
2295 if (opt_junk && ret != NULL)
2296 memset(ret, 0xa5, size);
2297 else if (opt_zero && ret != NULL)
2298 memset(ret, 0, size);
2302 /* Return the size of the allocation pointed to by ptr. */
2304 arena_salloc(const void *ptr)
2307 arena_chunk_t *chunk;
2309 arena_chunk_map_t mapelm;
2311 assert(ptr != NULL);
2312 assert(CHUNK_ADDR2BASE(ptr) != ptr);
2315 * No arena data structures that we query here can change in a way that
2316 * affects this function, so we don't need to lock.
2318 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2319 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2320 mapelm = chunk->map[pageind];
2321 assert(mapelm.free == false);
2322 if (mapelm.large == false) {
2325 pageind -= mapelm.pos;
2327 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2328 assert(run->magic == ARENA_RUN_MAGIC);
2329 ret = run->bin->reg_size;
2331 ret = mapelm.npages << pagesize_2pow;
2337 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2341 /* Avoid moving the allocation if the size class would not change. */
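	/*
	 * For example, growing a 20 byte allocation to 28 bytes stays within
	 * the 32 byte quantum-spaced size class, so the existing region can be
	 * reused as is.
	 */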
2342 if (size < small_min) {
2343 if (oldsize < small_min &&
2344 ffs(pow2_ceil(size) >> (tiny_min_2pow + 1))
2345 == ffs(pow2_ceil(oldsize) >> (tiny_min_2pow + 1)))
2347 } else if (size <= small_max) {
2348 if (oldsize >= small_min && oldsize <= small_max &&
2349 (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2350 == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2353 if (oldsize > small_max && pow2_ceil(size) == oldsize)
	/*
	 * If we get here, then size and oldsize are different enough that we
	 * need to use a different size class. In that case, fall back to
	 * allocating new space and copying.
	 */
2362 ret = arena_malloc(choose_arena(), size);
2367 memcpy(ret, ptr, size);
2369 memcpy(ret, ptr, oldsize);
2373 if (opt_junk && size < oldsize)
2374 memset(&((char *)ptr)[size], 0x5a, oldsize - size);
2375 else if (opt_zero && size > oldsize)
2376 memset(&((char *)ptr)[size], 0, size - oldsize);
2381 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2384 arena_chunk_map_t mapelm;
2387 assert(arena != NULL);
2388 assert(arena->magic == ARENA_MAGIC);
2389 assert(chunk->arena == arena);
2390 assert(ptr != NULL);
2391 assert(CHUNK_ADDR2BASE(ptr) != ptr);
2393 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2394 mapelm = chunk->map[pageind];
2395 assert(mapelm.free == false);
2396 if (mapelm.large == false) {
2400 /* Small allocation. */
2402 pageind -= mapelm.pos;
2404 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2405 assert(run->magic == ARENA_RUN_MAGIC);
2407 size = bin->reg_size;
2410 memset(ptr, 0x5a, size);
2412 malloc_mutex_lock(&arena->mtx);
2413 arena_run_reg_dalloc(run, bin, ptr, size);
2415 if (run->nfree > run->free_max) {
2416 /* Demote run to lower fullness quartile. */
2417 arena_bin_run_demote(arena, bin, run);
2420 /* Medium allocation. */
2422 size = mapelm.npages << pagesize_2pow;
2423 assert((((uintptr_t)ptr) & (size - 1)) == 0);
2426 memset(ptr, 0x5a, size);
2428 malloc_mutex_lock(&arena->mtx);
2429 arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2433 arena->stats.allocated -= size;
2436 malloc_mutex_unlock(&arena->mtx);
2440 arena_new(arena_t *arena)
2444 size_t pow2_size, run_size;
2446 malloc_mutex_init(&arena->mtx);
2449 memset(&arena->stats, 0, sizeof(arena_stats_t));
2452 /* Initialize chunks. */
2453 RB_INIT(&arena->chunks);
2455 /* Initialize bins. */
2457 /* (2^n)-spaced tiny bins. */
2458 for (i = 0; i < ntbins; i++) {
2459 bin = &arena->bins[i];
2461 qr_new((arena_run_t *)&bin->runs0, link);
2462 qr_new((arena_run_t *)&bin->runs25, link);
2463 qr_new((arena_run_t *)&bin->runs50, link);
2464 qr_new((arena_run_t *)&bin->runs75, link);
2466 bin->reg_size = (1 << (tiny_min_2pow + i));
2469 * Calculate how large of a run to allocate. Make sure that at
2470 * least RUN_MIN_REGS regions fit in the run.
2472 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2473 if (run_size < pagesize)
2474 run_size = pagesize;
2475 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2476 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2477 if (run_size > arena_maxclass)
2478 run_size = arena_maxclass;
2479 bin->run_size = run_size;
2481 assert(run_size >= sizeof(arena_run_t));
2482 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2483 if (bin->nregs > (REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3))) {
2484 /* Take care not to overflow regs_mask. */
2485 bin->nregs = REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3);
2487 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2490 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2494 /* Quantum-spaced bins. */
2495 for (; i < ntbins + nqbins; i++) {
2496 bin = &arena->bins[i];
2498 qr_new((arena_run_t *)&bin->runs0, link);
2499 qr_new((arena_run_t *)&bin->runs25, link);
2500 qr_new((arena_run_t *)&bin->runs50, link);
2501 qr_new((arena_run_t *)&bin->runs75, link);
2503 bin->reg_size = quantum * (i - ntbins + 1);
2506 * Calculate how large of a run to allocate. Make sure that at
2507 * least RUN_MIN_REGS regions fit in the run.
2509 pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2510 run_size = (pow2_size << RUN_MIN_REGS_2POW);
2511 if (run_size < pagesize)
2512 run_size = pagesize;
2513 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2514 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2515 if (run_size > arena_maxclass)
2516 run_size = arena_maxclass;
2517 bin->run_size = run_size;
2519 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2520 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
2521 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2524 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2528 /* (2^n)-spaced sub-page bins. */
2529 for (; i < ntbins + nqbins + nsbins; i++) {
2530 bin = &arena->bins[i];
2532 qr_new((arena_run_t *)&bin->runs0, link);
2533 qr_new((arena_run_t *)&bin->runs25, link);
2534 qr_new((arena_run_t *)&bin->runs50, link);
2535 qr_new((arena_run_t *)&bin->runs75, link);
2537 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2540 * Calculate how large of a run to allocate. Make sure that at
2541 * least RUN_MIN_REGS regions fit in the run.
2543 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2544 if (run_size < pagesize)
2545 run_size = pagesize;
2546 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2547 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2548 if (run_size > arena_maxclass)
2549 run_size = arena_maxclass;
2550 bin->run_size = run_size;
2552 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2553 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
2554 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2557 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2562 arena->magic = ARENA_MAGIC;
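/*
 * Worked example with hypothetical numbers (not taken from the original):
 * suppose a bin has reg_size == 64, run_size == 4096, and
 * sizeof(arena_run_t) == 40.  Then
 *
 *	nregs       = (4096 - 40) / 64 = 63
 *	reg0_offset = 4096 - (63 * 64) = 64
 *
 * so the 63 regions are packed at the end of the run, and the slack at the
 * start of the run holds the run header (including its region bitmap).
 */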
2568 /* Create a new arena and insert it into the arenas array at index ind. */
2570 arenas_extend(unsigned ind)
2574 /* Allocate enough space for trailing bins. */
2575 ret = (arena_t *)base_alloc(sizeof(arena_t)
2576 + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2577 if (ret != NULL && arena_new(ret) == false) {
2581 /* Only reached if there is an OOM error. */
2584 * OOM here is quite inconvenient to propagate, since dealing with it
2585 * would require a check for failure in the fast path. Instead, punt
2586 * by using arenas[0]. In practice, this is an extremely unlikely failure.
2589 malloc_printf("%s: (malloc) Error initializing arena\n", _getprogname());
2600 /******************************************************************************/
2602 * Begin general internal functions.
2606 huge_malloc(size_t size)
2612 /* Allocate one or more contiguous chunks for this request. */
2614 csize = CHUNK_CEILING(size);
2616 /* size is large enough to cause size_t wrap-around. */
2620 /* Allocate a chunk node with which to track the chunk. */
2621 node = base_chunk_node_alloc();
2625 ret = chunk_alloc(csize);
2627 base_chunk_node_dealloc(node);
2631 /* Insert node into huge. */
2635 malloc_mutex_lock(&chunks_mtx);
2636 RB_INSERT(chunk_tree_s, &huge, node);
2639 huge_allocated += csize;
2641 malloc_mutex_unlock(&chunks_mtx);
2643 if (opt_junk && ret != NULL)
2644 memset(ret, 0xa5, csize);
2645 else if (opt_zero && ret != NULL)
2646 memset(ret, 0, csize);
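/*
 * Illustrative sketch (an assumption, not the original macro): CHUNK_CEILING()
 * rounds a request up to a whole number of chunks, roughly
 *
 *	#define CHUNK_CEILING(s)	(((s) + chunk_size_mask) & ~chunk_size_mask)
 *
 * A request within chunk_size - 1 bytes of SIZE_T_MAX therefore wraps around
 * to 0, which is the condition the wrap-around check above catches before any
 * chunk is actually allocated.
 */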
2652 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2656 /* Avoid moving the allocation if the size class would not change. */
2657 if (oldsize > arena_maxclass &&
2658 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize))
2662 * If we get here, then size and oldsize are different enough that we
2663 * need to use a different size class. In that case, fall back to
2664 * allocating new space and copying.
2666 ret = huge_malloc(size);
2670 if (CHUNK_ADDR2BASE(ptr) == ptr) {
2671 /* The old allocation is a chunk. */
2673 memcpy(ret, ptr, size);
2675 memcpy(ret, ptr, oldsize);
2677 /* The old allocation is a region. */
2678 assert(oldsize < size);
2679 memcpy(ret, ptr, oldsize);
2686 huge_dalloc(void *ptr)
2691 malloc_mutex_lock(&chunks_mtx);
2693 /* Extract from tree of huge allocations. */
2695 node = RB_FIND(chunk_tree_s, &huge, &key);
2696 assert(node != NULL);
2697 assert(node->chunk == ptr);
2698 RB_REMOVE(chunk_tree_s, &huge, node);
2701 /* Update counters. */
2703 huge_allocated -= node->size;
2706 malloc_mutex_unlock(&chunks_mtx);
2711 memset(node->chunk, 0x5a, node->size);
2713 chunk_dealloc(node->chunk, node->size);
2715 base_chunk_node_dealloc(node);
2719 imalloc(size_t size)
2725 if (size <= arena_maxclass)
2726 ret = arena_malloc(choose_arena(), size);
2728 ret = huge_malloc(size);
2734 ipalloc(size_t alignment, size_t size)
2740 * Take advantage of the fact that for each size class, every object is
2741 * aligned at the smallest power of two that is non-zero in the base
2742 * two representation of the size. For example:
2744 * Size | Base 2 | Minimum alignment
2745 * -----+----------+------------------
2747 * 144 | 10100000 | 32
2748 * 192 | 11000000 | 64
2750 * Depending on runtime settings, it is possible that arena_malloc()
2751 * will further round up to a power of two, but that never causes
2752 * correctness issues.
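	/*
	 * Worked example (not in the original): the minimum natural alignment
	 * of a size is its lowest set bit, i.e. (size & -size).  For 192
	 * (binary 11000000) that is 64, so any request with alignment <= 64
	 * is satisfied simply by rounding the size up to a multiple of the
	 * alignment, as done below.
	 */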
2754 alloc_size = (size + (alignment - 1)) & (-alignment);
2755 if (alloc_size < size) {
2756 /* size_t overflow. */
2760 if (alloc_size <= arena_maxclass)
2761 ret = arena_malloc(choose_arena(), alloc_size);
2763 if (alignment <= chunk_size)
2764 ret = huge_malloc(size);
2766 size_t chunksize, offset;
2770 * This allocation requires alignment that is even
2771 * larger than chunk alignment. This means that
2772 * huge_malloc() isn't good enough.
2774 * Allocate almost twice as many chunks as are demanded
2775 * by the size or alignment, in order to assure the
2776 * alignment can be achieved, then unmap leading and trailing chunks.
2780 chunksize = CHUNK_CEILING(size);
2782 if (size >= alignment)
2783 alloc_size = chunksize + alignment - chunk_size;
2785 alloc_size = (alignment << 1) - chunk_size;
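/*
 * Illustrative note (not in the original): chunk_alloc() returns chunk-aligned
 * memory, so the distance from the returned address to the next alignment
 * boundary is at most alignment - chunk_size.  Over-allocating to
 * 2 * alignment - chunk_size bytes therefore always leaves at least alignment
 * (and hence at least chunksize) usable bytes past that boundary.
 */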
2788 * Allocate a chunk node with which to track the chunk.
2790 node = base_chunk_node_alloc();
2794 ret = chunk_alloc(alloc_size);
2796 base_chunk_node_dealloc(node);
2800 offset = (uintptr_t)ret & (alignment - 1);
2801 assert(offset % chunk_size == 0);
2802 assert(offset < alloc_size);
2804 /* Trim trailing space. */
2805 chunk_dealloc((void *)((uintptr_t)ret
2806 + chunksize), alloc_size - chunksize);
2810 /* Trim leading space. */
2811 chunk_dealloc(ret, alignment - offset);
2813 ret = (void *)((uintptr_t)ret + (alignment - offset));
2816 trailsize = alloc_size - (alignment - offset) - chunksize;
2818 if (trailsize != 0) {
2819 /* Trim trailing space. */
2820 assert(trailsize < alloc_size);
2821 chunk_dealloc((void *)((uintptr_t)ret
2822 + chunksize), trailsize);
2826 /* Insert node into huge. */
2828 node->size = chunksize;
2830 malloc_mutex_lock(&chunks_mtx);
2831 RB_INSERT(chunk_tree_s, &huge, node);
2833 huge_allocated += size;
2835 malloc_mutex_unlock(&chunks_mtx);
2838 memset(ret, 0xa5, chunksize);
2840 memset(ret, 0, chunksize);
2844 assert(((uintptr_t)ret & (alignment - 1)) == 0);
2849 icalloc(size_t size)
2853 if (size <= arena_maxclass) {
2854 ret = arena_malloc(choose_arena(), size);
2857 memset(ret, 0, size);
2860 * The virtual memory system provides zero-filled pages, so
2861 * there is no need to do so manually, unless opt_junk is
2862 * enabled, in which case huge_malloc() fills huge allocations with junk.
2865 ret = huge_malloc(size);
2870 memset(ret, 0, size);
2872 else if ((uintptr_t)ret >= (uintptr_t)brk_base
2873 && (uintptr_t)ret < (uintptr_t)brk_max) {
2875 * This may be a re-used brk chunk. Therefore, zero the memory.
2878 memset(ret, 0, size);
2887 isalloc(const void *ptr)
2890 arena_chunk_t *chunk;
2892 assert(ptr != NULL);
2894 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2897 assert(chunk->arena->magic == ARENA_MAGIC);
2899 ret = arena_salloc(ptr);
2901 chunk_node_t *node, key;
2903 /* Chunk (huge allocation). */
2905 malloc_mutex_lock(&chunks_mtx);
2907 /* Extract from tree of huge allocations. */
2908 key.chunk = (void *)ptr;
2909 node = RB_FIND(chunk_tree_s, &huge, &key);
2910 assert(node != NULL);
2914 malloc_mutex_unlock(&chunks_mtx);
2921 iralloc(void *ptr, size_t size)
2926 assert(ptr != NULL);
2929 oldsize = isalloc(ptr);
2931 if (size <= arena_maxclass)
2932 ret = arena_ralloc(ptr, size, oldsize);
2934 ret = huge_ralloc(ptr, size, oldsize);
2942 arena_chunk_t *chunk;
2944 assert(ptr != NULL);
2946 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2950 malloc_mutex_lock(&chunk->arena->mtx);
2951 chunk->arena->stats.ndalloc++;
2952 malloc_mutex_unlock(&chunk->arena->mtx);
2954 arena_dalloc(chunk->arena, chunk, ptr);
2960 malloc_print_stats(void)
2963 if (opt_print_stats) {
2964 malloc_printf("___ Begin malloc statistics ___\n");
2965 malloc_printf("Number of CPUs: %u\n", ncpus);
2966 malloc_printf("Number of arenas: %u\n", narenas);
2967 malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size,
2969 malloc_printf("Quantum size: %zu (2^%zu)\n", quantum,
2971 malloc_printf("Max small size: %zu\n", small_max);
2972 malloc_printf("Pointer size: %u\n", sizeof(void *));
2973 malloc_printf("Assertions %s\n",
2984 size_t allocated, total;
2988 /* Calculate and print allocated/total stats. */
2991 for (i = 0, allocated = 0; i < narenas; i++) {
2992 if (arenas[i] != NULL) {
2993 malloc_mutex_lock(&arenas[i]->mtx);
2994 allocated += arenas[i]->stats.allocated;
2995 malloc_mutex_unlock(&arenas[i]->mtx);
3000 malloc_mutex_lock(&chunks_mtx);
3001 allocated += huge_allocated;
3002 total = stats_chunks.curchunks * chunk_size;
3003 malloc_mutex_unlock(&chunks_mtx);
3005 malloc_printf("Allocated: %zu, space used: %zu\n",
3008 /* Print base stats. */
3010 malloc_mutex_lock(&base_mtx);
3011 malloc_printf("\nbase:\n");
3012 malloc_printf(" %13s\n", "total");
3013 malloc_printf(" %13llu\n", base_total);
3014 malloc_mutex_unlock(&base_mtx);
3017 /* Print chunk stats. */
3019 chunk_stats_t chunks_stats;
3021 malloc_mutex_lock(&chunks_mtx);
3022 chunks_stats = stats_chunks;
3023 malloc_mutex_unlock(&chunks_mtx);
3025 malloc_printf("\nchunks:\n");
3026 malloc_printf(" %13s%13s%13s\n", "nchunks",
3027 "highchunks", "curchunks");
3028 malloc_printf(" %13llu%13lu%13lu\n",
3029 chunks_stats.nchunks,
3030 chunks_stats.highchunks,
3031 chunks_stats.curchunks);
3034 /* Print huge stats. */
3035 malloc_printf("\nhuge:\n");
3036 malloc_printf("%12s %12s %12s\n",
3037 "nmalloc", "ndalloc", "allocated");
3038 malloc_printf("%12llu %12llu %12zu\n",
3039 huge_nmalloc, huge_ndalloc, huge_allocated);
3041 /* Print stats for each arena. */
3042 for (i = 0; i < narenas; i++) {
3044 if (arena != NULL) {
3046 "\narenas[%u] statistics:\n", i);
3047 malloc_mutex_lock(&arena->mtx);
3049 malloc_mutex_unlock(&arena->mtx);
3053 #endif /* #ifdef MALLOC_STATS */
3054 malloc_printf("--- End malloc statistics ---\n");
3059 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
3060 * implementation has to take pains to avoid infinite recursion during initialization.
3067 if (malloc_initialized == false)
3068 return (malloc_init_hard());
3074 malloc_init_hard(void)
3078 char buf[PATH_MAX + 1];
3081 malloc_mutex_lock(&init_lock);
3082 if (malloc_initialized) {
3084 * Another thread initialized the allocator before this one
3085 * acquired init_lock.
3087 malloc_mutex_unlock(&init_lock);
3091 /* Get number of CPUs. */
3098 len = sizeof(ncpus);
3099 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3105 /* Get page size. */
3109 result = sysconf(_SC_PAGESIZE);
3110 assert(result != -1);
3111 pagesize = (unsigned) result;
3114 * We assume that pagesize is a power of 2 when calculating pagesize_2pow.
3117 assert(((result - 1) & result) == 0);
3118 pagesize_2pow = ffs(result) - 1;
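	/*
	 * Example (not in the original): with 4096 byte pages, ffs(4096)
	 * returns 13, so pagesize_2pow == 12 and
	 * (1U << pagesize_2pow) == pagesize.
	 */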
3121 for (i = 0; i < 3; i++) {
3122 /* Get runtime configuration. */
3125 if ((linklen = readlink("/etc/malloc.conf", buf,
3126 sizeof(buf) - 1)) != -1) {
3128 * Use the contents of the "/etc/malloc.conf"
3129 * symbolic link's name.
3131 buf[linklen] = '\0';
3134 /* No configuration specified. */
3140 if (issetugid() == 0 && (opts =
3141 getenv("MALLOC_OPTIONS")) != NULL) {
3143 * Do nothing; opts is already initialized to
3144 * the value of the MALLOC_OPTIONS environment variable.
3148 /* No configuration specified. */
3154 if (_malloc_options != NULL) {
3156 * Use options that were compiled into the program.
3158 opts = _malloc_options;
3160 /* No configuration specified. */
3170 for (j = 0; opts[j] != '\0'; j++) {
3192 * Run fullness quartile limits don't have
3193 * enough resolution if there are too few
3194 * regions for the largest bin size classes.
3196 if (opt_chunk_2pow > pagesize_2pow + 4)
3200 if (opt_chunk_2pow < CHUNK_2POW_MAX)
3204 opt_narenas_lshift--;
3207 opt_narenas_lshift++;
3210 opt_print_stats = false;
3213 opt_print_stats = true;
3216 if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3220 if (opt_quantum_2pow < pagesize_2pow - 1)
3224 if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3225 opt_small_max_2pow--;
3228 if (opt_small_max_2pow < pagesize_2pow - 1)
3229 opt_small_max_2pow++;
3244 opt_xmalloc = false;
3256 malloc_printf("%s: (malloc) Unsupported"
3257 " character in malloc options: '%c'\n",
3258 _getprogname(), opts[j]);
3263 /* Take care to call atexit() only once. */
3264 if (opt_print_stats) {
3265 /* Print statistics at exit. */
3266 atexit(malloc_print_stats);
3269 /* Set variables according to the value of opt_small_max_2pow. */
3270 if (opt_small_max_2pow < opt_quantum_2pow)
3271 opt_small_max_2pow = opt_quantum_2pow;
3272 small_max = (1 << opt_small_max_2pow);
3274 /* Set bin-related variables. */
3275 bin_maxclass = (pagesize >> 1);
3276 if (pagesize_2pow > RUN_MIN_REGS_2POW + 1)
3277 tiny_min_2pow = pagesize_2pow - (RUN_MIN_REGS_2POW + 1);
3280 assert(opt_quantum_2pow >= tiny_min_2pow);
3281 ntbins = opt_quantum_2pow - tiny_min_2pow;
3282 assert(ntbins <= opt_quantum_2pow);
3283 nqbins = (small_max >> opt_quantum_2pow);
3284 nsbins = pagesize_2pow - opt_small_max_2pow - 1;
3286 /* Set variables according to the value of opt_quantum_2pow. */
3287 quantum = (1 << opt_quantum_2pow);
3288 quantum_mask = quantum - 1;
3290 small_min = (quantum >> 1) + 1;
3293 assert(small_min <= quantum);
3295 /* Set variables according to the value of opt_chunk_2pow. */
3296 chunk_size = (1 << opt_chunk_2pow);
3297 chunk_size_mask = chunk_size - 1;
3298 arena_chunk_maplen = (1 << (opt_chunk_2pow - pagesize_2pow));
3299 arena_maxclass = (chunk_size >> 1);
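	/*
	 * Example with a hypothetical chunk size (not from the original): with
	 * opt_chunk_2pow == 20 (1 MB chunks) and 4 kB pages,
	 * arena_chunk_maplen == 256 and arena_maxclass == 512 kB, so any
	 * request larger than 512 kB bypasses the arenas entirely and is
	 * handled by huge_malloc().
	 */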
3304 memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3307 /* Various sanity checks on the configuration. */
3308 assert(quantum >= sizeof(void *));
3309 assert(quantum <= pagesize);
3310 assert(chunk_size >= pagesize);
3311 assert(quantum * 4 <= chunk_size);
3313 /* Initialize chunks data. */
3314 malloc_mutex_init(&chunks_mtx);
3317 malloc_mutex_init(&brk_mtx);
3319 brk_prev = brk_base;
3327 RB_INIT(&old_chunks);
3329 /* Initialize base allocation data structures. */
3335 * Allocate a base chunk here, since it doesn't actually have to be
3336 * chunk-aligned. Doing this before allocating any other chunks allows
3337 * the use of space that would otherwise be wasted.
3339 base_chunk_alloc(0);
3341 base_chunk_nodes = NULL;
3342 malloc_mutex_init(&base_mtx);
3346 * For SMP systems, create four times as many arenas as there
3347 * are CPUs by default.
3349 opt_narenas_lshift += 2;
3352 /* Determine how many arenas to use. */
3354 if (opt_narenas_lshift > 0) {
3355 if ((narenas << opt_narenas_lshift) > narenas)
3356 narenas <<= opt_narenas_lshift;
3358 * Make sure not to exceed the limits of what base_alloc() can handle.
3361 if (narenas * sizeof(arena_t *) > chunk_size)
3362 narenas = chunk_size / sizeof(arena_t *);
3363 } else if (opt_narenas_lshift < 0) {
3364 if ((narenas << opt_narenas_lshift) < narenas)
3365 narenas <<= opt_narenas_lshift;
3366 /* Make sure there is at least one arena. */
3373 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
3374 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
3375 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
3376 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
3377 223, 227, 229, 233, 239, 241, 251, 257, 263};
3378 unsigned i, nprimes, parenas;
3381 * Pick a prime number of hash arenas that is more than narenas
3382 * so that direct hashing of pthread_self() pointers tends to
3383 * spread allocations evenly among the arenas.
3385 assert((narenas & 1) == 0); /* narenas must be even. */
3386 nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
3387 parenas = primes[nprimes - 1]; /* In case not enough primes. */
3388 for (i = 1; i < nprimes; i++) {
3389 if (primes[i] > narenas) {
3390 parenas = primes[i];
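		/*
		 * Illustrative sketch only (not the code the allocator
		 * actually uses): with a prime number of arenas, a direct
		 * hash of the thread pointer such as
		 *
		 *	ind = (unsigned)((uintptr_t)pthread_self() % narenas);
		 *
		 * spreads the regularly spaced thread structure addresses
		 * across all arenas, where a power-of-two modulus would alias
		 * many threads onto a few slots.
		 */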
3402 /* Allocate and initialize arenas. */
3403 arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3404 if (arenas == NULL) {
3405 malloc_mutex_unlock(&init_lock);
3409 * Zero the array. In practice, this should always be pre-zeroed,
3410 * since it was just mmap()ed, but let's be sure.
3412 memset(arenas, 0, sizeof(arena_t *) * narenas);
3415 * Initialize one arena here. The rest are lazily created in
3416 * arena_choose_hard().
3419 if (arenas[0] == NULL) {
3420 malloc_mutex_unlock(&init_lock);
3424 malloc_mutex_init(&arenas_mtx);
3426 malloc_initialized = true;
3427 malloc_mutex_unlock(&init_lock);
3432 * End general internal functions.
3434 /******************************************************************************/
3436 * Begin malloc(3)-compatible functions.
3444 if (malloc_init()) {
3450 if (opt_sysv == false)
3458 ret = imalloc(size);
3463 malloc_printf("%s: (malloc) Error in malloc(%zu):"
3464 " out of memory\n", _getprogname(), size);
3470 UTRACE(0, size, ret);
3475 posix_memalign(void **memptr, size_t alignment, size_t size)
3483 /* Make sure that alignment is a large enough power of 2. */
3484 if (((alignment - 1) & alignment) != 0
3485 || alignment < sizeof(void *)) {
3487 malloc_printf("%s: (malloc) Error in"
3488 " posix_memalign(%zu, %zu):"
3489 " invalid alignment\n",
3490 _getprogname(), alignment, size);
3498 result = ipalloc(alignment, size);
3501 if (result == NULL) {
3503 malloc_printf("%s: (malloc) Error in"
3504 " posix_memalign(%zu, %zu): out of memory\n",
3505 _getprogname(), alignment, size);
3516 UTRACE(0, size, result);
3521 calloc(size_t num, size_t size)
3526 if (malloc_init()) {
3531 num_size = num * size;
3532 if (num_size == 0) {
3533 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3540 * Try to avoid division here. We know that it isn't possible to
3541 * overflow during multiplication if neither operand uses any of the
3542 * most significant half of the bits in a size_t.
3544 } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
3545 && (num_size / size != num)) {
3546 /* size_t overflow. */
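	/*
	 * Example (not in the original): on a 64-bit system the mask
	 * (SIZE_T_MAX << 32) covers the upper half of a size_t.  If either
	 * num or size has a bit set there, the product may have overflowed
	 * and the division check runs; otherwise both operands are below
	 * 2^32, so the product fits in 64 bits and the division is skipped
	 * entirely.
	 */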
3551 ret = icalloc(num_size);
3556 malloc_printf("%s: (malloc) Error in"
3557 " calloc(%zu, %zu): out of memory\n",
3558 _getprogname(), num, size);
3564 UTRACE(0, num_size, ret);
3569 realloc(void *ptr, size_t size)
3574 if (opt_sysv == false)
3585 assert(malloc_initialized);
3587 ret = iralloc(ptr, size);
3591 malloc_printf("%s: (malloc) Error in"
3592 " realloc(%p, %zu): out of memory\n",
3593 _getprogname(), ptr, size);
3602 ret = imalloc(size);
3606 malloc_printf("%s: (malloc) Error in"
3607 " realloc(%p, %zu): out of memory\n",
3608 _getprogname(), ptr, size);
3616 UTRACE(ptr, size, ret);
3626 assert(malloc_initialized);
3633 * End malloc(3)-compatible functions.
3635 /******************************************************************************/
3637 * Begin non-standard functions.
3641 malloc_usable_size(const void *ptr)
3644 assert(ptr != NULL);
3646 return (isalloc(ptr));
3650 * End non-standard functions.
3652 /******************************************************************************/
3654 * Begin library-private functions, used by threading libraries for protection
3655 * of malloc during fork(). These functions are only called if the program is
3656 * running in threaded mode, so there is no need to check whether the program is threaded here.
3661 _malloc_prefork(void)
3665 /* Acquire all mutexes in a safe order. */
3667 malloc_mutex_lock(&arenas_mtx);
3668 for (i = 0; i < narenas; i++) {
3669 if (arenas[i] != NULL)
3670 malloc_mutex_lock(&arenas[i]->mtx);
3672 malloc_mutex_unlock(&arenas_mtx);
3674 malloc_mutex_lock(&base_mtx);
3676 malloc_mutex_lock(&chunks_mtx);
3680 _malloc_postfork(void)
3684 /* Release all mutexes, now that fork() has completed. */
3686 malloc_mutex_unlock(&chunks_mtx);
3688 malloc_mutex_unlock(&base_mtx);
3690 malloc_mutex_lock(&arenas_mtx);
3691 for (i = 0; i < narenas; i++) {
3692 if (arenas[i] != NULL)
3693 malloc_mutex_unlock(&arenas[i]->mtx);
3695 malloc_mutex_unlock(&arenas_mtx);
3699 * End library-private functions.
3701 /******************************************************************************/