lib/libc/stdlib/malloc.c

   1 /*-
   2  * Copyright (C) 2006 Jason Evans <jasone@FreeBSD.org>.
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice(s), this list of conditions and the following disclaimer as
  10  *    the first lines of this file unmodified other than the possible
  11  *    addition of one or more copyright notices.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice(s), this list of conditions and the following disclaimer in
  14  *    the documentation and/or other materials provided with the
  15  *    distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  26  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  *
  29  *******************************************************************************
  30  *
  31  * This allocator implementation is designed to provide scalable performance
  32  * for multi-threaded programs on multi-processor systems.  The following
  33  * features are included for this purpose:
  34  *
  35  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
  36  *     contention and cache sloshing.
  37  *
  38  *   + Cache line sharing between arenas is avoided for internal data
  39  *     structures.
  40  *
  41  *   + Memory is managed in chunks and runs (chunks can be split into runs using
  42  *     a binary buddy scheme), rather than as individual pages.  This provides
  43  *     a constant-time mechanism for associating allocations with particular
  44  *     arenas.
  45  *
  46  * Allocation requests are rounded up to the nearest size class, and no record
  47  * of the original request size is maintained.  Allocations are broken into
  48  * categories according to size class.  Assuming runtime defaults, 4 kB pages
  49  * and a 16 byte quantum, the size classes in each category are as follows:
  50  *
  51  *   |====================================|
  52  *   | Category | Subcategory    |   Size |
  53  *   |====================================|
  54  *   | Small    | Tiny           |      2 |
  55  *   |          |                |      4 |
  56  *   |          |                |      8 |
  57  *   |          |----------------+--------|
  58  *   |          | Quantum-spaced |     16 |
  59  *   |          |                |     32 |
  60  *   |          |                |     48 |
  61  *   |          |                |    ... |
  62  *   |          |                |    480 |
  63  *   |          |                |    496 |
  64  *   |          |                |    512 |
  65  *   |          |----------------+--------|
  66  *   |          | Sub-page       |   1 kB |
  67  *   |          |                |   2 kB |
  68  *   |====================================|
  69  *   | Large                     |   4 kB |
  70  *   |                           |   8 kB |
  71  *   |                           |  16 kB |
  72  *   |                           |    ... |
  73  *   |                           | 256 kB |
  74  *   |                           | 512 kB |
  75  *   |                           |   1 MB |
  76  *   |====================================|
  77  *   | Huge                      |   2 MB |
  78  *   |                           |   4 MB |
  79  *   |                           |   6 MB |
  80  *   |                           |    ... |
  81  *   |====================================|
  82  *
  83  * A different mechanism is used for each category:
  84  *
  85  *   Small : Each size class is segregated into its own set of runs.  Each run
  86  *           maintains a bitmap of which regions are free/allocated.
  87  *
  88  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
  89  *           in the associated arena chunk header maps.
  90  *
  91  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
  92  *          Metadata are stored in a separate red-black tree.
  93  *
  94  *******************************************************************************
  95  */
  96
  97 /*
  98  *******************************************************************************
  99  *
 100  * Ring macros.
 101  *
 102  *******************************************************************************
 103  */
 104
 105 /* Ring definitions. */
 106 #define qr(a_type) struct {                                             \
 107         a_type *qre_next;                                               \
 108         a_type *qre_prev;                                               \
 109 }
 110
 111 #define qr_initializer {NULL, NULL}
 112
 113 /* Ring functions. */
 114 #define qr_new(a_qr, a_field) do {                                      \
 115         (a_qr)->a_field.qre_next = (a_qr);                              \
 116         (a_qr)->a_field.qre_prev = (a_qr);                              \
 117 } while (0)
 118
 119 #define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
 120
 121 #define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
 122
 123 #define qr_before_insert(a_qrelm, a_qr, a_field) do {                   \
 124         (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev;         \
 125         (a_qr)->a_field.qre_next = (a_qrelm);                           \
 126         (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr);            \
 127         (a_qrelm)->a_field.qre_prev = (a_qr);                           \
 128 } while (0)
 129
 130 #define qr_after_insert(a_qrelm, a_qr, a_field) do {                    \
 131         (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next;         \
 132         (a_qr)->a_field.qre_prev = (a_qrelm);                           \
 133         (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr);            \
 134         (a_qrelm)->a_field.qre_next = (a_qr);                           \
 135 } while (0)
 136
 137 #define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do {                   \
 138         a_type *t;                                                      \
 139         (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b);        \
 140         (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a);        \
 141         t = (a_qr_a)->a_field.qre_prev;                                 \
 142         (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev;        \
 143         (a_qr_b)->a_field.qre_prev = t;                                 \
 144 } while (0)
 145
 146 /*
 147  * qr_meld() and qr_split() are functionally equivalent, so there's no need to
 148  * have two copies of the code.
 149  */
 150 #define qr_split(a_qr_a, a_qr_b, a_type, a_field)                       \
 151         qr_meld((a_qr_a), (a_qr_b), a_type, a_field)
 152
 153 #define qr_remove(a_qr, a_field) do {                                   \
 154         (a_qr)->a_field.qre_prev->a_field.qre_next                      \
 155             = (a_qr)->a_field.qre_next;                                 \
 156         (a_qr)->a_field.qre_next->a_field.qre_prev                      \
 157             = (a_qr)->a_field.qre_prev;                                 \
 158         (a_qr)->a_field.qre_next = (a_qr);                              \
 159         (a_qr)->a_field.qre_prev = (a_qr);                              \
 160 } while (0)
 161
 162 #define qr_foreach(var, a_qr, a_field)                                  \
 163         for ((var) = (a_qr);                                            \
 164             (var) != NULL;                                              \
 165             (var) = (((var)->a_field.qre_next != (a_qr))                \
 166             ? (var)->a_field.qre_next : NULL))
 167
 168 #define qr_reverse_foreach(var, a_qr, a_field)                          \
 169         for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL;  \
 170             (var) != NULL;                                              \
 171             (var) = (((var) != (a_qr))                                  \
 172             ? (var)->a_field.qre_prev : NULL))
 173
 174 /******************************************************************************/
 175
 176 /*
 177  * In order to disable various extra features that may have negative
 178  * performance impacts, (assertions, expanded statistics), define
 179  * NO_MALLOC_EXTRAS.
 180  */
 181 /* #define NO_MALLOC_EXTRAS */
 182
 183 #ifndef NO_MALLOC_EXTRAS
 184 #  define MALLOC_DEBUG
 185 #endif
 186
 187 #include <sys/cdefs.h>
 188 __FBSDID("$FreeBSD$");
 189
 190 #include "libc_private.h"
 191 #ifdef MALLOC_DEBUG
 192 #  define _LOCK_DEBUG
 193 #endif
 194 #include "spinlock.h"
 195 #include "namespace.h"
 196 #include <sys/mman.h>
 197 #include <sys/param.h>
 198 #include <sys/stddef.h>
 199 #include <sys/time.h>
 200 #include <sys/types.h>
 201 #include <sys/sysctl.h>
 202 #include <sys/tree.h>
 203 #include <sys/uio.h>
 204 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
 205
 206 #include <machine/atomic.h>
 207 #include <machine/cpufunc.h>
 208 #include <machine/vmparam.h>
 209
 210 #include <errno.h>
 211 #include <limits.h>
 212 #include <pthread.h>
 213 #include <sched.h>
 214 #include <stdarg.h>
 215 #include <stdbool.h>
 216 #include <stdio.h>
 217 #include <stdlib.h>
 218 #include <string.h>
 219 #include <strings.h>
 220 #include <unistd.h>
 221
 222 #include "un-namespace.h"
 223
 224 /*
 225  * Calculate statistics that can be used to get an idea of how well caching is
 226  * working.
 227  */
 228 #ifndef NO_MALLOC_EXTRAS
 229 #  define MALLOC_STATS
 230 #endif
 231
 232 #ifndef MALLOC_DEBUG
 233 #  ifndef NDEBUG
 234 #    define NDEBUG
 235 #  endif
 236 #endif
 237 #include <assert.h>
 238
 239 #ifdef MALLOC_DEBUG
 240    /* Disable inlining to make debugging easier. */
 241 #  define inline
 242 #endif
 243
 244 /* Size of stack-allocated buffer passed to strerror_r(). */
 245 #define STRERROR_BUF 64
 246
 247 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
 248 #ifdef __i386__
 249 #  define QUANTUM_2POW_MIN      4
 250 #  define SIZEOF_PTR            4
 251 #  define USE_BRK
 252 #endif
 253 #ifdef __ia64__
 254 #  define QUANTUM_2POW_MIN      4
 255 #  define SIZEOF_PTR            8
 256 #  define NO_TLS
 257 #endif
 258 #ifdef __alpha__
 259 #  define QUANTUM_2POW_MIN      4
 260 #  define SIZEOF_PTR            8
 261 #  define NO_TLS
 262 #endif
 263 #ifdef __sparc64__
 264 #  define QUANTUM_2POW_MIN      4
 265 #  define SIZEOF_PTR            8
 266 #  define NO_TLS
 267 #endif
 268 #ifdef __amd64__
 269 #  define QUANTUM_2POW_MIN      4
 270 #  define SIZEOF_PTR            8
 271 #endif
 272 #ifdef __arm__
 273 #  define QUANTUM_2POW_MIN      3
 274 #  define SIZEOF_PTR            4
 275 #  define USE_BRK
 276 #  define NO_TLS
 277 #endif
 278 #ifdef __powerpc__
 279 #  define QUANTUM_2POW_MIN      4
 280 #  define SIZEOF_PTR            4
 281 #  define USE_BRK
 282 #  define NO_TLS
 283 #endif
 284
 285 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
 286 #ifndef SIZEOF_INT_2POW
 287 #  define SIZEOF_INT_2POW       2
 288 #endif
 289
 290 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
 291 #if (!defined(PIC) && !defined(NO_TLS))
 292 #  define NO_TLS
 293 #endif
 294
 295 /*
 296  * Size and alignment of memory chunks that are allocated by the OS's virtual
 297  * memory system.
 298  *
 299  * chunksize limits:
 300  *
 301  *   2^(pagesize_2pow - 1 + RUN_MIN_REGS_2POW) <= chunk_size <= 2^28
 302  */
 303 #define CHUNK_2POW_DEFAULT      21
 304 #define CHUNK_2POW_MAX          28
 305
 306 /*
 307  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 308  * so over-estimates are okay (up to a point), but under-estimates will
 309  * negatively affect performance.
 310  */
 311 #define CACHELINE_2POW 6
 312 #define CACHELINE ((size_t)(1 << CACHELINE_2POW))
 313
 314 /*
 315  * Maximum size class that is a multiple of the quantum, but not (necessarily)
 316  * a power of 2.  Above this size, allocations are rounded up to the nearest
 317  * power of 2.
 318  */
 319 #define SMALL_MAX_2POW_DEFAULT 9
 320 #define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT)
 321
 322 /*
 323  * Minimum number of regions that must fit into a run that serves quantum-size
 324  * bin allocations.
 325  *
 326  * Note that if this is set too low, space will be wasted if there are size
 327  * classes that are small enough that RUN_MIN_REGS regions don't fill a page.
 328  * If this is set too high, then the overhead of searching through the bitmap
 329  * that tracks region usage will become excessive.
 330  */
 331 #define RUN_MIN_REGS_2POW 10
 332 #define RUN_MIN_REGS (1 << RUN_MIN_REGS_2POW)
 333
 334 /*
 335  * Maximum number of pages for a run that is used for bin allocations.
 336  *
 337  * Note that if this is set too low, then fragmentation for the largest bin
 338  * size classes will be high.  If this is set too high, then even small
 339  * programs will often have to allocate more than two chunks early on.
 340  */
 341 #define RUN_MAX_PAGES_2POW 4
 342 #define RUN_MAX_PAGES (1 << RUN_MAX_PAGES_2POW)
 343
 344 /******************************************************************************/
 345
 346 /*
 347  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
 348  * they require malloc()ed memory.
 349  */
 350 typedef struct {
 351         spinlock_t      lock;
 352 } malloc_mutex_t;
 353
 354 /* Set to true once the allocator has been initialized. */
 355 static bool malloc_initialized = false;
 356
 357 /* Used to avoid initialization races. */
 358 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
 359
 360 /******************************************************************************/
 361 /*
 362  * Statistics data structures.
 363  */
 364
 365 #ifdef MALLOC_STATS
 366
 367 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
 368 struct malloc_bin_stats_s {
 369         /*
 370          * Number of allocation requests that corresponded to the size of this
 371          * bin.
 372          */
 373         uint64_t        nrequests;
 374
 375         /* Total number of runs created for this bin's size class. */
 376         uint64_t        nruns;
 377
 378         /*
 379          * Total number of run promotions/demotions for this bin's size class.
 380          */
 381         uint64_t        npromote;
 382         uint64_t        ndemote;
 383
 384         /* High-water mark for this bin. */
 385         unsigned long   highruns;
 386
 387         /* Current number of runs in this bin. */
 388         unsigned long   curruns;
 389 };
 390
 391 typedef struct arena_stats_s arena_stats_t;
 392 struct arena_stats_s {
 393         /* Total byte count of allocated memory, not including overhead. */
 394         size_t          allocated;
 395
 396         /* Number of times each function was called. */
 397         uint64_t        nmalloc;
 398         uint64_t        ndalloc;
 399         uint64_t        nmadvise;
 400
 401         /* Number of large allocation requests. */
 402         uint64_t        large_nrequests;
 403 };
 404
 405 typedef struct chunk_stats_s chunk_stats_t;
 406 struct chunk_stats_s {
 407         /* Number of chunks that were allocated. */
 408         uint64_t        nchunks;
 409
 410         /* High-water mark for number of chunks allocated. */
 411         unsigned long   highchunks;
 412
 413         /*
 414          * Current number of chunks allocated.  This value isn't maintained for
 415          * any other purpose, so keep track of it in order to be able to set
 416          * highchunks.
 417          */
 418         unsigned long   curchunks;
 419 };
 420
 421 #endif /* #ifdef MALLOC_STATS */
 422
 423 /******************************************************************************/
 424 /*
 425  * Chunk data structures.
 426  */
 427
 428 /* Tree of chunks. */
 429 typedef struct chunk_node_s chunk_node_t;
 430 struct chunk_node_s {
 431         /* Linkage for the chunk tree. */
 432         RB_ENTRY(chunk_node_s) link;
 433
 434         /*
 435          * Pointer to the chunk that this tree node is responsible for.  In some
 436          * (but certainly not all) cases, this data structure is placed at the
 437          * beginning of the corresponding chunk, so this field may point to this
 438          * node.
 439          */
 440         void    *chunk;
 441
 442         /* Total chunk size. */
 443         size_t  size;
 444 };
 445 typedef struct chunk_tree_s chunk_tree_t;
 446 RB_HEAD(chunk_tree_s, chunk_node_s);
 447
 448 /******************************************************************************/
 449 /*
 450  * Arena data structures.
 451  */
 452
 453 typedef struct arena_s arena_t;
 454 typedef struct arena_bin_s arena_bin_t;
 455
 456 typedef struct arena_chunk_map_s arena_chunk_map_t;
 457 struct arena_chunk_map_s {
 458         bool            free:1;
 459         bool            large:1;
 460         unsigned        npages:15; /* Limiting factor for CHUNK_2POW_MAX. */
 461         unsigned        pos:15;
 462 };
 463
 464 /* Arena chunk header. */
 465 typedef struct arena_chunk_s arena_chunk_t;
 466 struct arena_chunk_s {
 467         /* Arena that owns the chunk. */
 468         arena_t *arena;
 469
 470         /* Linkage for the arena's chunk tree. */
 471         RB_ENTRY(arena_chunk_s) link;
 472
 473         /*
 474          * Number of pages in use.  This is maintained in order to make
 475          * detection of empty chunks fast.
 476          */
 477         uint32_t pages_used;
 478
 479         /*
 480          * Array of counters that keeps track of how many free runs of each
 481          * size are available in this chunk.  This table is sized at compile
 482          * time, which is wasteful.  However, due to unrelated rounding, this
 483          * doesn't actually waste any otherwise useful space.
 484          *
 485          *   index == 2^n pages
 486          *
 487          *   index | npages
 488          *   ------+-------
 489          *       0 |      1
 490          *       1 |      2
 491          *       2 |      4
 492          *       3 |      8
 493          *         :
 494          */
 495         uint32_t nfree_runs[CHUNK_2POW_MAX/* - PAGE_SHIFT */];
 496
 497         /* Map of pages within chunk that keeps track of free/large/small. */
 498         arena_chunk_map_t map[1]; /* Dynamically sized. */
 499 };
 500 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
 501 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
 502
 503 typedef struct arena_run_s arena_run_t;
 504 struct arena_run_s {
 505         /* Linkage for run rings. */
 506         qr(arena_run_t) link;
 507
 508 #ifdef MALLOC_DEBUG
 509         uint32_t        magic;
 510 #  define ARENA_RUN_MAGIC 0x384adf93
 511 #endif
 512
 513         /* Bin this run is associated with. */
 514         arena_bin_t     *bin;
 515
 516         /* Bitmask of in-use regions (0: in use, 1: free). */
 517 #define REGS_MASK_NELMS                                                 \
 518         (1 << (RUN_MIN_REGS_2POW - SIZEOF_INT_2POW - 2))
 519         unsigned        regs_mask[REGS_MASK_NELMS];
 520
 521         /* Index of first element that might have a free region. */
 522         unsigned        regs_minelm;
 523
 524         /* Number of free regions in run. */
 525         unsigned        nfree;
 526
 527         /*
 528          * Current quartile for this run, one of: {RUN_QINIT, RUN_Q0, RUN_25,
 529          * RUN_Q50, RUN_Q75, RUN_Q100}.
 530          */
 531 #define RUN_QINIT       0
 532 #define RUN_Q0          1
 533 #define RUN_Q25         2
 534 #define RUN_Q50         3
 535 #define RUN_Q75         4
 536 #define RUN_Q100        5
 537         unsigned        quartile;
 538
 539         /*
 540          * Limits on the number of free regions for the fullness quartile this
 541          * run is currently in.  If nfree goes outside these limits, the run
 542          * is moved to a different fullness quartile.
 543          */
 544         unsigned        free_max;
 545         unsigned        free_min;
 546 };
 547
 548 /* Used for run ring headers, where the run isn't actually used. */
 549 typedef struct arena_run_link_s arena_run_link_t;
 550 struct arena_run_link_s {
 551         /* Linkage for run rings. */
 552         qr(arena_run_t) link;
 553 };
 554
 555 struct arena_bin_s {
 556         /*
 557          * Current run being used to service allocations of this bin's size
 558          * class.
 559          */
 560         arena_run_t     *runcur;
 561
 562         /*
 563          * Links into rings of runs, of various fullnesses (names indicate
 564          * approximate lower bounds).  A new run conceptually starts off in
 565          * runsinit, and it isn't inserted into the runs0 ring until it
 566          * reaches 25% full (hysteresis mechanism).  For the run to be moved
 567          * again, it must become either empty or 50% full.  Thus, each ring
 568          * contains runs that are within 50% above the advertised fullness for
 569          * the ring.  This provides a low-overhead mechanism for segregating
 570          * runs into approximate fullness classes.
 571          *
 572          * Conceptually, there is a runs100 that contains completely full runs.
 573          * Since we don't need to search for these runs though, no runs100 ring
 574          * is actually maintained.
 575          *
 576          * These rings are useful when looking for an existing run to use when
 577          * runcur is no longer usable.  We look for usable runs in the
 578          * following order:
 579          *
 580          *   1) runs50
 581          *   2) runs25
 582          *   3) runs0
 583          *   4) runs75
 584          *
 585          * runs75 isn't a good place to look, because it contains runs that may
 586          * be nearly completely full.  Still, we look there as a last resort in
 587          * order to avoid allocating a new run if at all possible.
 588          */
 589         /* arena_run_link_t     runsinit;   0% <= fullness <   25% */
 590         arena_run_link_t        runs0;  /*  0% <  fullness <   50% */
 591         arena_run_link_t        runs25; /* 25% <  fullness <   75% */
 592         arena_run_link_t        runs50; /* 50% <  fullness <  100% */
 593         arena_run_link_t        runs75; /* 75% <  fullness <  100% */
 594         /* arena_run_link_t     runs100;          fullness == 100% */
 595
 596         /* Size of regions in a run for this bin's size class. */
 597         size_t          reg_size;
 598
 599         /* Total size of a run for this bin's size class. */
 600         size_t          run_size;
 601
 602         /* Total number of regions in a run for this bin's size class. */
 603         uint32_t        nregs;
 604
 605         /* Offset of first region in a run for this bin's size class. */
 606         uint32_t        reg0_offset;
 607
 608 #ifdef MALLOC_STATS
 609         /* Bin statistics. */
 610         malloc_bin_stats_t stats;
 611 #endif
 612 };
 613
 614 struct arena_s {
 615 #ifdef MALLOC_DEBUG
 616         uint32_t                magic;
 617 #  define ARENA_MAGIC 0x947d3d24
 618 #endif
 619
 620         /* All operations on this arena require that mtx be locked. */
 621         malloc_mutex_t          mtx;
 622
 623 #ifdef MALLOC_STATS
 624         arena_stats_t           stats;
 625 #endif
 626
 627         /*
 628          * Tree of chunks this arena manages.
 629          */
 630         arena_chunk_tree_t      chunks;
 631
 632         /*
 633          * bins is used to store rings of free regions of the following sizes,
 634          * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
 635          *
 636          *   bins[i] | size |
 637          *   --------+------+
 638          *        0  |    2 |
 639          *        1  |    4 |
 640          *        2  |    8 |
 641          *   --------+------+
 642          *        3  |   16 |
 643          *        4  |   32 |
 644          *        5  |   48 |
 645          *        6  |   64 |
 646          *           :      :
 647          *           :      :
 648          *       33  |  496 |
 649          *       34  |  512 |
 650          *   --------+------+
 651          *       35  | 1024 |
 652          *       36  | 2048 |
 653          *   --------+------+
 654          */
 655         arena_bin_t             bins[1]; /* Dynamically sized. */
 656 };
 657
 658 /******************************************************************************/
 659 /*
 660  * Data.
 661  */
 662
 663 /* Number of CPUs. */
 664 static unsigned         ncpus;
 665
 666 /* VM page size. */
 667 static unsigned         pagesize;
 668 static unsigned         pagesize_2pow;
 669
 670 /* Various bin-related settings. */
 671 static size_t           bin_maxclass; /* Max size class for bins. */
 672 static unsigned         ntbins; /* Number of (2^n)-spaced tiny bins. */
 673 static unsigned         nqbins; /* Number of quantum-spaced bins. */
 674 static unsigned         nsbins; /* Number of (2^n)-spaced sub-page bins. */
 675 static size_t           small_min;
 676 static size_t           small_max;
 677 static unsigned         tiny_min_2pow;
 678
 679 /* Various quantum-related settings. */
 680 static size_t           quantum;
 681 static size_t           quantum_mask; /* (quantum - 1). */
 682
 683 /* Various chunk-related settings. */
 684 static size_t           chunk_size;
 685 static size_t           chunk_size_mask; /* (chunk_size - 1). */
 686 static size_t           arena_maxclass; /* Max size class for arenas. */
 687 static unsigned         arena_chunk_maplen;
 688
 689 /********/
 690 /*
 691  * Chunks.
 692  */
 693
 694 /* Protects chunk-related data structures. */
 695 static malloc_mutex_t   chunks_mtx;
 696
 697 /* Tree of chunks that are stand-alone huge allocations. */
 698 static chunk_tree_t     huge;
 699
 700 #ifdef USE_BRK
 701 /*
 702  * Try to use brk for chunk-size allocations, due to address space constraints.
 703  */
 704 /* Result of first sbrk(0) call. */
 705 static void             *brk_base;
 706 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
 707 static void             *brk_prev;
 708 /* Current upper limit on brk addresses. */
 709 static void             *brk_max;
 710 #endif
 711
 712 #ifdef MALLOC_STATS
 713 /*
 714  * Byte counters for allocated/total space used by the chunks in the huge
 715  * allocations tree.
 716  */
 717 static uint64_t         huge_nmalloc;
 718 static uint64_t         huge_ndalloc;
 719 static size_t           huge_allocated;
 720 #endif
 721
 722 /*
 723  * Tree of chunks that were previously allocated.  This is used when allocating
 724  * chunks, in an attempt to re-use address space.
 725  */
 726 static chunk_tree_t     old_chunks;
 727
 728 /****************************/
 729 /*
 730  * base (internal allocation).
 731  */
 732
 733 /*
 734  * Current chunk that is being used for internal memory allocations.  This
 735  * chunk is carved up in cacheline-size quanta, so that there is no chance of
 736  * false cache line sharing.
 737  */
 738 static void             *base_chunk;
 739 static void             *base_next_addr;
 740 static void             *base_past_addr; /* Addr immediately past base_chunk. */
 741 static chunk_node_t     *base_chunk_nodes; /* LIFO cache of chunk nodes. */
 742 static malloc_mutex_t   base_mtx;
 743 #ifdef MALLOC_STATS
 744 static uint64_t         base_total;
 745 #endif
 746
 747 /********/
 748 /*
 749  * Arenas.
 750  */
 751
 752 /*
 753  * Arenas that are used to service external requests.  Not all elements of the
 754  * arenas array are necessarily used; arenas are created lazily as needed.
 755  */
 756 static arena_t          **arenas;
 757 static unsigned         narenas;
 758 #ifndef NO_TLS
 759 static unsigned         next_arena;
 760 #endif
 761 static malloc_mutex_t   arenas_mtx; /* Protects arenas initialization. */
 762
 763 #ifndef NO_TLS
 764 /*
 765  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 766  * for allocations.
 767  */
 768 static __thread arena_t *arenas_map;
 769 #endif
 770
 771 #ifdef MALLOC_STATS
 772 /* Chunk statistics. */
 773 static chunk_stats_t    stats_chunks;
 774 #endif
 775
 776 /*******************************/
 777 /*
 778  * Runtime configuration options.
 779  */
 780 const char      *_malloc_options;
 781
 782 #ifndef NO_MALLOC_EXTRAS
 783 static bool     opt_abort = true;
 784 static bool     opt_junk = true;
 785 #else
 786 static bool     opt_abort = false;
 787 static bool     opt_junk = false;
 788 #endif
 789 static bool     opt_hint = false;
 790 static bool     opt_print_stats = false;
 791 static size_t   opt_quantum_2pow = QUANTUM_2POW_MIN;
 792 static size_t   opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
 793 static size_t   opt_chunk_2pow = CHUNK_2POW_DEFAULT;
 794 static bool     opt_utrace = false;
 795 static bool     opt_sysv = false;
 796 static bool     opt_xmalloc = false;
 797 static bool     opt_zero = false;
 798 static int32_t  opt_narenas_lshift = 0;
 799
 800 typedef struct {
 801         void    *p;
 802         size_t  s;
 803         void    *r;
 804 } malloc_utrace_t;
 805
 806 #define UTRACE(a, b, c)                                                 \
 807         if (opt_utrace) {                                               \
 808                 malloc_utrace_t ut = {a, b, c};                         \
 809                 utrace(&ut, sizeof(ut));                                \
 810         }
 811
 812 /******************************************************************************/
 813 /*
 814  * Begin function prototypes for non-inline static functions.
 815  */
 816
 817 static void     malloc_mutex_init(malloc_mutex_t *a_mutex);
 818 static void     wrtmessage(const char *p1, const char *p2, const char *p3,
 819                 const char *p4);
 820 static void     malloc_printf(const char *format, ...);
 821 static void     *base_alloc(size_t size);
 822 static chunk_node_t *base_chunk_node_alloc(void);
 823 static void     base_chunk_node_dealloc(chunk_node_t *node);
 824 #ifdef MALLOC_STATS
 825 static void     stats_print(arena_t *arena);
 826 #endif
 827 static void     *pages_map(void *addr, size_t size);
 828 static void     pages_unmap(void *addr, size_t size);
 829 static void     *chunk_alloc(size_t size);
 830 static void     chunk_dealloc(void *chunk, size_t size);
 831 #ifndef NO_TLS
 832 static arena_t  *choose_arena_hard(void);
 833 #endif
 834 static void     arena_run_split(arena_t *arena, arena_run_t *run, bool large,
 835     size_t size);
 836 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
 837 static void     arena_chunk_dealloc(arena_chunk_t *chunk);
 838 static void     arena_bin_run_promote(arena_t *arena, arena_bin_t *bin,
 839     arena_run_t *run);
 840 static void     arena_bin_run_demote(arena_t *arena, arena_bin_t *bin,
 841     arena_run_t *run);
 842 static arena_run_t *arena_run_alloc(arena_t *arena, bool large, size_t size);
 843 static void     arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
 844 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
 845 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
 846 static void     *arena_malloc(arena_t *arena, size_t size);
 847 static size_t   arena_salloc(const void *ptr);
 848 static void     *arena_ralloc(void *ptr, size_t size, size_t oldsize);
 849 static void     arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
 850 static bool     arena_new(arena_t *arena);
 851 static arena_t  *arenas_extend(unsigned ind);
 852 static void     *huge_malloc(size_t size);
 853 static void     *huge_ralloc(void *ptr, size_t size, size_t oldsize);
 854 static void     huge_dalloc(void *ptr);
 855 static void     *imalloc(size_t size);
 856 static void     *ipalloc(size_t alignment, size_t size);
 857 static void     *icalloc(size_t size);
 858 static size_t   isalloc(const void *ptr);
 859 static void     *iralloc(void *ptr, size_t size);
 860 static void     idalloc(void *ptr);
 861 static void     malloc_print_stats(void);
 862 static bool     malloc_init_hard(void);
 863
 864 /*
 865  * End function prototypes.
 866  */
 867 /******************************************************************************/
 868 /*
 869  * Begin mutex.
 870  */
 871
 872 static void
 873 malloc_mutex_init(malloc_mutex_t *a_mutex)
 874 {
 875         static const spinlock_t lock = _SPINLOCK_INITIALIZER;
 876
 877         a_mutex->lock = lock;
 878 }
 879
 880 static inline void
 881 malloc_mutex_lock(malloc_mutex_t *a_mutex)
 882 {
 883
 884         if (__isthreaded)
 885                 _SPINLOCK(&a_mutex->lock);
 886 }
 887
 888 static inline void
 889 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
 890 {
 891
 892         if (__isthreaded)
 893                 _SPINUNLOCK(&a_mutex->lock);
 894 }
 895
 896 /*
 897  * End mutex.
 898  */
 899 /******************************************************************************/
 900 /*
 901  * Begin Utility functions/macros.
 902  */
 903
 904 /* Return the chunk address for allocation address a. */
 905 #define CHUNK_ADDR2BASE(a)                                              \
 906         ((void *)((uintptr_t)(a) & ~chunk_size_mask))
 907
 908 /* Return the chunk offset of address a. */
 909 #define CHUNK_ADDR2OFFSET(a)                                            \
 910         ((size_t)((uintptr_t)(a) & chunk_size_mask))
 911
 912 /* Return the smallest chunk multiple that is >= s. */
 913 #define CHUNK_CEILING(s)                                                \
 914         (((s) + chunk_size_mask) & ~chunk_size_mask)
 915
 916 /* Return the smallest cacheline multiple that is >= s. */
 917 #define CACHELINE_CEILING(s)                                            \
 918         (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
 919
 920 /* Return the smallest quantum multiple that is >= a. */
 921 #define QUANTUM_CEILING(a)                                              \
 922         (((a) + quantum_mask) & ~quantum_mask)
 923
 924 /* Compute the smallest power of 2 that is >= x. */
 925 static inline size_t
 926 pow2_ceil(size_t x)
 927 {
 928
 929         x--;
 930         x |= x >> 1;
 931         x |= x >> 2;
 932         x |= x >> 4;
 933         x |= x >> 8;
 934         x |= x >> 16;
 935 #if (SIZEOF_PTR == 8)
 936         x |= x >> 32;
 937 #endif
 938         x++;
 939         return (x);
 940 }
 941
 942 static void
 943 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
 944 {
 945
 946         _write(STDERR_FILENO, p1, strlen(p1));
 947         _write(STDERR_FILENO, p2, strlen(p2));
 948         _write(STDERR_FILENO, p3, strlen(p3));
 949         _write(STDERR_FILENO, p4, strlen(p4));
 950 }
 951
 952 void    (*_malloc_message)(const char *p1, const char *p2, const char *p3,
 953             const char *p4) = wrtmessage;
 954
 955 /*
 956  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
 957  */
 958 static void
 959 malloc_printf(const char *format, ...)
 960 {
 961         char buf[4096];
 962         va_list ap;
 963
 964         va_start(ap, format);
 965         vsnprintf(buf, sizeof(buf), format, ap);
 966         va_end(ap);
 967         _malloc_message(buf, "", "", "");
 968 }
 969
 970 /******************************************************************************/
 971
 972 static void *
 973 base_alloc(size_t size)
 974 {
 975         void *ret;
 976         size_t csize;
 977
 978         /* Round size up to nearest multiple of the cacheline size. */
 979         csize = CACHELINE_CEILING(size);
 980
 981         malloc_mutex_lock(&base_mtx);
 982
 983         /* Make sure there's enough space for the allocation. */
 984         if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
 985                 void *tchunk;
 986
 987                 assert(csize <= chunk_size);
 988
 989                 tchunk = chunk_alloc(chunk_size);
 990                 if (tchunk == NULL) {
 991                         ret = NULL;
 992                         goto RETURN;
 993                 }
 994                 base_chunk = tchunk;
 995                 base_next_addr = (void *)base_chunk;
 996                 base_past_addr = (void *)((uintptr_t)base_chunk + chunk_size);
 997 #ifdef MALLOC_STATS
 998                 base_total += chunk_size;
 999 #endif
1000         }
1001
1002         /* Allocate. */
1003         ret = base_next_addr;
1004         base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1005
1006 RETURN:
1007         malloc_mutex_unlock(&base_mtx);
1008         return (ret);
1009 }
1010
1011 static chunk_node_t *
1012 base_chunk_node_alloc(void)
1013 {
1014         chunk_node_t *ret;
1015
1016         malloc_mutex_lock(&base_mtx);
1017         if (base_chunk_nodes != NULL) {
1018                 ret = base_chunk_nodes;
1019                 base_chunk_nodes = *(chunk_node_t **)ret;
1020                 malloc_mutex_unlock(&base_mtx);
1021         } else {
1022                 malloc_mutex_unlock(&base_mtx);
1023                 ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1024         }
1025
1026         return (ret);
1027 }
1028
1029 static void
1030 base_chunk_node_dealloc(chunk_node_t *node)
1031 {
1032
1033         malloc_mutex_lock(&base_mtx);
1034         *(chunk_node_t **)node = base_chunk_nodes;
1035         base_chunk_nodes = node;
1036         malloc_mutex_unlock(&base_mtx);
1037 }
1038
1039 /******************************************************************************/
1040
1041 #ifdef MALLOC_STATS
1042 static void
1043 stats_print(arena_t *arena)
1044 {
1045         unsigned i;
1046         int gap_start;
1047
1048         malloc_printf("allocated: %zu\n", arena->stats.allocated);
1049
1050         malloc_printf("calls:\n");
1051         malloc_printf(" %12s %12s %12s\n", "nmalloc","ndalloc", "nmadvise");
1052         malloc_printf(" %12llu %12llu %12llu\n",
1053             arena->stats.nmalloc, arena->stats.ndalloc, arena->stats.nmadvise);
1054
1055         malloc_printf("large requests: %llu\n", arena->stats.large_nrequests);
1056
1057         malloc_printf("bins:\n");
1058         malloc_printf("%13s %1s %4s %5s %6s %9s %5s %6s %7s %6s %6s\n",
1059             "bin", "", "size", "nregs", "run_sz", "nrequests", "nruns",
1060             "hiruns", "curruns", "npromo", "ndemo");
1061         for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1062                 if (arena->bins[i].stats.nrequests == 0) {
1063                         if (gap_start == -1)
1064                                 gap_start = i;
1065                 } else {
1066                         if (gap_start != -1) {
1067                                 if (i > gap_start + 1) {
1068                                         /* Gap of more than one size class. */
1069                                         malloc_printf("[%u..%u]\n",
1070                                             gap_start, i - 1);
1071                                 } else {
1072                                         /* Gap of one size class. */
1073                                         malloc_printf("[%u]\n", gap_start);
1074                                 }
1075                                 gap_start = -1;
1076                         }
1077                         malloc_printf(
1078                             "%13u %1s %4u %5u %6u %9llu %5llu"
1079                             " %6lu %7lu %6llu %6llu\n",
1080                             i,
1081                             i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1082                             arena->bins[i].reg_size,
1083                             arena->bins[i].nregs,
1084                             arena->bins[i].run_size,
1085                             arena->bins[i].stats.nrequests,
1086                             arena->bins[i].stats.nruns,
1087                             arena->bins[i].stats.highruns,
1088                             arena->bins[i].stats.curruns,
1089                             arena->bins[i].stats.npromote,
1090                             arena->bins[i].stats.ndemote);
1091                 }
1092         }
1093         if (gap_start != -1) {
1094                 if (i > gap_start + 1) {
1095                         /* Gap of more than one size class. */
1096                         malloc_printf("[%u..%u]\n", gap_start, i - 1);
1097                 } else {
1098                         /* Gap of one size class. */
1099                         malloc_printf("[%u]\n", gap_start);
1100                 }
1101         }
1102 }
1103 #endif
1104
1105 /*
1106  * End Utility functions/macros.
1107  */
1108 /******************************************************************************/
1109 /*
1110  * Begin chunk management functions.
1111  */
1112
1113 static inline int
1114 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1115 {
1116
1117         assert(a != NULL);
1118         assert(b != NULL);
1119
1120         if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1121                 return (-1);
1122         else if (a->chunk == b->chunk)
1123                 return (0);
1124         else
1125                 return (1);
1126 }
1127
1128 /* Generate red-black tree code for chunks. */
1129 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1130
1131 static void *
1132 pages_map(void *addr, size_t size)
1133 {
1134         void *ret;
1135
1136         /*
1137          * We don't use MAP_FIXED here, because it can cause the *replacement*
1138          * of existing mappings, and we only want to create new mappings.
1139          */
1140         ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1141             -1, 0);
1142         assert(ret != NULL);
1143
1144         if (ret == MAP_FAILED)
1145                 ret = NULL;
1146         else if (addr != NULL && ret != addr) {
1147                 /*
1148                  * We succeeded in mapping memory, but not in the right place.
1149                  */
1150                 if (munmap(ret, size) == -1) {
1151                         char buf[STRERROR_BUF];
1152
1153                         strerror_r(errno, buf, sizeof(buf));
1154                         malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1155                             _getprogname(), buf);
1156                         if (opt_abort)
1157                                 abort();
1158                 }
1159                 ret = NULL;
1160         }
1161
1162         assert(ret == NULL || (addr == NULL && ret != addr)
1163             || (addr != NULL && ret == addr));
1164         return (ret);
1165 }
1166
1167 static void
1168 pages_unmap(void *addr, size_t size)
1169 {
1170
1171         if (munmap(addr, size) == -1) {
1172                 char buf[STRERROR_BUF];
1173
1174                 strerror_r(errno, buf, sizeof(buf));
1175                 malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1176                     _getprogname(), buf);
1177                 if (opt_abort)
1178                         abort();
1179         }
1180 }
1181
1182 static void *
1183 chunk_alloc(size_t size)
1184 {
1185         void *ret, *chunk;
1186         chunk_node_t *tchunk, *delchunk;
1187
1188         assert(size != 0);
1189         assert(size % chunk_size == 0);
1190
1191         malloc_mutex_lock(&chunks_mtx);
1192
1193         if (size == chunk_size) {
1194                 /*
1195                  * Check for address ranges that were previously chunks and try
1196                  * to use them.
1197                  */
1198
1199                 tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1200                 while (tchunk != NULL) {
1201                         /* Found an address range.  Try to recycle it. */
1202
1203                         chunk = tchunk->chunk;
1204                         delchunk = tchunk;
1205                         tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1206
1207                         /* Remove delchunk from the tree. */
1208                         RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1209                         base_chunk_node_dealloc(delchunk);
1210
1211 #ifdef USE_BRK
1212                         if ((uintptr_t)chunk >= (uintptr_t)brk_base
1213                             && (uintptr_t)chunk < (uintptr_t)brk_max) {
1214                                 /* Re-use a previously freed brk chunk. */
1215                                 ret = chunk;
1216                                 goto RETURN;
1217                         }
1218 #endif
1219                         if ((ret = pages_map(chunk, size)) != NULL) {
1220                                 /* Success. */
1221                                 goto RETURN;
1222                         }
1223                 }
1224         }
1225
1226 #ifdef USE_BRK
1227         /*
1228          * Try to create allocations in brk, in order to make full use of
1229          * limited address space.
1230          */
1231         if (brk_prev != (void *)-1) {
1232                 void *brk_cur;
1233                 intptr_t incr;
1234
1235                 /*
1236                  * The loop is necessary to recover from races with other
1237                  * threads that are using brk for something other than malloc.
1238                  */
1239                 do {
1240                         /* Get the current end of brk. */
1241                         brk_cur = sbrk(0);
1242
1243                         /*
1244                          * Calculate how much padding is necessary to
1245                          * chunk-align the end of brk.
1246                          */
1247                         incr = (intptr_t)size
1248                             - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1249                         if (incr == size) {
1250                                 ret = brk_cur;
1251                         } else {
1252                                 ret = (void *)(intptr_t)brk_cur + incr;
1253                                 incr += size;
1254                         }
1255
1256                         brk_prev = sbrk(incr);
1257                         if (brk_prev == brk_cur) {
1258                                 /* Success. */
1259                                 brk_max = (void *)(intptr_t)ret + size;
1260                                 goto RETURN;
1261                         }
1262                 } while (brk_prev != (void *)-1);
1263         }
1264 #endif
1265
1266         /*
1267          * Try to over-allocate, but allow the OS to place the allocation
1268          * anywhere.  Beware of size_t wrap-around.
1269          */
1270         if (size + chunk_size > size) {
1271                 if ((ret = pages_map(NULL, size + chunk_size)) != NULL) {
1272                         size_t offset = CHUNK_ADDR2OFFSET(ret);
1273
1274                         /*
1275                          * Success.  Clean up unneeded leading/trailing space.
1276                          */
1277                         if (offset != 0) {
1278                                 /* Leading space. */
1279                                 pages_unmap(ret, chunk_size - offset);
1280
1281                                 ret = (void *)((uintptr_t)ret + (chunk_size -
1282                                     offset));
1283
1284                                 /* Trailing space. */
1285                                 pages_unmap((void *)((uintptr_t)ret + size),
1286                                     offset);
1287                         } else {
1288                                 /* Trailing space only. */
1289                                 pages_unmap((void *)((uintptr_t)ret + size),
1290                                     chunk_size);
1291                         }
1292                         goto RETURN;
1293                 }
1294         }
1295
1296         /* All strategies for allocation failed. */
1297         ret = NULL;
1298 RETURN:
1299 #ifdef MALLOC_STATS
1300         if (ret != NULL) {
1301                 stats_chunks.nchunks += (size / chunk_size);
1302                 stats_chunks.curchunks += (size / chunk_size);
1303         }
1304         if (stats_chunks.curchunks > stats_chunks.highchunks)
1305                 stats_chunks.highchunks = stats_chunks.curchunks;
1306 #endif
1307         malloc_mutex_unlock(&chunks_mtx);
1308
1309         assert(CHUNK_ADDR2BASE(ret) == ret);
1310         return (ret);
1311 }
1312
1313 static void
1314 chunk_dealloc(void *chunk, size_t size)
1315 {
1316         size_t offset;
1317         chunk_node_t key;
1318         chunk_node_t *node;
1319
1320         assert(chunk != NULL);
1321         assert(CHUNK_ADDR2BASE(chunk) == chunk);
1322         assert(size != 0);
1323         assert(size % chunk_size == 0);
1324
1325         malloc_mutex_lock(&chunks_mtx);
1326
1327 #ifdef USE_BRK
1328         if ((uintptr_t)chunk >= (uintptr_t)brk_base
1329             && (uintptr_t)chunk < (uintptr_t)brk_max) {
1330                 void *brk_cur;
1331
1332                 /* Get the current end of brk. */
1333                 brk_cur = sbrk(0);
1334
1335                 /*
1336                  * Try to shrink the data segment if this chunk is at the end
1337                  * of the data segment.  The sbrk() call here is subject to a
1338                  * race condition with threads that use brk(2) or sbrk(2)
1339                  * directly, but the alternative would be to leak memory for
1340                  * the sake of poorly designed multi-threaded programs.
1341                  */
1342                 if (brk_cur == brk_max
1343                     && (void *)(uintptr_t)chunk + size == brk_max
1344                     && sbrk(-(intptr_t)size) == brk_max) {
1345                         if (brk_prev == brk_max) {
1346                                 /* Success. */
1347                                 brk_prev = (void *)(intptr_t)brk_max
1348                                     - (intptr_t)size;
1349                                 brk_max = brk_prev;
1350                         }
1351                         goto RETURN;
1352                 } else
1353                         madvise(chunk, size, MADV_FREE);
1354         } else
1355 #endif
1356                 pages_unmap(chunk, size);
1357
1358         /*
1359          * Iteratively create records of each chunk-sized memory region that
1360          * 'chunk' is comprised of, so that the address range can be recycled
1361          * if memory usage increases later on.
1362          */
1363         for (offset = 0; offset < size; offset += chunk_size) {
1364                 /*
1365                  * It is possible for chunk to overlap existing entries in
1366                  * old_chunks if it is a huge allocation, so take care to not
1367                  * leak tree nodes.
1368                  */
1369                 key.chunk = (void *)((uintptr_t)chunk + (uintptr_t)offset);
1370                 if (RB_FIND(chunk_tree_s, &old_chunks, &key) == NULL) {
1371                         node = base_chunk_node_alloc();
1372                         if (node == NULL)
1373                                 break;
1374
1375                         node->chunk = key.chunk;
1376                         node->size = chunk_size;
1377                         RB_INSERT(chunk_tree_s, &old_chunks, node);
1378                 }
1379         }
1380
1381 #ifdef USE_BRK
1382 RETURN:
1383 #endif
1384 #ifdef MALLOC_STATS
1385         stats_chunks.curchunks -= (size / chunk_size);
1386 #endif
1387         malloc_mutex_unlock(&chunks_mtx);
1388 }
1389
1390 /*
1391  * End chunk management functions.
1392  */
1393 /******************************************************************************/
1394 /*
1395  * Begin arena.
1396  */
1397
1398 /*
1399  * Choose an arena based on a per-thread value (fast-path code, calls slow-path
1400  * code if necessary.
1401  */
1402 static inline arena_t *
1403 choose_arena(void)
1404 {
1405         arena_t *ret;
1406
1407         /*
1408          * We can only use TLS if this is a PIC library, since for the static
1409          * library version, libc's malloc is used by TLS allocation, which
1410          * introduces a bootstrapping issue.
1411          */
1412 #ifndef NO_TLS
1413         if (__isthreaded == false) {
1414             /*
1415              * Avoid the overhead of TLS for single-threaded operation.  If the
1416              * app switches to threaded mode, the initial thread may end up
1417              * being assigned to some other arena, but this one-time switch
1418              * shouldn't cause significant issues.
1419              * */
1420             return (arenas[0]);
1421         }
1422
1423         ret = arenas_map;
1424         if (ret == NULL)
1425                 ret = choose_arena_hard();
1426 #else
1427         if (__isthreaded) {
1428                 unsigned long ind;
1429
1430                 /*
1431                  * Hash _pthread_self() to one of the arenas.  There is a prime
1432                  * number of arenas, so this has a reasonable chance of
1433                  * working.  Even so, the hashing can be easily thwarted by
1434                  * inconvenient _pthread_self() values.  Without specific
1435                  * knowledge of how _pthread_self() calculates values, we can't
1436                  * easily do much better than this.
1437                  */
1438                 ind = (unsigned long) _pthread_self() % narenas;
1439
1440                 /*
1441                  * Optimistially assume that arenas[ind] has been initialized.
1442                  * At worst, we find out that some other thread has already
1443                  * done so, after acquiring the lock in preparation.  Note that
1444                  * this lazy locking also has the effect of lazily forcing
1445                  * cache coherency; without the lock acquisition, there's no
1446                  * guarantee that modification of arenas[ind] by another thread
1447                  * would be seen on this CPU for an arbitrary amount of time.
1448                  *
1449                  * In general, this approach to modifying a synchronized value
1450                  * isn't a good idea, but in this case we only ever modify the
1451                  * value once, so things work out well.
1452                  */
1453                 ret = arenas[ind];
1454                 if (ret == NULL) {
1455                         /*
1456                          * Avoid races with another thread that may have already
1457                          * initialized arenas[ind].
1458                          */
1459                         malloc_mutex_lock(&arenas_mtx);
1460                         if (arenas[ind] == NULL)
1461                                 ret = arenas_extend((unsigned)ind);
1462                         else
1463                                 ret = arenas[ind];
1464                         malloc_mutex_unlock(&arenas_mtx);
1465                 }
1466         } else
1467                 ret = arenas[0];
1468 #endif
1469
1470         assert(ret != NULL);
1471         return (ret);
1472 }
1473
1474 #ifndef NO_TLS
1475 /*
1476  * Choose an arena based on a per-thread value (slow-path code only, called
1477  * only by choose_arena()).
1478  */
1479 static arena_t *
1480 choose_arena_hard(void)
1481 {
1482         arena_t *ret;
1483
1484         assert(__isthreaded);
1485
1486         /* Assign one of the arenas to this thread, in a round-robin fashion. */
1487         malloc_mutex_lock(&arenas_mtx);
1488         ret = arenas[next_arena];
1489         if (ret == NULL)
1490                 ret = arenas_extend(next_arena);
1491         if (ret == NULL) {
1492                 /*
1493                  * Make sure that this function never returns NULL, so that
1494                  * choose_arena() doesn't have to check for a NULL return
1495                  * value.
1496                  */
1497                 ret = arenas[0];
1498         }
1499         next_arena = (next_arena + 1) % narenas;
1500         malloc_mutex_unlock(&arenas_mtx);
1501         arenas_map = ret;
1502
1503         return (ret);
1504 }
1505 #endif
1506
1507 static inline int
1508 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1509 {
1510
1511         assert(a != NULL);
1512         assert(b != NULL);
1513
1514         if ((uintptr_t)a < (uintptr_t)b)
1515                 return (-1);
1516         else if (a == b)
1517                 return (0);
1518         else
1519                 return (1);
1520 }
1521
1522 /* Generate red-black tree code for arena chunks. */
1523 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1524
1525 static inline void *
1526 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1527 {
1528         void *ret;
1529         unsigned i, mask, bit, regind;
1530
1531         assert(run->magic == ARENA_RUN_MAGIC);
1532
1533         for (i = run->regs_minelm; i < REGS_MASK_NELMS; i++) {
1534                 mask = run->regs_mask[i];
1535                 if (mask != 0) {
1536                         /* Usable allocation found. */
1537                         bit = ffs(mask) - 1;
1538
1539                         regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1540                         ret = (void *)&((char *)run)[bin->reg0_offset
1541                             + (bin->reg_size * regind)];
1542
1543                         /* Clear bit. */
1544                         mask ^= (1 << bit);
1545                         run->regs_mask[i] = mask;
1546
1547                         return (ret);
1548                 } else {
1549                         /*
1550                          * Make a note that nothing before this element
1551                          * contains a free region.
1552                          */
1553                         run->regs_minelm = i + 1;
1554                 }
1555         }
1556         /* Not reached. */
1557         assert(0);
1558         return (NULL);
1559 }
1560
1561 static inline void
1562 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1563 {
1564         /*
1565          * To divide by a number D that is not a power of two we multiply
1566          * by (2^21 / D) and then right shift by 21 positions.
1567          *
1568          *   X / D
1569          *
1570          * becomes
1571          *
1572          *   (size_invs[(D >> QUANTUM_2POW_MIN) - 3] * D) >> SIZE_INV_SHIFT
1573          */
1574 #define SIZE_INV_SHIFT 21
1575 #define SIZE_INV(s) (((1 << SIZE_INV_SHIFT) / (s << QUANTUM_2POW_MIN)) + 1)
1576         static const unsigned size_invs[] = {
1577             SIZE_INV(3),
1578             SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
1579             SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
1580             SIZE_INV(12),SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
1581             SIZE_INV(16),SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
1582             SIZE_INV(20),SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
1583             SIZE_INV(24),SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
1584             SIZE_INV(28),SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
1585 #if (QUANTUM_2POW_MIN < 4)
1586             ,
1587             SIZE_INV(32), SIZE_INV(33), SIZE_INV(34), SIZE_INV(35),
1588             SIZE_INV(36), SIZE_INV(37), SIZE_INV(38), SIZE_INV(39),
1589             SIZE_INV(40), SIZE_INV(41), SIZE_INV(42), SIZE_INV(43),
1590             SIZE_INV(44), SIZE_INV(45), SIZE_INV(46), SIZE_INV(47),
1591             SIZE_INV(48), SIZE_INV(49), SIZE_INV(50), SIZE_INV(51),
1592             SIZE_INV(52), SIZE_INV(53), SIZE_INV(54), SIZE_INV(55),
1593             SIZE_INV(56), SIZE_INV(57), SIZE_INV(58), SIZE_INV(59),
1594             SIZE_INV(60), SIZE_INV(61), SIZE_INV(62), SIZE_INV(63)
1595 #endif
1596         };
1597         unsigned diff, regind, elm, bit;
1598
1599         assert(run->magic == ARENA_RUN_MAGIC);
1600         assert(((sizeof(size_invs)) / sizeof(unsigned)) + 3
1601             >= (SMALL_MAX_DEFAULT >> QUANTUM_2POW_MIN));
1602
1603         /*
1604          * Avoid doing division with a variable divisor if possible.  Using
1605          * actual division here can reduce allocator throughput by over 20%!
1606          */
1607         diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1608         if ((size & (size - 1)) == 0) {
1609                 /*
1610                  * log2_table allows fast division of a power of two in the
1611                  * [1..128] range.
1612                  *
1613                  * (x / divisor) becomes (x >> log2_table[divisor - 1]).
1614                  */
1615                 static const unsigned char log2_table[] = {
1616                     0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 4,
1617                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5,
1618                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1619                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6,
1620                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1621                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1622                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1623                     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7
1624                 };
1625
1626                 if (size <= 128)
1627                         regind = (diff >> log2_table[size - 1]);
1628                 else if (size <= 32768)
1629                         regind = diff >> (8 + log2_table[(size >> 8) - 1]);
1630                 else {
1631                         /*
1632                          * The page size is too large for us to use the lookup
1633                          * table.  Use real division.
1634                          */
1635                         regind = diff / size;
1636                 }
1637         } else if (size <= ((sizeof(size_invs) / sizeof(unsigned))
1638             << QUANTUM_2POW_MIN) + 2) {
1639                 regind = size_invs[(size >> QUANTUM_2POW_MIN) - 3] * diff;
1640                 regind >>= SIZE_INV_SHIFT;
1641         } else {
1642                 /*
1643                  * size_invs isn't large enough to handle this size class, so
1644                  * calculate regind using actual division.  This only happens
1645                  * if the user increases small_max via the 'S' runtime
1646                  * configuration option.
1647                  */
1648                 regind = diff / size;
1649         };
1650         assert(regind == diff / size);
1651         assert(regind < bin->nregs);
1652
1653         elm = regind >> (SIZEOF_INT_2POW + 3);
1654         if (elm < run->regs_minelm)
1655                 run->regs_minelm = elm;
1656         bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1657         assert((run->regs_mask[elm] & (1 << bit)) == 0);
1658         run->regs_mask[elm] |= (1 << bit);
1659 #undef SIZE_INV
1660 #undef SIZE_INV_SHIFT
1661 }
1662
1663 static void
1664 arena_run_split(arena_t *arena, arena_run_t *run, bool large, size_t size)
1665 {
1666         arena_chunk_t *chunk;
1667         unsigned run_ind, map_offset, total_pages, need_pages;
1668         unsigned i, log2_run_pages, run_pages;
1669
1670         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1671         run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1672             >> pagesize_2pow);
1673         assert(chunk->map[run_ind].free);
1674         total_pages = chunk->map[run_ind].npages;
1675         need_pages = (size >> pagesize_2pow);
1676
1677         assert(chunk->map[run_ind].free);
1678         assert(chunk->map[run_ind].large == false);
1679         assert(chunk->map[run_ind].npages == total_pages);
1680
1681         /* Split enough pages from the front of run to fit allocation size. */
1682         map_offset = run_ind;
1683         for (i = 0; i < need_pages; i++) {
1684                 chunk->map[map_offset + i].free = false;
1685                 chunk->map[map_offset + i].large = large;
1686                 chunk->map[map_offset + i].npages = need_pages;
1687                 chunk->map[map_offset + i].pos = i;
1688         }
1689
1690         /* Update map for trailing pages. */
1691         map_offset += need_pages;
1692         while (map_offset < run_ind + total_pages) {
1693                 log2_run_pages = ffs(map_offset) - 1;
1694                 run_pages = (1 << log2_run_pages);
1695
1696                 chunk->map[map_offset].free = true;
1697                 chunk->map[map_offset].large = false;
1698                 chunk->map[map_offset].npages = run_pages;
1699
1700                 chunk->nfree_runs[log2_run_pages]++;
1701
1702                 map_offset += run_pages;
1703         }
1704
1705         chunk->pages_used += (size >> pagesize_2pow);
1706 }
1707
1708 static arena_chunk_t *
1709 arena_chunk_alloc(arena_t *arena)
1710 {
1711         arena_chunk_t *chunk;
1712         unsigned i, j, header_npages, pow2_header_npages, map_offset;
1713         unsigned log2_run_pages, run_pages;
1714         size_t header_size;
1715
1716         chunk = (arena_chunk_t *)chunk_alloc(chunk_size);
1717         if (chunk == NULL)
1718                 return (NULL);
1719
1720         chunk->arena = arena;
1721
1722         RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1723
1724         /*
1725          * Claim that no pages are in use, since the header is merely overhead.
1726          */
1727         chunk->pages_used = 0;
1728
1729         memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs));
1730
1731         header_size = (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen]
1732             - (uintptr_t)chunk);
1733         if (header_size % pagesize != 0) {
1734                 /* Round up to the nearest page boundary. */
1735                 header_size += pagesize - (header_size % pagesize);
1736         }
1737
1738         header_npages = header_size >> pagesize_2pow;
1739         pow2_header_npages = pow2_ceil(header_npages);
1740
1741         /*
1742          * Iteratively mark runs as in use, until we've spoken for the entire
1743          * header.
1744          */
1745         map_offset = 0;
1746         for (i = 0; header_npages > 0; i++) {
1747                 if ((pow2_header_npages >> i) <= header_npages) {
1748                         for (j = 0; j < (pow2_header_npages >> i); j++) {
1749                                 chunk->map[map_offset + j].free = false;
1750                                 chunk->map[map_offset + j].large = false;
1751                                 chunk->map[map_offset + j].npages =
1752                                     (pow2_header_npages >> i);
1753                                 chunk->map[map_offset + j].pos = j;
1754                         }
1755                         header_npages -= (pow2_header_npages >> i);
1756                         map_offset += (pow2_header_npages >> i);
1757                 }
1758         }
1759
1760         /*
1761          * Finish initializing map.  The chunk header takes up some space at
1762          * the beginning of the chunk, which we just took care of by
1763          * "allocating" the leading pages.
1764          */
1765         while (map_offset < (chunk_size >> pagesize_2pow)) {
1766                 log2_run_pages = ffs(map_offset) - 1;
1767                 run_pages = (1 << log2_run_pages);
1768
1769                 chunk->map[map_offset].free = true;
1770                 chunk->map[map_offset].large = false;
1771                 chunk->map[map_offset].npages = run_pages;
1772
1773                 chunk->nfree_runs[log2_run_pages]++;
1774
1775                 map_offset += run_pages;
1776         }
1777
1778         return (chunk);
1779 }
1780
1781 static void
1782 arena_chunk_dealloc(arena_chunk_t *chunk)
1783 {
1784
1785         RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1786
1787         chunk_dealloc((void *)chunk, chunk_size);
1788 }
1789
1790 static void
1791 arena_bin_run_promote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
1792 {
1793
1794         assert(bin == run->bin);
1795
1796         /* Promote. */
1797         assert(run->free_min > run->nfree);
1798         assert(run->quartile < RUN_Q100);
1799         run->quartile++;
1800 #ifdef MALLOC_STATS
1801         bin->stats.npromote++;
1802 #endif
1803
1804         /* Re-file run. */
1805         switch (run->quartile) {
1806                 case RUN_QINIT:
1807                         assert(0);
1808                         break;
1809                 case RUN_Q0:
1810                         qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1811                         run->free_max = bin->nregs - 1;
1812                         run->free_min = (bin->nregs >> 1) + 1;
1813                         assert(run->nfree <= run->free_max);
1814                         assert(run->nfree >= run->free_min);
1815                         break;
1816                 case RUN_Q25:
1817                         qr_remove(run, link);
1818                         qr_before_insert((arena_run_t *)&bin->runs25, run,
1819                             link);
1820                         run->free_max = ((bin->nregs >> 2) * 3) - 1;
1821                         run->free_min = (bin->nregs >> 2) + 1;
1822                         assert(run->nfree <= run->free_max);
1823                         assert(run->nfree >= run->free_min);
1824                         break;
1825                 case RUN_Q50:
1826                         qr_remove(run, link);
1827                         qr_before_insert((arena_run_t *)&bin->runs50, run,
1828                             link);
1829                         run->free_max = (bin->nregs >> 1) - 1;
1830                         run->free_min = 1;
1831                         assert(run->nfree <= run->free_max);
1832                         assert(run->nfree >= run->free_min);
1833                         break;
1834                 case RUN_Q75:
1835                         /*
1836                          * Skip RUN_Q75 during promotion from RUN_Q50.
1837                          * Separate handling of RUN_Q75 and RUN_Q100 allows us
1838                          * to keep completely full runs in RUN_Q100, thus
1839                          * guaranteeing that runs in RUN_Q75 are only mostly
1840                          * full.  This provides a method for avoiding a linear
1841                          * search for non-full runs, which avoids some
1842                          * pathological edge cases.
1843                          */
1844                         run->quartile++;
1845                         /* Fall through. */
1846                 case RUN_Q100:
1847                         qr_remove(run, link);
1848                         assert(bin->runcur == run);
1849                         bin->runcur = NULL;
1850                         run->free_max = 0;
1851                         run->free_min = 0;
1852                         assert(run->nfree <= run->free_max);
1853                         assert(run->nfree >= run->free_min);
1854                         break;
1855                 default:
1856                         assert(0);
1857                         break;
1858         }
1859 }
1860
1861 static void
1862 arena_bin_run_demote(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
1863 {
1864
1865         assert(bin == run->bin);
1866
1867         /* Demote. */
1868         assert(run->free_max < run->nfree);
1869         assert(run->quartile > RUN_QINIT);
1870         run->quartile--;
1871 #ifdef MALLOC_STATS
1872         bin->stats.ndemote++;
1873 #endif
1874
1875         /* Re-file run. */
1876         switch (run->quartile) {
1877                 case RUN_QINIT:
1878                         qr_remove(run, link);
1879 #ifdef MALLOC_STATS
1880                         bin->stats.curruns--;
1881 #endif
1882                         if (bin->runcur == run)
1883                                 bin->runcur = NULL;
1884 #ifdef MALLOC_DEBUG
1885                         run->magic = 0;
1886 #endif
1887                         arena_run_dalloc(arena, run, bin->run_size);
1888                         break;
1889                 case RUN_Q0:
1890                         qr_remove(run, link);
1891                         qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1892                         run->free_max = bin->nregs - 1;
1893                         run->free_min = (bin->nregs >> 1) + 1;
1894                         assert(run->nfree <= run->free_max);
1895                         assert(run->nfree >= run->free_min);
1896                         break;
1897                 case RUN_Q25:
1898                         qr_remove(run, link);
1899                         qr_before_insert((arena_run_t *)&bin->runs25, run,
1900                             link);
1901                         run->free_max = ((bin->nregs >> 2) * 3) - 1;
1902                         run->free_min = (bin->nregs >> 2) + 1;
1903                         assert(run->nfree <= run->free_max);
1904                         assert(run->nfree >= run->free_min);
1905                         break;
1906                 case RUN_Q50:
1907                         qr_remove(run, link);
1908                         qr_before_insert((arena_run_t *)&bin->runs50, run,
1909                             link);
1910                         run->free_max = (bin->nregs >> 1) - 1;
1911                         run->free_min = 1;
1912                         assert(run->nfree <= run->free_max);
1913                         assert(run->nfree >= run->free_min);
1914                         break;
1915                 case RUN_Q75:
1916                         qr_before_insert((arena_run_t *)&bin->runs75, run,
1917                             link);
1918                         run->free_max = (bin->nregs >> 2) - 1;
1919                         run->free_min = 1;
1920                         assert(run->nfree <= run->free_max);
1921                         assert(run->nfree >= run->free_min);
1922                         break;
1923                 case RUN_Q100:
1924                 default:
1925                         assert(0);
1926                         break;
1927         }
1928 }
1929
1930 static arena_run_t *
1931 arena_run_alloc(arena_t *arena, bool large, size_t size)
1932 {
1933         arena_run_t *run;
1934         unsigned min_ind, i, j;
1935         arena_chunk_t *chunk;
1936 #ifndef NDEBUG
1937         int rep = 0;
1938 #endif
1939
1940         assert(size <= arena_maxclass);
1941
1942 AGAIN:
1943 #ifndef NDEBUG
1944         rep++;
1945         assert(rep <= 2);
1946 #endif
1947
1948         /*
1949          * Search through arena's chunks in address order for a run that is
1950          * large enough.  Look for a precise fit, but do not pass up a chunk
1951          * that has a run which is large enough to split.
1952          */
1953         min_ind = ffs(size >> pagesize_2pow) - 1;
1954         RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1955                 for (i = min_ind;
1956                     i < (opt_chunk_2pow - pagesize_2pow);
1957                     i++) {
1958                         if (chunk->nfree_runs[i] > 0) {
1959                                 arena_chunk_map_t *map = chunk->map;
1960
1961                                 /* Scan chunk's map for free run. */
1962                                 for (j = 0;
1963                                     j < arena_chunk_maplen;
1964                                     j += map[j].npages) {
1965                                         if (map[j].free
1966                                             && map[j].npages == (1 << i))
1967                 {/*<----------------------------*/
1968                         run = (arena_run_t *)&((char *)chunk)[j
1969                             << pagesize_2pow];
1970
1971                         assert(chunk->nfree_runs[i] > 0);
1972                         chunk->nfree_runs[i]--;
1973
1974                         /* Update page map. */
1975                         arena_run_split(arena, run, large, size);
1976
1977                         return (run);
1978                 }/*---------------------------->*/
1979                                 }
1980                                 /* Not reached. */
1981                                 assert(0);
1982                         }
1983                 }
1984         }
1985
1986         /* No usable runs.  Allocate a new chunk, then try again. */
1987         if (arena_chunk_alloc(arena) == NULL)
1988                 return (NULL);
1989         goto AGAIN;
1990 }
1991
1992 static void
1993 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1994 {
1995         arena_chunk_t *chunk;
1996         unsigned run_ind, buddy_ind, base_run_ind, run_pages, log2_run_pages;
1997
1998         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1999         run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2000             >> pagesize_2pow);
2001         run_pages = (size >> pagesize_2pow);
2002         log2_run_pages = ffs(run_pages) - 1;
2003         assert(run_pages > 0);
2004
2005         /* Subtract pages from count of pages used in chunk. */
2006         chunk->pages_used -= run_pages;
2007
2008         /* Mark run as deallocated. */
2009         chunk->map[run_ind].free = true;
2010         chunk->map[run_ind].large = false;
2011         chunk->map[run_ind].npages = run_pages;
2012
2013         /*
2014          * Tell the kernel that we don't need the data in this run, but only if
2015          * requested via runtime configuration.
2016          */
2017         if (opt_hint) {
2018                 madvise(run, size, MADV_FREE);
2019 #ifdef MALLOC_STATS
2020                 arena->stats.nmadvise += (size >> pagesize_2pow);
2021 #endif
2022         }
2023
2024         /*
2025          * Iteratively coalesce with buddies.  Conceptually, the buddy scheme
2026          * induces a tree on the set of pages.  If we know the number of pages
2027          * in the subtree rooted at the current node, we can quickly determine
2028          * whether a run is the left or right buddy, and then calculate the
2029          * buddy's index.
2030          */
2031         for (;
2032             (run_pages = (1 << log2_run_pages)) < arena_chunk_maplen;
2033             log2_run_pages++) {
2034                 if (((run_ind >> log2_run_pages) & 1) == 0) {
2035                         /* Current run precedes its buddy. */
2036                         buddy_ind = run_ind + run_pages;
2037                         base_run_ind = run_ind;
2038                 } else {
2039                         /* Current run follows its buddy. */
2040                         buddy_ind = run_ind - run_pages;
2041                         base_run_ind = buddy_ind;
2042                 }
2043
2044                 if (chunk->map[buddy_ind].free == false
2045                     || chunk->map[buddy_ind].npages != run_pages)
2046                         break;
2047
2048                 assert(chunk->nfree_runs[log2_run_pages] > 0);
2049                 chunk->nfree_runs[log2_run_pages]--;
2050
2051                 /* Coalesce. */
2052                 chunk->map[base_run_ind].npages = (run_pages << 1);
2053
2054                 /* Update run_ind to be the beginning of the coalesced run. */
2055                 run_ind = base_run_ind;
2056         }
2057
2058         chunk->nfree_runs[log2_run_pages]++;
2059
2060         /* Free pages, to the extent possible. */
2061         if (chunk->pages_used == 0) {
2062                 /* This chunk is completely unused now, so deallocate it. */
2063                 arena_chunk_dealloc(chunk);
2064         }
2065 }
2066
2067 static arena_run_t *
2068 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
2069 {
2070         arena_run_t *run;
2071         unsigned i, remainder;
2072
2073         /* Look for a usable run. */
2074         if ((run = qr_next((arena_run_t *)&bin->runs50, link))
2075             != (arena_run_t *)&bin->runs50
2076             || (run = qr_next((arena_run_t *)&bin->runs25, link))
2077             != (arena_run_t *)&bin->runs25
2078             || (run = qr_next((arena_run_t *)&bin->runs0, link))
2079             != (arena_run_t *)&bin->runs0
2080             || (run = qr_next((arena_run_t *)&bin->runs75, link))
2081             != (arena_run_t *)&bin->runs75) {
2082                 /* run is guaranteed to have available space. */
2083                 qr_remove(run, link);
2084                 return (run);
2085         }
2086         /* No existing runs have any space available. */
2087
2088         /* Allocate a new run. */
2089         run = arena_run_alloc(arena, false, bin->run_size);
2090         if (run == NULL)
2091                 return (NULL);
2092
2093         /* Initialize run internals. */
2094         qr_new(run, link);
2095         run->bin = bin;
2096
2097         for (i = 0; i < (bin->nregs >> (SIZEOF_INT_2POW + 3)); i++)
2098                 run->regs_mask[i] = UINT_MAX;
2099         remainder = bin->nregs % (1 << (SIZEOF_INT_2POW + 3));
2100         if (remainder != 0) {
2101                 run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2102                     - remainder));
2103                 i++;
2104         }
2105         for (; i < REGS_MASK_NELMS; i++)
2106                 run->regs_mask[i] = 0;
2107
2108         run->regs_minelm = 0;
2109
2110         run->nfree = bin->nregs;
2111         run->quartile = RUN_QINIT;
2112         run->free_max = bin->nregs;
2113         run->free_min = ((bin->nregs >> 2) * 3) + 1;
2114 #ifdef MALLOC_DEBUG
2115         run->magic = ARENA_RUN_MAGIC;
2116 #endif
2117
2118 #ifdef MALLOC_STATS
2119         bin->stats.nruns++;
2120         bin->stats.curruns++;
2121         if (bin->stats.curruns > bin->stats.highruns)
2122                 bin->stats.highruns = bin->stats.curruns;
2123 #endif
2124         return (run);
2125 }
2126
2127 /* bin->runcur must have space available before this function is called. */
2128 static inline void *
2129 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
2130 {
2131         void *ret;
2132
2133         assert(run->magic == ARENA_RUN_MAGIC);
2134         assert(run->nfree > 0);
2135
2136         ret = arena_run_reg_alloc(run, bin);
2137         assert(ret != NULL);
2138         run->nfree--;
2139         if (run->nfree < run->free_min) {
2140                 /* Promote run to higher fullness quartile. */
2141                 arena_bin_run_promote(arena, bin, run);
2142         }
2143
2144         return (ret);
2145 }
2146
2147 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2148 static void *
2149 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
2150 {
2151
2152         assert(bin->runcur == NULL || bin->runcur->quartile == RUN_Q100);
2153
2154         bin->runcur = arena_bin_nonfull_run_get(arena, bin);
2155         if (bin->runcur == NULL)
2156                 return (NULL);
2157         assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2158         assert(bin->runcur->nfree > 0);
2159
2160         return (arena_bin_malloc_easy(arena, bin, bin->runcur));
2161 }
2162
2163 static void *
2164 arena_malloc(arena_t *arena, size_t size)
2165 {
2166         void *ret;
2167
2168         assert(arena != NULL);
2169         assert(arena->magic == ARENA_MAGIC);
2170         assert(size != 0);
2171         assert(QUANTUM_CEILING(size) <= arena_maxclass);
2172
2173         if (size <= bin_maxclass) {
2174                 arena_bin_t *bin;
2175                 arena_run_t *run;
2176
2177                 /* Small allocation. */
2178
2179                 if (size < small_min) {
2180                         /* Tiny. */
2181                         size = pow2_ceil(size);
2182                         bin = &arena->bins[ffs(size >> (tiny_min_2pow + 1))];
2183 #if (!defined(NDEBUG) || defined(MALLOC_STATS))
2184                         /*
2185                          * Bin calculation is always correct, but we may need
2186                          * to fix size for the purposes of assertions and/or
2187                          * stats accuracy.
2188                          */
2189                         if (size < (1 << tiny_min_2pow))
2190                                 size = (1 << tiny_min_2pow);
2191 #endif
2192                 } else if (size <= small_max) {
2193                         /* Quantum-spaced. */
2194                         size = QUANTUM_CEILING(size);
2195                         bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2196                             - 1];
2197                 } else {
2198                         /* Sub-page. */
2199                         size = pow2_ceil(size);
2200                         bin = &arena->bins[ntbins + nqbins
2201                             + (ffs(size >> opt_small_max_2pow) - 2)];
2202                 }
2203                 assert(size == bin->reg_size);
2204
2205                 malloc_mutex_lock(&arena->mtx);
2206                 if ((run = bin->runcur) != NULL)
2207                         ret = arena_bin_malloc_easy(arena, bin, run);
2208                 else
2209                         ret = arena_bin_malloc_hard(arena, bin);
2210
2211 #ifdef MALLOC_STATS
2212                 bin->stats.nrequests++;
2213 #endif
2214         } else {
2215                 /* Medium allocation. */
2216                 size = pow2_ceil(size);
2217                 malloc_mutex_lock(&arena->mtx);
2218                 ret = (void *)arena_run_alloc(arena, true, size);
2219 #ifdef MALLOC_STATS
2220                 arena->stats.large_nrequests++;
2221 #endif
2222         }
2223
2224 #ifdef MALLOC_STATS
2225         arena->stats.nmalloc++;
2226         if (ret != NULL)
2227                 arena->stats.allocated += size;
2228 #endif
2229
2230         malloc_mutex_unlock(&arena->mtx);
2231
2232         if (opt_junk && ret != NULL)
2233                 memset(ret, 0xa5, size);
2234         else if (opt_zero && ret != NULL)
2235                 memset(ret, 0, size);
2236         return (ret);
2237 }
2238
2239 /* Return the size of the allocation pointed to by ptr. */
2240 static size_t
2241 arena_salloc(const void *ptr)
2242 {
2243         size_t ret;
2244         arena_chunk_t *chunk;
2245         uint32_t pageind;
2246         arena_chunk_map_t mapelm;
2247
2248         assert(ptr != NULL);
2249         assert(CHUNK_ADDR2BASE(ptr) != ptr);
2250
2251         /*
2252          * No arena data structures that we query here can change in a way that
2253          * affects this function, so we don't need to lock.
2254          */
2255         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2256         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2257         mapelm = chunk->map[pageind];
2258         assert(mapelm.free == false);
2259         if (mapelm.large == false) {
2260                 arena_run_t *run;
2261
2262                 pageind -= mapelm.pos;
2263
2264                 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2265                 assert(run->magic == ARENA_RUN_MAGIC);
2266                 ret = run->bin->reg_size;
2267         } else
2268                 ret = mapelm.npages << pagesize_2pow;
2269
2270         return (ret);
2271 }
2272
2273 static void *
2274 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2275 {
2276         void *ret;
2277
2278         /* Avoid moving the allocation if the size class would not change. */
2279         if (size < small_min) {
2280                 if (oldsize < small_min &&
2281                     ffs(pow2_ceil(size) >> (tiny_min_2pow + 1))
2282                     == ffs(pow2_ceil(oldsize) >> (tiny_min_2pow + 1)))
2283                         goto IN_PLACE;
2284         } else if (size <= small_max) {
2285                 if (oldsize >= small_min && oldsize <= small_max &&
2286                     (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2287                     == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2288                         goto IN_PLACE;
2289         } else {
2290                 if (oldsize > small_max && pow2_ceil(size) == oldsize)
2291                         goto IN_PLACE;
2292         }
2293
2294         /*
2295          * If we get here, then size and oldsize are different enough that we
2296          * need to use a different size class.  In that case, fall back to
2297          * allocating new space and copying.
2298          */
2299         ret = arena_malloc(choose_arena(), size);
2300         if (ret == NULL)
2301                 return (NULL);
2302
2303         if (size < oldsize)
2304                 memcpy(ret, ptr, size);
2305         else
2306                 memcpy(ret, ptr, oldsize);
2307         idalloc(ptr);
2308         return (ret);
2309 IN_PLACE:
2310         if (opt_junk && size < oldsize)
2311                 memset(&((char *)ptr)[size], 0x5a, oldsize - size);
2312         else if (opt_zero && size > oldsize)
2313                 memset(&((char *)ptr)[size], 0, size - oldsize);
2314         return (ptr);
2315 }
2316
2317 static void
2318 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2319 {
2320         unsigned pageind;
2321         arena_chunk_map_t mapelm;
2322         size_t size;
2323
2324         assert(arena != NULL);
2325         assert(arena->magic == ARENA_MAGIC);
2326         assert(chunk->arena == arena);
2327         assert(ptr != NULL);
2328         assert(CHUNK_ADDR2BASE(ptr) != ptr);
2329
2330         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2331         mapelm = chunk->map[pageind];
2332         assert(mapelm.free == false);
2333         if (mapelm.large == false) {
2334                 arena_run_t *run;
2335                 arena_bin_t *bin;
2336
2337                 /* Small allocation. */
2338
2339                 pageind -= mapelm.pos;
2340
2341                 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2342                 assert(run->magic == ARENA_RUN_MAGIC);
2343                 bin = run->bin;
2344                 size = bin->reg_size;
2345
2346                 if (opt_junk)
2347                         memset(ptr, 0x5a, size);
2348
2349                 malloc_mutex_lock(&arena->mtx);
2350                 arena_run_reg_dalloc(run, bin, ptr, size);
2351                 run->nfree++;
2352                 if (run->nfree > run->free_max) {
2353                         /* Demote run to lower fullness quartile. */
2354                         arena_bin_run_demote(arena, bin, run);
2355                 }
2356         } else {
2357                 /* Medium allocation. */
2358
2359                 size = mapelm.npages << pagesize_2pow;
2360                 assert((((uintptr_t)ptr) & (size - 1)) == 0);
2361
2362                 if (opt_junk)
2363                         memset(ptr, 0x5a, size);
2364
2365                 malloc_mutex_lock(&arena->mtx);
2366                 arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2367         }
2368
2369 #ifdef MALLOC_STATS
2370         arena->stats.allocated -= size;
2371 #endif
2372
2373         malloc_mutex_unlock(&arena->mtx);
2374 }
2375
2376 static bool
2377 arena_new(arena_t *arena)
2378 {
2379         unsigned i;
2380         arena_bin_t *bin;
2381         size_t pow2_size, run_size;
2382
2383         malloc_mutex_init(&arena->mtx);
2384
2385 #ifdef MALLOC_STATS
2386         memset(&arena->stats, 0, sizeof(arena_stats_t));
2387 #endif
2388
2389         /* Initialize chunks. */
2390         RB_INIT(&arena->chunks);
2391
2392         /* Initialize bins. */
2393
2394         /* (2^n)-spaced tiny bins. */
2395         for (i = 0; i < ntbins; i++) {
2396                 bin = &arena->bins[i];
2397                 bin->runcur = NULL;
2398                 qr_new((arena_run_t *)&bin->runs0, link);
2399                 qr_new((arena_run_t *)&bin->runs25, link);
2400                 qr_new((arena_run_t *)&bin->runs50, link);
2401                 qr_new((arena_run_t *)&bin->runs75, link);
2402
2403                 bin->reg_size = (1 << (tiny_min_2pow + i));
2404
2405                 /*
2406                  * Calculate how large of a run to allocate.  Make sure that at
2407                  * least RUN_MIN_REGS regions fit in the run.
2408                  */
2409                 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2410                 if (run_size < pagesize)
2411                         run_size = pagesize;
2412                 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2413                         run_size = (pagesize << RUN_MAX_PAGES_2POW);
2414                 if (run_size > arena_maxclass)
2415                         run_size = arena_maxclass;
2416                 bin->run_size = run_size;
2417
2418                 assert(run_size >= sizeof(arena_run_t));
2419                 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2420                 if (bin->nregs > (REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3))) {
2421                         /* Take care not to overflow regs_mask. */
2422                         bin->nregs = REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3);
2423                 }
2424                 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2425
2426 #ifdef MALLOC_STATS
2427                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2428 #endif
2429         }
2430
2431         /* Quantum-spaced bins. */
2432         for (; i < ntbins + nqbins; i++) {
2433                 bin = &arena->bins[i];
2434                 bin->runcur = NULL;
2435                 qr_new((arena_run_t *)&bin->runs0, link);
2436                 qr_new((arena_run_t *)&bin->runs25, link);
2437                 qr_new((arena_run_t *)&bin->runs50, link);
2438                 qr_new((arena_run_t *)&bin->runs75, link);
2439
2440                 bin->reg_size = quantum * (i - ntbins + 1);
2441
2442                 /*
2443                  * Calculate how large of a run to allocate.  Make sure that at
2444                  * least RUN_MIN_REGS regions fit in the run.
2445                  */
2446                 pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2447                 run_size = (pow2_size << RUN_MIN_REGS_2POW);
2448                 if (run_size < pagesize)
2449                         run_size = pagesize;
2450                 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2451                         run_size = (pagesize << RUN_MAX_PAGES_2POW);
2452                 if (run_size > arena_maxclass)
2453                         run_size = arena_maxclass;
2454                 bin->run_size = run_size;
2455
2456                 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2457                 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
2458                 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2459
2460 #ifdef MALLOC_STATS
2461                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2462 #endif
2463         }
2464
2465         /* (2^n)-spaced sub-page bins. */
2466         for (; i < ntbins + nqbins + nsbins; i++) {
2467                 bin = &arena->bins[i];
2468                 bin->runcur = NULL;
2469                 qr_new((arena_run_t *)&bin->runs0, link);
2470                 qr_new((arena_run_t *)&bin->runs25, link);
2471                 qr_new((arena_run_t *)&bin->runs50, link);
2472                 qr_new((arena_run_t *)&bin->runs75, link);
2473
2474                 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2475
2476                 /*
2477                  * Calculate how large of a run to allocate.  Make sure that at
2478                  * least RUN_MIN_REGS regions fit in the run.
2479                  */
2480                 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2481                 if (run_size < pagesize)
2482                         run_size = pagesize;
2483                 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2484                         run_size = (pagesize << RUN_MAX_PAGES_2POW);
2485                 if (run_size > arena_maxclass)
2486                         run_size = arena_maxclass;
2487                 bin->run_size = run_size;
2488
2489                 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2490                 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
2491                 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2492
2493 #ifdef MALLOC_STATS
2494                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2495 #endif
2496         }
2497
2498 #ifdef MALLOC_DEBUG
2499         arena->magic = ARENA_MAGIC;
2500 #endif
2501
2502         return (false);
2503 }
2504
2505 /* Create a new arena and insert it into the arenas array at index ind. */
2506 static arena_t *
2507 arenas_extend(unsigned ind)
2508 {
2509         arena_t *ret;
2510
2511         /* Allocate enough space for trailing bins. */
2512         ret = (arena_t *)base_alloc(sizeof(arena_t)
2513             + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2514         if (ret != NULL && arena_new(ret) == false) {
2515                 arenas[ind] = ret;
2516                 return (ret);
2517         }
2518         /* Only reached if there is an OOM error. */
2519
2520         /*
2521          * OOM here is quite inconvenient to propagate, since dealing with it
2522          * would require a check for failure in the fast path.  Instead, punt
2523          * by using arenas[0].  In practice, this is an extremely unlikely
2524          * failure.
2525          */
2526         malloc_printf("%s: (malloc) Error initializing arena\n",
2527             _getprogname());
2528         if (opt_abort)
2529                 abort();
2530
2531         return (arenas[0]);
2532 }
2533
2534 /*
2535  * End arena.
2536  */
2537 /******************************************************************************/
2538 /*
2539  * Begin general internal functions.
2540  */
2541
2542 static void *
2543 huge_malloc(size_t size)
2544 {
2545         void *ret;
2546         size_t csize;
2547         chunk_node_t *node;
2548
2549         /* Allocate one or more contiguous chunks for this request. */
2550
2551         csize = CHUNK_CEILING(size);
2552         if (csize == 0) {
2553                 /* size is large enough to cause size_t wrap-around. */
2554                 return (NULL);
2555         }
2556
2557         /* Allocate a chunk node with which to track the chunk. */
2558         node = base_chunk_node_alloc();
2559         if (node == NULL)
2560                 return (NULL);
2561
2562         ret = chunk_alloc(csize);
2563         if (ret == NULL) {
2564                 base_chunk_node_dealloc(node);
2565                 return (NULL);
2566         }
2567
2568         /* Insert node into huge. */
2569         node->chunk = ret;
2570         node->size = csize;
2571
2572         malloc_mutex_lock(&chunks_mtx);
2573         RB_INSERT(chunk_tree_s, &huge, node);
2574 #ifdef MALLOC_STATS
2575         huge_nmalloc++;
2576         huge_allocated += csize;
2577 #endif
2578         malloc_mutex_unlock(&chunks_mtx);
2579
2580         if (opt_junk && ret != NULL)
2581                 memset(ret, 0xa5, csize);
2582         else if (opt_zero && ret != NULL)
2583                 memset(ret, 0, csize);
2584
2585         return (ret);
2586 }
2587
2588 static void *
2589 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2590 {
2591         void *ret;
2592
2593         /* Avoid moving the allocation if the size class would not change. */
2594         if (oldsize > arena_maxclass &&
2595             CHUNK_CEILING(size) == CHUNK_CEILING(oldsize))
2596                 return (ptr);
2597
2598         /*
2599          * If we get here, then size and oldsize are different enough that we
2600          * need to use a different size class.  In that case, fall back to
2601          * allocating new space and copying.
2602          */
2603         ret = huge_malloc(size);
2604         if (ret == NULL)
2605                 return (NULL);
2606
2607         if (CHUNK_ADDR2BASE(ptr) == ptr) {
2608                 /* The old allocation is a chunk. */
2609                 if (size < oldsize)
2610                         memcpy(ret, ptr, size);
2611                 else
2612                         memcpy(ret, ptr, oldsize);
2613         } else {
2614                 /* The old allocation is a region. */
2615                 assert(oldsize < size);
2616                 memcpy(ret, ptr, oldsize);
2617         }
2618         idalloc(ptr);
2619         return (ret);
2620 }
2621
2622 static void
2623 huge_dalloc(void *ptr)
2624 {
2625         chunk_node_t key;
2626         chunk_node_t *node;
2627
2628         malloc_mutex_lock(&chunks_mtx);
2629
2630         /* Extract from tree of huge allocations. */
2631         key.chunk = ptr;
2632         node = RB_FIND(chunk_tree_s, &huge, &key);
2633         assert(node != NULL);
2634         assert(node->chunk == ptr);
2635         RB_REMOVE(chunk_tree_s, &huge, node);
2636
2637 #ifdef MALLOC_STATS
2638         /* Update counters. */
2639         huge_ndalloc++;
2640         huge_allocated -= node->size;
2641 #endif
2642
2643         malloc_mutex_unlock(&chunks_mtx);
2644
2645         /* Unmap chunk. */
2646 #ifdef USE_BRK
2647         if (opt_junk)
2648                 memset(node->chunk, 0x5a, node->size);
2649 #endif
2650         chunk_dealloc(node->chunk, node->size);
2651
2652         base_chunk_node_dealloc(node);
2653 }
2654
2655 static void *
2656 imalloc(size_t size)
2657 {
2658         void *ret;
2659
2660         assert(size != 0);
2661
2662         if (size <= arena_maxclass)
2663                 ret = arena_malloc(choose_arena(), size);
2664         else
2665                 ret = huge_malloc(size);
2666
2667         return (ret);
2668 }
2669
2670 static void *
2671 ipalloc(size_t alignment, size_t size)
2672 {
2673         void *ret;
2674         size_t alloc_size;
2675
2676         /*
2677          * Take advantage of the fact that for each size class, every object is
2678          * aligned at the smallest power of two that is non-zero in the base
2679          * two representation of the size.  For example:
2680          *
2681          *   Size |   Base 2 | Minimum alignment
2682          *   -----+----------+------------------
2683          *     96 |  1100000 |  32
2684          *    144 | 10100000 |  32
2685          *    192 | 11000000 |  64
2686          *
2687          * Depending on runtime settings, it is possible that arena_malloc()
2688          * will further round up to a power of two, but that never causes
2689          * correctness issues.
2690          */
2691         alloc_size = (size + (alignment - 1)) & (-alignment);
2692         if (alloc_size < size) {
2693                 /* size_t overflow. */
2694                 return (NULL);
2695         }
2696
2697         if (alloc_size <= arena_maxclass)
2698                 ret = arena_malloc(choose_arena(), alloc_size);
2699         else {
2700                 if (alignment <= chunk_size)
2701                         ret = huge_malloc(size);
2702                 else {
2703                         size_t chunksize, offset;
2704                         chunk_node_t *node;
2705
2706                         /*
2707                          * This allocation requires alignment that is even
2708                          * larger than chunk alignment.  This means that
2709                          * huge_malloc() isn't good enough.
2710                          *
2711                          * Allocate almost twice as many chunks as are demanded
2712                          * by the size or alignment, in order to assure the
2713                          * alignment can be achieved, then unmap leading and
2714                          * trailing chunks.
2715                          */
2716
2717                         chunksize = CHUNK_CEILING(size);
2718
2719                         if (size >= alignment)
2720                                 alloc_size = chunksize + alignment - chunk_size;
2721                         else
2722                                 alloc_size = (alignment << 1) - chunk_size;
2723
2724                         /*
2725                          * Allocate a chunk node with which to track the chunk.
2726                          */
2727                         node = base_chunk_node_alloc();
2728                         if (node == NULL)
2729                                 return (NULL);
2730
2731                         ret = chunk_alloc(alloc_size);
2732                         if (ret == NULL) {
2733                                 base_chunk_node_dealloc(node);
2734                                 return (NULL);
2735                         }
2736
2737                         offset = (uintptr_t)ret & (alignment - 1);
2738                         assert(offset % chunk_size == 0);
2739                         assert(offset < alloc_size);
2740                         if (offset == 0) {
2741                                 /* Trim trailing space. */
2742                                 chunk_dealloc((void *)((uintptr_t)ret
2743                                     + chunksize), alloc_size - chunksize);
2744                         } else {
2745                                 size_t trailsize;
2746
2747                                 /* Trim leading space. */
2748                                 chunk_dealloc(ret, alignment - offset);
2749
2750                                 ret = (void *)((uintptr_t)ret + (alignment
2751                                     - offset));
2752
2753                                 trailsize = alloc_size - (alignment - offset)
2754                                     - chunksize;
2755                                 if (trailsize != 0) {
2756                                     /* Trim trailing space. */
2757                                     assert(trailsize < alloc_size);
2758                                     chunk_dealloc((void *)((uintptr_t)ret
2759                                         + chunksize), trailsize);
2760                                 }
2761                         }
2762
2763                         /* Insert node into huge. */
2764                         node->chunk = ret;
2765                         node->size = chunksize;
2766
2767                         malloc_mutex_lock(&chunks_mtx);
2768                         RB_INSERT(chunk_tree_s, &huge, node);
2769 #ifdef MALLOC_STATS
2770                         huge_allocated += size;
2771 #endif
2772                         malloc_mutex_unlock(&chunks_mtx);
2773
2774                         if (opt_junk)
2775                                 memset(ret, 0xa5, chunksize);
2776                         else if (opt_zero)
2777                                 memset(ret, 0, chunksize);
2778                 }
2779         }
2780
2781         assert(((uintptr_t)ret & (alignment - 1)) == 0);
2782         return (ret);
2783 }
2784
2785 static void *
2786 icalloc(size_t size)
2787 {
2788         void *ret;
2789
2790         if (size <= arena_maxclass) {
2791                 ret = arena_malloc(choose_arena(), size);
2792                 if (ret == NULL)
2793                         return (NULL);
2794                 memset(ret, 0, size);
2795         } else {
2796                 /*
2797                  * The virtual memory system provides zero-filled pages, so
2798                  * there is no need to do so manually, unless opt_junk is
2799                  * enabled, in which case huge_malloc() fills huge allocations
2800                  * with junk.
2801                  */
2802                 ret = huge_malloc(size);
2803                 if (ret == NULL)
2804                         return (NULL);
2805
2806                 if (opt_junk)
2807                         memset(ret, 0, size);
2808 #ifdef USE_BRK
2809                 else if ((uintptr_t)ret >= (uintptr_t)brk_base
2810                     && (uintptr_t)ret < (uintptr_t)brk_max) {
2811                         /*
2812                          * This may be a re-used brk chunk.  Therefore, zero
2813                          * the memory.
2814                          */
2815                         memset(ret, 0, size);
2816                 }
2817 #endif
2818         }
2819
2820         return (ret);
2821 }
2822
2823 static size_t
2824 isalloc(const void *ptr)
2825 {
2826         size_t ret;
2827         arena_chunk_t *chunk;
2828
2829         assert(ptr != NULL);
2830
2831         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2832         if (chunk != ptr) {
2833                 /* Region. */
2834                 assert(chunk->arena->magic == ARENA_MAGIC);
2835
2836                 ret = arena_salloc(ptr);
2837         } else {
2838                 chunk_node_t *node, key;
2839
2840                 /* Chunk (huge allocation). */
2841
2842                 malloc_mutex_lock(&chunks_mtx);
2843
2844                 /* Extract from tree of huge allocations. */
2845                 key.chunk = (void *)ptr;
2846                 node = RB_FIND(chunk_tree_s, &huge, &key);
2847                 assert(node != NULL);
2848
2849                 ret = node->size;
2850
2851                 malloc_mutex_unlock(&chunks_mtx);
2852         }
2853
2854         return (ret);
2855 }
2856
2857 static void *
2858 iralloc(void *ptr, size_t size)
2859 {
2860         void *ret;
2861         size_t oldsize;
2862
2863         assert(ptr != NULL);
2864         assert(size != 0);
2865
2866         oldsize = isalloc(ptr);
2867
2868         if (size <= arena_maxclass)
2869                 ret = arena_ralloc(ptr, size, oldsize);
2870         else
2871                 ret = huge_ralloc(ptr, size, oldsize);
2872
2873         return (ret);
2874 }
2875
2876 static void
2877 idalloc(void *ptr)
2878 {
2879         arena_chunk_t *chunk;
2880
2881         assert(ptr != NULL);
2882
2883         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2884         if (chunk != ptr) {
2885                 /* Region. */
2886 #ifdef MALLOC_STATS
2887                 malloc_mutex_lock(&chunk->arena->mtx);
2888                 chunk->arena->stats.ndalloc++;
2889                 malloc_mutex_unlock(&chunk->arena->mtx);
2890 #endif
2891                 arena_dalloc(chunk->arena, chunk, ptr);
2892         } else
2893                 huge_dalloc(ptr);
2894 }
2895
2896 static void
2897 malloc_print_stats(void)
2898 {
2899
2900         if (opt_print_stats) {
2901                 malloc_printf("___ Begin malloc statistics ___\n");
2902                 malloc_printf("Number of CPUs: %u\n", ncpus);
2903                 malloc_printf("Number of arenas: %u\n", narenas);
2904                 malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size,
2905                     opt_chunk_2pow);
2906                 malloc_printf("Quantum size: %zu (2^%zu)\n", quantum,
2907                     opt_quantum_2pow);
2908                 malloc_printf("Max small size: %zu\n", small_max);
2909                 malloc_printf("Pointer size: %u\n", sizeof(void *));
2910                 malloc_printf("Assertions %s\n",
2911 #ifdef NDEBUG
2912                     "disabled"
2913 #else
2914                     "enabled"
2915 #endif
2916                     );
2917
2918 #ifdef MALLOC_STATS
2919
2920                 {
2921                         size_t allocated, total;
2922                         unsigned i;
2923                         arena_t *arena;
2924
2925                         /* Calculate and print allocated/total stats. */
2926
2927                         /* arenas. */
2928                         for (i = 0, allocated = 0; i < narenas; i++) {
2929                                 if (arenas[i] != NULL) {
2930                                         malloc_mutex_lock(&arenas[i]->mtx);
2931                                         allocated += arenas[i]->stats.allocated;
2932                                         malloc_mutex_unlock(&arenas[i]->mtx);
2933                                 }
2934                         }
2935
2936                         /* huge. */
2937                         malloc_mutex_lock(&chunks_mtx);
2938                         allocated += huge_allocated;
2939                         total = stats_chunks.curchunks * chunk_size;
2940                         malloc_mutex_unlock(&chunks_mtx);
2941
2942                         malloc_printf("Allocated: %zu, space used: %zu\n",
2943                             allocated, total);
2944
2945                         /* Print chunk stats. */
2946                         {
2947                                 chunk_stats_t chunks_stats;
2948
2949                                 malloc_mutex_lock(&chunks_mtx);
2950                                 chunks_stats = stats_chunks;
2951                                 malloc_mutex_unlock(&chunks_mtx);
2952
2953                                 malloc_printf("\nchunks:\n");
2954                                 malloc_printf(" %13s%13s%13s\n", "nchunks",
2955                                     "highchunks", "curchunks");
2956                                 malloc_printf(" %13llu%13lu%13lu\n",
2957                                     chunks_stats.nchunks,
2958                                     chunks_stats.highchunks,
2959                                     chunks_stats.curchunks);
2960                         }
2961
2962                         /* Print chunk stats. */
2963                         malloc_printf("\nhuge:\n");
2964                         malloc_printf("%12s %12s %12s\n",
2965                             "nmalloc", "ndalloc", "allocated");
2966                         malloc_printf("%12llu %12llu %12zu\n",
2967                             huge_nmalloc, huge_ndalloc, huge_allocated);
2968
2969                         /* Print stats for each arena. */
2970                         for (i = 0; i < narenas; i++) {
2971                                 arena = arenas[i];
2972                                 if (arena != NULL) {
2973                                         malloc_printf(
2974                                             "\narenas[%u] statistics:\n", i);
2975                                         malloc_mutex_lock(&arena->mtx);
2976                                         stats_print(arena);
2977                                         malloc_mutex_unlock(&arena->mtx);
2978                                 }
2979                         }
2980                 }
2981 #endif /* #ifdef MALLOC_STATS */
2982                 malloc_printf("--- End malloc statistics ---\n");
2983         }
2984 }
2985
2986 /*
2987  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
2988  * implementation has to take pains to avoid infinite recursion during
2989  * initialization.
2990  */
2991 static inline bool
2992 malloc_init(void)
2993 {
2994
2995         if (malloc_initialized == false)
2996                 return (malloc_init_hard());
2997
2998         return (false);
2999 }
3000
3001 static bool
3002 malloc_init_hard(void)
3003 {
3004         unsigned i, j;
3005         int linklen;
3006         char buf[PATH_MAX + 1];
3007         const char *opts;
3008
3009         malloc_mutex_lock(&init_lock);
3010         if (malloc_initialized) {
3011                 /*
3012                  * Another thread initialized the allocator before this one
3013                  * acquired init_lock.
3014                  */
3015                 malloc_mutex_unlock(&init_lock);
3016                 return (false);
3017         }
3018
3019         /* Get number of CPUs. */
3020         {
3021                 int mib[2];
3022                 size_t len;
3023
3024                 mib[0] = CTL_HW;
3025                 mib[1] = HW_NCPU;
3026                 len = sizeof(ncpus);
3027                 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3028                         /* Error. */
3029                         ncpus = 1;
3030                 }
3031         }
3032
3033         /* Get page size. */
3034         {
3035                 long result;
3036
3037                 result = sysconf(_SC_PAGESIZE);
3038                 assert(result != -1);
3039                 pagesize = (unsigned) result;
3040
3041                 /*
3042                  * We assume that pagesize is a power of 2 when calculating
3043                  * pagesize_2pow.
3044                  */
3045                 assert(((result - 1) & result) == 0);
3046                 pagesize_2pow = ffs(result) - 1;
3047         }
3048
3049         for (i = 0; i < 3; i++) {
3050                 /* Get runtime configuration. */
3051                 switch (i) {
3052                 case 0:
3053                         if ((linklen = readlink("/etc/malloc.conf", buf,
3054                                                 sizeof(buf) - 1)) != -1) {
3055                                 /*
3056                                  * Use the contents of the "/etc/malloc.conf"
3057                                  * symbolic link's name.
3058                                  */
3059                                 buf[linklen] = '\0';
3060                                 opts = buf;
3061                         } else {
3062                                 /* No configuration specified. */
3063                                 buf[0] = '\0';
3064                                 opts = buf;
3065                         }
3066                         break;
3067                 case 1:
3068                         if (issetugid() == 0 && (opts =
3069                             getenv("MALLOC_OPTIONS")) != NULL) {
3070                                 /*
3071                                  * Do nothing; opts is already initialized to
3072                                  * the value of the MALLOC_OPTIONS environment
3073                                  * variable.
3074                                  */
3075                         } else {
3076                                 /* No configuration specified. */
3077                                 buf[0] = '\0';
3078                                 opts = buf;
3079                         }
3080                         break;
3081                 case 2:
3082                         if (_malloc_options != NULL) {
3083                             /*
3084                              * Use options that were compiled into the program.
3085                              */
3086                             opts = _malloc_options;
3087                         } else {
3088                                 /* No configuration specified. */
3089                                 buf[0] = '\0';
3090                                 opts = buf;
3091                         }
3092                         break;
3093                 default:
3094                         /* NOTREACHED */
3095                         assert(false);
3096                 }
3097
3098                 for (j = 0; opts[j] != '\0'; j++) {
3099                         switch (opts[j]) {
3100                         case 'a':
3101                                 opt_abort = false;
3102                                 break;
3103                         case 'A':
3104                                 opt_abort = true;
3105                                 break;
3106                         case 'h':
3107                                 opt_hint = false;
3108                                 break;
3109                         case 'H':
3110                                 opt_hint = true;
3111                                 break;
3112                         case 'j':
3113                                 opt_junk = false;
3114                                 break;
3115                         case 'J':
3116                                 opt_junk = true;
3117                                 break;
3118                         case 'k':
3119                                 /*
3120                                  * Run fullness quartile limits don't have
3121                                  * enough resolution if there are too few
3122                                  * regions for the largest bin size classes.
3123                                  */
3124                                 if (opt_chunk_2pow > pagesize_2pow + 4)
3125                                         opt_chunk_2pow--;
3126                                 break;
3127                         case 'K':
3128                                 if (opt_chunk_2pow < CHUNK_2POW_MAX)
3129                                         opt_chunk_2pow++;
3130                                 break;
3131                         case 'n':
3132                                 opt_narenas_lshift--;
3133                                 break;
3134                         case 'N':
3135                                 opt_narenas_lshift++;
3136                                 break;
3137                         case 'p':
3138                                 opt_print_stats = false;
3139                                 break;
3140                         case 'P':
3141                                 opt_print_stats = true;
3142                                 break;
3143                         case 'q':
3144                                 if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3145                                         opt_quantum_2pow--;
3146                                 break;
3147                         case 'Q':
3148                                 if (opt_quantum_2pow < pagesize_2pow - 1)
3149                                         opt_quantum_2pow++;
3150                                 break;
3151                         case 's':
3152                                 if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3153                                         opt_small_max_2pow--;
3154                                 break;
3155                         case 'S':
3156                                 if (opt_small_max_2pow < pagesize_2pow - 1)
3157                                         opt_small_max_2pow++;
3158                                 break;
3159                         case 'u':
3160                                 opt_utrace = false;
3161                                 break;
3162                         case 'U':
3163                                 opt_utrace = true;
3164                                 break;
3165                         case 'v':
3166                                 opt_sysv = false;
3167                                 break;
3168                         case 'V':
3169                                 opt_sysv = true;
3170                                 break;
3171                         case 'x':
3172                                 opt_xmalloc = false;
3173                                 break;
3174                         case 'X':
3175                                 opt_xmalloc = true;
3176                                 break;
3177                         case 'z':
3178                                 opt_zero = false;
3179                                 break;
3180                         case 'Z':
3181                                 opt_zero = true;
3182                                 break;
3183                         default:
3184                                 malloc_printf("%s: (malloc) Unsupported"
3185                                     " character in malloc options: '%c'\n",
3186                                     _getprogname(), opts[j]);
3187                         }
3188                 }
3189         }
3190
3191         /* Take care to call atexit() only once. */
3192         if (opt_print_stats) {
3193                 /* Print statistics at exit. */
3194                 atexit(malloc_print_stats);
3195         }
3196
3197         /* Set variables according to the value of opt_small_max_2pow. */
3198         if (opt_small_max_2pow < opt_quantum_2pow)
3199                 opt_small_max_2pow = opt_quantum_2pow;
3200         small_max = (1 << opt_small_max_2pow);
3201
3202         /* Set bin-related variables. */
3203         bin_maxclass = (pagesize >> 1);
3204         if (pagesize_2pow > RUN_MIN_REGS_2POW + 1)
3205                 tiny_min_2pow = pagesize_2pow - (RUN_MIN_REGS_2POW + 1);
3206         else
3207                 tiny_min_2pow = 1;
3208         assert(opt_quantum_2pow >= tiny_min_2pow);
3209         ntbins = opt_quantum_2pow - tiny_min_2pow;
3210         assert(ntbins <= opt_quantum_2pow);
3211         nqbins = (small_max >> opt_quantum_2pow);
3212         nsbins = pagesize_2pow - opt_small_max_2pow - 1;
3213
3214         /* Set variables according to the value of opt_quantum_2pow. */
3215         quantum = (1 << opt_quantum_2pow);
3216         quantum_mask = quantum - 1;
3217         if (ntbins > 0)
3218                 small_min = (quantum >> 1) + 1;
3219         else
3220                 small_min = 1;
3221         assert(small_min <= quantum);
3222
3223         /* Set variables according to the value of opt_chunk_2pow. */
3224         chunk_size = (1 << opt_chunk_2pow);
3225         chunk_size_mask = chunk_size - 1;
3226         arena_chunk_maplen = (1 << (opt_chunk_2pow - pagesize_2pow));
3227         arena_maxclass = (chunk_size >> 1);
3228
3229         UTRACE(0, 0, 0);
3230
3231 #ifdef MALLOC_STATS
3232         memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3233 #endif
3234
3235         /* Various sanity checks that regard configuration. */
3236         assert(quantum >= sizeof(void *));
3237         assert(quantum <= pagesize);
3238         assert(chunk_size >= pagesize);
3239         assert(quantum * 4 <= chunk_size);
3240
3241         /* Initialize chunks data. */
3242         malloc_mutex_init(&chunks_mtx);
3243         RB_INIT(&huge);
3244 #ifdef USE_BRK
3245         brk_base = sbrk(0);
3246         brk_prev = brk_base;
3247         brk_max = brk_base;
3248 #endif
3249 #ifdef MALLOC_STATS
3250         huge_nmalloc = 0;
3251         huge_ndalloc = 0;
3252         huge_allocated = 0;
3253 #endif
3254         RB_INIT(&old_chunks);
3255
3256         /* Initialize base allocation data structures. */
3257 #ifdef MALLOC_STATS
3258         base_total = 0;
3259 #endif
3260 #ifdef USE_BRK
3261         /*
3262          * Do special brk allocation here, since the base chunk doesn't really
3263          * need to be chunk-aligned.
3264          */
3265         {
3266                 void *brk_cur;
3267                 intptr_t incr;
3268
3269                 do {
3270                         /* Get the current end of brk. */
3271                         brk_cur = sbrk(0);
3272
3273                         /*
3274                          * Calculate how much padding is necessary to
3275                          * chunk-align the end of brk.  Don't worry about
3276                          * brk_cur not being chunk-aligned though.
3277                          */
3278                         incr = (intptr_t)chunk_size
3279                             - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
3280
3281                         brk_prev = sbrk(incr);
3282                         if (brk_prev == brk_cur) {
3283                                 /* Success. */
3284                                 break;
3285                         }
3286                 } while (brk_prev != (void *)-1);
3287
3288                 base_chunk = brk_cur;
3289                 base_next_addr = base_chunk;
3290                 base_past_addr = (void *)((uintptr_t)base_chunk + incr);
3291 #ifdef MALLOC_STATS
3292                 base_total += incr;
3293                 stats_chunks.nchunks = 1;
3294                 stats_chunks.curchunks = 1;
3295                 stats_chunks.highchunks = 1;
3296 #endif
3297         }
3298 #else
3299         /*
3300          * The first base chunk will be allocated when needed by base_alloc().
3301          */
3302         base_chunk = NULL;
3303         base_next_addr = NULL;
3304         base_past_addr = NULL;
3305 #endif
3306         base_chunk_nodes = NULL;
3307         malloc_mutex_init(&base_mtx);
3308
3309         if (ncpus > 1) {
3310                 /*
3311                  * For SMP systems, create four times as many arenas as there
3312                  * are CPUs by default.
3313                  */
3314                 opt_narenas_lshift += 2;
3315         }
3316
3317         /* Determine how many arenas to use. */
3318         narenas = ncpus;
3319         if (opt_narenas_lshift > 0) {
3320                 if ((narenas << opt_narenas_lshift) > narenas)
3321                         narenas <<= opt_narenas_lshift;
3322                 /*
3323                  * Make sure not to exceed the limits of what base_malloc()
3324                  * can handle.
3325                  */
3326                 if (narenas * sizeof(arena_t *) > chunk_size)
3327                         narenas = chunk_size / sizeof(arena_t *);
3328         } else if (opt_narenas_lshift < 0) {
3329                 if ((narenas << opt_narenas_lshift) < narenas)
3330                         narenas <<= opt_narenas_lshift;
3331                 /* Make sure there is at least one arena. */
3332                 if (narenas == 0)
3333                         narenas = 1;
3334         }
3335
3336 #ifdef NO_TLS
3337         if (narenas > 1) {
3338                 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
3339                     23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
3340                     89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
3341                     151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
3342                     223, 227, 229, 233, 239, 241, 251, 257, 263};
3343                 unsigned i, nprimes, parenas;
3344
3345                 /*
3346                  * Pick a prime number of hash arenas that is more than narenas
3347                  * so that direct hashing of pthread_self() pointers tends to
3348                  * spread allocations evenly among the arenas.
3349                  */
3350                 assert((narenas & 1) == 0); /* narenas must be even. */
3351                 nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
3352                 parenas = primes[nprimes - 1]; /* In case not enough primes. */
3353                 for (i = 1; i < nprimes; i++) {
3354                         if (primes[i] > narenas) {
3355                                 parenas = primes[i];
3356                                 break;
3357                         }
3358                 }
3359                 narenas = parenas;
3360         }
3361 #endif
3362
3363 #ifndef NO_TLS
3364         next_arena = 0;
3365 #endif
3366
3367         /* Allocate and initialize arenas. */
3368         arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3369         if (arenas == NULL) {
3370                 malloc_mutex_unlock(&init_lock);
3371                 return (true);
3372         }
3373         /*
3374          * Zero the array.  In practice, this should always be pre-zeroed,
3375          * since it was just mmap()ed, but let's be sure.
3376          */
3377         memset(arenas, 0, sizeof(arena_t *) * narenas);
3378
3379         /*
3380          * Initialize one arena here.  The rest are lazily created in
3381          * arena_choose_hard().
3382          */
3383         arenas_extend(0);
3384         if (arenas[0] == NULL) {
3385                 malloc_mutex_unlock(&init_lock);
3386                 return (true);
3387         }
3388
3389         malloc_mutex_init(&arenas_mtx);
3390
3391         malloc_initialized = true;
3392         malloc_mutex_unlock(&init_lock);
3393         return (false);
3394 }
3395
3396 /*
3397  * End general internal functions.
3398  */
3399 /******************************************************************************/
3400 /*
3401  * Begin malloc(3)-compatible functions.
3402  */
3403
3404 void *
3405 malloc(size_t size)
3406 {
3407         void *ret;
3408
3409         if (malloc_init()) {
3410                 ret = NULL;
3411                 goto RETURN;
3412         }
3413
3414         if (size == 0) {
3415                 if (opt_sysv == false)
3416                         size = 1;
3417                 else {
3418                         ret = NULL;
3419                         goto RETURN;
3420                 }
3421         }
3422
3423         ret = imalloc(size);
3424
3425 RETURN:
3426         if (ret == NULL) {
3427                 if (opt_xmalloc) {
3428                         malloc_printf("%s: (malloc) Error in malloc(%zu):"
3429                             " out of memory\n", _getprogname(), size);
3430                         abort();
3431                 }
3432                 errno = ENOMEM;
3433         }
3434
3435         UTRACE(0, size, ret);
3436         return (ret);
3437 }
3438
3439 int
3440 posix_memalign(void **memptr, size_t alignment, size_t size)
3441 {
3442         int ret;
3443         void *result;
3444
3445         if (malloc_init())
3446                 result = NULL;
3447         else {
3448                 /* Make sure that alignment is a large enough power of 2. */
3449                 if (((alignment - 1) & alignment) != 0
3450                     || alignment < sizeof(void *)) {
3451                         if (opt_xmalloc) {
3452                                 malloc_printf("%s: (malloc) Error in"
3453                                     " posix_memalign(%zu, %zu):"
3454                                     " invalid alignment\n",
3455                                     _getprogname(), alignment, size);
3456                                 abort();
3457                         }
3458                         result = NULL;
3459                         ret = EINVAL;
3460                         goto RETURN;
3461                 }
3462
3463                 result = ipalloc(alignment, size);
3464         }
3465
3466         if (result == NULL) {
3467                 if (opt_xmalloc) {
3468                         malloc_printf("%s: (malloc) Error in"
3469                             " posix_memalign(%zu, %zu): out of memory\n",
3470                             _getprogname(), alignment, size);
3471                         abort();
3472                 }
3473                 ret = ENOMEM;
3474                 goto RETURN;
3475         }
3476
3477         *memptr = result;
3478         ret = 0;
3479
3480 RETURN:
3481         UTRACE(0, size, result);
3482         return (ret);
3483 }
3484
3485 void *
3486 calloc(size_t num, size_t size)
3487 {
3488         void *ret;
3489         size_t num_size;
3490
3491         if (malloc_init()) {
3492                 ret = NULL;
3493                 goto RETURN;
3494         }
3495
3496         num_size = num * size;
3497         if (num_size == 0) {
3498                 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
3499                         num_size = 1;
3500                 else {
3501                         ret = NULL;
3502                         goto RETURN;
3503                 }
3504         /*
3505          * Try to avoid division here.  We know that it isn't possible to
3506          * overflow during multiplication if neither operand uses any of the
3507          * most significant half of the bits in a size_t.
3508          */
3509         } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
3510             && (num_size / size != num)) {
3511                 /* size_t overflow. */
3512                 ret = NULL;
3513                 goto RETURN;
3514         }
3515
3516         ret = icalloc(num_size);
3517
3518 RETURN:
3519         if (ret == NULL) {
3520                 if (opt_xmalloc) {
3521                         malloc_printf("%s: (malloc) Error in"
3522                             " calloc(%zu, %zu): out of memory\n",
3523                             _getprogname(), num, size);
3524                         abort();
3525                 }
3526                 errno = ENOMEM;
3527         }
3528
3529         UTRACE(0, num_size, ret);
3530         return (ret);
3531 }
3532
3533 void *
3534 realloc(void *ptr, size_t size)
3535 {
3536         void *ret;
3537
3538         if (size == 0) {
3539                 if (opt_sysv == false)
3540                         size = 1;
3541                 else {
3542                         if (ptr != NULL)
3543                                 idalloc(ptr);
3544                         ret = NULL;
3545                         goto RETURN;
3546                 }
3547         }
3548
3549         if (ptr != NULL) {
3550                 assert(malloc_initialized);
3551
3552                 ret = iralloc(ptr, size);
3553
3554                 if (ret == NULL) {
3555                         if (opt_xmalloc) {
3556                                 malloc_printf("%s: (malloc) Error in"
3557                                     " realloc(%p, %zu): out of memory\n",
3558                                     _getprogname(), ptr, size);
3559                                 abort();
3560                         }
3561                         errno = ENOMEM;
3562                 }
3563         } else {
3564                 if (malloc_init())
3565                         ret = NULL;
3566                 else
3567                         ret = imalloc(size);
3568
3569                 if (ret == NULL) {
3570                         if (opt_xmalloc) {
3571                                 malloc_printf("%s: (malloc) Error in"
3572                                     " realloc(%p, %zu): out of memory\n",
3573                                     _getprogname(), ptr, size);
3574                                 abort();
3575                         }
3576                         errno = ENOMEM;
3577                 }
3578         }
3579
3580 RETURN:
3581         UTRACE(ptr, size, ret);
3582         return (ret);
3583 }
3584
3585 void
3586 free(void *ptr)
3587 {
3588
3589         UTRACE(ptr, 0, 0);
3590         if (ptr != NULL) {
3591                 assert(malloc_initialized);
3592
3593                 idalloc(ptr);
3594         }
3595 }
3596
3597 /*
3598  * End malloc(3)-compatible functions.
3599  */
3600 /******************************************************************************/
3601 /*
3602  * Begin non-standard functions.
3603  */
3604
3605 size_t
3606 malloc_usable_size(const void *ptr)
3607 {
3608
3609         assert(ptr != NULL);
3610
3611         return (isalloc(ptr));
3612 }
3613
3614 /*
3615  * End non-standard functions.
3616  */
3617 /******************************************************************************/
3618 /*
3619  * Begin library-private functions, used by threading libraries for protection
3620  * of malloc during fork().  These functions are only called if the program is
3621  * running in threaded mode, so there is no need to check whether the program
3622  * is threaded here.
3623  */
3624
3625 void
3626 _malloc_prefork(void)
3627 {
3628         unsigned i;
3629
3630         /* Acquire all mutexes in a safe order. */
3631
3632         malloc_mutex_lock(&arenas_mtx);
3633         for (i = 0; i < narenas; i++) {
3634                 if (arenas[i] != NULL)
3635                         malloc_mutex_lock(&arenas[i]->mtx);
3636         }
3637         malloc_mutex_unlock(&arenas_mtx);
3638
3639         malloc_mutex_lock(&base_mtx);
3640
3641         malloc_mutex_lock(&chunks_mtx);
3642 }
3643
3644 void
3645 _malloc_postfork(void)
3646 {
3647         unsigned i;
3648
3649         /* Release all mutexes, now that fork() has completed. */
3650
3651         malloc_mutex_unlock(&chunks_mtx);
3652
3653         malloc_mutex_unlock(&base_mtx);
3654
3655         malloc_mutex_lock(&arenas_mtx);
3656         for (i = 0; i < narenas; i++) {
3657                 if (arenas[i] != NULL)
3658                         malloc_mutex_unlock(&arenas[i]->mtx);
3659         }
3660         malloc_mutex_unlock(&arenas_mtx);
3661 }
3662
3663 /*
3664  * End library-private functions.
3665  */
3666 /******************************************************************************/