lib/libc/stdlib/malloc.c

   1 /*-
   2  * Copyright (C) 2006 Jason Evans <jasone@FreeBSD.org>.
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice(s), this list of conditions and the following disclaimer as
  10  *    the first lines of this file unmodified other than the possible
  11  *    addition of one or more copyright notices.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice(s), this list of conditions and the following disclaimer in
  14  *    the documentation and/or other materials provided with the
  15  *    distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
  18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
  21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  26  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  *
  29  *******************************************************************************
  30  *
  31  * This allocator implementation is designed to provide scalable performance
  32  * for multi-threaded programs on multi-processor systems.  The following
  33  * features are included for this purpose:
  34  *
  35  *   + Multiple arenas are used if there are multiple CPUs, which reduces lock
  36  *     contention and cache sloshing.
  37  *
  38  *   + Cache line sharing between arenas is avoided for internal data
  39  *     structures.
  40  *
  41  *   + Memory is managed in chunks and runs (chunks can be split into runs using
  42  *     a binary buddy scheme), rather than as individual pages.  This provides
  43  *     a constant-time mechanism for associating allocations with particular
  44  *     arenas.
  45  *
  46  * Allocation requests are rounded up to the nearest size class, and no record
  47  * of the original request size is maintained.  Allocations are broken into
  48  * categories according to size class.  Assuming runtime defaults, 4 kB pages
  49  * and a 16 byte quantum, the size classes in each category are as follows:
  50  *
  51  *   |====================================|
  52  *   | Category | Subcategory    |   Size |
  53  *   |====================================|
  54  *   | Small    | Tiny           |      2 |
  55  *   |          |                |      4 |
  56  *   |          |                |      8 |
  57  *   |          |----------------+--------|
  58  *   |          | Quantum-spaced |     16 |
  59  *   |          |                |     32 |
  60  *   |          |                |     48 |
  61  *   |          |                |    ... |
  62  *   |          |                |    480 |
  63  *   |          |                |    496 |
  64  *   |          |                |    512 |
  65  *   |          |----------------+--------|
  66  *   |          | Sub-page       |   1 kB |
  67  *   |          |                |   2 kB |
  68  *   |====================================|
  69  *   | Large                     |   4 kB |
  70  *   |                           |   8 kB |
  71  *   |                           |  16 kB |
  72  *   |                           |    ... |
  73  *   |                           | 256 kB |
  74  *   |                           | 512 kB |
  75  *   |                           |   1 MB |
  76  *   |====================================|
  77  *   | Huge                      |   2 MB |
  78  *   |                           |   4 MB |
  79  *   |                           |   6 MB |
  80  *   |                           |    ... |
  81  *   |====================================|
  82  *
  83  * A different mechanism is used for each category:
  84  *
  85  *   Small : Each size class is segregated into its own set of runs.  Each run
  86  *           maintains a bitmap of which regions are free/allocated.
  87  *
  88  *   Large : Each allocation is backed by a dedicated run.  Metadata are stored
  89  *           in the associated arena chunk header maps.
  90  *
  91  *   Huge : Each allocation is backed by a dedicated contiguous set of chunks.
  92  *          Metadata are stored in a separate red-black tree.
  93  *
  94  *******************************************************************************
  95  */
  96
  97 /*
  98  *******************************************************************************
  99  *
 100  * Ring macros.
 101  *
 102  *******************************************************************************
 103  */
 104
 105 /* Ring definitions. */
 106 #define qr(a_type) struct {                                             \
 107         a_type *qre_next;                                               \
 108         a_type *qre_prev;                                               \
 109 }
 110
 111 #define qr_initializer {NULL, NULL}
 112
 113 /* Ring functions. */
 114 #define qr_new(a_qr, a_field) do {                                      \
 115         (a_qr)->a_field.qre_next = (a_qr);                              \
 116         (a_qr)->a_field.qre_prev = (a_qr);                              \
 117 } while (0)
 118
 119 #define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)
 120
 121 #define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)
 122
 123 #define qr_before_insert(a_qrelm, a_qr, a_field) do {                   \
 124         (a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev;         \
 125         (a_qr)->a_field.qre_next = (a_qrelm);                           \
 126         (a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr);            \
 127         (a_qrelm)->a_field.qre_prev = (a_qr);                           \
 128 } while (0)
 129
 130 #define qr_after_insert(a_qrelm, a_qr, a_field) do {                    \
 131         (a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next;         \
 132         (a_qr)->a_field.qre_prev = (a_qrelm);                           \
 133         (a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr);            \
 134         (a_qrelm)->a_field.qre_next = (a_qr);                           \
 135 } while (0)
 136
 137 #define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do {                   \
 138         a_type *t;                                                      \
 139         (a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b);        \
 140         (a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a);        \
 141         t = (a_qr_a)->a_field.qre_prev;                                 \
 142         (a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev;        \
 143         (a_qr_b)->a_field.qre_prev = t;                                 \
 144 } while (0)
 145
 146 /*
 147  * qr_meld() and qr_split() are functionally equivalent, so there's no need to
 148  * have two copies of the code.
 149  */
 150 #define qr_split(a_qr_a, a_qr_b, a_type, a_field)                       \
 151         qr_meld((a_qr_a), (a_qr_b), a_type, a_field)
 152
 153 #define qr_remove(a_qr, a_field) do {                                   \
 154         (a_qr)->a_field.qre_prev->a_field.qre_next                      \
 155             = (a_qr)->a_field.qre_next;                                 \
 156         (a_qr)->a_field.qre_next->a_field.qre_prev                      \
 157             = (a_qr)->a_field.qre_prev;                                 \
 158         (a_qr)->a_field.qre_next = (a_qr);                              \
 159         (a_qr)->a_field.qre_prev = (a_qr);                              \
 160 } while (0)
 161
 162 #define qr_foreach(var, a_qr, a_field)                                  \
 163         for ((var) = (a_qr);                                            \
 164             (var) != NULL;                                              \
 165             (var) = (((var)->a_field.qre_next != (a_qr))                \
 166             ? (var)->a_field.qre_next : NULL))
 167
 168 #define qr_reverse_foreach(var, a_qr, a_field)                          \
 169         for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL;  \
 170             (var) != NULL;                                              \
 171             (var) = (((var) != (a_qr))                                  \
 172             ? (var)->a_field.qre_prev : NULL))
 173
 174 /******************************************************************************/
 175
 176 /*
 177  * In order to disable various extra features that may have negative
 178  * performance impacts, (assertions, expanded statistics), define
 179  * NO_MALLOC_EXTRAS.
 180  */
 181 /* #define NO_MALLOC_EXTRAS */
 182
 183 #ifndef NO_MALLOC_EXTRAS
 184 #  define MALLOC_DEBUG
 185 #endif
 186
 187 #include <sys/cdefs.h>
 188 __FBSDID("$FreeBSD$");
 189
 190 #include "libc_private.h"
 191 #ifdef MALLOC_DEBUG
 192 #  define _LOCK_DEBUG
 193 #endif
 194 #include "spinlock.h"
 195 #include "namespace.h"
 196 #include <sys/mman.h>
 197 #include <sys/param.h>
 198 #include <sys/stddef.h>
 199 #include <sys/time.h>
 200 #include <sys/types.h>
 201 #include <sys/sysctl.h>
 202 #include <sys/tree.h>
 203 #include <sys/uio.h>
 204 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
 205
 206 #include <machine/atomic.h>
 207 #include <machine/cpufunc.h>
 208 #include <machine/vmparam.h>
 209
 210 #include <errno.h>
 211 #include <limits.h>
 212 #include <pthread.h>
 213 #include <sched.h>
 214 #include <stdarg.h>
 215 #include <stdbool.h>
 216 #include <stdio.h>
 217 #include <stdlib.h>
 218 #include <string.h>
 219 #include <strings.h>
 220 #include <unistd.h>
 221
 222 #include "un-namespace.h"
 223
 224 /*
 225  * Calculate statistics that can be used to get an idea of how well caching is
 226  * working.
 227  */
 228 #ifndef NO_MALLOC_EXTRAS
 229 #  define MALLOC_STATS
 230 #endif
 231
 232 #ifndef MALLOC_DEBUG
 233 #  ifndef NDEBUG
 234 #    define NDEBUG
 235 #  endif
 236 #endif
 237 #include <assert.h>
 238
 239 #ifdef MALLOC_DEBUG
 240    /* Disable inlining to make debugging easier. */
 241 #  define inline
 242 #endif
 243
 244 /* Size of stack-allocated buffer passed to strerror_r(). */
 245 #define STRERROR_BUF 64
 246
 247 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
 248 #ifdef __i386__
 249 #  define QUANTUM_2POW_MIN      4
 250 #  define SIZEOF_PTR            4
 251 #  define USE_BRK
 252 #endif
 253 #ifdef __ia64__
 254 #  define QUANTUM_2POW_MIN      4
 255 #  define SIZEOF_PTR            8
 256 #  define NO_TLS
 257 #endif
 258 #ifdef __alpha__
 259 #  define QUANTUM_2POW_MIN      4
 260 #  define SIZEOF_PTR            8
 261 #  define NO_TLS
 262 #endif
 263 #ifdef __sparc64__
 264 #  define QUANTUM_2POW_MIN      4
 265 #  define SIZEOF_PTR            8
 266 #  define NO_TLS
 267 #endif
 268 #ifdef __amd64__
 269 #  define QUANTUM_2POW_MIN      4
 270 #  define SIZEOF_PTR            8
 271 #endif
 272 #ifdef __arm__
 273 #  define QUANTUM_2POW_MIN      3
 274 #  define SIZEOF_PTR            4
 275 #  define USE_BRK
 276 #  define NO_TLS
 277 #endif
 278 #ifdef __powerpc__
 279 #  define QUANTUM_2POW_MIN      4
 280 #  define SIZEOF_PTR            4
 281 #  define USE_BRK
 282 #endif
 283
 284 /* sizeof(int) == (1 << SIZEOF_INT_2POW). */
 285 #ifndef SIZEOF_INT_2POW
 286 #  define SIZEOF_INT_2POW       2
 287 #endif
 288
 289 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
 290 #if (!defined(PIC) && !defined(NO_TLS))
 291 #  define NO_TLS
 292 #endif
 293
 294 /*
 295  * Size and alignment of memory chunks that are allocated by the OS's virtual
 296  * memory system.
 297  *
 298  * chunksize limits:
 299  *
 300  *   2^(pagesize_2pow - 1 + RUN_MIN_REGS_2POW) <= chunk_size <= 2^28
 301  */
 302 #define CHUNK_2POW_DEFAULT      21
 303 #define CHUNK_2POW_MAX          28
 304
 305 /*
 306  * Maximum size of L1 cache line.  This is used to avoid cache line aliasing,
 307  * so over-estimates are okay (up to a point), but under-estimates will
 308  * negatively affect performance.
 309  */
 310 #define CACHELINE_2POW 6
 311 #define CACHELINE ((size_t)(1 << CACHELINE_2POW))
 312
 313 /*
 314  * Maximum size class that is a multiple of the quantum, but not (necessarily)
 315  * a power of 2.  Above this size, allocations are rounded up to the nearest
 316  * power of 2.
 317  */
 318 #define SMALL_MAX_2POW_DEFAULT 9
 319 #define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT)
 320
 321 /*
 322  * Minimum number of regions that must fit into a run that serves quantum-size
 323  * bin allocations.
 324  *
 325  * Note that if this is set too low, space will be wasted if there are size
 326  * classes that are small enough that RUN_MIN_REGS regions don't fill a page.
 327  * If this is set too high, then the overhead of searching through the bitmap
 328  * that tracks region usage will become excessive.
 329  */
 330 #define RUN_MIN_REGS_2POW 10
 331 #define RUN_MIN_REGS (1 << RUN_MIN_REGS_2POW)
 332
 333 /*
 334  * Maximum number of pages for a run that is used for bin allocations.
 335  *
 336  * Note that if this is set too low, then fragmentation for the largest bin
 337  * size classes will be high.  If this is set too high, then even small
 338  * programs will often have to allocate more than two chunks early on.
 339  */
 340 #define RUN_MAX_PAGES_2POW 4
 341 #define RUN_MAX_PAGES (1 << RUN_MAX_PAGES_2POW)
 342
 343 /******************************************************************************/
 344
 345 /*
 346  * Mutexes based on spinlocks.  We can't use normal pthread mutexes, because
 347  * they require malloc()ed memory.
 348  */
 349 typedef struct {
 350         spinlock_t      lock;
 351 } malloc_mutex_t;
 352
 353 /* Set to true once the allocator has been initialized. */
 354 static bool malloc_initialized = false;
 355
 356 /* Used to avoid initialization races. */
 357 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
 358
 359 /******************************************************************************/
 360 /*
 361  * Statistics data structures.
 362  */
 363
 364 #ifdef MALLOC_STATS
 365
 366 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
 367 struct malloc_bin_stats_s {
 368         /*
 369          * Number of allocation requests that corresponded to the size of this
 370          * bin.
 371          */
 372         uint64_t        nrequests;
 373
 374         /* Total number of runs created for this bin's size class. */
 375         uint64_t        nruns;
 376
 377         /*
 378          * Total number of run promotions/demotions for this bin's size class.
 379          */
 380         uint64_t        npromote;
 381         uint64_t        ndemote;
 382
 383         /* High-water mark for this bin. */
 384         unsigned long   highruns;
 385
 386         /* Current number of runs in this bin. */
 387         unsigned long   curruns;
 388 };
 389
 390 typedef struct arena_stats_s arena_stats_t;
 391 struct arena_stats_s {
 392         /* Total byte count of allocated memory, not including overhead. */
 393         size_t          allocated;
 394
 395         /* Number of times each function was called. */
 396         uint64_t        nmalloc;
 397         uint64_t        ndalloc;
 398         uint64_t        nmadvise;
 399
 400         /* Number of large allocation requests. */
 401         uint64_t        large_nrequests;
 402 };
 403
 404 typedef struct chunk_stats_s chunk_stats_t;
 405 struct chunk_stats_s {
 406         /* Number of chunks that were allocated. */
 407         uint64_t        nchunks;
 408
 409         /* High-water mark for number of chunks allocated. */
 410         unsigned long   highchunks;
 411
 412         /*
 413          * Current number of chunks allocated.  This value isn't maintained for
 414          * any other purpose, so keep track of it in order to be able to set
 415          * highchunks.
 416          */
 417         unsigned long   curchunks;
 418 };
 419
 420 #endif /* #ifdef MALLOC_STATS */
 421
 422 /******************************************************************************/
 423 /*
 424  * Chunk data structures.
 425  */
 426
 427 /* Tree of chunks. */
 428 typedef struct chunk_node_s chunk_node_t;
 429 struct chunk_node_s {
 430         /* Linkage for the chunk tree. */
 431         RB_ENTRY(chunk_node_s) link;
 432
 433         /*
 434          * Pointer to the chunk that this tree node is responsible for.  In some
 435          * (but certainly not all) cases, this data structure is placed at the
 436          * beginning of the corresponding chunk, so this field may point to this
 437          * node.
 438          */
 439         void    *chunk;
 440
 441         /* Total chunk size. */
 442         size_t  size;
 443 };
 444 typedef struct chunk_tree_s chunk_tree_t;
 445 RB_HEAD(chunk_tree_s, chunk_node_s);
 446
 447 /******************************************************************************/
 448 /*
 449  * Arena data structures.
 450  */
 451
 452 typedef struct arena_s arena_t;
 453 typedef struct arena_bin_s arena_bin_t;
 454
 455 typedef struct arena_chunk_map_s arena_chunk_map_t;
 456 struct arena_chunk_map_s {
 457         bool            free:1;
 458         bool            large:1;
 459         unsigned        npages:15; /* Limiting factor for CHUNK_2POW_MAX. */
 460         unsigned        pos:15;
 461 };
 462
 463 /* Arena chunk header. */
 464 typedef struct arena_chunk_s arena_chunk_t;
 465 struct arena_chunk_s {
 466         /* Arena that owns the chunk. */
 467         arena_t *arena;
 468
 469         /* Linkage for the arena's chunk tree. */
 470         RB_ENTRY(arena_chunk_s) link;
 471
 472         /*
 473          * Number of pages in use.  This is maintained in order to make
 474          * detection of empty chunks fast.
 475          */
 476         uint32_t pages_used;
 477
 478         /*
 479          * Array of counters that keeps track of how many free runs of each
 480          * size are available in this chunk.  This table is sized at compile
 481          * time, which is wasteful.  However, due to unrelated rounding, this
 482          * doesn't actually waste any otherwise useful space.
 483          *
 484          *   index == 2^n pages
 485          *
 486          *   index | npages
 487          *   ------+-------
 488          *       0 |      1
 489          *       1 |      2
 490          *       2 |      4
 491          *       3 |      8
 492          *         :
 493          */
 494         uint32_t nfree_runs[CHUNK_2POW_MAX/* - PAGE_SHIFT */];
 495
 496         /* Map of pages within chunk that keeps track of free/large/small. */
 497         arena_chunk_map_t map[1]; /* Dynamically sized. */
 498 };
 499 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
 500 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
 501
 502 typedef struct arena_run_s arena_run_t;
 503 struct arena_run_s {
 504         /* Linkage for run rings. */
 505         qr(arena_run_t) link;
 506
 507 #ifdef MALLOC_DEBUG
 508         uint32_t        magic;
 509 #  define ARENA_RUN_MAGIC 0x384adf93
 510 #endif
 511
 512         /* Bin this run is associated with. */
 513         arena_bin_t     *bin;
 514
 515         /* Bitmask of in-use regions (0: in use, 1: free). */
 516 #define REGS_MASK_NELMS                                                 \
 517         (1 << (RUN_MIN_REGS_2POW - SIZEOF_INT_2POW - 2))
 518         unsigned        regs_mask[REGS_MASK_NELMS];
 519
 520         /* Index of first element that might have a free region. */
 521         unsigned        regs_minelm;
 522
 523         /* Number of free regions in run. */
 524         unsigned        nfree;
 525
 526         /*
 527          * Current quartile for this run, one of: {RUN_QINIT, RUN_Q0, RUN_25,
 528          * RUN_Q50, RUN_Q75, RUN_Q100}.
 529          */
 530 #define RUN_QINIT       0
 531 #define RUN_Q0          1
 532 #define RUN_Q25         2
 533 #define RUN_Q50         3
 534 #define RUN_Q75         4
 535 #define RUN_Q100        5
 536         unsigned        quartile;
 537
 538         /*
 539          * Limits on the number of free regions for the fullness quartile this
 540          * run is currently in.  If nfree goes outside these limits, the run
 541          * is moved to a different fullness quartile.
 542          */
 543         unsigned        free_max;
 544         unsigned        free_min;
 545 };
 546
 547 /* Used for run ring headers, where the run isn't actually used. */
 548 typedef struct arena_run_link_s arena_run_link_t;
 549 struct arena_run_link_s {
 550         /* Linkage for run rings. */
 551         qr(arena_run_t) link;
 552 };
 553
 554 struct arena_bin_s {
 555         /*
 556          * Current run being used to service allocations of this bin's size
 557          * class.
 558          */
 559         arena_run_t     *runcur;
 560
 561         /*
 562          * Links into rings of runs, of various fullnesses (names indicate
 563          * approximate lower bounds).  A new run conceptually starts off in
 564          * runsinit, and it isn't inserted into the runs0 ring until it
 565          * reaches 25% full (hysteresis mechanism).  For the run to be moved
 566          * again, it must become either empty or 50% full.  Thus, each ring
 567          * contains runs that are within 50% above the advertised fullness for
 568          * the ring.  This provides a low-overhead mechanism for segregating
 569          * runs into approximate fullness classes.
 570          *
 571          * Conceptually, there is a runs100 that contains completely full runs.
 572          * Since we don't need to search for these runs though, no runs100 ring
 573          * is actually maintained.
 574          *
 575          * These rings are useful when looking for an existing run to use when
 576          * runcur is no longer usable.  We look for usable runs in the
 577          * following order:
 578          *
 579          *   1) runs50
 580          *   2) runs25
 581          *   3) runs0
 582          *   4) runs75
 583          *
 584          * runs75 isn't a good place to look, because it contains runs that may
 585          * be nearly completely full.  Still, we look there as a last resort in
 586          * order to avoid allocating a new run if at all possible.
 587          */
 588         /* arena_run_link_t     runsinit;   0% <= fullness <   25% */
 589         arena_run_link_t        runs0;  /*  0% <  fullness <   50% */
 590         arena_run_link_t        runs25; /* 25% <  fullness <   75% */
 591         arena_run_link_t        runs50; /* 50% <  fullness <  100% */
 592         arena_run_link_t        runs75; /* 75% <  fullness <  100% */
 593         /* arena_run_link_t     runs100;          fullness == 100% */
 594
 595         /* Size of regions in a run for this bin's size class. */
 596         size_t          reg_size;
 597
 598         /* Total size of a run for this bin's size class. */
 599         size_t          run_size;
 600
 601         /* Total number of regions in a run for this bin's size class. */
 602         uint32_t        nregs;
 603
 604         /* Offset of first region in a run for this bin's size class. */
 605         uint32_t        reg0_offset;
 606
 607 #ifdef MALLOC_STATS
 608         /* Bin statistics. */
 609         malloc_bin_stats_t stats;
 610 #endif
 611 };
 612
 613 struct arena_s {
 614 #ifdef MALLOC_DEBUG
 615         uint32_t                magic;
 616 #  define ARENA_MAGIC 0x947d3d24
 617 #endif
 618
 619         /* All operations on this arena require that mtx be locked. */
 620         malloc_mutex_t          mtx;
 621
 622 #ifdef MALLOC_STATS
 623         arena_stats_t           stats;
 624 #endif
 625
 626         /*
 627          * Tree of chunks this arena manages.
 628          */
 629         arena_chunk_tree_t      chunks;
 630
 631         /*
 632          * bins is used to store rings of free regions of the following sizes,
 633          * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
 634          *
 635          *   bins[i] | size |
 636          *   --------+------+
 637          *        0  |    2 |
 638          *        1  |    4 |
 639          *        2  |    8 |
 640          *   --------+------+
 641          *        3  |   16 |
 642          *        4  |   32 |
 643          *        5  |   48 |
 644          *        6  |   64 |
 645          *           :      :
 646          *           :      :
 647          *       33  |  496 |
 648          *       34  |  512 |
 649          *   --------+------+
 650          *       35  | 1024 |
 651          *       36  | 2048 |
 652          *   --------+------+
 653          */
 654         arena_bin_t             bins[1]; /* Dynamically sized. */
 655 };
 656
 657 /******************************************************************************/
 658 /*
 659  * Data.
 660  */
 661
 662 /* Used as a special "nil" return value for malloc(0). */
 663 static const int        nil;
 664
 665 /* Number of CPUs. */
 666 static unsigned         ncpus;
 667
 668 /* VM page size. */
 669 static unsigned         pagesize;
 670 static unsigned         pagesize_2pow;
 671
 672 /* Various bin-related settings. */
 673 static size_t           bin_maxclass; /* Max size class for bins. */
 674 static unsigned         ntbins; /* Number of (2^n)-spaced tiny bins. */
 675 static unsigned         nqbins; /* Number of quantum-spaced bins. */
 676 static unsigned         nsbins; /* Number of (2^n)-spaced sub-page bins. */
 677 static size_t           small_min;
 678 static size_t           small_max;
 679 static unsigned         tiny_min_2pow;
 680
 681 /* Various quantum-related settings. */
 682 static size_t           quantum;
 683 static size_t           quantum_mask; /* (quantum - 1). */
 684
 685 /* Various chunk-related settings. */
 686 static size_t           chunk_size;
 687 static size_t           chunk_size_mask; /* (chunk_size - 1). */
 688 static size_t           arena_maxclass; /* Max size class for arenas. */
 689 static unsigned         arena_chunk_maplen;
 690
 691 /********/
 692 /*
 693  * Chunks.
 694  */
 695
 696 /* Protects chunk-related data structures. */
 697 static malloc_mutex_t   chunks_mtx;
 698
 699 /* Tree of chunks that are stand-alone huge allocations. */
 700 static chunk_tree_t     huge;
 701
 702 #ifdef USE_BRK
 703 /*
 704  * Try to use brk for chunk-size allocations, due to address space constraints.
 705  */
 706 /* Result of first sbrk(0) call. */
 707 static void             *brk_base;
 708 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
 709 static void             *brk_prev;
 710 /* Current upper limit on brk addresses. */
 711 static void             *brk_max;
 712 #endif
 713
 714 #ifdef MALLOC_STATS
 715 /*
 716  * Byte counters for allocated/total space used by the chunks in the huge
 717  * allocations tree.
 718  */
 719 static uint64_t         huge_nmalloc;
 720 static uint64_t         huge_ndalloc;
 721 static size_t           huge_allocated;
 722 #endif
 723
 724 /*
 725  * Tree of chunks that were previously allocated.  This is used when allocating
 726  * chunks, in an attempt to re-use address space.
 727  */
 728 static chunk_tree_t     old_chunks;
 729
 730 /****************************/
 731 /*
 732  * base (internal allocation).
 733  */
 734
 735 /*
 736  * Current chunk that is being used for internal memory allocations.  This
 737  * chunk is carved up in cacheline-size quanta, so that there is no chance of
 738  * false cache line sharing.
 739  */
 740 static void             *base_chunk;
 741 static void             *base_next_addr;
 742 static void             *base_past_addr; /* Addr immediately past base_chunk. */
 743 static chunk_node_t     *base_chunk_nodes; /* LIFO cache of chunk nodes. */
 744 static malloc_mutex_t   base_mtx;
 745 #ifdef MALLOC_STATS
 746 static uint64_t         base_total;
 747 #endif
 748
 749 /********/
 750 /*
 751  * Arenas.
 752  */
 753
 754 /*
 755  * Arenas that are used to service external requests.  Not all elements of the
 756  * arenas array are necessarily used; arenas are created lazily as needed.
 757  */
 758 static arena_t          **arenas;
 759 static unsigned         narenas;
 760 #ifndef NO_TLS
 761 static unsigned         next_arena;
 762 #endif
 763 static malloc_mutex_t   arenas_mtx; /* Protects arenas initialization. */
 764
 765 #ifndef NO_TLS
 766 /*
 767  * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 768  * for allocations.
 769  */
 770 static __thread arena_t *arenas_map;
 771 #endif
 772
 773 #ifdef MALLOC_STATS
 774 /* Chunk statistics. */
 775 static chunk_stats_t    stats_chunks;
 776 #endif
 777
 778 /*******************************/
 779 /*
 780  * Runtime configuration options.
 781  */
 782 const char      *_malloc_options;
 783
 784 #ifndef NO_MALLOC_EXTRAS
 785 static bool     opt_abort = true;
 786 static bool     opt_junk = true;
 787 #else
 788 static bool     opt_abort = false;
 789 static bool     opt_junk = false;
 790 #endif
 791 static bool     opt_hint = false;
 792 static bool     opt_print_stats = false;
 793 static size_t   opt_quantum_2pow = QUANTUM_2POW_MIN;
 794 static size_t   opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
 795 static size_t   opt_chunk_2pow = CHUNK_2POW_DEFAULT;
 796 static bool     opt_utrace = false;
 797 static bool     opt_sysv = false;
 798 static bool     opt_xmalloc = false;
 799 static bool     opt_zero = false;
 800 static int32_t  opt_narenas_lshift = 0;
 801
 802 typedef struct {
 803         void    *p;
 804         size_t  s;
 805         void    *r;
 806 } malloc_utrace_t;
 807
 808 #define UTRACE(a, b, c)                                                 \
 809         if (opt_utrace) {                                               \
 810                 malloc_utrace_t ut = {a, b, c};                         \
 811                 utrace(&ut, sizeof(ut));                                \
 812         }
 813
 814 /******************************************************************************/
 815 /*
 816  * Begin function prototypes for non-inline static functions.
 817  */
 818
 819 static void     malloc_mutex_init(malloc_mutex_t *a_mutex);
 820 static void     wrtmessage(const char *p1, const char *p2, const char *p3,
 821                 const char *p4);
 822 static void     malloc_printf(const char *format, ...);
 823 static void     *base_alloc(size_t size);
 824 static chunk_node_t *base_chunk_node_alloc(void);
 825 static void     base_chunk_node_dealloc(chunk_node_t *node);
 826 #ifdef MALLOC_STATS
 827 static void     stats_print(arena_t *arena);
 828 #endif
 829 static void     *pages_map(void *addr, size_t size);
 830 static void     pages_unmap(void *addr, size_t size);
 831 static void     *chunk_alloc(size_t size);
 832 static void     chunk_dealloc(void *chunk, size_t size);
 833 #ifndef NO_TLS
 834 static arena_t  *choose_arena_hard(void);
 835 #endif
 836 static void     arena_run_split(arena_t *arena, arena_run_t *run, bool large,
 837     size_t size);
 838 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
 839 static void     arena_chunk_dealloc(arena_chunk_t *chunk);
 840 static void     arena_bin_run_promote(arena_t *arena, arena_bin_t *bin,
 841     arena_run_t *run, size_t size);
 842 static void     arena_bin_run_demote(arena_t *arena, arena_bin_t *bin,
 843     arena_run_t *run, size_t size);
 844 static arena_run_t *arena_run_alloc(arena_t *arena, bool large, size_t size);
 845 static void     arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
 846 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin,
 847     size_t size);
 848 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin,
 849     size_t size);
 850 static void     *arena_malloc(arena_t *arena, size_t size);
 851 static size_t   arena_salloc(const void *ptr);
 852 static void     *arena_ralloc(void *ptr, size_t size, size_t oldsize);
 853 static void     arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr);
 854 static bool     arena_new(arena_t *arena);
 855 static arena_t  *arenas_extend(unsigned ind);
 856 static void     *huge_malloc(size_t size);
 857 static void     *huge_ralloc(void *ptr, size_t size, size_t oldsize);
 858 static void     huge_dalloc(void *ptr);
 859 static void     *imalloc(size_t size);
 860 static void     *ipalloc(size_t alignment, size_t size);
 861 static void     *icalloc(size_t size);
 862 static size_t   isalloc(const void *ptr);
 863 static void     *iralloc(void *ptr, size_t size);
 864 static void     idalloc(void *ptr);
 865 static void     malloc_print_stats(void);
 866 static bool     malloc_init_hard(void);
 867
 868 /*
 869  * End function prototypes.
 870  */
 871 /******************************************************************************/
 872 /*
 873  * Begin mutex.
 874  */
 875
 876 static void
 877 malloc_mutex_init(malloc_mutex_t *a_mutex)
 878 {
 879         static const spinlock_t lock = _SPINLOCK_INITIALIZER;
 880
 881         a_mutex->lock = lock;
 882 }
 883
 884 static inline void
 885 malloc_mutex_lock(malloc_mutex_t *a_mutex)
 886 {
 887
 888         if (__isthreaded)
 889                 _SPINLOCK(&a_mutex->lock);
 890 }
 891
 892 static inline void
 893 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
 894 {
 895
 896         if (__isthreaded)
 897                 _SPINUNLOCK(&a_mutex->lock);
 898 }
 899
 900 /*
 901  * End mutex.
 902  */
 903 /******************************************************************************/
 904 /*
 905  * Begin Utility functions/macros.
 906  */
 907
 908 /* Return the chunk address for allocation address a. */
 909 #define CHUNK_ADDR2BASE(a)                                              \
 910         ((void *)((uintptr_t)(a) & ~chunk_size_mask))
 911
 912 /* Return the chunk offset of address a. */
 913 #define CHUNK_ADDR2OFFSET(a)                                            \
 914         ((size_t)((uintptr_t)(a) & chunk_size_mask))
 915
 916 /* Return the smallest chunk multiple that is >= s. */
 917 #define CHUNK_CEILING(s)                                                \
 918         (((s) + chunk_size_mask) & ~chunk_size_mask)
 919
 920 /* Return the smallest cacheline multiple that is >= s. */
 921 #define CACHELINE_CEILING(s)                                            \
 922         (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
 923
 924 /* Return the smallest quantum multiple that is >= a. */
 925 #define QUANTUM_CEILING(a)                                              \
 926         (((a) + quantum_mask) & ~quantum_mask)
 927
 928 /* Compute the smallest power of 2 that is >= x. */
 929 static inline size_t
 930 pow2_ceil(size_t x)
 931 {
 932
 933         x--;
 934         x |= x >> 1;
 935         x |= x >> 2;
 936         x |= x >> 4;
 937         x |= x >> 8;
 938         x |= x >> 16;
 939 #if (SIZEOF_PTR == 8)
 940         x |= x >> 32;
 941 #endif
 942         x++;
 943         return (x);
 944 }
 945
 946 static void
 947 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
 948 {
 949
 950         _write(STDERR_FILENO, p1, strlen(p1));
 951         _write(STDERR_FILENO, p2, strlen(p2));
 952         _write(STDERR_FILENO, p3, strlen(p3));
 953         _write(STDERR_FILENO, p4, strlen(p4));
 954 }
 955
 956 void    (*_malloc_message)(const char *p1, const char *p2, const char *p3,
 957             const char *p4) = wrtmessage;
 958
 959 /*
 960  * Print to stderr in such a way as to (hopefully) avoid memory allocation.
 961  */
 962 static void
 963 malloc_printf(const char *format, ...)
 964 {
 965         char buf[4096];
 966         va_list ap;
 967
 968         va_start(ap, format);
 969         vsnprintf(buf, sizeof(buf), format, ap);
 970         va_end(ap);
 971         _malloc_message(buf, "", "", "");
 972 }
 973
 974 /******************************************************************************/
 975
 976 static void *
 977 base_alloc(size_t size)
 978 {
 979         void *ret;
 980         size_t csize;
 981
 982         /* Round size up to nearest multiple of the cacheline size. */
 983         csize = CACHELINE_CEILING(size);
 984
 985         malloc_mutex_lock(&base_mtx);
 986
 987         /* Make sure there's enough space for the allocation. */
 988         if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
 989                 void *tchunk;
 990
 991                 assert(csize <= chunk_size);
 992
 993                 tchunk = chunk_alloc(chunk_size);
 994                 if (tchunk == NULL) {
 995                         ret = NULL;
 996                         goto RETURN;
 997                 }
 998                 base_chunk = tchunk;
 999                 base_next_addr = (void *)base_chunk;
1000                 base_past_addr = (void *)((uintptr_t)base_chunk + chunk_size);
1001 #ifdef MALLOC_STATS
1002                 base_total += chunk_size;
1003 #endif
1004         }
1005
1006         /* Allocate. */
1007         ret = base_next_addr;
1008         base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1009
1010 RETURN:
1011         malloc_mutex_unlock(&base_mtx);
1012         return (ret);
1013 }
1014
1015 static chunk_node_t *
1016 base_chunk_node_alloc(void)
1017 {
1018         chunk_node_t *ret;
1019
1020         malloc_mutex_lock(&base_mtx);
1021         if (base_chunk_nodes != NULL) {
1022                 ret = base_chunk_nodes;
1023                 base_chunk_nodes = *(chunk_node_t **)ret;
1024                 malloc_mutex_unlock(&base_mtx);
1025         } else {
1026                 malloc_mutex_unlock(&base_mtx);
1027                 ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1028         }
1029
1030         return (ret);
1031 }
1032
1033 static void
1034 base_chunk_node_dealloc(chunk_node_t *node)
1035 {
1036
1037         malloc_mutex_lock(&base_mtx);
1038         *(chunk_node_t **)node = base_chunk_nodes;
1039         base_chunk_nodes = node;
1040         malloc_mutex_unlock(&base_mtx);
1041 }
1042
1043 /******************************************************************************/
1044
1045 #ifdef MALLOC_STATS
1046 static void
1047 stats_print(arena_t *arena)
1048 {
1049         unsigned i;
1050         int gap_start;
1051
1052         malloc_printf("allocated: %zu\n", arena->stats.allocated);
1053
1054         malloc_printf("calls:\n");
1055         malloc_printf(" %12s %12s %12s\n", "nmalloc","ndalloc", "nmadvise");
1056         malloc_printf(" %12llu %12llu %12llu\n",
1057             arena->stats.nmalloc, arena->stats.ndalloc, arena->stats.nmadvise);
1058
1059         malloc_printf("large requests: %llu\n", arena->stats.large_nrequests);
1060
1061         malloc_printf("bins:\n");
1062         malloc_printf("%13s %1s %4s %5s %6s %9s %5s %6s %7s %6s %6s\n",
1063             "bin", "", "size", "nregs", "run_sz", "nrequests", "nruns",
1064             "hiruns", "curruns", "npromo", "ndemo");
1065         for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1066                 if (arena->bins[i].stats.nrequests == 0) {
1067                         if (gap_start == -1)
1068                                 gap_start = i;
1069                 } else {
1070                         if (gap_start != -1) {
1071                                 if (i > gap_start + 1) {
1072                                         /* Gap of more than one size class. */
1073                                         malloc_printf("[%u..%u]\n",
1074                                             gap_start, i - 1);
1075                                 } else {
1076                                         /* Gap of one size class. */
1077                                         malloc_printf("[%u]\n", gap_start);
1078                                 }
1079                                 gap_start = -1;
1080                         }
1081                         malloc_printf(
1082                             "%13u %1s %4u %5u %6u %9llu %5llu"
1083                             " %6lu %7lu %6llu %6llu\n",
1084                             i,
1085                             i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1086                             arena->bins[i].reg_size,
1087                             arena->bins[i].nregs,
1088                             arena->bins[i].run_size,
1089                             arena->bins[i].stats.nrequests,
1090                             arena->bins[i].stats.nruns,
1091                             arena->bins[i].stats.highruns,
1092                             arena->bins[i].stats.curruns,
1093                             arena->bins[i].stats.npromote,
1094                             arena->bins[i].stats.ndemote);
1095                 }
1096         }
1097         if (gap_start != -1) {
1098                 if (i > gap_start + 1) {
1099                         /* Gap of more than one size class. */
1100                         malloc_printf("[%u..%u]\n", gap_start, i - 1);
1101                 } else {
1102                         /* Gap of one size class. */
1103                         malloc_printf("[%u]\n", gap_start);
1104                 }
1105         }
1106 }
1107 #endif
1108
1109 /*
1110  * End Utility functions/macros.
1111  */
1112 /******************************************************************************/
1113 /*
1114  * Begin chunk management functions.
1115  */
1116
1117 static inline int
1118 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1119 {
1120
1121         assert(a != NULL);
1122         assert(b != NULL);
1123
1124         if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1125                 return (-1);
1126         else if (a->chunk == b->chunk)
1127                 return (0);
1128         else
1129                 return (1);
1130 }
1131
1132 /* Generate red-black tree code for chunks. */
1133 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1134
1135 static void *
1136 pages_map(void *addr, size_t size)
1137 {
1138         void *ret;
1139
1140         /*
1141          * We don't use MAP_FIXED here, because it can cause the *replacement*
1142          * of existing mappings, and we only want to create new mappings.
1143          */
1144         ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1145             -1, 0);
1146         assert(ret != NULL);
1147
1148         if (ret == MAP_FAILED)
1149                 ret = NULL;
1150         else if (addr != NULL && ret != addr) {
1151                 /*
1152                  * We succeeded in mapping memory, but not in the right place.
1153                  */
1154                 if (munmap(ret, size) == -1) {
1155                         char buf[STRERROR_BUF];
1156
1157                         strerror_r(errno, buf, sizeof(buf));
1158                         malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1159                             _getprogname(), buf);
1160                         if (opt_abort)
1161                                 abort();
1162                 }
1163                 ret = NULL;
1164         }
1165
1166         assert(ret == NULL || (addr == NULL && ret != addr)
1167             || (addr != NULL && ret == addr));
1168         return (ret);
1169 }
1170
1171 static void
1172 pages_unmap(void *addr, size_t size)
1173 {
1174
1175         if (munmap(addr, size) == -1) {
1176                 char buf[STRERROR_BUF];
1177
1178                 strerror_r(errno, buf, sizeof(buf));
1179                 malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1180                     _getprogname(), buf);
1181                 if (opt_abort)
1182                         abort();
1183         }
1184 }
1185
1186 static void *
1187 chunk_alloc(size_t size)
1188 {
1189         void *ret, *chunk;
1190         chunk_node_t *tchunk, *delchunk;
1191
1192         assert(size != 0);
1193         assert(size % chunk_size == 0);
1194
1195         malloc_mutex_lock(&chunks_mtx);
1196
1197         if (size == chunk_size) {
1198                 /*
1199                  * Check for address ranges that were previously chunks and try
1200                  * to use them.
1201                  */
1202
1203                 tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1204                 while (tchunk != NULL) {
1205                         /* Found an address range.  Try to recycle it. */
1206
1207                         chunk = tchunk->chunk;
1208                         delchunk = tchunk;
1209                         tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1210
1211                         /* Remove delchunk from the tree. */
1212                         RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1213                         base_chunk_node_dealloc(delchunk);
1214
1215 #ifdef USE_BRK
1216                         if ((uintptr_t)chunk >= (uintptr_t)brk_base
1217                             && (uintptr_t)chunk < (uintptr_t)brk_max) {
1218                                 /* Re-use a previously freed brk chunk. */
1219                                 ret = chunk;
1220                                 goto RETURN;
1221                         }
1222 #endif
1223                         if ((ret = pages_map(chunk, size)) != NULL) {
1224                                 /* Success. */
1225                                 goto RETURN;
1226                         }
1227                 }
1228         }
1229
1230 #ifdef USE_BRK
1231         /*
1232          * Try to create allocations in brk, in order to make full use of
1233          * limited address space.
1234          */
1235         if (brk_prev != (void *)-1) {
1236                 void *brk_cur;
1237                 intptr_t incr;
1238
1239                 /*
1240                  * The loop is necessary to recover from races with other
1241                  * threads that are using brk for something other than malloc.
1242                  */
1243                 do {
1244                         /* Get the current end of brk. */
1245                         brk_cur = sbrk(0);
1246
1247                         /*
1248                          * Calculate how much padding is necessary to
1249                          * chunk-align the end of brk.
1250                          */
1251                         incr = (intptr_t)size
1252                             - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1253                         if (incr == size) {
1254                                 ret = brk_cur;
1255                         } else {
1256                                 ret = (void *)(intptr_t)brk_cur + incr;
1257                                 incr += size;
1258                         }
1259
1260                         brk_prev = sbrk(incr);
1261                         if (brk_prev == brk_cur) {
1262                                 /* Success. */
1263                                 brk_max = (void *)(intptr_t)ret + size;
1264                                 goto RETURN;
1265                         }
1266                 } while (brk_prev != (void *)-1);
1267         }
1268 #endif
1269
1270         /*
1271          * Try to over-allocate, but allow the OS to place the allocation
1272          * anywhere.  Beware of size_t wrap-around.
1273          */
1274         if (size + chunk_size > size) {
1275                 if ((ret = pages_map(NULL, size + chunk_size)) != NULL) {
1276                         size_t offset = CHUNK_ADDR2OFFSET(ret);
1277
1278                         /*
1279                          * Success.  Clean up unneeded leading/trailing space.
1280                          */
1281                         if (offset != 0) {
1282                                 /* Leading space. */
1283                                 pages_unmap(ret, chunk_size - offset);
1284
1285                                 ret = (void *)((uintptr_t)ret + (chunk_size -
1286                                     offset));
1287
1288                                 /* Trailing space. */
1289                                 pages_unmap((void *)((uintptr_t)ret + size),
1290                                     offset);
1291                         } else {
1292                                 /* Trailing space only. */
1293                                 pages_unmap((void *)((uintptr_t)ret + size),
1294                                     chunk_size);
1295                         }
1296                         goto RETURN;
1297                 }
1298         }
1299
1300         /* All strategies for allocation failed. */
1301         ret = NULL;
1302 RETURN:
1303 #ifdef MALLOC_STATS
1304         if (ret != NULL) {
1305                 stats_chunks.nchunks += (size / chunk_size);
1306                 stats_chunks.curchunks += (size / chunk_size);
1307         }
1308         if (stats_chunks.curchunks > stats_chunks.highchunks)
1309                 stats_chunks.highchunks = stats_chunks.curchunks;
1310 #endif
1311         malloc_mutex_unlock(&chunks_mtx);
1312
1313         assert(CHUNK_ADDR2BASE(ret) == ret);
1314         return (ret);
1315 }
1316
1317 static void
1318 chunk_dealloc(void *chunk, size_t size)
1319 {
1320         size_t offset;
1321         chunk_node_t *node;
1322
1323         assert(chunk != NULL);
1324         assert(CHUNK_ADDR2BASE(chunk) == chunk);
1325         assert(size != 0);
1326         assert(size % chunk_size == 0);
1327
1328         malloc_mutex_lock(&chunks_mtx);
1329
1330 #ifdef USE_BRK
1331         if ((uintptr_t)chunk >= (uintptr_t)brk_base
1332             && (uintptr_t)chunk < (uintptr_t)brk_max) {
1333                 void *brk_cur;
1334
1335                 /* Get the current end of brk. */
1336                 brk_cur = sbrk(0);
1337
1338                 /*
1339                  * Try to shrink the data segment if this chunk is at the end
1340                  * of the data segment.  The sbrk() call here is subject to a
1341                  * race condition with threads that use brk(2) or sbrk(2)
1342                  * directly, but the alternative would be to leak memory for
1343                  * the sake of poorly designed multi-threaded programs.
1344                  */
1345                 if (brk_cur == brk_max
1346                     && (void *)(uintptr_t)chunk + size == brk_max
1347                     && sbrk(-(intptr_t)size) == brk_max) {
1348                         if (brk_prev == brk_max) {
1349                                 /* Success. */
1350                                 brk_prev = (void *)(intptr_t)brk_max
1351                                     - (intptr_t)size;
1352                                 brk_max = brk_prev;
1353                         }
1354                         goto RETURN;
1355                 } else
1356                         madvise(chunk, size, MADV_FREE);
1357         } else
1358 #endif
1359                 pages_unmap(chunk, size);
1360
1361         /*
1362          * Iteratively create records of each chunk-sized memory region that
1363          * 'chunk' is comprised of, so that the address range can be recycled
1364          * if memory usage increases later on.
1365          */
1366         for (offset = 0; offset < size; offset += chunk_size) {
1367                 node = base_chunk_node_alloc();
1368                 if (node == NULL)
1369                         break;
1370
1371                 node->chunk = (void *)((uintptr_t)chunk + (uintptr_t)offset);
1372                 node->size = chunk_size;
1373                 RB_INSERT(chunk_tree_s, &old_chunks, node);
1374         }
1375
1376 #ifdef USE_BRK
1377 RETURN:
1378 #endif
1379 #ifdef MALLOC_STATS
1380         stats_chunks.curchunks -= (size / chunk_size);
1381 #endif
1382         malloc_mutex_unlock(&chunks_mtx);
1383 }
1384
1385 /*
1386  * End chunk management functions.
1387  */
1388 /******************************************************************************/
1389 /*
1390  * Begin arena.
1391  */
1392
1393 /*
1394  * Choose an arena based on a per-thread value (fast-path code, calls slow-path
1395  * code if necessary.
1396  */
1397 static inline arena_t *
1398 choose_arena(void)
1399 {
1400         arena_t *ret;
1401
1402         /*
1403          * We can only use TLS if this is a PIC library, since for the static
1404          * library version, libc's malloc is used by TLS allocation, which
1405          * introduces a bootstrapping issue.
1406          */
1407 #ifndef NO_TLS
1408         if (__isthreaded == false) {
1409             /*
1410              * Avoid the overhead of TLS for single-threaded operation.  If the
1411              * app switches to threaded mode, the initial thread may end up
1412              * being assigned to some other arena, but this one-time switch
1413              * shouldn't cause significant issues.
1414              * */
1415             return (arenas[0]);
1416         }
1417
1418         ret = arenas_map;
1419         if (ret == NULL)
1420                 ret = choose_arena_hard();
1421 #else
1422         if (__isthreaded) {
1423                 unsigned long ind;
1424
1425                 /*
1426                  * Hash _pthread_self() to one of the arenas.  There is a prime
1427                  * number of arenas, so this has a reasonable chance of
1428                  * working.  Even so, the hashing can be easily thwarted by
1429                  * inconvenient _pthread_self() values.  Without specific
1430                  * knowledge of how _pthread_self() calculates values, we can't
1431                  * easily do much better than this.
1432                  */
1433                 ind = (unsigned long) _pthread_self() % narenas;
1434
1435                 /*
1436                  * Optimistially assume that arenas[ind] has been initialized.
1437                  * At worst, we find out that some other thread has already
1438                  * done so, after acquiring the lock in preparation.  Note that
1439                  * this lazy locking also has the effect of lazily forcing
1440                  * cache coherency; without the lock acquisition, there's no
1441                  * guarantee that modification of arenas[ind] by another thread
1442                  * would be seen on this CPU for an arbitrary amount of time.
1443                  *
1444                  * In general, this approach to modifying a synchronized value
1445                  * isn't a good idea, but in this case we only ever modify the
1446                  * value once, so things work out well.
1447                  */
1448                 ret = arenas[ind];
1449                 if (ret == NULL) {
1450                         /*
1451                          * Avoid races with another thread that may have already
1452                          * initialized arenas[ind].
1453                          */
1454                         malloc_mutex_lock(&arenas_mtx);
1455                         if (arenas[ind] == NULL)
1456                                 ret = arenas_extend((unsigned)ind);
1457                         else
1458                                 ret = arenas[ind];
1459                         malloc_mutex_unlock(&arenas_mtx);
1460                 }
1461         } else
1462                 ret = arenas[0];
1463 #endif
1464
1465         assert(ret != NULL);
1466         return (ret);
1467 }
1468
1469 #ifndef NO_TLS
1470 /*
1471  * Choose an arena based on a per-thread value (slow-path code only, called
1472  * only by choose_arena()).
1473  */
1474 static arena_t *
1475 choose_arena_hard(void)
1476 {
1477         arena_t *ret;
1478
1479         assert(__isthreaded);
1480
1481         /* Assign one of the arenas to this thread, in a round-robin fashion. */
1482         malloc_mutex_lock(&arenas_mtx);
1483         ret = arenas[next_arena];
1484         if (ret == NULL)
1485                 ret = arenas_extend(next_arena);
1486         if (ret == NULL) {
1487                 /*
1488                  * Make sure that this function never returns NULL, so that
1489                  * choose_arena() doesn't have to check for a NULL return
1490                  * value.
1491                  */
1492                 ret = arenas[0];
1493         }
1494         next_arena = (next_arena + 1) % narenas;
1495         malloc_mutex_unlock(&arenas_mtx);
1496         arenas_map = ret;
1497
1498         return (ret);
1499 }
1500 #endif
1501
1502 static inline int
1503 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1504 {
1505
1506         assert(a != NULL);
1507         assert(b != NULL);
1508
1509         if ((uintptr_t)a < (uintptr_t)b)
1510                 return (-1);
1511         else if (a == b)
1512                 return (0);
1513         else
1514                 return (1);
1515 }
1516
1517 /* Generate red-black tree code for arena chunks. */
1518 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1519
1520 static inline void *
1521 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
1522 {
1523         void *ret;
1524         unsigned i, mask, bit, regind;
1525
1526         assert(run->magic == ARENA_RUN_MAGIC);
1527
1528         for (i = run->regs_minelm; i < REGS_MASK_NELMS; i++) {
1529                 mask = run->regs_mask[i];
1530                 if (mask != 0) {
1531                         /* Usable allocation found. */
1532                         bit = ffs(mask) - 1;
1533
1534                         regind = ((i << (SIZEOF_INT_2POW + 3)) + bit);
1535                         ret = (void *)&((char *)run)[bin->reg0_offset
1536                             + (bin->reg_size * regind)];
1537
1538                         /* Clear bit. */
1539                         mask ^= (1 << bit);
1540                         run->regs_mask[i] = mask;
1541
1542                         return (ret);
1543                 } else {
1544                         /*
1545                          * Make a note that nothing before this element
1546                          * contains a free region.
1547                          */
1548                         run->regs_minelm = i + 1;
1549                 }
1550         }
1551         /* Not reached. */
1552         assert(0);
1553         return (NULL);
1554 }
1555
1556 static inline void
1557 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
1558 {
1559         unsigned diff, regind, elm, bit;
1560
1561         assert(run->magic == ARENA_RUN_MAGIC);
1562
1563         /*
1564          * Avoid doing division with a variable divisor if possible.  This
1565          * single operation can reduce allocator throughput by around 20%!
1566          */
1567 #define POW2_CASE(p)                                                    \
1568                 case (1 << (p)):                                        \
1569                         regind = diff >> (p);                           \
1570                         break;
1571 #define QUANTUM_CASE(n)                                                 \
1572                 case ((n) << QUANTUM_2POW_MIN):                         \
1573                         regind = diff / ((n) << QUANTUM_2POW_MIN);      \
1574                         break;
1575
1576         /*
1577          * These assertions make sure that the switch statement matches
1578          * compile-time configuration.
1579          */
1580         assert(tiny_min_2pow >= 1);
1581         assert(QUANTUM_2POW_MIN >= 3 && QUANTUM_2POW_MIN <= 4);
1582         assert(SMALL_MAX_2POW_DEFAULT == 9);
1583
1584         diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
1585         switch (size) {
1586                 POW2_CASE(1)
1587                 POW2_CASE(2)
1588 #if (QUANTUM_2POW_MIN > 3)
1589                 POW2_CASE(3)
1590 #endif
1591                 QUANTUM_CASE(1)
1592                 QUANTUM_CASE(2)
1593                 QUANTUM_CASE(3)
1594                 QUANTUM_CASE(4)
1595                 QUANTUM_CASE(5)
1596                 QUANTUM_CASE(6)
1597                 QUANTUM_CASE(7)
1598                 QUANTUM_CASE(8)
1599                 QUANTUM_CASE(9)
1600                 QUANTUM_CASE(10)
1601                 QUANTUM_CASE(11)
1602                 QUANTUM_CASE(12)
1603                 QUANTUM_CASE(13)
1604                 QUANTUM_CASE(14)
1605                 QUANTUM_CASE(15)
1606                 QUANTUM_CASE(16)
1607                 QUANTUM_CASE(17)
1608                 QUANTUM_CASE(18)
1609                 QUANTUM_CASE(19)
1610                 QUANTUM_CASE(20)
1611                 QUANTUM_CASE(21)
1612                 QUANTUM_CASE(22)
1613                 QUANTUM_CASE(23)
1614                 QUANTUM_CASE(24)
1615                 QUANTUM_CASE(25)
1616                 QUANTUM_CASE(26)
1617                 QUANTUM_CASE(27)
1618                 QUANTUM_CASE(28)
1619                 QUANTUM_CASE(29)
1620                 QUANTUM_CASE(30)
1621                 QUANTUM_CASE(31)
1622                 QUANTUM_CASE(32)
1623
1624                 POW2_CASE(10)
1625                 POW2_CASE(11)
1626                 POW2_CASE(12) /* Handle up to 8 kB pages. */
1627                 default:
1628                         regind = diff / size;
1629         }
1630 #undef POW2_CASE
1631 #undef QUANTUM_CASE
1632         assert(regind < bin->nregs);
1633
1634         elm = regind >> (SIZEOF_INT_2POW + 3);
1635         if (elm < run->regs_minelm)
1636                 run->regs_minelm = elm;
1637         bit = regind - (elm << (SIZEOF_INT_2POW + 3));
1638         assert((run->regs_mask[elm] & (1 << bit)) == 0);
1639         run->regs_mask[elm] |= (1 << bit);
1640 }
1641
1642 static void
1643 arena_run_split(arena_t *arena, arena_run_t *run, bool large, size_t size)
1644 {
1645         arena_chunk_t *chunk;
1646         unsigned run_ind, map_offset, total_pages, need_pages;
1647         unsigned i, log2_run_pages, run_pages;
1648
1649         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1650         run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1651             >> pagesize_2pow);
1652         assert(chunk->map[run_ind].free);
1653         total_pages = chunk->map[run_ind].npages;
1654         need_pages = (size >> pagesize_2pow);
1655
1656         assert(chunk->map[run_ind].free);
1657         assert(chunk->map[run_ind].large == false);
1658         assert(chunk->map[run_ind].npages == total_pages);
1659
1660         /* Split enough pages from the front of run to fit allocation size. */
1661         map_offset = run_ind;
1662         for (i = 0; i < need_pages; i++) {
1663                 chunk->map[map_offset + i].free = false;
1664                 chunk->map[map_offset + i].large = large;
1665                 chunk->map[map_offset + i].npages = need_pages;
1666                 chunk->map[map_offset + i].pos = i;
1667         }
1668
1669         /* Update map for trailing pages. */
1670         map_offset += need_pages;
1671         while (map_offset < run_ind + total_pages) {
1672                 log2_run_pages = ffs(map_offset) - 1;
1673                 run_pages = (1 << log2_run_pages);
1674
1675                 chunk->map[map_offset].free = true;
1676                 chunk->map[map_offset].large = false;
1677                 chunk->map[map_offset].npages = run_pages;
1678
1679                 chunk->nfree_runs[log2_run_pages]++;
1680
1681                 map_offset += run_pages;
1682         }
1683
1684         chunk->pages_used += (size >> pagesize_2pow);
1685 }
1686
1687 static arena_chunk_t *
1688 arena_chunk_alloc(arena_t *arena)
1689 {
1690         arena_chunk_t *chunk;
1691         unsigned i, j, header_npages, pow2_header_npages, map_offset;
1692         unsigned log2_run_pages, run_pages;
1693         size_t header_size;
1694
1695         chunk = (arena_chunk_t *)chunk_alloc(chunk_size);
1696         if (chunk == NULL)
1697                 return (NULL);
1698
1699         chunk->arena = arena;
1700
1701         RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1702
1703         /*
1704          * Claim that no pages are in use, since the header is merely overhead.
1705          */
1706         chunk->pages_used = 0;
1707
1708         memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs));
1709
1710         header_size = (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen]
1711             - (uintptr_t)chunk);
1712         if (header_size % pagesize != 0) {
1713                 /* Round up to the nearest page boundary. */
1714                 header_size += pagesize - (header_size % pagesize);
1715         }
1716
1717         header_npages = header_size >> pagesize_2pow;
1718         pow2_header_npages = pow2_ceil(header_npages);
1719
1720         /*
1721          * Iteratively mark runs as in use, until we've spoken for the entire
1722          * header.
1723          */
1724         map_offset = 0;
1725         for (i = 0; header_npages > 0; i++) {
1726                 if ((pow2_header_npages >> i) <= header_npages) {
1727                         for (j = 0; j < (pow2_header_npages >> i); j++) {
1728                                 chunk->map[map_offset + j].free = false;
1729                                 chunk->map[map_offset + j].large = false;
1730                                 chunk->map[map_offset + j].npages =
1731                                     (pow2_header_npages >> i);
1732                                 chunk->map[map_offset + j].pos = j;
1733                         }
1734                         header_npages -= (pow2_header_npages >> i);
1735                         map_offset += (pow2_header_npages >> i);
1736                 }
1737         }
1738
1739         /*
1740          * Finish initializing map.  The chunk header takes up some space at
1741          * the beginning of the chunk, which we just took care of by
1742          * "allocating" the leading pages.
1743          */
1744         while (map_offset < (chunk_size >> pagesize_2pow)) {
1745                 log2_run_pages = ffs(map_offset) - 1;
1746                 run_pages = (1 << log2_run_pages);
1747
1748                 chunk->map[map_offset].free = true;
1749                 chunk->map[map_offset].large = false;
1750                 chunk->map[map_offset].npages = run_pages;
1751
1752                 chunk->nfree_runs[log2_run_pages]++;
1753
1754                 map_offset += run_pages;
1755         }
1756
1757         return (chunk);
1758 }
1759
1760 static void
1761 arena_chunk_dealloc(arena_chunk_t *chunk)
1762 {
1763
1764         RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1765
1766         chunk_dealloc((void *)chunk, chunk_size);
1767 }
1768
1769 static void
1770 arena_bin_run_promote(arena_t *arena, arena_bin_t *bin, arena_run_t *run,
1771     size_t size)
1772 {
1773
1774         assert(bin == run->bin);
1775
1776         /* Promote. */
1777         assert(run->free_min > run->nfree);
1778         assert(run->quartile < RUN_Q100);
1779         run->quartile++;
1780 #ifdef MALLOC_STATS
1781         bin->stats.npromote++;
1782 #endif
1783
1784         /* Re-file run. */
1785         switch (run->quartile) {
1786                 case RUN_QINIT:
1787                         assert(0);
1788                         break;
1789                 case RUN_Q0:
1790                         qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1791                         run->free_max = bin->nregs - 1;
1792                         run->free_min = (bin->nregs >> 1) + 1;
1793                         assert(run->nfree <= run->free_max);
1794                         assert(run->nfree >= run->free_min);
1795                         break;
1796                 case RUN_Q25:
1797                         qr_remove(run, link);
1798                         qr_before_insert((arena_run_t *)&bin->runs25, run,
1799                             link);
1800                         run->free_max = ((bin->nregs >> 2) * 3) - 1;
1801                         run->free_min = (bin->nregs >> 2) + 1;
1802                         assert(run->nfree <= run->free_max);
1803                         assert(run->nfree >= run->free_min);
1804                         break;
1805                 case RUN_Q50:
1806                         qr_remove(run, link);
1807                         qr_before_insert((arena_run_t *)&bin->runs50, run,
1808                             link);
1809                         run->free_max = (bin->nregs >> 1) - 1;
1810                         run->free_min = 1;
1811                         assert(run->nfree <= run->free_max);
1812                         assert(run->nfree >= run->free_min);
1813                         break;
1814                 case RUN_Q75:
1815                         /*
1816                          * Skip RUN_Q75 during promotion from RUN_Q50.
1817                          * Separate handling of RUN_Q75 and RUN_Q100 allows us
1818                          * to keep completely full runs in RUN_Q100, thus
1819                          * guaranteeing that runs in RUN_Q75 are only mostly
1820                          * full.  This provides a method for avoiding a linear
1821                          * search for non-full runs, which avoids some
1822                          * pathological edge cases.
1823                          */
1824                         run->quartile++;
1825                         /* Fall through. */
1826                 case RUN_Q100:
1827                         qr_remove(run, link);
1828                         assert(bin->runcur == run);
1829                         bin->runcur = NULL;
1830                         run->free_max = 0;
1831                         run->free_min = 0;
1832                         assert(run->nfree <= run->free_max);
1833                         assert(run->nfree >= run->free_min);
1834                         break;
1835                 default:
1836                         assert(0);
1837                         break;
1838         }
1839 }
1840
1841 static void
1842 arena_bin_run_demote(arena_t *arena, arena_bin_t *bin, arena_run_t *run,
1843     size_t size)
1844 {
1845
1846         assert(bin == run->bin);
1847
1848         /* Demote. */
1849         assert(run->free_max < run->nfree);
1850         assert(run->quartile > RUN_QINIT);
1851         run->quartile--;
1852 #ifdef MALLOC_STATS
1853         bin->stats.ndemote++;
1854 #endif
1855
1856         /* Re-file run. */
1857         switch (run->quartile) {
1858                 case RUN_QINIT:
1859                         qr_remove(run, link);
1860 #ifdef MALLOC_STATS
1861                         bin->stats.curruns--;
1862 #endif
1863                         if (bin->runcur == run)
1864                                 bin->runcur = NULL;
1865 #ifdef MALLOC_DEBUG
1866                         run->magic = 0;
1867 #endif
1868                         arena_run_dalloc(arena, run, bin->run_size);
1869                         break;
1870                 case RUN_Q0:
1871                         qr_remove(run, link);
1872                         qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1873                         run->free_max = bin->nregs - 1;
1874                         run->free_min = (bin->nregs >> 1) + 1;
1875                         assert(run->nfree <= run->free_max);
1876                         assert(run->nfree >= run->free_min);
1877                         break;
1878                 case RUN_Q25:
1879                         qr_remove(run, link);
1880                         qr_before_insert((arena_run_t *)&bin->runs25, run,
1881                             link);
1882                         run->free_max = ((bin->nregs >> 2) * 3) - 1;
1883                         run->free_min = (bin->nregs >> 2) + 1;
1884                         assert(run->nfree <= run->free_max);
1885                         assert(run->nfree >= run->free_min);
1886                         break;
1887                 case RUN_Q50:
1888                         qr_remove(run, link);
1889                         qr_before_insert((arena_run_t *)&bin->runs50, run,
1890                             link);
1891                         run->free_max = (bin->nregs >> 1) - 1;
1892                         run->free_min = 1;
1893                         assert(run->nfree <= run->free_max);
1894                         assert(run->nfree >= run->free_min);
1895                         break;
1896                 case RUN_Q75:
1897                         qr_before_insert((arena_run_t *)&bin->runs75, run,
1898                             link);
1899                         run->free_max = (bin->nregs >> 2) - 1;
1900                         run->free_min = 1;
1901                         assert(run->nfree <= run->free_max);
1902                         assert(run->nfree >= run->free_min);
1903                         break;
1904                 case RUN_Q100:
1905                 default:
1906                         assert(0);
1907                         break;
1908         }
1909 }
1910
1911 static arena_run_t *
1912 arena_run_alloc(arena_t *arena, bool large, size_t size)
1913 {
1914         arena_run_t *run;
1915         unsigned min_ind, i, j;
1916         arena_chunk_t *chunk;
1917 #ifndef NDEBUG
1918         int rep = 0;
1919 #endif
1920
1921         assert(size <= arena_maxclass);
1922
1923 AGAIN:
1924 #ifndef NDEBUG
1925         rep++;
1926         assert(rep <= 2);
1927 #endif
1928
1929         /*
1930          * Search through arena's chunks in address order for a run that is
1931          * large enough.  Look for a precise fit, but do not pass up a chunk
1932          * that has a run which is large enough to split.
1933          */
1934         min_ind = ffs(size >> pagesize_2pow) - 1;
1935         RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1936                 for (i = min_ind;
1937                     i < (opt_chunk_2pow - pagesize_2pow);
1938                     i++) {
1939                         if (chunk->nfree_runs[i] > 0) {
1940                                 arena_chunk_map_t *map = chunk->map;
1941
1942                                 /* Scan chunk's map for free run. */
1943                                 for (j = 0;
1944                                     j < arena_chunk_maplen;
1945                                     j += map[j].npages) {
1946                                         if (map[j].free
1947                                             && map[j].npages == (1 << i))
1948                 {/*<----------------------------*/
1949                         run = (arena_run_t *)&((char *)chunk)[j
1950                             << pagesize_2pow];
1951
1952                         assert(chunk->nfree_runs[i] > 0);
1953                         chunk->nfree_runs[i]--;
1954
1955                         /* Update page map. */
1956                         arena_run_split(arena, run, large, size);
1957
1958                         return (run);
1959                 }/*---------------------------->*/
1960                                 }
1961                                 /* Not reached. */
1962                                 assert(0);
1963                         }
1964                 }
1965         }
1966
1967         /* No usable runs.  Allocate a new chunk, then try again. */
1968         if (arena_chunk_alloc(arena) == NULL)
1969                 return (NULL);
1970         goto AGAIN;
1971 }
1972
1973 static void
1974 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1975 {
1976         arena_chunk_t *chunk;
1977         unsigned run_ind, buddy_ind, base_run_ind, run_pages, log2_run_pages;
1978
1979         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1980         run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1981             >> pagesize_2pow);
1982         run_pages = (size >> pagesize_2pow);
1983         log2_run_pages = ffs(run_pages) - 1;
1984         assert(run_pages > 0);
1985
1986         /* Subtract pages from count of pages used in chunk. */
1987         chunk->pages_used -= run_pages;
1988
1989         /* Mark run as deallocated. */
1990         chunk->map[run_ind].free = true;
1991         chunk->map[run_ind].large = false;
1992         chunk->map[run_ind].npages = run_pages;
1993
1994         /*
1995          * Tell the kernel that we don't need the data in this run, but only if
1996          * requested via runtime configuration.
1997          */
1998         if (opt_hint) {
1999                 madvise(run, size, MADV_FREE);
2000 #ifdef MALLOC_STATS
2001                 arena->stats.nmadvise += (size >> pagesize_2pow);
2002 #endif
2003         }
2004
2005         /*
2006          * Iteratively coalesce with buddies.  Conceptually, the buddy scheme
2007          * induces a tree on the set of pages.  If we know the number of pages
2008          * in the subtree rooted at the current node, we can quickly determine
2009          * whether a run is the left or right buddy, and then calculate the
2010          * buddy's index.
2011          */
2012         for (;
2013             (run_pages = (1 << log2_run_pages)) < arena_chunk_maplen;
2014             log2_run_pages++) {
2015                 if (((run_ind >> log2_run_pages) & 1) == 0) {
2016                         /* Current run precedes its buddy. */
2017                         buddy_ind = run_ind + run_pages;
2018                         base_run_ind = run_ind;
2019                 } else {
2020                         /* Current run follows its buddy. */
2021                         buddy_ind = run_ind - run_pages;
2022                         base_run_ind = buddy_ind;
2023                 }
2024
2025                 if (chunk->map[buddy_ind].free == false
2026                     || chunk->map[buddy_ind].npages != run_pages)
2027                         break;
2028
2029                 assert(chunk->nfree_runs[log2_run_pages] > 0);
2030                 chunk->nfree_runs[log2_run_pages]--;
2031
2032                 /* Coalesce. */
2033                 chunk->map[base_run_ind].npages = (run_pages << 1);
2034
2035                 /* Update run_ind to be the beginning of the coalesced run. */
2036                 run_ind = base_run_ind;
2037         }
2038
2039         chunk->nfree_runs[log2_run_pages]++;
2040
2041         /* Free pages, to the extent possible. */
2042         if (chunk->pages_used == 0) {
2043                 /* This chunk is completely unused now, so deallocate it. */
2044                 arena_chunk_dealloc(chunk);
2045         }
2046 }
2047
2048 static arena_run_t *
2049 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin, size_t size)
2050 {
2051         arena_run_t *run;
2052         unsigned i, remainder;
2053
2054         /* Look for a usable run. */
2055         if ((run = qr_next((arena_run_t *)&bin->runs50, link))
2056             != (arena_run_t *)&bin->runs50
2057             || (run = qr_next((arena_run_t *)&bin->runs25, link))
2058             != (arena_run_t *)&bin->runs25
2059             || (run = qr_next((arena_run_t *)&bin->runs0, link))
2060             != (arena_run_t *)&bin->runs0
2061             || (run = qr_next((arena_run_t *)&bin->runs75, link))
2062             != (arena_run_t *)&bin->runs75) {
2063                 /* run is guaranteed to have available space. */
2064                 qr_remove(run, link);
2065                 return (run);
2066         }
2067         /* No existing runs have any space available. */
2068
2069         /* Allocate a new run. */
2070         run = arena_run_alloc(arena, false, bin->run_size);
2071         if (run == NULL)
2072                 return (NULL);
2073
2074         /* Initialize run internals. */
2075         qr_new(run, link);
2076         run->bin = bin;
2077
2078         for (i = 0; i < (bin->nregs >> (SIZEOF_INT_2POW + 3)); i++)
2079                 run->regs_mask[i] = UINT_MAX;
2080         remainder = bin->nregs % (1 << (SIZEOF_INT_2POW + 3));
2081         if (remainder != 0) {
2082                 run->regs_mask[i] = (UINT_MAX >> ((1 << (SIZEOF_INT_2POW + 3))
2083                     - remainder));
2084                 i++;
2085         }
2086         for (; i < REGS_MASK_NELMS; i++)
2087                 run->regs_mask[i] = 0;
2088
2089         run->regs_minelm = 0;
2090
2091         run->nfree = bin->nregs;
2092         run->quartile = RUN_QINIT;
2093         run->free_max = bin->nregs;
2094         run->free_min = ((bin->nregs >> 2) * 3) + 1;
2095 #ifdef MALLOC_DEBUG
2096         run->magic = ARENA_RUN_MAGIC;
2097 #endif
2098
2099 #ifdef MALLOC_STATS
2100         bin->stats.nruns++;
2101         bin->stats.curruns++;
2102         if (bin->stats.curruns > bin->stats.highruns)
2103                 bin->stats.highruns = bin->stats.curruns;
2104 #endif
2105         return (run);
2106 }
2107
2108 /* bin->runcur must have space available before this function is called. */
2109 static inline void *
2110 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run,
2111     size_t size)
2112 {
2113         void *ret;
2114
2115         assert(run->magic == ARENA_RUN_MAGIC);
2116         assert(run->nfree > 0);
2117
2118         ret = arena_run_reg_alloc(run, bin);
2119         assert(ret != NULL);
2120         run->nfree--;
2121         if (run->nfree < run->free_min) {
2122                 /* Promote run to higher fullness quartile. */
2123                 arena_bin_run_promote(arena, bin, run, size);
2124         }
2125
2126         return (ret);
2127 }
2128
2129 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
2130 static void *
2131 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin, size_t size)
2132 {
2133
2134         assert(bin->runcur == NULL || bin->runcur->quartile == RUN_Q100);
2135
2136         bin->runcur = arena_bin_nonfull_run_get(arena, bin, size);
2137         if (bin->runcur == NULL)
2138                 return (NULL);
2139         assert(bin->runcur->magic == ARENA_RUN_MAGIC);
2140         assert(bin->runcur->nfree > 0);
2141
2142         return (arena_bin_malloc_easy(arena, bin, bin->runcur, size));
2143 }
2144
2145 static void *
2146 arena_malloc(arena_t *arena, size_t size)
2147 {
2148         void *ret;
2149
2150         assert(arena != NULL);
2151         assert(arena->magic == ARENA_MAGIC);
2152         assert(size != 0);
2153         assert(QUANTUM_CEILING(size) <= arena_maxclass);
2154
2155         if (size <= bin_maxclass) {
2156                 arena_bin_t *bin;
2157                 arena_run_t *run;
2158
2159                 /* Small allocation. */
2160
2161                 if (size < small_min) {
2162                         /* Tiny. */
2163                         size = pow2_ceil(size);
2164                         bin = &arena->bins[ffs(size >> (tiny_min_2pow + 1))];
2165 #ifdef MALLOC_STATS
2166                         /*
2167                          * Bin calculation is always correct, but we may need to
2168                          * fix size for the purposes of stats accuracy.
2169                          */
2170                         if (size < (1 << tiny_min_2pow))
2171                                 size = (1 << tiny_min_2pow);
2172 #endif
2173                 } else if (size <= small_max) {
2174                         /* Quantum-spaced. */
2175                         size = QUANTUM_CEILING(size);
2176                         bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
2177                             - 1];
2178                 } else {
2179                         /* Sub-page. */
2180                         size = pow2_ceil(size);
2181                         bin = &arena->bins[ntbins + nqbins
2182                             + (ffs(size >> opt_small_max_2pow) - 2)];
2183                 }
2184                 assert(size == bin->reg_size);
2185
2186                 malloc_mutex_lock(&arena->mtx);
2187                 if ((run = bin->runcur) != NULL)
2188                         ret = arena_bin_malloc_easy(arena, bin, run, size);
2189                 else
2190                         ret = arena_bin_malloc_hard(arena, bin, size);
2191
2192 #ifdef MALLOC_STATS
2193                 bin->stats.nrequests++;
2194 #endif
2195         } else {
2196                 /* Medium allocation. */
2197                 size = pow2_ceil(size);
2198                 malloc_mutex_lock(&arena->mtx);
2199                 ret = (void *)arena_run_alloc(arena, true, size);
2200 #ifdef MALLOC_STATS
2201                 arena->stats.large_nrequests++;
2202 #endif
2203         }
2204
2205 #ifdef MALLOC_STATS
2206         arena->stats.nmalloc++;
2207         if (ret != NULL)
2208                 arena->stats.allocated += size;
2209 #endif
2210
2211         malloc_mutex_unlock(&arena->mtx);
2212
2213         if (opt_junk && ret != NULL)
2214                 memset(ret, 0xa5, size);
2215         else if (opt_zero && ret != NULL)
2216                 memset(ret, 0, size);
2217         return (ret);
2218 }
2219
2220 /* Return the size of the allocation pointed to by ptr. */
2221 static size_t
2222 arena_salloc(const void *ptr)
2223 {
2224         size_t ret;
2225         arena_chunk_t *chunk;
2226         uint32_t pageind;
2227         arena_chunk_map_t mapelm;
2228
2229         assert(ptr != NULL);
2230         assert(ptr != &nil);
2231         assert(CHUNK_ADDR2BASE(ptr) != ptr);
2232
2233         /*
2234          * No arena data structures that we query here can change in a way that
2235          * affects this function, so we don't need to lock.
2236          */
2237         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2238         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2239         mapelm = chunk->map[pageind];
2240         assert(mapelm.free == false);
2241         if (mapelm.large == false) {
2242                 arena_run_t *run;
2243
2244                 pageind -= mapelm.pos;
2245
2246                 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2247                 assert(run->magic == ARENA_RUN_MAGIC);
2248                 ret = run->bin->reg_size;
2249         } else
2250                 ret = mapelm.npages << pagesize_2pow;
2251
2252         return (ret);
2253 }
2254
2255 static void *
2256 arena_ralloc(void *ptr, size_t size, size_t oldsize)
2257 {
2258         void *ret;
2259
2260         /* Avoid moving the allocation if the size class would not change. */
2261         if (size < small_min) {
2262                 if (oldsize < small_min &&
2263                     ffs(pow2_ceil(size) >> (tiny_min_2pow + 1))
2264                     == ffs(pow2_ceil(oldsize) >> (tiny_min_2pow + 1)))
2265                         goto IN_PLACE;
2266         } else if (size <= small_max) {
2267                 if (oldsize >= small_min && oldsize <= small_max &&
2268                     (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2269                     == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2270                         goto IN_PLACE;
2271         } else {
2272                 if (oldsize > small_max &&
2273                     pow2_ceil(size) == pow2_ceil(oldsize))
2274                         goto IN_PLACE;
2275         }
2276
2277         /*
2278          * If we get here, then size and oldsize are different enough that we
2279          * need to use a different size class.  In that case, fall back to
2280          * allocating new space and copying.
2281          */
2282         ret = arena_malloc(choose_arena(), size);
2283         if (ret == NULL)
2284                 return (NULL);
2285
2286         if (size < oldsize)
2287                 memcpy(ret, ptr, size);
2288         else
2289                 memcpy(ret, ptr, oldsize);
2290         idalloc(ptr);
2291         return (ret);
2292 IN_PLACE:
2293         if (opt_junk && size < oldsize)
2294                 memset(&((char *)ptr)[size], 0x5a, oldsize - size);
2295         else if (opt_zero && size > oldsize)
2296                 memset(&((char *)ptr)[size], 0, size - oldsize);
2297         return (ptr);
2298 }
2299
2300 static void
2301 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
2302 {
2303         unsigned pageind;
2304         arena_chunk_map_t mapelm;
2305         size_t size;
2306
2307         assert(arena != NULL);
2308         assert(arena->magic == ARENA_MAGIC);
2309         assert(chunk->arena == arena);
2310         assert(ptr != NULL);
2311         assert(ptr != &nil);
2312         assert(CHUNK_ADDR2BASE(ptr) != ptr);
2313
2314         pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2315         mapelm = chunk->map[pageind];
2316         assert(mapelm.free == false);
2317         if (mapelm.large == false) {
2318                 arena_run_t *run;
2319                 arena_bin_t *bin;
2320
2321                 /* Small allocation. */
2322
2323                 pageind -= mapelm.pos;
2324
2325                 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2326                 assert(run->magic == ARENA_RUN_MAGIC);
2327                 bin = run->bin;
2328                 size = bin->reg_size;
2329
2330                 if (opt_junk)
2331                         memset(ptr, 0x5a, size);
2332
2333                 malloc_mutex_lock(&arena->mtx);
2334                 arena_run_reg_dalloc(run, bin, ptr, size);
2335                 run->nfree++;
2336                 if (run->nfree > run->free_max) {
2337                         /* Demote run to lower fullness quartile. */
2338                         arena_bin_run_demote(arena, bin, run, size);
2339                 }
2340         } else {
2341                 /* Medium allocation. */
2342
2343                 size = mapelm.npages << pagesize_2pow;
2344
2345                 if (opt_junk)
2346                         memset(ptr, 0x5a, size);
2347
2348                 malloc_mutex_lock(&arena->mtx);
2349                 arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2350         }
2351
2352 #ifdef MALLOC_STATS
2353         arena->stats.allocated -= size;
2354 #endif
2355
2356         malloc_mutex_unlock(&arena->mtx);
2357 }
2358
2359 static bool
2360 arena_new(arena_t *arena)
2361 {
2362         unsigned i;
2363         arena_bin_t *bin;
2364         size_t pow2_size, run_size;
2365
2366         malloc_mutex_init(&arena->mtx);
2367
2368 #ifdef MALLOC_STATS
2369         memset(&arena->stats, 0, sizeof(arena_stats_t));
2370 #endif
2371
2372         /* Initialize chunks. */
2373         RB_INIT(&arena->chunks);
2374
2375         /* Initialize bins. */
2376
2377         /* (2^n)-spaced tiny bins. */
2378         for (i = 0; i < ntbins; i++) {
2379                 bin = &arena->bins[i];
2380                 bin->runcur = NULL;
2381                 qr_new((arena_run_t *)&bin->runs0, link);
2382                 qr_new((arena_run_t *)&bin->runs25, link);
2383                 qr_new((arena_run_t *)&bin->runs50, link);
2384                 qr_new((arena_run_t *)&bin->runs75, link);
2385
2386                 bin->reg_size = (1 << (tiny_min_2pow + i));
2387
2388                 /*
2389                  * Calculate how large of a run to allocate.  Make sure that at
2390                  * least RUN_MIN_REGS regions fit in the run.
2391                  */
2392                 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2393                 if (run_size < pagesize)
2394                         run_size = pagesize;
2395                 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2396                         run_size = (pagesize << RUN_MAX_PAGES_2POW);
2397                 if (run_size > arena_maxclass)
2398                         run_size = arena_maxclass;
2399                 bin->run_size = run_size;
2400
2401                 assert(run_size >= sizeof(arena_run_t));
2402                 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2403                 if (bin->nregs > (REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3))) {
2404                         /* Take care not to overflow regs_mask. */
2405                         bin->nregs = REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3);
2406                 }
2407                 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2408
2409 #ifdef MALLOC_STATS
2410                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2411 #endif
2412         }
2413
2414         /* Quantum-spaced bins. */
2415         for (; i < ntbins + nqbins; i++) {
2416                 bin = &arena->bins[i];
2417                 bin->runcur = NULL;
2418                 qr_new((arena_run_t *)&bin->runs0, link);
2419                 qr_new((arena_run_t *)&bin->runs25, link);
2420                 qr_new((arena_run_t *)&bin->runs50, link);
2421                 qr_new((arena_run_t *)&bin->runs75, link);
2422
2423                 bin->reg_size = quantum * (i - ntbins + 1);
2424
2425                 /*
2426                  * Calculate how large of a run to allocate.  Make sure that at
2427                  * least RUN_MIN_REGS regions fit in the run.
2428                  */
2429                 pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2430                 run_size = (pow2_size << RUN_MIN_REGS_2POW);
2431                 if (run_size < pagesize)
2432                         run_size = pagesize;
2433                 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2434                         run_size = (pagesize << RUN_MAX_PAGES_2POW);
2435                 if (run_size > arena_maxclass)
2436                         run_size = arena_maxclass;
2437                 bin->run_size = run_size;
2438
2439                 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2440                 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
2441                 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2442
2443 #ifdef MALLOC_STATS
2444                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2445 #endif
2446         }
2447
2448         /* (2^n)-spaced sub-page bins. */
2449         for (; i < ntbins + nqbins + nsbins; i++) {
2450                 bin = &arena->bins[i];
2451                 bin->runcur = NULL;
2452                 qr_new((arena_run_t *)&bin->runs0, link);
2453                 qr_new((arena_run_t *)&bin->runs25, link);
2454                 qr_new((arena_run_t *)&bin->runs50, link);
2455                 qr_new((arena_run_t *)&bin->runs75, link);
2456
2457                 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2458
2459                 /*
2460                  * Calculate how large of a run to allocate.  Make sure that at
2461                  * least RUN_MIN_REGS regions fit in the run.
2462                  */
2463                 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2464                 if (run_size < pagesize)
2465                         run_size = pagesize;
2466                 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2467                         run_size = (pagesize << RUN_MAX_PAGES_2POW);
2468                 if (run_size > arena_maxclass)
2469                         run_size = arena_maxclass;
2470                 bin->run_size = run_size;
2471
2472                 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2473                 assert(bin->nregs <= REGS_MASK_NELMS << (SIZEOF_INT_2POW + 3));
2474                 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2475
2476 #ifdef MALLOC_STATS
2477                 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2478 #endif
2479         }
2480
2481 #ifdef MALLOC_DEBUG
2482         arena->magic = ARENA_MAGIC;
2483 #endif
2484
2485         return (false);
2486 }
2487
2488 /* Create a new arena and insert it into the arenas array at index ind. */
2489 static arena_t *
2490 arenas_extend(unsigned ind)
2491 {
2492         arena_t *ret;
2493
2494         /* Allocate enough space for trailing bins. */
2495         ret = (arena_t *)base_alloc(sizeof(arena_t)
2496             + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2497         if (ret != NULL && arena_new(ret) == false) {
2498                 arenas[ind] = ret;
2499                 return (ret);
2500         }
2501         /* Only reached if there is an OOM error. */
2502
2503         /*
2504          * OOM here is quite inconvenient to propagate, since dealing with it
2505          * would require a check for failure in the fast path.  Instead, punt
2506          * by using arenas[0].  In practice, this is an extremely unlikely
2507          * failure.
2508          */
2509         malloc_printf("%s: (malloc) Error initializing arena\n",
2510             _getprogname());
2511         if (opt_abort)
2512                 abort();
2513
2514         return (arenas[0]);
2515 }
2516
2517 /*
2518  * End arena.
2519  */
2520 /******************************************************************************/
2521 /*
2522  * Begin general internal functions.
2523  */
2524
2525 static void *
2526 huge_malloc(size_t size)
2527 {
2528         void *ret;
2529         size_t csize;
2530         chunk_node_t *node;
2531
2532         /* Allocate one or more contiguous chunks for this request. */
2533
2534         csize = CHUNK_CEILING(size);
2535         if (csize == 0) {
2536                 /* size is large enough to cause size_t wrap-around. */
2537                 return (NULL);
2538         }
2539
2540         /* Allocate a chunk node with which to track the chunk. */
2541         node = base_chunk_node_alloc();
2542         if (node == NULL)
2543                 return (NULL);
2544
2545         ret = chunk_alloc(csize);
2546         if (ret == NULL) {
2547                 base_chunk_node_dealloc(node);
2548                 return (NULL);
2549         }
2550
2551         /* Insert node into chunks. */
2552         node->chunk = ret;
2553         node->size = csize;
2554
2555         malloc_mutex_lock(&chunks_mtx);
2556         RB_INSERT(chunk_tree_s, &huge, node);
2557 #ifdef MALLOC_STATS
2558         huge_nmalloc++;
2559         huge_allocated += csize;
2560 #endif
2561         malloc_mutex_unlock(&chunks_mtx);
2562
2563         if (opt_junk && ret != NULL)
2564                 memset(ret, 0xa5, csize);
2565         else if (opt_zero && ret != NULL)
2566                 memset(ret, 0, csize);
2567
2568         return (ret);
2569 }
2570
2571 static void *
2572 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2573 {
2574         void *ret;
2575
2576         /* Avoid moving the allocation if the size class would not change. */
2577         if (oldsize > arena_maxclass &&
2578             CHUNK_CEILING(size) == CHUNK_CEILING(oldsize))
2579                 return (ptr);
2580
2581         /*
2582          * If we get here, then size and oldsize are different enough that we
2583          * need to use a different size class.  In that case, fall back to
2584          * allocating new space and copying.
2585          */
2586         ret = huge_malloc(size);
2587         if (ret == NULL)
2588                 return (NULL);
2589
2590         if (CHUNK_ADDR2BASE(ptr) == ptr) {
2591                 /* The old allocation is a chunk. */
2592                 if (size < oldsize)
2593                         memcpy(ret, ptr, size);
2594                 else
2595                         memcpy(ret, ptr, oldsize);
2596         } else {
2597                 /* The old allocation is a region. */
2598                 assert(oldsize < size);
2599                 memcpy(ret, ptr, oldsize);
2600         }
2601         idalloc(ptr);
2602         return (ret);
2603 }
2604
2605 static void
2606 huge_dalloc(void *ptr)
2607 {
2608         chunk_node_t key;
2609         chunk_node_t *node;
2610
2611         malloc_mutex_lock(&chunks_mtx);
2612
2613         /* Extract from tree of huge allocations. */
2614         key.chunk = ptr;
2615         node = RB_FIND(chunk_tree_s, &huge, &key);
2616         assert(node != NULL);
2617         assert(node->chunk == ptr);
2618         RB_REMOVE(chunk_tree_s, &huge, node);
2619
2620 #ifdef MALLOC_STATS
2621         /* Update counters. */
2622         huge_ndalloc++;
2623         huge_allocated -= node->size;
2624 #endif
2625
2626         malloc_mutex_unlock(&chunks_mtx);
2627
2628         /* Unmap chunk. */
2629 #ifdef USE_BRK
2630         if (opt_junk)
2631                 memset(node->chunk, 0x5a, node->size);
2632 #endif
2633         chunk_dealloc(node->chunk, node->size);
2634
2635         base_chunk_node_dealloc(node);
2636 }
2637
2638 static void *
2639 imalloc(size_t size)
2640 {
2641         void *ret;
2642
2643         assert(size != 0);
2644
2645         if (size <= arena_maxclass)
2646                 ret = arena_malloc(choose_arena(), size);
2647         else
2648                 ret = huge_malloc(size);
2649
2650         return (ret);
2651 }
2652
2653 static void *
2654 ipalloc(size_t alignment, size_t size)
2655 {
2656         void *ret;
2657         size_t pow2_size;
2658
2659         /*
2660          * Round up to the nearest power of two that is >= alignment and
2661          * >= size.
2662          */
2663         if (size > alignment)
2664                 pow2_size = pow2_ceil(size);
2665         else
2666                 pow2_size = alignment;
2667         pow2_size = QUANTUM_CEILING(pow2_size);
2668         if (pow2_size < size) {
2669                 /* size_t overflow. */
2670                 return (NULL);
2671         }
2672
2673         if (pow2_size <= arena_maxclass)
2674                 ret = arena_malloc(choose_arena(), pow2_size);
2675         else {
2676                 if (alignment <= chunk_size)
2677                         ret = huge_malloc(size);
2678                 else {
2679                         size_t chunksize, alloc_size, offset;
2680                         chunk_node_t *node;
2681
2682                         /*
2683                          * This allocation requires alignment that is even
2684                          * larger than chunk alignment.  This means that
2685                          * huge_malloc() isn't good enough.
2686                          *
2687                          * Allocate almost twice as many chunks as are demanded
2688                          * by the size or alignment, in order to assure the
2689                          * alignment can be achieved, then unmap leading and
2690                          * trailing chunks.
2691                          */
2692
2693                         chunksize = CHUNK_CEILING(size);
2694
2695                         if (size >= alignment)
2696                                 alloc_size = chunksize + alignment - chunk_size;
2697                         else
2698                                 alloc_size = (alignment << 1) - chunk_size;
2699
2700                         /*
2701                          * Allocate a chunk node with which to track the chunk.
2702                          */
2703                         node = base_chunk_node_alloc();
2704                         if (node == NULL)
2705                                 return (NULL);
2706
2707                         ret = chunk_alloc(alloc_size);
2708                         if (ret == NULL) {
2709                                 base_chunk_node_dealloc(node);
2710                                 return (NULL);
2711                         }
2712
2713                         offset = (uintptr_t)ret & (alignment - 1);
2714                         assert(offset % chunk_size == 0);
2715                         assert(offset < alloc_size);
2716                         if (offset == 0) {
2717                                 /* Trim trailing space. */
2718                                 chunk_dealloc((void *)((uintptr_t)ret
2719                                     + chunksize), alloc_size - chunksize);
2720                         } else {
2721                                 size_t trailsize;
2722
2723                                 /* Trim leading space. */
2724                                 chunk_dealloc(ret, alignment - offset);
2725
2726                                 ret = (void *)((uintptr_t)ret + (alignment
2727                                     - offset));
2728
2729                                 trailsize = alloc_size - (alignment - offset)
2730                                     - chunksize;
2731                                 if (trailsize != 0) {
2732                                     /* Trim trailing space. */
2733                                     assert(trailsize < alloc_size);
2734                                     chunk_dealloc((void *)((uintptr_t)ret
2735                                         + chunksize), trailsize);
2736                                 }
2737                         }
2738
2739                         /* Insert node into chunks. */
2740                         node->chunk = ret;
2741                         node->size = chunksize;
2742
2743                         malloc_mutex_lock(&chunks_mtx);
2744                         RB_INSERT(chunk_tree_s, &huge, node);
2745 #ifdef MALLOC_STATS
2746                         huge_allocated += size;
2747 #endif
2748                         malloc_mutex_unlock(&chunks_mtx);
2749
2750                         if (opt_junk)
2751                                 memset(ret, 0xa5, chunksize);
2752                         else if (opt_zero)
2753                                 memset(ret, 0, chunksize);
2754                 }
2755         }
2756
2757         assert(((uintptr_t)ret & (alignment - 1)) == 0);
2758         return (ret);
2759 }
2760
2761 static void *
2762 icalloc(size_t size)
2763 {
2764         void *ret;
2765
2766         if (size <= arena_maxclass) {
2767                 ret = arena_malloc(choose_arena(), size);
2768                 if (ret == NULL)
2769                         return (NULL);
2770                 memset(ret, 0, size);
2771         } else {
2772                 /*
2773                  * The virtual memory system provides zero-filled pages, so
2774                  * there is no need to do so manually, unless opt_junk is
2775                  * enabled, in which case huge_malloc() fills huge allocations
2776                  * with junk.
2777                  */
2778                 ret = huge_malloc(size);
2779                 if (ret == NULL)
2780                         return (NULL);
2781
2782                 if (opt_junk)
2783                         memset(ret, 0, size);
2784 #ifdef USE_BRK
2785                 else if ((uintptr_t)ret >= (uintptr_t)brk_base
2786                     && (uintptr_t)ret < (uintptr_t)brk_max) {
2787                         /*
2788                          * This may be a re-used brk chunk.  Therefore, zero
2789                          * the memory.
2790                          */
2791                         memset(ret, 0, size);
2792                 }
2793 #endif
2794         }
2795
2796         return (ret);
2797 }
2798
2799 static size_t
2800 isalloc(const void *ptr)
2801 {
2802         size_t ret;
2803         arena_chunk_t *chunk;
2804
2805         assert(ptr != NULL);
2806         assert(ptr != &nil);
2807
2808         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2809         if (chunk != ptr) {
2810                 /* Region. */
2811                 assert(chunk->arena->magic == ARENA_MAGIC);
2812
2813                 ret = arena_salloc(ptr);
2814         } else {
2815                 chunk_node_t *node, key;
2816
2817                 /* Chunk (huge allocation). */
2818
2819                 malloc_mutex_lock(&chunks_mtx);
2820
2821                 /* Extract from tree of huge allocations. */
2822                 key.chunk = (void *)ptr;
2823                 node = RB_FIND(chunk_tree_s, &huge, &key);
2824                 assert(node != NULL);
2825
2826                 ret = node->size;
2827
2828                 malloc_mutex_unlock(&chunks_mtx);
2829         }
2830
2831         return (ret);
2832 }
2833
2834 static void *
2835 iralloc(void *ptr, size_t size)
2836 {
2837         void *ret;
2838         size_t oldsize;
2839
2840         assert(ptr != NULL);
2841         assert(ptr != &nil);
2842         assert(size != 0);
2843
2844         oldsize = isalloc(ptr);
2845
2846         if (size <= arena_maxclass)
2847                 ret = arena_ralloc(ptr, size, oldsize);
2848         else
2849                 ret = huge_ralloc(ptr, size, oldsize);
2850
2851         return (ret);
2852 }
2853
2854 static void
2855 idalloc(void *ptr)
2856 {
2857         arena_chunk_t *chunk;
2858
2859         assert(ptr != NULL);
2860         assert(ptr != &nil);
2861
2862         chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2863         if (chunk != ptr) {
2864                 /* Region. */
2865 #ifdef MALLOC_STATS
2866                 malloc_mutex_lock(&chunk->arena->mtx);
2867                 chunk->arena->stats.ndalloc++;
2868                 malloc_mutex_unlock(&chunk->arena->mtx);
2869 #endif
2870                 arena_dalloc(chunk->arena, chunk, ptr);
2871         } else
2872                 huge_dalloc(ptr);
2873 }
2874
2875 static void
2876 malloc_print_stats(void)
2877 {
2878
2879         if (opt_print_stats) {
2880                 malloc_printf("___ Begin malloc statistics ___\n");
2881                 malloc_printf("Number of CPUs: %u\n", ncpus);
2882                 malloc_printf("Number of arenas: %u\n", narenas);
2883                 malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size,
2884                     opt_chunk_2pow);
2885                 malloc_printf("Quantum size: %zu (2^%zu)\n", quantum,
2886                     opt_quantum_2pow);
2887                 malloc_printf("Max small size: %zu\n", small_max);
2888                 malloc_printf("Pointer size: %u\n", sizeof(void *));
2889                 malloc_printf("Assertions %s\n",
2890 #ifdef NDEBUG
2891                     "disabled"
2892 #else
2893                     "enabled"
2894 #endif
2895                     );
2896
2897 #ifdef MALLOC_STATS
2898
2899                 {
2900                         size_t allocated, total;
2901                         unsigned i;
2902                         arena_t *arena;
2903
2904                         /* Calculate and print allocated/total stats. */
2905
2906                         /* arenas. */
2907                         for (i = 0, allocated = 0; i < narenas; i++) {
2908                                 if (arenas[i] != NULL) {
2909                                         malloc_mutex_lock(&arenas[i]->mtx);
2910                                         allocated += arenas[i]->stats.allocated;
2911                                         malloc_mutex_unlock(&arenas[i]->mtx);
2912                                 }
2913                         }
2914
2915                         /* huge. */
2916                         malloc_mutex_lock(&chunks_mtx);
2917                         allocated += huge_allocated;
2918                         total = stats_chunks.curchunks * chunk_size;
2919                         malloc_mutex_unlock(&chunks_mtx);
2920
2921                         malloc_printf("Allocated: %zu, space used: %zu\n",
2922                             allocated, total);
2923
2924                         /* Print chunk stats. */
2925                         {
2926                                 chunk_stats_t chunks_stats;
2927
2928                                 malloc_mutex_lock(&chunks_mtx);
2929                                 chunks_stats = stats_chunks;
2930                                 malloc_mutex_unlock(&chunks_mtx);
2931
2932                                 malloc_printf("\nchunks:\n");
2933                                 malloc_printf(" %13s%13s%13s\n", "nchunks",
2934                                     "highchunks", "curchunks");
2935                                 malloc_printf(" %13llu%13lu%13lu\n",
2936                                     chunks_stats.nchunks,
2937                                     chunks_stats.highchunks,
2938                                     chunks_stats.curchunks);
2939                         }
2940
2941                         /* Print chunk stats. */
2942                         malloc_printf("\nhuge:\n");
2943                         malloc_printf("%12s %12s %12s\n",
2944                             "nmalloc", "ndalloc", "allocated");
2945                         malloc_printf("%12llu %12llu %12zu\n",
2946                             huge_nmalloc, huge_ndalloc, huge_allocated);
2947
2948                         /* Print stats for each arena. */
2949                         for (i = 0; i < narenas; i++) {
2950                                 arena = arenas[i];
2951                                 if (arena != NULL) {
2952                                         malloc_printf(
2953                                             "\narenas[%u] statistics:\n", i);
2954                                         malloc_mutex_lock(&arena->mtx);
2955                                         stats_print(arena);
2956                                         malloc_mutex_unlock(&arena->mtx);
2957                                 }
2958                         }
2959                 }
2960 #endif /* #ifdef MALLOC_STATS */
2961                 malloc_printf("--- End malloc statistics ---\n");
2962         }
2963 }
2964
2965 /*
2966  * FreeBSD's pthreads implementation calls malloc(3), so the malloc
2967  * implementation has to take pains to avoid infinite recursion during
2968  * initialization.
2969  */
2970 static inline bool
2971 malloc_init(void)
2972 {
2973
2974         if (malloc_initialized == false)
2975                 return (malloc_init_hard());
2976
2977         return (false);
2978 }
2979
2980 static bool
2981 malloc_init_hard(void)
2982 {
2983         unsigned i, j;
2984         int linklen;
2985         char buf[PATH_MAX + 1];
2986         const char *opts;
2987
2988         malloc_mutex_lock(&init_lock);
2989         if (malloc_initialized) {
2990                 /*
2991                  * Another thread initialized the allocator before this one
2992                  * acquired init_lock.
2993                  */
2994                 malloc_mutex_unlock(&init_lock);
2995                 return (false);
2996         }
2997
2998         /* Get number of CPUs. */
2999         {
3000                 int mib[2];
3001                 size_t len;
3002
3003                 mib[0] = CTL_HW;
3004                 mib[1] = HW_NCPU;
3005                 len = sizeof(ncpus);
3006                 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
3007                         /* Error. */
3008                         ncpus = 1;
3009                 }
3010         }
3011
3012         /* Get page size. */
3013         {
3014                 long result;
3015
3016                 result = sysconf(_SC_PAGESIZE);
3017                 assert(result != -1);
3018                 pagesize = (unsigned) result;
3019
3020                 /*
3021                  * We assume that pagesize is a power of 2 when calculating
3022                  * pagesize_2pow.
3023                  */
3024                 assert(((result - 1) & result) == 0);
3025                 pagesize_2pow = ffs(result) - 1;
3026         }
3027
3028         for (i = 0; i < 3; i++) {
3029                 /* Get runtime configuration. */
3030                 switch (i) {
3031                 case 0:
3032                         if ((linklen = readlink("/etc/malloc.conf", buf,
3033                                                 sizeof(buf) - 1)) != -1) {
3034                                 /*
3035                                  * Use the contents of the "/etc/malloc.conf"
3036                                  * symbolic link's name.
3037                                  */
3038                                 buf[linklen] = '\0';
3039                                 opts = buf;
3040                         } else {
3041                                 /* No configuration specified. */
3042                                 buf[0] = '\0';
3043                                 opts = buf;
3044                         }
3045                         break;
3046                 case 1:
3047                         if (issetugid() == 0 && (opts =
3048                             getenv("MALLOC_OPTIONS")) != NULL) {
3049                                 /*
3050                                  * Do nothing; opts is already initialized to
3051                                  * the value of the MALLOC_OPTIONS environment
3052                                  * variable.
3053                                  */
3054                         } else {
3055                                 /* No configuration specified. */
3056                                 buf[0] = '\0';
3057                                 opts = buf;
3058                         }
3059                         break;
3060                 case 2:
3061                         if (_malloc_options != NULL) {
3062                             /*
3063                              * Use options that were compiled into the program.
3064                              */
3065                             opts = _malloc_options;
3066                         } else {
3067                                 /* No configuration specified. */
3068                                 buf[0] = '\0';
3069                                 opts = buf;
3070                         }
3071                         break;
3072                 default:
3073                         /* NOTREACHED */
3074                         assert(false);
3075                 }
3076
3077                 for (j = 0; opts[j] != '\0'; j++) {
3078                         switch (opts[j]) {
3079                         case 'a':
3080                                 opt_abort = false;
3081                                 break;
3082                         case 'A':
3083                                 opt_abort = true;
3084                                 break;
3085                         case 'h':
3086                                 opt_hint = false;
3087                                 break;
3088                         case 'H':
3089                                 opt_hint = true;
3090                                 break;
3091                         case 'j':
3092                                 opt_junk = false;
3093                                 break;
3094                         case 'J':
3095                                 opt_junk = true;
3096                                 break;
3097                         case 'k':
3098                                 /*
3099                                  * Run fullness quartile limits don't have
3100                                  * enough resolution if there are too few
3101                                  * regions for the largest bin size classes.
3102                                  */
3103                                 if (opt_chunk_2pow > pagesize_2pow + 4)
3104                                         opt_chunk_2pow--;
3105                                 break;
3106                         case 'K':
3107                                 if (opt_chunk_2pow < CHUNK_2POW_MAX)
3108                                         opt_chunk_2pow++;
3109                                 break;
3110                         case 'n':
3111                                 opt_narenas_lshift--;
3112                                 break;
3113                         case 'N':
3114                                 opt_narenas_lshift++;
3115                                 break;
3116                         case 'p':
3117                                 opt_print_stats = false;
3118                                 break;
3119                         case 'P':
3120                                 opt_print_stats = true;
3121                                 break;
3122                         case 'q':
3123                                 if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3124                                         opt_quantum_2pow--;
3125                                 break;
3126                         case 'Q':
3127                                 if (opt_quantum_2pow < pagesize_2pow - 1)
3128                                         opt_quantum_2pow++;
3129                                 break;
3130                         case 's':
3131                                 if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3132                                         opt_small_max_2pow--;
3133                                 break;
3134                         case 'S':
3135                                 if (opt_small_max_2pow < pagesize_2pow - 1)
3136                                         opt_small_max_2pow++;
3137                                 break;
3138                         case 'u':
3139                                 opt_utrace = false;
3140                                 break;
3141                         case 'U':
3142                                 opt_utrace = true;
3143                                 break;
3144                         case 'v':
3145                                 opt_sysv = false;
3146                                 break;
3147                         case 'V':
3148                                 opt_sysv = true;
3149                                 break;
3150                         case 'x':
3151                                 opt_xmalloc = false;
3152                                 break;
3153                         case 'X':
3154                                 opt_xmalloc = true;
3155                                 break;
3156                         case 'z':
3157                                 opt_zero = false;
3158                                 break;
3159                         case 'Z':
3160                                 opt_zero = true;
3161                                 break;
3162                         default:
3163                                 malloc_printf("%s: (malloc) Unsupported"
3164                                     " character in malloc options: '%c'\n",
3165                                     _getprogname(), opts[j]);
3166                         }
3167                 }
3168         }
3169
3170         /* Take care to call atexit() only once. */
3171         if (opt_print_stats) {
3172                 /* Print statistics at exit. */
3173                 atexit(malloc_print_stats);
3174         }
3175
3176         /* Set variables according to the value of opt_small_max_2pow. */
3177         if (opt_small_max_2pow < opt_quantum_2pow)
3178                 opt_small_max_2pow = opt_quantum_2pow;
3179         small_max = (1 << opt_small_max_2pow);
3180
3181         /* Set bin-related variables. */
3182         bin_maxclass = (pagesize >> 1);
3183         if (pagesize_2pow > RUN_MIN_REGS_2POW + 1)
3184                 tiny_min_2pow = pagesize_2pow - (RUN_MIN_REGS_2POW + 1);
3185         else
3186                 tiny_min_2pow = 1;
3187         assert(opt_quantum_2pow >= tiny_min_2pow);
3188         ntbins = opt_quantum_2pow - tiny_min_2pow;
3189         assert(ntbins <= opt_quantum_2pow);
3190         nqbins = (small_max >> opt_quantum_2pow);
3191         nsbins = pagesize_2pow - opt_small_max_2pow - 1;
3192
3193         /* Set variables according to the value of opt_quantum_2pow. */
3194         quantum = (1 << opt_quantum_2pow);
3195         quantum_mask = quantum - 1;
3196         if (ntbins > 0)
3197                 small_min = (quantum >> 1) + 1;
3198         else
3199                 small_min = 1;
3200         assert(small_min <= quantum);
3201
3202         /* Set variables according to the value of opt_chunk_2pow. */
3203         chunk_size = (1 << opt_chunk_2pow);
3204         chunk_size_mask = chunk_size - 1;
3205         arena_chunk_maplen = (1 << (opt_chunk_2pow - pagesize_2pow));
3206         arena_maxclass = (chunk_size >> 1);
3207
3208         UTRACE(0, 0, 0);
3209
3210 #ifdef MALLOC_STATS
3211         memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3212 #endif
3213
3214         /* Various sanity checks that regard configuration. */
3215         assert(quantum >= sizeof(void *));
3216         assert(quantum <= pagesize);
3217         assert(chunk_size >= pagesize);
3218         assert(quantum * 4 <= chunk_size);
3219
3220         /* Initialize chunks data. */
3221         malloc_mutex_init(&chunks_mtx);
3222         RB_INIT(&huge);
3223 #ifdef USE_BRK
3224         brk_base = sbrk(0);
3225         brk_prev = brk_base;
3226         brk_max = brk_base;
3227 #endif
3228 #ifdef MALLOC_STATS
3229         huge_nmalloc = 0;
3230         huge_ndalloc = 0;
3231         huge_allocated = 0;
3232 #endif
3233         RB_INIT(&old_chunks);
3234
3235         /* Initialize base allocation data structures. */
3236 #ifdef MALLOC_STATS
3237         base_total = 0;
3238 #endif
3239 #ifdef USE_BRK
3240         /*
3241          * Do special brk allocation here, since the base chunk doesn't really
3242          * need to be chunk-aligned.
3243          */
3244         {
3245                 void *brk_cur;
3246                 intptr_t incr;
3247
3248                 do {
3249                         /* Get the current end of brk. */
3250                         brk_cur = sbrk(0);
3251
3252                         /*
3253                          * Calculate how much padding is necessary to
3254                          * chunk-align the end of brk.  Don't worry about
3255                          * brk_cur not being chunk-aligned though.
3256                          */
3257                         incr = (intptr_t)chunk_size
3258                             - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
3259
3260                         brk_prev = sbrk(incr);
3261                         if (brk_prev == brk_cur) {
3262                                 /* Success. */
3263                                 break;
3264                         }
3265                 } while (brk_prev != (void *)-1);
3266
3267                 base_chunk = brk_cur;
3268                 base_next_addr = base_chunk;
3269                 base_past_addr = (void *)((uintptr_t)base_chunk + incr);
3270 #ifdef MALLOC_STATS
3271                 base_total += incr;
3272                 stats_chunks.nchunks = 1;
3273                 stats_chunks.curchunks = 1;
3274                 stats_chunks.highchunks = 1;
3275 #endif
3276         }
3277 #else
3278         /*
3279          * The first base chunk will be allocated when needed by base_alloc().
3280          */
3281         base_chunk = NULL;
3282         base_next_addr = NULL;
3283         base_past_addr = NULL;
3284 #endif
3285         base_chunk_nodes = NULL;
3286         malloc_mutex_init(&base_mtx);
3287
3288         if (ncpus > 1) {
3289                 /*
3290                  * For SMP systems, create four times as many arenas as there
3291                  * are CPUs by default.
3292                  */
3293                 opt_narenas_lshift += 2;
3294         }
3295
3296         /* Determine how many arenas to use. */
3297         narenas = ncpus;
3298         if (opt_narenas_lshift > 0) {
3299                 if ((narenas << opt_narenas_lshift) > narenas)
3300                         narenas <<= opt_narenas_lshift;
3301                 /*
3302                  * Make sure not to exceed the limits of what base_malloc()
3303                  * can handle.
3304                  */
3305                 if (narenas * sizeof(arena_t *) > chunk_size)
3306                         narenas = chunk_size / sizeof(arena_t *);
3307         } else if (opt_narenas_lshift < 0) {
3308                 if ((narenas << opt_narenas_lshift) < narenas)
3309                         narenas <<= opt_narenas_lshift;
3310                 /* Make sure there is at least one arena. */
3311                 if (narenas == 0)
3312                         narenas = 1;
3313         }
3314
3315 #ifdef NO_TLS
3316         if (narenas > 1) {
3317                 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
3318                     23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
3319                     89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
3320                     151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
3321                     223, 227, 229, 233, 239, 241, 251, 257, 263};
3322                 unsigned i, nprimes, parenas;
3323
3324                 /*
3325                  * Pick a prime number of hash arenas that is more than narenas
3326                  * so that direct hashing of pthread_self() pointers tends to
3327                  * spread allocations evenly among the arenas.
3328                  */
3329                 assert((narenas & 1) == 0); /* narenas must be even. */
3330                 nprimes = (sizeof(primes) >> SIZEOF_INT_2POW);
3331                 parenas = primes[nprimes - 1]; /* In case not enough primes. */
3332                 for (i = 1; i < nprimes; i++) {
3333                         if (primes[i] > narenas) {
3334                                 parenas = primes[i];
3335                                 break;
3336                         }
3337                 }
3338                 narenas = parenas;
3339         }
3340 #endif
3341
3342 #ifndef NO_TLS
3343         next_arena = 0;
3344 #endif
3345
3346         /* Allocate and initialize arenas. */
3347         arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3348         if (arenas == NULL) {
3349                 malloc_mutex_unlock(&init_lock);
3350                 return (true);
3351         }
3352         /*
3353          * Zero the array.  In practice, this should always be pre-zeroed,
3354          * since it was just mmap()ed, but let's be sure.
3355          */
3356         memset(arenas, 0, sizeof(arena_t *) * narenas);
3357
3358         /*
3359          * Initialize one arena here.  The rest are lazily created in
3360          * arena_choose_hard().
3361          */
3362         arenas_extend(0);
3363         if (arenas[0] == NULL) {
3364                 malloc_mutex_unlock(&init_lock);
3365                 return (true);
3366         }
3367
3368         malloc_mutex_init(&arenas_mtx);
3369
3370         malloc_initialized = true;
3371         malloc_mutex_unlock(&init_lock);
3372         return (false);
3373 }
3374
3375 /*
3376  * End general internal functions.
3377  */
3378 /******************************************************************************/
3379 /*
3380  * Begin malloc(3)-compatible functions.
3381  */
3382
3383 void *
3384 malloc(size_t size)
3385 {
3386         void *ret;
3387
3388         if (malloc_init()) {
3389                 ret = NULL;
3390                 goto RETURN;
3391         }
3392
3393         if (size == 0) {
3394                 if (opt_sysv == false)
3395                         ret = (void *)&nil;
3396                 else
3397                         ret = NULL;
3398                 goto RETURN;
3399         }
3400
3401         ret = imalloc(size);
3402
3403 RETURN:
3404         if (ret == NULL) {
3405                 if (opt_xmalloc) {
3406                         malloc_printf("%s: (malloc) Error in malloc(%zu):"
3407                             " out of memory\n", _getprogname(), size);
3408                         abort();
3409                 }
3410                 errno = ENOMEM;
3411         }
3412
3413         UTRACE(0, size, ret);
3414         return (ret);
3415 }
3416
3417 int
3418 posix_memalign(void **memptr, size_t alignment, size_t size)
3419 {
3420         int ret;
3421         void *result;
3422
3423         if (malloc_init())
3424                 result = NULL;
3425         else {
3426                 /* Make sure that alignment is a large enough power of 2. */
3427                 if (((alignment - 1) & alignment) != 0
3428                     || alignment < sizeof(void *)) {
3429                         if (opt_xmalloc) {
3430                                 malloc_printf("%s: (malloc) Error in"
3431                                     " posix_memalign(%zu, %zu):"
3432                                     " invalid alignment\n",
3433                                     _getprogname(), alignment, size);
3434                                 abort();
3435                         }
3436                         result = NULL;
3437                         ret = EINVAL;
3438                         goto RETURN;
3439                 }
3440
3441                 result = ipalloc(alignment, size);
3442         }
3443
3444         if (result == NULL) {
3445                 if (opt_xmalloc) {
3446                         malloc_printf("%s: (malloc) Error in"
3447                             " posix_memalign(%zu, %zu): out of memory\n",
3448                             _getprogname(), alignment, size);
3449                         abort();
3450                 }
3451                 ret = ENOMEM;
3452                 goto RETURN;
3453         }
3454
3455         *memptr = result;
3456         ret = 0;
3457
3458 RETURN:
3459         UTRACE(0, size, result);
3460         return (ret);
3461 }
3462
3463 void *
3464 calloc(size_t num, size_t size)
3465 {
3466         void *ret;
3467         size_t num_size;
3468
3469         if (malloc_init()) {
3470                 ret = NULL;
3471                 goto RETURN;
3472         }
3473
3474         num_size = num * size;
3475         if (num_size == 0) {
3476                 if (opt_sysv == false)
3477                         ret = (void *)&nil;
3478                 else
3479                         ret = NULL;
3480                 goto RETURN;
3481         /*
3482          * Try to avoid division here.  We know that it isn't possible to
3483          * overflow during multiplication if neither operand uses any of the
3484          * most significant half of the bits in a size_t.
3485          */
3486         } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
3487             && (num_size / size != num)) {
3488                 /* size_t overflow. */
3489                 ret = NULL;
3490                 goto RETURN;
3491         }
3492
3493         ret = icalloc(num_size);
3494
3495 RETURN:
3496         if (ret == NULL) {
3497                 if (opt_xmalloc) {
3498                         malloc_printf("%s: (malloc) Error in"
3499                             " calloc(%zu, %zu): out of memory\n",
3500                             _getprogname(), num, size);
3501                         abort();
3502                 }
3503                 errno = ENOMEM;
3504         }
3505
3506         UTRACE(0, num_size, ret);
3507         return (ret);
3508 }
3509
3510 void *
3511 realloc(void *ptr, size_t size)
3512 {
3513         void *ret;
3514
3515         if (size != 0) {
3516                 if (ptr != &nil && ptr != NULL) {
3517                         assert(malloc_initialized);
3518
3519                         ret = iralloc(ptr, size);
3520
3521                         if (ret == NULL) {
3522                                 if (opt_xmalloc) {
3523                                         malloc_printf("%s: (malloc) Error in"
3524                                             " ralloc(%p, %zu): out of memory\n",
3525                                             _getprogname(), ptr, size);
3526                                         abort();
3527                                 }
3528                                 errno = ENOMEM;
3529                         }
3530                 } else {
3531                         if (malloc_init())
3532                                 ret = NULL;
3533                         else
3534                                 ret = imalloc(size);
3535
3536                         if (ret == NULL) {
3537                                 if (opt_xmalloc) {
3538                                         malloc_printf("%s: (malloc) Error in"
3539                                             " ralloc(%p, %zu): out of memory\n",
3540                                             _getprogname(), ptr, size);
3541                                         abort();
3542                                 }
3543                                 errno = ENOMEM;
3544                         }
3545                 }
3546         } else {
3547                 if (ptr != &nil && ptr != NULL)
3548                         idalloc(ptr);
3549
3550                 ret = (void *)&nil;
3551         }
3552
3553         UTRACE(ptr, size, ret);
3554         return (ret);
3555 }
3556
3557 void
3558 free(void *ptr)
3559 {
3560
3561         UTRACE(ptr, 0, 0);
3562         if (ptr != &nil && ptr != NULL) {
3563                 assert(malloc_initialized);
3564
3565                 idalloc(ptr);
3566         }
3567 }
3568
3569 /*
3570  * End malloc(3)-compatible functions.
3571  */
3572 /******************************************************************************/
3573 /*
3574  * Begin non-standard functions.
3575  */
3576
3577 size_t
3578 malloc_usable_size(const void *ptr)
3579 {
3580
3581         assert(ptr != NULL);
3582
3583         if (ptr == &nil)
3584                 return (0);
3585         else
3586                 return (isalloc(ptr));
3587 }
3588
3589 /*
3590  * End non-standard functions.
3591  */
3592 /******************************************************************************/
3593 /*
3594  * Begin library-private functions, used by threading libraries for protection
3595  * of malloc during fork().  These functions are only called if the program is
3596  * running in threaded mode, so there is no need to check whether the program
3597  * is threaded here.
3598  */
3599
3600 void
3601 _malloc_prefork(void)
3602 {
3603         unsigned i;
3604
3605         /* Acquire all mutexes in a safe order. */
3606
3607         malloc_mutex_lock(&arenas_mtx);
3608         for (i = 0; i < narenas; i++) {
3609                 if (arenas[i] != NULL)
3610                         malloc_mutex_lock(&arenas[i]->mtx);
3611         }
3612         malloc_mutex_unlock(&arenas_mtx);
3613
3614         malloc_mutex_lock(&base_mtx);
3615
3616         malloc_mutex_lock(&chunks_mtx);
3617 }
3618
3619 void
3620 _malloc_postfork(void)
3621 {
3622         unsigned i;
3623
3624         /* Release all mutexes, now that fork() has completed. */
3625
3626         malloc_mutex_unlock(&chunks_mtx);
3627
3628         malloc_mutex_unlock(&base_mtx);
3629
3630         malloc_mutex_lock(&arenas_mtx);
3631         for (i = 0; i < narenas; i++) {
3632                 if (arenas[i] != NULL)
3633                         malloc_mutex_unlock(&arenas[i]->mtx);
3634         }
3635         malloc_mutex_unlock(&arenas_mtx);
3636 }
3637
3638 /*
3639  * End library-private functions.
3640  */
3641 /******************************************************************************/