2 * Copyright (C) 2006 Jason Evans <jasone@FreeBSD.org>.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems. The following
33 * features are included for this purpose:
35 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 * contention and cache sloshing.
 * + Cache line sharing between arenas is avoided for internal data
 *   structures.
 *
 * + Memory is managed in chunks and runs (chunks can be split into runs using
 *   a binary buddy scheme), rather than as individual pages.  This provides
 *   a constant-time mechanism for associating allocations with particular
 *   arenas.
46 * Allocation requests are rounded up to the nearest size class, and no record
47 * of the original request size is maintained. Allocations are broken into
48 * categories according to size class. Assuming runtime defaults, 4 kB pages
49 * and a 16 byte quantum, the size classes in each category are as follows:
 * |====================================|
 * | Category | Subcategory    |  Size  |
 * |====================================|
 * | Small    | Tiny           |      2 |
 * |          |                |      4 |
 * |          |                |      8 |
 * |          |----------------+--------|
 * |          | Quantum-spaced |     16 |
 * |          |                |     32 |
 * |          |                |    ... |
 * |          |                |    512 |
 * |          |----------------+--------|
 * |          | Sub-page       |   1 kB |
 * |          |                |   2 kB |
 * |====================================|
 * | Medium                    |   4 kB |
 * |                           |   8 kB |
 * |                           |    ... |
 * |                           |   1 MB |
 * |====================================|
 * | Large                     |   2 MB |
 * |                           |   4 MB |
 * |                           |    ... |
 * |====================================|
83 * A different mechanism is used for each category:
85 * Small : Each size class is segregated into its own set of runs. Each run
86 * maintains a bitmap of which regions are free/allocated.
88 * Medium : Each allocation is backed by a dedicated run. Metadata are stored
89 * in the associated arena chunk header maps.
91 * Large : Each allocation is backed by a dedicated contiguous set of chunks.
92 * Metadata are stored in a separate red-black tree.
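 *
 * For example, assuming the defaults above (4 kB pages, 16 byte quantum,
 * 2 MB chunks), a few request sizes map to size classes as follows:
 *
 *   request    7 bytes --> tiny,            8 bytes
 *   request  100 bytes --> quantum-spaced,  112 bytes
 *   request 1500 bytes --> sub-page,        2 kB
 *   request 5000 bytes --> medium,          8 kB
 *   request    3 MB    --> large,           4 MB (two chunks)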
94 *******************************************************************************
98 *******************************************************************************
102 *******************************************************************************
/* Ring definitions. */
#define qr(a_type) struct { \
	a_type *qre_next;	/* Link to the next element. */ \
	a_type *qre_prev;	/* Link to the previous element. */ \
}

#define qr_initializer {NULL, NULL}

/* Ring functions. */
#define qr_new(a_qr, a_field) do { \
	(a_qr)->a_field.qre_next = (a_qr); \
	(a_qr)->a_field.qre_prev = (a_qr); \
} while (0)

#define qr_next(a_qr, a_field) ((a_qr)->a_field.qre_next)

#define qr_prev(a_qr, a_field) ((a_qr)->a_field.qre_prev)

#define qr_before_insert(a_qrelm, a_qr, a_field) do { \
	(a_qr)->a_field.qre_prev = (a_qrelm)->a_field.qre_prev; \
	(a_qr)->a_field.qre_next = (a_qrelm); \
	(a_qr)->a_field.qre_prev->a_field.qre_next = (a_qr); \
	(a_qrelm)->a_field.qre_prev = (a_qr); \
} while (0)

#define qr_after_insert(a_qrelm, a_qr, a_field) do { \
	(a_qr)->a_field.qre_next = (a_qrelm)->a_field.qre_next; \
	(a_qr)->a_field.qre_prev = (a_qrelm); \
	(a_qr)->a_field.qre_next->a_field.qre_prev = (a_qr); \
	(a_qrelm)->a_field.qre_next = (a_qr); \
} while (0)

#define qr_meld(a_qr_a, a_qr_b, a_type, a_field) do { \
	a_type *t; \
	(a_qr_a)->a_field.qre_prev->a_field.qre_next = (a_qr_b); \
	(a_qr_b)->a_field.qre_prev->a_field.qre_next = (a_qr_a); \
	t = (a_qr_a)->a_field.qre_prev; \
	(a_qr_a)->a_field.qre_prev = (a_qr_b)->a_field.qre_prev; \
	(a_qr_b)->a_field.qre_prev = t; \
} while (0)

/* qr_meld() and qr_split() are functionally equivalent, so there's no need to
 * have two copies of the code. */
#define qr_split(a_qr_a, a_qr_b, a_type, a_field) \
	qr_meld((a_qr_a), (a_qr_b), a_type, a_field)

#define qr_remove(a_qr, a_field) do { \
	(a_qr)->a_field.qre_prev->a_field.qre_next \
	    = (a_qr)->a_field.qre_next; \
	(a_qr)->a_field.qre_next->a_field.qre_prev \
	    = (a_qr)->a_field.qre_prev; \
	(a_qr)->a_field.qre_next = (a_qr); \
	(a_qr)->a_field.qre_prev = (a_qr); \
} while (0)

#define qr_foreach(var, a_qr, a_field) \
	for ((var) = (a_qr); \
	    (var) != NULL; \
	    (var) = (((var)->a_field.qre_next != (a_qr)) \
	    ? (var)->a_field.qre_next : NULL))

#define qr_reverse_foreach(var, a_qr, a_field) \
	for ((var) = ((a_qr) != NULL) ? qr_prev(a_qr, a_field) : NULL; \
	    (var) != NULL; \
	    (var) = (((var) != (a_qr)) \
	    ? (var)->a_field.qre_prev : NULL))
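
/*
 * Illustrative sketch of how the qr_*() ring macros are used (not part of the
 * original implementation; the widget type and field names are hypothetical).
 * An element embeds a qr(...) link field, a lone element forms a one-element
 * ring, and insert/remove are O(1) pointer splices.  Disabled from the build.
 */
#if 0
typedef struct widget_s widget_t;
struct widget_s {
	int val;
	qr(widget_t) link;	/* Ring linkage. */
};

static void
widget_ring_example(void)
{
	widget_t a, b, c, *iter;

	a.val = 0; b.val = 1; c.val = 2;
	qr_new(&a, link);		/* a is now a ring of one element. */
	qr_after_insert(&a, &b, link);	/* Ring order: a, b. */
	qr_after_insert(&b, &c, link);	/* Ring order: a, b, c. */

	qr_foreach(iter, &a, link) {
		/* Visits a, b, c, then stops when the ring wraps around. */
	}

	qr_remove(&b, link);		/* b becomes a singleton ring again. */
}
#endif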
172 /******************************************************************************/
 * In order to disable various extra features that may have negative
 * performance impacts (assertions, expanded statistics, redzones), define
 * NO_MALLOC_EXTRAS.
179 /* #define NO_MALLOC_EXTRAS */
181 #ifndef NO_MALLOC_EXTRAS
182 # define MALLOC_DEBUG
185 #include <sys/cdefs.h>
186 __FBSDID("$FreeBSD$");
188 #include "libc_private.h"
192 #include "spinlock.h"
193 #include "namespace.h"
194 #include <sys/mman.h>
195 #include <sys/param.h>
196 #include <sys/stddef.h>
197 #include <sys/time.h>
198 #include <sys/types.h>
199 #include <sys/sysctl.h>
200 #include <sys/tree.h>
202 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
204 #include <machine/atomic.h>
205 #include <machine/cpufunc.h>
206 #include <machine/vmparam.h>
220 #include "un-namespace.h"
 * Calculate statistics that can be used to get an idea of how well caching is
 * working.
226 #ifndef NO_MALLOC_EXTRAS
227 # define MALLOC_STATS
238 /* Disable inlining to make debugging easier. */
242 /* Size of stack-allocated buffer passed to strerror_r(). */
243 #define STRERROR_BUF 64
245 /* Minimum alignment of allocations is 2^QUANTUM_2POW_MIN bytes. */
247 # define QUANTUM_2POW_MIN 4
248 # define SIZEOF_PTR 4
252 # define QUANTUM_2POW_MIN 4
253 # define SIZEOF_PTR 8
257 # define QUANTUM_2POW_MIN 4
258 # define SIZEOF_PTR 8
262 # define QUANTUM_2POW_MIN 4
263 # define SIZEOF_PTR 8
267 # define QUANTUM_2POW_MIN 4
268 # define SIZEOF_PTR 8
271 # define QUANTUM_2POW_MIN 3
272 # define SIZEOF_PTR 4
277 # define QUANTUM_2POW_MIN 4
278 # define SIZEOF_PTR 4
282 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
283 #if (!defined(PIC) && !defined(NO_TLS))
 * Size and alignment of memory chunks that are allocated by the OS's virtual
 * memory system.
 *
293 * 2^(pagesize_2pow - 1 + RUN_MIN_REGS_2POW) <= chunk_size <= 2^28
295 #define CHUNK_2POW_DEFAULT 21
296 #define CHUNK_2POW_MAX 28
299 * Maximum size of L1 cache line. This is used to avoid cache line aliasing,
300 * so over-estimates are okay (up to a point), but under-estimates will
301 * negatively affect performance.
303 #define CACHELINE_2POW 6
304 #define CACHELINE ((size_t)(1 << CACHELINE_2POW))
307 * Maximum size class that is a multiple of the quantum, but not (necessarily)
 * a power of 2.  Above this size, allocations are rounded up to the nearest
 * power of 2.
311 #define SMALL_MAX_2POW_DEFAULT 9
312 #define SMALL_MAX_DEFAULT (1 << SMALL_MAX_2POW_DEFAULT)
 * Minimum number of regions that must fit into a run that serves quantum-size
 * allocations.
 *
318 * Note that if this is set too low, space will be wasted if there are size
319 * classes that are small enough that RUN_MIN_REGS regions don't fill a page.
320 * If this is set too high, then the overhead of searching through the bitmap
321 * that tracks region usage will become excessive.
323 #define RUN_MIN_REGS_2POW 10
324 #define RUN_MIN_REGS (1 << RUN_MIN_REGS_2POW)
327 * Maximum number of pages for a run that is used for bin allocations.
329 * Note that if this is set too low, then fragmentation for the largest bin
330 * size classes will be high. If this is set too high, then even small
331 * programs will often have to allocate more than two chunks early on.
333 #define RUN_MAX_PAGES_2POW 4
334 #define RUN_MAX_PAGES (1 << RUN_MAX_PAGES_2POW)
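
/*
 * Worked values for the defaults above (illustrative): QUANTUM_2POW_MIN = 4
 * gives a 16 byte quantum, CHUNK_2POW_DEFAULT = 21 gives 2 MB chunks,
 * CACHELINE_2POW = 6 gives 64 byte cache lines, and SMALL_MAX_DEFAULT is 512
 * bytes.  RUN_MIN_REGS = 1024 regions and RUN_MAX_PAGES = 16 pages bound how
 * large a run serving a small bin may grow (at most 64 kB with 4 kB pages).
 */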
336 /******************************************************************************/
339 * Mutexes based on spinlocks. We can't use normal pthread mutexes, because
340 * they require malloc()ed memory.
346 static bool malloc_initialized = false;
348 /******************************************************************************/
350 * Statistics data structures.
355 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
356 struct malloc_bin_stats_s {
 * Number of allocation requests that corresponded to the size of this
 * bin.
363 /* Total number of runs created for this bin's size class. */
367 * Total number of run promotions/demotions for this bin's size class.
372 /* High-water mark for this bin. */
373 unsigned long highruns;
375 /* Current number of runs in this bin. */
376 unsigned long curruns;
379 typedef struct arena_stats_s arena_stats_t;
380 struct arena_stats_s {
381 /* Total byte count of allocated memory, not including overhead. */
384 /* Number of times each function was called. */
393 typedef struct chunk_stats_s chunk_stats_t;
394 struct chunk_stats_s {
395 /* Number of chunks that were allocated. */
398 /* High-water mark for number of chunks allocated. */
399 unsigned long highchunks;
402 * Current number of chunks allocated. This value isn't maintained for
 * any other purpose, so keep track of it in order to be able to set
 * highchunks.
406 unsigned long curchunks;
409 #endif /* #ifdef MALLOC_STATS */
411 /******************************************************************************/
413 * Chunk data structures.
416 /* Tree of chunks. */
417 typedef struct chunk_node_s chunk_node_t;
418 struct chunk_node_s {
419 /* Linkage for the chunk tree. */
420 RB_ENTRY(chunk_node_s) link;
423 * Pointer to the chunk that this tree node is responsible for. In some
424 * (but certainly not all) cases, this data structure is placed at the
 * beginning of the corresponding chunk, so this field may point to this
 * node.
430 /* Total chunk size. */
433 typedef struct chunk_tree_s chunk_tree_t;
434 RB_HEAD(chunk_tree_s, chunk_node_s);
436 /******************************************************************************/
438 * Arena data structures.
441 typedef struct arena_s arena_t;
442 typedef struct arena_bin_s arena_bin_t;
444 typedef struct arena_chunk_map_s arena_chunk_map_t;
445 struct arena_chunk_map_s {
448 unsigned npages:15; /* Limiting factor for CHUNK_2POW_MAX. */
452 /* Arena chunk header. */
453 typedef struct arena_chunk_s arena_chunk_t;
454 struct arena_chunk_s {
455 /* Arena that owns the chunk. */
458 /* Linkage for the arena's chunk tree. */
459 RB_ENTRY(arena_chunk_s) link;
462 * Number of pages in use. This is maintained in order to make
463 * detection of empty chunks fast.
468 * Array of counters that keeps track of how many free runs of each
469 * size are available in this chunk. This table is sized at compile
470 * time, which is wasteful. However, due to unrelated rounding, this
471 * doesn't actually waste any otherwise useful space.
483 uint32_t nfree_runs[CHUNK_2POW_MAX/* - PAGE_SHIFT */];
485 /* Map of pages within chunk that keeps track of free/large/small. */
486 arena_chunk_map_t map[1]; /* Dynamically sized. */
488 typedef struct arena_chunk_tree_s arena_chunk_tree_t;
489 RB_HEAD(arena_chunk_tree_s, arena_chunk_s);
491 typedef struct arena_run_s arena_run_t;
493 /* Linkage for run rings. */
494 qr(arena_run_t) link;
498 # define ARENA_RUN_MAGIC 0x384adf93
501 /* Bin this run is associated with. */
504 /* Bitmask of in-use regions (0: in use, 1: free). */
505 #define REGS_MASK_NELMS \
506 ((1 << (RUN_MIN_REGS_2POW + 1)) / (sizeof(unsigned) << 3))
507 unsigned regs_mask[REGS_MASK_NELMS];
509 /* Index of first element that might have a free region. */
510 unsigned regs_minelm:(RUN_MIN_REGS_2POW + 2);
512 /* Number of free regions in run. */
513 unsigned nfree:(RUN_MIN_REGS_2POW + 2);
 * Current quartile for this run, one of: {RUN_QINIT, RUN_Q0, RUN_Q25,
 * RUN_Q50, RUN_Q75, RUN_Q100}.
528 * Limits on the number of free regions for the fullness quartile this
529 * run is currently in. If nfree goes outside these limits, the run
530 * is moved to a different fullness quartile.
532 unsigned free_max:(RUN_MIN_REGS_2POW + 2);
533 unsigned free_min:(RUN_MIN_REGS_2POW + 2);
536 /* Used for run ring headers, where the run isn't actually used. */
537 typedef struct arena_run_link_s arena_run_link_t;
538 struct arena_run_link_s {
539 /* Linkage for run rings. */
540 qr(arena_run_t) link;
545 * Current run being used to service allocations of this bin's size
551 * Links into rings of runs, of various fullnesses (names indicate
552 * approximate lower bounds). A new run conceptually starts off in
553 * runsinit, and it isn't inserted into the runs0 ring until it
554 * reaches 25% full (hysteresis mechanism). For the run to be moved
555 * again, it must become either empty or 50% full. Thus, each ring
556 * contains runs that are within 50% above the advertised fullness for
557 * the ring. This provides a low-overhead mechanism for segregating
558 * runs into approximate fullness classes.
560 * Conceptually, there is a runs100 that contains completely full runs.
561 * Since we don't need to search for these runs though, no runs100 ring
562 * is actually maintained.
564 * These rings are useful when looking for an existing run to use when
 * runcur is no longer usable.  We look for usable runs in the following
 * order: runs50, runs25, runs0, and finally runs75.
 *
573 * runs75 isn't a good place to look, because it contains runs that may
574 * be nearly completely full. Still, we look there as a last resort in
575 * order to avoid allocating a new run if at all possible.
577 /* arena_run_link_t runsinit; 0% <= fullness < 25% */
578 arena_run_link_t runs0; /* 0% < fullness < 50% */
579 arena_run_link_t runs25; /* 25% < fullness < 75% */
580 arena_run_link_t runs50; /* 50% < fullness < 100% */
581 arena_run_link_t runs75; /* 75% < fullness < 100% */
582 /* arena_run_link_t runs100; fullness == 100% */
584 /* Size of regions in a run for this bin's size class. */
587 /* Total size of a run for this bin's size class. */
590 /* Total number of regions in a run for this bin's size class. */
593 /* Offset of first region in a run for this bin's size class. */
594 uint32_t reg0_offset;
597 /* Bin statistics. */
598 malloc_bin_stats_t stats;
605 # define ARENA_MAGIC 0x947d3d24
608 /* All operations on this arena require that mtx be locked. */
616 * Tree of chunks this arena manages.
618 arena_chunk_tree_t chunks;
621 * bins is used to store rings of free regions of the following sizes,
622 * assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
643 arena_bin_t bins[1]; /* Dynamically sized. */
646 /******************************************************************************/
651 /* Used as a special "nil" return value for malloc(0). */
654 /* Number of CPUs. */
655 static unsigned ncpus;
658 static unsigned pagesize;
659 static unsigned pagesize_2pow;
661 /* Various bin-related settings. */
662 static size_t bin_maxclass; /* Max size class for bins. */
663 static unsigned ntbins; /* Number of (2^n)-spaced tiny bins. */
664 static unsigned nqbins; /* Number of quantum-spaced bins. */
665 static unsigned nsbins; /* Number of (2^n)-spaced sub-page bins. */
666 static size_t small_min;
667 static size_t small_max;
668 static unsigned tiny_min_2pow;
670 /* Various quantum-related settings. */
671 static size_t quantum;
672 static size_t quantum_mask; /* (quantum - 1). */
674 /* Various chunk-related settings. */
675 static size_t chunk_size;
676 static size_t chunk_size_mask; /* (chunk_size - 1). */
677 static size_t arena_maxclass; /* Max size class for arenas. */
678 static unsigned arena_chunk_maplen;
685 /* Protects chunk-related data structures. */
686 static malloc_mutex_t chunks_mtx;
688 /* Tree of chunks that are stand-alone huge allocations. */
689 static chunk_tree_t huge;
693 * Try to use brk for chunk-size allocations, due to address space constraints.
695 /* Result of first sbrk(0) call. */
696 static void *brk_base;
697 /* Current end of brk, or ((void *)-1) if brk is exhausted. */
698 static void *brk_prev;
699 /* Upper limit on brk addresses (may be an over-estimate). */
700 static void *brk_max;
 * Byte counters for allocated/total space used by the chunks in the huge
 * tree.
708 static uint64_t huge_nmalloc;
709 static uint64_t huge_ndalloc;
710 static size_t huge_allocated;
714 * Tree of chunks that were previously allocated. This is used when allocating
715 * chunks, in an attempt to re-use address space.
717 static chunk_tree_t old_chunks;
719 /****************************/
721 * base (internal allocation).
725 * Current chunk that is being used for internal memory allocations. This
726 * chunk is carved up in cacheline-size quanta, so that there is no chance of
727 * false cache line sharing.
729 static void *base_chunk;
730 static void *base_next_addr;
731 static void *base_past_addr; /* Addr immediately past base_chunk. */
732 static chunk_node_t *base_chunk_nodes; /* LIFO cache of chunk nodes. */
733 static malloc_mutex_t base_mtx;
735 static uint64_t base_total;
744 * Arenas that are used to service external requests. Not all elements of the
745 * arenas array are necessarily used; arenas are created lazily as needed.
747 static arena_t **arenas;
748 static unsigned narenas;
750 static unsigned next_arena;
752 static malloc_mutex_t arenas_mtx; /* Protects arenas initialization. */
 * Map of pthread_self() --> arenas[???], used for selecting an arena to use
 * for allocations.
759 static __thread arena_t *arenas_map;
763 /* Chunk statistics. */
764 static chunk_stats_t stats_chunks;
767 /*******************************/
769 * Runtime configuration options.
771 const char *_malloc_options;
773 #ifndef NO_MALLOC_EXTRAS
774 static bool opt_abort = true;
775 static bool opt_junk = true;
777 static bool opt_abort = false;
778 static bool opt_junk = false;
780 static bool opt_hint = false;
781 static bool opt_print_stats = false;
782 static size_t opt_quantum_2pow = QUANTUM_2POW_MIN;
783 static size_t opt_small_max_2pow = SMALL_MAX_2POW_DEFAULT;
784 static size_t opt_chunk_2pow = CHUNK_2POW_DEFAULT;
785 static bool opt_utrace = false;
786 static bool opt_sysv = false;
787 static bool opt_xmalloc = false;
788 static bool opt_zero = false;
789 static int32_t opt_narenas_lshift = 0;
#define UTRACE(a, b, c) \
	if (opt_utrace) { \
		malloc_utrace_t ut = {a, b, c}; \
		utrace(&ut, sizeof(ut)); \
	}
803 /******************************************************************************/
805 * Begin function prototypes for non-inline static functions.
808 static void malloc_mutex_init(malloc_mutex_t *a_mutex);
809 static void wrtmessage(const char *p1, const char *p2, const char *p3,
811 static void malloc_printf(const char *format, ...);
812 static void *base_alloc(size_t size);
813 static chunk_node_t *base_chunk_node_alloc(void);
814 static void base_chunk_node_dealloc(chunk_node_t *node);
816 static void stats_print(arena_t *arena);
818 static void *pages_map(void *addr, size_t size);
819 static void pages_unmap(void *addr, size_t size);
820 static void *chunk_alloc(size_t size);
821 static void chunk_dealloc(void *chunk, size_t size);
822 static void arena_run_split(arena_t *arena, arena_run_t *run, bool large,
824 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
825 static void arena_chunk_dealloc(arena_chunk_t *chunk);
826 static void arena_bin_run_refile(arena_t *arena, arena_bin_t *bin,
827 arena_run_t *run, size_t size, bool promote);
828 static arena_run_t *arena_run_alloc(arena_t *arena, bool large, size_t size);
829 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size);
830 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin,
832 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin,
834 static void *arena_malloc(arena_t *arena, size_t size);
835 static size_t arena_salloc(arena_t *arena, const void *ptr);
836 static void *arena_ralloc(arena_t *arena, void *ptr, size_t size,
838 static void arena_dalloc(arena_t *arena, void *ptr);
839 static bool arena_new(arena_t *arena);
840 static arena_t *arenas_extend(unsigned ind);
842 static arena_t *choose_arena_hard(void);
844 static void *huge_malloc(size_t size);
845 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
846 static void huge_dalloc(void *ptr);
847 static void *imalloc(arena_t *arena, size_t size);
848 static void *ipalloc(arena_t *arena, size_t alignment, size_t size);
849 static void *icalloc(arena_t *arena, size_t size);
850 static size_t isalloc(const void *ptr);
851 static void *iralloc(arena_t *arena, void *ptr, size_t size);
852 static void idalloc(void *ptr);
853 static void malloc_print_stats(void);
854 static bool malloc_init_hard(void);
857 * End function prototypes.
859 /******************************************************************************/
865 malloc_mutex_init(malloc_mutex_t *a_mutex)
867 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
869 a_mutex->lock = lock;
873 malloc_mutex_lock(malloc_mutex_t *a_mutex)
877 _SPINLOCK(&a_mutex->lock);
881 malloc_mutex_unlock(malloc_mutex_t *a_mutex)
885 _SPINUNLOCK(&a_mutex->lock);
891 /******************************************************************************/
893 * Begin Utility functions/macros.
896 /* Return the chunk address for allocation address a. */
897 #define CHUNK_ADDR2BASE(a) \
898 ((void *)((uintptr_t)(a) & ~chunk_size_mask))
900 /* Return the chunk offset of address a. */
901 #define CHUNK_ADDR2OFFSET(a) \
902 ((size_t)((uintptr_t)(a) & chunk_size_mask))
904 /* Return the smallest chunk multiple that is >= s. */
905 #define CHUNK_CEILING(s) \
906 (((s) + chunk_size_mask) & ~chunk_size_mask)
908 /* Return the smallest cacheline multiple that is >= s. */
909 #define CACHELINE_CEILING(s) \
910 (((s) + (CACHELINE - 1)) & ~(CACHELINE - 1))
912 /* Return the smallest quantum multiple that is >= a. */
913 #define QUANTUM_CEILING(a) \
914 (((a) + quantum_mask) & ~quantum_mask)
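
/*
 * Worked examples of the mask-based ceiling/offset macros above (illustrative
 * only, assuming the default 16 byte quantum and 2 MB chunk size; not
 * compiled).
 */
#if 0
static void
ceiling_macro_examples(void)
{

	assert(QUANTUM_CEILING(1) == 16);	/* (1 + 15) & ~15 */
	assert(QUANTUM_CEILING(17) == 32);	/* (17 + 15) & ~15 */
	assert(CACHELINE_CEILING(65) == 128);	/* (65 + 63) & ~63 */
	/* An address splits into its enclosing chunk and an offset within: */
	assert(CHUNK_ADDR2BASE((void *)0x20030000UL) == (void *)0x20000000UL);
	assert(CHUNK_ADDR2OFFSET((void *)0x20030000UL) == 0x30000);
}
#endif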
916 /* Compute the smallest power of 2 that is >= x. */
926 #if (SIZEOF_PTR == 8)
934 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
937 _write(STDERR_FILENO, p1, strlen(p1));
938 _write(STDERR_FILENO, p2, strlen(p2));
939 _write(STDERR_FILENO, p3, strlen(p3));
940 _write(STDERR_FILENO, p4, strlen(p4));
943 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
944 const char *p4) = wrtmessage;
947 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
950 malloc_printf(const char *format, ...)
955 va_start(ap, format);
956 vsnprintf(buf, sizeof(buf), format, ap);
958 _malloc_message(buf, "", "", "");
961 /******************************************************************************/
964 base_alloc(size_t size)
969 /* Round size up to nearest multiple of the cacheline size. */
970 csize = CACHELINE_CEILING(size);
972 malloc_mutex_lock(&base_mtx);
974 /* Make sure there's enough space for the allocation. */
975 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
978 assert(csize <= chunk_size);
980 tchunk = chunk_alloc(chunk_size);
981 if (tchunk == NULL) {
986 base_next_addr = (void *)base_chunk;
987 base_past_addr = (void *)((uintptr_t)base_chunk + chunk_size);
989 base_total += chunk_size;
994 ret = base_next_addr;
995 base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
998 malloc_mutex_unlock(&base_mtx);
1002 static chunk_node_t *
1003 base_chunk_node_alloc(void)
1007 malloc_mutex_lock(&base_mtx);
1008 if (base_chunk_nodes != NULL) {
1009 ret = base_chunk_nodes;
1010 base_chunk_nodes = *(chunk_node_t **)ret;
1011 malloc_mutex_unlock(&base_mtx);
1013 malloc_mutex_unlock(&base_mtx);
1014 ret = (chunk_node_t *)base_alloc(sizeof(chunk_node_t));
1021 base_chunk_node_dealloc(chunk_node_t *node)
1024 malloc_mutex_lock(&base_mtx);
1025 *(chunk_node_t **)node = base_chunk_nodes;
1026 base_chunk_nodes = node;
1027 malloc_mutex_unlock(&base_mtx);
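
/*
 * The node cache above is an intrusive LIFO (illustrative note): a freed
 * chunk_node_t has its first pointer-sized word overwritten with the previous
 * list head, so the free list needs no storage beyond the nodes themselves:
 *
 *   base_chunk_nodes --> [node C] --> [node B] --> [node A] --> NULL
 *
 * base_chunk_node_alloc() pops the head; base_chunk_node_dealloc() pushes.
 */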
1030 /******************************************************************************/
1034 stats_print(arena_t *arena)
1039 malloc_printf("allocated: %zu\n", arena->stats.allocated);
1041 malloc_printf("calls:\n");
1042 malloc_printf(" %12s %12s %12s %12s %12s %12s\n", "nmalloc", "npalloc",
1043 "ncalloc", "ndalloc", "nralloc", "nmadvise");
1044 malloc_printf(" %12llu %12llu %12llu %12llu %12llu %12llu\n",
1045 arena->stats.nmalloc, arena->stats.npalloc, arena->stats.ncalloc,
1046 arena->stats.ndalloc, arena->stats.nralloc, arena->stats.nmadvise);
1048 malloc_printf("bins:\n");
1049 malloc_printf("%13s %1s %4s %5s %8s %9s %5s %6s %7s %6s %6s\n",
1050 "bin", "", "size", "nregs", "run_size", "nrequests", "nruns",
1051 "hiruns", "curruns", "npromo", "ndemo");
1052 for (i = 0, gap_start = -1; i < ntbins + nqbins + nsbins; i++) {
1053 if (arena->bins[i].stats.nrequests == 0) {
1054 if (gap_start == -1)
1057 if (gap_start != -1) {
1058 if (i > gap_start + 1) {
1059 /* Gap of more than one size class. */
1060 malloc_printf("[%u..%u]\n",
1063 /* Gap of one size class. */
1064 malloc_printf("[%u]\n", gap_start);
1069 "%13u %1s %4u %5u %8u %9llu %5llu"
1070 " %6lu %7lu %6llu %6llu\n",
1072 i < ntbins ? "T" : i < ntbins + nqbins ? "Q" : "S",
1073 arena->bins[i].reg_size,
1074 arena->bins[i].nregs,
1075 arena->bins[i].run_size,
1076 arena->bins[i].stats.nrequests,
1077 arena->bins[i].stats.nruns,
1078 arena->bins[i].stats.highruns,
1079 arena->bins[i].stats.curruns,
1080 arena->bins[i].stats.npromote,
1081 arena->bins[i].stats.ndemote);
1084 if (gap_start != -1) {
1085 if (i > gap_start + 1) {
1086 /* Gap of more than one size class. */
1087 malloc_printf("[%u..%u]\n", gap_start, i - 1);
1089 /* Gap of one size class. */
1090 malloc_printf("[%u]\n", gap_start);
1097 * End Utility functions/macros.
1099 /******************************************************************************/
1101 * Begin chunk management functions.
1105 chunk_comp(chunk_node_t *a, chunk_node_t *b)
1111 if ((uintptr_t)a->chunk < (uintptr_t)b->chunk)
1113 else if (a->chunk == b->chunk)
1119 /* Generate red-black tree code for chunks. */
1120 RB_GENERATE_STATIC(chunk_tree_s, chunk_node_s, link, chunk_comp);
1123 pages_map(void *addr, size_t size)
1131 * We don't use MAP_FIXED here, because it can cause the *replacement*
1132 * of existing mappings, and we only want to create new mappings.
1134 ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1136 assert(ret != NULL);
1138 if (ret == MAP_FAILED)
1140 else if (addr != NULL && ret != addr) {
1142 * We succeeded in mapping memory, but not in the right place.
1144 if (munmap(ret, size) == -1) {
1145 char buf[STRERROR_BUF];
1147 strerror_r(errno, buf, sizeof(buf));
1148 malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1149 _getprogname(), buf);
1156 else if ((uintptr_t)ret >= (uintptr_t)brk_base
1157 && (uintptr_t)ret < (uintptr_t)brk_max) {
1159 * We succeeded in mapping memory, but at a location that could
1160 * be confused with brk. Leave the mapping intact so that this
1161 * won't ever happen again, then try again.
1163 assert(addr == NULL);
1168 assert(ret == NULL || (addr == NULL && ret != addr)
1169 || (addr != NULL && ret == addr));
1174 pages_unmap(void *addr, size_t size)
1177 if (munmap(addr, size) == -1) {
1178 char buf[STRERROR_BUF];
1180 strerror_r(errno, buf, sizeof(buf));
1181 malloc_printf("%s: (malloc) Error in munmap(): %s\n",
1182 _getprogname(), buf);
1189 chunk_alloc(size_t size)
1192 chunk_node_t *tchunk, *delchunk;
1195 assert(size % chunk_size == 0);
1197 malloc_mutex_lock(&chunks_mtx);
1199 if (size == chunk_size) {
1201 * Check for address ranges that were previously chunks and try
1205 tchunk = RB_MIN(chunk_tree_s, &old_chunks);
1206 while (tchunk != NULL) {
1207 /* Found an address range. Try to recycle it. */
1209 chunk = tchunk->chunk;
1211 tchunk = RB_NEXT(chunk_tree_s, &old_chunks, delchunk);
1213 /* Remove delchunk from the tree. */
1214 RB_REMOVE(chunk_tree_s, &old_chunks, delchunk);
1215 base_chunk_node_dealloc(delchunk);
1218 if ((uintptr_t)chunk >= (uintptr_t)brk_base
1219 && (uintptr_t)chunk < (uintptr_t)brk_max) {
1220 /* Re-use a previously freed brk chunk. */
1225 if ((ret = pages_map(chunk, size)) != NULL) {
1233 * Try to create chunk-size allocations in brk, in order to
1234 * make full use of limited address space.
1236 if (brk_prev != (void *)-1) {
1241 * The loop is necessary to recover from races with
1242 * other threads that are using brk for something other
1246 /* Get the current end of brk. */
1250 * Calculate how much padding is necessary to
1251 * chunk-align the end of brk.
1253 incr = (intptr_t)chunk_size
1254 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
1255 if (incr == chunk_size) {
1258 ret = (void *)(intptr_t)brk_cur + incr;
1262 brk_prev = sbrk(incr);
1263 if (brk_prev == brk_cur) {
1267 } while (brk_prev != (void *)-1);
1273 * Try to over-allocate, but allow the OS to place the allocation
1274 * anywhere. Beware of size_t wrap-around.
1276 if (size + chunk_size > size) {
1277 if ((ret = pages_map(NULL, size + chunk_size)) != NULL) {
1278 size_t offset = CHUNK_ADDR2OFFSET(ret);
1281 * Success. Clean up unneeded leading/trailing space.
1284 /* Leading space. */
1285 pages_unmap(ret, chunk_size - offset);
1287 ret = (void *)((uintptr_t)ret + (chunk_size -
1290 /* Trailing space. */
1291 pages_unmap((void *)((uintptr_t)ret + size),
1294 /* Trailing space only. */
1295 pages_unmap((void *)((uintptr_t)ret + size),
1302 /* All strategies for allocation failed. */
1307 stats_chunks.nchunks += (size / chunk_size);
1308 stats_chunks.curchunks += (size / chunk_size);
1310 if (stats_chunks.curchunks > stats_chunks.highchunks)
1311 stats_chunks.highchunks = stats_chunks.curchunks;
1313 malloc_mutex_unlock(&chunks_mtx);
1315 assert(CHUNK_ADDR2BASE(ret) == ret);
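
/*
 * Worked example of the over-allocate-and-trim strategy above (illustrative,
 * assuming 2 MB chunks): to obtain one aligned 2 MB chunk, 4 MB is mapped
 * anywhere.  If the mapping lands 0x180000 bytes past a chunk boundary, the
 * leading 0x80000 bytes are unmapped, the next 2 MB (now chunk-aligned) is
 * kept, and the trailing 0x180000 bytes are unmapped.  If the mapping happens
 * to be chunk-aligned already, only the trailing 2 MB is unmapped.
 */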
1320 chunk_dealloc(void *chunk, size_t size)
1325 assert(chunk != NULL);
1326 assert(CHUNK_ADDR2BASE(chunk) == chunk);
1328 assert(size % chunk_size == 0);
1330 malloc_mutex_lock(&chunks_mtx);
1331 for (offset = 0; offset < size; offset += chunk_size) {
1332 node = base_chunk_node_alloc();
1337 * Create a record of this chunk before deallocating it, so
1338 * that the address range can be recycled if memory usage
1339 * increases later on.
1341 node->chunk = (void *)((uintptr_t)chunk + (uintptr_t)offset);
1342 node->size = chunk_size;
1343 RB_INSERT(chunk_tree_s, &old_chunks, node);
1347 if ((uintptr_t)chunk >= (uintptr_t)brk_base
1348 && (uintptr_t)chunk < (uintptr_t)brk_max)
1349 madvise(chunk, size, MADV_FREE);
1352 pages_unmap(chunk, size);
1355 stats_chunks.curchunks -= (size / chunk_size);
1357 malloc_mutex_unlock(&chunks_mtx);
1361 * End chunk management functions.
1363 /******************************************************************************/
1369 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
1375 if ((uintptr_t)a < (uintptr_t)b)
1383 /* Generate red-black tree code for arena chunks. */
1384 RB_GENERATE_STATIC(arena_chunk_tree_s, arena_chunk_s, link, arena_chunk_comp);
1387 arena_run_mask_free_set(arena_run_t *run, unsigned reg)
1391 assert(run->magic == ARENA_RUN_MAGIC);
1392 assert(reg < run->bin->nregs);
1394 elm = reg / (sizeof(unsigned) << 3);
1395 if (elm < run->regs_minelm)
1396 run->regs_minelm = elm;
1397 bit = reg - (elm * (sizeof(unsigned) << 3));
1398 assert((run->regs_mask[elm] & (1 << bit)) == 0);
1399 run->regs_mask[elm] |= (1 << bit);
1403 arena_run_mask_free_unset(arena_run_t *run, unsigned reg)
1407 assert(run->magic == ARENA_RUN_MAGIC);
1408 assert(reg < run->bin->nregs);
1410 elm = reg / (sizeof(unsigned) << 3);
1411 bit = reg - (elm * (sizeof(unsigned) << 3));
1412 assert((run->regs_mask[elm] & (1 << bit)) != 0);
1413 run->regs_mask[elm] ^= (1 << bit);
1416 static inline unsigned
1417 arena_run_search(arena_run_t *run)
1419 unsigned i, mask, bit;
1421 assert(run->magic == ARENA_RUN_MAGIC);
1423 for (i = run->regs_minelm; i < REGS_MASK_NELMS; i++) {
1424 mask = run->regs_mask[i];
1428 /* Usable allocation found. */
1429 return ((i * (sizeof(unsigned) << 3))
1434 * Make a note that nothing before this element
1435 * contains a free region.
1437 run->regs_minelm = i + 1;
1445 arena_run_split(arena_t *arena, arena_run_t *run, bool large, size_t size)
1447 arena_chunk_t *chunk;
1448 unsigned run_ind, map_offset, total_pages, need_pages;
1449 unsigned i, log2_run_pages, run_pages;
1451 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1452 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1454 assert(chunk->map[run_ind].free);
1455 total_pages = chunk->map[run_ind].npages;
1456 need_pages = (size >> pagesize_2pow);
1459 for (i = 0; i < total_pages; i++) {
1460 assert(chunk->map[run_ind + i].free);
1461 assert(chunk->map[run_ind + i].large == false);
1462 assert(chunk->map[run_ind + i].npages == total_pages);
1463 assert(chunk->map[run_ind + i].pos == i);
1467 /* Split enough pages from the front of run to fit allocation size. */
1468 map_offset = run_ind;
1469 for (i = 0; i < need_pages; i++) {
1470 chunk->map[map_offset + i].free = false;
1471 chunk->map[map_offset + i].large = large;
1472 chunk->map[map_offset + i].npages = need_pages;
1473 chunk->map[map_offset + i].pos = i;
1476 /* Update map for trailing pages. */
1477 map_offset += need_pages;
1478 while (map_offset < run_ind + total_pages) {
1479 log2_run_pages = ffs(map_offset) - 1;
1480 run_pages = (1 << log2_run_pages);
1481 for (i = 0; i < run_pages; i++) {
1482 chunk->map[map_offset + i].free = true;
1483 chunk->map[map_offset + i].large = false;
1484 chunk->map[map_offset + i].npages = run_pages;
1485 chunk->map[map_offset + i].pos = i;
1488 chunk->nfree_runs[log2_run_pages]++;
1490 map_offset += run_pages;
1493 chunk->pages_used += (size >> pagesize_2pow);
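
/*
 * Worked example of the trailing split loop above (illustrative): if a free
 * 16-page run starts at page index 16 and 5 pages are needed, pages 16..20
 * are marked allocated and the remainder is carved into buddy-aligned free
 * runs using ffs() on the page index: a 1-page run at 21, a 2-page run at 22,
 * and an 8-page run at 24 (1 + 2 + 8 = 11 = 16 - 5).
 */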
1496 static arena_chunk_t *
1497 arena_chunk_alloc(arena_t *arena)
1499 arena_chunk_t *chunk;
1500 unsigned i, j, header_npages, pow2_header_npages, map_offset;
1501 unsigned log2_run_pages, run_pages;
1504 chunk = (arena_chunk_t *)chunk_alloc(chunk_size);
1508 chunk->arena = arena;
1510 RB_INSERT(arena_chunk_tree_s, &arena->chunks, chunk);
1513 * Claim that no pages are in use, since the header is merely overhead.
1515 chunk->pages_used = 0;
1517 memset(&chunk->nfree_runs, 0, sizeof(chunk->nfree_runs));
1519 header_size = (size_t)((uintptr_t)&chunk->map[arena_chunk_maplen]
1520 - (uintptr_t)chunk);
1521 if (header_size % pagesize != 0) {
1522 /* Round up to the nearest page boundary. */
1523 header_size += pagesize - (header_size % pagesize);
1526 header_npages = header_size / pagesize;
1527 pow2_header_npages = pow2_ceil(header_npages);
1530 * Iteratively mark runs as in use, until we've spoken for the entire
1534 for (i = 0; header_npages > 0; i++) {
1535 if ((pow2_header_npages >> i) <= header_npages) {
1536 for (j = 0; j < (pow2_header_npages >> i); j++) {
1537 chunk->map[map_offset + j].free = false;
1538 chunk->map[map_offset + j].large = false;
1539 chunk->map[map_offset + j].npages =
1540 (pow2_header_npages >> i);
1541 chunk->map[map_offset + j].pos = j;
1543 header_npages -= (pow2_header_npages >> i);
1544 map_offset += (pow2_header_npages >> i);
1549 * Finish initializing map. The chunk header takes up some space at
1550 * the beginning of the chunk, which we just took care of by
1551 * "allocating" the leading pages.
1553 while (map_offset < (chunk_size / pagesize)) {
1554 log2_run_pages = ffs(map_offset) - 1;
1555 run_pages = (1 << log2_run_pages);
1556 for (i = 0; i < run_pages; i++) {
1557 chunk->map[map_offset + i].free = true;
1558 chunk->map[map_offset + i].large = false;
1559 chunk->map[map_offset + i].npages = run_pages;
1560 chunk->map[map_offset + i].pos = i;
1563 chunk->nfree_runs[log2_run_pages]++;
1565 map_offset += run_pages;
1572 arena_chunk_dealloc(arena_chunk_t *chunk)
1575 RB_REMOVE(arena_chunk_tree_s, &chunk->arena->chunks, chunk);
1577 chunk_dealloc((void *)chunk, chunk_size);
1581 arena_bin_run_refile(arena_t *arena, arena_bin_t *bin, arena_run_t *run,
1582 size_t size, bool promote)
1585 assert(bin == run->bin);
1587 /* Determine whether to promote or demote run. */
1590 assert(run->free_min > run->nfree);
1591 assert(run->quartile < RUN_Q100);
1593 if (run->quartile == RUN_Q75) {
1595 * Skip RUN_Q75 during promotion from RUN_Q50.
1596 * Separate handling of RUN_Q75 and RUN_Q100 allows us
1597 * to keep completely full runs in RUN_Q100, thus
1598 * guaranteeing that runs in RUN_Q75 are only mostly
1599 * full. This provides a method for avoiding a linear
1600 * search for non-full runs, which avoids some
1601 * pathological edge cases.
1606 bin->stats.npromote++;
1610 assert(run->free_max < run->nfree);
1611 assert(run->quartile > RUN_QINIT);
1614 bin->stats.ndemote++;
1619 qr_remove(run, link);
1620 switch (run->quartile) {
1623 bin->stats.curruns--;
1625 if (bin->runcur == run)
1630 arena_run_dalloc(arena, run, bin->run_size);
1633 qr_before_insert((arena_run_t *)&bin->runs0, run, link);
1634 run->free_max = run->bin->nregs - 1;
1635 run->free_min = (run->bin->nregs >> 1) + 1;
1636 assert(run->nfree <= run->free_max);
1637 assert(run->nfree >= run->free_min);
1640 qr_before_insert((arena_run_t *)&bin->runs25, run,
1642 run->free_max = ((run->bin->nregs >> 2) * 3) - 1;
1643 run->free_min = (run->bin->nregs >> 2) + 1;
1644 assert(run->nfree <= run->free_max);
1645 assert(run->nfree >= run->free_min);
1648 qr_before_insert((arena_run_t *)&bin->runs50, run,
1650 run->free_max = (run->bin->nregs >> 1) - 1;
1652 assert(run->nfree <= run->free_max);
1653 assert(run->nfree >= run->free_min);
1656 qr_before_insert((arena_run_t *)&bin->runs75, run,
1658 run->free_max = (run->bin->nregs >> 2) - 1;
1660 assert(run->nfree <= run->free_max);
1661 assert(run->nfree >= run->free_min);
1664 assert(bin->runcur == run);
1668 assert(run->nfree <= run->free_max);
1669 assert(run->nfree >= run->free_min);
1677 static arena_run_t *
1678 arena_run_alloc(arena_t *arena, bool large, size_t size)
1681 unsigned min_ind, i, j;
1682 arena_chunk_t *chunk;
1687 assert(size <= arena_maxclass);
1696 * Search through arena's chunks in address order for a run that is
1697 * large enough. Look for a precise fit, but do not pass up a chunk
1698 * that has a run which is large enough to split.
1700 min_ind = ffs(size / pagesize) - 1;
1701 RB_FOREACH(chunk, arena_chunk_tree_s, &arena->chunks) {
1703 i < (opt_chunk_2pow - pagesize_2pow);
1705 if (chunk->nfree_runs[i] > 0) {
1706 arena_chunk_map_t *map = chunk->map;
1708 /* Scan chunk's map for free run. */
1710 j < arena_chunk_maplen;
1711 j += map[j].npages) {
1713 && map[j].npages == (1 << i))
{
1715 run = (arena_run_t *)&((char *)chunk)[j
1718 assert(chunk->nfree_runs[i] > 0);
1719 chunk->nfree_runs[i]--;
1721 /* Update page map. */
1722 arena_run_split(arena, run, large, size);
}
1733 /* No usable runs. Allocate a new chunk, then try again. */
1734 if (arena_chunk_alloc(arena) == NULL)
1740 arena_run_dalloc(arena_t *arena, arena_run_t *run, size_t size)
1742 arena_chunk_t *chunk;
1743 unsigned i, run_ind, buddy_ind, base_run_ind, run_pages, log2_run_pages;
1745 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
1746 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
1748 run_pages = (size >> pagesize_2pow);
1749 log2_run_pages = ffs(run_pages) - 1;
1750 assert(run_pages > 0);
1752 /* Subtract pages from count of pages used in chunk. */
1753 chunk->pages_used -= run_pages;
1755 /* Mark run as deallocated. */
1756 for (i = 0; i < run_pages; i++) {
1757 chunk->map[run_ind + i].free = true;
1758 chunk->map[run_ind + i].large = false;
1759 chunk->map[run_ind + i].npages = run_pages;
1760 chunk->map[run_ind + i].pos = i;
1764 * Tell the kernel that we don't need the data in this run, but only if
1765 * requested via runtime configuration.
1768 madvise(run, size, MADV_FREE);
1770 arena->stats.nmadvise += (size >> pagesize_2pow);
1775 * Iteratively coalesce with buddies. Conceptually, the buddy scheme
1776 * induces a tree on the set of pages. If we know the number of pages
1777 * in the subtree rooted at the current node, we can quickly determine
 * whether a run is the left or right buddy, and then calculate the
 * buddy's index.
1782 (run_pages = (1 << log2_run_pages)) < arena_chunk_maplen;
1784 if (((run_ind >> log2_run_pages) & 1) == 0) {
1785 /* Current run precedes its buddy. */
1786 buddy_ind = run_ind + run_pages;
1787 base_run_ind = run_ind;
1789 /* Current run follows its buddy. */
1790 buddy_ind = run_ind - run_pages;
1791 base_run_ind = buddy_ind;
1794 if (chunk->map[buddy_ind].free == false
1795 || chunk->map[buddy_ind].npages != run_pages)
1798 assert(chunk->nfree_runs[log2_run_pages] > 0);
1799 chunk->nfree_runs[log2_run_pages]--;
1802 for (i = 0; i < (run_pages << 1); i++) {
1803 chunk->map[base_run_ind + i].npages = (run_pages << 1);
1804 chunk->map[base_run_ind + i].pos = i;
1807 /* Update run_ind to be the beginning of the coalesced run. */
1808 run_ind = base_run_ind;
1811 chunk->nfree_runs[log2_run_pages]++;
1813 /* Free pages, to the extent possible. */
1814 if (chunk->pages_used == 0) {
1815 /* This chunk is completely unused now, so deallocate it. */
1816 arena_chunk_dealloc(chunk);
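
/*
 * Worked example of the buddy coalescing loop above (illustrative): when a
 * 4-page run at page index 20 is freed, (20 >> 2) & 1 == 1, so its buddy is
 * the 4-page run at index 16.  If that buddy is free, the two merge into an
 * 8-page run at 16; then (16 >> 3) & 1 == 0, so the next candidate buddy is
 * the 8-page run at 24, and so on until a buddy is busy or the whole chunk is
 * free.
 */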
1820 static arena_run_t *
1821 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin, size_t size)
1824 unsigned i, remainder;
1826 /* Look for a usable run. */
1827 if ((run = qr_next((arena_run_t *)&bin->runs50, link))
1828 != (arena_run_t *)&bin->runs50
1829 || (run = qr_next((arena_run_t *)&bin->runs25, link))
1830 != (arena_run_t *)&bin->runs25
1831 || (run = qr_next((arena_run_t *)&bin->runs0, link))
1832 != (arena_run_t *)&bin->runs0
1833 || (run = qr_next((arena_run_t *)&bin->runs75, link))
1834 != (arena_run_t *)&bin->runs75) {
1835 /* run is guaranteed to have available space. */
1836 qr_remove(run, link);
1839 /* No existing runs have any space available. */
1841 /* Allocate a new run. */
1842 run = arena_run_alloc(arena, false, bin->run_size);
1846 /* Initialize run internals. */
1850 for (i = 0; i < bin->nregs / (sizeof(unsigned) << 3); i++)
1851 run->regs_mask[i] = UINT_MAX;
1852 remainder = bin->nregs % (sizeof(unsigned) << 3);
1853 if (remainder != 0) {
1854 run->regs_mask[i] = (UINT_MAX >> ((sizeof(unsigned) << 3)
1858 for (; i < REGS_MASK_NELMS; i++)
1859 run->regs_mask[i] = 0;
1861 run->regs_minelm = 0;
1863 run->nfree = bin->nregs;
1864 run->quartile = RUN_QINIT;
1865 run->free_max = bin->nregs;
1866 run->free_min = ((bin->nregs >> 2) * 3) + 1;
1868 run->magic = ARENA_RUN_MAGIC;
1873 bin->stats.curruns++;
1874 if (bin->stats.curruns > bin->stats.highruns)
1875 bin->stats.highruns = bin->stats.curruns;
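
/*
 * Worked example of the regs_mask initialization above (illustrative): with
 * 32-bit words and bin->nregs == 100, words 0..2 are set to UINT_MAX (96
 * regions), the next word gets only its low 4 bits set (UINT_MAX >> 28), and
 * any remaining words are zeroed so that the bitmap search never sees phantom
 * free regions.  A set bit means "free"; arena_run_search() scans for one.
 */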
1880 /* bin->runcur must have space available before this function is called. */
1881 static inline void *
1882 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run,
1888 assert(run->magic == ARENA_RUN_MAGIC);
1889 assert(run->nfree > 0);
1891 regind = arena_run_search(run);
1892 assert(regind != UINT_MAX);
1893 assert(regind < bin->nregs);
1895 ret = (void *)&((char *)run)[bin->reg0_offset + (bin->reg_size
1897 arena_run_mask_free_unset(run, regind);
1899 if (run->nfree < run->free_min) {
1900 /* Promote run to higher fullness quartile. */
1901 arena_bin_run_refile(arena, bin, run, size, true);
1907 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
1909 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin, size_t size)
1912 assert(bin->runcur == NULL || bin->runcur->quartile == RUN_Q100);
1914 bin->runcur = arena_bin_nonfull_run_get(arena, bin, size);
1915 if (bin->runcur == NULL)
1917 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
1918 assert(bin->runcur->nfree > 0);
1920 return (arena_bin_malloc_easy(arena, bin, bin->runcur, size));
1924 arena_malloc(arena_t *arena, size_t size)
1928 assert(arena != NULL);
1929 assert(arena->magic == ARENA_MAGIC);
1931 assert(QUANTUM_CEILING(size) <= arena_maxclass);
1933 malloc_mutex_lock(&arena->mtx);
1934 if (size <= bin_maxclass) {
1938 /* Small allocation. */
1940 if (size < small_min) {
1942 size = pow2_ceil(size);
1943 bin = &arena->bins[ffs(size >> (tiny_min_2pow + 1))];
1946 * Bin calculation is always correct, but we may need to
1947 * fix size for the purposes of stats accuracy.
1949 if (size < (1 << tiny_min_2pow))
1950 size = (1 << tiny_min_2pow);
1952 } else if (size <= small_max) {
1953 /* Quantum-spaced. */
1954 size = QUANTUM_CEILING(size);
1955 bin = &arena->bins[ntbins + (size >> opt_quantum_2pow)
1959 size = pow2_ceil(size);
1960 bin = &arena->bins[ntbins + nqbins
1961 + (ffs(size >> opt_small_max_2pow) - 2)];
1963 assert(size == bin->reg_size);
1965 if ((run = bin->runcur) != NULL)
1966 ret = arena_bin_malloc_easy(arena, bin, run, size);
1968 ret = arena_bin_malloc_hard(arena, bin, size);
1971 bin->stats.nrequests++;
1974 /* Medium allocation. */
1975 size = pow2_ceil(size);
1976 ret = (void *)arena_run_alloc(arena, true, size);
1981 arena->stats.allocated += size;
1984 malloc_mutex_unlock(&arena->mtx);
1986 if (opt_junk && ret != NULL)
1987 memset(ret, 0xa5, size);
1988 else if (opt_zero && ret != NULL)
1989 memset(ret, 0, size);
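
/*
 * Worked example of the bin selection above (illustrative, assuming the
 * defaults: 16 byte quantum, 512 byte small_max, and 4 kB pages, so that
 * ntbins == 3 and nqbins == 32): a 7 byte request rounds up to the 8 byte
 * tiny bin (bins[2]); a 100 byte request rounds up to 112 bytes, the
 * quantum-spaced bins[9]; a 1500 byte request rounds up to 2 kB, the
 * sub-page bins[36].
 */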
1993 /* Return the size of the allocation pointed to by ptr. */
1995 arena_salloc(arena_t *arena, const void *ptr)
1998 arena_chunk_t *chunk;
2000 arena_chunk_map_t *mapelm;
2002 assert(arena != NULL);
2003 assert(arena->magic == ARENA_MAGIC);
2004 assert(ptr != NULL);
2005 assert(ptr != &nil);
2006 assert(CHUNK_ADDR2BASE(ptr) != ptr);
2008 malloc_mutex_lock(&arena->mtx);
2009 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2010 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2011 mapelm = &chunk->map[pageind];
2012 assert(mapelm->free == false);
2013 if (mapelm->large == false) {
2016 pageind -= mapelm->pos;
2017 mapelm = &chunk->map[pageind];
2019 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2020 assert(run->magic == ARENA_RUN_MAGIC);
2021 ret = run->bin->reg_size;
2023 ret = mapelm->npages << pagesize_2pow;
2025 malloc_mutex_unlock(&arena->mtx);
2031 arena_ralloc(arena_t *arena, void *ptr, size_t size, size_t oldsize)
2035 /* Avoid moving the allocation if the size class would not change. */
2036 if (size < small_min) {
2037 if (oldsize < small_min &&
2038 ffs(pow2_ceil(size) >> (tiny_min_2pow + 1))
2039 == ffs(pow2_ceil(oldsize) >> (tiny_min_2pow + 1)))
2041 } else if (size <= small_max) {
2042 if (oldsize >= small_min && oldsize <= small_max &&
2043 (QUANTUM_CEILING(size) >> opt_quantum_2pow)
2044 == (QUANTUM_CEILING(oldsize) >> opt_quantum_2pow))
2047 if (oldsize > small_max &&
2048 pow2_ceil(size) == pow2_ceil(oldsize))
2053 * If we get here, then size and oldsize are different enough that we
2054 * need to use a different size class. In that case, fall back to
2055 * allocating new space and copying.
2057 ret = arena_malloc(arena, size);
2062 memcpy(ret, ptr, size);
2064 memcpy(ret, ptr, oldsize);
2068 if (opt_junk && size < oldsize)
2069 memset(&((char *)ptr)[size], 0x5a, oldsize - size);
2070 else if (opt_zero && size > oldsize)
	memset(&((char *)ptr)[oldsize], 0, size - oldsize);
2076 arena_dalloc(arena_t *arena, void *ptr)
2078 arena_chunk_t *chunk;
2080 arena_chunk_map_t *mapelm;
2083 assert(arena != NULL);
2084 assert(arena->magic == ARENA_MAGIC);
2085 assert(ptr != NULL);
2086 assert(ptr != &nil);
2087 assert(CHUNK_ADDR2BASE(ptr) != ptr);
2089 malloc_mutex_lock(&arena->mtx);
2091 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2092 assert(chunk->arena == arena);
2093 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> pagesize_2pow);
2094 mapelm = &chunk->map[pageind];
2095 assert(mapelm->free == false);
2096 if (mapelm->large == false) {
2100 /* Small allocation. */
2102 pageind -= mapelm->pos;
2103 mapelm = &chunk->map[pageind];
2105 run = (arena_run_t *)&((char *)chunk)[pageind << pagesize_2pow];
2106 assert(run->magic == ARENA_RUN_MAGIC);
2107 size = run->bin->reg_size;
2110 memset(ptr, 0x5a, size);
2112 regind = (unsigned)(((uintptr_t)ptr
2113 - (uintptr_t)&((char *)run)[run->bin->reg0_offset])
2114 / run->bin->reg_size);
2115 arena_run_mask_free_set(run, regind);
2117 if (run->nfree > run->free_max) {
2118 /* Demote run to lower fullness quartile. */
2119 arena_bin_run_refile(arena, run->bin, run, size, false);
2122 /* Medium allocation. */
2124 size = mapelm->npages << pagesize_2pow;
2127 memset(ptr, 0x5a, size);
2129 arena_run_dalloc(arena, (arena_run_t *)ptr, size);
2133 arena->stats.allocated -= size;
2136 malloc_mutex_unlock(&arena->mtx);
2140 arena_new(arena_t *arena)
2144 size_t pow2_size, run_size;
2146 malloc_mutex_init(&arena->mtx);
2149 memset(&arena->stats, 0, sizeof(arena_stats_t));
2152 /* Initialize chunks. */
2153 RB_INIT(&arena->chunks);
2155 /* Initialize bins. */
2157 /* (2^n)-spaced tiny bins. */
2158 for (i = 0; i < ntbins; i++) {
2159 bin = &arena->bins[i];
2161 qr_new((arena_run_t *)&bin->runs0, link);
2162 qr_new((arena_run_t *)&bin->runs25, link);
2163 qr_new((arena_run_t *)&bin->runs50, link);
2164 qr_new((arena_run_t *)&bin->runs75, link);
2166 bin->reg_size = (1 << (tiny_min_2pow + i));
2169 * Calculate how large of a run to allocate. Make sure that at
2170 * least RUN_MIN_REGS regions fit in the run.
2172 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2173 if (run_size < pagesize)
2174 run_size = pagesize;
2175 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2176 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2177 if (run_size > arena_maxclass)
2178 run_size = arena_maxclass;
2179 bin->run_size = run_size;
2181 assert(run_size >= sizeof(arena_run_t));
2182 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2183 if (bin->nregs > REGS_MASK_NELMS * (sizeof(unsigned) << 3)) {
2184 /* Take care not to overflow regs_mask. */
2185 bin->nregs = REGS_MASK_NELMS * (sizeof(unsigned) << 3);
2187 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2190 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2194 /* Quantum-spaced bins. */
2195 for (; i < ntbins + nqbins; i++) {
2196 bin = &arena->bins[i];
2198 qr_new((arena_run_t *)&bin->runs0, link);
2199 qr_new((arena_run_t *)&bin->runs25, link);
2200 qr_new((arena_run_t *)&bin->runs50, link);
2201 qr_new((arena_run_t *)&bin->runs75, link);
2203 bin->reg_size = quantum * (i - ntbins + 1);
2206 * Calculate how large of a run to allocate. Make sure that at
2207 * least RUN_MIN_REGS regions fit in the run.
2209 pow2_size = pow2_ceil(quantum * (i - ntbins + 1));
2210 run_size = (pow2_size << RUN_MIN_REGS_2POW);
2211 if (run_size < pagesize)
2212 run_size = pagesize;
2213 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2214 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2215 if (run_size > arena_maxclass)
2216 run_size = arena_maxclass;
2217 bin->run_size = run_size;
2219 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2220 assert(bin->nregs <= REGS_MASK_NELMS * (sizeof(unsigned) << 3));
2221 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2224 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2228 /* (2^n)-spaced sub-page bins. */
2229 for (; i < ntbins + nqbins + nsbins; i++) {
2230 bin = &arena->bins[i];
2232 qr_new((arena_run_t *)&bin->runs0, link);
2233 qr_new((arena_run_t *)&bin->runs25, link);
2234 qr_new((arena_run_t *)&bin->runs50, link);
2235 qr_new((arena_run_t *)&bin->runs75, link);
2237 bin->reg_size = (small_max << (i - (ntbins + nqbins) + 1));
2240 * Calculate how large of a run to allocate. Make sure that at
2241 * least RUN_MIN_REGS regions fit in the run.
2243 run_size = bin->reg_size << RUN_MIN_REGS_2POW;
2244 if (run_size < pagesize)
2245 run_size = pagesize;
2246 if (run_size > (pagesize << RUN_MAX_PAGES_2POW))
2247 run_size = (pagesize << RUN_MAX_PAGES_2POW);
2248 if (run_size > arena_maxclass)
2249 run_size = arena_maxclass;
2250 bin->run_size = run_size;
2252 bin->nregs = (run_size - sizeof(arena_run_t)) / bin->reg_size;
2253 assert(bin->nregs <= REGS_MASK_NELMS * (sizeof(unsigned) << 3));
2254 bin->reg0_offset = run_size - (bin->nregs * bin->reg_size);
2257 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
2262 arena->magic = ARENA_MAGIC;
2268 /* Create a new arena and insert it into the arenas array at index ind. */
2270 arenas_extend(unsigned ind)
2274 /* Allocate enough space for trailing bins. */
2275 ret = (arena_t *)base_alloc(sizeof(arena_t)
2276 + (sizeof(arena_bin_t) * (ntbins + nqbins + nsbins - 1)));
2277 if (ret != NULL && arena_new(ret) == false) {
2281 /* Only reached if there is an OOM error. */
2284 * OOM here is quite inconvenient to propagate, since dealing with it
2285 * would require a check for failure in the fast path. Instead, punt
 * by using arenas[0].  In practice, this is an extremely unlikely
 * event.
2289 malloc_printf("%s: (malloc) Error initializing arena\n",
2300 /******************************************************************************/
2302 * Begin general internal functions.
 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
 * code if necessary).
2309 static inline arena_t *
2315 * We can only use TLS if this is a PIC library, since for the static
2316 * library version, libc's malloc is used by TLS allocation, which
2317 * introduces a bootstrapping issue.
2322 ret = choose_arena_hard();
2328 * Hash _pthread_self() to one of the arenas. There is a prime
2329 * number of arenas, so this has a reasonable chance of
2330 * working. Even so, the hashing can be easily thwarted by
2331 * inconvenient _pthread_self() values. Without specific
2332 * knowledge of how _pthread_self() calculates values, we can't
2333 * do much better than this.
2335 ind = (unsigned long) _pthread_self() % narenas;
 * Optimistically assume that arenas[ind] has been initialized.
2339 * At worst, we find out that some other thread has already
2340 * done so, after acquiring the lock in preparation. Note that
2341 * this lazy locking also has the effect of lazily forcing
2342 * cache coherency; without the lock acquisition, there's no
2343 * guarantee that modification of arenas[ind] by another thread
2344 * would be seen on this CPU for an arbitrary amount of time.
2346 * In general, this approach to modifying a synchronized value
2347 * isn't a good idea, but in this case we only ever modify the
2348 * value once, so things work out well.
2353 * Avoid races with another thread that may have already
2354 * initialized arenas[ind].
2356 malloc_mutex_lock(&arenas_mtx);
2357 if (arenas[ind] == NULL)
2358 ret = arenas_extend((unsigned)ind);
2361 malloc_mutex_unlock(&arenas_mtx);
2372 * Choose an arena based on a per-thread value (slow-path code only, called
2373 * only by choose_arena()).
2376 choose_arena_hard(void)
2380 /* Assign one of the arenas to this thread, in a round-robin fashion. */
2382 malloc_mutex_lock(&arenas_mtx);
2383 ret = arenas[next_arena];
2385 ret = arenas_extend(next_arena);
2386 next_arena = (next_arena + 1) % narenas;
2387 malloc_mutex_unlock(&arenas_mtx);
2397 huge_malloc(size_t size)
2403 /* Allocate a chunk for this request. */
2405 chunk_size = CHUNK_CEILING(size);
2406 if (chunk_size == 0) {
2407 /* size is large enough to cause size_t wrap-around. */
2411 /* Allocate a chunk node with which to track the chunk. */
2412 node = base_chunk_node_alloc();
2416 ret = chunk_alloc(chunk_size);
2418 base_chunk_node_dealloc(node);
2422 /* Insert node into chunks. */
2424 node->size = chunk_size;
2426 malloc_mutex_lock(&chunks_mtx);
2427 RB_INSERT(chunk_tree_s, &huge, node);
2430 huge_allocated += chunk_size;
2432 malloc_mutex_unlock(&chunks_mtx);
2434 if (opt_junk && ret != NULL)
2435 memset(ret, 0xa5, chunk_size);
2436 else if (opt_zero && ret != NULL)
2437 memset(ret, 0, chunk_size);
2443 huge_ralloc(void *ptr, size_t size, size_t oldsize)
2447 /* Avoid moving the allocation if the size class would not change. */
2448 if (oldsize > arena_maxclass &&
2449 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize))
2453 * If we get here, then size and oldsize are different enough that we
2454 * need to use a different size class. In that case, fall back to
2455 * allocating new space and copying.
2457 ret = huge_malloc(size);
2461 if (CHUNK_ADDR2BASE(ptr) == ptr) {
2462 /* The old allocation is a chunk. */
2464 memcpy(ret, ptr, size);
2466 memcpy(ret, ptr, oldsize);
2468 /* The old allocation is a region. */
2469 assert(oldsize < size);
2470 memcpy(ret, ptr, oldsize);
2477 huge_dalloc(void *ptr)
2482 malloc_mutex_lock(&chunks_mtx);
2484 /* Extract from tree of huge allocations. */
2486 node = RB_FIND(chunk_tree_s, &huge, &key);
2487 assert(node != NULL);
2488 assert(node->chunk == ptr);
2489 RB_REMOVE(chunk_tree_s, &huge, node);
2492 /* Update counters. */
2494 huge_allocated -= node->size;
2497 malloc_mutex_unlock(&chunks_mtx);
2502 memset(node->chunk, 0x5a, node->size);
2504 chunk_dealloc(node->chunk, node->size);
2506 base_chunk_node_dealloc(node);
2510 imalloc(arena_t *arena, size_t size)
2514 assert(arena != NULL);
2515 assert(arena->magic == ARENA_MAGIC);
2518 if (size <= arena_maxclass)
2519 ret = arena_malloc(arena, size);
2521 ret = huge_malloc(size);
2524 malloc_mutex_lock(&arena->mtx);
2525 arena->stats.nmalloc++;
2526 malloc_mutex_unlock(&arena->mtx);
2533 ipalloc(arena_t *arena, size_t alignment, size_t size)
2538 assert(arena != NULL);
2539 assert(arena->magic == ARENA_MAGIC);
2542 * Round up to the nearest power of two that is >= alignment and >= size.
2545 if (size > alignment)
2546 pow2_size = pow2_ceil(size);
2548 pow2_size = alignment;
2549 pow2_size = QUANTUM_CEILING(pow2_size);
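/*
 * Illustrative examples (hypothetical values, 16 byte quantum): a
 * request of size 1500 with alignment 1024 yields pow2_ceil(1500) ==
 * 2048; size 100 with alignment 256 yields 256; size 8 with alignment
 * 8 is rounded up to the 16 byte quantum by QUANTUM_CEILING().
 */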
2550 if (pow2_size < size) {
2551 /* size_t overflow. */
2555 if (pow2_size <= arena_maxclass)
2556 ret = arena_malloc(arena, pow2_size);
2558 if (alignment <= chunk_size)
2559 ret = huge_malloc(size);
2561 size_t chunksize, alloc_size, offset;
2565 * This allocation requires alignment that is even
2566 * larger than chunk alignment. This means that
2567 * huge_malloc() isn't good enough.
2569 * Allocate almost twice as many chunks as are demanded
2570 * by the size or alignment, in order to assure the
2571 * alignment can be achieved, then unmap leading and trailing space.
2575 chunksize = CHUNK_CEILING(size);
2577 if (size >= alignment)
2578 alloc_size = chunksize + alignment - chunk_size;
2580 alloc_size = (alignment << 1) - chunk_size;
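/*
 * Illustrative example (hypothetical values, 1 MB chunks): a 1 MB
 * request with 4 MB alignment gives alloc_size = (4 MB << 1) - 1 MB
 * = 7 MB.  Wherever the chunk-aligned mapping lands, at most
 * alignment - chunk_size = 3 MB of leading space must be skipped to
 * reach a 4 MB boundary, which still leaves room for the 1 MB that
 * was requested; the leading and trailing excess is unmapped below.
 */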
2583 * Allocate a chunk node with which to track the chunk.
2585 node = base_chunk_node_alloc();
2589 ret = chunk_alloc(alloc_size);
2591 base_chunk_node_dealloc(node);
2595 offset = (uintptr_t)ret & (alignment - 1);
2596 assert(offset % chunk_size == 0);
2597 assert(offset < alloc_size);
2599 /* Trim trailing space. */
2600 chunk_dealloc((void *)((uintptr_t)ret
2601 + chunksize), alloc_size - chunksize);
2605 /* Trim leading space. */
2606 chunk_dealloc(ret, alignment - offset);
2608 ret = (void *)((uintptr_t)ret + (alignment
2611 trailsize = alloc_size - (alignment - offset)
2613 if (trailsize != 0) {
2614 /* Trim trailing space. */
2615 assert(trailsize < alloc_size);
2616 chunk_dealloc((void *)((uintptr_t)ret
2617 + chunksize), trailsize);
2621 /* Insert node into chunks. */
2623 node->size = chunksize;
2625 malloc_mutex_lock(&chunks_mtx);
2626 RB_INSERT(chunk_tree_s, &huge, node);
2628 huge_allocated += chunksize;
2630 malloc_mutex_unlock(&chunks_mtx);
2633 memset(ret, 0xa5, chunksize);
2635 memset(ret, 0, chunksize);
2640 malloc_mutex_lock(&arena->mtx);
2641 arena->stats.npalloc++;
2642 malloc_mutex_unlock(&arena->mtx);
2644 assert(((uintptr_t)ret & (alignment - 1)) == 0);
2649 icalloc(arena_t *arena, size_t size)
2653 assert(arena != NULL);
2654 assert(arena->magic == ARENA_MAGIC);
2656 if (size <= arena_maxclass) {
2657 ret = arena_malloc(arena, size);
2660 memset(ret, 0, size);
2663 * The virtual memory system provides zero-filled pages, so
2664 * there is no need to do so manually, unless opt_junk is
2665 * enabled, in which case huge_malloc() fills huge allocations with junk.
2668 ret = huge_malloc(size);
2673 memset(ret, 0, size);
2675 else if ((uintptr_t)ret >= (uintptr_t)brk_base
2676 && (uintptr_t)ret < (uintptr_t)brk_max) {
2678 * This may be a re-used brk chunk. Therefore, zero the memory.
2681 memset(ret, 0, size);
2687 malloc_mutex_lock(&arena->mtx);
2688 arena->stats.ncalloc++;
2689 malloc_mutex_unlock(&arena->mtx);
2696 isalloc(const void *ptr)
2699 arena_chunk_t *chunk;
2701 assert(ptr != NULL);
2702 assert(ptr != &nil);
2704 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2707 assert(chunk->arena->magic == ARENA_MAGIC);
2709 ret = arena_salloc(chunk->arena, ptr);
2711 chunk_node_t *node, key;
2713 /* Chunk (huge allocation). */
2715 malloc_mutex_lock(&chunks_mtx);
2717 /* Extract from tree of huge allocations. */
2718 key.chunk = (void *)ptr;
2719 node = RB_FIND(chunk_tree_s, &huge, &key);
2720 assert(node != NULL);
2724 malloc_mutex_unlock(&chunks_mtx);
2731 iralloc(arena_t *arena, void *ptr, size_t size)
2736 assert(arena != NULL);
2737 assert(arena->magic == ARENA_MAGIC);
2738 assert(ptr != NULL);
2739 assert(ptr != &nil);
2742 oldsize = isalloc(ptr);
2744 if (size <= arena_maxclass)
2745 ret = arena_ralloc(arena, ptr, size, oldsize);
2747 ret = huge_ralloc(ptr, size, oldsize);
2750 malloc_mutex_lock(&arena->mtx);
2751 arena->stats.nralloc++;
2752 malloc_mutex_unlock(&arena->mtx);
2760 arena_chunk_t *chunk;
2762 assert(ptr != NULL);
2763 assert(ptr != &nil);
2765 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2769 malloc_mutex_lock(&chunk->arena->mtx);
2770 chunk->arena->stats.ndalloc++;
2771 malloc_mutex_unlock(&chunk->arena->mtx);
2773 arena_dalloc(chunk->arena, ptr);
2779 malloc_print_stats(void)
2782 if (opt_print_stats) {
2783 malloc_printf("___ Begin malloc statistics ___\n");
2784 malloc_printf("Number of CPUs: %u\n", ncpus);
2785 malloc_printf("Number of arenas: %u\n", narenas);
2786 malloc_printf("Chunk size: %zu (2^%zu)\n", chunk_size,
2788 malloc_printf("Quantum size: %zu (2^%zu)\n", quantum,
2790 malloc_printf("Max small size: %zu\n", small_max);
2791 malloc_printf("Pointer size: %u\n", sizeof(void *));
2792 malloc_printf("Assertions %s\n",
2803 size_t allocated, total;
2807 /* Calculate and print allocated/total stats. */
2810 for (i = 0, allocated = 0; i < narenas; i++) {
2811 if (arenas[i] != NULL) {
2812 malloc_mutex_lock(&arenas[i]->mtx);
2813 allocated += arenas[i]->stats.allocated;
2814 malloc_mutex_unlock(&arenas[i]->mtx);
2819 malloc_mutex_lock(&chunks_mtx);
2820 allocated += huge_allocated;
2821 total = stats_chunks.curchunks * chunk_size;
2822 malloc_mutex_unlock(&chunks_mtx);
2824 malloc_printf("Allocated: %zu, space used: %zu\n",
2827 /* Print chunk stats. */
2829 chunk_stats_t chunks_stats;
2831 malloc_mutex_lock(&chunks_mtx);
2832 chunks_stats = stats_chunks;
2833 malloc_mutex_unlock(&chunks_mtx);
2835 malloc_printf("\nchunks:\n");
2836 malloc_printf(" %13s%13s%13s\n", "nchunks",
2837 "highchunks", "curchunks");
2838 malloc_printf(" %13llu%13lu%13lu\n",
2839 chunks_stats.nchunks,
2840 chunks_stats.highchunks,
2841 chunks_stats.curchunks);
2844 /* Print huge allocation stats. */
2845 malloc_printf("\nhuge:\n");
2846 malloc_printf("%12s %12s %12s\n",
2847 "nmalloc", "ndalloc", "allocated");
2848 malloc_printf("%12llu %12llu %12zu\n",
2849 huge_nmalloc, huge_ndalloc, huge_allocated);
2851 /* Print stats for each arena. */
2852 for (i = 0; i < narenas; i++) {
2854 if (arena != NULL) {
2856 "\narenas[%u] statistics:\n", i);
2857 malloc_mutex_lock(&arena->mtx);
2859 malloc_mutex_unlock(&arena->mtx);
2863 #endif /* #ifdef MALLOC_STATS */
2864 malloc_printf("--- End malloc statistics ---\n");
2869 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
2870 * implementation has to take pains to avoid infinite recursion during initialization.
2873 * atomic_init_start() returns true if it started initializing. In that case,
2874 * the caller must also call atomic_init_finish(), just before returning to its
2875 * caller. This delayed finalization of initialization is critical, since
2876 * otherwise choose_arena() has no way to know whether it's safe to call
2884 * We always initialize before threads are created, since any thread
2885 * creation first triggers allocations.
2887 assert(__isthreaded == 0 || malloc_initialized);
2889 if (malloc_initialized == false)
2890 return (malloc_init_hard());
2896 malloc_init_hard(void)
2900 char buf[PATH_MAX + 1];
2903 /* Get number of CPUs. */
2910 len = sizeof(ncpus);
2911 if (sysctl(mib, 2, &ncpus, &len, (void *) 0, 0) == -1) {
2917 /* Get page size. */
2921 result = sysconf(_SC_PAGESIZE);
2922 assert(result != -1);
2923 pagesize = (unsigned) result;
2926 * We assume that pagesize is a power of 2 when calculating pagesize_2pow.
2929 assert(((result - 1) & result) == 0);
2930 pagesize_2pow = ffs(result) - 1;
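/*
 * For example, with 4 kB pages: result == 4096 == 1 << 12, ffs(4096)
 * returns 13 (the 1-based index of the lowest set bit), so
 * pagesize_2pow == 12.
 */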
2933 for (i = 0; i < 3; i++) {
2934 /* Get runtime configuration. */
2937 if ((linklen = readlink("/etc/malloc.conf", buf,
2938 sizeof(buf) - 1)) != -1) {
2940 * Use the contents of the "/etc/malloc.conf"
2941 * symbolic link's name.
2943 buf[linklen] = '\0';
2946 /* No configuration specified. */
2952 if (issetugid() == 0 && (opts =
2953 getenv("MALLOC_OPTIONS")) != NULL) {
2955 * Do nothing; opts is already initialized to
2956 * the value of the MALLOC_OPTIONS environment variable.
2960 /* No configuration specified. */
2966 if (_malloc_options != NULL) {
2968 * Use options that were compiled into the program.
2970 opts = _malloc_options;
2972 /* No configuration specified. */
2982 for (j = 0; opts[j] != '\0'; j++) {
3004 * Run fullness quartile limits don't have
3005 * enough resolution if there are too few
3006 * regions for the largest bin size classes.
3008 if (opt_chunk_2pow > pagesize_2pow + 3)
3012 if (opt_chunk_2pow < CHUNK_2POW_MAX)
3016 opt_narenas_lshift--;
3019 opt_narenas_lshift++;
3022 opt_print_stats = false;
3025 opt_print_stats = true;
3028 if (opt_quantum_2pow > QUANTUM_2POW_MIN)
3032 if (opt_quantum_2pow < pagesize_2pow - 1)
3036 if (opt_small_max_2pow > QUANTUM_2POW_MIN)
3037 opt_small_max_2pow--;
3040 if (opt_small_max_2pow < pagesize_2pow - 1)
3041 opt_small_max_2pow++;
3056 opt_xmalloc = false;
3068 malloc_printf("%s: (malloc) Unsupported"
3069 " character in malloc options: '%c'\n",
3070 _getprogname(), opts[j]);
3075 /* Take care to call atexit() only once. */
3076 if (opt_print_stats) {
3077 /* Print statistics at exit. */
3078 atexit(malloc_print_stats);
3081 /* Set variables according to the value of opt_small_max_2pow. */
3082 if (opt_small_max_2pow < opt_quantum_2pow)
3083 opt_small_max_2pow = opt_quantum_2pow;
3084 small_max = (1 << opt_small_max_2pow);
3086 /* Set bin-related variables. */
3087 bin_maxclass = (pagesize >> 1);
3088 if (pagesize_2pow > RUN_MIN_REGS_2POW + 1)
3089 tiny_min_2pow = pagesize_2pow - (RUN_MIN_REGS_2POW + 1);
3092 assert(opt_quantum_2pow >= tiny_min_2pow);
3093 ntbins = opt_quantum_2pow - tiny_min_2pow;
3094 assert(ntbins <= opt_quantum_2pow);
3095 nqbins = (small_max >> opt_quantum_2pow);
3096 nsbins = pagesize_2pow - opt_small_max_2pow - 1;
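/*
 * Illustrative example using the defaults described in the header
 * comment (4 kB pages, 16 byte quantum) and an assumed small_max of
 * 512: with the default tiny_min_2pow there are three tiny classes
 * (2, 4 and 8 bytes), nqbins == 512 / 16 == 32 quantum-spaced classes
 * (16 .. 512), and nsbins == 12 - 9 - 1 == 2 sub-page classes (1 kB
 * and 2 kB, where 2 kB == bin_maxclass == pagesize / 2).
 */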
3098 /* Set variables according to the value of opt_quantum_2pow. */
3099 quantum = (1 << opt_quantum_2pow);
3100 quantum_mask = quantum - 1;
3102 small_min = (quantum >> 1) + 1;
3105 assert(small_min <= quantum);
3107 /* Set variables according to the value of opt_chunk_2pow. */
3108 chunk_size = (1 << opt_chunk_2pow);
3109 chunk_size_mask = chunk_size - 1;
3110 arena_chunk_maplen = (1 << (opt_chunk_2pow - pagesize_2pow));
3111 arena_maxclass = (chunk_size >> 1);
3116 memset(&stats_chunks, 0, sizeof(chunk_stats_t));
3119 /* Various sanity checks regarding configuration. */
3120 assert(quantum >= sizeof(void *));
3121 assert(quantum <= pagesize);
3122 assert(chunk_size >= pagesize);
3123 assert(quantum * 4 <= chunk_size);
3125 /* Initialize chunks data. */
3126 malloc_mutex_init(&chunks_mtx);
3130 brk_prev = brk_base;
3131 brk_max = (void *)((uintptr_t)brk_base + MAXDSIZ);
3138 RB_INIT(&old_chunks);
3140 /* Initialize base allocation data structures. */
3146 * Do special brk allocation here, since the base chunk doesn't really
3147 * need to be chunk-aligned.
3154 /* Get the current end of brk. */
3158 * Calculate how much padding is necessary to
3159 * chunk-align the end of brk. Don't worry about
3160 * brk_cur not being chunk-aligned though.
3162 incr = (intptr_t)chunk_size
3163 - (intptr_t)CHUNK_ADDR2OFFSET(brk_cur);
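/*
 * Illustrative example (hypothetical values, 1 MB chunks): if brk
 * currently ends 0x3000 bytes into a chunk, incr == 1 MB - 0x3000, so
 * the sbrk() call below extends brk exactly to the next chunk
 * boundary, and the padding region becomes the initial base
 * allocation space.
 */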
3165 brk_prev = sbrk(incr);
3166 if (brk_prev == brk_cur) {
3170 } while (brk_prev != (void *)-1);
3172 base_chunk = brk_cur;
3173 base_next_addr = base_chunk;
3174 base_past_addr = (void *)((uintptr_t)base_chunk + incr);
3177 stats_chunks.nchunks = 1;
3178 stats_chunks.curchunks = 1;
3179 stats_chunks.highchunks = 1;
3184 * The first base chunk will be allocated when needed by base_alloc().
3187 base_next_addr = NULL;
3188 base_past_addr = NULL;
3190 base_chunk_nodes = NULL;
3191 malloc_mutex_init(&base_mtx);
3195 * For SMP systems, create four times as many arenas as there
3196 * are CPUs by default.
3198 opt_narenas_lshift += 2;
3201 /* Determine how many arenas to use. */
3203 if (opt_narenas_lshift > 0)
3204 narenas <<= opt_narenas_lshift;
3208 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
3209 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
3210 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
3211 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
3212 223, 227, 229, 233, 239, 241, 251, 257, 263};
3213 unsigned i, nprimes, parenas;
3216 * Pick a prime number of hash arenas that is more than narenas
3217 * so that direct hashing of pthread_self() pointers tends to
3218 * spread allocations evenly among the arenas.
3220 assert((narenas & 1) == 0); /* narenas must be even. */
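/*
 * Illustrative example (hypothetical values): with 4 CPUs and the
 * default 4x SMP multiplier applied above, narenas would be 16, and
 * the first prime in the table larger than 16 is 17, so
 * _pthread_self() values are hashed into 17 slots; a prime modulus
 * makes use of more of the pointer's bits than a power-of-two modulus
 * would.
 */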
3221 nprimes = sizeof(primes) / sizeof(unsigned);
3222 parenas = primes[nprimes - 1]; /* In case not enough primes. */
3223 for (i = 1; i < nprimes; i++) {
3224 if (primes[i] > narenas) {
3225 parenas = primes[i];
3237 /* Allocate and initialize arenas. */
3238 arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
3242 * Zero the array. In practice, this should always be pre-zeroed,
3243 * since it was just mmap()ed, but let's be sure.
3245 memset(arenas, 0, sizeof(arena_t *) * narenas);
3248 * Initialize one arena here. The rest are lazily created in
3249 * choose_arena_hard().
3252 if (arenas[0] == NULL)
3255 malloc_mutex_init(&arenas_mtx);
3257 malloc_initialized = true;
3262 * End general internal functions.
3264 /******************************************************************************/
3266 * Begin malloc(3)-compatible functions.
3275 if (malloc_init()) {
3281 if (opt_sysv == false)
3288 arena = choose_arena();
3290 ret = imalloc(arena, size);
3297 malloc_printf("%s: (malloc) Error in malloc(%zu):"
3298 " out of memory\n", _getprogname(), size);
3304 UTRACE(0, size, ret);
3309 posix_memalign(void **memptr, size_t alignment, size_t size)
3318 /* Make sure that alignment is a large enough power of 2. */
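/*
 * Illustrative examples: alignment 24 fails because (24 - 1) & 24 ==
 * 16 != 0 (24 is not a power of two); alignment 32 passes because
 * 31 & 32 == 0 and 32 >= sizeof(void *).
 */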
3319 if (((alignment - 1) & alignment) != 0
3320 || alignment < sizeof(void *)) {
3322 malloc_printf("%s: (malloc) Error in"
3323 " posix_memalign(%zu, %zu):"
3324 " invalid alignment\n",
3325 _getprogname(), alignment, size);
3333 arena = choose_arena();
3335 result = ipalloc(arena, alignment, size);
3340 if (result == NULL) {
3342 malloc_printf("%s: (malloc) Error in"
3343 " posix_memalign(%zu, %zu): out of memory\n",
3344 _getprogname(), alignment, size);
3355 UTRACE(0, size, result);
3360 calloc(size_t num, size_t size)
3365 if (malloc_init()) {
3370 if (num * size == 0) {
3371 if (opt_sysv == false)
3376 } else if ((num * size) / size != num) {
3377 /* size_t overflow. */
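/*
 * Illustrative example for a 32-bit size_t: num == 65536 and size ==
 * 65537 overflows, the truncated product is 65536, and 65536 / 65537
 * == 0 != num, so the overflow is caught before any allocation is
 * attempted.
 */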
3382 arena = choose_arena();
3384 ret = icalloc(arena, num * size);
3391 malloc_printf("%s: (malloc) Error in"
3392 " calloc(%zu, %zu): out of memory\n",
3393 _getprogname(), num, size);
3399 UTRACE(0, num * size, ret);
3404 realloc(void *ptr, size_t size)
3411 if (ptr != &nil && ptr != NULL) {
3412 assert(malloc_initialized);
3414 arena = choose_arena();
3416 ret = iralloc(arena, ptr, size);
3422 malloc_printf("%s: (malloc) Error in"
3423 " ralloc(%p, %zu): out of memory\n",
3424 _getprogname(), ptr, size);
3433 arena = choose_arena();
3435 ret = imalloc(arena, size);
3442 malloc_printf("%s: (malloc) Error in"
3443 " ralloc(%p, %zu): out of memory\n",
3444 _getprogname(), ptr, size);
3451 if (ptr != &nil && ptr != NULL)
3457 UTRACE(ptr, size, ret);
3466 if (ptr != &nil && ptr != NULL) {
3467 assert(malloc_initialized);
3474 * End malloc(3)-compatible functions.
3476 /******************************************************************************/
3478 * Begin library-private functions, used by threading libraries for protection
3479 * of malloc during fork(). These functions are only called if the program is
3480 * running in threaded mode, so there is no need to check whether the program is threaded here.
3485 _malloc_prefork(void)
3489 /* Acquire all mutexes in a safe order. */
3491 malloc_mutex_lock(&arenas_mtx);
3492 for (i = 0; i < narenas; i++) {
3493 if (arenas[i] != NULL)
3494 malloc_mutex_lock(&arenas[i]->mtx);
3496 malloc_mutex_unlock(&arenas_mtx);
3498 malloc_mutex_lock(&base_mtx);
3500 malloc_mutex_lock(&chunks_mtx);
3504 _malloc_postfork(void)
3508 /* Release all mutexes, now that fork() has completed. */
3510 malloc_mutex_unlock(&chunks_mtx);
3512 malloc_mutex_unlock(&base_mtx);
3514 malloc_mutex_lock(&arenas_mtx);
3515 for (i = 0; i < narenas; i++) {
3516 if (arenas[i] != NULL)
3517 malloc_mutex_unlock(&arenas[i]->mtx);
3519 malloc_mutex_unlock(&arenas_mtx);
3523 * End library-private functions.
3525 /******************************************************************************/