2 * Copyright (C) 2006-2010 Jason Evans <jasone@FreeBSD.org>.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in
14 * the documentation and/or other materials provided with the
17 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
18 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
21 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *******************************************************************************
31 * This allocator implementation is designed to provide scalable performance
32 * for multi-threaded programs on multi-processor systems. The following
33 * features are included for this purpose:
35 * + Multiple arenas are used if there are multiple CPUs, which reduces lock
36 * contention and cache sloshing.
38 * + Thread-specific caching is used if there are multiple threads, which
39 * reduces the amount of locking.
41 * + Cache line sharing between arenas is avoided for internal data
44 * + Memory is managed in chunks and runs (chunks can be split into runs),
45 * rather than as individual pages. This provides a constant-time
46 * mechanism for associating allocations with particular arenas.
48 * Allocation requests are rounded up to the nearest size class, and no record
49 * of the original request size is maintained. Allocations are broken into
50 * categories according to size class. Assuming runtime defaults, 4 KiB pages
51 * and a 16 byte quantum on a 32-bit system, the size classes in each category are as follows:
54 * |========================================|
55 * | Category | Subcategory | Size |
56 * |========================================|
57 * | Small | Tiny | 2 |
60 * | |------------------+----------|
61 * | | Quantum-spaced | 16 |
68 * | |------------------+----------|
69 * | | Cacheline-spaced | 192 |
75 * | |------------------+----------|
76 * | | Sub-page | 768 |
83 * |========================================|
91 * |========================================|
99 * |========================================|
104 * |========================================|
106 * Different mechanisms are used according to category:
108 * Small/medium : Each size class is segregated into its own set of runs.
109 * Each run maintains a bitmap of which regions are
112 * Large : Each allocation is backed by a dedicated run. Metadata are stored
113 * in the associated arena chunk header maps.
115 * Huge : Each allocation is backed by a dedicated contiguous set of chunks.
116 * Metadata are stored in a separate red-black tree.
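 *
 * For example, assuming the defaults above (4 KiB pages, 16 byte quantum): a
 * request for 100 bytes is rounded up to the 112 byte quantum-spaced class,
 * and a request for 1000 bytes is rounded up to the 1024 byte sub-page-spaced
 * class.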
118 *******************************************************************************
122 * MALLOC_PRODUCTION disables assertions and statistics gathering. It also
123 * defaults the A and J runtime options to off. These settings are appropriate
124 * for production systems.
126 #define MALLOC_PRODUCTION
128 #ifndef MALLOC_PRODUCTION
130 * MALLOC_DEBUG enables assertions and other sanity checks, and disables
133 # define MALLOC_DEBUG
135 /* MALLOC_STATS enables statistics calculation. */
136 # define MALLOC_STATS
140 * MALLOC_TINY enables support for tiny objects, which are smaller than one
146 * MALLOC_TCACHE enables a thread-specific caching layer for small and medium
147 * objects. This makes it possible to allocate/deallocate objects without any
148 * locking when the cache is in the steady state.
150 #define MALLOC_TCACHE
153 * MALLOC_DSS enables use of sbrk(2) to allocate chunks from the data storage
154 * segment (DSS). In an ideal world, this functionality would be completely
155 * unnecessary, but we are burdened by history and the lack of resource limits
156 * for anonymous mapped memory.
160 #include <sys/cdefs.h>
161 __FBSDID("$FreeBSD$");
163 #include "libc_private.h"
167 #include "spinlock.h"
168 #include "namespace.h"
169 #include <sys/mman.h>
170 #include <sys/param.h>
171 #include <sys/time.h>
172 #include <sys/types.h>
173 #include <sys/sysctl.h>
175 #include <sys/ktrace.h> /* Must come after several other sys/ includes. */
177 #include <machine/cpufunc.h>
178 #include <machine/param.h>
179 #include <machine/vmparam.h>
190 #include <inttypes.h>
196 #include "un-namespace.h"
198 #include "libc_private.h"
202 #if (defined(MALLOC_TCACHE) && defined(MALLOC_STATS))
208 /* Disable inlining to make debugging easier. */
212 /* Size of stack-allocated buffer passed to strerror_r(). */
213 #define STRERROR_BUF 64
216 * Minimum alignment of allocations is 2^LG_QUANTUM bytes.
219 # define LG_QUANTUM 4
220 # define LG_SIZEOF_PTR 2
221 # define CPU_SPINWAIT __asm__ volatile("pause")
222 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
225 # define LG_QUANTUM 4
226 # define LG_SIZEOF_PTR 3
227 # define TLS_MODEL /* default */
230 # define LG_QUANTUM 4
231 # define LG_SIZEOF_PTR 3
235 # define LG_QUANTUM 4
236 # define LG_SIZEOF_PTR 3
237 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
240 # define LG_QUANTUM 4
241 # define LG_SIZEOF_PTR 3
242 # define CPU_SPINWAIT __asm__ volatile("pause")
243 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
246 # define LG_QUANTUM 3
247 # define LG_SIZEOF_PTR 2
251 # define LG_QUANTUM 3
252 # define LG_SIZEOF_PTR 2
256 # define LG_QUANTUM 4
257 # define LG_SIZEOF_PTR 3
258 # define TLS_MODEL /* default */
259 #elif defined(__powerpc__)
260 # define LG_QUANTUM 4
261 # define LG_SIZEOF_PTR 2
262 # define TLS_MODEL /* default */
265 # define LG_QUANTUM 4
268 #define QUANTUM ((size_t)(1U << LG_QUANTUM))
269 #define QUANTUM_MASK (QUANTUM - 1)
271 #define SIZEOF_PTR (1U << LG_SIZEOF_PTR)
273 /* sizeof(int) == (1U << LG_SIZEOF_INT). */
274 #ifndef LG_SIZEOF_INT
275 # define LG_SIZEOF_INT 2
278 /* We can't use TLS in non-PIC programs, since TLS relies on loader magic. */
279 #if (!defined(PIC) && !defined(NO_TLS))
284 /* MALLOC_TCACHE requires TLS. */
285 # ifdef MALLOC_TCACHE
286 # undef MALLOC_TCACHE
291 * Size and alignment of memory chunks that are allocated by the OS's virtual
294 #define LG_CHUNK_DEFAULT 22
297 * The minimum ratio of active:dirty pages per arena is computed as:
299 * (nactive >> opt_lg_dirty_mult) >= ndirty
301 * So, supposing that opt_lg_dirty_mult is 5, there can be no less than 32
302 * times as many active pages as dirty pages.
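 *
 * For example, with opt_lg_dirty_mult at its default of 5 and 1024 active
 * pages in an arena, at most 1024 >> 5 == 32 dirty pages are tolerated before
 * unused dirty pages are purged.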
304 #define LG_DIRTY_MULT_DEFAULT 5
307 * Maximum size of L1 cache line. This is used to avoid cache line aliasing.
308 * In addition, this controls the spacing of cacheline-spaced size classes.
310 #define LG_CACHELINE 6
311 #define CACHELINE ((size_t)(1U << LG_CACHELINE))
312 #define CACHELINE_MASK (CACHELINE - 1)
315 * Subpages are an artificially designated partitioning of pages. Their only
316 * purpose is to support subpage-spaced size classes.
318 * There must be at least 4 subpages per page, due to the way size classes are
322 #define SUBPAGE ((size_t)(1U << LG_SUBPAGE))
323 #define SUBPAGE_MASK (SUBPAGE - 1)
326 /* Smallest size class to support. */
327 # define LG_TINY_MIN 1
331 * Maximum size class that is a multiple of the quantum, but not (necessarily)
332 * a power of 2. Above this size, allocations are rounded up to the nearest
335 #define LG_QSPACE_MAX_DEFAULT 7
338 * Maximum size class that is a multiple of the cacheline, but not (necessarily)
339 * a power of 2. Above this size, allocations are rounded up to the nearest
342 #define LG_CSPACE_MAX_DEFAULT 9
345 * Maximum medium size class. This must not be more than 1/4 of a chunk
346 * (LG_MEDIUM_MAX_DEFAULT <= LG_CHUNK_DEFAULT - 2).
348 #define LG_MEDIUM_MAX_DEFAULT 15
351 * RUN_MAX_OVRHD indicates maximum desired run header overhead. Runs are sized
352 * as small as possible such that this setting is still honored, without
353 * violating other constraints. The goal is to make runs as small as possible
354 * without exceeding a per run external fragmentation threshold.
356 * We use binary fixed point math for overhead computations, where the binary
357 * point is implicitly RUN_BFP bits to the left.
359 * Note that it is possible to set RUN_MAX_OVRHD low enough that it cannot be
360 * honored for some/all object sizes, since there is one bit of header overhead
361 * per object (plus a constant). This constraint is relaxed (ignored) for runs
362 * that are so small that the per-region overhead is greater than:
364 * (RUN_MAX_OVRHD / (reg_size << (3+RUN_BFP)))
367 /* \/ Implicit binary fixed point. */
368 #define RUN_MAX_OVRHD 0x0000003dU
369 #define RUN_MAX_OVRHD_RELAX 0x00001800U
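/*
 * Illustrative reading of the above, assuming a 12-bit binary point (RUN_BFP):
 * RUN_MAX_OVRHD == 0x3d then corresponds to a header overhead cap of 61/4096,
 * i.e. roughly 1.5% of a run's size.
 */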
371 /* Put a cap on small object run size. This overrides RUN_MAX_OVRHD. */
372 #define RUN_MAX_SMALL \
373 (arena_maxclass <= (1U << (CHUNK_MAP_LG_PG_RANGE + PAGE_SHIFT)) \
374 ? arena_maxclass : (1U << (CHUNK_MAP_LG_PG_RANGE + \
378 * Hyper-threaded CPUs may need a special instruction inside spin loops in
379 * order to yield to another virtual CPU. If no such instruction is defined
380 * above, make CPU_SPINWAIT a no-op.
383 # define CPU_SPINWAIT
387 * Adaptive spinning must eventually switch to blocking, in order to avoid the
388 * potential for priority inversion deadlock. Backing off past a certain point
389 * can actually waste time.
391 #define LG_SPIN_LIMIT 11
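/*
 * With LG_SPIN_LIMIT == 11, malloc_spin_lock() spins in rounds of 2, 4, ...,
 * 2048 pause iterations (about 4094 in total), retrying the lock between
 * rounds, before finally blocking.
 */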
395 * Default number of cache slots for each bin in the thread cache (0:
398 # define LG_TCACHE_NSLOTS_DEFAULT 7
400 * (1U << opt_lg_tcache_gc_sweep) is the approximate number of
401 * allocation events between full GC sweeps (-1: disabled). Integer
402 * rounding may cause the actual number to be slightly higher, since GC is
403 * performed incrementally.
405 # define LG_TCACHE_GC_SWEEP_DEFAULT 13
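/*
 * With the default of 13 above, a full garbage-collection pass over the thread
 * cache completes roughly every 2^13 == 8192 allocation events, spread
 * incrementally across the bins.
 */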
408 /******************************************************************************/
411 * Mutexes based on spinlocks. We can't use normal pthread spinlocks in all
412 * places, because they require malloc()ed memory, which causes bootstrapping
413 * issues in some cases.
419 /* Set to true once the allocator has been initialized. */
420 static bool malloc_initialized = false;
422 /* Used to avoid initialization races. */
423 static malloc_mutex_t init_lock = {_SPINLOCK_INITIALIZER};
425 /******************************************************************************/
427 * Statistics data structures.
433 typedef struct tcache_bin_stats_s tcache_bin_stats_t;
434 struct tcache_bin_stats_s {
436 * Number of allocation requests that corresponded to the size of this
443 typedef struct malloc_bin_stats_s malloc_bin_stats_t;
444 struct malloc_bin_stats_s {
446 * Number of allocation requests that corresponded to the size of this
452 /* Number of tcache fills from this bin. */
455 /* Number of tcache flushes to this bin. */
459 /* Total number of runs created for this bin's size class. */
463 * Total number of runs reused by extracting them from the runs tree for
464 * this bin's size class.
468 /* High-water mark for this bin. */
471 /* Current number of runs in this bin. */
475 typedef struct malloc_large_stats_s malloc_large_stats_t;
476 struct malloc_large_stats_s {
478 * Number of allocation requests that corresponded to this size class.
482 /* High-water mark for this size class. */
485 /* Current number of runs of this size class. */
489 typedef struct arena_stats_s arena_stats_t;
490 struct arena_stats_s {
491 /* Number of bytes currently mapped. */
495 * Total number of purge sweeps, total number of madvise calls made,
496 * and total pages purged in order to keep dirty unused memory under
503 /* Per-size-category statistics. */
504 size_t allocated_small;
505 uint64_t nmalloc_small;
506 uint64_t ndalloc_small;
508 size_t allocated_medium;
509 uint64_t nmalloc_medium;
510 uint64_t ndalloc_medium;
512 size_t allocated_large;
513 uint64_t nmalloc_large;
514 uint64_t ndalloc_large;
517 * One element for each possible size class, including sizes that
518 * overlap with bin size classes. This is necessary because ipalloc()
519 * sometimes has to use such large objects in order to assure proper
522 malloc_large_stats_t *lstats;
525 typedef struct chunk_stats_s chunk_stats_t;
526 struct chunk_stats_s {
527 /* Number of chunks that were allocated. */
530 /* High-water mark for number of chunks allocated. */
534 * Current number of chunks allocated. This value isn't maintained for
535 * any other purpose, so keep track of it in order to be able to set
541 #endif /* #ifdef MALLOC_STATS */
543 /******************************************************************************/
545 * Extent data structures.
548 /* Tree of extents. */
549 typedef struct extent_node_s extent_node_t;
550 struct extent_node_s {
552 /* Linkage for the size/address-ordered tree. */
553 rb_node(extent_node_t) link_szad;
556 /* Linkage for the address-ordered tree. */
557 rb_node(extent_node_t) link_ad;
559 /* Pointer to the extent that this tree node is responsible for. */
562 /* Total region size. */
565 typedef rb_tree(extent_node_t) extent_tree_t;
567 /******************************************************************************/
569 * Arena data structures.
572 typedef struct arena_s arena_t;
573 typedef struct arena_bin_s arena_bin_t;
575 /* Each element of the chunk map corresponds to one page within the chunk. */
576 typedef struct arena_chunk_map_s arena_chunk_map_t;
577 struct arena_chunk_map_s {
579 * Linkage for run trees. There are two disjoint uses:
581 * 1) arena_t's runs_avail tree.
582 * 2) arena_run_t conceptually uses this linkage for in-use non-full
583 * runs, rather than directly embedding linkage.
585 rb_node(arena_chunk_map_t) link;
588 * Run address (or size) and various flags are stored together. The bit
589 * layout looks like (assuming 32-bit system):
591 * ???????? ???????? ????cccc ccccdzla
593 * ? : Unallocated: Run address for first/last pages, unset for internal
595 * Small/medium: Don't care.
596 * Large: Run size for first page, unset for trailing pages.
598 * c : refcount (could overflow for PAGE_SIZE >= 128 KiB)
604 * Following are example bit patterns for the three types of runs.
606 * p : run page offset
613 * ssssssss ssssssss ssss---- --------
614 * xxxxxxxx xxxxxxxx xxxx---- ----d---
615 * ssssssss ssssssss ssss---- -----z--
618 * pppppppp ppppcccc cccccccc cccc---a
619 * pppppppp ppppcccc cccccccc cccc---a
620 * pppppppp ppppcccc cccccccc cccc---a
623 * ssssssss ssssssss ssss---- ------la
624 * -------- -------- -------- ------la
625 * -------- -------- -------- ------la
628 #define CHUNK_MAP_PG_MASK ((size_t)0xfff00000U)
629 #define CHUNK_MAP_PG_SHIFT 20
630 #define CHUNK_MAP_LG_PG_RANGE 12
632 #define CHUNK_MAP_RC_MASK ((size_t)0xffff0U)
633 #define CHUNK_MAP_RC_ONE ((size_t)0x00010U)
635 #define CHUNK_MAP_FLAGS_MASK ((size_t)0xfU)
636 #define CHUNK_MAP_DIRTY ((size_t)0x8U)
637 #define CHUNK_MAP_ZEROED ((size_t)0x4U)
638 #define CHUNK_MAP_LARGE ((size_t)0x2U)
639 #define CHUNK_MAP_ALLOCATED ((size_t)0x1U)
640 #define CHUNK_MAP_KEY (CHUNK_MAP_DIRTY | CHUNK_MAP_ALLOCATED)
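/*
 * Example decoding, matching the "ssss ... la" pattern above: the map entry
 * for the first page of an allocated large run stores the run size in its
 * high bits, combined with (CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED) in the low
 * flag bits.
 */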
642 typedef rb_tree(arena_chunk_map_t) arena_avail_tree_t;
643 typedef rb_tree(arena_chunk_map_t) arena_run_tree_t;
645 /* Arena chunk header. */
646 typedef struct arena_chunk_s arena_chunk_t;
647 struct arena_chunk_s {
648 /* Arena that owns the chunk. */
651 /* Linkage for the arena's chunks_dirty tree. */
652 rb_node(arena_chunk_t) link_dirty;
655 * True if the chunk is currently in the chunks_dirty tree, due to
656 * having at some point contained one or more dirty pages. Removal
657 * from chunks_dirty is lazy, so (dirtied && ndirty == 0) is possible.
661 /* Number of dirty pages. */
664 /* Map of pages within chunk that keeps track of free/large/small. */
665 arena_chunk_map_t map[1]; /* Dynamically sized. */
667 typedef rb_tree(arena_chunk_t) arena_chunk_tree_t;
669 typedef struct arena_run_s arena_run_t;
673 # define ARENA_RUN_MAGIC 0x384adf93
676 /* Bin this run is associated with. */
679 /* Index of first element that might have a free region. */
680 unsigned regs_minelm;
682 /* Number of free regions in run. */
685 /* Bitmask of in-use regions (0: in use, 1: free). */
686 unsigned regs_mask[1]; /* Dynamically sized. */
691 * Current run being used to service allocations of this bin's size
697 * Tree of non-full runs. This tree is used when looking for an
698 * existing run when runcur is no longer usable. We choose the
699 * non-full run that is lowest in memory; this policy tends to keep
700 * objects packed well, and it can also help reduce the number of
701 * almost-empty chunks.
703 arena_run_tree_t runs;
705 /* Size of regions in a run for this bin's size class. */
708 /* Total size of a run for this bin's size class. */
711 /* Total number of regions in a run for this bin's size class. */
714 /* Number of elements in a run's regs_mask for this bin's size class. */
715 uint32_t regs_mask_nelms;
717 /* Offset of first region in a run for this bin's size class. */
718 uint32_t reg0_offset;
721 /* Bin statistics. */
722 malloc_bin_stats_t stats;
727 typedef struct tcache_s tcache_t;
733 # define ARENA_MAGIC 0x947d3d24
736 /* All operations on this arena require that lock be locked. */
737 pthread_mutex_t lock;
741 # ifdef MALLOC_TCACHE
743 * List of tcaches for extant threads associated with this arena.
744 * Stats from these are merged incrementally, and at exit.
746 ql_head(tcache_t) tcache_ql;
750 /* Tree of dirty-page-containing chunks this arena manages. */
751 arena_chunk_tree_t chunks_dirty;
754 * In order to avoid rapid chunk allocation/deallocation when an arena
755 * oscillates right on the cusp of needing a new chunk, cache the most
756 * recently freed chunk. The spare is left in the arena's chunk trees
757 * until it is deleted.
759 * There is one spare chunk per arena, rather than one spare total, in
760 * order to avoid interactions between multiple threads that could make
761 * a single spare inadequate.
763 arena_chunk_t *spare;
765 /* Number of pages in active runs. */
769 * Current count of pages within unused runs that are potentially
770 * dirty, and for which madvise(... MADV_FREE) has not been called. By
771 * tracking this, we can institute a limit on how much dirty unused
772 * memory is mapped for each arena.
777 * Size/address-ordered tree of this arena's available runs. This tree
778 * is used for first-best-fit run allocation.
780 arena_avail_tree_t runs_avail;
783 * bins is used to store trees of free regions of the following sizes,
784 * assuming a 16-byte quantum, 4 KiB page size, and default
825 arena_bin_t bins[1]; /* Dynamically sized. */
828 /******************************************************************************/
830 * Thread cache data structures.
834 typedef struct tcache_bin_s tcache_bin_t;
835 struct tcache_bin_s {
837 tcache_bin_stats_t tstats;
839 unsigned low_water; /* Min # cached since last GC. */
840 unsigned high_water; /* Max # cached since last GC. */
841 unsigned ncached; /* # of cached objects. */
842 void *slots[1]; /* Dynamically sized. */
847 ql_elm(tcache_t) link; /* Used for aggregating stats. */
849 arena_t *arena; /* This thread's arena. */
850 unsigned ev_cnt; /* Event count since incremental GC. */
851 unsigned next_gc_bin; /* Next bin to GC. */
852 tcache_bin_t *tbins[1]; /* Dynamically sized. */
856 /******************************************************************************/
861 /* Number of CPUs. */
862 static unsigned ncpus;
864 /* Various bin-related settings. */
865 #ifdef MALLOC_TINY /* Number of (2^n)-spaced tiny bins. */
866 # define ntbins ((unsigned)(LG_QUANTUM - LG_TINY_MIN))
870 static unsigned nqbins; /* Number of quantum-spaced bins. */
871 static unsigned ncbins; /* Number of cacheline-spaced bins. */
872 static unsigned nsbins; /* Number of subpage-spaced bins. */
873 static unsigned nmbins; /* Number of medium bins. */
874 static unsigned nbins;
875 static unsigned mbin0; /* mbin offset (nbins - nmbins). */
877 # define tspace_max ((size_t)(QUANTUM >> 1))
879 #define qspace_min QUANTUM
880 static size_t qspace_max;
881 static size_t cspace_min;
882 static size_t cspace_max;
883 static size_t sspace_min;
884 static size_t sspace_max;
885 #define small_maxclass sspace_max
886 #define medium_min PAGE_SIZE
887 static size_t medium_max;
888 #define bin_maxclass medium_max
891 * Soft limit on the number of medium size classes. Spacing between medium
892 * size classes never exceeds pagesize, which can force more than NMBINS_MAX
893 * medium size classes.
895 #define NMBINS_MAX 16
896 /* Spacing between medium size classes. */
897 static size_t lg_mspace;
898 static size_t mspace_mask;
900 static uint8_t const *small_size2bin;
902 * const_small_size2bin is a static constant lookup table that in the common
903 * case can be used as-is for small_size2bin. For dynamically linked programs,
904 * this avoids a page of memory overhead per process.
907 #define S2B_2(i) S2B_1(i) S2B_1(i)
908 #define S2B_4(i) S2B_2(i) S2B_2(i)
909 #define S2B_8(i) S2B_4(i) S2B_4(i)
910 #define S2B_16(i) S2B_8(i) S2B_8(i)
911 #define S2B_32(i) S2B_16(i) S2B_16(i)
912 #define S2B_64(i) S2B_32(i) S2B_32(i)
913 #define S2B_128(i) S2B_64(i) S2B_64(i)
914 #define S2B_256(i) S2B_128(i) S2B_128(i)
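/*
 * Each S2B_n(i) macro expands to n copies of S2B_1(i), so each line of the
 * table below emits a group of identical entries that map to a single bin;
 * the trailing comments note the size class each group corresponds to.
 */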
915 static const uint8_t const_small_size2bin[PAGE_SIZE - 255] = {
917 #if (LG_QUANTUM == 4)
918 /* 64-bit system ************************/
929 S2B_16(S2B_QMIN + 1) /* 32 */
930 S2B_16(S2B_QMIN + 2) /* 48 */
931 S2B_16(S2B_QMIN + 3) /* 64 */
932 S2B_16(S2B_QMIN + 4) /* 80 */
933 S2B_16(S2B_QMIN + 5) /* 96 */
934 S2B_16(S2B_QMIN + 6) /* 112 */
935 S2B_16(S2B_QMIN + 7) /* 128 */
936 # define S2B_CMIN (S2B_QMIN + 8)
938 /* 32-bit system ************************/
948 S2B_8(S2B_QMIN + 1) /* 16 */
949 S2B_8(S2B_QMIN + 2) /* 24 */
950 S2B_8(S2B_QMIN + 3) /* 32 */
951 S2B_8(S2B_QMIN + 4) /* 40 */
952 S2B_8(S2B_QMIN + 5) /* 48 */
953 S2B_8(S2B_QMIN + 6) /* 56 */
954 S2B_8(S2B_QMIN + 7) /* 64 */
955 S2B_8(S2B_QMIN + 8) /* 72 */
956 S2B_8(S2B_QMIN + 9) /* 80 */
957 S2B_8(S2B_QMIN + 10) /* 88 */
958 S2B_8(S2B_QMIN + 11) /* 96 */
959 S2B_8(S2B_QMIN + 12) /* 104 */
960 S2B_8(S2B_QMIN + 13) /* 112 */
961 S2B_8(S2B_QMIN + 14) /* 120 */
962 S2B_8(S2B_QMIN + 15) /* 128 */
963 # define S2B_CMIN (S2B_QMIN + 16)
965 /****************************************/
966 S2B_64(S2B_CMIN + 0) /* 192 */
967 S2B_64(S2B_CMIN + 1) /* 256 */
968 S2B_64(S2B_CMIN + 2) /* 320 */
969 S2B_64(S2B_CMIN + 3) /* 384 */
970 S2B_64(S2B_CMIN + 4) /* 448 */
971 S2B_64(S2B_CMIN + 5) /* 512 */
972 # define S2B_SMIN (S2B_CMIN + 6)
973 S2B_256(S2B_SMIN + 0) /* 768 */
974 S2B_256(S2B_SMIN + 1) /* 1024 */
975 S2B_256(S2B_SMIN + 2) /* 1280 */
976 S2B_256(S2B_SMIN + 3) /* 1536 */
977 S2B_256(S2B_SMIN + 4) /* 1792 */
978 S2B_256(S2B_SMIN + 5) /* 2048 */
979 S2B_256(S2B_SMIN + 6) /* 2304 */
980 S2B_256(S2B_SMIN + 7) /* 2560 */
981 S2B_256(S2B_SMIN + 8) /* 2816 */
982 S2B_256(S2B_SMIN + 9) /* 3072 */
983 S2B_256(S2B_SMIN + 10) /* 3328 */
984 S2B_256(S2B_SMIN + 11) /* 3584 */
985 S2B_256(S2B_SMIN + 12) /* 3840 */
986 #if (PAGE_SHIFT == 13)
987 S2B_256(S2B_SMIN + 13) /* 4096 */
988 S2B_256(S2B_SMIN + 14) /* 4352 */
989 S2B_256(S2B_SMIN + 15) /* 4608 */
990 S2B_256(S2B_SMIN + 16) /* 4864 */
991 S2B_256(S2B_SMIN + 17) /* 5120 */
992 S2B_256(S2B_SMIN + 18) /* 5376 */
993 S2B_256(S2B_SMIN + 19) /* 5632 */
994 S2B_256(S2B_SMIN + 20) /* 5888 */
995 S2B_256(S2B_SMIN + 21) /* 6144 */
996 S2B_256(S2B_SMIN + 22) /* 6400 */
997 S2B_256(S2B_SMIN + 23) /* 6656 */
998 S2B_256(S2B_SMIN + 24) /* 6912 */
999 S2B_256(S2B_SMIN + 25) /* 7168 */
1000 S2B_256(S2B_SMIN + 26) /* 7424 */
1001 S2B_256(S2B_SMIN + 27) /* 7680 */
1002 S2B_256(S2B_SMIN + 28) /* 7936 */
1018 /* Various chunk-related settings. */
1019 static size_t chunksize;
1020 static size_t chunksize_mask; /* (chunksize - 1). */
1021 static size_t chunk_npages;
1022 static size_t arena_chunk_header_npages;
1023 static size_t arena_maxclass; /* Max size class for arenas. */
1030 /* Protects chunk-related data structures. */
1031 static malloc_mutex_t huge_mtx;
1033 /* Tree of chunks that are stand-alone huge allocations. */
1034 static extent_tree_t huge;
1038 * Protects sbrk() calls. This avoids malloc races among threads, though it
1039 * does not protect against races with threads that call sbrk() directly.
1041 static malloc_mutex_t dss_mtx;
1042 /* Base address of the DSS. */
1043 static void *dss_base;
1044 /* Current end of the DSS, or ((void *)-1) if the DSS is exhausted. */
1045 static void *dss_prev;
1046 /* Current upper limit on DSS addresses. */
1047 static void *dss_max;
1050 * Trees of chunks that were previously allocated (trees differ only in node
1051 * ordering). These are used when allocating chunks, in an attempt to re-use
1052 * address space. Depending on function, different tree orderings are needed,
1053 * which is why there are two trees with the same contents.
1055 static extent_tree_t dss_chunks_szad;
1056 static extent_tree_t dss_chunks_ad;
1060 /* Huge allocation statistics. */
1061 static uint64_t huge_nmalloc;
1062 static uint64_t huge_ndalloc;
1063 static size_t huge_allocated;
1066 /****************************/
1068 * base (internal allocation).
1072 * Current pages that are being used for internal memory allocations. These
1073 * pages are carved up in cacheline-size quanta, so that there is no chance of
1074 * false cache line sharing.
1076 static void *base_pages;
1077 static void *base_next_addr;
1078 static void *base_past_addr; /* Addr immediately past base_pages. */
1079 static extent_node_t *base_nodes;
1080 static malloc_mutex_t base_mtx;
1082 static size_t base_mapped;
1091 * Arenas that are used to service external requests. Not all elements of the
1092 * arenas array are necessarily used; arenas are created lazily as needed.
1094 static arena_t **arenas;
1095 static unsigned narenas;
1097 static unsigned next_arena;
1099 static pthread_mutex_t arenas_lock; /* Protects arenas initialization. */
1103 * Map of _pthread_self() --> arenas[???], used for selecting an arena to use
1106 static __thread arena_t *arenas_map TLS_MODEL;
1109 #ifdef MALLOC_TCACHE
1110 /* Map of thread-specific caches. */
1111 static __thread tcache_t *tcache_tls TLS_MODEL;
1114 * Number of cache slots for each bin in the thread cache, or 0 if tcache is
1117 size_t tcache_nslots;
1119 /* Number of tcache allocation/deallocation events between incremental GCs. */
1120 unsigned tcache_gc_incr;
1124 * Used by chunk_alloc_mmap() to decide whether to attempt the fast path and
1125 * potentially avoid some system calls. We can get away without TLS here,
1126 * since the state of mmap_unaligned only affects performance, rather than
1130 static __thread bool mmap_unaligned TLS_MODEL;
1132 static bool mmap_unaligned;
1136 static malloc_mutex_t chunks_mtx;
1137 /* Chunk statistics. */
1138 static chunk_stats_t stats_chunks;
1141 /*******************************/
1143 * Runtime configuration options.
1145 const char *_malloc_options;
1147 #ifndef MALLOC_PRODUCTION
1148 static bool opt_abort = true;
1149 static bool opt_junk = true;
1151 static bool opt_abort = false;
1152 static bool opt_junk = false;
1154 #ifdef MALLOC_TCACHE
1155 static size_t opt_lg_tcache_nslots = LG_TCACHE_NSLOTS_DEFAULT;
1156 static ssize_t opt_lg_tcache_gc_sweep = LG_TCACHE_GC_SWEEP_DEFAULT;
1159 static bool opt_dss = true;
1160 static bool opt_mmap = true;
1162 static ssize_t opt_lg_dirty_mult = LG_DIRTY_MULT_DEFAULT;
1163 static bool opt_stats_print = false;
1164 static size_t opt_lg_qspace_max = LG_QSPACE_MAX_DEFAULT;
1165 static size_t opt_lg_cspace_max = LG_CSPACE_MAX_DEFAULT;
1166 static size_t opt_lg_medium_max = LG_MEDIUM_MAX_DEFAULT;
1167 static size_t opt_lg_chunk = LG_CHUNK_DEFAULT;
1168 static bool opt_utrace = false;
1169 static bool opt_sysv = false;
1170 static bool opt_xmalloc = false;
1171 static bool opt_zero = false;
1172 static int opt_narenas_lshift = 0;
1180 #define UTRACE(a, b, c) \
1182 malloc_utrace_t ut; \
1186 utrace(&ut, sizeof(ut)); \
1189 /******************************************************************************/
1191 * Begin function prototypes for non-inline static functions.
1194 static void malloc_mutex_init(malloc_mutex_t *mutex);
1195 static bool malloc_spin_init(pthread_mutex_t *lock);
1197 static size_t pow2_ceil(size_t x);
1199 static void wrtmessage(const char *p1, const char *p2, const char *p3,
1202 static void malloc_printf(const char *format, ...);
1204 static char *umax2s(uintmax_t x, unsigned base, char *s);
1206 static bool base_pages_alloc_dss(size_t minsize);
1208 static bool base_pages_alloc_mmap(size_t minsize);
1209 static bool base_pages_alloc(size_t minsize);
1210 static void *base_alloc(size_t size);
1211 static void *base_calloc(size_t number, size_t size);
1212 static extent_node_t *base_node_alloc(void);
1213 static void base_node_dealloc(extent_node_t *node);
1214 static void *pages_map(void *addr, size_t size);
1215 static void pages_unmap(void *addr, size_t size);
1217 static void *chunk_alloc_dss(size_t size, bool *zero);
1218 static void *chunk_recycle_dss(size_t size, bool *zero);
1220 static void *chunk_alloc_mmap_slow(size_t size, bool unaligned);
1221 static void *chunk_alloc_mmap(size_t size);
1222 static void *chunk_alloc(size_t size, bool *zero);
1224 static extent_node_t *chunk_dealloc_dss_record(void *chunk, size_t size);
1225 static bool chunk_dealloc_dss(void *chunk, size_t size);
1227 static void chunk_dealloc_mmap(void *chunk, size_t size);
1228 static void chunk_dealloc(void *chunk, size_t size);
1230 static arena_t *choose_arena_hard(void);
1232 static void arena_run_split(arena_t *arena, arena_run_t *run, size_t size,
1233 bool large, bool zero);
1234 static arena_chunk_t *arena_chunk_alloc(arena_t *arena);
1235 static void arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk);
1236 static arena_run_t *arena_run_alloc(arena_t *arena, size_t size, bool large,
1238 static void arena_purge(arena_t *arena);
1239 static void arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty);
1240 static void arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk,
1241 arena_run_t *run, size_t oldsize, size_t newsize);
1242 static void arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk,
1243 arena_run_t *run, size_t oldsize, size_t newsize, bool dirty);
1244 static arena_run_t *arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin);
1245 static void *arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin);
1246 static size_t arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size);
1247 #ifdef MALLOC_TCACHE
1248 static void tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin,
1250 static void *tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin,
1253 static void *arena_malloc_medium(arena_t *arena, size_t size, bool zero);
1254 static void *arena_malloc_large(arena_t *arena, size_t size, bool zero);
1255 static void *arena_palloc(arena_t *arena, size_t alignment, size_t size,
1257 static bool arena_is_large(const void *ptr);
1258 static size_t arena_salloc(const void *ptr);
1260 arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
1263 static void arena_stats_print(arena_t *arena);
1265 static void stats_print_atexit(void);
1266 #ifdef MALLOC_TCACHE
1267 static void tcache_bin_flush(tcache_bin_t *tbin, size_t binind,
1270 static void arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk,
1272 #ifdef MALLOC_TCACHE
1273 static void arena_dalloc_hard(arena_t *arena, arena_chunk_t *chunk,
1274 void *ptr, arena_chunk_map_t *mapelm, tcache_t *tcache);
1276 static void arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk,
1277 void *ptr, size_t size, size_t oldsize);
1278 static bool arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk,
1279 void *ptr, size_t size, size_t oldsize);
1280 static bool arena_ralloc_large(void *ptr, size_t size, size_t oldsize);
1281 static void *arena_ralloc(void *ptr, size_t size, size_t oldsize);
1282 static bool arena_new(arena_t *arena, unsigned ind);
1283 static arena_t *arenas_extend(unsigned ind);
1284 #ifdef MALLOC_TCACHE
1285 static tcache_bin_t *tcache_bin_create(arena_t *arena);
1286 static void tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin,
1288 # ifdef MALLOC_STATS
1289 static void tcache_stats_merge(tcache_t *tcache, arena_t *arena);
1291 static tcache_t *tcache_create(arena_t *arena);
1292 static void tcache_destroy(tcache_t *tcache);
1294 static void *huge_malloc(size_t size, bool zero);
1295 static void *huge_palloc(size_t alignment, size_t size);
1296 static void *huge_ralloc(void *ptr, size_t size, size_t oldsize);
1297 static void huge_dalloc(void *ptr);
1298 static void malloc_stats_print(void);
1300 static void small_size2bin_validate(void);
1302 static bool small_size2bin_init(void);
1303 static bool small_size2bin_init_hard(void);
1304 static unsigned malloc_ncpus(void);
1305 static bool malloc_init_hard(void);
1308 * End function prototypes.
1310 /******************************************************************************/
1313 wrtmessage(const char *p1, const char *p2, const char *p3, const char *p4)
1316 if (_write(STDERR_FILENO, p1, strlen(p1)) < 0
1317 || _write(STDERR_FILENO, p2, strlen(p2)) < 0
1318 || _write(STDERR_FILENO, p3, strlen(p3)) < 0
1319 || _write(STDERR_FILENO, p4, strlen(p4)) < 0)
1323 void (*_malloc_message)(const char *p1, const char *p2, const char *p3,
1324 const char *p4) = wrtmessage;
1327 * We don't want to depend on vsnprintf() for production builds, since that can
1328 * cause unnecessary bloat for static binaries. umax2s() provides minimal
1329 * integer printing functionality, so that malloc_printf() use can be limited to
1330 * MALLOC_STATS code.
1332 #define UMAX2S_BUFSIZE 65
1334 umax2s(uintmax_t x, unsigned base, char *s)
1338 i = UMAX2S_BUFSIZE - 1;
1344 s[i] = "0123456789"[x % 10];
1351 s[i] = "0123456789abcdef"[x & 0xf];
1358 s[i] = "0123456789abcdefghijklmnopqrstuvwxyz"[x % base];
1367 * Define a custom assert() in order to reduce the chances of deadlock during
1368 * assertion failure.
1371 # define assert(e) do { \
1373 char line_buf[UMAX2S_BUFSIZE]; \
1374 _malloc_message(_getprogname(), ": (malloc) ", \
1376 _malloc_message(umax2s(__LINE__, 10, line_buf), \
1377 ": Failed assertion: ", "\"", #e); \
1378 _malloc_message("\"\n", "", "", ""); \
1388 * Print to stderr in such a way as to (hopefully) avoid memory allocation.
1391 malloc_printf(const char *format, ...)
1396 va_start(ap, format);
1397 vsnprintf(buf, sizeof(buf), format, ap);
1399 _malloc_message(buf, "", "", "");
1403 /******************************************************************************/
1405 * Begin mutex. We can't use normal pthread mutexes in all places, because
1406 * they require malloc()ed memory, which causes bootstrapping issues in some
1411 malloc_mutex_init(malloc_mutex_t *mutex)
1413 static const spinlock_t lock = _SPINLOCK_INITIALIZER;
1419 malloc_mutex_lock(malloc_mutex_t *mutex)
1423 _SPINLOCK(&mutex->lock);
1427 malloc_mutex_unlock(malloc_mutex_t *mutex)
1431 _SPINUNLOCK(&mutex->lock);
1437 /******************************************************************************/
1439 * Begin spin lock. Spin locks here are actually adaptive mutexes that block
1440 * after a period of spinning, because unbounded spinning would allow for
1441 * priority inversion.
1445 * We use an unpublished interface to initialize pthread mutexes with an
1446 * allocation callback, in order to avoid infinite recursion.
1448 int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex,
1449 void *(calloc_cb)(size_t, size_t));
1451 __weak_reference(_pthread_mutex_init_calloc_cb_stub,
1452 _pthread_mutex_init_calloc_cb);
1455 _pthread_mutex_init_calloc_cb_stub(pthread_mutex_t *mutex,
1456 void *(calloc_cb)(size_t, size_t))
1463 malloc_spin_init(pthread_mutex_t *lock)
1466 if (_pthread_mutex_init_calloc_cb(lock, base_calloc) != 0)
1473 malloc_spin_lock(pthread_mutex_t *lock)
1477 if (_pthread_mutex_trylock(lock) != 0) {
1478 /* Exponentially back off if there are multiple CPUs. */
1481 volatile unsigned j;
1483 for (i = 1; i <= LG_SPIN_LIMIT; i++) {
1484 for (j = 0; j < (1U << i); j++) {
1488 if (_pthread_mutex_trylock(lock) == 0)
1494 * Spinning failed. Block until the lock becomes
1495 * available, in order to avoid indefinite priority
1498 _pthread_mutex_lock(lock);
1504 malloc_spin_unlock(pthread_mutex_t *lock)
1508 _pthread_mutex_unlock(lock);
1514 /******************************************************************************/
1516 * Begin Utility functions/macros.
1519 /* Return the chunk address for allocation address a. */
1520 #define CHUNK_ADDR2BASE(a) \
1521 ((void *)((uintptr_t)(a) & ~chunksize_mask))
1523 /* Return the chunk offset of address a. */
1524 #define CHUNK_ADDR2OFFSET(a) \
1525 ((size_t)((uintptr_t)(a) & chunksize_mask))
1527 /* Return the smallest chunk multiple that is >= s. */
1528 #define CHUNK_CEILING(s) \
1529 (((s) + chunksize_mask) & ~chunksize_mask)
1531 /* Return the smallest quantum multiple that is >= a. */
1532 #define QUANTUM_CEILING(a) \
1533 (((a) + QUANTUM_MASK) & ~QUANTUM_MASK)
1535 /* Return the smallest cacheline multiple that is >= s. */
1536 #define CACHELINE_CEILING(s) \
1537 (((s) + CACHELINE_MASK) & ~CACHELINE_MASK)
1539 /* Return the smallest subpage multiple that is >= s. */
1540 #define SUBPAGE_CEILING(s) \
1541 (((s) + SUBPAGE_MASK) & ~SUBPAGE_MASK)
1543 /* Return the smallest medium size class that is >= s. */
1544 #define MEDIUM_CEILING(s) \
1545 (((s) + mspace_mask) & ~mspace_mask)
1547 /* Return the smallest pagesize multiple that is >= s. */
1548 #define PAGE_CEILING(s) \
1549 (((s) + PAGE_MASK) & ~PAGE_MASK)
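/*
 * For example, with 64 byte cachelines and 4 KiB pages,
 * CACHELINE_CEILING(100) == 128 and PAGE_CEILING(5000) == 8192.
 */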
1552 /* Compute the smallest power of 2 that is >= x. */
1563 #if (SIZEOF_PTR == 8)
1571 /******************************************************************************/
1575 base_pages_alloc_dss(size_t minsize)
1579 * Do special DSS allocation here, since base allocations don't need to
1582 malloc_mutex_lock(&dss_mtx);
1583 if (dss_prev != (void *)-1) {
1585 size_t csize = CHUNK_CEILING(minsize);
1588 /* Get the current end of the DSS. */
1592 * Calculate how much padding is necessary to
1593 * chunk-align the end of the DSS. Don't worry about
1594 * dss_max not being chunk-aligned though.
1596 incr = (intptr_t)chunksize
1597 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1599 if ((size_t)incr < minsize)
1602 dss_prev = sbrk(incr);
1603 if (dss_prev == dss_max) {
1605 dss_max = (void *)((intptr_t)dss_prev + incr);
1606 base_pages = dss_prev;
1607 base_next_addr = base_pages;
1608 base_past_addr = dss_max;
1610 base_mapped += incr;
1612 malloc_mutex_unlock(&dss_mtx);
1615 } while (dss_prev != (void *)-1);
1617 malloc_mutex_unlock(&dss_mtx);
1624 base_pages_alloc_mmap(size_t minsize)
1628 assert(minsize != 0);
1629 csize = PAGE_CEILING(minsize);
1630 base_pages = pages_map(NULL, csize);
1631 if (base_pages == NULL)
1633 base_next_addr = base_pages;
1634 base_past_addr = (void *)((uintptr_t)base_pages + csize);
1636 base_mapped += csize;
1643 base_pages_alloc(size_t minsize)
1647 if (opt_mmap && minsize != 0)
1650 if (base_pages_alloc_mmap(minsize) == false)
1656 if (base_pages_alloc_dss(minsize) == false)
1666 base_alloc(size_t size)
1671 /* Round size up to nearest multiple of the cacheline size. */
1672 csize = CACHELINE_CEILING(size);
1674 malloc_mutex_lock(&base_mtx);
1675 /* Make sure there's enough space for the allocation. */
1676 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1677 if (base_pages_alloc(csize)) {
1678 malloc_mutex_unlock(&base_mtx);
1683 ret = base_next_addr;
1684 base_next_addr = (void *)((uintptr_t)base_next_addr + csize);
1685 malloc_mutex_unlock(&base_mtx);
1691 base_calloc(size_t number, size_t size)
1695 ret = base_alloc(number * size);
1697 memset(ret, 0, number * size);
1702 static extent_node_t *
1703 base_node_alloc(void)
1707 malloc_mutex_lock(&base_mtx);
1708 if (base_nodes != NULL) {
1710 base_nodes = *(extent_node_t **)ret;
1711 malloc_mutex_unlock(&base_mtx);
1713 malloc_mutex_unlock(&base_mtx);
1714 ret = (extent_node_t *)base_alloc(sizeof(extent_node_t));
1721 base_node_dealloc(extent_node_t *node)
1724 malloc_mutex_lock(&base_mtx);
1725 *(extent_node_t **)node = base_nodes;
1727 malloc_mutex_unlock(&base_mtx);
1731 * End Utility functions/macros.
1733 /******************************************************************************/
1735 * Begin extent tree code.
1740 extent_szad_comp(extent_node_t *a, extent_node_t *b)
1743 size_t a_size = a->size;
1744 size_t b_size = b->size;
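/* (a > b) - (a < b) yields -1, 0, or 1: size is the primary key, address breaks ties. */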
1746 ret = (a_size > b_size) - (a_size < b_size);
1748 uintptr_t a_addr = (uintptr_t)a->addr;
1749 uintptr_t b_addr = (uintptr_t)b->addr;
1751 ret = (a_addr > b_addr) - (a_addr < b_addr);
1757 /* Wrap red-black tree macros in functions. */
1758 rb_gen(__unused static, extent_tree_szad_, extent_tree_t, extent_node_t,
1759 link_szad, extent_szad_comp)
1763 extent_ad_comp(extent_node_t *a, extent_node_t *b)
1765 uintptr_t a_addr = (uintptr_t)a->addr;
1766 uintptr_t b_addr = (uintptr_t)b->addr;
1768 return ((a_addr > b_addr) - (a_addr < b_addr));
1771 /* Wrap red-black tree macros in functions. */
1772 rb_gen(__unused static, extent_tree_ad_, extent_tree_t, extent_node_t, link_ad,
1776 * End extent tree code.
1778 /******************************************************************************/
1780 * Begin chunk management functions.
1784 pages_map(void *addr, size_t size)
1789 * We don't use MAP_FIXED here, because it can cause the *replacement*
1790 * of existing mappings, and we only want to create new mappings.
1792 ret = mmap(addr, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
1794 assert(ret != NULL);
1796 if (ret == MAP_FAILED)
1798 else if (addr != NULL && ret != addr) {
1800 * We succeeded in mapping memory, but not in the right place.
1802 if (munmap(ret, size) == -1) {
1803 char buf[STRERROR_BUF];
1805 strerror_r(errno, buf, sizeof(buf));
1806 _malloc_message(_getprogname(),
1807 ": (malloc) Error in munmap(): ", buf, "\n");
1814 assert(ret == NULL || (addr == NULL && ret != addr)
1815 || (addr != NULL && ret == addr));
1820 pages_unmap(void *addr, size_t size)
1823 if (munmap(addr, size) == -1) {
1824 char buf[STRERROR_BUF];
1826 strerror_r(errno, buf, sizeof(buf));
1827 _malloc_message(_getprogname(),
1828 ": (malloc) Error in munmap(): ", buf, "\n");
1836 chunk_alloc_dss(size_t size, bool *zero)
1840 ret = chunk_recycle_dss(size, zero);
1845 * sbrk() uses a signed increment argument, so take care not to
1846 * interpret a huge allocation request as a negative increment.
1848 if ((intptr_t)size < 0)
1851 malloc_mutex_lock(&dss_mtx);
1852 if (dss_prev != (void *)-1) {
1856 * The loop is necessary to recover from races with other
1857 * threads that are using the DSS for something other than
1861 /* Get the current end of the DSS. */
1865 * Calculate how much padding is necessary to
1866 * chunk-align the end of the DSS.
1868 incr = (intptr_t)size
1869 - (intptr_t)CHUNK_ADDR2OFFSET(dss_max);
1870 if (incr == (intptr_t)size)
1873 ret = (void *)((intptr_t)dss_max + incr);
1877 dss_prev = sbrk(incr);
1878 if (dss_prev == dss_max) {
1880 dss_max = (void *)((intptr_t)dss_prev + incr);
1881 malloc_mutex_unlock(&dss_mtx);
1885 } while (dss_prev != (void *)-1);
1887 malloc_mutex_unlock(&dss_mtx);
1893 chunk_recycle_dss(size_t size, bool *zero)
1895 extent_node_t *node, key;
1899 malloc_mutex_lock(&dss_mtx);
1900 node = extent_tree_szad_nsearch(&dss_chunks_szad, &key);
1902 void *ret = node->addr;
1904 /* Remove node from the tree. */
1905 extent_tree_szad_remove(&dss_chunks_szad, node);
1906 if (node->size == size) {
1907 extent_tree_ad_remove(&dss_chunks_ad, node);
1908 base_node_dealloc(node);
1911 * Insert the remainder of node's address range as a
1912 * smaller chunk. Its position within dss_chunks_ad
1915 assert(node->size > size);
1916 node->addr = (void *)((uintptr_t)node->addr + size);
1918 extent_tree_szad_insert(&dss_chunks_szad, node);
1920 malloc_mutex_unlock(&dss_mtx);
1923 memset(ret, 0, size);
1926 malloc_mutex_unlock(&dss_mtx);
1933 chunk_alloc_mmap_slow(size_t size, bool unaligned)
1938 /* Beware size_t wrap-around. */
1939 if (size + chunksize <= size)
1942 ret = pages_map(NULL, size + chunksize);
1946 /* Clean up unneeded leading/trailing space. */
1947 offset = CHUNK_ADDR2OFFSET(ret);
1949 /* Note that mmap() returned an unaligned mapping. */
1952 /* Leading space. */
1953 pages_unmap(ret, chunksize - offset);
1955 ret = (void *)((uintptr_t)ret +
1956 (chunksize - offset));
1958 /* Trailing space. */
1959 pages_unmap((void *)((uintptr_t)ret + size),
1962 /* Trailing space only. */
1963 pages_unmap((void *)((uintptr_t)ret + size),
1968 * If mmap() returned an aligned mapping, reset mmap_unaligned so that
1969 * the next chunk_alloc_mmap() execution tries the fast allocation
1972 if (unaligned == false)
1973 mmap_unaligned = false;
1979 chunk_alloc_mmap(size_t size)
1984 * Ideally, there would be a way to specify alignment to mmap() (like
1985 * NetBSD has), but in the absence of such a feature, we have to work
1986 * hard to efficiently create aligned mappings. The reliable, but
1987 * slow method is to create a mapping that is over-sized, then trim the
1988 * excess. However, that always results in at least one call to
1991 * A more optimistic approach is to try mapping precisely the right
1992 * amount, then try to append another mapping if alignment is off. In
1993 * practice, this works out well as long as the application is not
1994 * interleaving mappings via direct mmap() calls. If we do run into a
1995 * situation where there is an interleaved mapping and we are unable to
1996 * extend an unaligned mapping, our best option is to switch to the
1997 * slow method until mmap() returns another aligned mapping. This will
1998 * tend to leave a gap in the memory map that is too small to cause
1999 * later problems for the optimistic method.
2001 * Another possible confounding factor is address space layout
2002 * randomization (ASLR), which causes mmap(2) to disregard the
2003 * requested address. mmap_unaligned tracks whether the previous
2004 * chunk_alloc_mmap() execution received any unaligned or relocated
2005 * mappings, and if so, the current execution will immediately fall
2006 * back to the slow method. However, we keep track of whether the fast
2007 * method would have succeeded, and if so, we make a note to try the
2008 * fast method next time.
2011 if (mmap_unaligned == false) {
2014 ret = pages_map(NULL, size);
2018 offset = CHUNK_ADDR2OFFSET(ret);
2020 mmap_unaligned = true;
2021 /* Try to extend chunk boundary. */
2022 if (pages_map((void *)((uintptr_t)ret + size),
2023 chunksize - offset) == NULL) {
2025 * Extension failed. Clean up, then revert to
2026 * the reliable-but-expensive method.
2028 pages_unmap(ret, size);
2029 ret = chunk_alloc_mmap_slow(size, true);
2031 /* Clean up unneeded leading space. */
2032 pages_unmap(ret, chunksize - offset);
2033 ret = (void *)((uintptr_t)ret + (chunksize -
2038 ret = chunk_alloc_mmap_slow(size, false);
2044 * If the caller specifies (*zero == false), it is still possible to receive
2045 * zeroed memory, in which case *zero is toggled to true. arena_chunk_alloc()
2046 * takes advantage of this to avoid demanding zeroed chunks, but taking
2047 * advantage of them if they are returned.
2050 chunk_alloc(size_t size, bool *zero)
2055 assert((size & chunksize_mask) == 0);
2061 ret = chunk_alloc_mmap(size);
2070 ret = chunk_alloc_dss(size, zero);
2076 /* All strategies for allocation failed. */
2081 malloc_mutex_lock(&chunks_mtx);
2082 stats_chunks.nchunks += (size / chunksize);
2083 stats_chunks.curchunks += (size / chunksize);
2084 if (stats_chunks.curchunks > stats_chunks.highchunks)
2085 stats_chunks.highchunks = stats_chunks.curchunks;
2086 malloc_mutex_unlock(&chunks_mtx);
2090 assert(CHUNK_ADDR2BASE(ret) == ret);
2095 static extent_node_t *
2096 chunk_dealloc_dss_record(void *chunk, size_t size)
2098 extent_node_t *node, *prev, key;
2100 key.addr = (void *)((uintptr_t)chunk + size);
2101 node = extent_tree_ad_nsearch(&dss_chunks_ad, &key);
2102 /* Try to coalesce forward. */
2103 if (node != NULL && node->addr == key.addr) {
2105 * Coalesce chunk with the following address range. This does
2106 * not change the position within dss_chunks_ad, so only
2107 * remove/insert from/into dss_chunks_szad.
2109 extent_tree_szad_remove(&dss_chunks_szad, node);
2112 extent_tree_szad_insert(&dss_chunks_szad, node);
2115 * Coalescing forward failed, so insert a new node. Drop
2116 * dss_mtx during node allocation, since it is possible that a
2117 * new base chunk will be allocated.
2119 malloc_mutex_unlock(&dss_mtx);
2120 node = base_node_alloc();
2121 malloc_mutex_lock(&dss_mtx);
2126 extent_tree_ad_insert(&dss_chunks_ad, node);
2127 extent_tree_szad_insert(&dss_chunks_szad, node);
2130 /* Try to coalesce backward. */
2131 prev = extent_tree_ad_prev(&dss_chunks_ad, node);
2132 if (prev != NULL && (void *)((uintptr_t)prev->addr + prev->size) ==
2135 * Coalesce chunk with the previous address range. This does
2136 * not change the position within dss_chunks_ad, so only
2137 * remove/insert node from/into dss_chunks_szad.
2139 extent_tree_szad_remove(&dss_chunks_szad, prev);
2140 extent_tree_ad_remove(&dss_chunks_ad, prev);
2142 extent_tree_szad_remove(&dss_chunks_szad, node);
2143 node->addr = prev->addr;
2144 node->size += prev->size;
2145 extent_tree_szad_insert(&dss_chunks_szad, node);
2147 base_node_dealloc(prev);
2154 chunk_dealloc_dss(void *chunk, size_t size)
2158 malloc_mutex_lock(&dss_mtx);
2159 if ((uintptr_t)chunk >= (uintptr_t)dss_base
2160 && (uintptr_t)chunk < (uintptr_t)dss_max) {
2161 extent_node_t *node;
2163 /* Try to coalesce with other unused chunks. */
2164 node = chunk_dealloc_dss_record(chunk, size);
2170 /* Get the current end of the DSS. */
2174 * Try to shrink the DSS if this chunk is at the end of the
2175 * DSS. The sbrk() call here is subject to a race condition
2176 * with threads that use brk(2) or sbrk(2) directly, but the
2177 * alternative would be to leak memory for the sake of poorly
2178 * designed multi-threaded programs.
2180 if ((void *)((uintptr_t)chunk + size) == dss_max
2181 && (dss_prev = sbrk(-(intptr_t)size)) == dss_max) {
2183 dss_max = (void *)((intptr_t)dss_prev - (intptr_t)size);
2186 extent_tree_szad_remove(&dss_chunks_szad, node);
2187 extent_tree_ad_remove(&dss_chunks_ad, node);
2188 base_node_dealloc(node);
2191 madvise(chunk, size, MADV_FREE);
2199 malloc_mutex_unlock(&dss_mtx);
2205 chunk_dealloc_mmap(void *chunk, size_t size)
2208 pages_unmap(chunk, size);
2212 chunk_dealloc(void *chunk, size_t size)
2215 assert(chunk != NULL);
2216 assert(CHUNK_ADDR2BASE(chunk) == chunk);
2218 assert((size & chunksize_mask) == 0);
2221 malloc_mutex_lock(&chunks_mtx);
2222 stats_chunks.curchunks -= (size / chunksize);
2223 malloc_mutex_unlock(&chunks_mtx);
2228 if (chunk_dealloc_dss(chunk, size) == false)
2234 chunk_dealloc_mmap(chunk, size);
2238 * End chunk management functions.
2240 /******************************************************************************/
2246 * Choose an arena based on a per-thread value (fast-path code, calls slow-path
2247 * code if necessary).
2249 static inline arena_t *
2255 * We can only use TLS if this is a PIC library, since for the static
2256 * library version, libc's malloc is used by TLS allocation, which
2257 * introduces a bootstrapping issue.
2260 if (__isthreaded == false) {
2261 /* Avoid the overhead of TLS for single-threaded operation. */
2267 ret = choose_arena_hard();
2268 assert(ret != NULL);
2271 if (__isthreaded && narenas > 1) {
2275 * Hash _pthread_self() to one of the arenas. There is a prime
2276 * number of arenas, so this has a reasonable chance of
2277 * working. Even so, the hashing can be easily thwarted by
2278 * inconvenient _pthread_self() values. Without specific
2279 * knowledge of how _pthread_self() calculates values, we can't
2280 * easily do much better than this.
2282 ind = (unsigned long) _pthread_self() % narenas;
2285 * Optimistically assume that arenas[ind] has been initialized.
2286 * At worst, we find out that some other thread has already
2287 * done so, after acquiring the lock in preparation. Note that
2288 * this lazy locking also has the effect of lazily forcing
2289 * cache coherency; without the lock acquisition, there's no
2290 * guarantee that modification of arenas[ind] by another thread
2291 * would be seen on this CPU for an arbitrary amount of time.
2293 * In general, this approach to modifying a synchronized value
2294 * isn't a good idea, but in this case we only ever modify the
2295 * value once, so things work out well.
2300 * Avoid races with another thread that may have already
2301 * initialized arenas[ind].
2303 malloc_spin_lock(&arenas_lock);
2304 if (arenas[ind] == NULL)
2305 ret = arenas_extend((unsigned)ind);
2308 malloc_spin_unlock(&arenas_lock);
2314 assert(ret != NULL);
2320 * Choose an arena based on a per-thread value (slow-path code only, called
2321 * only by choose_arena()).
2324 choose_arena_hard(void)
2328 assert(__isthreaded);
2331 malloc_spin_lock(&arenas_lock);
2332 if ((ret = arenas[next_arena]) == NULL)
2333 ret = arenas_extend(next_arena);
2334 next_arena = (next_arena + 1) % narenas;
2335 malloc_spin_unlock(&arenas_lock);
2346 arena_chunk_comp(arena_chunk_t *a, arena_chunk_t *b)
2348 uintptr_t a_chunk = (uintptr_t)a;
2349 uintptr_t b_chunk = (uintptr_t)b;
2354 return ((a_chunk > b_chunk) - (a_chunk < b_chunk));
2357 /* Wrap red-black tree macros in functions. */
2358 rb_gen(__unused static, arena_chunk_tree_dirty_, arena_chunk_tree_t,
2359 arena_chunk_t, link_dirty, arena_chunk_comp)
2362 arena_run_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2364 uintptr_t a_mapelm = (uintptr_t)a;
2365 uintptr_t b_mapelm = (uintptr_t)b;
2370 return ((a_mapelm > b_mapelm) - (a_mapelm < b_mapelm));
2373 /* Wrap red-black tree macros in functions. */
2374 rb_gen(__unused static, arena_run_tree_, arena_run_tree_t, arena_chunk_map_t,
2375 link, arena_run_comp)
2378 arena_avail_comp(arena_chunk_map_t *a, arena_chunk_map_t *b)
2381 size_t a_size = a->bits & ~PAGE_MASK;
2382 size_t b_size = b->bits & ~PAGE_MASK;
2384 ret = (a_size > b_size) - (a_size < b_size);
2386 uintptr_t a_mapelm, b_mapelm;
2388 if ((a->bits & CHUNK_MAP_KEY) != CHUNK_MAP_KEY)
2389 a_mapelm = (uintptr_t)a;
2392 * Treat keys as though they are lower than anything
2397 b_mapelm = (uintptr_t)b;
2399 ret = (a_mapelm > b_mapelm) - (a_mapelm < b_mapelm);
2405 /* Wrap red-black tree macros in functions. */
2406 rb_gen(__unused static, arena_avail_tree_, arena_avail_tree_t,
2407 arena_chunk_map_t, link, arena_avail_comp)
2410 arena_run_rc_incr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
2412 arena_chunk_t *chunk;
2414 size_t pagebeg, pageend, i;
2416 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2417 arena = chunk->arena;
2418 pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
2419 pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
2420 (uintptr_t)chunk) >> PAGE_SHIFT;
2422 for (i = pagebeg; i <= pageend; i++) {
2423 size_t mapbits = chunk->map[i].bits;
2425 if (mapbits & CHUNK_MAP_DIRTY) {
2426 assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
2429 mapbits ^= CHUNK_MAP_DIRTY;
2431 assert((mapbits & CHUNK_MAP_RC_MASK) != CHUNK_MAP_RC_MASK);
2432 mapbits += CHUNK_MAP_RC_ONE;
2433 chunk->map[i].bits = mapbits;
2438 arena_run_rc_decr(arena_run_t *run, arena_bin_t *bin, const void *ptr)
2440 arena_chunk_t *chunk;
2442 size_t pagebeg, pageend, mapbits, i;
2443 bool dirtier = false;
2445 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
2446 arena = chunk->arena;
2447 pagebeg = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
2448 pageend = ((uintptr_t)ptr + (uintptr_t)(bin->reg_size - 1) -
2449 (uintptr_t)chunk) >> PAGE_SHIFT;
2452 mapbits = chunk->map[pagebeg].bits;
2453 mapbits -= CHUNK_MAP_RC_ONE;
2454 if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
2456 assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2457 mapbits |= CHUNK_MAP_DIRTY;
2461 chunk->map[pagebeg].bits = mapbits;
2463 if (pageend - pagebeg >= 1) {
2465 * Interior pages are completely consumed by the object being
2466 * deallocated, which means that the pages can be
2467 * unconditionally marked dirty.
2469 for (i = pagebeg + 1; i < pageend; i++) {
2470 mapbits = chunk->map[i].bits;
2471 mapbits -= CHUNK_MAP_RC_ONE;
2472 assert((mapbits & CHUNK_MAP_RC_MASK) == 0);
2474 assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2475 mapbits |= CHUNK_MAP_DIRTY;
2478 chunk->map[i].bits = mapbits;
2482 mapbits = chunk->map[pageend].bits;
2483 mapbits -= CHUNK_MAP_RC_ONE;
2484 if ((mapbits & CHUNK_MAP_RC_MASK) == 0) {
2486 assert((mapbits & CHUNK_MAP_DIRTY) == 0);
2487 mapbits |= CHUNK_MAP_DIRTY;
2491 chunk->map[pageend].bits = mapbits;
2495 if (chunk->dirtied == false) {
2496 arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
2498 chunk->dirtied = true;
2501 /* Enforce opt_lg_dirty_mult. */
2502 if (opt_lg_dirty_mult >= 0 && (arena->nactive >>
2503 opt_lg_dirty_mult) < arena->ndirty)
2508 static inline void *
2509 arena_run_reg_alloc(arena_run_t *run, arena_bin_t *bin)
2512 unsigned i, mask, bit, regind;
2514 assert(run->magic == ARENA_RUN_MAGIC);
2515 assert(run->regs_minelm < bin->regs_mask_nelms);
2518 * Move the first check outside the loop, so that run->regs_minelm can
2519 * be updated unconditionally, without the possibility of updating it
2522 i = run->regs_minelm;
2523 mask = run->regs_mask[i];
2525 /* Usable allocation found. */
2526 bit = ffs((int)mask) - 1;
2528 regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
2529 assert(regind < bin->nregs);
2530 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2531 + (bin->reg_size * regind));
2534 mask ^= (1U << bit);
2535 run->regs_mask[i] = mask;
2537 arena_run_rc_incr(run, bin, ret);
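/*
 * For example (illustrative values): if regs_mask[i] == 0x28 (binary
 * 101000), ffs() reports bit 3 as the lowest set bit, so with 32-bit ints
 * regind == (i << 5) + 3, and the returned region lies reg0_offset +
 * regind * reg_size bytes past the start of the run.
 */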
2542 for (i++; i < bin->regs_mask_nelms; i++) {
2543 mask = run->regs_mask[i];
2545 /* Usable allocation found. */
2546 bit = ffs((int)mask) - 1;
2548 regind = ((i << (LG_SIZEOF_INT + 3)) + bit);
2549 assert(regind < bin->nregs);
2550 ret = (void *)(((uintptr_t)run) + bin->reg0_offset
2551 + (bin->reg_size * regind));
2554 mask ^= (1U << bit);
2555 run->regs_mask[i] = mask;
2558 * Make a note that nothing before this element
2559 * contains a free region.
2561 run->regs_minelm = i; /* Low payoff: + (mask == 0); */
2563 arena_run_rc_incr(run, bin, ret);
2574 arena_run_reg_dalloc(arena_run_t *run, arena_bin_t *bin, void *ptr, size_t size)
2576 unsigned shift, diff, regind, elm, bit;
2578 assert(run->magic == ARENA_RUN_MAGIC);
2581 * Avoid doing division with a variable divisor if possible. Using
2582 * actual division here can reduce allocator throughput by over 20%!
2584 diff = (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->reg0_offset);
2586 /* Rescale (factor powers of 2 out of the numerator and denominator). */
2587 shift = ffs(size) - 1;
2592 /* The divisor was a power of 2. */
2596 * To divide by a number D that is not a power of two we
2597 * multiply by (2^21 / D) and then right shift by 21 positions.
2603 * (X * size_invs[D - 3]) >> SIZE_INV_SHIFT
2605 * We can omit the first three elements, because we never
2606 * divide by 0, and 1 and 2 are both powers of two, which are
2609 #define SIZE_INV_SHIFT 21
2610 #define SIZE_INV(s) (((1U << SIZE_INV_SHIFT) / (s)) + 1)
2611 static const unsigned size_invs[] = {
2613 SIZE_INV(4), SIZE_INV(5), SIZE_INV(6), SIZE_INV(7),
2614 SIZE_INV(8), SIZE_INV(9), SIZE_INV(10), SIZE_INV(11),
2615 SIZE_INV(12), SIZE_INV(13), SIZE_INV(14), SIZE_INV(15),
2616 SIZE_INV(16), SIZE_INV(17), SIZE_INV(18), SIZE_INV(19),
2617 SIZE_INV(20), SIZE_INV(21), SIZE_INV(22), SIZE_INV(23),
2618 SIZE_INV(24), SIZE_INV(25), SIZE_INV(26), SIZE_INV(27),
2619 SIZE_INV(28), SIZE_INV(29), SIZE_INV(30), SIZE_INV(31)
2622 if (size <= ((sizeof(size_invs) / sizeof(unsigned)) + 2))
2623 regind = (diff * size_invs[size - 3]) >> SIZE_INV_SHIFT;
2625 regind = diff / size;
2627 #undef SIZE_INV_SHIFT
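/*
 * Worked example (illustrative values): for reg_size == 48 and diff == 96,
 * ffs(48) - 1 == 4, so the rescaled divisor is 3 and the rescaled diff is
 * 6.  SIZE_INV(3) == (1U << 21) / 3 + 1 == 699051, and
 * (6 * 699051) >> 21 == 2, which matches 96 / 48.
 */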
2629 assert(diff == regind * size);
2630 assert(regind < bin->nregs);
2632 elm = regind >> (LG_SIZEOF_INT + 3);
2633 if (elm < run->regs_minelm)
2634 run->regs_minelm = elm;
2635 bit = regind - (elm << (LG_SIZEOF_INT + 3));
2636 assert((run->regs_mask[elm] & (1U << bit)) == 0);
2637 run->regs_mask[elm] |= (1U << bit);
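/*
 * For example (illustrative): with 32-bit ints, regind == 37 gives
 * elm == 37 >> 5 == 1 and bit == 37 - 32 == 5, so bit 5 of regs_mask[1] is
 * set to mark the region free again.
 */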
2639 arena_run_rc_decr(run, bin, ptr);
2643 arena_run_split(arena_t *arena, arena_run_t *run, size_t size, bool large,
2646 arena_chunk_t *chunk;
2647 size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
2649 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2650 old_ndirty = chunk->ndirty;
2651 run_ind = (unsigned)(((uintptr_t)run - (uintptr_t)chunk)
2653 total_pages = (chunk->map[run_ind].bits & ~PAGE_MASK) >>
2655 need_pages = (size >> PAGE_SHIFT);
2656 assert(need_pages > 0);
2657 assert(need_pages <= total_pages);
2658 rem_pages = total_pages - need_pages;
2660 arena_avail_tree_remove(&arena->runs_avail, &chunk->map[run_ind]);
2661 arena->nactive += need_pages;
2663 /* Keep track of trailing unused pages for later use. */
2664 if (rem_pages > 0) {
2665 chunk->map[run_ind+need_pages].bits = (rem_pages <<
2666 PAGE_SHIFT) | (chunk->map[run_ind+need_pages].bits &
2667 CHUNK_MAP_FLAGS_MASK);
2668 chunk->map[run_ind+total_pages-1].bits = (rem_pages <<
2669 PAGE_SHIFT) | (chunk->map[run_ind+total_pages-1].bits &
2670 CHUNK_MAP_FLAGS_MASK);
2671 arena_avail_tree_insert(&arena->runs_avail,
2672 &chunk->map[run_ind+need_pages]);
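/*
 * Illustrative example: splitting an 8-page free run to satisfy a 3-page
 * request leaves a 5-page remainder.  Both the first and the last map
 * entries of the remainder record (5 << PAGE_SHIFT), so later coalescing
 * can find the free run's size from either end.
 */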
2675 for (i = 0; i < need_pages; i++) {
2676 /* Zero if necessary. */
2678 if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED)
2680 memset((void *)((uintptr_t)chunk + ((run_ind
2681 + i) << PAGE_SHIFT)), 0, PAGE_SIZE);
2682 /* CHUNK_MAP_ZEROED is cleared below. */
2686 /* Update dirty page accounting. */
2687 if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
2690 /* CHUNK_MAP_DIRTY is cleared below. */
2693 /* Initialize the chunk map. */
2695 chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE
2696 | CHUNK_MAP_ALLOCATED;
2698 chunk->map[run_ind + i].bits = (i << CHUNK_MAP_PG_SHIFT)
2699 | CHUNK_MAP_ALLOCATED;
2705 * Set the run size only in the first element for large runs.
2706 * This is primarily a debugging aid, since the lack of size
2707 * info for trailing pages only matters if the application
2708 * tries to operate on an interior pointer.
2710 chunk->map[run_ind].bits |= size;
2713 * Initialize the first page's refcount to 1, so that the run
2714 * header is protected from dirty page purging.
2716 chunk->map[run_ind].bits += CHUNK_MAP_RC_ONE;
2720 static arena_chunk_t *
2721 arena_chunk_alloc(arena_t *arena)
2723 arena_chunk_t *chunk;
2726 if (arena->spare != NULL) {
2727 chunk = arena->spare;
2728 arena->spare = NULL;
2734 chunk = (arena_chunk_t *)chunk_alloc(chunksize, &zero);
2738 arena->stats.mapped += chunksize;
2741 chunk->arena = arena;
2742 chunk->dirtied = false;
2745 * Claim that no pages are in use, since the header is merely
2751 * Initialize the map to contain one maximal free untouched run.
2752 * Mark the pages as zeroed iff chunk_alloc() returned a zeroed
2755 zeroed = zero ? CHUNK_MAP_ZEROED : 0;
2756 for (i = 0; i < arena_chunk_header_npages; i++)
2757 chunk->map[i].bits = 0;
2758 chunk->map[i].bits = arena_maxclass | zeroed;
2759 for (i++; i < chunk_npages-1; i++)
2760 chunk->map[i].bits = zeroed;
2761 chunk->map[chunk_npages-1].bits = arena_maxclass | zeroed;
2764 /* Insert the run into the runs_avail tree. */
2765 arena_avail_tree_insert(&arena->runs_avail,
2766 &chunk->map[arena_chunk_header_npages]);
2772 arena_chunk_dealloc(arena_t *arena, arena_chunk_t *chunk)
2775 if (arena->spare != NULL) {
2776 if (arena->spare->dirtied) {
2777 arena_chunk_tree_dirty_remove(
2778 &chunk->arena->chunks_dirty, arena->spare);
2779 arena->ndirty -= arena->spare->ndirty;
2781 chunk_dealloc((void *)arena->spare, chunksize);
2783 arena->stats.mapped -= chunksize;
2788 * Remove run from runs_avail, regardless of whether this chunk
2789 * will be cached, so that the arena does not use it. Dirty page
2790 * flushing only uses the chunks_dirty tree, so leaving this chunk in
2791 * the chunks_* trees is sufficient for that purpose.
2793 arena_avail_tree_remove(&arena->runs_avail,
2794 &chunk->map[arena_chunk_header_npages]);
2796 arena->spare = chunk;
2799 static arena_run_t *
2800 arena_run_alloc(arena_t *arena, size_t size, bool large, bool zero)
2802 arena_chunk_t *chunk;
2804 arena_chunk_map_t *mapelm, key;
2806 assert(size <= arena_maxclass);
2807 assert((size & PAGE_MASK) == 0);
2809 /* Search the arena's chunks for the lowest best fit. */
2810 key.bits = size | CHUNK_MAP_KEY;
2811 mapelm = arena_avail_tree_nsearch(&arena->runs_avail, &key);
2812 if (mapelm != NULL) {
2813 arena_chunk_t *run_chunk = CHUNK_ADDR2BASE(mapelm);
2814 size_t pageind = ((uintptr_t)mapelm - (uintptr_t)run_chunk->map)
2815 / sizeof(arena_chunk_map_t);
2817 run = (arena_run_t *)((uintptr_t)run_chunk + (pageind
2819 arena_run_split(arena, run, size, large, zero);
2824 * No usable runs. Create a new chunk from which to allocate the run.
2826 chunk = arena_chunk_alloc(arena);
2829 run = (arena_run_t *)((uintptr_t)chunk + (arena_chunk_header_npages <<
2831 /* Update page map. */
2832 arena_run_split(arena, run, size, large, zero);
2837 static arena_chunk_t *
2838 chunks_dirty_iter_cb(arena_chunk_tree_t *tree, arena_chunk_t *chunk, void *arg)
2840 size_t *ndirty = (size_t *)arg;
2842 assert(chunk->dirtied);
2843 *ndirty += chunk->ndirty;
2849 arena_purge(arena_t *arena)
2851 arena_chunk_t *chunk;
2856 arena_chunk_tree_dirty_iter(&arena->chunks_dirty, NULL,
2857 chunks_dirty_iter_cb, (void *)&ndirty);
2858 assert(ndirty == arena->ndirty);
2860 assert((arena->nactive >> opt_lg_dirty_mult) < arena->ndirty);
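/*
 * For illustration (the opt_lg_dirty_mult value is hypothetical): with
 * opt_lg_dirty_mult == 5, purging is triggered once ndirty exceeds
 * nactive/32, and the loop below keeps purging until ndirty is no longer
 * greater than nactive/64, i.e. half of the trigger threshold.
 */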
2863 arena->stats.npurge++;
2867 * Iterate downward through chunks until enough dirty memory has been
2868 * purged. Terminate as soon as possible in order to minimize the
2869 * number of system calls, even if a chunk has only been partially
2873 while ((arena->nactive >> (opt_lg_dirty_mult + 1)) < arena->ndirty) {
2874 chunk = arena_chunk_tree_dirty_last(&arena->chunks_dirty);
2875 assert(chunk != NULL);
2877 for (i = chunk_npages - 1; chunk->ndirty > 0; i--) {
2878 assert(i >= arena_chunk_header_npages);
2879 if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
2880 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2881 /* Find adjacent dirty run(s). */
2882 for (npages = 1; i > arena_chunk_header_npages
2883 && (chunk->map[i - 1].bits &
2884 CHUNK_MAP_DIRTY); npages++) {
2886 chunk->map[i].bits ^= CHUNK_MAP_DIRTY;
2888 chunk->ndirty -= npages;
2889 arena->ndirty -= npages;
2891 madvise((void *)((uintptr_t)chunk + (i <<
2892 PAGE_SHIFT)), (npages << PAGE_SHIFT),
2895 arena->stats.nmadvise++;
2896 arena->stats.purged += npages;
2898 if ((arena->nactive >> (opt_lg_dirty_mult + 1))
2904 if (chunk->ndirty == 0) {
2905 arena_chunk_tree_dirty_remove(&arena->chunks_dirty,
2907 chunk->dirtied = false;
2913 arena_run_dalloc(arena_t *arena, arena_run_t *run, bool dirty)
2915 arena_chunk_t *chunk;
2916 size_t size, run_ind, run_pages;
2918 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(run);
2919 run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk)
2921 assert(run_ind >= arena_chunk_header_npages);
2922 assert(run_ind < chunk_npages);
2923 if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0)
2924 size = chunk->map[run_ind].bits & ~PAGE_MASK;
2926 size = run->bin->run_size;
2927 run_pages = (size >> PAGE_SHIFT);
2928 arena->nactive -= run_pages;
2930 /* Mark pages as unallocated in the chunk map. */
2934 for (i = 0; i < run_pages; i++) {
2936 * When (dirty == true), *all* pages within the run
2937 * need to have their dirty bits set, because only
2938 * small runs can create a mixture of clean/dirty
2939 * pages, but such runs are passed to this function
2940 * with (dirty == false).
2942 assert((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY)
2946 chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
2951 for (i = 0; i < run_pages; i++) {
2952 chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE |
2953 CHUNK_MAP_ALLOCATED);
2956 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2957 CHUNK_MAP_FLAGS_MASK);
2958 chunk->map[run_ind+run_pages-1].bits = size |
2959 (chunk->map[run_ind+run_pages-1].bits & CHUNK_MAP_FLAGS_MASK);
2961 /* Try to coalesce forward. */
2962 if (run_ind + run_pages < chunk_npages &&
2963 (chunk->map[run_ind+run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
2964 size_t nrun_size = chunk->map[run_ind+run_pages].bits &
2968 * Remove successor from runs_avail; the coalesced run is
2971 arena_avail_tree_remove(&arena->runs_avail,
2972 &chunk->map[run_ind+run_pages]);
2975 run_pages = size >> PAGE_SHIFT;
2977 assert((chunk->map[run_ind+run_pages-1].bits & ~PAGE_MASK)
2979 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
2980 CHUNK_MAP_FLAGS_MASK);
2981 chunk->map[run_ind+run_pages-1].bits = size |
2982 (chunk->map[run_ind+run_pages-1].bits &
2983 CHUNK_MAP_FLAGS_MASK);
2986 /* Try to coalesce backward. */
2987 if (run_ind > arena_chunk_header_npages && (chunk->map[run_ind-1].bits &
2988 CHUNK_MAP_ALLOCATED) == 0) {
2989 size_t prun_size = chunk->map[run_ind-1].bits & ~PAGE_MASK;
2991 run_ind -= prun_size >> PAGE_SHIFT;
2994 * Remove predecessor from runs_avail; the coalesced run is
2997 arena_avail_tree_remove(&arena->runs_avail,
2998 &chunk->map[run_ind]);
3001 run_pages = size >> PAGE_SHIFT;
3003 assert((chunk->map[run_ind].bits & ~PAGE_MASK) == prun_size);
3004 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits &
3005 CHUNK_MAP_FLAGS_MASK);
3006 chunk->map[run_ind+run_pages-1].bits = size |
3007 (chunk->map[run_ind+run_pages-1].bits &
3008 CHUNK_MAP_FLAGS_MASK);
3011 /* Insert into runs_avail, now that coalescing is complete. */
3012 arena_avail_tree_insert(&arena->runs_avail, &chunk->map[run_ind]);
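/*
 * Illustrative example: freeing a 3-page run that sits between a free
 * 2-page successor and a free 4-page predecessor yields a single 9-page
 * free run.  Only the merged run is inserted above; the neighbors were
 * removed from runs_avail as they were absorbed, and the first and last map
 * entries of the merged run both record its total size.
 */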
3015 * Deallocate chunk if it is now completely unused. The bit
3016 * manipulation checks whether the first run is unallocated and extends
3017 * to the end of the chunk.
3019 if ((chunk->map[arena_chunk_header_npages].bits & (~PAGE_MASK |
3020 CHUNK_MAP_ALLOCATED)) == arena_maxclass)
3021 arena_chunk_dealloc(arena, chunk);
3024 * It is okay to do dirty page processing even if the chunk was
3025 * deallocated above, since in that case it is the spare. Waiting
3026 * until after possible chunk deallocation to do dirty processing
3027 * allows for an old spare to be fully deallocated, thus decreasing the
3028 * chances of spuriously crossing the dirty page purging threshold.
3031 if (chunk->dirtied == false) {
3032 arena_chunk_tree_dirty_insert(&arena->chunks_dirty,
3034 chunk->dirtied = true;
3037 /* Enforce opt_lg_dirty_mult. */
3038 if (opt_lg_dirty_mult >= 0 && (arena->nactive >>
3039 opt_lg_dirty_mult) < arena->ndirty)
3045 arena_run_trim_head(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3046 size_t oldsize, size_t newsize)
3048 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
3049 size_t head_npages = (oldsize - newsize) >> PAGE_SHIFT;
3051 assert(oldsize > newsize);
3054 * Update the chunk map so that arena_run_dalloc() can treat the
3055 * leading run as separately allocated.
3057 assert((chunk->map[pageind].bits & CHUNK_MAP_DIRTY) == 0);
3058 chunk->map[pageind].bits = (oldsize - newsize) | CHUNK_MAP_LARGE |
3059 CHUNK_MAP_ALLOCATED;
3060 assert((chunk->map[pageind+head_npages].bits & CHUNK_MAP_DIRTY) == 0);
3061 chunk->map[pageind+head_npages].bits = newsize | CHUNK_MAP_LARGE |
3062 CHUNK_MAP_ALLOCATED;
3064 arena_run_dalloc(arena, run, false);
3068 arena_run_trim_tail(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3069 size_t oldsize, size_t newsize, bool dirty)
3071 size_t pageind = ((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT;
3072 size_t npages = newsize >> PAGE_SHIFT;
3074 assert(oldsize > newsize);
3077 * Update the chunk map so that arena_run_dalloc() can treat the
3078 * trailing run as separately allocated.
3080 assert((chunk->map[pageind].bits & CHUNK_MAP_DIRTY) == 0);
3081 chunk->map[pageind].bits = newsize | CHUNK_MAP_LARGE |
3082 CHUNK_MAP_ALLOCATED;
3083 assert((chunk->map[pageind+npages].bits & CHUNK_MAP_DIRTY) == 0);
3084 chunk->map[pageind+npages].bits = (oldsize - newsize) | CHUNK_MAP_LARGE
3085 | CHUNK_MAP_ALLOCATED;
3087 arena_run_dalloc(arena, (arena_run_t *)((uintptr_t)run + newsize),
3091 static arena_run_t *
3092 arena_bin_nonfull_run_get(arena_t *arena, arena_bin_t *bin)
3094 arena_chunk_map_t *mapelm;
3096 unsigned i, remainder;
3098 /* Look for a usable run. */
3099 mapelm = arena_run_tree_first(&bin->runs);
3100 if (mapelm != NULL) {
3101 arena_chunk_t *chunk;
3104 /* run is guaranteed to have available space. */
3105 arena_run_tree_remove(&bin->runs, mapelm);
3107 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(mapelm);
3108 pageind = (((uintptr_t)mapelm - (uintptr_t)chunk->map) /
3109 sizeof(arena_chunk_map_t));
3110 run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
3111 ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT))
3114 bin->stats.reruns++;
3118 /* No existing runs have any space available. */
3120 /* Allocate a new run. */
3121 run = arena_run_alloc(arena, bin->run_size, false, false);
3125 /* Initialize run internals. */
3128 for (i = 0; i < bin->regs_mask_nelms - 1; i++)
3129 run->regs_mask[i] = UINT_MAX;
3130 remainder = bin->nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1);
3132 run->regs_mask[i] = UINT_MAX;
3134 /* The last element has spare bits that need to be unset. */
3135 run->regs_mask[i] = (UINT_MAX >> ((1U << (LG_SIZEOF_INT + 3))
3139 run->regs_minelm = 0;
3141 run->nfree = bin->nregs;
3143 run->magic = ARENA_RUN_MAGIC;
3148 bin->stats.curruns++;
3149 if (bin->stats.curruns > bin->stats.highruns)
3150 bin->stats.highruns = bin->stats.curruns;
3155 /* bin->runcur must have space available before this function is called. */
3156 static inline void *
3157 arena_bin_malloc_easy(arena_t *arena, arena_bin_t *bin, arena_run_t *run)
3161 assert(run->magic == ARENA_RUN_MAGIC);
3162 assert(run->nfree > 0);
3164 ret = arena_run_reg_alloc(run, bin);
3165 assert(ret != NULL);
3171 /* Re-fill bin->runcur, then call arena_bin_malloc_easy(). */
3173 arena_bin_malloc_hard(arena_t *arena, arena_bin_t *bin)
3176 bin->runcur = arena_bin_nonfull_run_get(arena, bin);
3177 if (bin->runcur == NULL)
3179 assert(bin->runcur->magic == ARENA_RUN_MAGIC);
3180 assert(bin->runcur->nfree > 0);
3182 return (arena_bin_malloc_easy(arena, bin, bin->runcur));
3186 * Calculate bin->run_size such that it meets the following constraints:
3188 * *) bin->run_size >= min_run_size
3189 * *) bin->run_size <= arena_maxclass
3190 * *) bin->run_size <= RUN_MAX_SMALL
3191 * *) run header overhead <= RUN_MAX_OVRHD (or header overhead relaxed).
3192 * *) run header size < PAGE_SIZE
3194 * bin->nregs, bin->regs_mask_nelms, and bin->reg0_offset are
3195 * also calculated here, since these settings are all interdependent.
3198 arena_bin_run_size_calc(arena_bin_t *bin, size_t min_run_size)
3200 size_t try_run_size, good_run_size;
3201 unsigned good_nregs, good_mask_nelms, good_reg0_offset;
3202 unsigned try_nregs, try_mask_nelms, try_reg0_offset;
3204 assert(min_run_size >= PAGE_SIZE);
3205 assert(min_run_size <= arena_maxclass);
3206 assert(min_run_size <= RUN_MAX_SMALL);
3209 * Calculate known-valid settings before entering the run_size
3210 * expansion loop, so that the first part of the loop always copies
3213 * The do..while loop iteratively reduces the number of regions until
3214 * the run header and the regions no longer overlap. A closed formula
3215 * would be quite messy, since there is an interdependency between the
3216 * header's mask length and the number of regions.
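/*
 * Sketch of one iteration (illustrative, assuming 4 KiB pages and
 * reg_size == 192): starting from try_run_size == 4096, try_nregs begins
 * near (4096 - sizeof(arena_run_t)) / 192 + 1 and is decremented until
 * sizeof(arena_run_t) plus the trailing bitmap words no longer extends past
 * try_reg0_offset == try_run_size - try_nregs * 192.
 */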
3218 try_run_size = min_run_size;
3219 try_nregs = ((try_run_size - sizeof(arena_run_t)) / bin->reg_size)
3220 + 1; /* Counter-act try_nregs-- in loop. */
3223 try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
3224 ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ? 1 : 0);
3225 try_reg0_offset = try_run_size - (try_nregs * bin->reg_size);
3226 } while (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1))
3229 /* run_size expansion loop. */
3232 * Copy valid settings before trying more aggressive settings.
3234 good_run_size = try_run_size;
3235 good_nregs = try_nregs;
3236 good_mask_nelms = try_mask_nelms;
3237 good_reg0_offset = try_reg0_offset;
3239 /* Try more aggressive settings. */
3240 try_run_size += PAGE_SIZE;
3241 try_nregs = ((try_run_size - sizeof(arena_run_t)) /
3242 bin->reg_size) + 1; /* Counter-act try_nregs-- in loop. */
3245 try_mask_nelms = (try_nregs >> (LG_SIZEOF_INT + 3)) +
3246 ((try_nregs & ((1U << (LG_SIZEOF_INT + 3)) - 1)) ?
3248 try_reg0_offset = try_run_size - (try_nregs *
3250 } while (sizeof(arena_run_t) + (sizeof(unsigned) *
3251 (try_mask_nelms - 1)) > try_reg0_offset);
3252 } while (try_run_size <= arena_maxclass && try_run_size <= RUN_MAX_SMALL
3253 && RUN_MAX_OVRHD * (bin->reg_size << 3) > RUN_MAX_OVRHD_RELAX
3254 && (try_reg0_offset << RUN_BFP) > RUN_MAX_OVRHD * try_run_size
3255 && (sizeof(arena_run_t) + (sizeof(unsigned) * (try_mask_nelms - 1)))
3258 assert(sizeof(arena_run_t) + (sizeof(unsigned) * (good_mask_nelms - 1))
3259 <= good_reg0_offset);
3260 assert((good_mask_nelms << (LG_SIZEOF_INT + 3)) >= good_nregs);
3262 /* Copy final settings. */
3263 bin->run_size = good_run_size;
3264 bin->nregs = good_nregs;
3265 bin->regs_mask_nelms = good_mask_nelms;
3266 bin->reg0_offset = good_reg0_offset;
3268 return (good_run_size);
3271 #ifdef MALLOC_TCACHE
3273 tcache_event(tcache_t *tcache)
3276 if (tcache_gc_incr == 0)
3280 assert(tcache->ev_cnt <= tcache_gc_incr);
3281 if (tcache->ev_cnt >= tcache_gc_incr) {
3282 size_t binind = tcache->next_gc_bin;
3283 tcache_bin_t *tbin = tcache->tbins[binind];
3286 if (tbin->high_water == 0) {
3288 * This bin went completely unused for an
3289 * entire GC cycle, so throw away the tbin.
3291 assert(tbin->ncached == 0);
3292 tcache_bin_destroy(tcache, tbin, binind);
3293 tcache->tbins[binind] = NULL;
3295 if (tbin->low_water > 0) {
3297 * Flush (ceiling) half of the objects
3298 * below the low water mark.
3300 tcache_bin_flush(tbin, binind,
3301 tbin->ncached - (tbin->low_water >>
3302 1) - (tbin->low_water & 1));
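/*
 * For example (illustrative): with low_water == 5, the number of objects
 * flushed is (5 >> 1) + (5 & 1) == 3, i.e. ceil(5 / 2), and the remaining
 * ncached - 3 objects stay in the tbin.
 */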
3304 tbin->low_water = tbin->ncached;
3305 tbin->high_water = tbin->ncached;
3309 tcache->next_gc_bin++;
3310 if (tcache->next_gc_bin == nbins)
3311 tcache->next_gc_bin = 0;
3316 static inline void *
3317 tcache_bin_alloc(tcache_bin_t *tbin)
3320 if (tbin->ncached == 0)
3323 if (tbin->ncached < tbin->low_water)
3324 tbin->low_water = tbin->ncached;
3325 return (tbin->slots[tbin->ncached]);
3329 tcache_bin_fill(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
3337 assert(tbin->ncached == 0);
3339 arena = tcache->arena;
3340 bin = &arena->bins[binind];
3341 malloc_spin_lock(&arena->lock);
3342 for (i = 0; i < (tcache_nslots >> 1); i++) {
3343 if ((run = bin->runcur) != NULL && run->nfree > 0)
3344 ptr = arena_bin_malloc_easy(arena, bin, run);
3346 ptr = arena_bin_malloc_hard(arena, bin);
3350 * Fill tbin such that the objects lowest in memory are used
3353 tbin->slots[(tcache_nslots >> 1) - 1 - i] = ptr;
3356 bin->stats.nfills++;
3357 bin->stats.nrequests += tbin->tstats.nrequests;
3358 if (bin->reg_size <= small_maxclass) {
3359 arena->stats.nmalloc_small += (i - tbin->ncached);
3360 arena->stats.allocated_small += (i - tbin->ncached) *
3362 arena->stats.nmalloc_small += tbin->tstats.nrequests;
3364 arena->stats.nmalloc_medium += (i - tbin->ncached);
3365 arena->stats.allocated_medium += (i - tbin->ncached) *
3367 arena->stats.nmalloc_medium += tbin->tstats.nrequests;
3369 tbin->tstats.nrequests = 0;
3371 malloc_spin_unlock(&arena->lock);
3373 if (tbin->ncached > tbin->high_water)
3374 tbin->high_water = tbin->ncached;
3377 static inline void *
3378 tcache_alloc(tcache_t *tcache, size_t size, bool zero)
3384 if (size <= small_maxclass)
3385 binind = small_size2bin[size];
3387 binind = mbin0 + ((MEDIUM_CEILING(size) - medium_min) >>
3390 assert(binind < nbins);
3391 tbin = tcache->tbins[binind];
3393 tbin = tcache_bin_create(tcache->arena);
3396 tcache->tbins[binind] = tbin;
3399 ret = tcache_bin_alloc(tbin);
3401 ret = tcache_alloc_hard(tcache, tbin, binind);
3406 if (zero == false) {
3408 memset(ret, 0xa5, size);
3410 memset(ret, 0, size);
3412 memset(ret, 0, size);
3415 tbin->tstats.nrequests++;
3417 tcache_event(tcache);
3422 tcache_alloc_hard(tcache_t *tcache, tcache_bin_t *tbin, size_t binind)
3426 tcache_bin_fill(tcache, tbin, binind);
3427 ret = tcache_bin_alloc(tbin);
3433 static inline void *
3434 arena_malloc_small(arena_t *arena, size_t size, bool zero)
3441 binind = small_size2bin[size];
3442 assert(binind < mbin0);
3443 bin = &arena->bins[binind];
3444 size = bin->reg_size;
3446 malloc_spin_lock(&arena->lock);
3447 if ((run = bin->runcur) != NULL && run->nfree > 0)
3448 ret = arena_bin_malloc_easy(arena, bin, run);
3450 ret = arena_bin_malloc_hard(arena, bin);
3453 malloc_spin_unlock(&arena->lock);
3458 # ifdef MALLOC_TCACHE
3459 if (__isthreaded == false) {
3461 bin->stats.nrequests++;
3462 arena->stats.nmalloc_small++;
3463 # ifdef MALLOC_TCACHE
3466 arena->stats.allocated_small += size;
3468 malloc_spin_unlock(&arena->lock);
3470 if (zero == false) {
3472 memset(ret, 0xa5, size);
3474 memset(ret, 0, size);
3476 memset(ret, 0, size);
3482 arena_malloc_medium(arena_t *arena, size_t size, bool zero)
3489 size = MEDIUM_CEILING(size);
3490 binind = mbin0 + ((size - medium_min) >> lg_mspace);
3491 assert(binind < nbins);
3492 bin = &arena->bins[binind];
3493 assert(bin->reg_size == size);
3495 malloc_spin_lock(&arena->lock);
3496 if ((run = bin->runcur) != NULL && run->nfree > 0)
3497 ret = arena_bin_malloc_easy(arena, bin, run);
3499 ret = arena_bin_malloc_hard(arena, bin);
3502 malloc_spin_unlock(&arena->lock);
3507 # ifdef MALLOC_TCACHE
3508 if (__isthreaded == false) {
3510 bin->stats.nrequests++;
3511 arena->stats.nmalloc_medium++;
3512 # ifdef MALLOC_TCACHE
3515 arena->stats.allocated_medium += size;
3517 malloc_spin_unlock(&arena->lock);
3519 if (zero == false) {
3521 memset(ret, 0xa5, size);
3523 memset(ret, 0, size);
3525 memset(ret, 0, size);
3531 arena_malloc_large(arena_t *arena, size_t size, bool zero)
3535 /* Large allocation. */
3536 size = PAGE_CEILING(size);
3537 malloc_spin_lock(&arena->lock);
3538 ret = (void *)arena_run_alloc(arena, size, true, zero);
3540 malloc_spin_unlock(&arena->lock);
3544 arena->stats.nmalloc_large++;
3545 arena->stats.allocated_large += size;
3546 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
3547 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
3548 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
3549 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
3550 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
3551 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
3554 malloc_spin_unlock(&arena->lock);
3556 if (zero == false) {
3558 memset(ret, 0xa5, size);
3560 memset(ret, 0, size);
3566 static inline void *
3567 arena_malloc(size_t size, bool zero)
3571 assert(QUANTUM_CEILING(size) <= arena_maxclass);
3573 if (size <= bin_maxclass) {
3574 #ifdef MALLOC_TCACHE
3575 if (__isthreaded && tcache_nslots) {
3576 tcache_t *tcache = tcache_tls;
3577 if ((uintptr_t)tcache > (uintptr_t)1)
3578 return (tcache_alloc(tcache, size, zero));
3579 else if (tcache == NULL) {
3580 tcache = tcache_create(choose_arena());
3583 return (tcache_alloc(tcache, size, zero));
3587 if (size <= small_maxclass) {
3588 return (arena_malloc_small(choose_arena(), size,
3591 return (arena_malloc_medium(choose_arena(),
3595 return (arena_malloc_large(choose_arena(), size, zero));
3598 static inline void *
3599 imalloc(size_t size)
3604 if (size <= arena_maxclass)
3605 return (arena_malloc(size, false));
3607 return (huge_malloc(size, false));
3610 static inline void *
3611 icalloc(size_t size)
3614 if (size <= arena_maxclass)
3615 return (arena_malloc(size, true));
3617 return (huge_malloc(size, true));
3620 /* Only handles large allocations that require more than page alignment. */
3622 arena_palloc(arena_t *arena, size_t alignment, size_t size, size_t alloc_size)
3626 arena_chunk_t *chunk;
3628 assert((size & PAGE_MASK) == 0);
3629 assert((alignment & PAGE_MASK) == 0);
3631 malloc_spin_lock(&arena->lock);
3632 ret = (void *)arena_run_alloc(arena, alloc_size, true, false);
3634 malloc_spin_unlock(&arena->lock);
3638 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ret);
3640 offset = (uintptr_t)ret & (alignment - 1);
3641 assert((offset & PAGE_MASK) == 0);
3642 assert(offset < alloc_size);
3644 arena_run_trim_tail(arena, chunk, ret, alloc_size, size, false);
3646 size_t leadsize, trailsize;
3648 leadsize = alignment - offset;
3650 arena_run_trim_head(arena, chunk, ret, alloc_size,
3651 alloc_size - leadsize);
3652 ret = (void *)((uintptr_t)ret + leadsize);
3655 trailsize = alloc_size - leadsize - size;
3656 if (trailsize != 0) {
3657 /* Trim trailing space. */
3658 assert(trailsize < alloc_size);
3659 arena_run_trim_tail(arena, chunk, ret, size + trailsize,
3665 arena->stats.nmalloc_large++;
3666 arena->stats.allocated_large += size;
3667 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
3668 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
3669 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
3670 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
3671 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
3672 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
3675 malloc_spin_unlock(&arena->lock);
3678 memset(ret, 0xa5, size);
3680 memset(ret, 0, size);
3684 static inline void *
3685 ipalloc(size_t alignment, size_t size)
3691 * Round size up to the nearest multiple of alignment.
3693 * This done, we can take advantage of the fact that for each small
3694 * size class, every object is aligned at the smallest power of two
3695 * that is non-zero in the base two representation of the size. For
3698 * Size | Base 2 | Minimum alignment
3699 * -----+----------+------------------
3701 * 144 | 10010000 | 16
3702 * 192 | 11000000 | 64
3704 * Depending on runtime settings, it is possible that arena_malloc()
3705 * will further round up to a power of two, but that never causes
3706 * correctness issues.
3708 ceil_size = (size + (alignment - 1)) & (-alignment);
3710 * (ceil_size < size) protects against the combination of maximal
3711 * alignment and size greater than maximal alignment.
3713 if (ceil_size < size) {
3714 /* size_t overflow. */
3718 if (ceil_size <= PAGE_SIZE || (alignment <= PAGE_SIZE
3719 && ceil_size <= arena_maxclass))
3720 ret = arena_malloc(ceil_size, false);
3725 * We can't achieve subpage alignment, so round up alignment
3726 * permanently; it makes later calculations simpler.
3728 alignment = PAGE_CEILING(alignment);
3729 ceil_size = PAGE_CEILING(size);
3731 * (ceil_size < size) protects against very large sizes within
3732 * PAGE_SIZE of SIZE_T_MAX.
3734 * (ceil_size + alignment < ceil_size) protects against the
3735 * combination of maximal alignment and ceil_size large enough
3736 * to cause overflow. This is similar to the first overflow
3737 * check above, but it needs to be repeated due to the new
3738 * ceil_size value, which may now be *equal* to maximal
3739 * alignment, whereas before we only detected overflow if the
3740 * original size was *greater* than maximal alignment.
3742 if (ceil_size < size || ceil_size + alignment < ceil_size) {
3743 /* size_t overflow. */
3748 * Calculate the size of the over-size run that arena_palloc()
3749 * would need to allocate in order to guarantee the alignment.
3751 if (ceil_size >= alignment)
3752 run_size = ceil_size + alignment - PAGE_SIZE;
3755 * It is possible that (alignment << 1) will cause
3756 * overflow, but it doesn't matter because we also
3757 * subtract PAGE_SIZE, which in the case of overflow
3758 * leaves us with a very large run_size. That causes
3759 * the first conditional below to fail, which means
3760 * that the bogus run_size value never gets used for
3761 * anything important.
3763 run_size = (alignment << 1) - PAGE_SIZE;
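/*
 * Worked example (illustrative, assuming 4 KiB pages): for
 * ceil_size == 8192 and alignment == 16384, the branch above gives
 * run_size == 32768 - 4096 == 28672.  In the worst case a page-aligned run
 * of that size starts 4096 bytes past a 16384-byte boundary, which still
 * leaves 16384 bytes beginning at the next boundary, enough for the
 * 8192-byte allocation.
 */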
3766 if (run_size <= arena_maxclass) {
3767 ret = arena_palloc(choose_arena(), alignment, ceil_size,
3769 } else if (alignment <= chunksize)
3770 ret = huge_malloc(ceil_size, false);
3772 ret = huge_palloc(alignment, ceil_size);
3775 assert(((uintptr_t)ret & (alignment - 1)) == 0);
3780 arena_is_large(const void *ptr)
3782 arena_chunk_t *chunk;
3783 size_t pageind, mapbits;
3785 assert(ptr != NULL);
3786 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3788 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3789 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3790 mapbits = chunk->map[pageind].bits;
3791 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3792 return ((mapbits & CHUNK_MAP_LARGE) != 0);
3795 /* Return the size of the allocation pointed to by ptr. */
3797 arena_salloc(const void *ptr)
3800 arena_chunk_t *chunk;
3801 size_t pageind, mapbits;
3803 assert(ptr != NULL);
3804 assert(CHUNK_ADDR2BASE(ptr) != ptr);
3806 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3807 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3808 mapbits = chunk->map[pageind].bits;
3809 assert((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3810 if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3811 arena_run_t *run = (arena_run_t *)((uintptr_t)chunk +
3812 (uintptr_t)((pageind - ((mapbits & CHUNK_MAP_PG_MASK) >>
3813 CHUNK_MAP_PG_SHIFT)) << PAGE_SHIFT));
3814 assert(run->magic == ARENA_RUN_MAGIC);
3815 ret = run->bin->reg_size;
3817 ret = mapbits & ~PAGE_MASK;
3824 static inline size_t
3825 isalloc(const void *ptr)
3828 arena_chunk_t *chunk;
3830 assert(ptr != NULL);
3832 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
3835 assert(chunk->arena->magic == ARENA_MAGIC);
3837 ret = arena_salloc(ptr);
3839 extent_node_t *node, key;
3841 /* Chunk (huge allocation). */
3843 malloc_mutex_lock(&huge_mtx);
3845 /* Extract from tree of huge allocations. */
3846 key.addr = __DECONST(void *, ptr);
3847 node = extent_tree_ad_search(&huge, &key);
3848 assert(node != NULL);
3852 malloc_mutex_unlock(&huge_mtx);
3859 arena_dalloc_bin(arena_t *arena, arena_chunk_t *chunk, void *ptr,
3860 arena_chunk_map_t *mapelm)
3867 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
3868 run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
3869 ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT)) <<
3871 assert(run->magic == ARENA_RUN_MAGIC);
3873 size = bin->reg_size;
3876 memset(ptr, 0x5a, size);
3878 arena_run_reg_dalloc(run, bin, ptr, size);
3881 if (run->nfree == bin->nregs)
3882 arena_dalloc_bin_run(arena, chunk, run, bin);
3883 else if (run->nfree == 1 && run != bin->runcur) {
3885 * Make sure that bin->runcur always refers to the lowest
3886 * non-full run, if one exists.
3888 if (bin->runcur == NULL)
3890 else if ((uintptr_t)run < (uintptr_t)bin->runcur) {
3891 /* Switch runcur. */
3892 if (bin->runcur->nfree > 0) {
3893 arena_chunk_t *runcur_chunk =
3894 CHUNK_ADDR2BASE(bin->runcur);
3895 size_t runcur_pageind =
3896 (((uintptr_t)bin->runcur -
3897 (uintptr_t)runcur_chunk)) >> PAGE_SHIFT;
3898 arena_chunk_map_t *runcur_mapelm =
3899 &runcur_chunk->map[runcur_pageind];
3901 /* Insert runcur. */
3902 arena_run_tree_insert(&bin->runs,
3907 size_t run_pageind = (((uintptr_t)run -
3908 (uintptr_t)chunk)) >> PAGE_SHIFT;
3909 arena_chunk_map_t *run_mapelm =
3910 &chunk->map[run_pageind];
3912 assert(arena_run_tree_search(&bin->runs, run_mapelm) ==
3914 arena_run_tree_insert(&bin->runs, run_mapelm);
3919 if (size <= small_maxclass) {
3920 arena->stats.allocated_small -= size;
3921 arena->stats.ndalloc_small++;
3923 arena->stats.allocated_medium -= size;
3924 arena->stats.ndalloc_medium++;
3930 arena_dalloc_bin_run(arena_t *arena, arena_chunk_t *chunk, arena_run_t *run,
3935 /* Deallocate run. */
3936 if (run == bin->runcur)
3938 else if (bin->nregs != 1) {
3939 size_t run_pageind = (((uintptr_t)run -
3940 (uintptr_t)chunk)) >> PAGE_SHIFT;
3941 arena_chunk_map_t *run_mapelm =
3942 &chunk->map[run_pageind];
3944 * This block's conditional is necessary because if the
3945 * run only contains one region, then it never gets
3946 * inserted into the non-full runs tree.
3948 arena_run_tree_remove(&bin->runs, run_mapelm);
3951 * Mark the first page as dirty. The dirty bit for every other page in
3952 * the run is already properly set, which means we can call
3953 * arena_run_dalloc(..., false), thus potentially avoiding the needless
3954 * creation of many dirty pages.
3956 run_ind = (size_t)(((uintptr_t)run - (uintptr_t)chunk) >> PAGE_SHIFT);
3957 assert((chunk->map[run_ind].bits & CHUNK_MAP_DIRTY) == 0);
3958 chunk->map[run_ind].bits |= CHUNK_MAP_DIRTY;
3965 arena_run_dalloc(arena, run, false);
3967 bin->stats.curruns--;
3970 if (chunk->dirtied == false) {
3971 arena_chunk_tree_dirty_insert(&arena->chunks_dirty, chunk);
3972 chunk->dirtied = true;
3974 /* Enforce opt_lg_dirty_mult. */
3975 if (opt_lg_dirty_mult >= 0 && (arena->nactive >> opt_lg_dirty_mult) <
3982 arena_stats_print(arena_t *arena)
3985 malloc_printf("dirty pages: %zu:%zu active:dirty, %"PRIu64" sweep%s,"
3986 " %"PRIu64" madvise%s, %"PRIu64" purged\n",
3987 arena->nactive, arena->ndirty,
3988 arena->stats.npurge, arena->stats.npurge == 1 ? "" : "s",
3989 arena->stats.nmadvise, arena->stats.nmadvise == 1 ? "" : "s",
3990 arena->stats.purged);
3992 malloc_printf(" allocated nmalloc ndalloc\n");
3993 malloc_printf("small: %12zu %12"PRIu64" %12"PRIu64"\n",
3994 arena->stats.allocated_small, arena->stats.nmalloc_small,
3995 arena->stats.ndalloc_small);
3996 malloc_printf("medium: %12zu %12"PRIu64" %12"PRIu64"\n",
3997 arena->stats.allocated_medium, arena->stats.nmalloc_medium,
3998 arena->stats.ndalloc_medium);
3999 malloc_printf("large: %12zu %12"PRIu64" %12"PRIu64"\n",
4000 arena->stats.allocated_large, arena->stats.nmalloc_large,
4001 arena->stats.ndalloc_large);
4002 malloc_printf("total: %12zu %12"PRIu64" %12"PRIu64"\n",
4003 arena->stats.allocated_small + arena->stats.allocated_medium +
4004 arena->stats.allocated_large, arena->stats.nmalloc_small +
4005 arena->stats.nmalloc_medium + arena->stats.nmalloc_large,
4006 arena->stats.ndalloc_small + arena->stats.ndalloc_medium +
4007 arena->stats.ndalloc_large);
4008 malloc_printf("mapped: %12zu\n", arena->stats.mapped);
4010 if (arena->stats.nmalloc_small + arena->stats.nmalloc_medium > 0) {
4011 unsigned i, gap_start;
4012 #ifdef MALLOC_TCACHE
4013 malloc_printf("bins: bin size regs pgs requests "
4014 "nfills nflushes newruns reruns maxruns curruns\n");
4016 malloc_printf("bins: bin size regs pgs requests "
4017 "newruns reruns maxruns curruns\n");
4019 for (i = 0, gap_start = UINT_MAX; i < nbins; i++) {
4020 if (arena->bins[i].stats.nruns == 0) {
4021 if (gap_start == UINT_MAX)
4024 if (gap_start != UINT_MAX) {
4025 if (i > gap_start + 1) {
4027 * Gap of more than one size
4030 malloc_printf("[%u..%u]\n",
4033 /* Gap of one size class. */
4034 malloc_printf("[%u]\n",
4037 gap_start = UINT_MAX;
4040 "%13u %1s %5u %4u %3u %9"PRIu64" %9"PRIu64
4041 #ifdef MALLOC_TCACHE
4042 " %9"PRIu64" %9"PRIu64
4044 " %9"PRIu64" %7zu %7zu\n",
4046 i < ntbins ? "T" : i < ntbins + nqbins ?
4047 "Q" : i < ntbins + nqbins + ncbins ? "C" :
4048 i < ntbins + nqbins + ncbins + nsbins ? "S"
4050 arena->bins[i].reg_size,
4051 arena->bins[i].nregs,
4052 arena->bins[i].run_size >> PAGE_SHIFT,
4053 arena->bins[i].stats.nrequests,
4054 #ifdef MALLOC_TCACHE
4055 arena->bins[i].stats.nfills,
4056 arena->bins[i].stats.nflushes,
4058 arena->bins[i].stats.nruns,
4059 arena->bins[i].stats.reruns,
4060 arena->bins[i].stats.highruns,
4061 arena->bins[i].stats.curruns);
4064 if (gap_start != UINT_MAX) {
4065 if (i > gap_start + 1) {
4066 /* Gap of more than one size class. */
4067 malloc_printf("[%u..%u]\n", gap_start, i - 1);
4069 /* Gap of one size class. */
4070 malloc_printf("[%u]\n", gap_start);
4075 if (arena->stats.nmalloc_large > 0) {
4078 size_t nlclasses = (chunksize - PAGE_SIZE) >> PAGE_SHIFT;
4081 "large: size pages nrequests maxruns curruns\n");
4083 for (i = 0, gap_start = -1; i < nlclasses; i++) {
4084 if (arena->stats.lstats[i].nrequests == 0) {
4085 if (gap_start == -1)
4088 if (gap_start != -1) {
4089 malloc_printf("[%zu]\n", i - gap_start);
4093 "%13zu %5zu %9"PRIu64" %9zu %9zu\n",
4094 (i+1) << PAGE_SHIFT, i+1,
4095 arena->stats.lstats[i].nrequests,
4096 arena->stats.lstats[i].highruns,
4097 arena->stats.lstats[i].curruns);
4100 if (gap_start != -1)
4101 malloc_printf("[%zu]\n", i - gap_start);
4107 stats_print_atexit(void)
4110 #if (defined(MALLOC_TCACHE) && defined(MALLOC_STATS))
4114 * Merge stats from extant threads. This is racy, since individual
4115 * threads do not lock when recording tcache stats events. As a
4116 * consequence, the final stats may be slightly out of date by the time
4117 * they are reported, if other threads continue to allocate.
4119 for (i = 0; i < narenas; i++) {
4120 arena_t *arena = arenas[i];
4121 if (arena != NULL) {
4124 malloc_spin_lock(&arena->lock);
4125 ql_foreach(tcache, &arena->tcache_ql, link) {
4126 tcache_stats_merge(tcache, arena);
4128 malloc_spin_unlock(&arena->lock);
4132 malloc_stats_print();
4135 #ifdef MALLOC_TCACHE
4137 tcache_bin_flush(tcache_bin_t *tbin, size_t binind, unsigned rem)
4139 arena_chunk_t *chunk;
4142 unsigned i, ndeferred, ncached;
4144 for (ndeferred = tbin->ncached - rem; ndeferred > 0;) {
4145 ncached = ndeferred;
4146 /* Lock the arena associated with the first object. */
4147 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(tbin->slots[0]);
4148 arena = chunk->arena;
4149 malloc_spin_lock(&arena->lock);
4150 /* Deallocate every object that belongs to the locked arena. */
4151 for (i = ndeferred = 0; i < ncached; i++) {
4152 ptr = tbin->slots[i];
4153 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4154 if (chunk->arena == arena) {
4155 size_t pageind = (((uintptr_t)ptr -
4156 (uintptr_t)chunk) >> PAGE_SHIFT);
4157 arena_chunk_map_t *mapelm =
4158 &chunk->map[pageind];
4159 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4162 * This object was allocated via a different
4163 * arena than the one that is currently locked.
4164 * Stash the object, so that it can be handled
4167 tbin->slots[ndeferred] = ptr;
4172 arena->bins[binind].stats.nflushes++;
4174 arena_bin_t *bin = &arena->bins[binind];
4175 bin->stats.nrequests += tbin->tstats.nrequests;
4176 if (bin->reg_size <= small_maxclass) {
4177 arena->stats.nmalloc_small +=
4178 tbin->tstats.nrequests;
4180 arena->stats.nmalloc_medium +=
4181 tbin->tstats.nrequests;
4183 tbin->tstats.nrequests = 0;
4186 malloc_spin_unlock(&arena->lock);
4191 * Shift the remaining valid pointers to the base of the slots
4194 memmove(&tbin->slots[0], &tbin->slots[tbin->ncached - rem],
4195 rem * sizeof(void *));
4197 tbin->ncached = rem;
4201 tcache_dalloc(tcache_t *tcache, void *ptr)
4204 arena_chunk_t *chunk;
4208 size_t pageind, binind;
4209 arena_chunk_map_t *mapelm;
4211 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4212 arena = chunk->arena;
4213 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
4214 mapelm = &chunk->map[pageind];
4215 run = (arena_run_t *)((uintptr_t)chunk + (uintptr_t)((pageind -
4216 ((mapelm->bits & CHUNK_MAP_PG_MASK) >> CHUNK_MAP_PG_SHIFT)) <<
4218 assert(run->magic == ARENA_RUN_MAGIC);
4220 binind = ((uintptr_t)bin - (uintptr_t)&arena->bins) /
4221 sizeof(arena_bin_t);
4222 assert(binind < nbins);
4225 memset(ptr, 0x5a, arena->bins[binind].reg_size);
4227 tbin = tcache->tbins[binind];
4229 tbin = tcache_bin_create(choose_arena());
4231 malloc_spin_lock(&arena->lock);
4232 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4233 malloc_spin_unlock(&arena->lock);
4236 tcache->tbins[binind] = tbin;
4239 if (tbin->ncached == tcache_nslots)
4240 tcache_bin_flush(tbin, binind, (tcache_nslots >> 1));
4241 assert(tbin->ncached < tcache_nslots);
4242 tbin->slots[tbin->ncached] = ptr;
4244 if (tbin->ncached > tbin->high_water)
4245 tbin->high_water = tbin->ncached;
4247 tcache_event(tcache);
4252 arena_dalloc_large(arena_t *arena, arena_chunk_t *chunk, void *ptr)
4255 /* Large allocation. */
4256 malloc_spin_lock(&arena->lock);
4258 #ifndef MALLOC_STATS
4262 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >>
4264 size_t size = chunk->map[pageind].bits & ~PAGE_MASK;
4269 memset(ptr, 0x5a, size);
4271 arena->stats.ndalloc_large++;
4272 arena->stats.allocated_large -= size;
4273 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns--;
4277 arena_run_dalloc(arena, (arena_run_t *)ptr, true);
4278 malloc_spin_unlock(&arena->lock);
4282 arena_dalloc(arena_t *arena, arena_chunk_t *chunk, void *ptr)
4285 arena_chunk_map_t *mapelm;
4287 assert(arena != NULL);
4288 assert(arena->magic == ARENA_MAGIC);
4289 assert(chunk->arena == arena);
4290 assert(ptr != NULL);
4291 assert(CHUNK_ADDR2BASE(ptr) != ptr);
4293 pageind = (((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT);
4294 mapelm = &chunk->map[pageind];
4295 assert((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0);
4296 if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
4297 /* Small allocation. */
4298 #ifdef MALLOC_TCACHE
4299 if (__isthreaded && tcache_nslots) {
4300 tcache_t *tcache = tcache_tls;
4301 if ((uintptr_t)tcache > (uintptr_t)1)
4302 tcache_dalloc(tcache, ptr);
4304 arena_dalloc_hard(arena, chunk, ptr, mapelm,
4309 malloc_spin_lock(&arena->lock);
4310 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4311 malloc_spin_unlock(&arena->lock);
4312 #ifdef MALLOC_TCACHE
4316 arena_dalloc_large(arena, chunk, ptr);
4319 #ifdef MALLOC_TCACHE
4321 arena_dalloc_hard(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4322 arena_chunk_map_t *mapelm, tcache_t *tcache)
4325 if (tcache == NULL) {
4326 tcache = tcache_create(arena);
4327 if (tcache == NULL) {
4328 malloc_spin_lock(&arena->lock);
4329 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4330 malloc_spin_unlock(&arena->lock);
4332 tcache_dalloc(tcache, ptr);
4334 /* This thread is currently exiting, so directly deallocate. */
4335 assert(tcache == (void *)(uintptr_t)1);
4336 malloc_spin_lock(&arena->lock);
4337 arena_dalloc_bin(arena, chunk, ptr, mapelm);
4338 malloc_spin_unlock(&arena->lock);
4346 arena_chunk_t *chunk;
4348 assert(ptr != NULL);
4350 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4352 arena_dalloc(chunk->arena, chunk, ptr);
4358 arena_ralloc_large_shrink(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4359 size_t size, size_t oldsize)
4362 assert(size < oldsize);
4365 * Shrink the run, and make trailing pages available for other
4368 malloc_spin_lock(&arena->lock);
4369 arena_run_trim_tail(arena, chunk, (arena_run_t *)ptr, oldsize, size,
4372 arena->stats.ndalloc_large++;
4373 arena->stats.allocated_large -= oldsize;
4374 arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
4376 arena->stats.nmalloc_large++;
4377 arena->stats.allocated_large += size;
4378 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
4379 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
4380 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
4381 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
4382 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
4383 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
4386 malloc_spin_unlock(&arena->lock);
4390 arena_ralloc_large_grow(arena_t *arena, arena_chunk_t *chunk, void *ptr,
4391 size_t size, size_t oldsize)
4393 size_t pageind = ((uintptr_t)ptr - (uintptr_t)chunk) >> PAGE_SHIFT;
4394 size_t npages = oldsize >> PAGE_SHIFT;
4396 assert(oldsize == (chunk->map[pageind].bits & ~PAGE_MASK));
4398 /* Try to extend the run. */
4399 assert(size > oldsize);
4400 malloc_spin_lock(&arena->lock);
4401 if (pageind + npages < chunk_npages && (chunk->map[pageind+npages].bits
4402 & CHUNK_MAP_ALLOCATED) == 0 && (chunk->map[pageind+npages].bits &
4403 ~PAGE_MASK) >= size - oldsize) {
4405 * The next run is available and sufficiently large. Split the
4406 * following run, then merge the first part with the existing
4409 arena_run_split(arena, (arena_run_t *)((uintptr_t)chunk +
4410 ((pageind+npages) << PAGE_SHIFT)), size - oldsize, true,
4413 chunk->map[pageind].bits = size | CHUNK_MAP_LARGE |
4414 CHUNK_MAP_ALLOCATED;
4415 chunk->map[pageind+npages].bits = CHUNK_MAP_LARGE |
4416 CHUNK_MAP_ALLOCATED;
4419 arena->stats.ndalloc_large++;
4420 arena->stats.allocated_large -= oldsize;
4421 arena->stats.lstats[(oldsize >> PAGE_SHIFT) - 1].curruns--;
4423 arena->stats.nmalloc_large++;
4424 arena->stats.allocated_large += size;
4425 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].nrequests++;
4426 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns++;
4427 if (arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns >
4428 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns) {
4429 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].highruns =
4430 arena->stats.lstats[(size >> PAGE_SHIFT) - 1].curruns;
4433 malloc_spin_unlock(&arena->lock);
4436 malloc_spin_unlock(&arena->lock);
4442 * Try to resize a large allocation, in order to avoid copying. This will
4443 * always fail when growing an object whose following run is already in use.
4446 arena_ralloc_large(void *ptr, size_t size, size_t oldsize)
4450 psize = PAGE_CEILING(size);
4451 if (psize == oldsize) {
4452 /* Same size class. */
4453 if (opt_junk && size < oldsize) {
4454 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize -
4459 arena_chunk_t *chunk;
4462 chunk = (arena_chunk_t *)CHUNK_ADDR2BASE(ptr);
4463 arena = chunk->arena;
4464 assert(arena->magic == ARENA_MAGIC);
4466 if (psize < oldsize) {
4467 /* Fill before shrinking in order to avoid a race. */
4469 memset((void *)((uintptr_t)ptr + size), 0x5a,
4472 arena_ralloc_large_shrink(arena, chunk, ptr, psize,
4476 bool ret = arena_ralloc_large_grow(arena, chunk, ptr,
4478 if (ret == false && opt_zero) {
4479 memset((void *)((uintptr_t)ptr + oldsize), 0,
4488 arena_ralloc(void *ptr, size_t size, size_t oldsize)
4494 * Try to avoid moving the allocation.
4496 * posix_memalign() can cause allocation of "large" objects that are
4497 * smaller than bin_maxclass (in order to meet alignment requirements).
4498 * Therefore, do not assume that (oldsize <= bin_maxclass) indicates
4499 * ptr refers to a bin-allocated object.
4501 if (oldsize <= arena_maxclass) {
4502 if (arena_is_large(ptr) == false) {
4503 if (size <= small_maxclass) {
4504 if (oldsize <= small_maxclass &&
4505 small_size2bin[size] ==
4506 small_size2bin[oldsize])
4508 } else if (size <= bin_maxclass) {
4509 if (small_maxclass < oldsize && oldsize <=
4510 bin_maxclass && MEDIUM_CEILING(size) ==
4511 MEDIUM_CEILING(oldsize))
4515 assert(size <= arena_maxclass);
4516 if (size > bin_maxclass) {
4517 if (arena_ralloc_large(ptr, size, oldsize) ==
4524 /* Try to avoid moving the allocation. */
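/*
 * For example (illustrative, assuming a 16-byte quantum): growing a 20-byte
 * allocation to 25 bytes leaves small_size2bin[size] unchanged, since both
 * sizes map to the 32-byte bin, so the original pointer can be returned
 * without copying.
 */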
4525 if (size <= small_maxclass) {
4526 if (oldsize <= small_maxclass && small_size2bin[size] ==
4527 small_size2bin[oldsize])
4529 } else if (size <= bin_maxclass) {
4530 if (small_maxclass < oldsize && oldsize <= bin_maxclass &&
4531 MEDIUM_CEILING(size) == MEDIUM_CEILING(oldsize))
4534 if (bin_maxclass < oldsize && oldsize <= arena_maxclass) {
4535 assert(size > bin_maxclass);
4536 if (arena_ralloc_large(ptr, size, oldsize) == false)
4542 * If we get here, then size and oldsize are different enough that we
4543 * need to move the object. In that case, fall back to allocating new
4544 * space and copying.
4546 ret = arena_malloc(size, false);
4550 /* Junk/zero-filling were already done by arena_malloc(). */
4551 copysize = (size < oldsize) ? size : oldsize;
4552 memcpy(ret, ptr, copysize);
4556 if (opt_junk && size < oldsize)
4557 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize - size);
4558 else if (opt_zero && size > oldsize)
4559 memset((void *)((uintptr_t)ptr + oldsize), 0, size - oldsize);
4563 static inline void *
4564 iralloc(void *ptr, size_t size)
4568 assert(ptr != NULL);
4571 oldsize = isalloc(ptr);
4573 if (size <= arena_maxclass)
4574 return (arena_ralloc(ptr, size, oldsize));
4576 return (huge_ralloc(ptr, size, oldsize));
4580 arena_new(arena_t *arena, unsigned ind)
4584 size_t prev_run_size;
4586 if (malloc_spin_init(&arena->lock))
4590 memset(&arena->stats, 0, sizeof(arena_stats_t));
4591 arena->stats.lstats = (malloc_large_stats_t *)base_alloc(
4592 sizeof(malloc_large_stats_t) * ((chunksize - PAGE_SIZE) >>
4594 if (arena->stats.lstats == NULL)
4596 memset(arena->stats.lstats, 0, sizeof(malloc_large_stats_t) *
4597 ((chunksize - PAGE_SIZE) >> PAGE_SHIFT));
4598 # ifdef MALLOC_TCACHE
4599 ql_new(&arena->tcache_ql);
4603 /* Initialize chunks. */
4604 arena_chunk_tree_dirty_new(&arena->chunks_dirty);
4605 arena->spare = NULL;
4610 arena_avail_tree_new(&arena->runs_avail);
4612 /* Initialize bins. */
4613 prev_run_size = PAGE_SIZE;
4617 /* (2^n)-spaced tiny bins. */
4618 for (; i < ntbins; i++) {
4619 bin = &arena->bins[i];
4621 arena_run_tree_new(&bin->runs);
4623 bin->reg_size = (1U << (LG_TINY_MIN + i));
4625 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4628 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4633 /* Quantum-spaced bins. */
4634 for (; i < ntbins + nqbins; i++) {
4635 bin = &arena->bins[i];
4637 arena_run_tree_new(&bin->runs);
4639 bin->reg_size = (i - ntbins + 1) << LG_QUANTUM;
4641 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4644 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4648 /* Cacheline-spaced bins. */
4649 for (; i < ntbins + nqbins + ncbins; i++) {
4650 bin = &arena->bins[i];
4652 arena_run_tree_new(&bin->runs);
4654 bin->reg_size = cspace_min + ((i - (ntbins + nqbins)) <<
4657 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4660 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4664 /* Subpage-spaced bins. */
4665 for (; i < ntbins + nqbins + ncbins + nsbins; i++) {
4666 bin = &arena->bins[i];
4668 arena_run_tree_new(&bin->runs);
4670 bin->reg_size = sspace_min + ((i - (ntbins + nqbins + ncbins))
4673 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4676 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4681 for (; i < nbins; i++) {
4682 bin = &arena->bins[i];
4684 arena_run_tree_new(&bin->runs);
4686 bin->reg_size = medium_min + ((i - (ntbins + nqbins + ncbins +
4687 nsbins)) << lg_mspace);
4689 prev_run_size = arena_bin_run_size_calc(bin, prev_run_size);
4692 memset(&bin->stats, 0, sizeof(malloc_bin_stats_t));
4697 arena->magic = ARENA_MAGIC;
4703 /* Create a new arena and insert it into the arenas array at index ind. */
4705 arenas_extend(unsigned ind)
4709 /* Allocate enough space for trailing bins. */
4710 ret = (arena_t *)base_alloc(sizeof(arena_t)
4711 + (sizeof(arena_bin_t) * (nbins - 1)));
4712 if (ret != NULL && arena_new(ret, ind) == false) {
4716 /* Only reached if there is an OOM error. */
4719 * OOM here is quite inconvenient to propagate, since dealing with it
4720 * would require a check for failure in the fast path. Instead, punt
4721 * by using arenas[0]. In practice, this is an extremely unlikely
4724 _malloc_message(_getprogname(),
4725 ": (malloc) Error initializing arena\n", "", "");
4732 #ifdef MALLOC_TCACHE
4733 static tcache_bin_t *
4734 tcache_bin_create(arena_t *arena)
4739 tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
4740 if (tsize <= small_maxclass)
4741 ret = (tcache_bin_t *)arena_malloc_small(arena, tsize, false);
4742 else if (tsize <= bin_maxclass)
4743 ret = (tcache_bin_t *)arena_malloc_medium(arena, tsize, false);
4745 ret = (tcache_bin_t *)imalloc(tsize);
4749 memset(&ret->tstats, 0, sizeof(tcache_bin_stats_t));
4752 ret->high_water = 0;
4759 tcache_bin_destroy(tcache_t *tcache, tcache_bin_t *tbin, unsigned binind)
4762 arena_chunk_t *chunk;
4763 size_t pageind, tsize;
4764 arena_chunk_map_t *mapelm;
4766 chunk = CHUNK_ADDR2BASE(tbin);
4767 arena = chunk->arena;
4768 pageind = (((uintptr_t)tbin - (uintptr_t)chunk) >> PAGE_SHIFT);
4769 mapelm = &chunk->map[pageind];
4772 if (tbin->tstats.nrequests != 0) {
4773 arena_t *arena = tcache->arena;
4774 arena_bin_t *bin = &arena->bins[binind];
4775 malloc_spin_lock(&arena->lock);
4776 bin->stats.nrequests += tbin->tstats.nrequests;
4777 if (bin->reg_size <= small_maxclass)
4778 arena->stats.nmalloc_small += tbin->tstats.nrequests;
4780 arena->stats.nmalloc_medium += tbin->tstats.nrequests;
4781 malloc_spin_unlock(&arena->lock);
4785 assert(tbin->ncached == 0);
4786 tsize = sizeof(tcache_bin_t) + (sizeof(void *) * (tcache_nslots - 1));
4787 if (tsize <= bin_maxclass) {
4788 malloc_spin_lock(&arena->lock);
4789 arena_dalloc_bin(arena, chunk, tbin, mapelm);
4790 malloc_spin_unlock(&arena->lock);
4797 tcache_stats_merge(tcache_t *tcache, arena_t *arena)
4801 /* Merge and reset tcache stats. */
4802 for (i = 0; i < mbin0; i++) {
4803 arena_bin_t *bin = &arena->bins[i];
4804 tcache_bin_t *tbin = tcache->tbins[i];
4806 bin->stats.nrequests += tbin->tstats.nrequests;
4807 arena->stats.nmalloc_small += tbin->tstats.nrequests;
4808 tbin->tstats.nrequests = 0;
4811 for (; i < nbins; i++) {
4812 arena_bin_t *bin = &arena->bins[i];
4813 tcache_bin_t *tbin = tcache->tbins[i];
4815 bin->stats.nrequests += tbin->tstats.nrequests;
4816 arena->stats.nmalloc_medium += tbin->tstats.nrequests;
4817 tbin->tstats.nrequests = 0;
4824 tcache_create(arena_t *arena)
4828 if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
4829 small_maxclass) {
4830 tcache = (tcache_t *)arena_malloc_small(arena, sizeof(tcache_t)
4831 + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
4832 } else if (sizeof(tcache_t) + (sizeof(tcache_bin_t *) * (nbins - 1)) <=
4833 bin_maxclass) {
4834 tcache = (tcache_t *)arena_malloc_medium(arena, sizeof(tcache_t)
4835 + (sizeof(tcache_bin_t *) * (nbins - 1)), true);
4837 tcache = (tcache_t *)icalloc(sizeof(tcache_t) +
4838 (sizeof(tcache_bin_t *) * (nbins - 1)));
4845 /* Link into list of extant tcaches. */
4846 malloc_spin_lock(&arena->lock);
4847 ql_elm_new(tcache, link);
4848 ql_tail_insert(&arena->tcache_ql, tcache, link);
4849 malloc_spin_unlock(&arena->lock);
4852 tcache->arena = arena;
4854 tcache_tls = tcache;
4860 tcache_destroy(tcache_t *tcache)
4865 /* Unlink from list of extant tcaches. */
4866 malloc_spin_lock(&tcache->arena->lock);
4867 ql_remove(&tcache->arena->tcache_ql, tcache, link);
4868 tcache_stats_merge(tcache, tcache->arena);
4869 malloc_spin_unlock(&tcache->arena->lock);
4872 for (i = 0; i < nbins; i++) {
4873 tcache_bin_t *tbin = tcache->tbins[i];
4875 tcache_bin_flush(tbin, i, 0);
4876 tcache_bin_destroy(tcache, tbin, i);
4880 if (arena_salloc(tcache) <= bin_maxclass) {
4881 arena_chunk_t *chunk = CHUNK_ADDR2BASE(tcache);
4882 arena_t *arena = chunk->arena;
4883 size_t pageind = (((uintptr_t)tcache - (uintptr_t)chunk) >>
4884 PAGE_SHIFT);
4885 arena_chunk_map_t *mapelm = &chunk->map[pageind];
4887 malloc_spin_lock(&arena->lock);
4888 arena_dalloc_bin(arena, chunk, tcache, mapelm);
4889 malloc_spin_unlock(&arena->lock);
4898 /******************************************************************************/
4900 * Begin general internal functions.
4904 huge_malloc(size_t size, bool zero)
4908 extent_node_t *node;
4910 /* Allocate one or more contiguous chunks for this request. */
4912 csize = CHUNK_CEILING(size);
4913 if (csize == 0) {
4914 /* size is large enough to cause size_t wrap-around. */
4915 return (NULL);
4916 }
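/*
 * Editor's illustrative note (not in the original source): CHUNK_CEILING()
 * rounds a request up to a multiple of chunksize; assuming the usual
 * mask-based definition, a sufficiently large request wraps around to 0.
 * With 1 MiB chunks (chunksize_mask == 0xfffff), for example:
 *
 *	size  = SIZE_MAX - 100;
 *	csize = (size + chunksize_mask) & ~chunksize_mask;
 *	        -- the addition overflows size_t, leaving csize == 0, which
 *	        -- the check above turns into a NULL return instead of a
 *	        -- bogus zero-byte chunk_alloc() request.
 */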
4918 /* Allocate an extent node with which to track the chunk. */
4919 node = base_node_alloc();
4923 ret = chunk_alloc(csize, &zero);
4924 if (ret == NULL) {
4925 base_node_dealloc(node);
4929 /* Insert node into huge. */
4930 node->addr = ret;
4931 node->size = csize;
4933 malloc_mutex_lock(&huge_mtx);
4934 extent_tree_ad_insert(&huge, node);
4937 huge_allocated += csize;
4939 malloc_mutex_unlock(&huge_mtx);
4941 if (zero == false) {
4942 if (opt_junk)
4943 memset(ret, 0xa5, csize);
4944 else if (opt_zero)
4945 memset(ret, 0, csize);
4951 /* Only handles large allocations that require more than chunk alignment. */
4953 huge_palloc(size_t alignment, size_t size)
4956 size_t alloc_size, chunk_size, offset;
4957 extent_node_t *node;
4961 * This allocation requires alignment that is even larger than chunk
4962 * alignment. This means that huge_malloc() isn't good enough.
4964 * Allocate almost twice as many chunks as are demanded by the size or
4965 * alignment, in order to assure the alignment can be achieved, then
4966 * unmap leading and trailing chunks.
4967 */
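/*
 * Editor's illustrative sketch (not in the original source), assuming 1 MiB
 * chunks: for alignment = 2 MiB and size = 3 MiB, chunk_size = 3 MiB and
 * alloc_size = 3 MiB + 2 MiB - 1 MiB = 4 MiB. chunk_alloc() returns a
 * chunk-aligned (1 MiB) base, so the offset computed below is either 0 or
 * 1 MiB:
 *
 *	offset == 0:      keep [ret, ret + 3 MiB), trim the trailing 1 MiB.
 *	offset == 1 MiB:  trim the leading 1 MiB, advance ret by 1 MiB, and
 *	                  trailsize = 4 - 1 - 3 = 0, so nothing else to trim.
 *
 * Either way a 2 MiB-aligned, 3 MiB region remains mapped.
 */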
4968 assert(alignment >= chunksize);
4970 chunk_size = CHUNK_CEILING(size);
4972 if (size >= alignment)
4973 alloc_size = chunk_size + alignment - chunksize;
4975 alloc_size = (alignment << 1) - chunksize;
4977 /* Allocate an extent node with which to track the chunk. */
4978 node = base_node_alloc();
4983 ret = chunk_alloc(alloc_size, &zero);
4984 if (ret == NULL) {
4985 base_node_dealloc(node);
4989 offset = (uintptr_t)ret & (alignment - 1);
4990 assert((offset & chunksize_mask) == 0);
4991 assert(offset < alloc_size);
4993 /* Trim trailing space. */
4994 chunk_dealloc((void *)((uintptr_t)ret + chunk_size), alloc_size
4995 - chunk_size);
4999 /* Trim leading space. */
5000 chunk_dealloc(ret, alignment - offset);
5002 ret = (void *)((uintptr_t)ret + (alignment - offset));
5004 trailsize = alloc_size - (alignment - offset) - chunk_size;
5005 if (trailsize != 0) {
5006 /* Trim trailing space. */
5007 assert(trailsize < alloc_size);
5008 chunk_dealloc((void *)((uintptr_t)ret + chunk_size),
5009 trailsize);
5013 /* Insert node into huge. */
5014 node->addr = ret;
5015 node->size = chunk_size;
5017 malloc_mutex_lock(&huge_mtx);
5018 extent_tree_ad_insert(&huge, node);
5021 huge_allocated += chunk_size;
5023 malloc_mutex_unlock(&huge_mtx);
5025 if (opt_junk)
5026 memset(ret, 0xa5, chunk_size);
5027 else if (opt_zero)
5028 memset(ret, 0, chunk_size);
5034 huge_ralloc(void *ptr, size_t size, size_t oldsize)
5039 /* Avoid moving the allocation if the size class would not change. */
5040 if (oldsize > arena_maxclass &&
5041 CHUNK_CEILING(size) == CHUNK_CEILING(oldsize)) {
5042 if (opt_junk && size < oldsize) {
5043 memset((void *)((uintptr_t)ptr + size), 0x5a, oldsize
5044 - size);
5045 } else if (opt_zero && size > oldsize) {
5046 memset((void *)((uintptr_t)ptr + oldsize), 0, size
5047 - oldsize);
5053 * If we get here, then size and oldsize are different enough that we
5054 * need to use a different size class. In that case, fall back to
5055 * allocating new space and copying.
5057 ret = huge_malloc(size, false);
5061 copysize = (size < oldsize) ? size : oldsize;
5062 memcpy(ret, ptr, copysize);
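/*
 * Editor's illustrative note (not in the original source), assuming 1 MiB
 * chunks: growing a huge allocation from oldsize = 1.2 MiB to size = 1.8 MiB
 * keeps the same 2-chunk (2 MiB) backing, so the branch above only
 * junk-fills or zero-fills the delta and the original pointer survives.
 * Growing to 2.5 MiB needs a 3-chunk backing instead, so this path allocates
 * a new huge region and copies copysize = 1.2 MiB of data before the old
 * region is released.
 */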
5068 huge_dalloc(void *ptr)
5070 extent_node_t *node, key;
5072 malloc_mutex_lock(&huge_mtx);
5074 /* Extract from tree of huge allocations. */
5075 key.addr = ptr;
5076 node = extent_tree_ad_search(&huge, &key);
5077 assert(node != NULL);
5078 assert(node->addr == ptr);
5079 extent_tree_ad_remove(&huge, node);
5083 huge_allocated -= node->size;
5086 malloc_mutex_unlock(&huge_mtx);
5090 if (opt_dss && opt_junk)
5091 memset(node->addr, 0x5a, node->size);
5093 chunk_dealloc(node->addr, node->size);
5095 base_node_dealloc(node);
5099 malloc_stats_print(void)
5101 char s[UMAX2S_BUFSIZE];
5103 _malloc_message("___ Begin malloc statistics ___\n", "", "", "");
5104 _malloc_message("Assertions ",
5111 _malloc_message("Boolean MALLOC_OPTIONS: ", opt_abort ? "A" : "a", "", "");
5113 _malloc_message(opt_dss ? "D" : "d", "", "", "");
5115 _malloc_message(opt_junk ? "J" : "j", "", "", "");
5117 _malloc_message(opt_mmap ? "M" : "m", "", "", "");
5119 _malloc_message("P", "", "", "");
5120 _malloc_message(opt_utrace ? "U" : "u", "", "", "");
5121 _malloc_message(opt_sysv ? "V" : "v", "", "", "");
5122 _malloc_message(opt_xmalloc ? "X" : "x", "", "", "");
5123 _malloc_message(opt_zero ? "Z" : "z", "", "", "");
5124 _malloc_message("\n", "", "", "");
5126 _malloc_message("CPUs: ", umax2s(ncpus, 10, s), "\n", "");
5127 _malloc_message("Max arenas: ", umax2s(narenas, 10, s), "\n", "");
5128 _malloc_message("Pointer size: ", umax2s(sizeof(void *), 10, s), "\n", "");
5129 _malloc_message("Quantum size: ", umax2s(QUANTUM, 10, s), "\n", "");
5130 _malloc_message("Cacheline size (assumed): ",
5131 umax2s(CACHELINE, 10, s), "\n", "");
5132 _malloc_message("Subpage spacing: ", umax2s(SUBPAGE, 10, s), "\n", "");
5133 _malloc_message("Medium spacing: ", umax2s((1U << lg_mspace), 10, s), "\n",
5136 _malloc_message("Tiny 2^n-spaced sizes: [", umax2s((1U << LG_TINY_MIN), 10,
5138 _malloc_message(umax2s((qspace_min >> 1), 10, s), "]\n", "", "");
5140 _malloc_message("Quantum-spaced sizes: [", umax2s(qspace_min, 10, s), "..",
5142 _malloc_message(umax2s(qspace_max, 10, s), "]\n", "", "");
5143 _malloc_message("Cacheline-spaced sizes: [",
5144 umax2s(cspace_min, 10, s), "..", "");
5145 _malloc_message(umax2s(cspace_max, 10, s), "]\n", "", "");
5146 _malloc_message("Subpage-spaced sizes: [", umax2s(sspace_min, 10, s), "..",
5148 _malloc_message(umax2s(sspace_max, 10, s), "]\n", "", "");
5149 _malloc_message("Medium sizes: [", umax2s(medium_min, 10, s), "..", "");
5150 _malloc_message(umax2s(medium_max, 10, s), "]\n", "", "");
5151 if (opt_lg_dirty_mult >= 0) {
5152 _malloc_message("Min active:dirty page ratio per arena: ",
5153 umax2s((1U << opt_lg_dirty_mult), 10, s), ":1\n", "");
5155 _malloc_message("Min active:dirty page ratio per arena: N/A\n", "",
5158 #ifdef MALLOC_TCACHE
5159 _malloc_message("Thread cache slots per size class: ",
5160 tcache_nslots ? umax2s(tcache_nslots, 10, s) : "N/A", "\n", "");
5161 _malloc_message("Thread cache GC sweep interval: ",
5162 (tcache_nslots && tcache_gc_incr > 0) ?
5163 umax2s((1U << opt_lg_tcache_gc_sweep), 10, s) : "N/A", "", "");
5164 _malloc_message(" (increment interval: ",
5165 (tcache_nslots && tcache_gc_incr > 0) ? umax2s(tcache_gc_incr, 10, s)
5166 : "N/A", ")\n", "");
5168 _malloc_message("Chunk size: ", umax2s(chunksize, 10, s), "", "");
5169 _malloc_message(" (2^", umax2s(opt_lg_chunk, 10, s), ")\n", "");
5173 size_t allocated, mapped;
5177 /* Calculate and print allocated/mapped stats. */
5180 for (i = 0, allocated = 0; i < narenas; i++) {
5181 if (arenas[i] != NULL) {
5182 malloc_spin_lock(&arenas[i]->lock);
5183 allocated += arenas[i]->stats.allocated_small;
5184 allocated += arenas[i]->stats.allocated_large;
5185 malloc_spin_unlock(&arenas[i]->lock);
5190 malloc_mutex_lock(&huge_mtx);
5191 allocated += huge_allocated;
5192 mapped = stats_chunks.curchunks * chunksize;
5193 malloc_mutex_unlock(&huge_mtx);
5195 malloc_mutex_lock(&base_mtx);
5196 mapped += base_mapped;
5197 malloc_mutex_unlock(&base_mtx);
5199 malloc_printf("Allocated: %zu, mapped: %zu\n", allocated,
5202 /* Print chunk stats. */
5204 chunk_stats_t chunks_stats;
5206 malloc_mutex_lock(&huge_mtx);
5207 chunks_stats = stats_chunks;
5208 malloc_mutex_unlock(&huge_mtx);
5210 malloc_printf("chunks: nchunks "
5211 "highchunks curchunks\n");
5212 malloc_printf(" %13"PRIu64"%13zu%13zu\n",
5213 chunks_stats.nchunks, chunks_stats.highchunks,
5214 chunks_stats.curchunks);
5217 /* Print huge stats. */
5218 malloc_printf(
5219 "huge: nmalloc ndalloc allocated\n");
5220 malloc_printf(" %12"PRIu64" %12"PRIu64" %12zu\n", huge_nmalloc,
5221 huge_ndalloc, huge_allocated);
5223 /* Print stats for each arena. */
5224 for (i = 0; i < narenas; i++) {
5225 arena = arenas[i];
5226 if (arena != NULL) {
5227 malloc_printf("\narenas[%u]:\n", i);
5228 malloc_spin_lock(&arena->lock);
5229 arena_stats_print(arena);
5230 malloc_spin_unlock(&arena->lock);
5234 #endif /* #ifdef MALLOC_STATS */
5235 _malloc_message("--- End malloc statistics ---\n", "", "", "");
5240 small_size2bin_validate(void)
5242 size_t i, size, binind;
5244 assert(small_size2bin[0] == 0xffU);
5248 for (; i < (1U << LG_TINY_MIN); i++) {
5249 size = pow2_ceil(1U << LG_TINY_MIN);
5250 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5251 assert(small_size2bin[i] == binind);
5253 for (; i < qspace_min; i++) {
5254 size = pow2_ceil(i);
5255 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5256 assert(small_size2bin[i] == binind);
5259 /* Quantum-spaced. */
5260 for (; i <= qspace_max; i++) {
5261 size = QUANTUM_CEILING(i);
5262 binind = ntbins + (size >> LG_QUANTUM) - 1;
5263 assert(small_size2bin[i] == binind);
5265 /* Cacheline-spaced. */
5266 for (; i <= cspace_max; i++) {
5267 size = CACHELINE_CEILING(i);
5268 binind = ntbins + nqbins + ((size - cspace_min) >>
5269 LG_CACHELINE);
5270 assert(small_size2bin[i] == binind);
5273 for (; i <= sspace_max; i++) {
5274 size = SUBPAGE_CEILING(i);
5275 binind = ntbins + nqbins + ncbins + ((size - sspace_min)
5276 >> LG_SUBPAGE);
5277 assert(small_size2bin[i] == binind);
5283 small_size2bin_init(void)
5286 if (opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
5287 || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
5288 || sizeof(const_small_size2bin) != small_maxclass + 1)
5289 return (small_size2bin_init_hard());
5291 small_size2bin = const_small_size2bin;
5293 assert(sizeof(const_small_size2bin) == small_maxclass + 1);
5294 small_size2bin_validate();
5300 small_size2bin_init_hard(void)
5302 size_t i, size, binind;
5303 uint8_t *custom_small_size2bin;
5305 assert(opt_lg_qspace_max != LG_QSPACE_MAX_DEFAULT
5306 || opt_lg_cspace_max != LG_CSPACE_MAX_DEFAULT
5307 || sizeof(const_small_size2bin) != small_maxclass + 1);
5309 custom_small_size2bin = (uint8_t *)base_alloc(small_maxclass + 1);
5310 if (custom_small_size2bin == NULL)
5313 custom_small_size2bin[0] = 0xffU;
5317 for (; i < (1U << LG_TINY_MIN); i++) {
5318 size = pow2_ceil(1U << LG_TINY_MIN);
5319 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5320 custom_small_size2bin[i] = binind;
5322 for (; i < qspace_min; i++) {
5323 size = pow2_ceil(i);
5324 binind = ffs((int)(size >> (LG_TINY_MIN + 1)));
5325 custom_small_size2bin[i] = binind;
5328 /* Quantum-spaced. */
5329 for (; i <= qspace_max; i++) {
5330 size = QUANTUM_CEILING(i);
5331 binind = ntbins + (size >> LG_QUANTUM) - 1;
5332 custom_small_size2bin[i] = binind;
5334 /* Cacheline-spaced. */
5335 for (; i <= cspace_max; i++) {
5336 size = CACHELINE_CEILING(i);
5337 binind = ntbins + nqbins + ((size - cspace_min) >>
5338 LG_CACHELINE);
5339 custom_small_size2bin[i] = binind;
5342 for (; i <= sspace_max; i++) {
5343 size = SUBPAGE_CEILING(i);
5344 binind = ntbins + nqbins + ncbins + ((size - sspace_min) >>
5345 LG_SUBPAGE);
5346 custom_small_size2bin[i] = binind;
5349 small_size2bin = custom_small_size2bin;
5351 small_size2bin_validate();
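/*
 * Editor's illustrative note (not in the original source): the table maps a
 * request size directly to a bin index. Assuming the defaults shown in the
 * table at the top of this file (16-byte quantum, 2-byte minimum tiny class,
 * so ntbins == LG_QUANTUM - LG_TINY_MIN == 3 tiny bins for 2, 4 and 8 bytes):
 *
 *	small_size2bin[8]  == ffs(8 >> (LG_TINY_MIN + 1))      == 2  (8-byte tiny bin)
 *	small_size2bin[16] == ntbins + (16 >> LG_QUANTUM) - 1  == 3  (16-byte bin)
 *	small_size2bin[17] == ntbins + (32 >> LG_QUANTUM) - 1  == 4  (rounds up to 32)
 */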
5364 error = _elf_aux_info(AT_NCPUS, &ret, sizeof(ret));
5365 if (error != 0 || ret == 0) {
5369 if (sysctl(mib, 2, &ret, &len, (void *)NULL, 0) == -1) {
5379 * FreeBSD's pthreads implementation calls malloc(3), so the malloc
5380 * implementation has to take pains to avoid infinite recursion during
5381 * initialization.
5382 */
5387 if (malloc_initialized == false)
5388 return (malloc_init_hard());
5394 malloc_init_hard(void)
5398 char buf[PATH_MAX + 1];
5401 malloc_mutex_lock(&init_lock);
5402 if (malloc_initialized) {
5404 * Another thread initialized the allocator before this one
5405 * acquired init_lock.
5407 malloc_mutex_unlock(&init_lock);
5411 /* Get number of CPUs. */
5412 ncpus = malloc_ncpus();
5415 * Increase the chunk size to the largest page size that is greater
5416 * than the default chunk size and less than or equal to 4MB.
5419 size_t pagesizes[MAXPAGESIZES];
5422 nsizes = getpagesizes(pagesizes, MAXPAGESIZES);
5423 for (k = 0; k < nsizes; k++)
5424 if (pagesizes[k] <= (1LU << 22))
5425 while ((1LU << opt_lg_chunk) < pagesizes[k])
5426 opt_lg_chunk++;
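/*
 * Editor's illustrative note (not in the original source): if, say,
 * getpagesizes() reports a 2 MiB superpage (1 << 21) and opt_lg_chunk is
 * still at a smaller default such as 20 (1 MiB), the loop above raises
 * opt_lg_chunk to 21 so that every chunk can be backed by exactly one
 * superpage; a 1 GiB page (1 << 30) would be ignored because of the 4 MiB
 * cap.
 */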
5429 for (i = 0; i < 3; i++) {
5432 /* Get runtime configuration. */
5435 if ((linklen = readlink("/etc/malloc.conf", buf,
5436 sizeof(buf) - 1)) != -1) {
5438 * Use the contents of the "/etc/malloc.conf"
5439 * symbolic link's name.
5441 buf[linklen] = '\0';
5444 /* No configuration specified. */
5450 if (issetugid() == 0 && (opts =
5451 getenv("MALLOC_OPTIONS")) != NULL) {
5453 * Do nothing; opts is already initialized to
5454 * the value of the MALLOC_OPTIONS environment
5458 /* No configuration specified. */
5464 if (_malloc_options != NULL) {
5466 * Use options that were compiled into the
5469 opts = _malloc_options;
5471 /* No configuration specified. */
5483 for (j = 0; opts[j] != '\0'; j++) {
5487 /* Parse repetition count, if any. */
5488 for (nreps = 0, nseen = false;; j++, nseen = true) {
5490 case '0': case '1': case '2': case '3':
5491 case '4': case '5': case '6': case '7':
5492 case '8': case '9':
5493 nreps *= 10;
5494 nreps += opts[j] - '0';
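/*
 * Editor's illustrative note (not in the original source): with
 * MALLOC_OPTIONS="3N" the digits accumulate to nreps == 3 and the
 * following 'N' case is then executed three times, i.e.
 * opt_narenas_lshift += 3; an option letter with no leading count is
 * applied once.
 */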
5504 for (k = 0; k < nreps; k++) {
5513 if (opt_lg_cspace_max - 1 >
5514 opt_lg_qspace_max &&
5515 opt_lg_cspace_max >
5516 LG_CACHELINE)
5517 opt_lg_cspace_max--;
5520 if (opt_lg_cspace_max < PAGE_SHIFT
5521 - 1)
5522 opt_lg_cspace_max++;
5535 if (opt_lg_medium_max > PAGE_SHIFT)
5536 opt_lg_medium_max--;
5539 if (opt_lg_medium_max + 1 <
5541 opt_lg_medium_max++;
5544 if (opt_lg_dirty_mult + 1 <
5545 (sizeof(size_t) << 3))
5546 opt_lg_dirty_mult++;
5549 if (opt_lg_dirty_mult >= 0)
5550 opt_lg_dirty_mult--;
5552 #ifdef MALLOC_TCACHE
5554 if (opt_lg_tcache_gc_sweep >= 0)
5555 opt_lg_tcache_gc_sweep--;
5558 if (opt_lg_tcache_gc_sweep + 1 <
5559 (sizeof(size_t) << 3))
5560 opt_lg_tcache_gc_sweep++;
5563 if (opt_lg_tcache_nslots > 0)
5564 opt_lg_tcache_nslots--;
5567 if (opt_lg_tcache_nslots + 1 <
5568 (sizeof(size_t) << 3))
5569 opt_lg_tcache_nslots++;
5579 /*
5580 * Chunks always require at least one
5581 * header page, plus enough room to
5582 * hold a run for the largest medium
5583 * size class (one page more than the
5584 * size).
5585 */
5586 if ((1U << (opt_lg_chunk - 1)) >=
5587 (2U << PAGE_SHIFT) + (1U <<
5588 opt_lg_medium_max))
5589 opt_lg_chunk--;
5592 if (opt_lg_chunk + 1 <
5593 (sizeof(size_t) << 3))
5594 opt_lg_chunk++;
5607 opt_narenas_lshift--;
5610 opt_narenas_lshift++;
5613 opt_stats_print = false;
5616 opt_stats_print = true;
5619 if (opt_lg_qspace_max > LG_QUANTUM)
5620 opt_lg_qspace_max--;
5623 if (opt_lg_qspace_max + 1 <
5624 opt_lg_cspace_max)
5625 opt_lg_qspace_max++;
5640 opt_xmalloc = false;
5656 _malloc_message(_getprogname(),
5657 ": (malloc) Unsupported character "
5658 "in malloc options: '", cbuf,
5667 /* Make sure that there is some method for acquiring memory. */
5668 if (opt_dss == false && opt_mmap == false)
5669 opt_mmap = true;
5671 if (opt_stats_print) {
5672 /* Print statistics at exit. */
5673 atexit(stats_print_atexit);
5677 /* Set variables according to the value of opt_lg_[qc]space_max. */
5678 qspace_max = (1U << opt_lg_qspace_max);
5679 cspace_min = CACHELINE_CEILING(qspace_max);
5680 if (cspace_min == qspace_max)
5681 cspace_min += CACHELINE;
5682 cspace_max = (1U << opt_lg_cspace_max);
5683 sspace_min = SUBPAGE_CEILING(cspace_max);
5684 if (sspace_min == cspace_max)
5685 sspace_min += SUBPAGE;
5686 assert(sspace_min < PAGE_SIZE);
5687 sspace_max = PAGE_SIZE - SUBPAGE;
5688 medium_max = (1U << opt_lg_medium_max);
5691 assert(LG_QUANTUM >= LG_TINY_MIN);
5693 assert(ntbins <= LG_QUANTUM);
5694 nqbins = qspace_max >> LG_QUANTUM;
5695 ncbins = ((cspace_max - cspace_min) >> LG_CACHELINE) + 1;
5696 nsbins = ((sspace_max - sspace_min) >> LG_SUBPAGE) + 1;
5699 * Compute medium size class spacing and the number of medium size
5700 * classes. Limit spacing to no more than pagesize, but if possible
5701 * use the smallest spacing that does not exceed NMBINS_MAX medium size
5702 * classes.
5703 */
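/*
 * Editor's illustrative sketch (not in the original source), with made-up
 * values: suppose medium_min = 4 KiB, medium_max = 32 KiB, PAGE_SHIFT = 12,
 * LG_SUBPAGE = 8 and NMBINS_MAX = 16. Starting at 256-byte spacing:
 *
 *	lg_mspace =  8: nmbins = ((32768 - 4096) >>  8) + 1 == 113  (too many)
 *	lg_mspace =  9: nmbins == 57;  lg_mspace = 10: nmbins == 29 (too many)
 *	lg_mspace = 11: nmbins = ((32768 - 4096) >> 11) + 1 == 15   (<= 16, stop)
 *
 * so medium classes end up spaced 2 KiB apart, still below the page size.
 */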
5704 lg_mspace = LG_SUBPAGE;
5705 nmbins = ((medium_max - medium_min) >> lg_mspace) + 1;
5706 while (lg_mspace < PAGE_SHIFT && nmbins > NMBINS_MAX) {
5707 lg_mspace = lg_mspace + 1;
5708 nmbins = ((medium_max - medium_min) >> lg_mspace) + 1;
5710 mspace_mask = (1U << lg_mspace) - 1U;
5712 mbin0 = ntbins + nqbins + ncbins + nsbins;
5713 nbins = mbin0 + nmbins;
5715 * The small_size2bin lookup table uses uint8_t to encode each bin
5716 * index, so we cannot support more than 256 small size classes. This
5717 * limit is difficult to exceed (not even possible with 16B quantum and
5718 * 4KiB pages), and such configurations are impractical, but
5719 * nonetheless we need to protect against this case in order to avoid
5720 * undefined behavior.
5721 */
5722 if (mbin0 > 256) {
5723 char line_buf[UMAX2S_BUFSIZE];
5724 _malloc_message(_getprogname(),
5725 ": (malloc) Too many small size classes (",
5726 umax2s(mbin0, 10, line_buf), " > max 256)\n");
5730 if (small_size2bin_init()) {
5731 malloc_mutex_unlock(&init_lock);
5735 #ifdef MALLOC_TCACHE
5736 if (opt_lg_tcache_nslots > 0) {
5737 tcache_nslots = (1U << opt_lg_tcache_nslots);
5739 /* Compute incremental GC event threshold. */
5740 if (opt_lg_tcache_gc_sweep >= 0) {
5741 tcache_gc_incr = ((1U << opt_lg_tcache_gc_sweep) /
5742 nbins) + (((1U << opt_lg_tcache_gc_sweep) % nbins ==
5743 0) ? 0 : 1);
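/*
 * Editor's illustrative note (not in the original source), with made-up
 * values: if opt_lg_tcache_gc_sweep were 13 (a full sweep every 8192
 * allocation events) and nbins were 30, then
 * tcache_gc_incr = (8192 / 30) + 1 = 273 + 1 = 274, i.e. one bin is
 * inspected roughly every 274 events so that all bins are visited about
 * once per 8192 events.
 */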
5750 /* Set variables according to the value of opt_lg_chunk. */
5751 chunksize = (1LU << opt_lg_chunk);
5752 chunksize_mask = chunksize - 1;
5753 chunk_npages = (chunksize >> PAGE_SHIFT);
5758 * Compute the header size such that it is large enough to
5759 * contain the page map.
5761 header_size = sizeof(arena_chunk_t) +
5762 (sizeof(arena_chunk_map_t) * (chunk_npages - 1));
5763 arena_chunk_header_npages = (header_size >> PAGE_SHIFT) +
5764 ((header_size & PAGE_MASK) != 0);
5766 arena_maxclass = chunksize - (arena_chunk_header_npages <<
5767 PAGE_SHIFT);
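/*
 * Editor's illustrative note (not in the original source), with made-up
 * struct sizes: assuming 4 KiB pages, 1 MiB chunks (chunk_npages == 256),
 * a 96-byte arena_chunk_t and 16-byte arena_chunk_map_t entries:
 *
 *	header_size = 96 + 16 * 255 = 4176 bytes
 *	arena_chunk_header_npages = (4176 >> 12) + 1 = 2
 *	arena_maxclass = 1 MiB - 2 * 4 KiB = 1040384 bytes
 *
 * i.e. two pages of each chunk are reserved for the header and page map.
 */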
5769 UTRACE((void *)(intptr_t)(-1), 0, 0);
5772 malloc_mutex_init(&chunks_mtx);
5773 memset(&stats_chunks, 0, sizeof(chunk_stats_t));
5776 /* Various sanity checks that regard configuration. */
5777 assert(chunksize >= PAGE_SIZE);
5779 /* Initialize chunks data. */
5780 malloc_mutex_init(&huge_mtx);
5781 extent_tree_ad_new(&huge);
5783 malloc_mutex_init(&dss_mtx);
5785 dss_prev = dss_base;
5787 extent_tree_szad_new(&dss_chunks_szad);
5788 extent_tree_ad_new(&dss_chunks_ad);
5796 /* Initialize base allocation data structures. */
5802 * Allocate a base chunk here, since it doesn't actually have to be
5803 * chunk-aligned. Doing this before allocating any other chunks allows
5804 * the use of space that would otherwise be wasted.
5807 base_pages_alloc(0);
5810 malloc_mutex_init(&base_mtx);
5814 * For SMP systems, create more than one arena per CPU by
5815 * default.
5816 */
5817 #ifdef MALLOC_TCACHE
5818 if (tcache_nslots) {
5820 * Only large object allocation/deallocation is
5821 * guaranteed to acquire an arena mutex, so we can get
5822 * away with fewer arenas than without thread caching.
5824 opt_narenas_lshift += 1;
5828 * All allocations must acquire an arena mutex, so use
5829 * plenty of arenas.
5830 */
5831 opt_narenas_lshift += 2;
5832 #ifdef MALLOC_TCACHE
5837 /* Determine how many arenas to use. */
5839 if (opt_narenas_lshift > 0) {
5840 if ((narenas << opt_narenas_lshift) > narenas)
5841 narenas <<= opt_narenas_lshift;
5843 * Make sure not to exceed the limits of what base_alloc() can
5844 * handle.
5845 */
5846 if (narenas * sizeof(arena_t *) > chunksize)
5847 narenas = chunksize / sizeof(arena_t *);
5848 } else if (opt_narenas_lshift < 0) {
5849 if ((narenas >> -opt_narenas_lshift) < narenas)
5850 narenas >>= -opt_narenas_lshift;
5851 /* Make sure there is at least one arena. */
5858 static const unsigned primes[] = {1, 3, 5, 7, 11, 13, 17, 19,
5859 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
5860 89, 97, 101, 103, 107, 109, 113, 127, 131, 137, 139, 149,
5861 151, 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, 211,
5862 223, 227, 229, 233, 239, 241, 251, 257, 263};
5863 unsigned nprimes, parenas;
5866 * Pick a prime number of hash arenas that is more than narenas
5867 * so that direct hashing of pthread_self() pointers tends to
5868 * spread allocations evenly among the arenas.
5869 */
5870 assert((narenas & 1) == 0); /* narenas must be even. */
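/*
 * Editor's illustrative note (not in the original source): pthread_self()
 * returns pointers to thread structures that share their low-order bits,
 * say addresses of the form base + 1024 * k. Reduced modulo a power of two
 * such as 16, every thread would land on the same arena index; reduced
 * modulo the prime 17, the values 1024 * k cycle through all 17 residues
 * because gcd(1024, 17) == 1, so threads spread across all arenas.
 */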
5871 nprimes = (sizeof(primes) >> LG_SIZEOF_INT);
5872 parenas = primes[nprimes - 1]; /* In case not enough primes. */
5873 for (i = 1; i < nprimes; i++) {
5874 if (primes[i] > narenas) {
5875 parenas = primes[i];
5887 /* Allocate and initialize arenas. */
5888 arenas = (arena_t **)base_alloc(sizeof(arena_t *) * narenas);
5889 if (arenas == NULL) {
5890 malloc_mutex_unlock(&init_lock);
5894 * Zero the array. In practice, this should always be pre-zeroed,
5895 * since it was just mmap()ed, but let's be sure.
5897 memset(arenas, 0, sizeof(arena_t *) * narenas);
5900 * Initialize one arena here. The rest are lazily created in
5901 * choose_arena_hard().
5902 */
5903 arenas_extend(0);
5904 if (arenas[0] == NULL) {
5905 malloc_mutex_unlock(&init_lock);
5910 * Assign the initial arena to the initial thread, in order to avoid
5911 * spurious creation of an extra arena if the application switches to
5912 * threaded mode.
5913 */
5914 arenas_map = arenas[0];
5916 malloc_spin_init(&arenas_lock);
5918 malloc_initialized = true;
5919 malloc_mutex_unlock(&init_lock);
5924 * End general internal functions.
5926 /******************************************************************************/
5928 * Begin malloc(3)-compatible functions.
5936 if (malloc_init()) {
5942 if (opt_sysv == false)
5946 _malloc_message(_getprogname(),
5947 ": (malloc) Error in malloc(): "
5948 "invalid size 0\n", "", "");
5956 ret = imalloc(size);
5961 _malloc_message(_getprogname(),
5962 ": (malloc) Error in malloc(): out of memory\n", "",
5970 UTRACE(0, size, ret);
5975 posix_memalign(void **memptr, size_t alignment, size_t size)
5984 if (opt_sysv == false)
5988 _malloc_message(_getprogname(),
5989 ": (malloc) Error in "
5990 "posix_memalign(): invalid "
5991 "size 0\n", "", "");
6001 /* Make sure that alignment is a large enough power of 2. */
6002 if (((alignment - 1) & alignment) != 0
6003 || alignment < sizeof(void *)) {
6005 _malloc_message(_getprogname(),
6006 ": (malloc) Error in posix_memalign(): "
6007 "invalid alignment\n", "", "");
6015 result = ipalloc(alignment, size);
6018 if (result == NULL) {
6020 _malloc_message(_getprogname(),
6021 ": (malloc) Error in posix_memalign(): out of memory\n",
6033 UTRACE(0, size, result);
6038 calloc(size_t num, size_t size)
6043 if (malloc_init()) {
6049 num_size = num * size;
6050 if (num_size == 0) {
6051 if ((opt_sysv == false) && ((num == 0) || (size == 0)))
6058 * Try to avoid division here. We know that it isn't possible to
6059 * overflow during multiplication if neither operand uses any of the
6060 * most significant half of the bits in a size_t.
6061 */
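/*
 * Editor's illustrative note (not in the original source): on a 32-bit
 * system the mask used in the check below is SIZE_T_MAX << 16 ==
 * 0xffff0000. If both num and size fit in 16 bits, their product cannot
 * exceed 32 bits, so no division is needed. Otherwise the division check
 * catches wrap-around, e.g. num == 0x20000 and size == 0x30000 gives
 * num_size == 0 after truncation, and 0 / 0x30000 != 0x20000 flags the
 * overflow.
 */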
6062 } else if (((num | size) & (SIZE_T_MAX << (sizeof(size_t) << 2)))
6063 && (num_size / size != num)) {
6064 /* size_t overflow. */
6069 ret = icalloc(num_size);
6074 _malloc_message(_getprogname(),
6075 ": (malloc) Error in calloc(): out of memory\n", "",
6082 UTRACE(0, num_size, ret);
6087 realloc(void *ptr, size_t size)
6092 if (opt_sysv == false)
6103 assert(malloc_initialized);
6105 ret = iralloc(ptr, size);
6109 _malloc_message(_getprogname(),
6110 ": (malloc) Error in realloc(): out of "
6111 "memory\n", "", "");
6120 ret = imalloc(size);
6124 _malloc_message(_getprogname(),
6125 ": (malloc) Error in realloc(): out of "
6126 "memory\n", "", "");
6134 UTRACE(ptr, size, ret);
6144 assert(malloc_initialized);
6151 * End malloc(3)-compatible functions.
6153 /******************************************************************************/
6155 * Begin non-standard functions.
6159 malloc_usable_size(const void *ptr)
6162 assert(ptr != NULL);
6164 return (isalloc(ptr));
6168 * End non-standard functions.
6170 /******************************************************************************/
6172 * Begin library-private functions.
6176 * We provide an unpublished interface in order to receive notifications from
6177 * the pthreads library whenever a thread exits. This allows us to clean up
6181 _malloc_thread_cleanup(void)
6184 #ifdef MALLOC_TCACHE
6185 tcache_t *tcache = tcache_tls;
6187 if (tcache != NULL) {
6188 assert(tcache != (void *)(uintptr_t)1);
6189 tcache_destroy(tcache);
6190 tcache_tls = (void *)(uintptr_t)1;
6196 * The following functions are used by threading libraries for protection of
6197 * malloc during fork(). These functions are only called if the program is
6198 * running in threaded mode, so there is no need to check whether the program
6199 * is threaded here.
6200 */
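/*
 * Editor's illustrative sketch (not in the original source): a threading
 * library that did not call these hooks directly could arrange the same
 * protection itself with pthread_atfork(3), for example:
 *
 *	#include <pthread.h>
 *
 *	void _malloc_prefork(void);
 *	void _malloc_postfork(void);
 *
 *	static void
 *	init_malloc_fork_hooks(void)
 *	{
 *		// Lock before fork(); unlock in both parent and child after.
 *		pthread_atfork(_malloc_prefork, _malloc_postfork,
 *		    _malloc_postfork);
 *	}
 *
 * The hook names refer to the library-private functions defined below.
 */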
6203 _malloc_prefork(void)
6207 /* Acquire all mutexes in a safe order. */
6208 malloc_spin_lock(&arenas_lock);
6209 for (i = 0; i < narenas; i++) {
6210 if (arenas[i] != NULL)
6211 malloc_spin_lock(&arenas[i]->lock);
6214 malloc_mutex_lock(&base_mtx);
6216 malloc_mutex_lock(&huge_mtx);
6219 malloc_mutex_lock(&dss_mtx);
6224 _malloc_postfork(void)
6228 /* Release all mutexes, now that fork() has completed. */
6231 malloc_mutex_unlock(&dss_mtx);
6234 malloc_mutex_unlock(&huge_mtx);
6236 malloc_mutex_unlock(&base_mtx);
6238 for (i = 0; i < narenas; i++) {
6239 if (arenas[i] != NULL)
6240 malloc_spin_unlock(&arenas[i]->lock);
6242 malloc_spin_unlock(&arenas_lock);
6246 * End library-private functions.
6248 /******************************************************************************/