From c9624aad5c06a3dda3cb5c65c7ccb60d651f9d36 Mon Sep 17 00:00:00 2001 From: jasone Date: Fri, 11 May 2018 00:32:31 +0000 Subject: [PATCH] Update jemalloc to version 5.1.0. --- contrib/jemalloc/COPYING | 4 +- contrib/jemalloc/ChangeLog | 119 +- contrib/jemalloc/FREEBSD-Xlist | 1 + contrib/jemalloc/FREEBSD-diffs | 46 +- contrib/jemalloc/VERSION | 2 +- contrib/jemalloc/doc/jemalloc.3 | 90 +- .../include/jemalloc/internal/arena_externs.h | 17 +- .../jemalloc/internal/arena_inlines_a.h | 2 +- .../jemalloc/internal/arena_inlines_b.h | 11 +- .../include/jemalloc/internal/arena_stats.h | 237 +++ .../jemalloc/internal/arena_structs_b.h | 85 +- .../include/jemalloc/internal/arena_types.h | 2 - .../internal/background_thread_externs.h | 2 + .../internal/background_thread_structs.h | 1 + .../include/jemalloc/internal/base_externs.h | 5 +- .../include/jemalloc/internal/base_inlines.h | 4 + .../include/jemalloc/internal/base_structs.h | 4 + .../include/jemalloc/internal/base_types.h | 26 + .../jemalloc/include/jemalloc/internal/bin.h | 106 + .../include/jemalloc/internal/bin_stats.h | 51 + .../include/jemalloc/internal/cache_bin.h | 114 ++ .../jemalloc/include/jemalloc/internal/ctl.h | 5 +- .../jemalloc/include/jemalloc/internal/div.h | 41 + .../include/jemalloc/internal/emitter.h | 435 ++++ .../jemalloc/internal/extent_externs.h | 9 +- .../jemalloc/internal/extent_inlines.h | 36 +- .../jemalloc/internal/extent_structs.h | 102 +- .../include/jemalloc/internal/extent_types.h | 8 + .../jemalloc/include/jemalloc/internal/hash.h | 28 +- .../internal/jemalloc_internal_decls.h | 11 +- .../internal/jemalloc_internal_defs.h | 27 + .../internal/jemalloc_internal_inlines_a.h | 9 +- .../internal/jemalloc_internal_inlines_c.h | 26 +- .../internal/jemalloc_internal_macros.h | 3 + .../internal/jemalloc_internal_types.h | 11 +- .../jemalloc/internal/jemalloc_preamble.h | 29 +- .../jemalloc/include/jemalloc/internal/log.h | 115 ++ .../include/jemalloc/internal/malloc_io.h | 40 + 
.../include/jemalloc/internal/mutex_prof.h | 37 +- .../include/jemalloc/internal/pages.h | 17 + .../jemalloc/internal/private_namespace.h | 42 +- .../jemalloc/internal/prof_inlines_a.h | 11 + .../jemalloc/internal/prof_inlines_b.h | 11 - .../include/jemalloc/internal/rtree.h | 42 +- .../include/jemalloc/internal/rtree_tsd.h | 2 +- .../jemalloc/include/jemalloc/internal/spin.h | 12 +- .../include/jemalloc/internal/stats.h | 134 -- .../include/jemalloc/internal/stats_tsd.h | 12 - .../jemalloc/include/jemalloc/internal/sz.h | 4 +- .../jemalloc/internal/tcache_externs.h | 8 +- .../jemalloc/internal/tcache_inlines.h | 99 +- .../jemalloc/internal/tcache_structs.h | 71 +- .../include/jemalloc/internal/tcache_types.h | 5 - .../include/jemalloc/internal/ticker.h | 38 +- .../jemalloc/include/jemalloc/internal/tsd.h | 2 + .../include/jemalloc/internal/tsd_tls.h | 2 +- .../include/jemalloc/internal/witness.h | 2 +- contrib/jemalloc/include/jemalloc/jemalloc.h | 8 +- contrib/jemalloc/src/arena.c | 406 ++-- contrib/jemalloc/src/background_thread.c | 151 +- contrib/jemalloc/src/base.c | 166 +- contrib/jemalloc/src/bin.c | 50 + contrib/jemalloc/src/ctl.c | 307 ++- contrib/jemalloc/src/div.c | 55 + contrib/jemalloc/src/extent.c | 510 +++-- contrib/jemalloc/src/extent_dss.c | 5 +- contrib/jemalloc/src/jemalloc.c | 198 +- contrib/jemalloc/src/log.c | 78 + contrib/jemalloc/src/malloc_io.c | 17 +- contrib/jemalloc/src/mutex.c | 5 +- contrib/jemalloc/src/pages.c | 228 ++- contrib/jemalloc/src/prof.c | 19 +- contrib/jemalloc/src/stats.c | 1781 +++++++++-------- contrib/jemalloc/src/sz.c | 3 +- contrib/jemalloc/src/tcache.c | 59 +- contrib/jemalloc/src/tsd.c | 10 + lib/libc/stdlib/jemalloc/Makefile.inc | 8 +- 77 files changed, 4387 insertions(+), 2092 deletions(-) create mode 100644 contrib/jemalloc/include/jemalloc/internal/arena_stats.h create mode 100644 contrib/jemalloc/include/jemalloc/internal/bin.h create mode 100644 contrib/jemalloc/include/jemalloc/internal/bin_stats.h create 
mode 100644 contrib/jemalloc/include/jemalloc/internal/cache_bin.h create mode 100644 contrib/jemalloc/include/jemalloc/internal/div.h create mode 100644 contrib/jemalloc/include/jemalloc/internal/emitter.h create mode 100644 contrib/jemalloc/include/jemalloc/internal/log.h delete mode 100644 contrib/jemalloc/include/jemalloc/internal/stats_tsd.h create mode 100644 contrib/jemalloc/src/bin.c create mode 100644 contrib/jemalloc/src/div.c create mode 100644 contrib/jemalloc/src/log.c diff --git a/contrib/jemalloc/COPYING b/contrib/jemalloc/COPYING index e308632a813..98458d971ac 100644 --- a/contrib/jemalloc/COPYING +++ b/contrib/jemalloc/COPYING @@ -1,10 +1,10 @@ Unless otherwise specified, files in the jemalloc source distribution are subject to the following license: -------------------------------------------------------------------------------- -Copyright (C) 2002-2017 Jason Evans . +Copyright (C) 2002-2018 Jason Evans . All rights reserved. Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. -Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. +Copyright (C) 2009-2018 Facebook, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: diff --git a/contrib/jemalloc/ChangeLog b/contrib/jemalloc/ChangeLog index 967d04d0dff..29a00fb78e5 100644 --- a/contrib/jemalloc/ChangeLog +++ b/contrib/jemalloc/ChangeLog @@ -4,6 +4,123 @@ brevity. Much more detail can be found in the git revision history: https://github.com/jemalloc/jemalloc +* 5.1.0 (May 4th, 2018) + + This release is primarily about fine-tuning, ranging from several new features + to numerous notable performance and portability enhancements. The release and + prior dev versions have been running in multiple large scale applications for + months, and the cumulative improvements are substantial in many cases. 
+ + Given the long and successful production runs, this release is likely a good + candidate for applications to upgrade, from both jemalloc 5.0 and before. For + performance-critical applications, the newly added TUNING.md provides + guidelines on jemalloc tuning. + + New features: + - Implement transparent huge page support for internal metadata. (@interwq) + - Add opt.thp to allow enabling / disabling transparent huge pages for all + mappings. (@interwq) + - Add maximum background thread count option. (@djwatson) + - Allow prof_active to control opt.lg_prof_interval and prof.gdump. + (@interwq) + - Allow arena index lookup based on allocation addresses via mallctl. + (@lionkov) + - Allow disabling initial-exec TLS model. (@davidtgoldblatt, @KenMacD) + - Add opt.lg_extent_max_active_fit to set the max ratio between the size of + the active extent selected (to split off from) and the size of the requested + allocation. (@interwq, @davidtgoldblatt) + - Add retain_grow_limit to set the max size when growing virtual address + space. (@interwq) + - Add mallctl interfaces: + + arena..retain_grow_limit (@interwq) + + arenas.lookup (@lionkov) + + max_background_threads (@djwatson) + + opt.lg_extent_max_active_fit (@interwq) + + opt.max_background_threads (@djwatson) + + opt.metadata_thp (@interwq) + + opt.thp (@interwq) + + stats.metadata_thp (@interwq) + + Portability improvements: + - Support GNU/kFreeBSD configuration. (@paravoid) + - Support m68k, nios2 and SH3 architectures. (@paravoid) + - Fall back to FD_CLOEXEC when O_CLOEXEC is unavailable. (@zonyitoo) + - Fix symbol listing for cross-compiling. (@tamird) + - Fix high bits computation on ARM. (@davidtgoldblatt, @paravoid) + - Disable the CPU_SPINWAIT macro for Power. (@davidtgoldblatt, @marxin) + - Fix MSVC 2015 & 2017 builds. (@rustyx) + - Improve RISC-V support. (@EdSchouten) + - Set name mangling script in strict mode. (@nicolov) + - Avoid MADV_HUGEPAGE on ARM. 
(@marxin) + - Modify configure to determine return value of strerror_r. + (@davidtgoldblatt, @cferris1000) + - Make sure CXXFLAGS is tested with CPP compiler. (@nehaljwani) + - Fix 32-bit build on MSVC. (@rustyx) + - Fix external symbol on MSVC. (@maksqwe) + - Avoid a printf format specifier warning. (@jasone) + - Add configure option --disable-initial-exec-tls which can allow jemalloc to + be dynamically loaded after program startup. (@davidtgoldblatt, @KenMacD) + - AArch64: Add ILP32 support. (@cmuellner) + - Add --with-lg-vaddr configure option to support cross compiling. + (@cmuellner, @davidtgoldblatt) + + Optimizations and refactors: + - Improve active extent fit with extent_max_active_fit. This considerably + reduces fragmentation over time and improves virtual memory and metadata + usage. (@davidtgoldblatt, @interwq) + - Eagerly coalesce large extents to reduce fragmentation. (@interwq) + - sdallocx: only read size info when page aligned (i.e. possibly sampled), + which speeds up the sized deallocation path significantly. (@interwq) + - Avoid attempting new mappings for in place expansion with retain, since + it rarely succeeds in practice and causes high overhead. (@interwq) + - Refactor OOM handling in newImpl. (@wqfish) + - Add internal fine-grained logging functionality for debugging use. + (@davidtgoldblatt) + - Refactor arena / tcache interactions. (@davidtgoldblatt) + - Refactor extent management with dumpable flag. (@davidtgoldblatt) + - Add runtime detection of lazy purging. (@interwq) + - Use pairing heap instead of red-black tree for extents_avail. (@djwatson) + - Use sysctl on startup in FreeBSD. (@trasz) + - Use thread local prng state instead of atomic. (@djwatson) + - Make decay always purge one more extent than before, because in + practice large extents are usually the ones that cross the decay threshold. + Purging the additional extent helps save memory as well as reduce VM + fragmentation.
(@interwq) + - Fast division by dynamic values. (@davidtgoldblatt) + - Improve the fit for aligned allocation. (@interwq, @edwinsmith) + - Refactor extent_t bitpacking. (@rkmisra) + - Optimize the generated assembly for ticker operations. (@davidtgoldblatt) + - Convert stats printing to use a structured text emitter. (@davidtgoldblatt) + - Remove preserve_lru feature for extents management. (@djwatson) + - Consolidate two memory loads into one on the fast deallocation path. + (@davidtgoldblatt, @interwq) + + Bug fixes (most of the issues are only relevant to jemalloc 5.0): + - Fix deadlock with multithreaded fork in OS X. (@davidtgoldblatt) + - Validate returned file descriptor before use. (@zonyitoo) + - Fix a few background thread initialization and shutdown issues. (@interwq) + - Fix an extent coalesce + decay race by taking both coalescing extents off + the LRU list. (@interwq) + - Fix potentially unbounded increase during decay, caused by one thread that keeps + stashing memory to purge while other threads generate new pages. The + number of pages to purge is checked to prevent this. (@interwq) + - Fix a FreeBSD bootstrap assertion. (@strejda, @interwq) + - Handle 32 bit mutex counters. (@rkmisra) + - Fix an indexing bug when creating background threads. (@davidtgoldblatt, + @binliu19) + - Fix arguments passed to extent_init. (@yuleniwo, @interwq) + - Fix addresses used for ordering mutexes. (@rkmisra) + - Fix abort_conf processing during bootstrap. (@interwq) + - Fix include path order for out-of-tree builds. (@cmuellner) + + Incompatible changes: + - Remove --disable-thp. (@interwq) + - Remove mallctl interfaces: + + config.thp (@interwq) + + Documentation: + - Add TUNING.md. (@interwq, @davidtgoldblatt, @djwatson) + * 5.0.1 (July 1, 2017) This bugfix release fixes several issues, most of which are obscure enough @@ -515,7 +632,7 @@ brevity.
Much more detail can be found in the git revision history: these fixes, xallocx() now tries harder to partially fulfill requests for optional extra space. Note that a couple of minor heap profiling optimizations are included, but these are better thought of as performance - fixes that were integral to disovering most of the other bugs. + fixes that were integral to discovering most of the other bugs. Optimizations: - Avoid a chunk metadata read in arena_prof_tctx_set(), since it is in the diff --git a/contrib/jemalloc/FREEBSD-Xlist b/contrib/jemalloc/FREEBSD-Xlist index 13eb35be56e..c343cf99db9 100644 --- a/contrib/jemalloc/FREEBSD-Xlist +++ b/contrib/jemalloc/FREEBSD-Xlist @@ -7,6 +7,7 @@ FREEBSD-* INSTALL.md Makefile* README +TUNING.md autogen.sh autom4te.cache/ bin/ diff --git a/contrib/jemalloc/FREEBSD-diffs b/contrib/jemalloc/FREEBSD-diffs index 5a8f7f73614..9681d554ef9 100644 --- a/contrib/jemalloc/FREEBSD-diffs +++ b/contrib/jemalloc/FREEBSD-diffs @@ -1,5 +1,5 @@ diff --git a/doc/jemalloc.xml.in b/doc/jemalloc.xml.in -index 21e401ac..c26f9f4a 100644 +index 1e12fd3a..c42a7e10 100644 --- a/doc/jemalloc.xml.in +++ b/doc/jemalloc.xml.in @@ -53,11 +53,22 @@ @@ -26,7 +26,7 @@ index 21e401ac..c26f9f4a 100644 Standard API -@@ -3252,4 +3263,18 @@ malloc_conf = "narenas:1";]]> +@@ -3376,4 +3387,18 @@ malloc_conf = "narenas:1";]]> The posix_memalign() function conforms to IEEE Std 1003.1-2001 (POSIX.1). 
@@ -64,7 +64,7 @@ index cd49afcb..85e2a991 100644 #define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, hooks_libc_hook) diff --git a/include/jemalloc/internal/jemalloc_internal_decls.h b/include/jemalloc/internal/jemalloc_internal_decls.h -index 1efdb56b..12a7e5a8 100644 +index be70df51..84cd70da 100644 --- a/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/include/jemalloc/internal/jemalloc_internal_decls.h @@ -1,6 +1,9 @@ @@ -78,7 +78,7 @@ index 1efdb56b..12a7e5a8 100644 #ifdef _WIN32 # include diff --git a/include/jemalloc/internal/jemalloc_preamble.h.in b/include/jemalloc/internal/jemalloc_preamble.h.in -index 18539a09..c8af8683 100644 +index e621fbc8..dbdd5d6b 100644 --- a/include/jemalloc/internal/jemalloc_preamble.h.in +++ b/include/jemalloc/internal/jemalloc_preamble.h.in @@ -8,6 +8,9 @@ @@ -91,7 +91,7 @@ index 18539a09..c8af8683 100644 #define JEMALLOC_NO_DEMANGLE #ifdef JEMALLOC_JET # undef JEMALLOC_IS_MALLOC -@@ -68,13 +71,7 @@ static const bool config_fill = +@@ -79,13 +82,7 @@ static const bool config_fill = false #endif ; @@ -128,9 +128,23 @@ index 6520c251..0013cbe9 100644 bool malloc_mutex_boot(void); void malloc_mutex_prof_data_reset(tsdn_t *tsdn, malloc_mutex_t *mutex); +diff --git a/include/jemalloc/internal/tsd.h b/include/jemalloc/internal/tsd.h +index 0b9841aa..f03eee61 100644 +--- a/include/jemalloc/internal/tsd.h ++++ b/include/jemalloc/internal/tsd.h +@@ -122,7 +122,8 @@ struct tsd_s { + t use_a_getter_or_setter_instead_##n; + MALLOC_TSD + #undef O +-}; ++/* AddressSanitizer requires TLS data to be aligned to at least 8 bytes. 
*/ ++} JEMALLOC_ALIGNED(16); + + /* + * Wrapper around tsd_t that makes it possible to avoid implicit conversion diff --git a/include/jemalloc/jemalloc_FreeBSD.h b/include/jemalloc/jemalloc_FreeBSD.h new file mode 100644 -index 00000000..355b565c +index 00000000..b752b0e7 --- /dev/null +++ b/include/jemalloc/jemalloc_FreeBSD.h @@ -0,0 +1,185 @@ @@ -203,7 +217,7 @@ index 00000000..355b565c +# define LG_VADDR 32 +# define LG_SIZEOF_PTR 2 +#endif -+#ifdef __riscv__ ++#ifdef __riscv +# define LG_VADDR 64 +# define LG_SIZEOF_PTR 3 +#endif @@ -331,10 +345,10 @@ index f9438912..47d032c1 100755 +#include "jemalloc_FreeBSD.h" EOF diff --git a/src/jemalloc.c b/src/jemalloc.c -index 52c86aa6..868c9e86 100644 +index f93c16fa..e0ad297b 100644 --- a/src/jemalloc.c +++ b/src/jemalloc.c -@@ -20,6 +20,10 @@ +@@ -21,6 +21,10 @@ /******************************************************************************/ /* Data. */ @@ -345,7 +359,7 @@ index 52c86aa6..868c9e86 100644 /* Runtime configuration options. */ const char *je_malloc_conf #ifndef _WIN32 -@@ -2981,6 +2985,103 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { +@@ -3160,6 +3164,103 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { */ /******************************************************************************/ /* @@ -449,7 +463,7 @@ index 52c86aa6..868c9e86 100644 * The following functions are used by threading libraries for protection of * malloc during fork(). 
*/ -@@ -3141,4 +3242,11 @@ jemalloc_postfork_child(void) { +@@ -3323,4 +3424,11 @@ jemalloc_postfork_child(void) { ctl_postfork_child(tsd_tsdn(tsd)); } @@ -462,10 +476,10 @@ index 52c86aa6..868c9e86 100644 + /******************************************************************************/ diff --git a/src/malloc_io.c b/src/malloc_io.c -index 6b99afcd..4363cb83 100644 +index 7bdc13f9..c8802c70 100644 --- a/src/malloc_io.c +++ b/src/malloc_io.c -@@ -88,6 +88,20 @@ wrtmessage(void *cbopaque, const char *s) { +@@ -75,6 +75,20 @@ wrtmessage(void *cbopaque, const char *s) { JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); @@ -487,10 +501,10 @@ index 6b99afcd..4363cb83 100644 * Wrapper around malloc_message() that avoids the need for * je_malloc_message(...) throughout the code. diff --git a/src/mutex.c b/src/mutex.c -index a528ef0c..820af613 100644 +index 30222b3e..b2c36283 100644 --- a/src/mutex.c +++ b/src/mutex.c -@@ -40,6 +40,17 @@ pthread_create(pthread_t *__restrict thread, +@@ -41,6 +41,17 @@ pthread_create(pthread_t *__restrict thread, #ifdef JEMALLOC_MUTEX_INIT_CB JEMALLOC_EXPORT int _pthread_mutex_init_calloc_cb(pthread_mutex_t *mutex, void *(calloc_cb)(size_t, size_t)); @@ -508,7 +522,7 @@ index a528ef0c..820af613 100644 #endif void -@@ -130,6 +141,16 @@ mutex_addr_comp(const witness_t *witness1, void *mutex1, +@@ -131,6 +142,16 @@ mutex_addr_comp(const witness_t *witness1, void *mutex1, } bool diff --git a/contrib/jemalloc/VERSION b/contrib/jemalloc/VERSION index 50baf739c66..a3c3033a5a3 100644 --- a/contrib/jemalloc/VERSION +++ b/contrib/jemalloc/VERSION @@ -1 +1 @@ -5.0.1-0-g896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb +5.1.0-0-g61efbda7098de6fe64c362d309824864308c36d4 diff --git a/contrib/jemalloc/doc/jemalloc.3 b/contrib/jemalloc/doc/jemalloc.3 index 0ab82024d26..c8a28342621 100644 --- a/contrib/jemalloc/doc/jemalloc.3 +++ b/contrib/jemalloc/doc/jemalloc.3 @@ -2,12 +2,12 @@ .\" Title: JEMALLOC .\" Author: Jason Evans .\" Generator: DocBook XSL 
Stylesheets v1.76.1 -.\" Date: 07/01/2017 +.\" Date: 05/08/2018 .\" Manual: User Manual -.\" Source: jemalloc 5.0.1-0-g896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb +.\" Source: jemalloc 5.1.0-0-g61efbda7098de6fe64c362d309824864308c36d4 .\" Language: English .\" -.TH "JEMALLOC" "3" "07/01/2017" "jemalloc 5.0.1-0-g896ed3a8b3f4" "User Manual" +.TH "JEMALLOC" "3" "05/08/2018" "jemalloc 5.1.0-0-g61efbda7098d" "User Manual" .\" ----------------------------------------------------------------- .\" * Define some portability stuff .\" ----------------------------------------------------------------- @@ -31,7 +31,7 @@ jemalloc \- general purpose memory allocation functions .SH "LIBRARY" .PP -This manual describes jemalloc 5\&.0\&.1\-0\-g896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb\&. More information can be found at the +This manual describes jemalloc 5\&.1\&.0\-0\-g61efbda7098de6fe64c362d309824864308c36d4\&. More information can be found at the \m[blue]\fBjemalloc website\fR\m[]\&\s-2\u[1]\d\s+2\&. .PP The following configuration options are enabled in libc\*(Aqs built\-in jemalloc: @@ -741,6 +741,13 @@ opt\&.background_thread can be used to set the default option\&. This option is only available on selected pthread\-based platforms\&. .RE .PP +max_background_threads (\fBsize_t\fR) rw +.RS 4 +Maximum number of background worker threads that will be created\&. This value is capped at +opt\&.max_background_threads +at startup\&. +.RE +.PP config\&.cache_oblivious (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-cache\-oblivious\fR @@ -796,12 +803,6 @@ config\&.stats (\fBbool\fR) r\- was specified during build configuration\&. .RE .PP -config\&.thp (\fBbool\fR) r\- -.RS 4 -\fB\-\-disable\-thp\fR -was not specified during build configuration, and the system supports transparent huge page manipulation\&. -.RE -.PP config\&.utrace (\fBbool\fR) r\- .RS 4 \fB\-\-enable\-utrace\fR @@ -834,6 +835,17 @@ in these cases\&. 
This option is disabled by default unless is specified during configuration, in which case it is enabled by default\&. .RE .PP +opt\&.metadata_thp (\fBconst char *\fR) r\- +.RS 4 +Controls whether to allow jemalloc to use transparent huge page (THP) for internal metadata (see +stats\&.metadata)\&. +\(lqalways\(rq +allows such usage\&. +\(lqauto\(rq +uses no THP initially, but may begin to do so when metadata usage reaches certain level\&. The default is +\(lqdisabled\(rq\&. +.RE +.PP opt\&.retain (\fBbool\fR) r\- .RS 4 If true, retain unused virtual memory for later reuse rather than discarding it by calling @@ -883,11 +895,18 @@ setting uses one arena per physical CPU, which means the two hyper threads on th .PP opt\&.background_thread (\fBconst bool\fR) r\- .RS 4 -Internal background worker threads enabled/disabled\&. See +Internal background worker threads enabled/disabled\&. Because of potential circular dependencies, enabling background thread using this option may cause crash or deadlock during initialization\&. For a reliable way to use this feature, see background_thread for dynamic control options and details\&. This option is disabled by default\&. .RE .PP +opt\&.max_background_threads (\fBconst size_t\fR) r\- +.RS 4 +Maximum number of background threads that will be created if +background_thread +is set\&. Defaults to number of cpus\&. +.RE +.PP opt\&.dirty_decay_ms (\fBssize_t\fR) r\- .RS 4 Approximate time in milliseconds from the creation of a set of unused dirty pages until an equivalent set of unused dirty pages is purged (i\&.e\&. converted to muzzy via e\&.g\&. @@ -895,7 +914,7 @@ madvise(\fI\&.\&.\&.\fR\fI\fBMADV_FREE\fR\fR) if supported by the operating system, or converted to clean otherwise) and/or reused\&. Dirty pages are defined as previously having been potentially written to by the application, and therefore consuming physical memory, yet having no current use\&. 
The pages are incrementally purged according to a sigmoidal decay curve that starts and ends with zero purge rate\&. A decay time of 0 causes all unused dirty pages to be purged immediately upon creation\&. A decay time of \-1 disables purging\&. The default decay time is 10 seconds\&. See arenas\&.dirty_decay_ms and -arena\&.\&.muzzy_decay_ms +arena\&.\&.dirty_decay_ms for related dynamic control options\&. See opt\&.muzzy_decay_ms for a description of muzzy pages\&. @@ -911,6 +930,11 @@ arena\&.\&.muzzy_decay_ms for related dynamic control options\&. .RE .PP +opt\&.lg_extent_max_active_fit (\fBsize_t\fR) r\- +.RS 4 +When reusing dirty extents, this determines the (log base 2 of the) maximum ratio between the size of the active extent selected (to split off from) and the size of the requested allocation\&. This prevents the splitting of large active extents for smaller allocations, which can reduce fragmentation over the long run (especially for non\-active extents)\&. Lower value may reduce fragmentation, at the cost of extra active extents\&. The default value is 6, which gives a maximum ratio of 64 (2^6)\&. +.RE +.PP opt\&.stats_print (\fBbool\fR) r\- .RS 4 Enable/disable statistics printing at exit\&. If enabled, the @@ -1008,6 +1032,15 @@ opt\&.lg_tcache_max (\fBsize_t\fR) r\- Maximum size class (log base 2) to cache in the thread\-specific cache (tcache)\&. At a minimum, all small size classes are cached, and at a maximum all large size classes are cached\&. The default maximum is 32 KiB (2^15)\&. .RE .PP +opt\&.thp (\fBconst char *\fR) r\- +.RS 4 +Transparent hugepage (THP) mode\&. Settings "always", "never" and "default" are available if THP is supported by the operating system\&. The "always" setting enables transparent hugepage for all user memory mappings with +\fI\fBMADV_HUGEPAGE\fR\fR; "never" ensures no transparent hugepage with +\fI\fBMADV_NOHUGEPAGE\fR\fR; the default setting "default" makes no changes\&. 
Note that: this option does not affect THP for jemalloc internal metadata (see +opt\&.metadata_thp); in addition, for arenas with customized +extent_hooks, this option is bypassed as it is implemented as part of the default extent hooks\&. +.RE +.PP opt\&.prof (\fBbool\fR) r\- [\fB\-\-enable\-prof\fR] .RS 4 Memory profiling enabled/disabled\&. If enabled, profile memory allocation activity\&. See the @@ -1248,6 +1281,14 @@ opt\&.muzzy_decay_ms for additional information\&. .RE .PP +arena\&.\&.retain_grow_limit (\fBsize_t\fR) rw +.RS 4 +Maximum size to grow retained region (only relevant when +opt\&.retain +is enabled)\&. This controls the maximum increment to expand virtual memory, or allocation through +arena\&.extent_hooks\&. In particular, if customized extent hooks reserve physical memory (e\&.g\&. 1G huge pages), this is useful to control the allocation hook\*(Aqs input size\&. The default is no limit\&. +.RE +.PP arena\&.\&.extent_hooks (\fBextent_hooks_t *\fR) rw .RS 4 Get or set the extent management hook functions for arena \&. The functions must be capable of operating on all extant extents associated with arena , usually by passing unknown extents to the replaced functions\&. In practice, it is feasible to control allocation for arenas explicitly created via @@ -1278,7 +1319,7 @@ struct extent_hooks_s { The \fBextent_hooks_t\fR structure comprises function pointers which are described individually below\&. jemalloc uses these functions to manage extent lifetime, which starts off with allocation of mapped committed memory, in the simplest case followed by deallocation\&. However, there are performance and platform reasons to retain extents for later reuse\&. Cleanup attempts cascade from deallocation to decommit to forced purging to lazy purging, which gives the extent management functions opportunities to reject the most permanent cleanup operations in favor of less permanent (and often less costly) operations\&. 
All operations except allocation can be universally opted out of by setting the hook pointers to -\fBNULL\fR, or selectively opted out of by returning failure\&. +\fBNULL\fR, or selectively opted out of by returning failure\&. Note that once the extent hook is set, the structure is accessed directly by the associated arenas, so it must remain valid for the entire lifetime of the arenas\&. .HP \w'typedef\ void\ *(extent_alloc_t)('u .BI "typedef void *(extent_alloc_t)(extent_hooks_t\ *" "extent_hooks" ", void\ *" "new_addr" ", size_t\ " "size" ", size_t\ " "alignment" ", bool\ *" "zero" ", bool\ *" "commit" ", unsigned\ " "arena_ind" ");" .sp @@ -1572,6 +1613,11 @@ arenas\&.create (\fBunsigned\fR, \fBextent_hooks_t *\fR) rw Explicitly create a new arena outside the range of automatically managed arenas, with optionally specified extent hooks, and return the new arena index\&. .RE .PP +arenas\&.lookup (\fBunsigned\fR, \fBvoid*\fR) rw +.RS 4 +Index of the arena to which an allocation belongs\&. +.RE +.PP prof\&.thread_active_init (\fBbool\fR) rw [\fB\-\-enable\-prof\fR] .RS 4 Control the initial setting for @@ -1648,7 +1694,16 @@ stats\&.metadata (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Total number of bytes dedicated to metadata, which comprise base allocations used for bootstrap\-sensitive allocator metadata structures (see stats\&.arenas\&.\&.base) and internal allocations (see -stats\&.arenas\&.\&.internal)\&. +stats\&.arenas\&.\&.internal)\&. Transparent huge page (enabled with +opt\&.metadata_thp) usage is not considered\&. +.RE +.PP +stats\&.metadata_thp (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Number of transparent huge pages (THP) used for metadata\&. See +stats\&.metadata +and +opt\&.metadata_thp for details\&. .RE .PP stats\&.resident (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] @@ -1831,6 +1886,13 @@ stats\&.arenas\&.\&.internal (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] Number of bytes dedicated to internal allocations\&.
Internal allocations differ from application\-originated allocations in that they are for internal use, and that they are omitted from heap profiles\&. .RE .PP +stats\&.arenas\&.\&.metadata_thp (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] +.RS 4 +Number of transparent huge pages (THP) used for metadata\&. See +opt\&.metadata_thp +for details\&. +.RE +.PP stats\&.arenas\&.\&.resident (\fBsize_t\fR) r\- [\fB\-\-enable\-stats\fR] .RS 4 Maximum number of bytes in physically resident data pages mapped by the arena, comprising all pages dedicated to allocator metadata, pages backing active allocations, and unused dirty pages\&. This is a maximum rather than precise because pages may not actually be physically resident if they correspond to demand\-zeroed virtual memory that has not yet been touched\&. This is a multiple of the page size\&. diff --git a/contrib/jemalloc/include/jemalloc/internal/arena_externs.h b/contrib/jemalloc/include/jemalloc/internal/arena_externs.h index af16d158852..4b3732b41da 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena_externs.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena_externs.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_ARENA_EXTERNS_H #define JEMALLOC_INTERNAL_ARENA_EXTERNS_H +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/pages.h" #include "jemalloc/internal/size_classes.h" @@ -9,25 +10,19 @@ extern ssize_t opt_dirty_decay_ms; extern ssize_t opt_muzzy_decay_ms; -extern const arena_bin_info_t arena_bin_info[NBINS]; - extern percpu_arena_mode_t opt_percpu_arena; extern const char *percpu_arena_mode_names[]; extern const uint64_t h_steps[SMOOTHSTEP_NSTEPS]; extern malloc_mutex_t arenas_lock; -void arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - szind_t szind, uint64_t nrequests); -void arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - size_t size); void arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned 
*nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy); void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats); + bin_stats_t *bstats, arena_stats_large_t *lstats); void arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent); #ifdef JEMALLOC_JET @@ -50,11 +45,11 @@ void arena_decay(tsdn_t *tsdn, arena_t *arena, bool is_background_thread, void arena_reset(tsd_t *tsd, arena_t *arena); void arena_destroy(tsd_t *tsd, arena_t *arena); void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); -void arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, + cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes); +void arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero); -typedef void (arena_dalloc_junk_small_t)(void *, const arena_bin_info_t *); +typedef void (arena_dalloc_junk_small_t)(void *, const bin_info_t *); extern arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small; void *arena_malloc_hard(tsdn_t *tsdn, arena_t *arena, size_t size, @@ -77,6 +72,8 @@ ssize_t arena_dirty_decay_ms_default_get(void); bool arena_dirty_decay_ms_default_set(ssize_t decay_ms); ssize_t arena_muzzy_decay_ms_default_get(void); bool arena_muzzy_decay_ms_default_set(ssize_t decay_ms); +bool arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, + size_t *old_limit, size_t *new_limit); unsigned arena_nthreads_get(arena_t *arena, bool internal); void arena_nthreads_inc(arena_t *arena, bool internal); void arena_nthreads_dec(arena_t *arena, bool internal); diff --git 
a/contrib/jemalloc/include/jemalloc/internal/arena_inlines_a.h b/contrib/jemalloc/include/jemalloc/internal/arena_inlines_a.h index da5877060a8..9abf7f6ac70 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena_inlines_a.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena_inlines_a.h @@ -25,7 +25,7 @@ static inline bool arena_prof_accum(tsdn_t *tsdn, arena_t *arena, uint64_t accumbytes) { cassert(config_prof); - if (likely(prof_interval == 0)) { + if (likely(prof_interval == 0 || !prof_active_get_unlocked())) { return false; } diff --git a/contrib/jemalloc/include/jemalloc/internal/arena_inlines_b.h b/contrib/jemalloc/include/jemalloc/internal/arena_inlines_b.h index 003abe116fb..2b7e77e7216 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena_inlines_b.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena_inlines_b.h @@ -8,13 +8,6 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/ticker.h" -static inline szind_t -arena_bin_index(arena_t *arena, arena_bin_t *bin) { - szind_t binind = (szind_t)(bin - arena->bins); - assert(binind < NBINS); - return binind; -} - JEMALLOC_ALWAYS_INLINE prof_tctx_t * arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { cassert(config_prof); @@ -35,7 +28,7 @@ arena_prof_tctx_get(tsdn_t *tsdn, const void *ptr, alloc_ctx_t *alloc_ctx) { } JEMALLOC_ALWAYS_INLINE void -arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, +arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, UNUSED size_t usize, alloc_ctx_t *alloc_ctx, prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); @@ -54,7 +47,7 @@ arena_prof_tctx_set(tsdn_t *tsdn, const void *ptr, size_t usize, } static inline void -arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, prof_tctx_t *tctx) { +arena_prof_tctx_reset(tsdn_t *tsdn, const void *ptr, UNUSED prof_tctx_t *tctx) { cassert(config_prof); assert(ptr != NULL); diff --git a/contrib/jemalloc/include/jemalloc/internal/arena_stats.h 
b/contrib/jemalloc/include/jemalloc/internal/arena_stats.h new file mode 100644 index 00000000000..5f3dca8b155 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/arena_stats.h @@ -0,0 +1,237 @@ +#ifndef JEMALLOC_INTERNAL_ARENA_STATS_H +#define JEMALLOC_INTERNAL_ARENA_STATS_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/mutex_prof.h" +#include "jemalloc/internal/size_classes.h" + +/* + * In those architectures that support 64-bit atomics, we use atomic updates for + * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize + * externally. + */ +#ifdef JEMALLOC_ATOMIC_U64 +typedef atomic_u64_t arena_stats_u64_t; +#else +/* Must hold the arena stats mutex while reading atomically. */ +typedef uint64_t arena_stats_u64_t; +#endif + +typedef struct arena_stats_large_s arena_stats_large_t; +struct arena_stats_large_s { + /* + * Total number of allocation/deallocation requests served directly by + * the arena. + */ + arena_stats_u64_t nmalloc; + arena_stats_u64_t ndalloc; + + /* + * Number of allocation requests that correspond to this size class. + * This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + arena_stats_u64_t nrequests; /* Partially derived. */ + + /* Current number of allocations of this size class. */ + size_t curlextents; /* Derived. */ +}; + +typedef struct arena_stats_decay_s arena_stats_decay_t; +struct arena_stats_decay_s { + /* Total number of purge sweeps. */ + arena_stats_u64_t npurge; + /* Total number of madvise calls made. */ + arena_stats_u64_t nmadvise; + /* Total number of pages purged. */ + arena_stats_u64_t purged; +}; + +/* + * Arena stats. Note that fields marked "derived" are not directly maintained + * within the arena code; rather their values are derived during stats merge + * requests. 
+ */ +typedef struct arena_stats_s arena_stats_t; +struct arena_stats_s { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_t mtx; +#endif + + /* Number of bytes currently mapped, excluding retained memory. */ + atomic_zu_t mapped; /* Partially derived. */ + + /* + * Number of unused virtual memory bytes currently retained. Retained + * bytes are technically mapped (though always decommitted or purged), + * but they are excluded from the mapped statistic (above). + */ + atomic_zu_t retained; /* Derived. */ + + arena_stats_decay_t decay_dirty; + arena_stats_decay_t decay_muzzy; + + atomic_zu_t base; /* Derived. */ + atomic_zu_t internal; + atomic_zu_t resident; /* Derived. */ + atomic_zu_t metadata_thp; + + atomic_zu_t allocated_large; /* Derived. */ + arena_stats_u64_t nmalloc_large; /* Derived. */ + arena_stats_u64_t ndalloc_large; /* Derived. */ + arena_stats_u64_t nrequests_large; /* Derived. */ + + /* Number of bytes cached in tcache associated with this arena. */ + atomic_zu_t tcache_bytes; /* Derived. */ + + mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; + + /* One element for each large size class. */ + arena_stats_large_t lstats[NSIZES - NBINS]; + + /* Arena uptime. */ + nstime_t uptime; +}; + +static inline bool +arena_stats_init(UNUSED tsdn_t *tsdn, arena_stats_t *arena_stats) { + if (config_debug) { + for (size_t i = 0; i < sizeof(arena_stats_t); i++) { + assert(((char *)arena_stats)[i] == 0); + } + } +#ifndef JEMALLOC_ATOMIC_U64 + if (malloc_mutex_init(&arena_stats->mtx, "arena_stats", + WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { + return true; + } +#endif + /* Memory is zeroed, so there is no need to clear stats. 
*/ + return false; +} + +static inline void +arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_lock(tsdn, &arena_stats->mtx); +#endif +} + +static inline void +arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { +#ifndef JEMALLOC_ATOMIC_U64 + malloc_mutex_unlock(tsdn, &arena_stats->mtx); +#endif +} + +static inline uint64_t +arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_u64(p, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return *p; +#endif +} + +static inline void +arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p += x; +#endif +} + +UNUSED static inline void +arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, + arena_stats_u64_t *p, uint64_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + *p -= x; + assert(*p + x >= *p); +#endif +} + +/* + * Non-atomically sets *dst += src. *dst needs external synchronization. + * This lets us avoid the cost of a fetch_add when it's unnecessary (note that + * the types here are atomic).
+ */ +static inline void +arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { +#ifdef JEMALLOC_ATOMIC_U64 + uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); + atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); +#else + *dst += src; +#endif +} + +static inline size_t +arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { +#ifdef JEMALLOC_ATOMIC_U64 + return atomic_load_zu(p, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + return atomic_load_zu(p, ATOMIC_RELAXED); +#endif +} + +static inline void +arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur + x, ATOMIC_RELAXED); +#endif +} + +static inline void +arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, + size_t x) { +#ifdef JEMALLOC_ATOMIC_U64 + UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); + assert(r - x <= r); +#else + malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); + size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); + atomic_store_zu(p, cur - x, ATOMIC_RELAXED); +#endif +} + +/* Like the _u64 variant, needs an externally synchronized *dst. 
*/ +static inline void +arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { + size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); + atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); +} + +static inline void +arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, + szind_t szind, uint64_t nrequests) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - + NBINS].nrequests, nrequests); + arena_stats_unlock(tsdn, arena_stats); +} + +static inline void +arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { + arena_stats_lock(tsdn, arena_stats); + arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); + arena_stats_unlock(tsdn, arena_stats); +} + + +#endif /* JEMALLOC_INTERNAL_ARENA_STATS_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/arena_structs_b.h b/contrib/jemalloc/include/jemalloc/internal/arena_structs_b.h index d1fffec1936..38bc95962d8 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena_structs_b.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena_structs_b.h @@ -1,7 +1,9 @@ #ifndef JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H #define JEMALLOC_INTERNAL_ARENA_STRUCTS_B_H +#include "jemalloc/internal/arena_stats.h" #include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/jemalloc_internal_types.h" @@ -10,45 +12,8 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/smoothstep.h" -#include "jemalloc/internal/stats.h" #include "jemalloc/internal/ticker.h" -/* - * Read-only information associated with each element of arena_t's bins array - * is stored separately, partly to reduce memory usage (only one copy, rather - * than one per arena), but mainly to avoid false cacheline sharing. 
- * - * Each slab has the following layout: - * - * /--------------------\ - * | region 0 | - * |--------------------| - * | region 1 | - * |--------------------| - * | ... | - * | ... | - * | ... | - * |--------------------| - * | region nregs-1 | - * \--------------------/ - */ -struct arena_bin_info_s { - /* Size of regions in a slab for this bin's size class. */ - size_t reg_size; - - /* Total size of a slab for this bin's size class. */ - size_t slab_size; - - /* Total number of regions in a slab for this bin's size class. */ - uint32_t nregs; - - /* - * Metadata used to manipulate bitmaps for slabs associated with this - * bin. - */ - bitmap_info_t bitmap_info; -}; - struct arena_decay_s { /* Synchronizes all non-atomic fields. */ malloc_mutex_t mtx; @@ -104,37 +69,11 @@ struct arena_decay_s { * arena and ctl code. * * Synchronization: Same as associated arena's stats field. */ - decay_stats_t *stats; + arena_stats_decay_t *stats; /* Peak number of pages in associated extents. Used for debug only. */ uint64_t ceil_npages; }; -struct arena_bin_s { - /* All operations on arena_bin_t fields require lock ownership. */ - malloc_mutex_t lock; - - /* - * Current slab being used to service allocations of this bin's size - * class. slabcur is independent of slabs_{nonfull,full}; whenever - * slabcur is reassigned, the previous slab must be deallocated or - * inserted into slabs_{nonfull,full}. - */ - extent_t *slabcur; - - /* - * Heap of non-full slabs. This heap is used to assure that new - * allocations come from the non-full slab that is oldest/lowest in - * memory. - */ - extent_heap_t slabs_nonfull; - - /* List used to track full slabs. */ - extent_list_t slabs_full; - - /* Bin statistics. */ - malloc_bin_stats_t stats; -}; - struct arena_s { /* * Number of threads currently assigned to this arena. Each thread has @@ -162,14 +101,15 @@ struct arena_s { arena_stats_t stats; /* - * List of tcaches for extant threads associated with this arena. 
- * Stats from these are merged incrementally, and at exit if - * opt_stats_print is enabled. + * Lists of tcaches and cache_bin_array_descriptors for extant threads + * associated with this arena. Stats from these are merged + * incrementally, and at exit if opt_stats_print is enabled. * * Synchronization: tcache_ql_mtx. */ - ql_head(tcache_t) tcache_ql; - malloc_mutex_t tcache_ql_mtx; + ql_head(tcache_t) tcache_ql; + ql_head(cache_bin_array_descriptor_t) cache_bin_array_descriptor_ql; + malloc_mutex_t tcache_ql_mtx; /* Synchronization: internal. */ prof_accum_t prof_accum; @@ -239,9 +179,14 @@ struct arena_s { * be effective even if multiple arenas' extent allocation requests are * highly interleaved. * + * retain_grow_limit is the max allowed size ind to expand (unless the + * required size is greater). Default is no limit, and controlled + * through mallctl only. + * * Synchronization: extent_grow_mtx */ pszind_t extent_grow_next; + pszind_t retain_grow_limit; malloc_mutex_t extent_grow_mtx; /* @@ -258,7 +203,7 @@ struct arena_s { * * Synchronization: internal. */ - arena_bin_t bins[NBINS]; + bin_t bins[NBINS]; /* * Base allocator, from which arena metadata are allocated. 
diff --git a/contrib/jemalloc/include/jemalloc/internal/arena_types.h b/contrib/jemalloc/include/jemalloc/internal/arena_types.h index a691bd811e0..70001b5f16f 100644 --- a/contrib/jemalloc/include/jemalloc/internal/arena_types.h +++ b/contrib/jemalloc/include/jemalloc/internal/arena_types.h @@ -12,9 +12,7 @@ #define DECAY_NTICKS_PER_UPDATE 1000 typedef struct arena_slab_data_s arena_slab_data_t; -typedef struct arena_bin_info_s arena_bin_info_t; typedef struct arena_decay_s arena_decay_t; -typedef struct arena_bin_s arena_bin_t; typedef struct arena_s arena_t; typedef struct arena_tdata_s arena_tdata_t; typedef struct alloc_ctx_s alloc_ctx_t; diff --git a/contrib/jemalloc/include/jemalloc/internal/background_thread_externs.h b/contrib/jemalloc/include/jemalloc/internal/background_thread_externs.h index 8b4b8471a95..3209aa49ffa 100644 --- a/contrib/jemalloc/include/jemalloc/internal/background_thread_externs.h +++ b/contrib/jemalloc/include/jemalloc/internal/background_thread_externs.h @@ -2,9 +2,11 @@ #define JEMALLOC_INTERNAL_BACKGROUND_THREAD_EXTERNS_H extern bool opt_background_thread; +extern size_t opt_max_background_threads; extern malloc_mutex_t background_thread_lock; extern atomic_b_t background_thread_enabled_state; extern size_t n_background_threads; +extern size_t max_background_threads; extern background_thread_info_t *background_thread_info; extern bool can_enable_background_thread; diff --git a/contrib/jemalloc/include/jemalloc/internal/background_thread_structs.h b/contrib/jemalloc/include/jemalloc/internal/background_thread_structs.h index e69a7d022b4..c1107dfe9c4 100644 --- a/contrib/jemalloc/include/jemalloc/internal/background_thread_structs.h +++ b/contrib/jemalloc/include/jemalloc/internal/background_thread_structs.h @@ -8,6 +8,7 @@ #endif #define BACKGROUND_THREAD_INDEFINITE_SLEEP UINT64_MAX +#define MAX_BACKGROUND_THREAD_LIMIT MALLOCX_ARENA_LIMIT typedef enum { background_thread_stopped, diff --git 
a/contrib/jemalloc/include/jemalloc/internal/base_externs.h b/contrib/jemalloc/include/jemalloc/internal/base_externs.h index a4fd5ac7d9a..7b705c9b4d1 100644 --- a/contrib/jemalloc/include/jemalloc/internal/base_externs.h +++ b/contrib/jemalloc/include/jemalloc/internal/base_externs.h @@ -1,6 +1,9 @@ #ifndef JEMALLOC_INTERNAL_BASE_EXTERNS_H #define JEMALLOC_INTERNAL_BASE_EXTERNS_H +extern metadata_thp_mode_t opt_metadata_thp; +extern const char *metadata_thp_mode_names[]; + base_t *b0get(void); base_t *base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks); void base_delete(tsdn_t *tsdn, base_t *base); @@ -10,7 +13,7 @@ extent_hooks_t *base_extent_hooks_set(base_t *base, void *base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment); extent_t *base_alloc_extent(tsdn_t *tsdn, base_t *base); void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, - size_t *resident, size_t *mapped); + size_t *resident, size_t *mapped, size_t *n_thp); void base_prefork(tsdn_t *tsdn, base_t *base); void base_postfork_parent(tsdn_t *tsdn, base_t *base); void base_postfork_child(tsdn_t *tsdn, base_t *base); diff --git a/contrib/jemalloc/include/jemalloc/internal/base_inlines.h b/contrib/jemalloc/include/jemalloc/internal/base_inlines.h index 931560bfaea..aec0e2e1e1c 100644 --- a/contrib/jemalloc/include/jemalloc/internal/base_inlines.h +++ b/contrib/jemalloc/include/jemalloc/internal/base_inlines.h @@ -6,4 +6,8 @@ base_ind_get(const base_t *base) { return base->ind; } +static inline bool +metadata_thp_enabled(void) { + return (opt_metadata_thp != metadata_thp_disabled); +} #endif /* JEMALLOC_INTERNAL_BASE_INLINES_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/base_structs.h b/contrib/jemalloc/include/jemalloc/internal/base_structs.h index 18e227bd5a6..2102247ac43 100644 --- a/contrib/jemalloc/include/jemalloc/internal/base_structs.h +++ b/contrib/jemalloc/include/jemalloc/internal/base_structs.h @@ -30,6 +30,8 @@ struct base_s { /* 
Protects base_alloc() and base_stats_get() operations. */ malloc_mutex_t mtx; + /* Using THP when true (metadata_thp auto mode). */ + bool auto_thp_switched; /* * Most recent size class in the series of increasingly large base * extents. Logarithmic spacing between subsequent allocations ensures @@ -50,6 +52,8 @@ struct base_s { size_t allocated; size_t resident; size_t mapped; + /* Number of THP regions touched. */ + size_t n_thp; }; #endif /* JEMALLOC_INTERNAL_BASE_STRUCTS_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/base_types.h b/contrib/jemalloc/include/jemalloc/internal/base_types.h index be7ee82589f..b6db77df7c6 100644 --- a/contrib/jemalloc/include/jemalloc/internal/base_types.h +++ b/contrib/jemalloc/include/jemalloc/internal/base_types.h @@ -4,4 +4,30 @@ typedef struct base_block_s base_block_t; typedef struct base_s base_t; +#define METADATA_THP_DEFAULT metadata_thp_disabled + +/* + * In auto mode, arenas switch to huge pages for the base allocator on the + * second base block. a0 switches to thp on the 5th block (after 20 megabytes + * of metadata), since more metadata (e.g. rtree nodes) come from a0's base. + */ + +#define BASE_AUTO_THP_THRESHOLD 2 +#define BASE_AUTO_THP_THRESHOLD_A0 5 + +typedef enum { + metadata_thp_disabled = 0, + /* + * Lazily enable hugepage for metadata. To avoid high RSS caused by THP + * + low usage arena (i.e. THP becomes a significant percentage), the + * "auto" option only starts using THP after a base allocator used up + * the first THP region. Starting from the second hugepage (in a single + * arena), "auto" behaves the same as "always", i.e. madvise hugepage + * right away. 
+ */ + metadata_thp_auto = 1, + metadata_thp_always = 2, + metadata_thp_mode_limit = 3 +} metadata_thp_mode_t; + #endif /* JEMALLOC_INTERNAL_BASE_TYPES_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/bin.h b/contrib/jemalloc/include/jemalloc/internal/bin.h new file mode 100644 index 00000000000..9b416ada7ed --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/bin.h @@ -0,0 +1,106 @@ +#ifndef JEMALLOC_INTERNAL_BIN_H +#define JEMALLOC_INTERNAL_BIN_H + +#include "jemalloc/internal/extent_types.h" +#include "jemalloc/internal/extent_structs.h" +#include "jemalloc/internal/mutex.h" +#include "jemalloc/internal/bin_stats.h" + +/* + * A bin contains a set of extents that are currently being used for slab + * allocations. + */ + +/* + * Read-only information associated with each element of arena_t's bins array + * is stored separately, partly to reduce memory usage (only one copy, rather + * than one per arena), but mainly to avoid false cacheline sharing. + * + * Each slab has the following layout: + * + * /--------------------\ + * | region 0 | + * |--------------------| + * | region 1 | + * |--------------------| + * | ... | + * | ... | + * | ... | + * |--------------------| + * | region nregs-1 | + * \--------------------/ + */ +typedef struct bin_info_s bin_info_t; +struct bin_info_s { + /* Size of regions in a slab for this bin's size class. */ + size_t reg_size; + + /* Total size of a slab for this bin's size class. */ + size_t slab_size; + + /* Total number of regions in a slab for this bin's size class. */ + uint32_t nregs; + + /* + * Metadata used to manipulate bitmaps for slabs associated with this + * bin. + */ + bitmap_info_t bitmap_info; +}; + +extern const bin_info_t bin_infos[NBINS]; + + +typedef struct bin_s bin_t; +struct bin_s { + /* All operations on bin_t fields require lock ownership. */ + malloc_mutex_t lock; + + /* + * Current slab being used to service allocations of this bin's size + * class. 
slabcur is independent of slabs_{nonfull,full}; whenever + * slabcur is reassigned, the previous slab must be deallocated or + * inserted into slabs_{nonfull,full}. + */ + extent_t *slabcur; + + /* + * Heap of non-full slabs. This heap is used to assure that new + * allocations come from the non-full slab that is oldest/lowest in + * memory. + */ + extent_heap_t slabs_nonfull; + + /* List used to track full slabs. */ + extent_list_t slabs_full; + + /* Bin statistics. */ + bin_stats_t stats; +}; + +/* Initializes a bin to empty. Returns true on error. */ +bool bin_init(bin_t *bin); + +/* Forking. */ +void bin_prefork(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_parent(tsdn_t *tsdn, bin_t *bin); +void bin_postfork_child(tsdn_t *tsdn, bin_t *bin); + +/* Stats. */ +static inline void +bin_stats_merge(tsdn_t *tsdn, bin_stats_t *dst_bin_stats, bin_t *bin) { + malloc_mutex_lock(tsdn, &bin->lock); + malloc_mutex_prof_read(tsdn, &dst_bin_stats->mutex_data, &bin->lock); + dst_bin_stats->nmalloc += bin->stats.nmalloc; + dst_bin_stats->ndalloc += bin->stats.ndalloc; + dst_bin_stats->nrequests += bin->stats.nrequests; + dst_bin_stats->curregs += bin->stats.curregs; + dst_bin_stats->nfills += bin->stats.nfills; + dst_bin_stats->nflushes += bin->stats.nflushes; + dst_bin_stats->nslabs += bin->stats.nslabs; + dst_bin_stats->reslabs += bin->stats.reslabs; + dst_bin_stats->curslabs += bin->stats.curslabs; + malloc_mutex_unlock(tsdn, &bin->lock); +} + +#endif /* JEMALLOC_INTERNAL_BIN_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/bin_stats.h b/contrib/jemalloc/include/jemalloc/internal/bin_stats.h new file mode 100644 index 00000000000..86e673ec446 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/bin_stats.h @@ -0,0 +1,51 @@ +#ifndef JEMALLOC_INTERNAL_BIN_STATS_H +#define JEMALLOC_INTERNAL_BIN_STATS_H + +#include "jemalloc/internal/mutex_prof.h" + +typedef struct bin_stats_s bin_stats_t; +struct bin_stats_s { + /* + * Total number of 
allocation/deallocation requests served directly by + * the bin. Note that tcache may allocate an object, then recycle it + * many times, resulting in many increments to nrequests, but only one + * each to nmalloc and ndalloc. + */ + uint64_t nmalloc; + uint64_t ndalloc; + + /* + * Number of allocation requests that correspond to the size of this + * bin. This includes requests served by tcache, though tcache only + * periodically merges into this counter. + */ + uint64_t nrequests; + + /* + * Current number of regions of this size class, including regions + * currently cached by tcache. + */ + size_t curregs; + + /* Number of tcache fills from this bin. */ + uint64_t nfills; + + /* Number of tcache flushes to this bin. */ + uint64_t nflushes; + + /* Total number of slabs created for this bin's size class. */ + uint64_t nslabs; + + /* + * Total number of slabs reused by extracting them from the slabs heap + * for this bin's size class. + */ + uint64_t reslabs; + + /* Current number of slabs in this bin. */ + size_t curslabs; + + mutex_prof_data_t mutex_data; +}; + +#endif /* JEMALLOC_INTERNAL_BIN_STATS_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/cache_bin.h b/contrib/jemalloc/include/jemalloc/internal/cache_bin.h new file mode 100644 index 00000000000..12f3ef2dd0a --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/cache_bin.h @@ -0,0 +1,114 @@ +#ifndef JEMALLOC_INTERNAL_CACHE_BIN_H +#define JEMALLOC_INTERNAL_CACHE_BIN_H + +#include "jemalloc/internal/ql.h" + +/* + * The cache_bins are the mechanism that the tcache and the arena use to + * communicate. The tcache fills from and flushes to the arena by passing a + * cache_bin_t to fill/flush. When the arena needs to pull stats from the + * tcaches associated with it, it does so by iterating over its + * cache_bin_array_descriptor_t objects and reading out the per-bin stats they + * contain. This makes it so that the arena need not know about the existence + * of the tcache at all.
+ */ + + +/* + * The count of the number of cached allocations in a bin. We make this signed + * so that negative numbers can encode "invalid" states (e.g. a low water mark + * of -1 for a cache that has been depleted). + */ +typedef int32_t cache_bin_sz_t; + +typedef struct cache_bin_stats_s cache_bin_stats_t; +struct cache_bin_stats_s { + /* + * Number of allocation requests that corresponded to the size of this + * bin. + */ + uint64_t nrequests; +}; + +/* + * Read-only information associated with each element of tcache_t's tbins array + * is stored separately, mainly to reduce memory usage. + */ +typedef struct cache_bin_info_s cache_bin_info_t; +struct cache_bin_info_s { + /* Upper limit on ncached. */ + cache_bin_sz_t ncached_max; +}; + +typedef struct cache_bin_s cache_bin_t; +struct cache_bin_s { + /* Min # cached since last GC. */ + cache_bin_sz_t low_water; + /* # of cached objects. */ + cache_bin_sz_t ncached; + /* + * ncached and tstats are both modified frequently. Let's keep them + * close so that they have a higher chance of being on the same + * cacheline, thus fewer write-backs. + */ + cache_bin_stats_t tstats; + /* + * Stack of available objects. + * + * To make use of adjacent cacheline prefetch, the items in the avail + * stack go to higher addresses for newer allocations. avail points + * just above the available space, which means that + * avail[-ncached, ... -1] are available items and the lowest item will + * be allocated first. + */ + void **avail; +}; + +typedef struct cache_bin_array_descriptor_s cache_bin_array_descriptor_t; +struct cache_bin_array_descriptor_s { + /* + * The arena keeps a list of the cache bins associated with it, for + * stats collection. + */ + ql_elm(cache_bin_array_descriptor_t) link; + /* Pointers to the tcache bins.
*/ + cache_bin_t *bins_small; + cache_bin_t *bins_large; +}; + +static inline void +cache_bin_array_descriptor_init(cache_bin_array_descriptor_t *descriptor, + cache_bin_t *bins_small, cache_bin_t *bins_large) { + ql_elm_new(descriptor, link); + descriptor->bins_small = bins_small; + descriptor->bins_large = bins_large; +} + +JEMALLOC_ALWAYS_INLINE void * +cache_bin_alloc_easy(cache_bin_t *bin, bool *success) { + void *ret; + + if (unlikely(bin->ncached == 0)) { + bin->low_water = -1; + *success = false; + return NULL; + } + /* + * success (instead of ret) should be checked upon the return of this + * function. We avoid checking (ret == NULL) because there is never a + * null stored on the avail stack (which is unknown to the compiler), + * and eagerly checking ret would cause pipeline stall (waiting for the + * cacheline). + */ + *success = true; + ret = *(bin->avail - bin->ncached); + bin->ncached--; + + if (unlikely(bin->ncached < bin->low_water)) { + bin->low_water = bin->ncached; + } + + return ret; +} + +#endif /* JEMALLOC_INTERNAL_CACHE_BIN_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/ctl.h b/contrib/jemalloc/include/jemalloc/internal/ctl.h index a91c4cf556b..d927d94801e 100644 --- a/contrib/jemalloc/include/jemalloc/internal/ctl.h +++ b/contrib/jemalloc/include/jemalloc/internal/ctl.h @@ -40,14 +40,15 @@ typedef struct ctl_arena_stats_s { uint64_t ndalloc_small; uint64_t nrequests_small; - malloc_bin_stats_t bstats[NBINS]; - malloc_large_stats_t lstats[NSIZES - NBINS]; + bin_stats_t bstats[NBINS]; + arena_stats_large_t lstats[NSIZES - NBINS]; } ctl_arena_stats_t; typedef struct ctl_stats_s { size_t allocated; size_t active; size_t metadata; + size_t metadata_thp; size_t resident; size_t mapped; size_t retained; diff --git a/contrib/jemalloc/include/jemalloc/internal/div.h b/contrib/jemalloc/include/jemalloc/internal/div.h new file mode 100644 index 00000000000..aebae9398cf --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/div.h 
@@ -0,0 +1,41 @@ +#ifndef JEMALLOC_INTERNAL_DIV_H +#define JEMALLOC_INTERNAL_DIV_H + +#include "jemalloc/internal/assert.h" + +/* + * This module does the division that computes the index of a region in a slab, + * given its offset relative to the base. + * That is, given a divisor d, an n = i * d (all integers), we'll return i. + * We do some pre-computation to do this more quickly than a CPU division + * instruction. + * We bound n < 2^32, and don't support dividing by one. + */ + +typedef struct div_info_s div_info_t; +struct div_info_s { + uint32_t magic; /* multiplier read by div_compute(); presumably filled in by div_init(), which is defined outside this header */ +#ifdef JEMALLOC_DEBUG + size_t d; /* the divisor, stored only in debug builds for the assertion in div_compute() */ +#endif +}; + +void div_init(div_info_t *div_info, size_t divisor); + +static inline size_t /* Returns the quotient i for n = i * d, computed as the upper 32 bits of the 64-bit product n * magic. */ +div_compute(div_info_t *div_info, size_t n) { + assert(n <= (uint32_t)-1); /* n has to fit in 32 bits */ + /* + * This generates, e.g. mov; imul; shr on x86-64. On a 32-bit machine, + * the compilers I tried were all smart enough to turn this into the + * appropriate "get the high 32 bits of the result of a multiply" (e.g. + * mul; mov edx eax; on x86, umull on arm, etc.). + */ + size_t i = ((uint64_t)n * (uint64_t)div_info->magic) >> 32; +#ifdef JEMALLOC_DEBUG + assert(i * div_info->d == n); /* debug builds verify that n was an exact multiple of d */ +#endif + return i; +} + +#endif /* JEMALLOC_INTERNAL_DIV_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/emitter.h b/contrib/jemalloc/include/jemalloc/internal/emitter.h new file mode 100644 index 00000000000..3a2b2f7f2e4 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/emitter.h @@ -0,0 +1,435 @@ +#ifndef JEMALLOC_INTERNAL_EMITTER_H +#define JEMALLOC_INTERNAL_EMITTER_H + +#include "jemalloc/internal/ql.h" + +typedef enum emitter_output_e emitter_output_t; +enum emitter_output_e { + emitter_output_json, + emitter_output_table +}; + +typedef enum emitter_justify_e emitter_justify_t; +enum emitter_justify_e { + emitter_justify_left, + emitter_justify_right, + /* Not for users; just to pass to internal functions. 
*/ + emitter_justify_none +}; + +typedef enum emitter_type_e emitter_type_t; +enum emitter_type_e { + emitter_type_bool, + emitter_type_int, + emitter_type_unsigned, + emitter_type_uint32, + emitter_type_uint64, + emitter_type_size, + emitter_type_ssize, + emitter_type_string, + /* + * A title is a column title in a table; it's just a string, but it's + * not quoted. + */ + emitter_type_title, +}; + +typedef struct emitter_col_s emitter_col_t; +struct emitter_col_s { + /* Filled in by the user. */ + emitter_justify_t justify; + int width; + emitter_type_t type; + union { /* the column's value; emitter_table_row() passes the address of this union to emitter_print_value() together with type */ + bool bool_val; + int int_val; + unsigned unsigned_val; + uint32_t uint32_val; + uint64_t uint64_val; + size_t size_val; + ssize_t ssize_val; + const char *str_val; + }; + + /* Filled in by initialization. */ + ql_elm(emitter_col_t) link; +}; + +typedef struct emitter_row_s emitter_row_t; +struct emitter_row_s { + ql_head(emitter_col_t) cols; +}; + +static inline void /* Initializes the row's column list head with ql_new(). */ +emitter_row_init(emitter_row_t *row) { + ql_new(&row->cols); +} + +static inline void /* Initializes col's list element and inserts col at the tail of row's column list. */ +emitter_col_init(emitter_col_t *col, emitter_row_t *row) { + ql_elm_new(col, link); + ql_tail_insert(&row->cols, col, link); +} + +typedef struct emitter_s emitter_t; +struct emitter_s { + emitter_output_t output; + /* The output information. */ + void (*write_cb)(void *, const char *); + void *cbopaque; + int nesting_depth; /* raised by emitter_nest_inc(), lowered by emitter_nest_dec() */ + /* True if we've already emitted a value at the given depth. */ + bool item_at_depth; +}; + +static inline void /* Stores the output mode, write callback and its opaque argument; starts at nesting depth 0 with item_at_depth false. */ +emitter_init(emitter_t *emitter, emitter_output_t emitter_output, + void (*write_cb)(void *, const char *), void *cbopaque) { + emitter->output = emitter_output; + emitter->write_cb = write_cb; + emitter->cbopaque = cbopaque; + emitter->item_at_depth = false; + emitter->nesting_depth = 0; +} + +/* Internal convenience function. Write to the emitter the given string. */ +JEMALLOC_FORMAT_PRINTF(2, 3) +static inline void +emitter_printf(emitter_t *emitter, const char *format, ...) 
{ + va_list ap; + + va_start(ap, format); + malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + va_end(ap); +} + +/* Write to the emitter the given string, but only in table mode. */ +JEMALLOC_FORMAT_PRINTF(2, 3) +static inline void +emitter_table_printf(emitter_t *emitter, const char *format, ...) { + if (emitter->output == emitter_output_table) { + va_list ap; + va_start(ap, format); + malloc_vcprintf(emitter->write_cb, emitter->cbopaque, format, ap); + va_end(ap); + } +} + +static inline void +emitter_gen_fmt(char *out_fmt, size_t out_size, const char *fmt_specifier, + emitter_justify_t justify, int width) { + size_t written; + if (justify == emitter_justify_none) { + written = malloc_snprintf(out_fmt, out_size, + "%%%s", fmt_specifier); + } else if (justify == emitter_justify_left) { + written = malloc_snprintf(out_fmt, out_size, + "%%-%d%s", width, fmt_specifier); + } else { + written = malloc_snprintf(out_fmt, out_size, + "%%%d%s", width, fmt_specifier); + } + /* Only happens in case of bad format string, which *we* choose. */ + assert(written < out_size); +} + +/* + * Internal. Emit the given value type in the relevant encoding (so that the + * bool true gets mapped to json "true", but the string "true" gets mapped to + * json "\"true\"", for instance. + * + * Width is ignored if justify is emitter_justify_none. + */ +static inline void +emitter_print_value(emitter_t *emitter, emitter_justify_t justify, int width, + emitter_type_t value_type, const void *value) { + size_t str_written; +#define BUF_SIZE 256 +#define FMT_SIZE 10 + /* + * We dynamically generate a format string to emit, to let us use the + * snprintf machinery. This is kinda hacky, but gets the job done + * quickly without having to think about the various snprintf edge + * cases. 
+ */ + char fmt[FMT_SIZE]; + char buf[BUF_SIZE]; + +#define EMIT_SIMPLE(type, format) \ + emitter_gen_fmt(fmt, FMT_SIZE, format, justify, width); \ + emitter_printf(emitter, fmt, *(const type *)value); \ + + switch (value_type) { + case emitter_type_bool: + emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); + emitter_printf(emitter, fmt, *(const bool *)value ? + "true" : "false"); + break; + case emitter_type_int: + EMIT_SIMPLE(int, "d") + break; + case emitter_type_unsigned: + EMIT_SIMPLE(unsigned, "u") + break; + case emitter_type_ssize: + EMIT_SIMPLE(ssize_t, "zd") + break; + case emitter_type_size: + EMIT_SIMPLE(size_t, "zu") + break; + case emitter_type_string: + str_written = malloc_snprintf(buf, BUF_SIZE, "\"%s\"", + *(const char *const *)value); + /* + * We control the strings we output; we shouldn't get anything + * anywhere near the fmt size. + */ + assert(str_written < BUF_SIZE); + emitter_gen_fmt(fmt, FMT_SIZE, "s", justify, width); + emitter_printf(emitter, fmt, buf); + break; + case emitter_type_uint32: + EMIT_SIMPLE(uint32_t, FMTu32) + break; + case emitter_type_uint64: + EMIT_SIMPLE(uint64_t, FMTu64) + break; + case emitter_type_title: + EMIT_SIMPLE(char *const, "s"); + break; + default: + unreachable(); + } +#undef BUF_SIZE +#undef FMT_SIZE +} + + +/* Internal functions. In json mode, tracks nesting state. 
*/ +static inline void /* Enters one nesting level; nothing has been emitted at the new depth yet. */ +emitter_nest_inc(emitter_t *emitter) { + emitter->nesting_depth++; + emitter->item_at_depth = false; +} + +static inline void /* Leaves one nesting level; the level returned to is marked as already holding an item. */ +emitter_nest_dec(emitter_t *emitter) { + emitter->nesting_depth--; + emitter->item_at_depth = true; +} + +static inline void /* Prints the indent for the current depth: one tab per level in json mode, twice as many copies of the table indent string otherwise. */ +emitter_indent(emitter_t *emitter) { + int amount = emitter->nesting_depth; + const char *indent_str; + if (emitter->output == emitter_output_json) { + indent_str = "\t"; + } else { + amount *= 2; + indent_str = " "; + } + for (int i = 0; i < amount; i++) { + emitter_printf(emitter, "%s", indent_str); + } +} + +static inline void /* Ends the current output line, preceded by a comma when an item was already emitted at this depth, then indents the new line. */ +emitter_json_key_prefix(emitter_t *emitter) { + emitter_printf(emitter, "%s\n", emitter->item_at_depth ? "," : ""); + emitter_indent(emitter); +} + +static inline void /* Starts the output: in json mode opens the top-level object and enters depth 1; in table mode only prints an empty string. */ +emitter_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth == 0); + emitter_printf(emitter, "{"); + emitter_nest_inc(emitter); + } else { /* NOTE(review): the line comment that follows is the only C++-style comment in this header; everything else uses block comments */ + // tabular init + emitter_printf(emitter, "%s", ""); + } +} + +static inline void /* Finishes the output: in json mode returns to depth 0 and prints the closing brace; does nothing in table mode. */ +emitter_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth == 1); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n}\n"); + } +} + +/* + * Note emits a different kv pair as well, but only in table mode. Omits the + * note if table_note_key is NULL. 
+ */ +static inline void /* Emits one key/value pair: a quoted json_key followed by the value in json mode; an indented table_key, the value, an optional parenthesized note pair and a newline in table mode. */ +emitter_kv_note(emitter_t *emitter, const char *json_key, const char *table_key, + emitter_type_t value_type, const void *value, + const char *table_note_key, emitter_type_t table_note_value_type, + const void *table_note_value) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": ", json_key); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + } else { + emitter_indent(emitter); + emitter_printf(emitter, "%s: ", table_key); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + if (table_note_key != NULL) { + emitter_printf(emitter, " (%s: ", table_note_key); + emitter_print_value(emitter, emitter_justify_none, -1, + table_note_value_type, table_note_value); + emitter_printf(emitter, ")"); + } + emitter_printf(emitter, "\n"); + } + emitter->item_at_depth = true; /* set in both modes after the pair is written */ +} + +static inline void /* emitter_kv_note() without a note: the note key and value are NULL, so the note type passed is never used. */ +emitter_kv(emitter_t *emitter, const char *json_key, const char *table_key, + emitter_type_t value_type, const void *value) { + emitter_kv_note(emitter, json_key, table_key, value_type, value, NULL, + emitter_type_bool, NULL); +} + +static inline void /* Emits the pair only when the emitter is in json mode. */ +emitter_json_kv(emitter_t *emitter, const char *json_key, + emitter_type_t value_type, const void *value) { + if (emitter->output == emitter_output_json) { + emitter_kv(emitter, json_key, NULL, value_type, value); + } +} + +static inline void /* Emits the pair only when the emitter is in table mode. */ +emitter_table_kv(emitter_t *emitter, const char *table_key, + emitter_type_t value_type, const void *value) { + if (emitter->output == emitter_output_table) { + emitter_kv(emitter, NULL, table_key, value_type, value); + } +} + +static inline void /* Opens a nested dict: the quoted json_key and an opening brace in json mode, an indented table_header line in table mode; both modes enter a nesting level. */ +emitter_dict_begin(emitter_t *emitter, const char *json_key, + const char *table_header) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": {", json_key); + emitter_nest_inc(emitter); + } else { + 
emitter_indent(emitter); + emitter_printf(emitter, "%s\n", table_header); + emitter_nest_inc(emitter); + } +} + +static inline void /* Closes a dict: json mode leaves the level and prints the closing brace on its own indented line; table mode only leaves the level. */ +emitter_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "}"); + } else { + emitter_nest_dec(emitter); + } +} + +static inline void /* json-only wrapper around emitter_dict_begin(). */ +emitter_json_dict_begin(emitter_t *emitter, const char *json_key) { + if (emitter->output == emitter_output_json) { + emitter_dict_begin(emitter, json_key, NULL); + } +} + +static inline void /* json-only wrapper around emitter_dict_end(). */ +emitter_json_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_dict_end(emitter); + } +} + +static inline void /* table-only wrapper around emitter_dict_begin(). */ +emitter_table_dict_begin(emitter_t *emitter, const char *table_key) { + if (emitter->output == emitter_output_table) { + emitter_dict_begin(emitter, NULL, table_key); + } +} + +static inline void /* table-only wrapper around emitter_dict_end(). */ +emitter_table_dict_end(emitter_t *emitter) { + if (emitter->output == emitter_output_table) { + emitter_dict_end(emitter); + } +} + +static inline void /* json mode only: prints the quoted json_key and an opening square bracket, then enters a nesting level. */ +emitter_json_arr_begin(emitter_t *emitter, const char *json_key) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "\"%s\": [", json_key); + emitter_nest_inc(emitter); + } +} + +static inline void /* json mode only: leaves the level and prints the closing square bracket on its own indented line. */ +emitter_json_arr_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "]"); + } +} + +static inline void /* json mode only: prints the item prefix and an opening brace with no key, then enters a nesting level. */ +emitter_json_arr_obj_begin(emitter_t *emitter) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_printf(emitter, "{"); + emitter_nest_inc(emitter); + } +} + +static inline void /* json mode only: closes what emitter_json_arr_obj_begin() opened. */ +emitter_json_arr_obj_end(emitter_t *emitter) { + if (emitter->output == emitter_output_json) 
{ + assert(emitter->nesting_depth > 0); + emitter_nest_dec(emitter); + emitter_printf(emitter, "\n"); + emitter_indent(emitter); + emitter_printf(emitter, "}"); + } +} + +static inline void /* json mode only: prints the item prefix and then the bare value, with no key. */ +emitter_json_arr_value(emitter_t *emitter, emitter_type_t value_type, + const void *value) { + if (emitter->output == emitter_output_json) { + emitter_json_key_prefix(emitter); + emitter_print_value(emitter, emitter_justify_none, -1, + value_type, value); + } +} + +static inline void /* table mode only: prints every column of row in list order with that column's justify, width and type, then a newline. */ +emitter_table_row(emitter_t *emitter, emitter_row_t *row) { + if (emitter->output != emitter_output_table) { + return; + } + emitter_col_t *col; + ql_foreach(col, &row->cols, link) { + emitter_print_value(emitter, col->justify, col->width, + col->type, (const void *)&col->bool_val); /* bool_val is one member of the column's anonymous union, so this is the address of whichever value member was filled in */ + } + emitter_table_printf(emitter, "\n"); +} + +#endif /* JEMALLOC_INTERNAL_EMITTER_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/extent_externs.h b/contrib/jemalloc/include/jemalloc/internal/extent_externs.h index 489a813c80d..b8a4d026ceb 100644 --- a/contrib/jemalloc/include/jemalloc/internal/extent_externs.h +++ b/contrib/jemalloc/include/jemalloc/internal/extent_externs.h @@ -4,12 +4,13 @@ #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_pool.h" #include "jemalloc/internal/ph.h" -#include "jemalloc/internal/rb.h" #include "jemalloc/internal/rtree.h" -extern rtree_t extents_rtree; -extern const extent_hooks_t extent_hooks_default; -extern mutex_pool_t extent_mutex_pool; +extern size_t opt_lg_extent_max_active_fit; + +extern rtree_t extents_rtree; +extern const extent_hooks_t extent_hooks_default; +extern mutex_pool_t extent_mutex_pool; extent_t *extent_alloc(tsdn_t *tsdn, arena_t *arena); void extent_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *extent); diff --git a/contrib/jemalloc/include/jemalloc/internal/extent_inlines.h b/contrib/jemalloc/include/jemalloc/internal/extent_inlines.h index bb2bd699ed2..77181df8d24 100644 --- 
a/contrib/jemalloc/include/jemalloc/internal/extent_inlines.h +++ b/contrib/jemalloc/include/jemalloc/internal/extent_inlines.h @@ -93,6 +93,12 @@ extent_committed_get(const extent_t *extent) { EXTENT_BITS_COMMITTED_SHIFT); } +static inline bool /* Extracts the dumpable flag from e_bits using the DUMPABLE mask and shift. */ +extent_dumpable_get(const extent_t *extent) { + return (bool)((extent->e_bits & EXTENT_BITS_DUMPABLE_MASK) >> + EXTENT_BITS_DUMPABLE_SHIFT); +} + static inline bool extent_slab_get(const extent_t *extent) { return (bool)((extent->e_bits & EXTENT_BITS_SLAB_MASK) >> @@ -184,15 +190,22 @@ extent_addr_set(extent_t *extent, void *addr) { } static inline void -extent_addr_randomize(tsdn_t *tsdn, extent_t *extent, size_t alignment) { +extent_addr_randomize(UNUSED tsdn_t *tsdn, extent_t *extent, size_t alignment) { assert(extent_base_get(extent) == extent_addr_get(extent)); if (alignment < PAGE) { unsigned lg_range = LG_PAGE - lg_floor(CACHELINE_CEILING(alignment)); - size_t r = - prng_lg_range_zu(&extent_arena_get(extent)->offset_state, - lg_range, true); + size_t r; /* drawn from the tsd's offset state when tsdn is non-null, otherwise from the arena's offset_state */ + if (!tsdn_null(tsdn)) { + tsd_t *tsd = tsdn_tsd(tsdn); + r = (size_t)prng_lg_range_u64( + tsd_offset_statep_get(tsd), lg_range); + } else { + r = prng_lg_range_zu( + &extent_arena_get(extent)->offset_state, + lg_range, true); + } uintptr_t random_offset = ((uintptr_t)r) << (LG_PAGE - lg_range); extent->e_addr = (void *)((uintptr_t)extent->e_addr + @@ -269,6 +282,12 @@ extent_committed_set(extent_t *extent, bool committed) { ((uint64_t)committed << EXTENT_BITS_COMMITTED_SHIFT); } +static inline void /* Stores dumpable in the DUMPABLE bit of e_bits; all other bits are kept. */ +extent_dumpable_set(extent_t *extent, bool dumpable) { + extent->e_bits = (extent->e_bits & ~EXTENT_BITS_DUMPABLE_MASK) | + ((uint64_t)dumpable << EXTENT_BITS_DUMPABLE_SHIFT); +} + static inline void extent_slab_set(extent_t *extent, bool slab) { extent->e_bits = (extent->e_bits & ~EXTENT_BITS_SLAB_MASK) | @@ -283,7 +302,7 @@ extent_prof_tctx_set(extent_t *extent, prof_tctx_t *tctx) { static inline void extent_init(extent_t *extent, arena_t *arena, void *addr, size_t 
size, bool slab, szind_t szind, size_t sn, extent_state_t state, bool zeroed, - bool committed) { + bool committed, bool dumpable) { assert(addr == PAGE_ADDR2BASE(addr) || !slab); extent_arena_set(extent, arena); @@ -295,6 +314,7 @@ extent_init(extent_t *extent, arena_t *arena, void *addr, size_t size, extent_state_set(extent, state); extent_zeroed_set(extent, zeroed); extent_committed_set(extent, committed); + extent_dumpable_set(extent, dumpable); /* records the caller-supplied dumpable flag */ ql_elm_new(extent, ql_link); if (config_prof) { extent_prof_tctx_set(extent, NULL); @@ -312,6 +332,7 @@ extent_binit(extent_t *extent, void *addr, size_t bsize, size_t sn) { extent_state_set(extent, extent_state_active); extent_zeroed_set(extent, true); extent_committed_set(extent, true); + extent_dumpable_set(extent, true); /* extent_binit() always marks the extent dumpable */ } static inline void @@ -334,6 +355,11 @@ extent_list_append(extent_list_t *list, extent_t *extent) { ql_tail_insert(list, extent, ql_link); } +static inline void /* Inserts extent at the head of list through its ql_link element. */ +extent_list_prepend(extent_list_t *list, extent_t *extent) { + ql_head_insert(list, extent, ql_link); +} + static inline void extent_list_replace(extent_list_t *list, extent_t *to_remove, extent_t *to_insert) { diff --git a/contrib/jemalloc/include/jemalloc/internal/extent_structs.h b/contrib/jemalloc/include/jemalloc/internal/extent_structs.h index d2979503458..4873b9e9e49 100644 --- a/contrib/jemalloc/include/jemalloc/internal/extent_structs.h +++ b/contrib/jemalloc/include/jemalloc/internal/extent_structs.h @@ -5,7 +5,6 @@ #include "jemalloc/internal/bitmap.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/ql.h" -#include "jemalloc/internal/rb.h" #include "jemalloc/internal/ph.h" #include "jemalloc/internal/size_classes.h" @@ -24,13 +23,14 @@ struct extent_s { * a: arena_ind * b: slab * c: committed + * d: dumpable * z: zeroed * t: state * i: szind * f: nfree * n: sn * - * nnnnnnnn ... nnnnnfff fffffffi iiiiiiit tzcbaaaa aaaaaaaa + * nnnnnnnn ... 
nnnnffff ffffffii iiiiiitt zdcbaaaa aaaaaaaa * * arena_ind: Arena from which this extent came, or all 1 bits if * unassociated. @@ -45,6 +45,23 @@ struct extent_s { * as on a system that overcommits and satisfies physical * memory needs on demand via soft page faults. * + * dumpable: The dumpable flag indicates whether or not we've set the + * memory in question to be dumpable. Note that this + * interacts somewhat subtly with user-specified extent hooks, + * since we don't know if *they* are fiddling with + * dumpability (in which case, we don't want to undo whatever + * they're doing). To deal with this scenario, we: + * - Make dumpable false only for memory allocated with the + * default hooks. + * - Only allow memory to go from non-dumpable to dumpable, + * and only once. + * - Never make the OS call to allow dumping when the + * dumpable bit is already set. + * These three constraints mean that we will never + * accidentally dump user memory that the user meant to set + * nondumpable with their extent hooks. + * + * * zeroed: The zeroed flag is used by extent recycling code to track * whether memory is zero-filled. * @@ -69,38 +86,42 @@ struct extent_s { * serial number to both resulting adjacent extents. 
*/ uint64_t e_bits; -#define EXTENT_BITS_ARENA_SHIFT 0 -#define EXTENT_BITS_ARENA_MASK \ - (((uint64_t)(1U << MALLOCX_ARENA_BITS) - 1) << EXTENT_BITS_ARENA_SHIFT) +#define MASK(CURRENT_FIELD_WIDTH, CURRENT_FIELD_SHIFT) ((((((uint64_t)0x1U) << (CURRENT_FIELD_WIDTH)) - 1)) << (CURRENT_FIELD_SHIFT)) + +#define EXTENT_BITS_ARENA_WIDTH MALLOCX_ARENA_BITS +#define EXTENT_BITS_ARENA_SHIFT 0 +#define EXTENT_BITS_ARENA_MASK MASK(EXTENT_BITS_ARENA_WIDTH, EXTENT_BITS_ARENA_SHIFT) -#define EXTENT_BITS_SLAB_SHIFT MALLOCX_ARENA_BITS -#define EXTENT_BITS_SLAB_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_SLAB_WIDTH 1 +#define EXTENT_BITS_SLAB_SHIFT (EXTENT_BITS_ARENA_WIDTH + EXTENT_BITS_ARENA_SHIFT) +#define EXTENT_BITS_SLAB_MASK MASK(EXTENT_BITS_SLAB_WIDTH, EXTENT_BITS_SLAB_SHIFT) -#define EXTENT_BITS_COMMITTED_SHIFT (MALLOCX_ARENA_BITS + 1) -#define EXTENT_BITS_COMMITTED_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_COMMITTED_WIDTH 1 +#define EXTENT_BITS_COMMITTED_SHIFT (EXTENT_BITS_SLAB_WIDTH + EXTENT_BITS_SLAB_SHIFT) +#define EXTENT_BITS_COMMITTED_MASK MASK(EXTENT_BITS_COMMITTED_WIDTH, EXTENT_BITS_COMMITTED_SHIFT) -#define EXTENT_BITS_ZEROED_SHIFT (MALLOCX_ARENA_BITS + 2) -#define EXTENT_BITS_ZEROED_MASK \ - ((uint64_t)0x1U << EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_DUMPABLE_WIDTH 1 +#define EXTENT_BITS_DUMPABLE_SHIFT (EXTENT_BITS_COMMITTED_WIDTH + EXTENT_BITS_COMMITTED_SHIFT) +#define EXTENT_BITS_DUMPABLE_MASK MASK(EXTENT_BITS_DUMPABLE_WIDTH, EXTENT_BITS_DUMPABLE_SHIFT) -#define EXTENT_BITS_STATE_SHIFT (MALLOCX_ARENA_BITS + 3) -#define EXTENT_BITS_STATE_MASK \ - ((uint64_t)0x3U << EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_ZEROED_WIDTH 1 +#define EXTENT_BITS_ZEROED_SHIFT (EXTENT_BITS_DUMPABLE_WIDTH + EXTENT_BITS_DUMPABLE_SHIFT) +#define EXTENT_BITS_ZEROED_MASK MASK(EXTENT_BITS_ZEROED_WIDTH, EXTENT_BITS_ZEROED_SHIFT) -#define EXTENT_BITS_SZIND_SHIFT (MALLOCX_ARENA_BITS + 5) -#define EXTENT_BITS_SZIND_MASK \ 
- (((uint64_t)(1U << LG_CEIL_NSIZES) - 1) << EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_STATE_WIDTH 2 +#define EXTENT_BITS_STATE_SHIFT (EXTENT_BITS_ZEROED_WIDTH + EXTENT_BITS_ZEROED_SHIFT) +#define EXTENT_BITS_STATE_MASK MASK(EXTENT_BITS_STATE_WIDTH, EXTENT_BITS_STATE_SHIFT) -#define EXTENT_BITS_NFREE_SHIFT \ - (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES) -#define EXTENT_BITS_NFREE_MASK \ - ((uint64_t)((1U << (LG_SLAB_MAXREGS + 1)) - 1) << EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_SZIND_WIDTH LG_CEIL_NSIZES +#define EXTENT_BITS_SZIND_SHIFT (EXTENT_BITS_STATE_WIDTH + EXTENT_BITS_STATE_SHIFT) +#define EXTENT_BITS_SZIND_MASK MASK(EXTENT_BITS_SZIND_WIDTH, EXTENT_BITS_SZIND_SHIFT) -#define EXTENT_BITS_SN_SHIFT \ - (MALLOCX_ARENA_BITS + 5 + LG_CEIL_NSIZES + (LG_SLAB_MAXREGS + 1)) -#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) +#define EXTENT_BITS_NFREE_WIDTH (LG_SLAB_MAXREGS + 1) +#define EXTENT_BITS_NFREE_SHIFT (EXTENT_BITS_SZIND_WIDTH + EXTENT_BITS_SZIND_SHIFT) +#define EXTENT_BITS_NFREE_MASK MASK(EXTENT_BITS_NFREE_WIDTH, EXTENT_BITS_NFREE_SHIFT) + +#define EXTENT_BITS_SN_SHIFT (EXTENT_BITS_NFREE_WIDTH + EXTENT_BITS_NFREE_SHIFT) +#define EXTENT_BITS_SN_MASK (UINT64_MAX << EXTENT_BITS_SN_SHIFT) /* Pointer to the extent that this structure is responsible for. */ void *e_addr; @@ -120,20 +141,19 @@ struct extent_s { size_t e_bsize; }; - union { - /* - * List linkage, used by a variety of lists: - * - arena_bin_t's slabs_full - * - extents_t's LRU - * - stashed dirty extents - * - arena's large allocations - */ - ql_elm(extent_t) ql_link; - /* Red-black tree linkage, used by arena's extent_avail. */ - rb_node(extent_t) rb_link; - }; + /* + * List linkage, used by a variety of lists: + * - bin_t's slabs_full + * - extents_t's LRU + * - stashed dirty extents + * - arena's large allocations + */ + ql_elm(extent_t) ql_link; - /* Linkage for per size class sn/address-ordered heaps. 
*/ + /* + * Linkage for per size class sn/address-ordered heaps, and + * for extent_avail. + */ phn(extent_t) ph_link; union { @@ -148,7 +168,7 @@ struct extent_s { }; }; typedef ql_head(extent_t) extent_list_t; -typedef rb_tree(extent_t) extent_tree_t; +typedef ph(extent_t) extent_tree_t; typedef ph(extent_t) extent_heap_t; /* Quantized collection of extents, with built-in LRU queue. */ diff --git a/contrib/jemalloc/include/jemalloc/internal/extent_types.h b/contrib/jemalloc/include/jemalloc/internal/extent_types.h index b6905ce1055..c0561d99f8f 100644 --- a/contrib/jemalloc/include/jemalloc/internal/extent_types.h +++ b/contrib/jemalloc/include/jemalloc/internal/extent_types.h @@ -6,4 +6,12 @@ typedef struct extents_s extents_t; #define EXTENT_HOOKS_INITIALIZER NULL +#define EXTENT_GROW_MAX_PIND (NPSIZES - 1) + +/* + * When reusing (and splitting) an active extent, (1U << opt_lg_extent_max_active_fit) + * is the max ratio between the size of the active extent and the new extent. + */ +#define LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT 6 + #endif /* JEMALLOC_INTERNAL_EXTENT_TYPES_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/hash.h b/contrib/jemalloc/include/jemalloc/internal/hash.h index 188296cf0e2..dcfc992df37 100644 --- a/contrib/jemalloc/include/jemalloc/internal/hash.h +++ b/contrib/jemalloc/include/jemalloc/internal/hash.h @@ -260,22 +260,22 @@ hash_x64_128(const void *key, const int len, const uint32_t seed, uint64_t k2 = 0; switch (len & 15) { - case 15: k2 ^= ((uint64_t)(tail[14])) << 48; - case 14: k2 ^= ((uint64_t)(tail[13])) << 40; - case 13: k2 ^= ((uint64_t)(tail[12])) << 32; - case 12: k2 ^= ((uint64_t)(tail[11])) << 24; - case 11: k2 ^= ((uint64_t)(tail[10])) << 16; - case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; + case 15: k2 ^= ((uint64_t)(tail[14])) << 48; /* falls through */ + case 14: k2 ^= ((uint64_t)(tail[13])) << 40; /* falls through */ + case 13: k2 ^= ((uint64_t)(tail[12])) << 32; /* falls through */ + case 12: k2 ^= ((uint64_t)(tail[11])) 
<< 24; /* falls through */ + case 11: k2 ^= ((uint64_t)(tail[10])) << 16; /* falls through */ + case 10: k2 ^= ((uint64_t)(tail[ 9])) << 8; /* falls through */ case 9: k2 ^= ((uint64_t)(tail[ 8])) << 0; k2 *= c2; k2 = hash_rotl_64(k2, 33); k2 *= c1; h2 ^= k2; - - case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; - case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; - case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; - case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; - case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; - case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; - case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; + /* falls through */ + case 8: k1 ^= ((uint64_t)(tail[ 7])) << 56; /* falls through */ + case 7: k1 ^= ((uint64_t)(tail[ 6])) << 48; /* falls through */ + case 6: k1 ^= ((uint64_t)(tail[ 5])) << 40; /* falls through */ + case 5: k1 ^= ((uint64_t)(tail[ 4])) << 32; /* falls through */ + case 4: k1 ^= ((uint64_t)(tail[ 3])) << 24; /* falls through */ + case 3: k1 ^= ((uint64_t)(tail[ 2])) << 16; /* falls through */ + case 2: k1 ^= ((uint64_t)(tail[ 1])) << 8; /* falls through */ case 1: k1 ^= ((uint64_t)(tail[ 0])) << 0; k1 *= c1; k1 = hash_rotl_64(k1, 31); k1 *= c2; h1 ^= k1; } diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h index b24eb54d8fb..84cd70daa22 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_decls.h @@ -8,7 +8,16 @@ #ifdef _WIN32 # include # include "msvc_compat/windows_extra.h" - +# ifdef _WIN64 +# if LG_VADDR <= 32 +# error Generate the headers using x64 vcargs +# endif +# else +# if LG_VADDR > 32 +# undef LG_VADDR +# define LG_VADDR 32 +# endif +# endif #else # include # include diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h index a2dd13390de..94a7db7bb2f 100644 --- 
a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_defs.h @@ -34,6 +34,8 @@ * order to yield to another virtual CPU. */ #define CPU_SPINWAIT __asm__ volatile("pause") +/* 1 if CPU_SPINWAIT is defined, 0 otherwise. */ +#define HAVE_CPU_SPINWAIT 1 /* * Number of significant bits in virtual addresses. This may be less than the @@ -238,6 +240,12 @@ */ #define JEMALLOC_CACHE_OBLIVIOUS +/* + * If defined, enable logging facilities. We make this a configure option to + * avoid taking extra branches everywhere. + */ +/* #undef JEMALLOC_LOG */ + /* * Darwin (OS X) uses zones to work around Mach-O symbol override shortcomings. */ @@ -255,6 +263,12 @@ /* Defined if madvise(2) is available. */ #define JEMALLOC_HAVE_MADVISE +/* + * Defined if transparent huge pages are supported via the MADV_[NO]HUGEPAGE + * arguments to madvise(2). + */ +/* #undef JEMALLOC_HAVE_MADVISE_HUGE */ + /* * Methods for purging unused pages differ between operating systems. * @@ -272,6 +286,14 @@ #define JEMALLOC_PURGE_MADVISE_DONTNEED /* #undef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS */ +/* Defined if madvise(2) is available but MADV_FREE is not (x86 Linux only). */ +/* #undef JEMALLOC_DEFINE_MADVISE_FREE */ + +/* + * Defined if MADV_DO[NT]DUMP is supported as an argument to madvise. + */ +/* #undef JEMALLOC_MADVISE_DONTDUMP */ + /* * Defined if transparent huge pages (THPs) are supported via the * MADV_[NO]HUGEPAGE arguments to madvise(2), and THP support is enabled. @@ -337,4 +359,9 @@ /* If defined, jemalloc takes the malloc/free/etc. symbol names. */ #define JEMALLOC_IS_MALLOC 1 +/* + * Defined if strerror_r returns char * if _GNU_SOURCE is defined. 
+ */ +/* #undef JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE */ + #endif /* JEMALLOC_INTERNAL_DEFS_H_ */ diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h index 24ea416297f..c6a1f7eb2ca 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_a.h @@ -106,16 +106,16 @@ decay_ticker_get(tsd_t *tsd, unsigned ind) { return &tdata->decay_ticker; } -JEMALLOC_ALWAYS_INLINE tcache_bin_t * +JEMALLOC_ALWAYS_INLINE cache_bin_t * /* Returns the address of tcache's small bin for binind, which must be below NBINS. */ tcache_small_bin_get(tcache_t *tcache, szind_t binind) { assert(binind < NBINS); - return &tcache->tbins_small[binind]; + return &tcache->bins_small[binind]; } -JEMALLOC_ALWAYS_INLINE tcache_bin_t * +JEMALLOC_ALWAYS_INLINE cache_bin_t * /* Returns the address of tcache's large bin for binind, which must lie in [NBINS, nhbins). */ tcache_large_bin_get(tcache_t *tcache, szind_t binind) { assert(binind >= NBINS &&binind < nhbins); - return &tcache->tbins_large[binind - NBINS]; + return &tcache->bins_large[binind - NBINS]; /* bins_large is indexed relative to NBINS */ } JEMALLOC_ALWAYS_INLINE bool @@ -151,6 +151,7 @@ pre_reentrancy(tsd_t *tsd, arena_t *arena) { assert(arena != arena_get(tsd_tsdn(tsd), 0, false)); bool fast = tsd_fast(tsd); + assert(tsd_reentrancy_level_get(tsd) < INT8_MAX); /* checked before the level is incremented on the next line */ ++*tsd_reentrancy_levelp_get(tsd); if (fast) { /* Prepare slow path for reentrancy. 
*/ diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h index 43611ca0840..c829ac60cbe 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_inlines_c.h @@ -5,6 +5,24 @@ #include "jemalloc/internal/sz.h" #include "jemalloc/internal/witness.h" +/* + * Translating the names of the 'i' functions: + * Abbreviations used in the first part of the function name (before + * alloc/dalloc) describe what that function accomplishes: + * a: arena (query) + * s: size (query, or sized deallocation) + * e: extent (query) + * p: aligned (allocates) + * vs: size (query, without knowing that the pointer is into the heap) + * r: rallocx implementation + * x: xallocx implementation + * Abbreviations used in the second part of the function name (after + * alloc/dalloc) describe the arguments it takes + * z: whether to return zeroed memory + * t: accepts a tcache_t * parameter + * m: accepts an arena_t * parameter + */ + JEMALLOC_ALWAYS_INLINE arena_t * iaalloc(tsdn_t *tsdn, const void *ptr) { assert(ptr != NULL); @@ -27,8 +45,10 @@ iallocztm(tsdn_t *tsdn, size_t size, szind_t ind, bool zero, tcache_t *tcache, assert(size != 0); assert(!is_internal || tcache == NULL); assert(!is_internal || arena == NULL || arena_is_auto(arena)); - witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), - WITNESS_RANK_CORE, 0); + if (!tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) == 0) { + witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), + WITNESS_RANK_CORE, 0); + } ret = arena_malloc(tsdn, arena, size, ind, zero, tcache, slow_path); if (config_stats && is_internal && likely(ret != NULL)) { @@ -91,7 +111,7 @@ idalloctm(tsdn_t *tsdn, void *ptr, tcache_t *tcache, alloc_ctx_t *alloc_ctx, if (config_stats && is_internal) { arena_internal_sub(iaalloc(tsdn, ptr), isalloc(tsdn, 
ptr)); } - if (!is_internal && !tsdn_null(tsdn) && + if (!is_internal && !tsdn_null(tsdn) && tsd_reentrancy_level_get(tsdn_tsd(tsdn)) != 0) { assert(tcache == NULL); } diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h index 4571895ec37..ed75d3768e5 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_macros.h @@ -37,4 +37,7 @@ # define JET_MUTABLE const #endif +#define JEMALLOC_VA_ARGS_HEAD(head, ...) head +#define JEMALLOC_VA_ARGS_TAIL(head, ...) __VA_ARGS__ + #endif /* JEMALLOC_INTERNAL_MACROS_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h index c72373e86fa..1b750b122c9 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h +++ b/contrib/jemalloc/include/jemalloc/internal/jemalloc_internal_types.h @@ -79,22 +79,29 @@ typedef int malloc_cpuid_t; # ifdef __hppa__ # define LG_QUANTUM 4 # endif +# ifdef __m68k__ +# define LG_QUANTUM 3 +# endif # ifdef __mips__ # define LG_QUANTUM 3 # endif +# ifdef __nios2__ +# define LG_QUANTUM 3 +# endif # ifdef __or1k__ # define LG_QUANTUM 3 # endif # ifdef __powerpc__ # define LG_QUANTUM 4 # endif -# ifdef __riscv +# if defined(__riscv) || defined(__riscv__) # define LG_QUANTUM 4 # endif # ifdef __s390__ # define LG_QUANTUM 4 # endif -# ifdef __SH4__ +# if (defined (__SH3E__) || defined(__SH4_SINGLE__) || defined(__SH4__) || \ + defined(__SH4_SINGLE_ONLY__)) # define LG_QUANTUM 4 # endif # ifdef __tile__ diff --git a/contrib/jemalloc/include/jemalloc/internal/jemalloc_preamble.h b/contrib/jemalloc/include/jemalloc/internal/jemalloc_preamble.h index 63b15a83465..02103b4bccf 100644 --- a/contrib/jemalloc/include/jemalloc/internal/jemalloc_preamble.h +++ 
b/contrib/jemalloc/include/jemalloc/internal/jemalloc_preamble.h @@ -50,6 +50,10 @@ #endif #include "jemalloc/internal/hooks.h" +#ifdef JEMALLOC_DEFINE_MADVISE_FREE +# define JEMALLOC_MADV_FREE 8 +#endif + static const bool config_debug = #ifdef JEMALLOC_DEBUG true @@ -64,6 +68,13 @@ static const bool have_dss = false #endif ; +static const bool have_madvise_huge = +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + true +#else + false +#endif + ; static const bool config_fill = #ifdef JEMALLOC_FILL true @@ -108,13 +119,6 @@ static const bool config_stats = false #endif ; -static const bool config_thp = -#ifdef JEMALLOC_THP - true -#else - false -#endif - ; static const bool config_tls = #ifdef JEMALLOC_TLS true @@ -143,6 +147,17 @@ static const bool config_cache_oblivious = false #endif ; +/* + * Undocumented, for jemalloc development use only at the moment. See the note + * in jemalloc/internal/log.h. + */ +static const bool config_log = +#ifdef JEMALLOC_LOG + true +#else + false +#endif + ; #ifdef JEMALLOC_HAVE_SCHED_GETCPU /* Currently percpu_arena depends on sched_getcpu. */ #define JEMALLOC_PERCPU_ARENA diff --git a/contrib/jemalloc/include/jemalloc/internal/log.h b/contrib/jemalloc/include/jemalloc/internal/log.h new file mode 100644 index 00000000000..64208586354 --- /dev/null +++ b/contrib/jemalloc/include/jemalloc/internal/log.h @@ -0,0 +1,115 @@ +#ifndef JEMALLOC_INTERNAL_LOG_H +#define JEMALLOC_INTERNAL_LOG_H + +#include "jemalloc/internal/atomic.h" +#include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/mutex.h" + +#ifdef JEMALLOC_LOG +# define JEMALLOC_LOG_VAR_BUFSIZE 1000 +#else +# define JEMALLOC_LOG_VAR_BUFSIZE 1 +#endif + +#define JEMALLOC_LOG_BUFSIZE 4096 + +/* + * The log malloc_conf option is a '|'-delimited list of log_var name segments + * which should be logged. The names are themselves hierarchical, with '.' as + * the delimiter (a "segment" is just a prefix in the log namespace). 
So, if + * you have: + * + * log("arena", "log msg for arena"); // 1 + * log("arena.a", "log msg for arena.a"); // 2 + * log("arena.b", "log msg for arena.b"); // 3 + * log("arena.a.a", "log msg for arena.a.a"); // 4 + * log("extent.a", "log msg for extent.a"); // 5 + * log("extent.b", "log msg for extent.b"); // 6 + * + * And your malloc_conf option is "log=arena.a|extent", then lines 2, 4, 5, and + * 6 will print at runtime. You can enable logging from all log vars by + * writing "log=.". + * + * None of this should be regarded as a stable API for right now. It's intended + * as a debugging interface, to let us keep around some of our printf-debugging + * statements. + */ + +extern char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; +extern atomic_b_t log_init_done; + +typedef struct log_var_s log_var_t; +struct log_var_s { + /* + * Lowest bit is "inited", second lowest is "enabled". Putting them in + * a single word lets us avoid any fences on weak architectures. + */ + atomic_u_t state; + const char *name; +}; + +#define LOG_NOT_INITIALIZED 0U +#define LOG_INITIALIZED_NOT_ENABLED 1U +#define LOG_ENABLED 2U + +#define LOG_VAR_INIT(name_str) {ATOMIC_INIT(LOG_NOT_INITIALIZED), name_str} + +/* + * Returns the value we should assume for state (which is not necessarily + * accurate; if logging is done before logging has finished initializing, then + * we default to doing the safe thing by logging everything). + */ +unsigned log_var_update_state(log_var_t *log_var); + +/* We factor out the metadata management to allow us to test more easily. */ +#define log_do_begin(log_var) \ +if (config_log) { \ + unsigned log_state = atomic_load_u(&(log_var).state, \ + ATOMIC_RELAXED); \ + if (unlikely(log_state == LOG_NOT_INITIALIZED)) { \ + log_state = log_var_update_state(&(log_var)); \ + assert(log_state != LOG_NOT_INITIALIZED); \ + } \ + if (log_state == LOG_ENABLED) { \ + { + /* User code executes here. 
*/ +#define log_do_end(log_var) \ + } \ + } \ +} + +/* + * MSVC has some preprocessor bugs in its expansion of __VA_ARGS__ during + * preprocessing. To work around this, we take all potential extra arguments in + * a var-args functions. Since a varargs macro needs at least one argument in + * the "...", we accept the format string there, and require that the first + * argument in this "..." is a const char *. + */ +static inline void +log_impl_varargs(const char *name, ...) { + char buf[JEMALLOC_LOG_BUFSIZE]; + va_list ap; + + va_start(ap, name); + const char *format = va_arg(ap, const char *); + size_t dst_offset = 0; + dst_offset += malloc_snprintf(buf, JEMALLOC_LOG_BUFSIZE, "%s: ", name); + dst_offset += malloc_vsnprintf(buf + dst_offset, + JEMALLOC_LOG_BUFSIZE - dst_offset, format, ap); + dst_offset += malloc_snprintf(buf + dst_offset, + JEMALLOC_LOG_BUFSIZE - dst_offset, "\n"); + va_end(ap); + + malloc_write(buf); +} + +/* Call as log("log.var.str", "format_string %d", arg_for_format_string); */ +#define LOG(log_var_str, ...) \ +do { \ + static log_var_t log_var = LOG_VAR_INIT(log_var_str); \ + log_do_begin(log_var) \ + log_impl_varargs((log_var).name, __VA_ARGS__); \ + log_do_end(log_var) \ +} while (0) + +#endif /* JEMALLOC_INTERNAL_LOG_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/malloc_io.h b/contrib/jemalloc/include/jemalloc/internal/malloc_io.h index 47ae58ec352..bfe556b523d 100644 --- a/contrib/jemalloc/include/jemalloc/internal/malloc_io.h +++ b/contrib/jemalloc/include/jemalloc/internal/malloc_io.h @@ -53,10 +53,50 @@ size_t malloc_vsnprintf(char *str, size_t size, const char *format, va_list ap); size_t malloc_snprintf(char *str, size_t size, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); +/* + * The caller can set write_cb and cbopaque to null to choose to print with the + * je_malloc_message hook. 
+ */ void malloc_vcprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, va_list ap); void malloc_cprintf(void (*write_cb)(void *, const char *), void *cbopaque, const char *format, ...) JEMALLOC_FORMAT_PRINTF(3, 4); void malloc_printf(const char *format, ...) JEMALLOC_FORMAT_PRINTF(1, 2); +static inline ssize_t +malloc_write_fd(int fd, const void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) + /* + * Use syscall(2) rather than write(2) when possible in order to avoid + * the possibility of memory allocation within libc. This is necessary + * on FreeBSD; most operating systems do not have this problem though. + * + * syscall() returns long or int, depending on platform, so capture the + * result in the widest plausible type to avoid compiler warnings. + */ + long result = syscall(SYS_write, fd, buf, count); +#else + ssize_t result = (ssize_t)write(fd, buf, +#ifdef _WIN32 + (unsigned int) +#endif + count); +#endif + return (ssize_t)result; +} + +static inline ssize_t +malloc_read_fd(int fd, void *buf, size_t count) { +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) + long result = syscall(SYS_read, fd, buf, count); +#else + ssize_t result = read(fd, buf, +#ifdef _WIN32 + (unsigned int) +#endif + count); +#endif + return (ssize_t)result; +} + #endif /* JEMALLOC_INTERNAL_MALLOC_IO_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/mutex_prof.h b/contrib/jemalloc/include/jemalloc/internal/mutex_prof.h index 3358bcf5351..ce183d33529 100644 --- a/contrib/jemalloc/include/jemalloc/internal/mutex_prof.h +++ b/contrib/jemalloc/include/jemalloc/internal/mutex_prof.h @@ -35,21 +35,34 @@ typedef enum { mutex_prof_num_arena_mutexes } mutex_prof_arena_ind_t; +#define MUTEX_PROF_UINT64_COUNTERS \ + OP(num_ops, uint64_t, "n_lock_ops") \ + OP(num_wait, uint64_t, "n_waiting") \ + OP(num_spin_acq, uint64_t, "n_spin_acq") \ + OP(num_owner_switch, uint64_t, "n_owner_switch") \ + OP(total_wait_time, uint64_t, 
"total_wait_ns") \ + OP(max_wait_time, uint64_t, "max_wait_ns") + +#define MUTEX_PROF_UINT32_COUNTERS \ + OP(max_num_thds, uint32_t, "max_n_thds") + #define MUTEX_PROF_COUNTERS \ - OP(num_ops, uint64_t) \ - OP(num_wait, uint64_t) \ - OP(num_spin_acq, uint64_t) \ - OP(num_owner_switch, uint64_t) \ - OP(total_wait_time, uint64_t) \ - OP(max_wait_time, uint64_t) \ - OP(max_num_thds, uint32_t) + MUTEX_PROF_UINT64_COUNTERS \ + MUTEX_PROF_UINT32_COUNTERS -typedef enum { -#define OP(counter, type) mutex_counter_##counter, - MUTEX_PROF_COUNTERS +#define OP(counter, type, human) mutex_counter_##counter, + +#define COUNTER_ENUM(counter_list, t) \ + typedef enum { \ + counter_list \ + mutex_prof_num_##t##_counters \ + } mutex_prof_##t##_counter_ind_t; + +COUNTER_ENUM(MUTEX_PROF_UINT64_COUNTERS, uint64_t) +COUNTER_ENUM(MUTEX_PROF_UINT32_COUNTERS, uint32_t) + +#undef COUNTER_ENUM #undef OP - mutex_prof_num_counters -} mutex_prof_counter_ind_t; typedef struct { /* diff --git a/contrib/jemalloc/include/jemalloc/internal/pages.h b/contrib/jemalloc/include/jemalloc/internal/pages.h index 28383b7f973..7dae633afe5 100644 --- a/contrib/jemalloc/include/jemalloc/internal/pages.h +++ b/contrib/jemalloc/include/jemalloc/internal/pages.h @@ -58,6 +58,20 @@ static const bool pages_can_purge_forced = #endif ; +typedef enum { + thp_mode_default = 0, /* Do not change hugepage settings. */ + thp_mode_always = 1, /* Always set MADV_HUGEPAGE. */ + thp_mode_never = 2, /* Always set MADV_NOHUGEPAGE. */ + + thp_mode_names_limit = 3, /* Used for option processing. */ + thp_mode_not_supported = 3 /* No THP support detected. */ +} thp_mode_t; + +#define THP_MODE_DEFAULT thp_mode_default +extern thp_mode_t opt_thp; +extern thp_mode_t init_system_thp_mode; /* Initial system wide state. 
*/ +extern const char *thp_mode_names[]; + void *pages_map(void *addr, size_t size, size_t alignment, bool *commit); void pages_unmap(void *addr, size_t size); bool pages_commit(void *addr, size_t size); @@ -66,6 +80,9 @@ bool pages_purge_lazy(void *addr, size_t size); bool pages_purge_forced(void *addr, size_t size); bool pages_huge(void *addr, size_t size); bool pages_nohuge(void *addr, size_t size); +bool pages_dontdump(void *addr, size_t size); +bool pages_dodump(void *addr, size_t size); bool pages_boot(void); +void pages_set_thp_state (void *ptr, size_t size); #endif /* JEMALLOC_INTERNAL_PAGES_EXTERNS_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h index 00bf8da1f8f..afcc6c40034 100644 --- a/contrib/jemalloc/include/jemalloc/internal/private_namespace.h +++ b/contrib/jemalloc/include/jemalloc/internal/private_namespace.h @@ -30,7 +30,6 @@ #define opt_zero JEMALLOC_N(opt_zero) #define arena_alloc_junk_small JEMALLOC_N(arena_alloc_junk_small) #define arena_basic_stats_merge JEMALLOC_N(arena_basic_stats_merge) -#define arena_bin_info JEMALLOC_N(arena_bin_info) #define arena_boot JEMALLOC_N(arena_boot) #define arena_dalloc_bin_junked_locked JEMALLOC_N(arena_dalloc_bin_junked_locked) #define arena_dalloc_junk_small JEMALLOC_N(arena_dalloc_junk_small) @@ -74,8 +73,7 @@ #define arena_ralloc JEMALLOC_N(arena_ralloc) #define arena_ralloc_no_move JEMALLOC_N(arena_ralloc_no_move) #define arena_reset JEMALLOC_N(arena_reset) -#define arena_stats_large_nrequests_add JEMALLOC_N(arena_stats_large_nrequests_add) -#define arena_stats_mapped_add JEMALLOC_N(arena_stats_mapped_add) +#define arena_retain_grow_limit_get_set JEMALLOC_N(arena_retain_grow_limit_get_set) #define arena_stats_merge JEMALLOC_N(arena_stats_merge) #define arena_tcache_fill_small JEMALLOC_N(arena_tcache_fill_small) #define h_steps JEMALLOC_N(h_steps) @@ -99,8 +97,10 @@ #define background_threads_disable 
JEMALLOC_N(background_threads_disable) #define background_threads_enable JEMALLOC_N(background_threads_enable) #define can_enable_background_thread JEMALLOC_N(can_enable_background_thread) +#define max_background_threads JEMALLOC_N(max_background_threads) #define n_background_threads JEMALLOC_N(n_background_threads) #define opt_background_thread JEMALLOC_N(opt_background_thread) +#define opt_max_background_threads JEMALLOC_N(opt_max_background_threads) #define pthread_create_wrapper JEMALLOC_N(pthread_create_wrapper) #define b0get JEMALLOC_N(b0get) #define base_alloc JEMALLOC_N(base_alloc) @@ -114,6 +114,13 @@ #define base_postfork_parent JEMALLOC_N(base_postfork_parent) #define base_prefork JEMALLOC_N(base_prefork) #define base_stats_get JEMALLOC_N(base_stats_get) +#define metadata_thp_mode_names JEMALLOC_N(metadata_thp_mode_names) +#define opt_metadata_thp JEMALLOC_N(opt_metadata_thp) +#define bin_infos JEMALLOC_N(bin_infos) +#define bin_init JEMALLOC_N(bin_init) +#define bin_postfork_child JEMALLOC_N(bin_postfork_child) +#define bin_postfork_parent JEMALLOC_N(bin_postfork_parent) +#define bin_prefork JEMALLOC_N(bin_prefork) #define bitmap_info_init JEMALLOC_N(bitmap_info_init) #define bitmap_init JEMALLOC_N(bitmap_init) #define bitmap_size JEMALLOC_N(bitmap_size) @@ -135,27 +142,17 @@ #define ctl_postfork_child JEMALLOC_N(ctl_postfork_child) #define ctl_postfork_parent JEMALLOC_N(ctl_postfork_parent) #define ctl_prefork JEMALLOC_N(ctl_prefork) +#define div_init JEMALLOC_N(div_init) #define extent_alloc JEMALLOC_N(extent_alloc) #define extent_alloc_wrapper JEMALLOC_N(extent_alloc_wrapper) -#define extent_avail_destroy JEMALLOC_N(extent_avail_destroy) -#define extent_avail_destroy_recurse JEMALLOC_N(extent_avail_destroy_recurse) +#define extent_avail_any JEMALLOC_N(extent_avail_any) #define extent_avail_empty JEMALLOC_N(extent_avail_empty) #define extent_avail_first JEMALLOC_N(extent_avail_first) #define extent_avail_insert JEMALLOC_N(extent_avail_insert) -#define 
extent_avail_iter JEMALLOC_N(extent_avail_iter) -#define extent_avail_iter_recurse JEMALLOC_N(extent_avail_iter_recurse) -#define extent_avail_iter_start JEMALLOC_N(extent_avail_iter_start) -#define extent_avail_last JEMALLOC_N(extent_avail_last) #define extent_avail_new JEMALLOC_N(extent_avail_new) -#define extent_avail_next JEMALLOC_N(extent_avail_next) -#define extent_avail_nsearch JEMALLOC_N(extent_avail_nsearch) -#define extent_avail_prev JEMALLOC_N(extent_avail_prev) -#define extent_avail_psearch JEMALLOC_N(extent_avail_psearch) #define extent_avail_remove JEMALLOC_N(extent_avail_remove) -#define extent_avail_reverse_iter JEMALLOC_N(extent_avail_reverse_iter) -#define extent_avail_reverse_iter_recurse JEMALLOC_N(extent_avail_reverse_iter_recurse) -#define extent_avail_reverse_iter_start JEMALLOC_N(extent_avail_reverse_iter_start) -#define extent_avail_search JEMALLOC_N(extent_avail_search) +#define extent_avail_remove_any JEMALLOC_N(extent_avail_remove_any) +#define extent_avail_remove_first JEMALLOC_N(extent_avail_remove_first) #define extent_boot JEMALLOC_N(extent_boot) #define extent_commit_wrapper JEMALLOC_N(extent_commit_wrapper) #define extent_dalloc JEMALLOC_N(extent_dalloc) @@ -189,6 +186,7 @@ #define extents_prefork JEMALLOC_N(extents_prefork) #define extents_rtree JEMALLOC_N(extents_rtree) #define extents_state_get JEMALLOC_N(extents_state_get) +#define opt_lg_extent_max_active_fit JEMALLOC_N(opt_lg_extent_max_active_fit) #define dss_prec_names JEMALLOC_N(dss_prec_names) #define extent_alloc_dss JEMALLOC_N(extent_alloc_dss) #define extent_dss_boot JEMALLOC_N(extent_dss_boot) @@ -215,6 +213,9 @@ #define large_ralloc JEMALLOC_N(large_ralloc) #define large_ralloc_no_move JEMALLOC_N(large_ralloc_no_move) #define large_salloc JEMALLOC_N(large_salloc) +#define log_init_done JEMALLOC_N(log_init_done) +#define log_var_names JEMALLOC_N(log_var_names) +#define log_var_update_state JEMALLOC_N(log_var_update_state) #define buferror JEMALLOC_N(buferror) #define 
malloc_cprintf JEMALLOC_N(malloc_cprintf) #define malloc_printf JEMALLOC_N(malloc_printf) @@ -248,15 +249,21 @@ #define nstime_sec JEMALLOC_N(nstime_sec) #define nstime_subtract JEMALLOC_N(nstime_subtract) #define nstime_update JEMALLOC_N(nstime_update) +#define init_system_thp_mode JEMALLOC_N(init_system_thp_mode) +#define opt_thp JEMALLOC_N(opt_thp) #define pages_boot JEMALLOC_N(pages_boot) #define pages_commit JEMALLOC_N(pages_commit) #define pages_decommit JEMALLOC_N(pages_decommit) +#define pages_dodump JEMALLOC_N(pages_dodump) +#define pages_dontdump JEMALLOC_N(pages_dontdump) #define pages_huge JEMALLOC_N(pages_huge) #define pages_map JEMALLOC_N(pages_map) #define pages_nohuge JEMALLOC_N(pages_nohuge) #define pages_purge_forced JEMALLOC_N(pages_purge_forced) #define pages_purge_lazy JEMALLOC_N(pages_purge_lazy) +#define pages_set_thp_state JEMALLOC_N(pages_set_thp_state) #define pages_unmap JEMALLOC_N(pages_unmap) +#define thp_mode_names JEMALLOC_N(thp_mode_names) #define bt2gctx_mtx JEMALLOC_N(bt2gctx_mtx) #define bt_init JEMALLOC_N(bt_init) #define lg_prof_sample JEMALLOC_N(lg_prof_sample) @@ -318,7 +325,6 @@ #define opt_stats_print JEMALLOC_N(opt_stats_print) #define opt_stats_print_opts JEMALLOC_N(opt_stats_print_opts) #define stats_print JEMALLOC_N(stats_print) -#define spin_adaptive JEMALLOC_N(spin_adaptive) #define sz_index2size_tab JEMALLOC_N(sz_index2size_tab) #define sz_pind2sz_tab JEMALLOC_N(sz_pind2sz_tab) #define sz_size2index_tab JEMALLOC_N(sz_size2index_tab) diff --git a/contrib/jemalloc/include/jemalloc/internal/prof_inlines_a.h b/contrib/jemalloc/include/jemalloc/internal/prof_inlines_a.h index eda6839ade4..a6efb4851dc 100644 --- a/contrib/jemalloc/include/jemalloc/internal/prof_inlines_a.h +++ b/contrib/jemalloc/include/jemalloc/internal/prof_inlines_a.h @@ -69,4 +69,15 @@ prof_accum_cancel(tsdn_t *tsdn, prof_accum_t *prof_accum, size_t usize) { #endif } +JEMALLOC_ALWAYS_INLINE bool +prof_active_get_unlocked(void) { + /* + * Even if 
opt_prof is true, sampling can be temporarily disabled by + * setting prof_active to false. No locking is used when reading + * prof_active in the fast path, so there are no guarantees regarding + * how long it will take for all threads to notice state changes. + */ + return prof_active; +} + #endif /* JEMALLOC_INTERNAL_PROF_INLINES_A_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/prof_inlines_b.h b/contrib/jemalloc/include/jemalloc/internal/prof_inlines_b.h index d670cb7b8f8..6ff465ad7f0 100644 --- a/contrib/jemalloc/include/jemalloc/internal/prof_inlines_b.h +++ b/contrib/jemalloc/include/jemalloc/internal/prof_inlines_b.h @@ -3,17 +3,6 @@ #include "jemalloc/internal/sz.h" -JEMALLOC_ALWAYS_INLINE bool -prof_active_get_unlocked(void) { - /* - * Even if opt_prof is true, sampling can be temporarily disabled by - * setting prof_active to false. No locking is used when reading - * prof_active in the fast path, so there are no guarantees regarding - * how long it will take for all threads to notice state changes. - */ - return prof_active; -} - JEMALLOC_ALWAYS_INLINE bool prof_gdump_get_unlocked(void) { /* diff --git a/contrib/jemalloc/include/jemalloc/internal/rtree.h b/contrib/jemalloc/include/jemalloc/internal/rtree.h index b5d4db3988f..b59d33a80bc 100644 --- a/contrib/jemalloc/include/jemalloc/internal/rtree.h +++ b/contrib/jemalloc/include/jemalloc/internal/rtree.h @@ -178,9 +178,21 @@ rtree_leaf_elm_bits_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, JEMALLOC_ALWAYS_INLINE extent_t * rtree_leaf_elm_bits_extent_get(uintptr_t bits) { +# ifdef __aarch64__ + /* + * aarch64 doesn't sign extend the highest virtual address bit to set + * the higher ones. Instead, the high bits gets zeroed. + */ + uintptr_t high_bit_mask = ((uintptr_t)1 << LG_VADDR) - 1; + /* Mask off the slab bit. 
*/ + uintptr_t low_bit_mask = ~(uintptr_t)1; + uintptr_t mask = high_bit_mask & low_bit_mask; + return (extent_t *)(bits & mask); +# else /* Restore sign-extended high bits, mask slab bit. */ return (extent_t *)((uintptr_t)((intptr_t)(bits << RTREE_NHIB) >> RTREE_NHIB) & ~((uintptr_t)0x1)); +# endif } JEMALLOC_ALWAYS_INLINE szind_t @@ -196,8 +208,8 @@ rtree_leaf_elm_bits_slab_get(uintptr_t bits) { # endif JEMALLOC_ALWAYS_INLINE extent_t * -rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_extent_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_extent_get(bits); @@ -209,8 +221,8 @@ rtree_leaf_elm_extent_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } JEMALLOC_ALWAYS_INLINE szind_t -rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_szind_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_szind_get(bits); @@ -221,8 +233,8 @@ rtree_leaf_elm_szind_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } JEMALLOC_ALWAYS_INLINE bool -rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool dependent) { +rtree_leaf_elm_slab_read(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool dependent) { #ifdef RTREE_LEAF_COMPACT uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); return rtree_leaf_elm_bits_slab_get(bits); @@ -233,8 +245,8 @@ rtree_leaf_elm_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - extent_t *extent) { 
+rtree_leaf_elm_extent_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, extent_t *extent) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); uintptr_t bits = ((uintptr_t)rtree_leaf_elm_bits_szind_get(old_bits) << @@ -247,8 +259,8 @@ rtree_leaf_elm_extent_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - szind_t szind) { +rtree_leaf_elm_szind_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, szind_t szind) { assert(szind <= NSIZES); #ifdef RTREE_LEAF_COMPACT @@ -265,8 +277,8 @@ rtree_leaf_elm_szind_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, } static inline void -rtree_leaf_elm_slab_write(tsdn_t *tsdn, rtree_t *rtree, rtree_leaf_elm_t *elm, - bool slab) { +rtree_leaf_elm_slab_write(UNUSED tsdn_t *tsdn, UNUSED rtree_t *rtree, + rtree_leaf_elm_t *elm, bool slab) { #ifdef RTREE_LEAF_COMPACT uintptr_t old_bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, true); @@ -448,8 +460,14 @@ rtree_szind_slab_read(tsdn_t *tsdn, rtree_t *rtree, rtree_ctx_t *rtree_ctx, if (!dependent && elm == NULL) { return true; } +#ifdef RTREE_LEAF_COMPACT + uintptr_t bits = rtree_leaf_elm_bits_read(tsdn, rtree, elm, dependent); + *r_szind = rtree_leaf_elm_bits_szind_get(bits); + *r_slab = rtree_leaf_elm_bits_slab_get(bits); +#else *r_szind = rtree_leaf_elm_szind_read(tsdn, rtree, elm, dependent); *r_slab = rtree_leaf_elm_slab_read(tsdn, rtree, elm, dependent); +#endif return false; } diff --git a/contrib/jemalloc/include/jemalloc/internal/rtree_tsd.h b/contrib/jemalloc/include/jemalloc/internal/rtree_tsd.h index 3cdc8625487..93a75173a8d 100644 --- a/contrib/jemalloc/include/jemalloc/internal/rtree_tsd.h +++ b/contrib/jemalloc/include/jemalloc/internal/rtree_tsd.h @@ -26,7 +26,7 @@ * Zero initializer required for tsd initialization only. 
Proper initialization * done via rtree_ctx_data_init(). */ -#define RTREE_CTX_ZERO_INITIALIZER {{{0}}} +#define RTREE_CTX_ZERO_INITIALIZER {{{0}}, {{0}}} typedef struct rtree_leaf_elm_s rtree_leaf_elm_t; diff --git a/contrib/jemalloc/include/jemalloc/internal/spin.h b/contrib/jemalloc/include/jemalloc/internal/spin.h index aded0fcc174..22804c687f1 100644 --- a/contrib/jemalloc/include/jemalloc/internal/spin.h +++ b/contrib/jemalloc/include/jemalloc/internal/spin.h @@ -7,13 +7,23 @@ typedef struct { unsigned iteration; } spin_t; +static inline void +spin_cpu_spinwait() { +# if HAVE_CPU_SPINWAIT + CPU_SPINWAIT; +# else + volatile int x = 0; + x = x; +# endif +} + static inline void spin_adaptive(spin_t *spin) { volatile uint32_t i; if (spin->iteration < 5) { for (i = 0; i < (1U << spin->iteration); i++) { - CPU_SPINWAIT; + spin_cpu_spinwait(); } spin->iteration++; } else { diff --git a/contrib/jemalloc/include/jemalloc/internal/stats.h b/contrib/jemalloc/include/jemalloc/internal/stats.h index 1198779ab9c..852e34269ab 100644 --- a/contrib/jemalloc/include/jemalloc/internal/stats.h +++ b/contrib/jemalloc/include/jemalloc/internal/stats.h @@ -1,12 +1,6 @@ #ifndef JEMALLOC_INTERNAL_STATS_H #define JEMALLOC_INTERNAL_STATS_H -#include "jemalloc/internal/atomic.h" -#include "jemalloc/internal/mutex_prof.h" -#include "jemalloc/internal/mutex.h" -#include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats_tsd.h" - /* OPTION(opt, var_name, default, set_value_to) */ #define STATS_PRINT_OPTIONS \ OPTION('J', json, false, true) \ @@ -33,132 +27,4 @@ extern char opt_stats_print_opts[stats_print_tot_num_options+1]; void stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts); -/* - * In those architectures that support 64-bit atomics, we use atomic updates for - * our 64-bit values. Otherwise, we use a plain uint64_t and synchronize - * externally. 
- */ -#ifdef JEMALLOC_ATOMIC_U64 -typedef atomic_u64_t arena_stats_u64_t; -#else -/* Must hold the arena stats mutex while reading atomically. */ -typedef uint64_t arena_stats_u64_t; -#endif - -typedef struct malloc_bin_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the bin. Note that tcache may allocate an object, then recycle it - * many times, resulting many increments to nrequests, but only one - * each to nmalloc and ndalloc. - */ - uint64_t nmalloc; - uint64_t ndalloc; - - /* - * Number of allocation requests that correspond to the size of this - * bin. This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - uint64_t nrequests; - - /* - * Current number of regions of this size class, including regions - * currently cached by tcache. - */ - size_t curregs; - - /* Number of tcache fills from this bin. */ - uint64_t nfills; - - /* Number of tcache flushes to this bin. */ - uint64_t nflushes; - - /* Total number of slabs created for this bin's size class. */ - uint64_t nslabs; - - /* - * Total number of slabs reused by extracting them from the slabs heap - * for this bin's size class. - */ - uint64_t reslabs; - - /* Current number of slabs in this bin. */ - size_t curslabs; - - mutex_prof_data_t mutex_data; -} malloc_bin_stats_t; - -typedef struct malloc_large_stats_s { - /* - * Total number of allocation/deallocation requests served directly by - * the arena. - */ - arena_stats_u64_t nmalloc; - arena_stats_u64_t ndalloc; - - /* - * Number of allocation requests that correspond to this size class. - * This includes requests served by tcache, though tcache only - * periodically merges into this counter. - */ - arena_stats_u64_t nrequests; /* Partially derived. */ - - /* Current number of allocations of this size class. */ - size_t curlextents; /* Derived. */ -} malloc_large_stats_t; - -typedef struct decay_stats_s { - /* Total number of purge sweeps. 
*/ - arena_stats_u64_t npurge; - /* Total number of madvise calls made. */ - arena_stats_u64_t nmadvise; - /* Total number of pages purged. */ - arena_stats_u64_t purged; -} decay_stats_t; - -/* - * Arena stats. Note that fields marked "derived" are not directly maintained - * within the arena code; rather their values are derived during stats merge - * requests. - */ -typedef struct arena_stats_s { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_t mtx; -#endif - - /* Number of bytes currently mapped, excluding retained memory. */ - atomic_zu_t mapped; /* Partially derived. */ - - /* - * Number of unused virtual memory bytes currently retained. Retained - * bytes are technically mapped (though always decommitted or purged), - * but they are excluded from the mapped statistic (above). - */ - atomic_zu_t retained; /* Derived. */ - - decay_stats_t decay_dirty; - decay_stats_t decay_muzzy; - - atomic_zu_t base; /* Derived. */ - atomic_zu_t internal; - atomic_zu_t resident; /* Derived. */ - - atomic_zu_t allocated_large; /* Derived. */ - arena_stats_u64_t nmalloc_large; /* Derived. */ - arena_stats_u64_t ndalloc_large; /* Derived. */ - arena_stats_u64_t nrequests_large; /* Derived. */ - - /* Number of bytes cached in tcache associated with this arena. */ - atomic_zu_t tcache_bytes; /* Derived. */ - - mutex_prof_data_t mutex_prof_data[mutex_prof_num_arena_mutexes]; - - /* One element for each large size class. */ - malloc_large_stats_t lstats[NSIZES - NBINS]; - - /* Arena uptime. 
*/ - nstime_t uptime; -} arena_stats_t; - #endif /* JEMALLOC_INTERNAL_STATS_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/stats_tsd.h b/contrib/jemalloc/include/jemalloc/internal/stats_tsd.h deleted file mode 100644 index d0c3bbe4945..00000000000 --- a/contrib/jemalloc/include/jemalloc/internal/stats_tsd.h +++ /dev/null @@ -1,12 +0,0 @@ -#ifndef JEMALLOC_INTERNAL_STATS_TSD_H -#define JEMALLOC_INTERNAL_STATS_TSD_H - -typedef struct tcache_bin_stats_s { - /* - * Number of allocation requests that corresponded to the size of this - * bin. - */ - uint64_t nrequests; -} tcache_bin_stats_t; - -#endif /* JEMALLOC_INTERNAL_STATS_TSD_H */ diff --git a/contrib/jemalloc/include/jemalloc/internal/sz.h b/contrib/jemalloc/include/jemalloc/internal/sz.h index 7f640d55ad7..97946289854 100644 --- a/contrib/jemalloc/include/jemalloc/internal/sz.h +++ b/contrib/jemalloc/include/jemalloc/internal/sz.h @@ -61,7 +61,7 @@ sz_psz2ind(size_t psz) { pszind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_PAGE + 1) ? LG_PAGE : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta_inverse_mask = ZD(-1) << lg_delta; + size_t delta_inverse_mask = ZU(-1) << lg_delta; pszind_t mod = ((((psz-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); @@ -142,7 +142,7 @@ sz_size2index_compute(size_t size) { szind_t lg_delta = (x < LG_SIZE_CLASS_GROUP + LG_QUANTUM + 1) ? 
LG_QUANTUM : x - LG_SIZE_CLASS_GROUP - 1; - size_t delta_inverse_mask = ZD(-1) << lg_delta; + size_t delta_inverse_mask = ZU(-1) << lg_delta; szind_t mod = ((((size-1) & delta_inverse_mask) >> lg_delta)) & ((ZU(1) << LG_SIZE_CLASS_GROUP) - 1); diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache_externs.h b/contrib/jemalloc/include/jemalloc/internal/tcache_externs.h index db3e9c7d5d1..790367bd481 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tcache_externs.h +++ b/contrib/jemalloc/include/jemalloc/internal/tcache_externs.h @@ -6,7 +6,7 @@ extern bool opt_tcache; extern ssize_t opt_lg_tcache_max; -extern tcache_bin_info_t *tcache_bin_info; +extern cache_bin_info_t *tcache_bin_info; /* * Number of tcache bins. There are NBINS small-object bins, plus 0 or more @@ -30,10 +30,10 @@ extern tcaches_t *tcaches; size_t tcache_salloc(tsdn_t *tsdn, const void *ptr); void tcache_event_hard(tsd_t *tsd, tcache_t *tcache); void *tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success); -void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, + cache_bin_t *tbin, szind_t binind, bool *tcache_success); +void tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem); -void tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +void tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache); void tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena); diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache_inlines.h b/contrib/jemalloc/include/jemalloc/internal/tcache_inlines.h index c55bcd2723d..0f6ab8cb50a 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tcache_inlines.h +++ b/contrib/jemalloc/include/jemalloc/internal/tcache_inlines.h @@ -1,6 +1,7 @@ #ifndef JEMALLOC_INTERNAL_TCACHE_INLINES_H #define 
JEMALLOC_INTERNAL_TCACHE_INLINES_H +#include "jemalloc/internal/bin.h" #include "jemalloc/internal/jemalloc_internal_types.h" #include "jemalloc/internal/size_classes.h" #include "jemalloc/internal/sz.h" @@ -38,43 +39,16 @@ tcache_event(tsd_t *tsd, tcache_t *tcache) { } JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_easy(tcache_bin_t *tbin, bool *tcache_success) { +tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, + UNUSED size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - - if (unlikely(tbin->ncached == 0)) { - tbin->low_water = -1; - *tcache_success = false; - return NULL; - } - /* - * tcache_success (instead of ret) should be checked upon the return of - * this function. We avoid checking (ret == NULL) because there is - * never a null stored on the avail stack (which is unknown to the - * compiler), and eagerly checking ret would cause pipeline stall - * (waiting for the cacheline). - */ - *tcache_success = true; - ret = *(tbin->avail - tbin->ncached); - tbin->ncached--; - - if (unlikely((low_water_t)tbin->ncached < tbin->low_water)) { - tbin->low_water = tbin->ncached; - } - - return ret; -} - -JEMALLOC_ALWAYS_INLINE void * -tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, - szind_t binind, bool zero, bool slow_path) { - void *ret; - tcache_bin_t *tbin; + cache_bin_t *bin; bool tcache_success; size_t usize JEMALLOC_CC_SILENCE_INIT(0); assert(binind < NBINS); - tbin = tcache_small_bin_get(tcache, binind); - ret = tcache_alloc_easy(tbin, &tcache_success); + bin = tcache_small_bin_get(tcache, binind); + ret = cache_bin_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { bool tcache_hard_success; @@ -84,7 +58,7 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } ret = tcache_alloc_small_hard(tsd_tsdn(tsd), arena, tcache, - tbin, binind, &tcache_hard_success); + bin, binind, &tcache_hard_success); if 
(tcache_hard_success == false) { return NULL; } @@ -103,22 +77,21 @@ tcache_alloc_small(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, if (likely(!zero)) { if (slow_path && config_fill) { if (unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); + arena_alloc_junk_small(ret, &bin_infos[binind], + false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (slow_path && config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], - true); + arena_alloc_junk_small(ret, &bin_infos[binind], true); } memset(ret, 0, usize); } if (config_stats) { - tbin->tstats.nrequests++; + bin->tstats.nrequests++; } if (config_prof) { tcache->prof_accumbytes += usize; @@ -131,12 +104,12 @@ JEMALLOC_ALWAYS_INLINE void * tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, szind_t binind, bool zero, bool slow_path) { void *ret; - tcache_bin_t *tbin; + cache_bin_t *bin; bool tcache_success; assert(binind >= NBINS &&binind < nhbins); - tbin = tcache_large_bin_get(tcache, binind); - ret = tcache_alloc_easy(tbin, &tcache_success); + bin = tcache_large_bin_get(tcache, binind); + ret = cache_bin_alloc_easy(bin, &tcache_success); assert(tcache_success == (ret != NULL)); if (unlikely(!tcache_success)) { /* @@ -176,7 +149,7 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, } if (config_stats) { - tbin->tstats.nrequests++; + bin->tstats.nrequests++; } if (config_prof) { tcache->prof_accumbytes += usize; @@ -190,24 +163,24 @@ tcache_alloc_large(tsd_t *tsd, arena_t *arena, tcache_t *tcache, size_t size, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; + cache_bin_t *bin; + cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= SMALL_MAXCLASS); if (slow_path && config_fill && 
unlikely(opt_junk_free)) { - arena_dalloc_junk_small(ptr, &arena_bin_info[binind]); + arena_dalloc_junk_small(ptr, &bin_infos[binind]); } - tbin = tcache_small_bin_get(tcache, binind); - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_small(tsd, tcache, tbin, binind, - (tbin_info->ncached_max >> 1)); + bin = tcache_small_bin_get(tcache, binind); + bin_info = &tcache_bin_info[binind]; + if (unlikely(bin->ncached == bin_info->ncached_max)) { + tcache_bin_flush_small(tsd, tcache, bin, binind, + (bin_info->ncached_max >> 1)); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = ptr; + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; tcache_event(tsd, tcache); } @@ -215,8 +188,8 @@ tcache_dalloc_small(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, JEMALLOC_ALWAYS_INLINE void tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, bool slow_path) { - tcache_bin_t *tbin; - tcache_bin_info_t *tbin_info; + cache_bin_t *bin; + cache_bin_info_t *bin_info; assert(tcache_salloc(tsd_tsdn(tsd), ptr) > SMALL_MAXCLASS); assert(tcache_salloc(tsd_tsdn(tsd), ptr) <= tcache_maxclass); @@ -225,15 +198,15 @@ tcache_dalloc_large(tsd_t *tsd, tcache_t *tcache, void *ptr, szind_t binind, large_dalloc_junk(ptr, sz_index2size(binind)); } - tbin = tcache_large_bin_get(tcache, binind); - tbin_info = &tcache_bin_info[binind]; - if (unlikely(tbin->ncached == tbin_info->ncached_max)) { - tcache_bin_flush_large(tsd, tbin, binind, - (tbin_info->ncached_max >> 1), tcache); + bin = tcache_large_bin_get(tcache, binind); + bin_info = &tcache_bin_info[binind]; + if (unlikely(bin->ncached == bin_info->ncached_max)) { + tcache_bin_flush_large(tsd, bin, binind, + (bin_info->ncached_max >> 1), tcache); } - assert(tbin->ncached < tbin_info->ncached_max); - tbin->ncached++; - *(tbin->avail - tbin->ncached) = 
ptr; + assert(bin->ncached < bin_info->ncached_max); + bin->ncached++; + *(bin->avail - bin->ncached) = ptr; tcache_event(tsd, tcache); } diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache_structs.h b/contrib/jemalloc/include/jemalloc/internal/tcache_structs.h index 7eb516fb6b1..07b7387059f 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tcache_structs.h +++ b/contrib/jemalloc/include/jemalloc/internal/tcache_structs.h @@ -3,54 +3,51 @@ #include "jemalloc/internal/ql.h" #include "jemalloc/internal/size_classes.h" -#include "jemalloc/internal/stats_tsd.h" +#include "jemalloc/internal/cache_bin.h" #include "jemalloc/internal/ticker.h" -/* - * Read-only information associated with each element of tcache_t's tbins array - * is stored separately, mainly to reduce memory usage. - */ -struct tcache_bin_info_s { - unsigned ncached_max; /* Upper limit on ncached. */ -}; - -struct tcache_bin_s { - low_water_t low_water; /* Min # cached since last GC. */ - uint32_t ncached; /* # of cached objects. */ +struct tcache_s { /* - * ncached and stats are both modified frequently. Let's keep them - * close so that they have a higher chance of being on the same - * cacheline, thus less write-backs. + * To minimize our cache-footprint, we put the frequently accessed data + * together at the start of this struct. */ - tcache_bin_stats_t tstats; + + /* Cleared after arena_prof_accum(). */ + uint64_t prof_accumbytes; + /* Drives incremental GC. */ + ticker_t gc_ticker; /* - * To make use of adjacent cacheline prefetch, the items in the avail - * stack goes to higher address for newer allocations. avail points - * just above the available space, which means that - * avail[-ncached, ... -1] are available items and the lowest item will - * be allocated first. + * The pointer stacks associated with bins follow as a contiguous array. 
+ * During tcache initialization, the avail pointer in each element of + * tbins is initialized to point to the proper offset within this array. */ - void **avail; /* Stack of available objects. */ -}; + cache_bin_t bins_small[NBINS]; -struct tcache_s { - /* Data accessed frequently first: prof, ticker and small bins. */ - uint64_t prof_accumbytes;/* Cleared after arena_prof_accum(). */ - ticker_t gc_ticker; /* Drives incremental GC. */ /* - * The pointer stacks associated with tbins follow as a contiguous - * array. During tcache initialization, the avail pointer in each - * element of tbins is initialized to point to the proper offset within - * this array. + * This data is less hot; we can be a little less careful with our + * footprint here. */ - tcache_bin_t tbins_small[NBINS]; - /* Data accessed less often below. */ - ql_elm(tcache_t) link; /* Used for aggregating stats. */ - arena_t *arena; /* Associated arena. */ - szind_t next_gc_bin; /* Next bin to GC. */ + /* Lets us track all the tcaches in an arena. */ + ql_elm(tcache_t) link; + /* + * The descriptor lets the arena find our cache bins without seeing the + * tcache definition. This enables arenas to aggregate stats across + * tcaches without having a tcache dependency. + */ + cache_bin_array_descriptor_t cache_bin_array_descriptor; + + /* The arena this tcache is associated with. */ + arena_t *arena; + /* Next bin to GC. */ + szind_t next_gc_bin; /* For small bins, fill (ncached_max >> lg_fill_div). */ uint8_t lg_fill_div[NBINS]; - tcache_bin_t tbins_large[NSIZES-NBINS]; + /* + * We put the cache bins for large size classes at the end of the + * struct, since some of them might not get used. This might end up + * letting us avoid touching an extra page if we don't have to. + */ + cache_bin_t bins_large[NSIZES-NBINS]; }; /* Linkage for list of available (previously used) explicit tcache IDs. 
*/ diff --git a/contrib/jemalloc/include/jemalloc/internal/tcache_types.h b/contrib/jemalloc/include/jemalloc/internal/tcache_types.h index 1155d62cb44..e49bc9d79eb 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tcache_types.h +++ b/contrib/jemalloc/include/jemalloc/internal/tcache_types.h @@ -3,14 +3,9 @@ #include "jemalloc/internal/size_classes.h" -typedef struct tcache_bin_info_s tcache_bin_info_t; -typedef struct tcache_bin_s tcache_bin_t; typedef struct tcache_s tcache_t; typedef struct tcaches_s tcaches_t; -/* ncached is cast to this type for comparison. */ -typedef int32_t low_water_t; - /* * tcache pointers close to NULL are used to encode state information that is * used for two purposes: preventing thread caching on a per thread basis and diff --git a/contrib/jemalloc/include/jemalloc/internal/ticker.h b/contrib/jemalloc/include/jemalloc/internal/ticker.h index 572b96459cc..4b3604708e1 100644 --- a/contrib/jemalloc/include/jemalloc/internal/ticker.h +++ b/contrib/jemalloc/include/jemalloc/internal/ticker.h @@ -32,14 +32,42 @@ ticker_read(const ticker_t *ticker) { return ticker->tick; } +/* + * Not intended to be a public API. Unfortunately, on x86, neither gcc nor + * clang seems smart enough to turn + * ticker->tick -= nticks; + * if (unlikely(ticker->tick < 0)) { + * fixup ticker + * return true; + * } + * return false; + * into + * subq %nticks_reg, (%ticker_reg) + * js fixup ticker + * + * unless we force "fixup ticker" out of line. In that case, gcc gets it right, + * but clang now does worse than before. So, on x86 with gcc, we force it out + * of line, but otherwise let the inlining occur. Ordinarily this wouldn't be + * worth the hassle, but this is on the fast path of both malloc and free (via + * tcache_event). 
+ */ +#if defined(__GNUC__) && !defined(__clang__) \ + && (defined(__x86_64__) || defined(__i386__)) +JEMALLOC_NOINLINE +#endif +static bool +ticker_fixup(ticker_t *ticker) { + ticker->tick = ticker->nticks; + return true; +} + static inline bool ticker_ticks(ticker_t *ticker, int32_t nticks) { - if (unlikely(ticker->tick < nticks)) { - ticker->tick = ticker->nticks; - return true; - } ticker->tick -= nticks; - return(false); + if (unlikely(ticker->tick < 0)) { + return ticker_fixup(ticker); + } + return false; } static inline bool diff --git a/contrib/jemalloc/include/jemalloc/internal/tsd.h b/contrib/jemalloc/include/jemalloc/internal/tsd.h index fc14f8cea03..f03eee61af4 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tsd.h +++ b/contrib/jemalloc/include/jemalloc/internal/tsd.h @@ -65,6 +65,7 @@ typedef void (*test_callback_t)(int *); O(arenas_tdata_bypass, bool, bool) \ O(reentrancy_level, int8_t, int8_t) \ O(narenas_tdata, uint32_t, uint32_t) \ + O(offset_state, uint64_t, uint64_t) \ O(thread_allocated, uint64_t, uint64_t) \ O(thread_deallocated, uint64_t, uint64_t) \ O(prof_tdata, prof_tdata_t *, prof_tdata_t *) \ @@ -84,6 +85,7 @@ typedef void (*test_callback_t)(int *); 0, \ 0, \ 0, \ + 0, \ NULL, \ RTREE_CTX_ZERO_INITIALIZER, \ NULL, \ diff --git a/contrib/jemalloc/include/jemalloc/internal/tsd_tls.h b/contrib/jemalloc/include/jemalloc/internal/tsd_tls.h index 757aaa0eeff..0de64b7b8bf 100644 --- a/contrib/jemalloc/include/jemalloc/internal/tsd_tls.h +++ b/contrib/jemalloc/include/jemalloc/internal/tsd_tls.h @@ -39,7 +39,7 @@ tsd_get_allocates(void) { /* Get/set. 
*/ JEMALLOC_ALWAYS_INLINE tsd_t * -tsd_get(bool init) { +tsd_get(UNUSED bool init) { assert(tsd_booted); return &tsd_tls; } diff --git a/contrib/jemalloc/include/jemalloc/internal/witness.h b/contrib/jemalloc/include/jemalloc/internal/witness.h index 33be6661071..7ace8ae4a11 100644 --- a/contrib/jemalloc/include/jemalloc/internal/witness.h +++ b/contrib/jemalloc/include/jemalloc/internal/witness.h @@ -51,7 +51,7 @@ #define WITNESS_RANK_ARENA_LARGE 19U #define WITNESS_RANK_LEAF 0xffffffffU -#define WITNESS_RANK_ARENA_BIN WITNESS_RANK_LEAF +#define WITNESS_RANK_BIN WITNESS_RANK_LEAF #define WITNESS_RANK_ARENA_STATS WITNESS_RANK_LEAF #define WITNESS_RANK_DSS WITNESS_RANK_LEAF #define WITNESS_RANK_PROF_ACTIVE WITNESS_RANK_LEAF diff --git a/contrib/jemalloc/include/jemalloc/jemalloc.h b/contrib/jemalloc/include/jemalloc/jemalloc.h index a7095432f38..7fd5b967a00 100644 --- a/contrib/jemalloc/include/jemalloc/jemalloc.h +++ b/contrib/jemalloc/include/jemalloc/jemalloc.h @@ -87,12 +87,12 @@ extern "C" { #include #include -#define JEMALLOC_VERSION "5.0.1-0-g896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb" +#define JEMALLOC_VERSION "5.1.0-0-g61efbda7098de6fe64c362d309824864308c36d4" #define JEMALLOC_VERSION_MAJOR 5 -#define JEMALLOC_VERSION_MINOR 0 -#define JEMALLOC_VERSION_BUGFIX 1 +#define JEMALLOC_VERSION_MINOR 1 +#define JEMALLOC_VERSION_BUGFIX 0 #define JEMALLOC_VERSION_NREV 0 -#define JEMALLOC_VERSION_GID "896ed3a8b3f41998d4fb4d625d30ac63ef2d51fb" +#define JEMALLOC_VERSION_GID "61efbda7098de6fe64c362d309824864308c36d4" #define MALLOCX_LG_ALIGN(la) ((int)(la)) #if LG_SIZEOF_PTR == 2 diff --git a/contrib/jemalloc/src/arena.c b/contrib/jemalloc/src/arena.c index 632fce5233e..5d55bf1a060 100644 --- a/contrib/jemalloc/src/arena.c +++ b/contrib/jemalloc/src/arena.c @@ -3,6 +3,7 @@ #include "jemalloc/internal/jemalloc_internal_includes.h" #include "jemalloc/internal/assert.h" +#include "jemalloc/internal/div.h" #include "jemalloc/internal/extent_dss.h" #include 
"jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/mutex.h" @@ -32,21 +33,6 @@ ssize_t opt_muzzy_decay_ms = MUZZY_DECAY_MS_DEFAULT; static atomic_zd_t dirty_decay_ms_default; static atomic_zd_t muzzy_decay_ms_default; -const arena_bin_info_t arena_bin_info[NBINS] = { -#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ - {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, -#define BIN_INFO_bin_no(reg_size, slab_size, nregs) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - BIN_INFO_bin_##bin((1U<mtx, "arena_stats", - WITNESS_RANK_ARENA_STATS, malloc_mutex_rank_exclusive)) { - return true; - } -#endif - /* Memory is zeroed, so there is no need to clear stats. */ - return false; -} - -static void -arena_stats_lock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_lock(tsdn, &arena_stats->mtx); -#endif -} - -static void -arena_stats_unlock(tsdn_t *tsdn, arena_stats_t *arena_stats) { -#ifndef JEMALLOC_ATOMIC_U64 - malloc_mutex_unlock(tsdn, &arena_stats->mtx); -#endif -} - -static uint64_t -arena_stats_read_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_u64(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return *p; -#endif -} - -static void -arena_stats_add_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_u64(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p += x; -#endif -} - -UNUSED static void -arena_stats_sub_u64(tsdn_t *tsdn, arena_stats_t *arena_stats, - arena_stats_u64_t *p, uint64_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - UNUSED uint64_t r = atomic_fetch_sub_u64(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - *p -= x; - assert(*p + x >= *p); -#endif -} - -/* - * Non-atomically sets *dst 
+= src. *dst needs external synchronization. - * This lets us avoid the cost of a fetch_add when its unnecessary (note that - * the types here are atomic). - */ -static void -arena_stats_accum_u64(arena_stats_u64_t *dst, uint64_t src) { -#ifdef JEMALLOC_ATOMIC_U64 - uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); - atomic_store_u64(dst, src + cur_dst, ATOMIC_RELAXED); -#else - *dst += src; -#endif -} - -static size_t -arena_stats_read_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p) { -#ifdef JEMALLOC_ATOMIC_U64 - return atomic_load_zu(p, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - return atomic_load_zu(p, ATOMIC_RELAXED); -#endif -} - -static void -arena_stats_add_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - atomic_fetch_add_zu(p, x, ATOMIC_RELAXED); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur + x, ATOMIC_RELAXED); -#endif -} - -static void -arena_stats_sub_zu(tsdn_t *tsdn, arena_stats_t *arena_stats, atomic_zu_t *p, - size_t x) { -#ifdef JEMALLOC_ATOMIC_U64 - UNUSED size_t r = atomic_fetch_sub_zu(p, x, ATOMIC_RELAXED); - assert(r - x <= r); -#else - malloc_mutex_assert_owner(tsdn, &arena_stats->mtx); - size_t cur = atomic_load_zu(p, ATOMIC_RELAXED); - atomic_store_zu(p, cur - x, ATOMIC_RELAXED); -#endif -} - -/* Like the _u64 variant, needs an externally synchronized *dst. 
*/ -static void -arena_stats_accum_zu(atomic_zu_t *dst, size_t src) { - size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); - atomic_store_zu(dst, src + cur_dst, ATOMIC_RELAXED); -} - -void -arena_stats_large_nrequests_add(tsdn_t *tsdn, arena_stats_t *arena_stats, - szind_t szind, uint64_t nrequests) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_u64(tsdn, arena_stats, &arena_stats->lstats[szind - - NBINS].nrequests, nrequests); - arena_stats_unlock(tsdn, arena_stats); -} - -void -arena_stats_mapped_add(tsdn_t *tsdn, arena_stats_t *arena_stats, size_t size) { - arena_stats_lock(tsdn, arena_stats); - arena_stats_add_zu(tsdn, arena_stats, &arena_stats->mapped, size); - arena_stats_unlock(tsdn, arena_stats); -} - void -arena_basic_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, +arena_basic_stats_merge(UNUSED tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy) { *nthreads += arena_nthreads_get(arena, false); @@ -228,15 +77,15 @@ void arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, const char **dss, ssize_t *dirty_decay_ms, ssize_t *muzzy_decay_ms, size_t *nactive, size_t *ndirty, size_t *nmuzzy, arena_stats_t *astats, - malloc_bin_stats_t *bstats, malloc_large_stats_t *lstats) { + bin_stats_t *bstats, arena_stats_large_t *lstats) { cassert(config_stats); arena_basic_stats_merge(tsdn, arena, nthreads, dss, dirty_decay_ms, muzzy_decay_ms, nactive, ndirty, nmuzzy); - size_t base_allocated, base_resident, base_mapped; + size_t base_allocated, base_resident, base_mapped, metadata_thp; base_stats_get(tsdn, arena->base, &base_allocated, &base_resident, - &base_mapped); + &base_mapped, &metadata_thp); arena_stats_lock(tsdn, &arena->stats); @@ -267,6 +116,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, arena_stats_accum_zu(&astats->base, base_allocated); 
arena_stats_accum_zu(&astats->internal, arena_internal_get(arena)); + arena_stats_accum_zu(&astats->metadata_thp, metadata_thp); arena_stats_accum_zu(&astats->resident, base_resident + (((atomic_load_zu(&arena->nactive, ATOMIC_RELAXED) + extents_npages_get(&arena->extents_dirty) + @@ -303,16 +153,16 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, /* tcache_bytes counts currently cached bytes. */ atomic_store_zu(&astats->tcache_bytes, 0, ATOMIC_RELAXED); malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); - tcache_t *tcache; - ql_foreach(tcache, &arena->tcache_ql, link) { + cache_bin_array_descriptor_t *descriptor; + ql_foreach(descriptor, &arena->cache_bin_array_descriptor_ql, link) { szind_t i = 0; for (; i < NBINS; i++) { - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = &descriptor->bins_small[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } for (; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = &descriptor->bins_large[i]; arena_stats_accum_zu(&astats->tcache_bytes, tbin->ncached * sz_index2size(i)); } @@ -351,20 +201,7 @@ arena_stats_merge(tsdn_t *tsdn, arena_t *arena, unsigned *nthreads, nstime_subtract(&astats->uptime, &arena->create_time); for (szind_t i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - - malloc_mutex_lock(tsdn, &bin->lock); - malloc_mutex_prof_read(tsdn, &bstats[i].mutex_data, &bin->lock); - bstats[i].nmalloc += bin->stats.nmalloc; - bstats[i].ndalloc += bin->stats.ndalloc; - bstats[i].nrequests += bin->stats.nrequests; - bstats[i].curregs += bin->stats.curregs; - bstats[i].nfills += bin->stats.nfills; - bstats[i].nflushes += bin->stats.nflushes; - bstats[i].nslabs += bin->stats.nslabs; - bstats[i].reslabs += bin->stats.reslabs; - bstats[i].curslabs += bin->stats.curslabs; - malloc_mutex_unlock(tsdn, &bin->lock); + bin_stats_merge(tsdn, &bstats[i], &arena->bins[i]); } } @@ -384,8 +221,7 @@ 
arena_extents_dirty_dalloc(tsdn_t *tsdn, arena_t *arena, } static void * -arena_slab_reg_alloc(tsdn_t *tsdn, extent_t *slab, - const arena_bin_info_t *bin_info) { +arena_slab_reg_alloc(extent_t *slab, const bin_info_t *bin_info) { void *ret; arena_slab_data_t *slab_data = extent_slab_data_get(slab); size_t regind; @@ -412,37 +248,22 @@ arena_slab_regind(extent_t *slab, szind_t binind, const void *ptr) { assert((uintptr_t)ptr < (uintptr_t)extent_past_get(slab)); /* Freeing an interior pointer can cause assertion failure. */ assert(((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)) % - (uintptr_t)arena_bin_info[binind].reg_size == 0); + (uintptr_t)bin_infos[binind].reg_size == 0); - /* Avoid doing division with a variable divisor. */ diff = (size_t)((uintptr_t)ptr - (uintptr_t)extent_addr_get(slab)); - switch (binind) { -#define REGIND_bin_yes(index, reg_size) \ - case index: \ - regind = diff / (reg_size); \ - assert(diff == regind * (reg_size)); \ - break; -#define REGIND_bin_no(index, reg_size) -#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ - lg_delta_lookup) \ - REGIND_bin_##bin(index, (1U<nregs); @@ -692,7 +513,8 @@ arena_decay_try_purge(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool is_background_thread) { if (current_npages > npages_limit) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - npages_limit, is_background_thread); + npages_limit, current_npages - npages_limit, + is_background_thread); } } @@ -738,7 +560,7 @@ arena_decay_epoch_advance(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, } static void -arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { +arena_decay_reinit(arena_decay_t *decay, ssize_t decay_ms) { arena_decay_ms_write(decay, decay_ms); if (decay_ms > 0) { nstime_init(&decay->interval, (uint64_t)decay_ms * @@ -755,8 +577,8 @@ arena_decay_reinit(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms) { } static bool -arena_decay_init(arena_decay_t *decay, extents_t 
*extents, ssize_t decay_ms, - decay_stats_t *stats) { +arena_decay_init(arena_decay_t *decay, ssize_t decay_ms, + arena_stats_decay_t *stats) { if (config_debug) { for (size_t i = 0; i < sizeof(arena_decay_t); i++) { assert(((char *)decay)[i] == 0); @@ -768,7 +590,7 @@ arena_decay_init(arena_decay_t *decay, extents_t *extents, ssize_t decay_ms, return true; } decay->purging = false; - arena_decay_reinit(decay, extents, decay_ms); + arena_decay_reinit(decay, decay_ms); /* Memory is zeroed, so there is no need to clear stats. */ if (config_stats) { decay->stats = stats; @@ -798,7 +620,8 @@ arena_maybe_decay(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (decay_ms <= 0) { if (decay_ms == 0) { arena_decay_to_limit(tsdn, arena, decay, extents, false, - 0, is_background_thread); + 0, extents_npages_get(extents), + is_background_thread); } return false; } @@ -876,7 +699,7 @@ arena_decay_ms_set(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, * infrequent, either between the {-1, 0, >0} states, or a one-time * arbitrary change during initial arena configuration. */ - arena_decay_reinit(decay, extents, decay_ms); + arena_decay_reinit(decay, decay_ms); arena_maybe_decay(tsdn, arena, decay, extents, false); malloc_mutex_unlock(tsdn, &decay->mtx); @@ -900,14 +723,15 @@ arena_muzzy_decay_ms_set(tsdn_t *tsdn, arena_t *arena, static size_t arena_stash_decayed(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, size_t npages_limit, - extent_list_t *decay_extents) { + size_t npages_decay_max, extent_list_t *decay_extents) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); /* Stash extents according to npages_limit. 
*/ size_t nstashed = 0; extent_t *extent; - while ((extent = extents_evict(tsdn, arena, r_extent_hooks, extents, + while (nstashed < npages_decay_max && + (extent = extents_evict(tsdn, arena, r_extent_hooks, extents, npages_limit)) != NULL) { extent_list_append(decay_extents, extent); nstashed += extent_size_get(extent) >> LG_PAGE; @@ -982,12 +806,15 @@ arena_decay_stashed(tsdn_t *tsdn, arena_t *arena, } /* - * npages_limit: Decay as many dirty extents as possible without violating the - * invariant: (extents_npages_get(extents) >= npages_limit) + * npages_limit: Decay at most npages_decay_max pages without violating the + * invariant: (extents_npages_get(extents) >= npages_limit). We need an upper + * bound on number of pages in order to prevent unbounded growth (namely in + * stashed), otherwise unbounded new pages could be added to extents during the + * current decay run, so that the purging thread never finishes. */ static void arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, - extents_t *extents, bool all, size_t npages_limit, + extents_t *extents, bool all, size_t npages_limit, size_t npages_decay_max, bool is_background_thread) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 1); @@ -1005,7 +832,7 @@ arena_decay_to_limit(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, extent_list_init(&decay_extents); size_t npurge = arena_stash_decayed(tsdn, arena, &extent_hooks, extents, - npages_limit, &decay_extents); + npages_limit, npages_decay_max, &decay_extents); if (npurge != 0) { UNUSED size_t npurged = arena_decay_stashed(tsdn, arena, &extent_hooks, decay, extents, all, &decay_extents, @@ -1023,7 +850,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (all) { malloc_mutex_lock(tsdn, &decay->mtx); arena_decay_to_limit(tsdn, arena, decay, extents, all, 0, - is_background_thread); + extents_npages_get(extents), is_background_thread); malloc_mutex_unlock(tsdn, &decay->mtx); return 
false; @@ -1036,7 +863,7 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, bool epoch_advanced = arena_maybe_decay(tsdn, arena, decay, extents, is_background_thread); - size_t npages_new; + UNUSED size_t npages_new; if (epoch_advanced) { /* Backlog is updated on epoch advance. */ npages_new = decay->backlog[SMOOTHSTEP_NSTEPS-1]; @@ -1045,7 +872,8 @@ arena_decay_impl(tsdn_t *tsdn, arena_t *arena, arena_decay_t *decay, if (have_background_thread && background_thread_enabled() && epoch_advanced && !is_background_thread) { - background_thread_interval_check(tsdn, arena, decay, npages_new); + background_thread_interval_check(tsdn, arena, decay, + npages_new); } return false; @@ -1082,18 +910,18 @@ arena_slab_dalloc(tsdn_t *tsdn, arena_t *arena, extent_t *slab) { } static void -arena_bin_slabs_nonfull_insert(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_nonfull_insert(bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) > 0); extent_heap_insert(&bin->slabs_nonfull, slab); } static void -arena_bin_slabs_nonfull_remove(arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_nonfull_remove(bin_t *bin, extent_t *slab) { extent_heap_remove(&bin->slabs_nonfull, slab); } static extent_t * -arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { +arena_bin_slabs_nonfull_tryget(bin_t *bin) { extent_t *slab = extent_heap_remove_first(&bin->slabs_nonfull); if (slab == NULL) { return NULL; @@ -1105,7 +933,7 @@ arena_bin_slabs_nonfull_tryget(arena_bin_t *bin) { } static void -arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) { +arena_bin_slabs_full_insert(arena_t *arena, bin_t *bin, extent_t *slab) { assert(extent_nfree_get(slab) == 0); /* * Tracking extents is required by arena_reset, which is not allowed @@ -1119,7 +947,7 @@ arena_bin_slabs_full_insert(arena_t *arena, arena_bin_t *bin, extent_t *slab) { } static void -arena_bin_slabs_full_remove(arena_t *arena, arena_bin_t *bin, extent_t *slab) { 
+arena_bin_slabs_full_remove(arena_t *arena, bin_t *bin, extent_t *slab) { if (arena_is_auto(arena)) { return; } @@ -1173,7 +1001,7 @@ arena_reset(tsd_t *tsd, arena_t *arena) { /* Bins. */ for (unsigned i = 0; i < NBINS; i++) { extent_t *slab; - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); if (bin->slabcur != NULL) { slab = bin->slabcur; @@ -1262,7 +1090,7 @@ arena_destroy(tsd_t *tsd, arena_t *arena) { static extent_t * arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, const arena_bin_info_t *bin_info, + extent_hooks_t **r_extent_hooks, const bin_info_t *bin_info, szind_t szind) { extent_t *slab; bool zero, commit; @@ -1285,7 +1113,7 @@ arena_slab_alloc_hard(tsdn_t *tsdn, arena_t *arena, static extent_t * arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, - const arena_bin_info_t *bin_info) { + const bin_info_t *bin_info) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1321,10 +1149,10 @@ arena_slab_alloc(tsdn_t *tsdn, arena_t *arena, szind_t binind, } static extent_t * -arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, +arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { extent_t *slab; - const arena_bin_info_t *bin_info; + const bin_info_t *bin_info; /* Look for a usable slab. */ slab = arena_bin_slabs_nonfull_tryget(bin); @@ -1333,7 +1161,7 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, } /* No existing slabs have any space available. */ - bin_info = &arena_bin_info[binind]; + bin_info = &bin_infos[binind]; /* Allocate a new slab. */ malloc_mutex_unlock(tsdn, &bin->lock); @@ -1364,12 +1192,12 @@ arena_bin_nonfull_slab_get(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, /* Re-fill bin->slabcur, then call arena_slab_reg_alloc(). 
*/ static void * -arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, +arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, bin_t *bin, szind_t binind) { - const arena_bin_info_t *bin_info; + const bin_info_t *bin_info; extent_t *slab; - bin_info = &arena_bin_info[binind]; + bin_info = &bin_infos[binind]; if (!arena_is_auto(arena) && bin->slabcur != NULL) { arena_bin_slabs_full_insert(arena, bin, bin->slabcur); bin->slabcur = NULL; @@ -1381,7 +1209,7 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, * bin lock in arena_bin_nonfull_slab_get(). */ if (extent_nfree_get(bin->slabcur) > 0) { - void *ret = arena_slab_reg_alloc(tsdn, bin->slabcur, + void *ret = arena_slab_reg_alloc(bin->slabcur, bin_info); if (slab != NULL) { /* @@ -1415,14 +1243,14 @@ arena_bin_malloc_hard(tsdn_t *tsdn, arena_t *arena, arena_bin_t *bin, assert(extent_nfree_get(bin->slabcur) > 0); - return arena_slab_reg_alloc(tsdn, slab, bin_info); + return arena_slab_reg_alloc(slab, bin_info); } void arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { + cache_bin_t *tbin, szind_t binind, uint64_t prof_accumbytes) { unsigned i, nfill; - arena_bin_t *bin; + bin_t *bin; assert(tbin->ncached == 0); @@ -1437,8 +1265,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, void *ptr; if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ptr = arena_slab_reg_alloc(tsdn, slab, - &arena_bin_info[binind]); + ptr = arena_slab_reg_alloc(slab, &bin_infos[binind]); } else { ptr = arena_bin_malloc_hard(tsdn, arena, bin, binind); } @@ -1455,8 +1282,7 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, break; } if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ptr, &arena_bin_info[binind], - true); + arena_alloc_junk_small(ptr, &bin_infos[binind], true); } /* Insert such that low regions get used first. 
*/ *(tbin->avail - nfill + i) = ptr; @@ -1474,14 +1300,14 @@ arena_tcache_fill_small(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, } void -arena_alloc_junk_small(void *ptr, const arena_bin_info_t *bin_info, bool zero) { +arena_alloc_junk_small(void *ptr, const bin_info_t *bin_info, bool zero) { if (!zero) { memset(ptr, JEMALLOC_ALLOC_JUNK, bin_info->reg_size); } } static void -arena_dalloc_junk_small_impl(void *ptr, const arena_bin_info_t *bin_info) { +arena_dalloc_junk_small_impl(void *ptr, const bin_info_t *bin_info) { memset(ptr, JEMALLOC_FREE_JUNK, bin_info->reg_size); } arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = @@ -1490,7 +1316,7 @@ arena_dalloc_junk_small_t *JET_MUTABLE arena_dalloc_junk_small = static void * arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { void *ret; - arena_bin_t *bin; + bin_t *bin; size_t usize; extent_t *slab; @@ -1500,7 +1326,7 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { malloc_mutex_lock(tsdn, &bin->lock); if ((slab = bin->slabcur) != NULL && extent_nfree_get(slab) > 0) { - ret = arena_slab_reg_alloc(tsdn, slab, &arena_bin_info[binind]); + ret = arena_slab_reg_alloc(slab, &bin_infos[binind]); } else { ret = arena_bin_malloc_hard(tsdn, arena, bin, binind); } @@ -1524,14 +1350,14 @@ arena_malloc_small(tsdn_t *tsdn, arena_t *arena, szind_t binind, bool zero) { if (config_fill) { if (unlikely(opt_junk_alloc)) { arena_alloc_junk_small(ret, - &arena_bin_info[binind], false); + &bin_infos[binind], false); } else if (unlikely(opt_zero)) { memset(ret, 0, usize); } } } else { if (config_fill && unlikely(opt_junk_alloc)) { - arena_alloc_junk_small(ret, &arena_bin_info[binind], + arena_alloc_junk_small(ret, &bin_infos[binind], true); } memset(ret, 0, usize); @@ -1636,13 +1462,13 @@ arena_dalloc_promoted(tsdn_t *tsdn, void *ptr, tcache_t *tcache, } static void -arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { 
+arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, bin_t *bin) { /* Dissociate slab from bin. */ if (slab == bin->slabcur) { bin->slabcur = NULL; } else { szind_t binind = extent_szind_get(slab); - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; /* * The following block's conditional is necessary because if the @@ -1659,7 +1485,7 @@ arena_dissociate_bin_slab(arena_t *arena, extent_t *slab, arena_bin_t *bin) { static void arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) { + bin_t *bin) { assert(slab != bin->slabcur); malloc_mutex_unlock(tsdn, &bin->lock); @@ -1673,8 +1499,8 @@ arena_dalloc_bin_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, } static void -arena_bin_lower_slab(tsdn_t *tsdn, arena_t *arena, extent_t *slab, - arena_bin_t *bin) { +arena_bin_lower_slab(UNUSED tsdn_t *tsdn, arena_t *arena, extent_t *slab, + bin_t *bin) { assert(extent_nfree_get(slab) > 0); /* @@ -1704,14 +1530,14 @@ arena_dalloc_bin_locked_impl(tsdn_t *tsdn, arena_t *arena, extent_t *slab, void *ptr, bool junked) { arena_slab_data_t *slab_data = extent_slab_data_get(slab); szind_t binind = extent_szind_get(slab); - arena_bin_t *bin = &arena->bins[binind]; - const arena_bin_info_t *bin_info = &arena_bin_info[binind]; + bin_t *bin = &arena->bins[binind]; + const bin_info_t *bin_info = &bin_infos[binind]; if (!junked && config_fill && unlikely(opt_junk_free)) { arena_dalloc_junk_small(ptr, bin_info); } - arena_slab_reg_dalloc(tsdn, slab, slab_data, ptr); + arena_slab_reg_dalloc(slab, slab_data, ptr); unsigned nfree = extent_nfree_get(slab); if (nfree == bin_info->nregs) { arena_dissociate_bin_slab(arena, slab, bin); @@ -1736,7 +1562,7 @@ arena_dalloc_bin_junked_locked(tsdn_t *tsdn, arena_t *arena, extent_t *extent, static void arena_dalloc_bin(tsdn_t *tsdn, arena_t *arena, extent_t *extent, void *ptr) { szind_t binind = extent_szind_get(extent); - arena_bin_t *bin = 
&arena->bins[binind]; + bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(tsdn, &bin->lock); arena_dalloc_bin_locked_impl(tsdn, arena, extent, ptr, false); @@ -1770,7 +1596,7 @@ arena_ralloc_no_move(tsdn_t *tsdn, void *ptr, size_t oldsize, size_t size, * Avoid moving the allocation if the size class can be left the * same. */ - assert(arena_bin_info[sz_size2index(oldsize)].reg_size == + assert(bin_infos[sz_size2index(oldsize)].reg_size == oldsize); if ((usize_max > SMALL_MAXCLASS || sz_size2index(usize_max) != sz_size2index(oldsize)) && (size > oldsize || usize_max < @@ -1885,6 +1711,33 @@ arena_muzzy_decay_ms_default_set(ssize_t decay_ms) { return false; } +bool +arena_retain_grow_limit_get_set(tsd_t *tsd, arena_t *arena, size_t *old_limit, + size_t *new_limit) { + assert(opt_retain); + + pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); + if (new_limit != NULL) { + size_t limit = *new_limit; + /* Grow no more than the new limit. */ + if ((new_ind = sz_psz2ind(limit + 1) - 1) > + EXTENT_GROW_MAX_PIND) { + return true; + } + } + + malloc_mutex_lock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + if (old_limit != NULL) { + *old_limit = sz_pind2sz(arena->retain_grow_limit); + } + if (new_limit != NULL) { + arena->retain_grow_limit = new_ind; + } + malloc_mutex_unlock(tsd_tsdn(tsd), &arena->extent_grow_mtx); + + return false; +} + unsigned arena_nthreads_get(arena_t *arena, bool internal) { return atomic_load_u(&arena->nthreads[internal], ATOMIC_RELAXED); @@ -1935,6 +1788,7 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { } ql_new(&arena->tcache_ql); + ql_new(&arena->cache_bin_array_descriptor_ql); if (malloc_mutex_init(&arena->tcache_ql_mtx, "tcache_ql", WITNESS_RANK_TCACHE_QL, malloc_mutex_rank_exclusive)) { goto label_error; @@ -2001,16 +1855,17 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { goto label_error; } - if (arena_decay_init(&arena->decay_dirty, &arena->extents_dirty, + if (arena_decay_init(&arena->decay_dirty, 
arena_dirty_decay_ms_default_get(), &arena->stats.decay_dirty)) { goto label_error; } - if (arena_decay_init(&arena->decay_muzzy, &arena->extents_muzzy, + if (arena_decay_init(&arena->decay_muzzy, arena_muzzy_decay_ms_default_get(), &arena->stats.decay_muzzy)) { goto label_error; } arena->extent_grow_next = sz_psz2ind(HUGEPAGE); + arena->retain_grow_limit = EXTENT_GROW_MAX_PIND; if (malloc_mutex_init(&arena->extent_grow_mtx, "extent_grow", WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { goto label_error; @@ -2024,17 +1879,10 @@ arena_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { /* Initialize bins. */ for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - if (malloc_mutex_init(&bin->lock, "arena_bin", - WITNESS_RANK_ARENA_BIN, malloc_mutex_rank_exclusive)) { + bool err = bin_init(&arena->bins[i]); + if (err) { goto label_error; } - bin->slabcur = NULL; - extent_heap_new(&bin->slabs_nonfull); - extent_list_init(&bin->slabs_full); - if (config_stats) { - memset(&bin->stats, 0, sizeof(malloc_bin_stats_t)); - } } arena->base = base; @@ -2070,6 +1918,16 @@ void arena_boot(void) { arena_dirty_decay_ms_default_set(opt_dirty_decay_ms); arena_muzzy_decay_ms_default_set(opt_muzzy_decay_ms); +#define REGIND_bin_yes(index, reg_size) \ + div_init(&arena_binind_div_info[(index)], (reg_size)); +#define REGIND_bin_no(index, reg_size) +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ + lg_delta_lookup) \ + REGIND_bin_##bin(index, (1U<bins[i].lock); + bin_prefork(tsdn, &arena->bins[i]); } } @@ -2124,7 +1982,7 @@ arena_postfork_parent(tsdn_t *tsdn, arena_t *arena) { unsigned i; for (i = 0; i < NBINS; i++) { - malloc_mutex_postfork_parent(tsdn, &arena->bins[i].lock); + bin_postfork_parent(tsdn, &arena->bins[i]); } malloc_mutex_postfork_parent(tsdn, &arena->large_mtx); base_postfork_parent(tsdn, arena->base); @@ -2154,15 +2012,21 @@ arena_postfork_child(tsdn_t *tsdn, arena_t *arena) { } if (config_stats) { 
ql_new(&arena->tcache_ql); + ql_new(&arena->cache_bin_array_descriptor_ql); tcache_t *tcache = tcache_get(tsdn_tsd(tsdn)); if (tcache != NULL && tcache->arena == arena) { ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); + cache_bin_array_descriptor_init( + &tcache->cache_bin_array_descriptor, + tcache->bins_small, tcache->bins_large); + ql_tail_insert(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); } } for (i = 0; i < NBINS; i++) { - malloc_mutex_postfork_child(tsdn, &arena->bins[i].lock); + bin_postfork_child(tsdn, &arena->bins[i]); } malloc_mutex_postfork_child(tsdn, &arena->large_mtx); base_postfork_child(tsdn, arena->base); diff --git a/contrib/jemalloc/src/background_thread.c b/contrib/jemalloc/src/background_thread.c index eb30eb5b423..3517a3bb8ed 100644 --- a/contrib/jemalloc/src/background_thread.c +++ b/contrib/jemalloc/src/background_thread.c @@ -11,12 +11,14 @@ #define BACKGROUND_THREAD_DEFAULT false /* Read-only after initialization. */ bool opt_background_thread = BACKGROUND_THREAD_DEFAULT; +size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT; /* Used for thread creation, termination and stats. */ malloc_mutex_t background_thread_lock; /* Indicates global state. Atomic because decay reads this w/o locking. */ atomic_b_t background_thread_enabled_state; size_t n_background_threads; +size_t max_background_threads; /* Thread info per-index. 
*/ background_thread_info_t *background_thread_info; @@ -30,19 +32,20 @@ bool can_enable_background_thread; static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *, void *(*)(void *), void *__restrict); -static pthread_once_t once_control = PTHREAD_ONCE_INIT; static void -pthread_create_wrapper_once(void) { +pthread_create_wrapper_init(void) { #ifdef JEMALLOC_LAZY_LOCK - isthreaded = true; + if (!isthreaded) { + isthreaded = true; + } #endif } int pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr, void *(*start_routine)(void *), void *__restrict arg) { - pthread_once(&once_control, pthread_create_wrapper_once); + pthread_create_wrapper_init(); return pthread_create_fptr(thread, attr, start_routine, arg); } @@ -286,7 +289,7 @@ background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigne uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP; unsigned narenas = narenas_total_get(); - for (unsigned i = ind; i < narenas; i += ncpus) { + for (unsigned i = ind; i < narenas; i += max_background_threads) { arena_t *arena = arena_get(tsdn, i, false); if (!arena) { continue; @@ -379,35 +382,32 @@ background_thread_create_signals_masked(pthread_t *thread, return create_err; } -static void +static bool check_background_thread_creation(tsd_t *tsd, unsigned *n_created, bool *created_threads) { + bool ret = false; if (likely(*n_created == n_background_threads)) { - return; + return ret; } - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_info[0].mtx); -label_restart: - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); - for (unsigned i = 1; i < ncpus; i++) { + tsdn_t *tsdn = tsd_tsdn(tsd); + malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx); + for (unsigned i = 1; i < max_background_threads; i++) { if (created_threads[i]) { continue; } background_thread_info_t *info = &background_thread_info[i]; - malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - assert(info->state != 
background_thread_paused); + malloc_mutex_lock(tsdn, &info->mtx); + /* + * In case of the background_thread_paused state because of + * arena reset, delay the creation. + */ bool create = (info->state == background_thread_started); - malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); + malloc_mutex_unlock(tsdn, &info->mtx); if (!create) { continue; } - /* - * To avoid deadlock with prefork handlers (which waits for the - * mutex held here), unlock before calling pthread_create(). - */ - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); - pre_reentrancy(tsd, NULL); int err = background_thread_create_signals_masked(&info->thread, NULL, background_thread_entry, (void *)(uintptr_t)i); @@ -423,19 +423,21 @@ check_background_thread_creation(tsd_t *tsd, unsigned *n_created, abort(); } } - /* Restart since we unlocked. */ - goto label_restart; + /* Return to restart the loop since we unlocked. */ + ret = true; + break; } - malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_info[0].mtx); - malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + malloc_mutex_lock(tsdn, &background_thread_info[0].mtx); + + return ret; } static void background_thread0_work(tsd_t *tsd) { /* Thread0 is also responsible for launching / terminating threads. */ - VARIABLE_ARRAY(bool, created_threads, ncpus); + VARIABLE_ARRAY(bool, created_threads, max_background_threads); unsigned i; - for (i = 1; i < ncpus; i++) { + for (i = 1; i < max_background_threads; i++) { created_threads[i] = false; } /* Start working, and create more threads when asked. 
*/ @@ -445,8 +447,10 @@ background_thread0_work(tsd_t *tsd) { &background_thread_info[0])) { continue; } - check_background_thread_creation(tsd, &n_created, - (bool *)&created_threads); + if (check_background_thread_creation(tsd, &n_created, + (bool *)&created_threads)) { + continue; + } background_work_sleep_once(tsd_tsdn(tsd), &background_thread_info[0], 0); } @@ -456,15 +460,20 @@ background_thread0_work(tsd_t *tsd) { * the global background_thread mutex (and is waiting) for us. */ assert(!background_thread_enabled()); - for (i = 1; i < ncpus; i++) { + for (i = 1; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; assert(info->state != background_thread_paused); if (created_threads[i]) { background_threads_disable_single(tsd, info); } else { malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); - /* Clear in case the thread wasn't created. */ - info->state = background_thread_stopped; + if (info->state != background_thread_stopped) { + /* The thread was not created. */ + assert(info->state == + background_thread_started); + n_background_threads--; + info->state = background_thread_stopped; + } malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); } } @@ -498,7 +507,7 @@ background_work(tsd_t *tsd, unsigned ind) { static void * background_thread_entry(void *ind_arg) { unsigned thread_ind = (unsigned)(uintptr_t)ind_arg; - assert(thread_ind < ncpus); + assert(thread_ind < max_background_threads); #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP pthread_setname_np(pthread_self(), "jemalloc_bg_thd"); #endif @@ -532,7 +541,7 @@ background_thread_create(tsd_t *tsd, unsigned arena_ind) { malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); /* We create at most NCPUs threads. 
*/ - size_t thread_ind = arena_ind % ncpus; + size_t thread_ind = arena_ind % max_background_threads; background_thread_info_t *info = &background_thread_info[thread_ind]; bool need_new_thread; @@ -586,26 +595,29 @@ background_threads_enable(tsd_t *tsd) { assert(background_thread_enabled()); malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock); - VARIABLE_ARRAY(bool, marked, ncpus); + VARIABLE_ARRAY(bool, marked, max_background_threads); unsigned i, nmarked; - for (i = 0; i < ncpus; i++) { + for (i = 0; i < max_background_threads; i++) { marked[i] = false; } nmarked = 0; + /* Thread 0 is required and created at the end. */ + marked[0] = true; /* Mark the threads we need to create for thread 0. */ unsigned n = narenas_total_get(); for (i = 1; i < n; i++) { - if (marked[i % ncpus] || + if (marked[i % max_background_threads] || arena_get(tsd_tsdn(tsd), i, false) == NULL) { continue; } - background_thread_info_t *info = &background_thread_info[i]; + background_thread_info_t *info = &background_thread_info[ + i % max_background_threads]; malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx); assert(info->state == background_thread_stopped); background_thread_init(tsd, info); malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx); - marked[i % ncpus] = true; - if (++nmarked == ncpus) { + marked[i % max_background_threads] = true; + if (++nmarked == max_background_threads) { break; } } @@ -720,14 +732,14 @@ background_thread_prefork0(tsdn_t *tsdn) { void background_thread_prefork1(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx); } } void background_thread_postfork_parent(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_postfork_parent(tsdn, &background_thread_info[i].mtx); } @@ -736,7 +748,7 @@ background_thread_postfork_parent(tsdn_t *tsdn) { void 
background_thread_postfork_child(tsdn_t *tsdn) { - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { malloc_mutex_postfork_child(tsdn, &background_thread_info[i].mtx); } @@ -749,7 +761,7 @@ background_thread_postfork_child(tsdn_t *tsdn) { malloc_mutex_lock(tsdn, &background_thread_lock); n_background_threads = 0; background_thread_enabled_set(tsdn, false); - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsdn, &info->mtx); info->state = background_thread_stopped; @@ -773,7 +785,7 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { stats->num_threads = n_background_threads; uint64_t num_runs = 0; nstime_init(&stats->run_interval, 0); - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = &background_thread_info[i]; malloc_mutex_lock(tsdn, &info->mtx); if (info->state != background_thread_stopped) { @@ -795,6 +807,26 @@ background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) { #undef BILLION #undef BACKGROUND_THREAD_MIN_INTERVAL_NS +static bool +pthread_create_fptr_init(void) { + if (pthread_create_fptr != NULL) { + return false; + } + pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); + if (pthread_create_fptr == NULL) { + can_enable_background_thread = false; + if (config_lazy_lock || opt_background_thread) { + malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, " + "\"pthread_create\")\n"); + abort(); + } + } else { + can_enable_background_thread = true; + } + + return false; +} + /* * When lazy lock is enabled, we need to make sure setting isthreaded before * taking any background_thread locks. 
This is called early in ctl (instead of @@ -805,7 +837,8 @@ void background_thread_ctl_init(tsdn_t *tsdn) { malloc_mutex_assert_not_owner(tsdn, &background_thread_lock); #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - pthread_once(&once_control, pthread_create_wrapper_once); + pthread_create_fptr_init(); + pthread_create_wrapper_init(); #endif } @@ -818,18 +851,10 @@ background_thread_boot0(void) { "supports pthread only\n"); return true; } - #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER - pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create"); - if (pthread_create_fptr == NULL) { - can_enable_background_thread = false; - if (config_lazy_lock || opt_background_thread) { - malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, " - "\"pthread_create\")\n"); - abort(); - } - } else { - can_enable_background_thread = true; + if ((config_lazy_lock || opt_background_thread) && + pthread_create_fptr_init()) { + return true; } #endif return false; @@ -841,6 +866,12 @@ background_thread_boot1(tsdn_t *tsdn) { assert(have_background_thread); assert(narenas_total_get() > 0); + if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT && + ncpus < MAX_BACKGROUND_THREAD_LIMIT) { + opt_max_background_threads = ncpus; + } + max_background_threads = opt_max_background_threads; + background_thread_enabled_set(tsdn, opt_background_thread); if (malloc_mutex_init(&background_thread_lock, "background_thread_global", @@ -848,17 +879,15 @@ background_thread_boot1(tsdn_t *tsdn) { malloc_mutex_rank_exclusive)) { return true; } - if (opt_background_thread) { - background_thread_ctl_init(tsdn); - } background_thread_info = (background_thread_info_t *)base_alloc(tsdn, - b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE); + b0get(), opt_max_background_threads * + sizeof(background_thread_info_t), CACHELINE); if (background_thread_info == NULL) { return true; } - for (unsigned i = 0; i < ncpus; i++) { + for (unsigned i = 0; i < max_background_threads; i++) { background_thread_info_t *info = 
&background_thread_info[i]; /* Thread mutex is rank_inclusive because of thread0. */ if (malloc_mutex_init(&info->mtx, "background_thread", diff --git a/contrib/jemalloc/src/base.c b/contrib/jemalloc/src/base.c index 97078b134d1..b0324b5d758 100644 --- a/contrib/jemalloc/src/base.c +++ b/contrib/jemalloc/src/base.c @@ -10,25 +10,40 @@ /******************************************************************************/ /* Data. */ -static base_t *b0; +static base_t *b0; + +metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT; + +const char *metadata_thp_mode_names[] = { + "disabled", + "auto", + "always" +}; /******************************************************************************/ +static inline bool +metadata_thp_madvise(void) { + return (metadata_thp_enabled() && + (init_system_thp_mode == thp_mode_default)); +} + static void * base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) { void *addr; bool zero = true; bool commit = true; + /* Use huge page sizes and alignment regardless of opt_metadata_thp. */ assert(size == HUGEPAGE_CEILING(size)); - + size_t alignment = HUGEPAGE; if (extent_hooks == &extent_hooks_default) { - addr = extent_alloc_mmap(NULL, size, PAGE, &zero, &commit); + addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit); } else { /* No arena context as we are creating new arenas. */ tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); pre_reentrancy(tsd, NULL); - addr = extent_hooks->alloc(extent_hooks, NULL, size, PAGE, + addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment, &zero, &commit, ind); post_reentrancy(tsd); } @@ -51,16 +66,16 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, */ if (extent_hooks == &extent_hooks_default) { if (!extent_dalloc_mmap(addr, size)) { - return; + goto label_done; } if (!pages_decommit(addr, size)) { - return; + goto label_done; } if (!pages_purge_forced(addr, size)) { - return; + goto label_done; } if (!pages_purge_lazy(addr, size)) { - return; + goto label_done; } /* Nothing worked. This should never happen. */ not_reached(); @@ -70,27 +85,33 @@ base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr, if (extent_hooks->dalloc != NULL && !extent_hooks->dalloc(extent_hooks, addr, size, true, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->decommit != NULL && !extent_hooks->decommit(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_forced != NULL && !extent_hooks->purge_forced(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } if (extent_hooks->purge_lazy != NULL && !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size, ind)) { - goto label_done; + goto label_post_reentrancy; } /* Nothing worked. That's the application's problem. */ - label_done: + label_post_reentrancy: post_reentrancy(tsd); - return; + } +label_done: + if (metadata_thp_madvise()) { + /* Set NOHUGEPAGE after unmap to avoid kernel defrag. 
*/ + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (size & HUGEPAGE_MASK) == 0); + pages_nohuge(addr, size); } } @@ -105,6 +126,56 @@ base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr, extent_binit(extent, addr, size, sn); } +static size_t +base_get_num_blocks(base_t *base, bool with_new_block) { + base_block_t *b = base->blocks; + assert(b != NULL); + + size_t n_blocks = with_new_block ? 2 : 1; + while (b->next != NULL) { + n_blocks++; + b = b->next; + } + + return n_blocks; +} + +static void +base_auto_thp_switch(tsdn_t *tsdn, base_t *base) { + assert(opt_metadata_thp == metadata_thp_auto); + malloc_mutex_assert_owner(tsdn, &base->mtx); + if (base->auto_thp_switched) { + return; + } + /* Called when adding a new block. */ + bool should_switch; + if (base_ind_get(base) != 0) { + should_switch = (base_get_num_blocks(base, true) == + BASE_AUTO_THP_THRESHOLD); + } else { + should_switch = (base_get_num_blocks(base, true) == + BASE_AUTO_THP_THRESHOLD_A0); + } + if (!should_switch) { + return; + } + + base->auto_thp_switched = true; + assert(!config_stats || base->n_thp == 0); + /* Make the initial blocks THP lazily. 
*/ + base_block_t *block = base->blocks; + while (block != NULL) { + assert((block->size & HUGEPAGE_MASK) == 0); + pages_huge(block, block->size); + if (config_stats) { + base->n_thp += HUGEPAGE_CEILING(block->size - + extent_bsize_get(&block->extent)) >> LG_HUGEPAGE; + } + block = block->next; + assert(block == NULL || (base_ind_get(base) == 0)); + } +} + static void * base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, size_t alignment) { @@ -124,8 +195,8 @@ base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size, } static void -base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, - size_t gap_size, void *addr, size_t size) { +base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size, + void *addr, size_t size) { if (extent_bsize_get(extent) > 0) { /* * Compute the index for the largest size class that does not @@ -140,23 +211,31 @@ base_extent_bump_alloc_post(tsdn_t *tsdn, base_t *base, extent_t *extent, base->allocated += size; /* * Add one PAGE to base_resident for every page boundary that is - * crossed by the new allocation. + * crossed by the new allocation. Adjust n_thp similarly when + * metadata_thp is enabled. 
*/ base->resident += PAGE_CEILING((uintptr_t)addr + size) - PAGE_CEILING((uintptr_t)addr - gap_size); assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + if (metadata_thp_madvise() && (opt_metadata_thp == + metadata_thp_always || base->auto_thp_switched)) { + base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size) + - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >> + LG_HUGEPAGE; + assert(base->mapped >= base->n_thp << LG_HUGEPAGE); + } } } static void * -base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, - size_t size, size_t alignment) { +base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size, + size_t alignment) { void *ret; size_t gap_size; ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment); - base_extent_bump_alloc_post(tsdn, base, extent, gap_size, ret, size); + base_extent_bump_alloc_post(base, extent, gap_size, ret, size); return ret; } @@ -166,8 +245,8 @@ base_extent_bump_alloc(tsdn_t *tsdn, base_t *base, extent_t *extent, * On success a pointer to the initialized base_block_t header is returned. 
*/ static base_block_t * -base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, - pszind_t *pind_last, size_t *extent_sn_next, size_t size, +base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks, + unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size, size_t alignment) { alignment = ALIGNMENT_CEILING(alignment, QUANTUM); size_t usize = ALIGNMENT_CEILING(size, alignment); @@ -193,6 +272,25 @@ base_block_alloc(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, if (block == NULL) { return NULL; } + + if (metadata_thp_madvise()) { + void *addr = (void *)block; + assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 && + (block_size & HUGEPAGE_MASK) == 0); + if (opt_metadata_thp == metadata_thp_always) { + pages_huge(addr, block_size); + } else if (opt_metadata_thp == metadata_thp_auto && + base != NULL) { + /* base != NULL indicates this is not a new base. */ + malloc_mutex_lock(tsdn, &base->mtx); + base_auto_thp_switch(tsdn, base); + if (base->auto_thp_switched) { + pages_huge(addr, block_size); + } + malloc_mutex_unlock(tsdn, &base->mtx); + } + } + *pind_last = sz_psz2ind(block_size); block->size = block_size; block->next = NULL; @@ -216,7 +314,7 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { * called. 
*/ malloc_mutex_unlock(tsdn, &base->mtx); - base_block_t *block = base_block_alloc(tsdn, extent_hooks, + base_block_t *block = base_block_alloc(tsdn, base, extent_hooks, base_ind_get(base), &base->pind_last, &base->extent_sn_next, size, alignment); malloc_mutex_lock(tsdn, &base->mtx); @@ -229,8 +327,16 @@ base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) { base->allocated += sizeof(base_block_t); base->resident += PAGE_CEILING(sizeof(base_block_t)); base->mapped += block->size; + if (metadata_thp_madvise() && + !(opt_metadata_thp == metadata_thp_auto + && !base->auto_thp_switched)) { + assert(base->n_thp > 0); + base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >> + LG_HUGEPAGE; + } assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } return &block->extent; } @@ -244,7 +350,7 @@ base_t * base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { pszind_t pind_last = 0; size_t extent_sn_next = 0; - base_block_t *block = base_block_alloc(tsdn, extent_hooks, ind, + base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind, &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM); if (block == NULL) { return NULL; @@ -265,6 +371,7 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->pind_last = pind_last; base->extent_sn_next = extent_sn_next; base->blocks = block; + base->auto_thp_switched = false; for (szind_t i = 0; i < NSIZES; i++) { extent_heap_new(&base->avail[i]); } @@ -272,10 +379,14 @@ base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) { base->allocated = sizeof(base_block_t); base->resident = PAGE_CEILING(sizeof(base_block_t)); base->mapped = block->size; + base->n_thp = (opt_metadata_thp == metadata_thp_always) && + metadata_thp_madvise() ? 
HUGEPAGE_CEILING(sizeof(base_block_t)) + >> LG_HUGEPAGE : 0; assert(base->allocated <= base->resident); assert(base->resident <= base->mapped); + assert(base->n_thp << LG_HUGEPAGE <= base->mapped); } - base_extent_bump_alloc_post(tsdn, base, &block->extent, gap_size, base, + base_extent_bump_alloc_post(base, &block->extent, gap_size, base, base_size); return base; @@ -332,7 +443,7 @@ base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment, goto label_return; } - ret = base_extent_bump_alloc(tsdn, base, extent, usize, alignment); + ret = base_extent_bump_alloc(base, extent, usize, alignment); if (esn != NULL) { *esn = extent_sn_get(extent); } @@ -368,7 +479,7 @@ base_alloc_extent(tsdn_t *tsdn, base_t *base) { void base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, - size_t *mapped) { + size_t *mapped, size_t *n_thp) { cassert(config_stats); malloc_mutex_lock(tsdn, &base->mtx); @@ -377,6 +488,7 @@ base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident, *allocated = base->allocated; *resident = base->resident; *mapped = base->mapped; + *n_thp = base->n_thp; malloc_mutex_unlock(tsdn, &base->mtx); } diff --git a/contrib/jemalloc/src/bin.c b/contrib/jemalloc/src/bin.c new file mode 100644 index 00000000000..0886bc4ea92 --- /dev/null +++ b/contrib/jemalloc/src/bin.c @@ -0,0 +1,50 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/bin.h" +#include "jemalloc/internal/witness.h" + +const bin_info_t bin_infos[NBINS] = { +#define BIN_INFO_bin_yes(reg_size, slab_size, nregs) \ + {reg_size, slab_size, nregs, BITMAP_INFO_INITIALIZER(nregs)}, +#define BIN_INFO_bin_no(reg_size, slab_size, nregs) +#define SC(index, lg_grp, lg_delta, ndelta, psz, bin, pgs, \ + lg_delta_lookup) \ + BIN_INFO_bin_##bin((1U<<lg_grp) + (ndelta<<lg_delta), \ + (pgs << LG_PAGE), (pgs << LG_PAGE) / ((1U<<lg_grp) + \ + (ndelta<<lg_delta))) + SIZE_CLASSES +#undef BIN_INFO_bin_yes +#undef BIN_INFO_bin_no +#undef SC +}; + +bool +bin_init(bin_t *bin) { + if (malloc_mutex_init(&bin->lock, "bin", WITNESS_RANK_BIN, + malloc_mutex_rank_exclusive)) { + return true; + } + bin->slabcur = NULL; +
extent_heap_new(&bin->slabs_nonfull); + extent_list_init(&bin->slabs_full); + if (config_stats) { + memset(&bin->stats, 0, sizeof(bin_stats_t)); + } + return false; +} + +void +bin_prefork(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_prefork(tsdn, &bin->lock); +} + +void +bin_postfork_parent(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_postfork_parent(tsdn, &bin->lock); +} + +void +bin_postfork_child(tsdn_t *tsdn, bin_t *bin) { + malloc_mutex_postfork_child(tsdn, &bin->lock); +} diff --git a/contrib/jemalloc/src/ctl.c b/contrib/jemalloc/src/ctl.c index 36bc8fb5b75..1e713a3d104 100644 --- a/contrib/jemalloc/src/ctl.c +++ b/contrib/jemalloc/src/ctl.c @@ -57,6 +57,7 @@ static const ctl_named_node_t *n##_index(tsdn_t *tsdn, \ CTL_PROTO(version) CTL_PROTO(epoch) CTL_PROTO(background_thread) +CTL_PROTO(max_background_threads) CTL_PROTO(thread_tcache_enabled) CTL_PROTO(thread_tcache_flush) CTL_PROTO(thread_prof_name) @@ -75,16 +76,17 @@ CTL_PROTO(config_prof) CTL_PROTO(config_prof_libgcc) CTL_PROTO(config_prof_libunwind) CTL_PROTO(config_stats) -CTL_PROTO(config_thp) CTL_PROTO(config_utrace) CTL_PROTO(config_xmalloc) CTL_PROTO(opt_abort) CTL_PROTO(opt_abort_conf) +CTL_PROTO(opt_metadata_thp) CTL_PROTO(opt_retain) CTL_PROTO(opt_dss) CTL_PROTO(opt_narenas) CTL_PROTO(opt_percpu_arena) CTL_PROTO(opt_background_thread) +CTL_PROTO(opt_max_background_threads) CTL_PROTO(opt_dirty_decay_ms) CTL_PROTO(opt_muzzy_decay_ms) CTL_PROTO(opt_stats_print) @@ -94,6 +96,8 @@ CTL_PROTO(opt_zero) CTL_PROTO(opt_utrace) CTL_PROTO(opt_xmalloc) CTL_PROTO(opt_tcache) +CTL_PROTO(opt_thp) +CTL_PROTO(opt_lg_extent_max_active_fit) CTL_PROTO(opt_lg_tcache_max) CTL_PROTO(opt_prof) CTL_PROTO(opt_prof_prefix) @@ -117,6 +121,7 @@ CTL_PROTO(arena_i_dss) CTL_PROTO(arena_i_dirty_decay_ms) CTL_PROTO(arena_i_muzzy_decay_ms) CTL_PROTO(arena_i_extent_hooks) +CTL_PROTO(arena_i_retain_grow_limit) INDEX_PROTO(arena_i) CTL_PROTO(arenas_bin_i_size) CTL_PROTO(arenas_bin_i_nregs) @@ -134,6 +139,7 @@ CTL_PROTO(arenas_nbins) 
CTL_PROTO(arenas_nhbins) CTL_PROTO(arenas_nlextents) CTL_PROTO(arenas_create) +CTL_PROTO(arenas_lookup) CTL_PROTO(prof_thread_active_init) CTL_PROTO(prof_active) CTL_PROTO(prof_dump) @@ -182,6 +188,7 @@ CTL_PROTO(stats_arenas_i_muzzy_nmadvise) CTL_PROTO(stats_arenas_i_muzzy_purged) CTL_PROTO(stats_arenas_i_base) CTL_PROTO(stats_arenas_i_internal) +CTL_PROTO(stats_arenas_i_metadata_thp) CTL_PROTO(stats_arenas_i_tcache_bytes) CTL_PROTO(stats_arenas_i_resident) INDEX_PROTO(stats_arenas_i) @@ -191,6 +198,7 @@ CTL_PROTO(stats_background_thread_num_threads) CTL_PROTO(stats_background_thread_num_runs) CTL_PROTO(stats_background_thread_run_interval) CTL_PROTO(stats_metadata) +CTL_PROTO(stats_metadata_thp) CTL_PROTO(stats_resident) CTL_PROTO(stats_mapped) CTL_PROTO(stats_retained) @@ -266,7 +274,6 @@ static const ctl_named_node_t config_node[] = { {NAME("prof_libgcc"), CTL(config_prof_libgcc)}, {NAME("prof_libunwind"), CTL(config_prof_libunwind)}, {NAME("stats"), CTL(config_stats)}, - {NAME("thp"), CTL(config_thp)}, {NAME("utrace"), CTL(config_utrace)}, {NAME("xmalloc"), CTL(config_xmalloc)} }; @@ -274,11 +281,13 @@ static const ctl_named_node_t config_node[] = { static const ctl_named_node_t opt_node[] = { {NAME("abort"), CTL(opt_abort)}, {NAME("abort_conf"), CTL(opt_abort_conf)}, + {NAME("metadata_thp"), CTL(opt_metadata_thp)}, {NAME("retain"), CTL(opt_retain)}, {NAME("dss"), CTL(opt_dss)}, {NAME("narenas"), CTL(opt_narenas)}, {NAME("percpu_arena"), CTL(opt_percpu_arena)}, {NAME("background_thread"), CTL(opt_background_thread)}, + {NAME("max_background_threads"), CTL(opt_max_background_threads)}, {NAME("dirty_decay_ms"), CTL(opt_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(opt_muzzy_decay_ms)}, {NAME("stats_print"), CTL(opt_stats_print)}, @@ -288,6 +297,8 @@ static const ctl_named_node_t opt_node[] = { {NAME("utrace"), CTL(opt_utrace)}, {NAME("xmalloc"), CTL(opt_xmalloc)}, {NAME("tcache"), CTL(opt_tcache)}, + {NAME("thp"), CTL(opt_thp)}, + 
{NAME("lg_extent_max_active_fit"), CTL(opt_lg_extent_max_active_fit)}, {NAME("lg_tcache_max"), CTL(opt_lg_tcache_max)}, {NAME("prof"), CTL(opt_prof)}, {NAME("prof_prefix"), CTL(opt_prof_prefix)}, @@ -316,7 +327,8 @@ static const ctl_named_node_t arena_i_node[] = { {NAME("dss"), CTL(arena_i_dss)}, {NAME("dirty_decay_ms"), CTL(arena_i_dirty_decay_ms)}, {NAME("muzzy_decay_ms"), CTL(arena_i_muzzy_decay_ms)}, - {NAME("extent_hooks"), CTL(arena_i_extent_hooks)} + {NAME("extent_hooks"), CTL(arena_i_extent_hooks)}, + {NAME("retain_grow_limit"), CTL(arena_i_retain_grow_limit)} }; static const ctl_named_node_t super_arena_i_node[] = { {NAME(""), CHILD(named, arena_i)} @@ -362,7 +374,8 @@ static const ctl_named_node_t arenas_node[] = { {NAME("bin"), CHILD(indexed, arenas_bin)}, {NAME("nlextents"), CTL(arenas_nlextents)}, {NAME("lextent"), CHILD(indexed, arenas_lextent)}, - {NAME("create"), CTL(arenas_create)} + {NAME("create"), CTL(arenas_create)}, + {NAME("lookup"), CTL(arenas_lookup)} }; static const ctl_named_node_t prof_node[] = { @@ -474,6 +487,7 @@ static const ctl_named_node_t stats_arenas_i_node[] = { {NAME("muzzy_purged"), CTL(stats_arenas_i_muzzy_purged)}, {NAME("base"), CTL(stats_arenas_i_base)}, {NAME("internal"), CTL(stats_arenas_i_internal)}, + {NAME("metadata_thp"), CTL(stats_arenas_i_metadata_thp)}, {NAME("tcache_bytes"), CTL(stats_arenas_i_tcache_bytes)}, {NAME("resident"), CTL(stats_arenas_i_resident)}, {NAME("small"), CHILD(named, stats_arenas_i_small)}, @@ -512,6 +526,7 @@ static const ctl_named_node_t stats_node[] = { {NAME("allocated"), CTL(stats_allocated)}, {NAME("active"), CTL(stats_active)}, {NAME("metadata"), CTL(stats_metadata)}, + {NAME("metadata_thp"), CTL(stats_metadata_thp)}, {NAME("resident"), CTL(stats_resident)}, {NAME("mapped"), CTL(stats_mapped)}, {NAME("retained"), CTL(stats_retained)}, @@ -525,6 +540,7 @@ static const ctl_named_node_t root_node[] = { {NAME("version"), CTL(version)}, {NAME("epoch"), CTL(epoch)}, 
{NAME("background_thread"), CTL(background_thread)}, + {NAME("max_background_threads"), CTL(max_background_threads)}, {NAME("thread"), CHILD(named, thread)}, {NAME("config"), CHILD(named, config)}, {NAME("opt"), CHILD(named, opt)}, @@ -550,7 +566,7 @@ static const ctl_named_node_t super_root_node[] = { * synchronized by the ctl mutex. */ static void -accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { +ctl_accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { #ifdef JEMALLOC_ATOMIC_U64 uint64_t cur_dst = atomic_load_u64(dst, ATOMIC_RELAXED); uint64_t cur_src = atomic_load_u64(src, ATOMIC_RELAXED); @@ -562,7 +578,7 @@ accum_arena_stats_u64(arena_stats_u64_t *dst, arena_stats_u64_t *src) { /* Likewise: with ctl mutex synchronization, reading is simple. */ static uint64_t -arena_stats_read_u64(arena_stats_u64_t *p) { +ctl_arena_stats_read_u64(arena_stats_u64_t *p) { #ifdef JEMALLOC_ATOMIC_U64 return atomic_load_u64(p, ATOMIC_RELAXED); #else @@ -570,7 +586,8 @@ arena_stats_read_u64(arena_stats_u64_t *p) { #endif } -static void accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { +static void +accum_atomic_zu(atomic_zu_t *dst, atomic_zu_t *src) { size_t cur_dst = atomic_load_zu(dst, ATOMIC_RELAXED); size_t cur_src = atomic_load_zu(src, ATOMIC_RELAXED); atomic_store_zu(dst, cur_dst + cur_src, ATOMIC_RELAXED); @@ -680,9 +697,9 @@ ctl_arena_clear(ctl_arena_t *ctl_arena) { ctl_arena->astats->ndalloc_small = 0; ctl_arena->astats->nrequests_small = 0; memset(ctl_arena->astats->bstats, 0, NBINS * - sizeof(malloc_bin_stats_t)); + sizeof(bin_stats_t)); memset(ctl_arena->astats->lstats, 0, (NSIZES - NBINS) * - sizeof(malloc_large_stats_t)); + sizeof(arena_stats_large_t)); } } @@ -745,18 +762,18 @@ ctl_arena_stats_sdmerge(ctl_arena_t *ctl_sdarena, ctl_arena_t *ctl_arena, &astats->astats.retained); } - accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.npurge, 
&astats->astats.decay_dirty.npurge); - accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.nmadvise, &astats->astats.decay_dirty.nmadvise); - accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_dirty.purged, &astats->astats.decay_dirty.purged); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.npurge, &astats->astats.decay_muzzy.npurge); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.nmadvise, &astats->astats.decay_muzzy.nmadvise); - accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, + ctl_accum_arena_stats_u64(&sdstats->astats.decay_muzzy.purged, &astats->astats.decay_muzzy.purged); #define OP(mtx) malloc_mutex_prof_merge( \ @@ -773,6 +790,8 @@ MUTEX_PROF_ARENA_MUTEXES &astats->astats.internal); accum_atomic_zu(&sdstats->astats.resident, &astats->astats.resident); + accum_atomic_zu(&sdstats->astats.metadata_thp, + &astats->astats.metadata_thp); } else { assert(atomic_load_zu( &astats->astats.internal, ATOMIC_RELAXED) == 0); @@ -794,11 +813,11 @@ MUTEX_PROF_ARENA_MUTEXES assert(atomic_load_zu(&astats->astats.allocated_large, ATOMIC_RELAXED) == 0); } - accum_arena_stats_u64(&sdstats->astats.nmalloc_large, + ctl_accum_arena_stats_u64(&sdstats->astats.nmalloc_large, &astats->astats.nmalloc_large); - accum_arena_stats_u64(&sdstats->astats.ndalloc_large, + ctl_accum_arena_stats_u64(&sdstats->astats.ndalloc_large, &astats->astats.ndalloc_large); - accum_arena_stats_u64(&sdstats->astats.nrequests_large, + ctl_accum_arena_stats_u64(&sdstats->astats.nrequests_large, &astats->astats.nrequests_large); accum_atomic_zu(&sdstats->astats.tcache_bytes, @@ -835,11 +854,11 @@ MUTEX_PROF_ARENA_MUTEXES } for (i = 0; i < NSIZES - NBINS; i++) { - accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, + 
ctl_accum_arena_stats_u64(&sdstats->lstats[i].nmalloc, &astats->lstats[i].nmalloc); - accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].ndalloc, &astats->lstats[i].ndalloc); - accum_arena_stats_u64(&sdstats->lstats[i].nrequests, + ctl_accum_arena_stats_u64(&sdstats->lstats[i].nrequests, &astats->lstats[i].nrequests); if (!destroyed) { sdstats->lstats[i].curlextents += @@ -938,6 +957,8 @@ ctl_refresh(tsdn_t *tsdn) { &ctl_sarena->astats->astats.base, ATOMIC_RELAXED) + atomic_load_zu(&ctl_sarena->astats->astats.internal, ATOMIC_RELAXED); + ctl_stats->metadata_thp = atomic_load_zu( + &ctl_sarena->astats->astats.metadata_thp, ATOMIC_RELAXED); ctl_stats->resident = atomic_load_zu( &ctl_sarena->astats->astats.resident, ATOMIC_RELAXED); ctl_stats->mapped = atomic_load_zu( @@ -1549,6 +1570,71 @@ background_thread_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, return ret; } +static int +max_background_threads_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + size_t oldval; + + if (!have_background_thread) { + return ENOENT; + } + background_thread_ctl_init(tsd_tsdn(tsd)); + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); + if (newp == NULL) { + oldval = max_background_threads; + READ(oldval, size_t); + } else { + if (newlen != sizeof(size_t)) { + ret = EINVAL; + goto label_return; + } + oldval = max_background_threads; + READ(oldval, size_t); + + size_t newval = *(size_t *)newp; + if (newval == oldval) { + ret = 0; + goto label_return; + } + if (newval > opt_max_background_threads) { + ret = EINVAL; + goto label_return; + } + + if (background_thread_enabled()) { + if (!can_enable_background_thread) { + malloc_printf("<jemalloc>: Error in dlsym(" + "RTLD_NEXT, \"pthread_create\"). 
Cannot " + "enable background_thread\n"); + ret = EFAULT; + goto label_return; + } + background_thread_enabled_set(tsd_tsdn(tsd), false); + if (background_threads_disable(tsd)) { + ret = EFAULT; + goto label_return; + } + max_background_threads = newval; + background_thread_enabled_set(tsd_tsdn(tsd), true); + if (background_threads_enable(tsd)) { + ret = EFAULT; + goto label_return; + } + } else { + max_background_threads = newval; + } + } + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + + return ret; +} + /******************************************************************************/ CTL_RO_CONFIG_GEN(config_cache_oblivious, bool) @@ -1560,7 +1646,6 @@ CTL_RO_CONFIG_GEN(config_prof, bool) CTL_RO_CONFIG_GEN(config_prof_libgcc, bool) CTL_RO_CONFIG_GEN(config_prof_libunwind, bool) CTL_RO_CONFIG_GEN(config_stats, bool) -CTL_RO_CONFIG_GEN(config_thp, bool) CTL_RO_CONFIG_GEN(config_utrace, bool) CTL_RO_CONFIG_GEN(config_xmalloc, bool) @@ -1568,12 +1653,15 @@ CTL_RO_CONFIG_GEN(config_xmalloc, bool) CTL_RO_NL_GEN(opt_abort, opt_abort, bool) CTL_RO_NL_GEN(opt_abort_conf, opt_abort_conf, bool) +CTL_RO_NL_GEN(opt_metadata_thp, metadata_thp_mode_names[opt_metadata_thp], + const char *) CTL_RO_NL_GEN(opt_retain, opt_retain, bool) CTL_RO_NL_GEN(opt_dss, opt_dss, const char *) CTL_RO_NL_GEN(opt_narenas, opt_narenas, unsigned) CTL_RO_NL_GEN(opt_percpu_arena, percpu_arena_mode_names[opt_percpu_arena], const char *) CTL_RO_NL_GEN(opt_background_thread, opt_background_thread, bool) +CTL_RO_NL_GEN(opt_max_background_threads, opt_max_background_threads, size_t) CTL_RO_NL_GEN(opt_dirty_decay_ms, opt_dirty_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_muzzy_decay_ms, opt_muzzy_decay_ms, ssize_t) CTL_RO_NL_GEN(opt_stats_print, opt_stats_print, bool) @@ -1583,6 +1671,9 @@ CTL_RO_NL_CGEN(config_fill, opt_zero, opt_zero, bool) CTL_RO_NL_CGEN(config_utrace, opt_utrace, opt_utrace, bool) 
CTL_RO_NL_CGEN(config_xmalloc, opt_xmalloc, opt_xmalloc, bool) CTL_RO_NL_GEN(opt_tcache, opt_tcache, bool) +CTL_RO_NL_GEN(opt_thp, thp_mode_names[opt_thp], const char *) +CTL_RO_NL_GEN(opt_lg_extent_max_active_fit, opt_lg_extent_max_active_fit, + size_t) CTL_RO_NL_GEN(opt_lg_tcache_max, opt_lg_tcache_max, ssize_t) CTL_RO_NL_CGEN(config_prof, opt_prof, opt_prof, bool) CTL_RO_NL_CGEN(config_prof, opt_prof_prefix, opt_prof_prefix, const char *) @@ -2162,20 +2253,41 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); MIB_UNSIGNED(arena_ind, 1); - if (arena_ind < narenas_total_get() && (arena = - arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { - if (newp != NULL) { - extent_hooks_t *old_extent_hooks; - extent_hooks_t *new_extent_hooks - JEMALLOC_CC_SILENCE_INIT(NULL); - WRITE(new_extent_hooks, extent_hooks_t *); - old_extent_hooks = extent_hooks_set(tsd, arena, - new_extent_hooks); + if (arena_ind < narenas_total_get()) { + extent_hooks_t *old_extent_hooks; + arena = arena_get(tsd_tsdn(tsd), arena_ind, false); + if (arena == NULL) { + if (arena_ind >= narenas_auto) { + ret = EFAULT; + goto label_return; + } + old_extent_hooks = + (extent_hooks_t *)&extent_hooks_default; READ(old_extent_hooks, extent_hooks_t *); + if (newp != NULL) { + /* Initialize a new arena as a side effect. 
*/ + extent_hooks_t *new_extent_hooks + JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_extent_hooks, extent_hooks_t *); + arena = arena_init(tsd_tsdn(tsd), arena_ind, + new_extent_hooks); + if (arena == NULL) { + ret = EFAULT; + goto label_return; + } + } } else { - extent_hooks_t *old_extent_hooks = - extent_hooks_get(arena); - READ(old_extent_hooks, extent_hooks_t *); + if (newp != NULL) { + extent_hooks_t *new_extent_hooks + JEMALLOC_CC_SILENCE_INIT(NULL); + WRITE(new_extent_hooks, extent_hooks_t *); + old_extent_hooks = extent_hooks_set(tsd, arena, + new_extent_hooks); + READ(old_extent_hooks, extent_hooks_t *); + } else { + old_extent_hooks = extent_hooks_get(arena); + READ(old_extent_hooks, extent_hooks_t *); + } } } else { ret = EFAULT; @@ -2187,6 +2299,42 @@ arena_i_extent_hooks_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, return ret; } +static int +arena_i_retain_grow_limit_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, + void *oldp, size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + arena_t *arena; + + if (!opt_retain) { + /* Only relevant when retain is enabled. */ + return ENOENT; + } + + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + MIB_UNSIGNED(arena_ind, 1); + if (arena_ind < narenas_total_get() && (arena = + arena_get(tsd_tsdn(tsd), arena_ind, false)) != NULL) { + size_t old_limit, new_limit; + if (newp != NULL) { + WRITE(new_limit, size_t); + } + bool err = arena_retain_grow_limit_get_set(tsd, arena, + &old_limit, newp != NULL ? &new_limit : NULL); + if (!err) { + READ(old_limit, size_t); + ret = 0; + } else { + ret = EFAULT; + } + } else { + ret = EFAULT; + } +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + static const ctl_named_node_t * arena_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { const ctl_named_node_t *ret; @@ -2248,7 +2396,7 @@ arenas_decay_ms_ctl_impl(tsd_t *tsd, const size_t *mib, size_t miblen, ret = EINVAL; goto label_return; } - if (dirty ? 
arena_dirty_decay_ms_default_set(*(ssize_t *)newp) + if (dirty ? arena_dirty_decay_ms_default_set(*(ssize_t *)newp) : arena_muzzy_decay_ms_default_set(*(ssize_t *)newp)) { ret = EFAULT; goto label_return; @@ -2279,9 +2427,9 @@ CTL_RO_NL_GEN(arenas_page, PAGE, size_t) CTL_RO_NL_GEN(arenas_tcache_max, tcache_maxclass, size_t) CTL_RO_NL_GEN(arenas_nbins, NBINS, unsigned) CTL_RO_NL_GEN(arenas_nhbins, nhbins, unsigned) -CTL_RO_NL_GEN(arenas_bin_i_size, arena_bin_info[mib[2]].reg_size, size_t) -CTL_RO_NL_GEN(arenas_bin_i_nregs, arena_bin_info[mib[2]].nregs, uint32_t) -CTL_RO_NL_GEN(arenas_bin_i_slab_size, arena_bin_info[mib[2]].slab_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_size, bin_infos[mib[2]].reg_size, size_t) +CTL_RO_NL_GEN(arenas_bin_i_nregs, bin_infos[mib[2]].nregs, uint32_t) +CTL_RO_NL_GEN(arenas_bin_i_slab_size, bin_infos[mib[2]].slab_size, size_t) static const ctl_named_node_t * arenas_bin_i_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, size_t i) { if (i > NBINS) { @@ -2325,6 +2473,36 @@ arenas_create_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, return ret; } +static int +arenas_lookup_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, void *oldp, + size_t *oldlenp, void *newp, size_t newlen) { + int ret; + unsigned arena_ind; + void *ptr; + extent_t *extent; + arena_t *arena; + + ptr = NULL; + ret = EINVAL; + malloc_mutex_lock(tsd_tsdn(tsd), &ctl_mtx); + WRITE(ptr, void *); + extent = iealloc(tsd_tsdn(tsd), ptr); + if (extent == NULL) + goto label_return; + + arena = extent_arena_get(extent); + if (arena == NULL) + goto label_return; + + arena_ind = arena_ind_get(arena); + READ(arena_ind, unsigned); + + ret = 0; +label_return: + malloc_mutex_unlock(tsd_tsdn(tsd), &ctl_mtx); + return ret; +} + /******************************************************************************/ static int @@ -2460,6 +2638,7 @@ CTL_RO_NL_CGEN(config_prof, lg_prof_sample, lg_prof_sample, size_t) CTL_RO_CGEN(config_stats, stats_allocated, ctl_stats->allocated, 
size_t) CTL_RO_CGEN(config_stats, stats_active, ctl_stats->active, size_t) CTL_RO_CGEN(config_stats, stats_metadata, ctl_stats->metadata, size_t) +CTL_RO_CGEN(config_stats, stats_metadata_thp, ctl_stats->metadata_thp, size_t) CTL_RO_CGEN(config_stats, stats_resident, ctl_stats->resident, size_t) CTL_RO_CGEN(config_stats, stats_mapped, ctl_stats->mapped, size_t) CTL_RO_CGEN(config_stats, stats_retained, ctl_stats->retained, size_t) @@ -2490,24 +2669,24 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_retained, size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.npurge), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_dirty.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_nmadvise, - arena_stats_read_u64( + ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.decay_dirty.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_dirty_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_dirty.purged), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_dirty.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_npurge, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.npurge), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_nmadvise, - arena_stats_read_u64( + ctl_arena_stats_read_u64( &arenas_i(mib[2])->astats->astats.decay_muzzy.nmadvise), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_muzzy_purged, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.decay_muzzy.purged), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.decay_muzzy.purged), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_base, atomic_load_zu(&arenas_i(mib[2])->astats->astats.base, ATOMIC_RELAXED), @@ -2515,6 +2694,9 @@ CTL_RO_CGEN(config_stats, 
stats_arenas_i_base, CTL_RO_CGEN(config_stats, stats_arenas_i_internal, atomic_load_zu(&arenas_i(mib[2])->astats->astats.internal, ATOMIC_RELAXED), size_t) +CTL_RO_CGEN(config_stats, stats_arenas_i_metadata_thp, + atomic_load_zu(&arenas_i(mib[2])->astats->astats.metadata_thp, + ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_tcache_bytes, atomic_load_zu(&arenas_i(mib[2])->astats->astats.tcache_bytes, ATOMIC_RELAXED), size_t) @@ -2534,14 +2716,17 @@ CTL_RO_CGEN(config_stats, stats_arenas_i_large_allocated, atomic_load_zu(&arenas_i(mib[2])->astats->astats.allocated_large, ATOMIC_RELAXED), size_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_nmalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_large_ndalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.ndalloc_large), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.ndalloc_large), uint64_t) +/* + * Note: "nmalloc" here instead of "nrequests" in the read. This is intentional. + */ CTL_RO_CGEN(config_stats, stats_arenas_i_large_nrequests, - arena_stats_read_u64(&arenas_i(mib[2])->astats->astats.nmalloc_large), - uint64_t) /* Intentional. */ + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->astats.nmalloc_large), uint64_t) /* Intentional. */ /* Lock profiling related APIs below. 
*/ #define RO_MUTEX_CTL_GEN(n, l) \ @@ -2622,7 +2807,7 @@ stats_mutexes_reset_ctl(tsd_t *tsd, const size_t *mib, size_t miblen, MUTEX_PROF_RESET(arena->base->mtx); for (szind_t i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; + bin_t *bin = &arena->bins[i]; MUTEX_PROF_RESET(bin->lock); } } @@ -2659,14 +2844,14 @@ stats_arenas_i_bins_j_index(tsdn_t *tsdn, const size_t *mib, size_t miblen, } CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nmalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nmalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_ndalloc, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].ndalloc), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_nrequests, - arena_stats_read_u64(&arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), - uint64_t) + ctl_arena_stats_read_u64( + &arenas_i(mib[2])->astats->lstats[mib[4]].nrequests), uint64_t) CTL_RO_CGEN(config_stats, stats_arenas_i_lextents_j_curlextents, arenas_i(mib[2])->astats->lstats[mib[4]].curlextents, size_t) diff --git a/contrib/jemalloc/src/div.c b/contrib/jemalloc/src/div.c new file mode 100644 index 00000000000..808892a133f --- /dev/null +++ b/contrib/jemalloc/src/div.c @@ -0,0 +1,55 @@ +#include "jemalloc/internal/jemalloc_preamble.h" + +#include "jemalloc/internal/div.h" + +#include "jemalloc/internal/assert.h" + +/* + * Suppose we have n = q * d, all integers. We know n and d, and want q = n / d. + * + * For any k, we have (here, all division is exact; not C-style rounding): + * floor(ceil(2^k / d) * n / 2^k) = floor((2^k + r) / d * n / 2^k), where + * r = (-2^k) mod d. + * + * Expanding this out: + * ... = floor(2^k / d * n / 2^k + r / d * n / 2^k) + * = floor(n / d + (r / d) * (n / 2^k)). 
+ * + * The fractional part of n / d is 0 (because of the assumption that d divides n + * exactly), so we have: + * ... = n / d + floor((r / d) * (n / 2^k)) + * + * So that our initial expression is equal to the quantity we seek, so long as + * (r / d) * (n / 2^k) < 1. + * + * r is a remainder mod d, so r < d and r / d < 1 always. We can make + * n / 2 ^ k < 1 by setting k = 32. This gets us a value of magic that works. + */ + +void +div_init(div_info_t *div_info, size_t d) { + /* Nonsensical. */ + assert(d != 0); + /* + * This would make the value of magic too high to fit into a uint32_t + * (we would want magic = 2^32 exactly). This would mess with code gen + * on 32-bit machines. + */ + assert(d != 1); + + uint64_t two_to_k = ((uint64_t)1 << 32); + uint32_t magic = (uint32_t)(two_to_k / d); + + /* + * We want magic = ceil(2^k / d), but C gives us floor. We have to + * increment it unless the result was exact (i.e. unless d is a power of + * two). + */ + if (two_to_k % d != 0) { + magic++; + } + div_info->magic = magic; +#ifdef JEMALLOC_DEBUG + div_info->d = d; +#endif +} diff --git a/contrib/jemalloc/src/extent.c b/contrib/jemalloc/src/extent.c index fa45c84d34f..09d6d771817 100644 --- a/contrib/jemalloc/src/extent.c +++ b/contrib/jemalloc/src/extent.c @@ -17,6 +17,8 @@ rtree_t extents_rtree; /* Keyed by the address of the extent_t being protected. 
*/ mutex_pool_t extent_mutex_pool; +size_t opt_lg_extent_max_active_fit = LG_EXTENT_MAX_ACTIVE_FIT_DEFAULT; + static const bitmap_info_t extents_bitmap_info = BITMAP_INFO_INITIALIZER(NPSIZES+1); @@ -117,7 +119,7 @@ static void extent_record(tsdn_t *tsdn, arena_t *arena, /******************************************************************************/ -rb_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, rb_link, +ph_gen(UNUSED, extent_avail_, extent_tree_t, extent_t, ph_link, extent_esnead_comp) typedef enum { @@ -304,8 +306,7 @@ extents_npages_get(extents_t *extents) { } static void -extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, - bool preserve_lru) { +extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &extents->mtx); assert(extent_state_get(extent) == extents->state); @@ -317,9 +318,7 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, (size_t)pind); } extent_heap_insert(&extents->heaps[pind], extent); - if (!preserve_lru) { - extent_list_append(&extents->lru, extent); - } + extent_list_append(&extents->lru, extent); size_t npages = size >> LG_PAGE; /* * All modifications to npages hold the mutex (as asserted above), so we @@ -333,8 +332,7 @@ extents_insert_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, } static void -extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, - bool preserve_lru) { +extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent) { malloc_mutex_assert_owner(tsdn, &extents->mtx); assert(extent_state_get(extent) == extents->state); @@ -346,9 +344,7 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, bitmap_set(extents->bitmap, &extents_bitmap_info, (size_t)pind); } - if (!preserve_lru) { - extent_list_remove(&extents->lru, extent); - } + extent_list_remove(&extents->lru, extent); size_t npages = size >> LG_PAGE; /* * As in extents_insert_locked, we hold 
extents->mtx and so don't need @@ -361,6 +357,43 @@ extents_remove_locked(tsdn_t *tsdn, extents_t *extents, extent_t *extent, cur_extents_npages - (size >> LG_PAGE), ATOMIC_RELAXED); } +/* + * Find an extent with size [min_size, max_size) to satisfy the alignment + * requirement. For each size, try only the first extent in the heap. + */ +static extent_t * +extents_fit_alignment(extents_t *extents, size_t min_size, size_t max_size, + size_t alignment) { + pszind_t pind = sz_psz2ind(extent_size_quantize_ceil(min_size)); + pszind_t pind_max = sz_psz2ind(extent_size_quantize_ceil(max_size)); + + for (pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, + &extents_bitmap_info, (size_t)pind); i < pind_max; i = + (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, + (size_t)i+1)) { + assert(i < NPSIZES); + assert(!extent_heap_empty(&extents->heaps[i])); + extent_t *extent = extent_heap_first(&extents->heaps[i]); + uintptr_t base = (uintptr_t)extent_base_get(extent); + size_t candidate_size = extent_size_get(extent); + assert(candidate_size >= min_size); + + uintptr_t next_align = ALIGNMENT_CEILING((uintptr_t)base, + PAGE_CEILING(alignment)); + if (base > next_align || base + candidate_size <= next_align) { + /* Overflow or not crossing the next alignment. */ + continue; + } + + size_t leadsize = next_align - base; + if (candidate_size - leadsize >= min_size) { + return extent; + } + } + + return NULL; +} + /* Do any-best-fit extent selection, i.e. select any extent that best fits. */ static extent_t * extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, @@ -369,8 +402,15 @@ extents_best_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, pszind_t i = (pszind_t)bitmap_ffu(extents->bitmap, &extents_bitmap_info, (size_t)pind); if (i < NPSIZES+1) { + /* + * In order to reduce fragmentation, avoid reusing and splitting + * large extents for much smaller sizes. 
+ */ + if ((sz_pind2sz(i) >> opt_lg_extent_max_active_fit) > size) { + return NULL; + } assert(!extent_heap_empty(&extents->heaps[i])); - extent_t *extent = extent_heap_any(&extents->heaps[i]); + extent_t *extent = extent_heap_first(&extents->heaps[i]); assert(extent_size_get(extent) >= size); return extent; } @@ -415,12 +455,30 @@ extents_first_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, */ static extent_t * extents_fit_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - size_t size) { + size_t esize, size_t alignment) { malloc_mutex_assert_owner(tsdn, &extents->mtx); - return extents->delay_coalesce ? extents_best_fit_locked(tsdn, arena, - extents, size) : extents_first_fit_locked(tsdn, arena, extents, - size); + size_t max_size = esize + PAGE_CEILING(alignment) - PAGE; + /* Beware size_t wrap-around. */ + if (max_size < esize) { + return NULL; + } + + extent_t *extent = extents->delay_coalesce ? + extents_best_fit_locked(tsdn, arena, extents, max_size) : + extents_first_fit_locked(tsdn, arena, extents, max_size); + + if (alignment > PAGE && extent == NULL) { + /* + * max_size guarantees the alignment requirement but is rather + * pessimistic. Next we try to satisfy the aligned allocation + * with sizes in [esize, max_size). 
+ */ + extent = extents_fit_alignment(extents, esize, max_size, + alignment); + } + + return extent; } static bool @@ -436,7 +494,7 @@ extent_try_delayed_coalesce(tsdn_t *tsdn, arena_t *arena, if (!coalesced) { return true; } - extents_insert_locked(tsdn, extents, extent, true); + extents_insert_locked(tsdn, extents, extent); return false; } @@ -449,8 +507,10 @@ extents_alloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); - return extent_recycle(tsdn, arena, r_extent_hooks, extents, new_addr, - size, pad, alignment, slab, szind, zero, commit, false); + extent_t *extent = extent_recycle(tsdn, arena, r_extent_hooks, extents, + new_addr, size, pad, alignment, slab, szind, zero, commit, false); + assert(extent == NULL || extent_dumpable_get(extent)); + return extent; } void @@ -458,6 +518,7 @@ extents_dalloc(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent) { assert(extent_base_get(extent) != NULL); assert(extent_size_get(extent) != 0); + assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -487,14 +548,13 @@ extents_evict(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, goto label_return; } /* Check the eviction limit. 
*/ - size_t npages = extent_size_get(extent) >> LG_PAGE; size_t extents_npages = atomic_load_zu(&extents->npages, ATOMIC_RELAXED); - if (extents_npages - npages < npages_min) { + if (extents_npages <= npages_min) { extent = NULL; goto label_return; } - extents_remove_locked(tsdn, extents, extent, false); + extents_remove_locked(tsdn, extents, extent); if (!extents->delay_coalesce) { break; } @@ -567,29 +627,29 @@ extents_postfork_child(tsdn_t *tsdn, extents_t *extents) { static void extent_deactivate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { assert(extent_arena_get(extent) == arena); assert(extent_state_get(extent) == extent_state_active); extent_state_set(extent, extents_state_get(extents)); - extents_insert_locked(tsdn, extents, extent, preserve_lru); + extents_insert_locked(tsdn, extents, extent); } static void extent_deactivate(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { malloc_mutex_lock(tsdn, &extents->mtx); - extent_deactivate_locked(tsdn, arena, extents, extent, preserve_lru); + extent_deactivate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); } static void extent_activate_locked(tsdn_t *tsdn, arena_t *arena, extents_t *extents, - extent_t *extent, bool preserve_lru) { + extent_t *extent) { assert(extent_arena_get(extent) == arena); assert(extent_state_get(extent) == extents_state_get(extents)); - extents_remove_locked(tsdn, extents, extent, preserve_lru); + extents_remove_locked(tsdn, extents, extent); extent_state_set(extent, extent_state_active); } @@ -723,6 +783,13 @@ extent_reregister(tsdn_t *tsdn, extent_t *extent) { assert(!err); } +/* + * Removes all pointers to the given extent from the global rtree indices for + * its interior. This is relevant for slab extents, for which we need to do + * metadata lookups at places other than the head of the extent. 
We deregister + * on the interior, then, when an extent moves from being an active slab to an + * inactive state. + */ static void extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, extent_t *extent) { @@ -737,8 +804,11 @@ extent_interior_deregister(tsdn_t *tsdn, rtree_ctx_t *rtree_ctx, } } +/* + * Removes all pointers to the given extent from the global rtree. + */ static void -extent_deregister(tsdn_t *tsdn, extent_t *extent) { +extent_deregister_impl(tsdn_t *tsdn, extent_t *extent, bool gdump) { rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); rtree_leaf_elm_t *elm_a, *elm_b; @@ -755,16 +825,30 @@ extent_deregister(tsdn_t *tsdn, extent_t *extent) { extent_unlock(tsdn, extent); - if (config_prof) { + if (config_prof && gdump) { extent_gdump_sub(tsdn, extent); } } +static void +extent_deregister(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, true); +} + +static void +extent_deregister_no_gdump_sub(tsdn_t *tsdn, extent_t *extent) { + extent_deregister_impl(tsdn, extent, false); +} + +/* + * Tries to find and remove an extent from extents that can be used for the + * given allocation request. + */ static extent_t * extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - bool *zero, bool *commit, bool growing_retained) { + bool growing_retained) { witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, growing_retained ? 1 : 0); assert(alignment > 0); @@ -786,11 +870,6 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, } size_t esize = size + pad; - size_t alloc_size = esize + PAGE_CEILING(alignment) - PAGE; - /* Beware size_t wrap-around. 
*/ - if (alloc_size < esize) { - return NULL; - } malloc_mutex_lock(tsdn, &extents->mtx); extent_hooks_assure_initialized(arena, r_extent_hooks); extent_t *extent; @@ -812,86 +891,172 @@ extent_recycle_extract(tsdn_t *tsdn, arena_t *arena, extent_unlock(tsdn, unlock_extent); } } else { - extent = extents_fit_locked(tsdn, arena, extents, alloc_size); + extent = extents_fit_locked(tsdn, arena, extents, esize, + alignment); } if (extent == NULL) { malloc_mutex_unlock(tsdn, &extents->mtx); return NULL; } - extent_activate_locked(tsdn, arena, extents, extent, false); + extent_activate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); - if (extent_zeroed_get(extent)) { - *zero = true; - } - if (extent_committed_get(extent)) { - *commit = true; - } - return extent; } -static extent_t * -extent_recycle_split(tsdn_t *tsdn, arena_t *arena, - extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, +/* + * Given an allocation request and an extent guaranteed to be able to satisfy + * it, this splits off lead and trail extents, leaving extent pointing to an + * extent satisfying the allocation. + * This function doesn't put lead or trail into any extents_t; it's the caller's + * job to ensure that they can be reused. + */ +typedef enum { + /* + * Split successfully. lead, extent, and trail, are modified to extents + * describing the ranges before, in, and after the given allocation. + */ + extent_split_interior_ok, + /* + * The extent can't satisfy the given allocation request. None of the + * input extent_t *s are touched. + */ + extent_split_interior_cant_alloc, + /* + * In a potentially invalid state. Must leak (if *to_leak is non-NULL), + * and salvage what's still salvageable (if *to_salvage is non-NULL). + * None of lead, extent, or trail are valid. 
+ */ + extent_split_interior_error +} extent_split_interior_result_t; + +static extent_split_interior_result_t +extent_split_interior(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, + /* The result of splitting, in case of success. */ + extent_t **extent, extent_t **lead, extent_t **trail, + /* The mess to clean up, in case of error. */ + extent_t **to_leak, extent_t **to_salvage, void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, - szind_t szind, extent_t *extent, bool growing_retained) { + szind_t szind, bool growing_retained) { size_t esize = size + pad; - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(extent), - PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(extent); + size_t leadsize = ALIGNMENT_CEILING((uintptr_t)extent_base_get(*extent), + PAGE_CEILING(alignment)) - (uintptr_t)extent_base_get(*extent); assert(new_addr == NULL || leadsize == 0); - assert(extent_size_get(extent) >= leadsize + esize); - size_t trailsize = extent_size_get(extent) - leadsize - esize; + if (extent_size_get(*extent) < leadsize + esize) { + return extent_split_interior_cant_alloc; + } + size_t trailsize = extent_size_get(*extent) - leadsize - esize; + + *lead = NULL; + *trail = NULL; + *to_leak = NULL; + *to_salvage = NULL; /* Split the lead. */ if (leadsize != 0) { - extent_t *lead = extent; - extent = extent_split_impl(tsdn, arena, r_extent_hooks, - lead, leadsize, NSIZES, false, esize + trailsize, szind, + *lead = *extent; + *extent = extent_split_impl(tsdn, arena, r_extent_hooks, + *lead, leadsize, NSIZES, false, esize + trailsize, szind, slab, growing_retained); - if (extent == NULL) { - extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, extents, - lead, growing_retained); - return NULL; + if (*extent == NULL) { + *to_leak = *lead; + *lead = NULL; + return extent_split_interior_error; } - extent_deactivate(tsdn, arena, extents, lead, false); } /* Split the trail. 
*/ if (trailsize != 0) { - extent_t *trail = extent_split_impl(tsdn, arena, - r_extent_hooks, extent, esize, szind, slab, trailsize, - NSIZES, false, growing_retained); - if (trail == NULL) { - extent_deregister(tsdn, extent); - extents_leak(tsdn, arena, r_extent_hooks, extents, - extent, growing_retained); - return NULL; + *trail = extent_split_impl(tsdn, arena, r_extent_hooks, *extent, + esize, szind, slab, trailsize, NSIZES, false, + growing_retained); + if (*trail == NULL) { + *to_leak = *extent; + *to_salvage = *lead; + *lead = NULL; + *extent = NULL; + return extent_split_interior_error; } - extent_deactivate(tsdn, arena, extents, trail, false); - } else if (leadsize == 0) { + } + + if (leadsize == 0 && trailsize == 0) { /* * Splitting causes szind to be set as a side effect, but no * splitting occurred. */ - extent_szind_set(extent, szind); + extent_szind_set(*extent, szind); if (szind != NSIZES) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, slab); - if (slab && extent_size_get(extent) > PAGE) { + (uintptr_t)extent_addr_get(*extent), szind, slab); + if (slab && extent_size_get(*extent) > PAGE) { rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_past_get(extent) - + (uintptr_t)extent_past_get(*extent) - (uintptr_t)PAGE, szind, slab); } } } - return extent; + return extent_split_interior_ok; } +/* + * This fulfills the indicated allocation request out of the given extent (which + * the caller should have ensured was big enough). If there's any unused space + * before or after the resulting allocation, that space is given its own extent + * and put back into extents. 
+ */ +static extent_t * +extent_recycle_split(tsdn_t *tsdn, arena_t *arena, + extent_hooks_t **r_extent_hooks, rtree_ctx_t *rtree_ctx, extents_t *extents, + void *new_addr, size_t size, size_t pad, size_t alignment, bool slab, + szind_t szind, extent_t *extent, bool growing_retained) { + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + &to_leak, &to_salvage, new_addr, size, pad, alignment, slab, szind, + growing_retained); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_deactivate(tsdn, arena, extents, lead); + } + if (trail != NULL) { + extent_deactivate(tsdn, arena, extents, trail); + } + return extent; + } else { + /* + * We should have picked an extent that was large enough to + * fulfill our allocation request. + */ + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + extent_deregister(tsdn, to_salvage); + } + if (to_leak != NULL) { + void *leak = extent_base_get(to_leak); + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_leak(tsdn, arena, r_extent_hooks, extents, + to_leak, growing_retained); + assert(extent_lock_from_addr(tsdn, rtree_ctx, leak) + == NULL); + } + return NULL; + } + unreachable(); +} + +/* + * Tries to satisfy the given allocation request by reusing one of the extents + * in the given extents_t. 
+ */ static extent_t * extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, void *new_addr, size_t size, size_t pad, @@ -906,16 +1071,12 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); - bool committed = false; extent_t *extent = extent_recycle_extract(tsdn, arena, r_extent_hooks, - rtree_ctx, extents, new_addr, size, pad, alignment, slab, zero, - &committed, growing_retained); + rtree_ctx, extents, new_addr, size, pad, alignment, slab, + growing_retained); if (extent == NULL) { return NULL; } - if (committed) { - *commit = true; - } extent = extent_recycle_split(tsdn, arena, r_extent_hooks, rtree_ctx, extents, new_addr, size, pad, alignment, slab, szind, extent, @@ -934,6 +1095,13 @@ extent_recycle(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_zeroed_set(extent, true); } + if (extent_committed_get(extent)) { + *commit = true; + } + if (extent_zeroed_get(extent)) { + *zero = true; + } + if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } @@ -999,11 +1167,12 @@ extent_alloc_core(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, static void * extent_alloc_default_impl(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, size_t alignment, bool *zero, bool *commit) { - void *ret; - - ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, + void *ret = extent_alloc_core(tsdn, arena, new_addr, size, alignment, zero, commit, (dss_prec_t)atomic_load_u(&arena->dss_prec, ATOMIC_RELAXED)); + if (have_madvise_huge && ret) { + pages_set_thp_state(ret, size); + } return ret; } @@ -1028,7 +1197,18 @@ extent_alloc_default(extent_hooks_t *extent_hooks, void *new_addr, size_t size, static void extent_hook_pre_reentrancy(tsdn_t *tsdn, arena_t *arena) { tsd_t *tsd = tsdn_null(tsdn) ? 
tsd_fetch() : tsdn_tsd(tsdn); - pre_reentrancy(tsd, arena); + if (arena == arena_get(tsd_tsdn(tsd), 0, false)) { + /* + * The only legitimate case of customized extent hooks for a0 is + * hooks with no allocation activities. One such example is to + * place metadata on pre-allocated resources such as huge pages. + * In that case, rely on reentrancy_level checks to catch + * infinite recursions. + */ + pre_reentrancy(tsd, NULL); + } else { + pre_reentrancy(tsd, arena); + } } static void @@ -1081,9 +1261,8 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, void *ptr; if (*r_extent_hooks == &extent_hooks_default) { - ptr = extent_alloc_core(tsdn, arena, NULL, alloc_size, PAGE, - &zeroed, &committed, (dss_prec_t)atomic_load_u( - &arena->dss_prec, ATOMIC_RELAXED)); + ptr = extent_alloc_default_impl(tsdn, arena, NULL, + alloc_size, PAGE, &zeroed, &committed); } else { extent_hook_pre_reentrancy(tsdn, arena); ptr = (*r_extent_hooks)->alloc(*r_extent_hooks, NULL, @@ -1094,21 +1273,18 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, extent_init(extent, arena, ptr, alloc_size, false, NSIZES, arena_extent_sn_next(arena), extent_state_active, zeroed, - committed); + committed, true); if (ptr == NULL) { extent_dalloc(tsdn, arena, extent); goto label_err; } + if (extent_register_no_gdump_add(tsdn, extent)) { extents_leak(tsdn, arena, r_extent_hooks, &arena->extents_retained, extent, true); goto label_err; } - size_t leadsize = ALIGNMENT_CEILING((uintptr_t)ptr, - PAGE_CEILING(alignment)) - (uintptr_t)ptr; - assert(alloc_size >= leadsize + esize); - size_t trailsize = alloc_size - leadsize - esize; if (extent_zeroed_get(extent) && extent_committed_get(extent)) { *zero = true; } @@ -1116,54 +1292,46 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, *commit = true; } - /* Split the lead. 
*/ - if (leadsize != 0) { - extent_t *lead = extent; - extent = extent_split_impl(tsdn, arena, r_extent_hooks, lead, - leadsize, NSIZES, false, esize + trailsize, szind, slab, - true); - if (extent == NULL) { - extent_deregister(tsdn, lead); - extents_leak(tsdn, arena, r_extent_hooks, + rtree_ctx_t rtree_ctx_fallback; + rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); + + extent_t *lead; + extent_t *trail; + extent_t *to_leak; + extent_t *to_salvage; + extent_split_interior_result_t result = extent_split_interior( + tsdn, arena, r_extent_hooks, rtree_ctx, &extent, &lead, &trail, + &to_leak, &to_salvage, NULL, size, pad, alignment, slab, szind, + true); + + if (result == extent_split_interior_ok) { + if (lead != NULL) { + extent_record(tsdn, arena, r_extent_hooks, &arena->extents_retained, lead, true); - goto label_err; } - extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, lead, true); - } - - /* Split the trail. */ - if (trailsize != 0) { - extent_t *trail = extent_split_impl(tsdn, arena, r_extent_hooks, - extent, esize, szind, slab, trailsize, NSIZES, false, true); - if (trail == NULL) { - extent_deregister(tsdn, extent); - extents_leak(tsdn, arena, r_extent_hooks, - &arena->extents_retained, extent, true); - goto label_err; + if (trail != NULL) { + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, trail, true); } - extent_record(tsdn, arena, r_extent_hooks, - &arena->extents_retained, trail, true); - } else if (leadsize == 0) { + } else { /* - * Splitting causes szind to be set as a side effect, but no - * splitting occurred. + * We should have allocated a sufficiently large extent; the + * cant_alloc case should not occur. 
*/ - rtree_ctx_t rtree_ctx_fallback; - rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, - &rtree_ctx_fallback); - - extent_szind_set(extent, szind); - if (szind != NSIZES) { - rtree_szind_slab_update(tsdn, &extents_rtree, rtree_ctx, - (uintptr_t)extent_addr_get(extent), szind, slab); - if (slab && extent_size_get(extent) > PAGE) { - rtree_szind_slab_update(tsdn, &extents_rtree, - rtree_ctx, - (uintptr_t)extent_past_get(extent) - - (uintptr_t)PAGE, szind, slab); + assert(result == extent_split_interior_error); + if (to_salvage != NULL) { + if (config_prof) { + extent_gdump_add(tsdn, to_salvage); } + extent_record(tsdn, arena, r_extent_hooks, + &arena->extents_retained, to_salvage, true); } + if (to_leak != NULL) { + extent_deregister_no_gdump_sub(tsdn, to_leak); + extents_leak(tsdn, arena, r_extent_hooks, + &arena->extents_retained, to_leak, true); + } + goto label_err; } if (*commit && !extent_committed_get(extent)) { @@ -1177,13 +1345,14 @@ extent_grow_retained(tsdn_t *tsdn, arena_t *arena, } /* - * Increment extent_grow_next if doing so wouldn't exceed the legal + * Increment extent_grow_next if doing so wouldn't exceed the allowed * range. */ - if (arena->extent_grow_next + egn_skip + 1 < NPSIZES) { + if (arena->extent_grow_next + egn_skip + 1 <= + arena->retain_grow_limit) { arena->extent_grow_next += egn_skip + 1; } else { - arena->extent_grow_next = NPSIZES - 1; + arena->extent_grow_next = arena->retain_grow_limit; } /* All opportunities for failure are past. 
*/ malloc_mutex_unlock(tsdn, &arena->extent_grow_mtx); @@ -1271,7 +1440,8 @@ extent_alloc_wrapper_hard(tsdn_t *tsdn, arena_t *arena, return NULL; } extent_init(extent, arena, addr, esize, slab, szind, - arena_extent_sn_next(arena), extent_state_active, zero, commit); + arena_extent_sn_next(arena), extent_state_active, *zero, *commit, + true); if (pad != 0) { extent_addr_randomize(tsdn, extent, alignment); } @@ -1296,10 +1466,20 @@ extent_alloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_t *extent = extent_alloc_retained(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); if (extent == NULL) { + if (opt_retain && new_addr != NULL) { + /* + * When retain is enabled and new_addr is set, we do not + * attempt extent_alloc_wrapper_hard which does mmap + * that is very unlikely to succeed (unless it happens + * to be at the end). + */ + return NULL; + } extent = extent_alloc_wrapper_hard(tsdn, arena, r_extent_hooks, new_addr, size, pad, alignment, slab, szind, zero, commit); } + assert(extent == NULL || extent_dumpable_get(extent)); return extent; } @@ -1329,16 +1509,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, bool growing_retained) { assert(extent_can_coalesce(arena, extents, inner, outer)); - if (forward && extents->delay_coalesce) { - /* - * The extent that remains after coalescing must occupy the - * outer extent's position in the LRU. For forward coalescing, - * swap the inner extent into the LRU. 
- */ - extent_list_replace(&extents->lru, outer, inner); - } - extent_activate_locked(tsdn, arena, extents, outer, - extents->delay_coalesce); + extent_activate_locked(tsdn, arena, extents, outer); malloc_mutex_unlock(tsdn, &extents->mtx); bool err = extent_merge_impl(tsdn, arena, r_extent_hooks, @@ -1346,11 +1517,7 @@ extent_coalesce(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, malloc_mutex_lock(tsdn, &extents->mtx); if (err) { - if (forward && extents->delay_coalesce) { - extent_list_replace(&extents->lru, inner, outer); - } - extent_deactivate_locked(tsdn, arena, extents, outer, - extents->delay_coalesce); + extent_deactivate_locked(tsdn, arena, extents, outer); } return err; @@ -1422,6 +1589,10 @@ extent_try_coalesce(tsdn_t *tsdn, arena_t *arena, return extent; } +/* + * Does the metadata management portions of putting an unused extent into the + * given extents_t (coalesces, deregisters slab interiors, the heap operations). + */ static void extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extents_t *extents, extent_t *extent, bool growing_retained) { @@ -1447,9 +1618,20 @@ extent_record(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, if (!extents->delay_coalesce) { extent = extent_try_coalesce(tsdn, arena, r_extent_hooks, rtree_ctx, extents, extent, NULL, growing_retained); - } - - extent_deactivate_locked(tsdn, arena, extents, extent, false); + } else if (extent_size_get(extent) >= LARGE_MINCLASS) { + /* Always coalesce large extents eagerly. 
*/ + bool coalesced; + size_t prev_size; + do { + prev_size = extent_size_get(extent); + assert(extent_state_get(extent) == extent_state_active); + extent = extent_try_coalesce(tsdn, arena, + r_extent_hooks, rtree_ctx, extents, extent, + &coalesced, growing_retained); + } while (coalesced && + extent_size_get(extent) >= prev_size + LARGE_MINCLASS); + } + extent_deactivate_locked(tsdn, arena, extents, extent); malloc_mutex_unlock(tsdn, &extents->mtx); } @@ -1520,6 +1702,7 @@ extent_dalloc_wrapper_try(tsdn_t *tsdn, arena_t *arena, void extent_dalloc_wrapper(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent) { + assert(extent_dumpable_get(extent)); witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), WITNESS_RANK_CORE, 0); @@ -1780,6 +1963,13 @@ extent_split_default(extent_hooks_t *extent_hooks, void *addr, size_t size, } #endif +/* + * Accepts the extent to split, and the characteristics of each side of the + * split. The 'a' parameters go with the 'lead' of the resulting pair of + * extents (the lower addressed portion of the split), and the 'b' parameters go + * with the trail (the higher addressed portion). This makes 'extent' the lead, + * and returns the trail (except in case of error). 
+ */ static extent_t * extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_hooks_t **r_extent_hooks, extent_t *extent, size_t size_a, @@ -1803,7 +1993,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(trail, arena, (void *)((uintptr_t)extent_base_get(extent) + size_a), size_b, slab_b, szind_b, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent)); rtree_ctx_t rtree_ctx_fallback; rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &rtree_ctx_fallback); @@ -1814,7 +2004,7 @@ extent_split_impl(tsdn_t *tsdn, arena_t *arena, extent_init(&lead, arena, extent_addr_get(extent), size_a, slab_a, szind_a, extent_sn_get(extent), extent_state_get(extent), extent_zeroed_get(extent), - extent_committed_get(extent)); + extent_committed_get(extent), extent_dumpable_get(extent)); extent_rtree_leaf_elms_lookup(tsdn, rtree_ctx, &lead, false, true, &lead_elm_a, &lead_elm_b); diff --git a/contrib/jemalloc/src/extent_dss.c b/contrib/jemalloc/src/extent_dss.c index e72da95870d..2b1ea9cafa0 100644 --- a/contrib/jemalloc/src/extent_dss.c +++ b/contrib/jemalloc/src/extent_dss.c @@ -156,7 +156,7 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_init(gap, arena, gap_addr_page, gap_size_page, false, NSIZES, arena_extent_sn_next(arena), - extent_state_active, false, true); + extent_state_active, false, true, true); } /* * Compute the address just past the end of the desired @@ -199,7 +199,8 @@ extent_alloc_dss(tsdn_t *tsdn, arena_t *arena, void *new_addr, size_t size, extent_init(&extent, arena, ret, size, size, false, NSIZES, - extent_state_active, false, true); + extent_state_active, false, true, + true); if (extent_purge_forced_wrapper(tsdn, arena, &extent_hooks, &extent, 0, size)) { diff --git a/contrib/jemalloc/src/jemalloc.c b/contrib/jemalloc/src/jemalloc.c index 0d747ea84c4..e0ad297be93 100644 --- 
a/contrib/jemalloc/src/jemalloc.c +++ b/contrib/jemalloc/src/jemalloc.c @@ -8,6 +8,7 @@ #include "jemalloc/internal/extent_dss.h" #include "jemalloc/internal/extent_mmap.h" #include "jemalloc/internal/jemalloc_internal_types.h" +#include "jemalloc/internal/log.h" #include "jemalloc/internal/malloc_io.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/rtree.h" @@ -852,10 +853,8 @@ malloc_conf_error(const char *msg, const char *k, size_t klen, const char *v, size_t vlen) { malloc_printf(": %s: %.*s:%.*s\n", msg, (int)klen, k, (int)vlen, v); + /* If abort_conf is set, error out after processing all options. */ had_conf_error = true; - if (opt_abort_conf) { - malloc_abort_invalid_conf(); - } } static void @@ -1055,8 +1054,22 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_abort, "abort") CONF_HANDLE_BOOL(opt_abort_conf, "abort_conf") - if (opt_abort_conf && had_conf_error) { - malloc_abort_invalid_conf(); + if (CONF_MATCH("metadata_thp")) { /* exact key match, not a prefix match */ + int i; + bool match = false; + for (i = 0; i < metadata_thp_mode_limit; i++) { + if (strncmp(metadata_thp_mode_names[i], + v, vlen) == 0) { + opt_metadata_thp = i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; } CONF_HANDLE_BOOL(opt_retain, "retain") if (strncmp("dss", k, klen) == 0) { @@ -1132,12 +1145,14 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_xmalloc, "xmalloc") } CONF_HANDLE_BOOL(opt_tcache, "tcache") + CONF_HANDLE_SIZE_T(opt_lg_extent_max_active_fit, + "lg_extent_max_active_fit", 0, /* used as a shift count: keep below the bit width of size_t */ + (sizeof(size_t) << 3) - 1, yes, yes, false) CONF_HANDLE_SSIZE_T(opt_lg_tcache_max, "lg_tcache_max", -1, (sizeof(size_t) << 3) - 1) if (strncmp("percpu_arena", k, klen) == 0) { - int i; bool match = false; - for (i = percpu_arena_mode_names_base; i < + for (int i = percpu_arena_mode_names_base; i < percpu_arena_mode_names_limit; i++) { if (strncmp(percpu_arena_mode_names[i], v, vlen) == 0) { @@ -1159,6 +1174,10 @@
malloc_conf_init(void) { } CONF_HANDLE_BOOL(opt_background_thread, "background_thread"); + CONF_HANDLE_SIZE_T(opt_max_background_threads, + "max_background_threads", 1, + opt_max_background_threads, yes, yes, + true); if (config_prof) { CONF_HANDLE_BOOL(opt_prof, "prof") CONF_HANDLE_CHAR_P(opt_prof_prefix, @@ -1177,6 +1196,37 @@ malloc_conf_init(void) { CONF_HANDLE_BOOL(opt_prof_final, "prof_final") CONF_HANDLE_BOOL(opt_prof_leak, "prof_leak") } + if (config_log) { + if (CONF_MATCH("log")) { + size_t cpylen = ( /* truncate so one byte is always left for the NUL */ + vlen < sizeof(log_var_names) ? + vlen : sizeof(log_var_names) - 1); + strncpy(log_var_names, v, cpylen); + log_var_names[cpylen] = '\0'; + continue; + } + } + if (CONF_MATCH("thp")) { + bool match = false; + for (int i = 0; i < thp_mode_names_limit; i++) { + if (strncmp(thp_mode_names[i],v, vlen) + == 0) { + if (!have_madvise_huge) { + malloc_conf_error( + "No THP support", + k, klen, v, vlen); + } + opt_thp = i; + match = true; + break; + } + } + if (!match) { + malloc_conf_error("Invalid conf value", + k, klen, v, vlen); + } + continue; + } malloc_conf_error("Invalid conf pair", k, klen, v, vlen); #undef CONF_MATCH @@ -1192,7 +1242,11 @@ malloc_conf_init(void) { #undef CONF_HANDLE_SSIZE_T #undef CONF_HANDLE_CHAR_P } + if (opt_abort_conf && had_conf_error) { + malloc_abort_invalid_conf(); + } } + atomic_store_b(&log_init_done, true, ATOMIC_RELEASE); } static bool @@ -1497,6 +1551,8 @@ malloc_init_hard(void) { post_reentrancy(tsd); malloc_mutex_unlock(tsd_tsdn(tsd), &init_lock); + witness_assert_lockless(witness_tsd_tsdn( + tsd_witness_tsdp_get_unsafe(tsd))); malloc_tsd_boot1(); /* Update TSD after tsd_boot1. */ tsd = tsd_fetch(); @@ -1504,8 +1560,11 @@ malloc_init_hard(void) { assert(have_background_thread); /* * Need to finish init & unlock first before creating background - * threads (pthread_create depends on malloc). + * threads (pthread_create depends on malloc). ctl_init (which + * sets isthreaded) needs to be called without holding any lock.
*/ + background_thread_ctl_init(tsd_tsdn(tsd)); + malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock); bool err = background_thread_create(tsd, 0); malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock); @@ -1705,7 +1764,7 @@ compute_size_with_overflow(bool may_overflow, dynamic_opts_t *dopts, } /* A size_t with its high-half bits all set to 1. */ - const static size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); + static const size_t high_bits = SIZE_T_MAX << (sizeof(size_t) * 8 / 2); *size = dopts->item_size * dopts->num_items; @@ -1966,6 +2025,8 @@ je_malloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.malloc.entry", "size: %zu", size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -1980,6 +2041,8 @@ je_malloc(size_t size) { imalloc(&sopts, &dopts); + LOG("core.malloc.exit", "result: %p", ret); + return ret; } @@ -1990,6 +2053,9 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.posix_memalign.entry", "mem ptr: %p, alignment: %zu, " + "size: %zu", memptr, alignment, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2006,6 +2072,10 @@ je_posix_memalign(void **memptr, size_t alignment, size_t size) { dopts.alignment = alignment; ret = imalloc(&sopts, &dopts); + + LOG("core.posix_memalign.exit", "result: %d, alloc ptr: %p", ret, + *memptr); + return ret; } @@ -2018,6 +2088,9 @@ je_aligned_alloc(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.aligned_alloc.entry", "alignment: %zu, size: %zu\n", + alignment, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2036,6 +2109,9 @@ je_aligned_alloc(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + + LOG("core.aligned_alloc.exit", "result: %p", ret); + return ret; } @@ -2047,6 +2123,8 @@ je_calloc(size_t num, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + 
LOG("core.calloc.entry", "num: %zu, size: %zu\n", num, size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2063,6 +2141,8 @@ je_calloc(size_t num, size_t size) { imalloc(&sopts, &dopts); + LOG("core.calloc.exit", "result: %p", ret); + return ret; } @@ -2165,17 +2245,37 @@ isfree(tsd_t *tsd, void *ptr, size_t usize, tcache_t *tcache, bool slow_path) { assert(malloc_initialized() || IS_INITIALIZER); alloc_ctx_t alloc_ctx, *ctx; - if (config_prof && opt_prof) { + if (!config_cache_oblivious && ((uintptr_t)ptr & PAGE_MASK) != 0) { + /* + * When cache_oblivious is disabled and ptr is not page aligned, + * the allocation was not sampled -- usize can be used to + * determine szind directly. + */ + alloc_ctx.szind = sz_size2index(usize); + alloc_ctx.slab = true; + ctx = &alloc_ctx; + if (config_debug) { + alloc_ctx_t dbg_ctx; + rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); + rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, + rtree_ctx, (uintptr_t)ptr, true, &dbg_ctx.szind, + &dbg_ctx.slab); + assert(dbg_ctx.szind == alloc_ctx.szind); + assert(dbg_ctx.slab == alloc_ctx.slab); + } + } else if (config_prof && opt_prof) { rtree_ctx_t *rtree_ctx = tsd_rtree_ctx(tsd); rtree_szind_slab_read(tsd_tsdn(tsd), &extents_rtree, rtree_ctx, (uintptr_t)ptr, true, &alloc_ctx.szind, &alloc_ctx.slab); assert(alloc_ctx.szind == sz_size2index(usize)); ctx = &alloc_ctx; - prof_free(tsd, ptr, usize, ctx); } else { ctx = NULL; } + if (config_prof && opt_prof) { + prof_free(tsd, ptr, usize, ctx); + } if (config_stats) { *tsd_thread_deallocatedp_get(tsd) += usize; } @@ -2196,6 +2296,8 @@ je_realloc(void *ptr, size_t size) { size_t usize JEMALLOC_CC_SILENCE_INIT(0); size_t old_usize = 0; + LOG("core.realloc.entry", "ptr: %p, size: %zu\n", ptr, size); + if (unlikely(size == 0)) { if (ptr != NULL) { /* realloc(ptr, 0) is equivalent to free(ptr). 
*/ @@ -2208,6 +2310,8 @@ je_realloc(void *ptr, size_t size) { tcache = NULL; } ifree(tsd, ptr, tcache, true); + + LOG("core.realloc.exit", "result: %p", NULL); return NULL; } size = 1; @@ -2240,7 +2344,9 @@ je_realloc(void *ptr, size_t size) { tsdn = tsd_tsdn(tsd); } else { /* realloc(NULL, size) is equivalent to malloc(size). */ - return je_malloc(size); + void *ret = je_malloc(size); + LOG("core.realloc.exit", "result: %p", ret); + return ret; } if (unlikely(ret == NULL)) { @@ -2261,11 +2367,15 @@ je_realloc(void *ptr, size_t size) { } UTRACE(ptr, size, ret); check_entry_exit_locking(tsdn); + + LOG("core.realloc.exit", "result: %p", ret); return ret; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_free(void *ptr) { + LOG("core.free.entry", "ptr: %p", ptr); + UTRACE(ptr, 0, 0); if (likely(ptr != NULL)) { /* @@ -2295,6 +2405,7 @@ je_free(void *ptr) { } check_entry_exit_locking(tsd_tsdn(tsd)); } + LOG("core.free.exit", ""); } /* @@ -2314,6 +2425,9 @@ je_memalign(size_t alignment, size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.memalign.entry", "alignment: %zu, size: %zu\n", alignment, + size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2331,6 +2445,8 @@ je_memalign(size_t alignment, size_t size) { dopts.alignment = alignment; imalloc(&sopts, &dopts); + + LOG("core.memalign.exit", "result: %p", ret); return ret; } #endif @@ -2345,6 +2461,8 @@ je_valloc(size_t size) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.valloc.entry", "size: %zu\n", size); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2363,6 +2481,7 @@ je_valloc(size_t size) { imalloc(&sopts, &dopts); + LOG("core.valloc.exit", "result: %p\n", ret); return ret; } #endif @@ -2436,6 +2555,8 @@ je_mallocx(size_t size, int flags) { static_opts_t sopts; dynamic_opts_t dopts; + LOG("core.mallocx.entry", "size: %zu, flags: %d", size, flags); + static_opts_init(&sopts); dynamic_opts_init(&dopts); @@ -2469,6 +2590,8 @@ je_mallocx(size_t size, int flags) { } 
imalloc(&sopts, &dopts); + + LOG("core.mallocx.exit", "result: %p", ret); return ret; } @@ -2549,6 +2672,10 @@ je_rallocx(void *ptr, size_t size, int flags) { arena_t *arena; tcache_t *tcache; + LOG("core.rallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + + assert(ptr != NULL); assert(size != 0); assert(malloc_initialized() || IS_INITIALIZER); @@ -2611,6 +2738,8 @@ je_rallocx(void *ptr, size_t size, int flags) { } UTRACE(ptr, size, p); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.rallocx.exit", "result: %p", p); return p; label_oom: if (config_xmalloc && unlikely(opt_xmalloc)) { @@ -2619,6 +2748,8 @@ je_rallocx(void *ptr, size_t size, int flags) { } UTRACE(ptr, size, 0); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.rallocx.exit", "result: %p", NULL); return NULL; } @@ -2705,6 +2836,9 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { size_t alignment = MALLOCX_ALIGN_GET(flags); bool zero = flags & MALLOCX_ZERO; + LOG("core.xallocx.entry", "ptr: %p, size: %zu, extra: %zu, " + "flags: %d", ptr, size, extra, flags); + assert(ptr != NULL); assert(size != 0); assert(SIZE_T_MAX - size >= extra); @@ -2754,15 +2888,19 @@ je_xallocx(void *ptr, size_t size, size_t extra, int flags) { label_not_resized: UTRACE(ptr, size, ptr); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.xallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW JEMALLOC_ATTR(pure) -je_sallocx(const void *ptr, int flags) { +je_sallocx(const void *ptr, UNUSED int flags) { size_t usize; tsdn_t *tsdn; + LOG("core.sallocx.entry", "ptr: %p, flags: %d", ptr, flags); + assert(malloc_initialized() || IS_INITIALIZER); assert(ptr != NULL); @@ -2777,11 +2915,15 @@ je_sallocx(const void *ptr, int flags) { } check_entry_exit_locking(tsdn); + + LOG("core.sallocx.exit", "result: %zu", usize); return usize; } JEMALLOC_EXPORT void JEMALLOC_NOTHROW je_dallocx(void *ptr, int flags) { + LOG("core.dallocx.entry", "ptr: %p, flags: 
%d", ptr, flags); + assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); @@ -2819,6 +2961,8 @@ je_dallocx(void *ptr, int flags) { ifree(tsd, ptr, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.dallocx.exit", ""); } JEMALLOC_ALWAYS_INLINE size_t @@ -2840,6 +2984,9 @@ je_sdallocx(void *ptr, size_t size, int flags) { assert(ptr != NULL); assert(malloc_initialized() || IS_INITIALIZER); + LOG("core.sdallocx.entry", "ptr: %p, size: %zu, flags: %d", ptr, + size, flags); + tsd_t *tsd = tsd_fetch(); bool fast = tsd_fast(tsd); size_t usize = inallocx(tsd_tsdn(tsd), size, flags); @@ -2876,6 +3023,8 @@ je_sdallocx(void *ptr, size_t size, int flags) { isfree(tsd, ptr, usize, tcache, true); } check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.sdallocx.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2887,6 +3036,7 @@ je_nallocx(size_t size, int flags) { assert(size != 0); if (unlikely(malloc_init())) { + LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } @@ -2895,10 +3045,12 @@ je_nallocx(size_t size, int flags) { usize = inallocx(tsdn, size, flags); if (unlikely(usize > LARGE_MAXCLASS)) { + LOG("core.nallocx.exit", "result: %zu", ZU(0)); return 0; } check_entry_exit_locking(tsdn); + LOG("core.nallocx.exit", "result: %zu", usize); return usize; } @@ -2908,7 +3060,10 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, int ret; tsd_t *tsd; + LOG("core.mallctl.entry", "name: %s", name); + if (unlikely(malloc_init())) { + LOG("core.mallctl.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -2916,6 +3071,8 @@ je_mallctl(const char *name, void *oldp, size_t *oldlenp, void *newp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_byname(tsd, name, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.mallctl.exit", "result: %d", ret); return ret; } @@ -2923,7 +3080,10 @@ JEMALLOC_EXPORT int JEMALLOC_NOTHROW je_mallctlnametomib(const char *name, size_t *mibp, size_t 
*miblenp) { int ret; + LOG("core.mallctlnametomib.entry", "name: %s", name); + if (unlikely(malloc_init())) { + LOG("core.mallctlnametomib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -2931,6 +3091,8 @@ je_mallctlnametomib(const char *name, size_t *mibp, size_t *miblenp) { check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_nametomib(tsd, name, mibp, miblenp); check_entry_exit_locking(tsd_tsdn(tsd)); + + LOG("core.mallctlnametomib.exit", "result: %d", ret); return ret; } @@ -2940,7 +3102,10 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, int ret; tsd_t *tsd; + LOG("core.mallctlbymib.entry", ""); + if (unlikely(malloc_init())) { + LOG("core.mallctlbymib.exit", "result: %d", EAGAIN); return EAGAIN; } @@ -2948,6 +3113,7 @@ je_mallctlbymib(const size_t *mib, size_t miblen, void *oldp, size_t *oldlenp, check_entry_exit_locking(tsd_tsdn(tsd)); ret = ctl_bymib(tsd, mib, miblen, oldp, oldlenp, newp, newlen); check_entry_exit_locking(tsd_tsdn(tsd)); + LOG("core.mallctlbymib.exit", "result: %d", ret); return ret; } @@ -2956,10 +3122,13 @@ je_malloc_stats_print(void (*write_cb)(void *, const char *), void *cbopaque, const char *opts) { tsdn_t *tsdn; + LOG("core.malloc_stats_print.entry", ""); + tsdn = tsdn_fetch(); check_entry_exit_locking(tsdn); stats_print(write_cb, cbopaque, opts); check_entry_exit_locking(tsdn); + LOG("core.malloc_stats_print.exit", ""); } JEMALLOC_EXPORT size_t JEMALLOC_NOTHROW @@ -2967,6 +3136,8 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { size_t ret; tsdn_t *tsdn; + LOG("core.malloc_usable_size.entry", "ptr: %p", ptr); + assert(malloc_initialized() || IS_INITIALIZER); tsdn = tsdn_fetch(); @@ -2984,6 +3155,7 @@ je_malloc_usable_size(JEMALLOC_USABLE_SIZE_CONST void *ptr) { } check_entry_exit_locking(tsdn); + LOG("core.malloc_usable_size.exit", "result: %zu", ret); return ret; } diff --git a/contrib/jemalloc/src/log.c b/contrib/jemalloc/src/log.c new file mode 100644 index 00000000000..778902fb9b8 
--- /dev/null +++ b/contrib/jemalloc/src/log.c @@ -0,0 +1,78 @@ +#include "jemalloc/internal/jemalloc_preamble.h" +#include "jemalloc/internal/jemalloc_internal_includes.h" + +#include "jemalloc/internal/log.h" + +char log_var_names[JEMALLOC_LOG_VAR_BUFSIZE]; +atomic_b_t log_init_done = ATOMIC_INIT(false); + +/* + * Returns a pointer to the first character past the segment that starts at + * segment_begin, i.e. to the next '|' delimiter or the terminating '\0'. + */ +static const char * +log_var_extract_segment(const char* segment_begin) { + const char *end; + for (end = segment_begin; *end != '\0' && *end != '|'; end++) { + } + return end; +} + +static bool +log_var_matches_segment(const char *segment_begin, const char *segment_end, + const char *log_var_begin, const char *log_var_end) { + assert(segment_begin <= segment_end); + assert(log_var_begin < log_var_end); + + ptrdiff_t segment_len = segment_end - segment_begin; + ptrdiff_t log_var_len = log_var_end - log_var_begin; + /* The special '.' segment matches everything. */ + if (segment_len == 1 && *segment_begin == '.') { + return true; + } + if (segment_len == log_var_len) { + return strncmp(segment_begin, log_var_begin, segment_len) == 0; + } else if (segment_len < log_var_len) { + return strncmp(segment_begin, log_var_begin, segment_len) == 0 + && log_var_begin[segment_len] == '.'; + } else { + return false; + } +} + +unsigned +log_var_update_state(log_var_t *log_var) { + const char *log_var_begin = log_var->name; + const char *log_var_end = log_var->name + strlen(log_var->name); + + /* Pointer to the beginning of the current segment. */ + const char *segment_begin = log_var_names; + + /* + * If log_init_done is false, we haven't parsed the malloc conf yet. To + * avoid log-spew, we default to not displaying anything.
+ */ + if (!atomic_load_b(&log_init_done, ATOMIC_ACQUIRE)) { + return LOG_INITIALIZED_NOT_ENABLED; + } + + while (true) { + const char *segment_end = log_var_extract_segment( + segment_begin); + assert(segment_end < log_var_names + JEMALLOC_LOG_VAR_BUFSIZE); + if (log_var_matches_segment(segment_begin, segment_end, + log_var_begin, log_var_end)) { + atomic_store_u(&log_var->state, LOG_ENABLED, + ATOMIC_RELAXED); + return LOG_ENABLED; + } + if (*segment_end == '\0') { + /* Hit the end of the segment string with no match. */ + atomic_store_u(&log_var->state, + LOG_INITIALIZED_NOT_ENABLED, ATOMIC_RELAXED); + return LOG_INITIALIZED_NOT_ENABLED; + } + /* Otherwise, skip the delimiter and continue. */ + segment_begin = segment_end + 1; + } +} diff --git a/contrib/jemalloc/src/malloc_io.c b/contrib/jemalloc/src/malloc_io.c index 4363cb8350c..c8802c70031 100644 --- a/contrib/jemalloc/src/malloc_io.c +++ b/contrib/jemalloc/src/malloc_io.c @@ -70,20 +70,7 @@ static char *x2s(uintmax_t x, bool alt_form, bool uppercase, char *s, /* malloc_message() setup. */ static void wrtmessage(void *cbopaque, const char *s) { -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_write) - /* - * Use syscall(2) rather than write(2) when possible in order to avoid - * the possibility of memory allocation within libc. This is necessary - * on FreeBSD; most operating systems do not have this problem though. - * - * syscall() returns long or int, depending on platform, so capture the - * unused result in the widest plausible type to avoid compiler - * warnings.
- */ - UNUSED long result = syscall(SYS_write, STDERR_FILENO, s, strlen(s)); -#else - UNUSED ssize_t result = write(STDERR_FILENO, s, strlen(s)); -#endif + malloc_write_fd(STDERR_FILENO, s, strlen(s)); } JEMALLOC_EXPORT void (*je_malloc_message)(void *, const char *s); @@ -125,7 +112,7 @@ buferror(int err, char *buf, size_t buflen) { FormatMessageA(FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, 0, (LPSTR)buf, (DWORD)buflen, NULL); return 0; -#elif defined(__GLIBC__) && defined(_GNU_SOURCE) +#elif defined(JEMALLOC_STRERROR_R_RETURNS_CHAR_WITH_GNU_SOURCE) && defined(_GNU_SOURCE) char *b = strerror_r(err, buf, buflen); if (b != buf) { strncpy(buf, b, buflen); diff --git a/contrib/jemalloc/src/mutex.c b/contrib/jemalloc/src/mutex.c index 820af6133d9..b2c36283987 100644 --- a/contrib/jemalloc/src/mutex.c +++ b/contrib/jemalloc/src/mutex.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/malloc_io.h" +#include "jemalloc/internal/spin.h" #ifndef _CRT_SPINCOUNT #define _CRT_SPINCOUNT 4000 @@ -64,7 +65,7 @@ malloc_mutex_lock_slow(malloc_mutex_t *mutex) { int cnt = 0, max_cnt = MALLOC_MUTEX_MAX_SPIN; do { - CPU_SPINWAIT; + spin_cpu_spinwait(); if (!malloc_mutex_trylock_final(mutex)) { data->n_spin_acquired++; return; @@ -194,7 +195,7 @@ malloc_mutex_init(malloc_mutex_t *mutex, const char *name, mutex->lock_order = lock_order; if (lock_order == malloc_mutex_address_ordered) { witness_init(&mutex->witness, name, rank, - mutex_addr_comp, &mutex); + mutex_addr_comp, mutex); } else { witness_init(&mutex->witness, name, rank, NULL, NULL); } diff --git a/contrib/jemalloc/src/pages.c b/contrib/jemalloc/src/pages.c index fec64dd01d7..26002692d60 100644 --- a/contrib/jemalloc/src/pages.c +++ b/contrib/jemalloc/src/pages.c @@ -10,6 +10,9 @@ #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT #include +#ifdef __FreeBSD__ +#include +#endif #endif /******************************************************************************/ @@ -25,6 +28,18 @@ static int mmap_flags; #endif 
static bool os_overcommits; +const char *thp_mode_names[] = { + "default", + "always", + "never", + "not supported" +}; +thp_mode_t opt_thp = THP_MODE_DEFAULT; +thp_mode_t init_system_thp_mode; + +/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */ +static bool pages_can_purge_lazy_runtime = true; + /******************************************************************************/ /* * Function prototypes for static functions that are referenced prior to @@ -252,12 +267,25 @@ pages_purge_lazy(void *addr, size_t size) { if (!pages_can_purge_lazy) { return true; } + if (!pages_can_purge_lazy_runtime) { + /* + * Built with lazy purge enabled, but detected it was not + * supported on the current system. + */ + return true; + } #ifdef _WIN32 VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); return false; #elif defined(JEMALLOC_PURGE_MADVISE_FREE) - return (madvise(addr, size, MADV_FREE) != 0); + return (madvise(addr, size, +# ifdef MADV_FREE + MADV_FREE +# else + JEMALLOC_MADV_FREE +# endif + ) != 0); #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \ !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS) return (madvise(addr, size, MADV_DONTNEED) != 0); @@ -286,36 +314,84 @@ pages_purge_forced(void *addr, size_t size) { #endif } +static bool +pages_huge_impl(void *addr, size_t size, bool aligned) { + if (aligned) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + } +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + return (madvise(addr, size, MADV_HUGEPAGE) != 0); +#else + return true; +#endif +} + bool pages_huge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); - assert(HUGEPAGE_CEILING(size) == size); + return pages_huge_impl(addr, size, true); +} -#ifdef JEMALLOC_THP - return (madvise(addr, size, MADV_HUGEPAGE) != 0); +static bool +pages_huge_unaligned(void *addr, size_t size) { + return pages_huge_impl(addr, size, false); +} + +static bool +pages_nohuge_impl(void *addr, size_t size, bool aligned) { 
+ if (aligned) { + assert(HUGEPAGE_ADDR2BASE(addr) == addr); + assert(HUGEPAGE_CEILING(size) == size); + } + +#ifdef JEMALLOC_HAVE_MADVISE_HUGE + return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); #else - return true; + return false; #endif } bool pages_nohuge(void *addr, size_t size) { - assert(HUGEPAGE_ADDR2BASE(addr) == addr); - assert(HUGEPAGE_CEILING(size) == size); + return pages_nohuge_impl(addr, size, true); +} -#ifdef JEMALLOC_THP - return (madvise(addr, size, MADV_NOHUGEPAGE) != 0); +static bool +pages_nohuge_unaligned(void *addr, size_t size) { + return pages_nohuge_impl(addr, size, false); +} + +bool +pages_dontdump(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); +#ifdef JEMALLOC_MADVISE_DONTDUMP + return madvise(addr, size, MADV_DONTDUMP) != 0; #else return false; #endif } +bool +pages_dodump(void *addr, size_t size) { + assert(PAGE_ADDR2BASE(addr) == addr); + assert(PAGE_CEILING(size) == size); +#ifdef JEMALLOC_MADVISE_DONTDUMP + return madvise(addr, size, MADV_DODUMP) != 0; +#else + return false; +#endif +} + + static size_t os_page_detect(void) { #ifdef _WIN32 SYSTEM_INFO si; GetSystemInfo(&si); return si.dwPageSize; +#elif defined(__FreeBSD__) + return getpagesize(); #else long result = sysconf(_SC_PAGESIZE); if (result == -1) { @@ -332,9 +408,19 @@ os_overcommits_sysctl(void) { size_t sz; sz = sizeof(vm_overcommit); +#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT) + int mib[2]; + + mib[0] = CTL_VM; + mib[1] = VM_OVERCOMMIT; + if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) { + return false; /* Error. */ + } +#else if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) { return false; /* Error. 
*/ } +#endif return ((vm_overcommit & 0x3) == 0); } @@ -350,27 +436,44 @@ static bool os_overcommits_proc(void) { int fd; char buf[1]; - ssize_t nread; #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) - fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | - O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY | + O_CLOEXEC); + #else + fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + #endif #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat) - fd = (int)syscall(SYS_openat, - AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #else + fd = (int)syscall(SYS_openat, + AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + #endif #else - fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #if defined(O_CLOEXEC) + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC); + #else + fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd != -1) { + fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC); + } + #endif #endif + if (fd == -1) { return false; /* Error. 
*/ } -#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read) - nread = (ssize_t)syscall(SYS_read, fd, &buf, sizeof(buf)); -#else - nread = read(fd, &buf, sizeof(buf)); -#endif - + ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) syscall(SYS_close, fd); #else @@ -390,6 +493,82 @@ os_overcommits_proc(void) { } #endif +void +pages_set_thp_state (void *ptr, size_t size) { + if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) { + return; + } + assert(opt_thp != thp_mode_not_supported && + init_system_thp_mode != thp_mode_not_supported); + + if (opt_thp == thp_mode_always + && init_system_thp_mode != thp_mode_never) { + assert(init_system_thp_mode == thp_mode_default); + pages_huge_unaligned(ptr, size); + } else if (opt_thp == thp_mode_never) { + assert(init_system_thp_mode == thp_mode_default || + init_system_thp_mode == thp_mode_always); + pages_nohuge_unaligned(ptr, size); + } +} + +/* + * Detect the system THP setting from + * /sys/kernel/mm/transparent_hugepage/enabled and store it in + * init_system_thp_mode. If madvise huge page support is missing, or the file + * cannot be opened, read or matched against a known state, both opt_thp and + * init_system_thp_mode are set to thp_mode_not_supported. + */ +static void +init_thp_state(void) { + if (!have_madvise_huge) { + if (metadata_thp_enabled() && opt_abort) { + malloc_write(": no MADV_HUGEPAGE support\n"); + abort(); + } + goto label_error; + } + + static const char sys_state_madvise[] = "always [madvise] never\n"; + static const char sys_state_always[] = "[always] madvise never\n"; + static const char sys_state_never[] = "always madvise [never]\n"; + char buf[sizeof(sys_state_madvise)]; + +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open) + int fd = (int)syscall(SYS_open, + "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#else + int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY); +#endif + if (fd == -1) { + goto label_error; + } + + ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf)); +#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close) + syscall(SYS_close, fd); +#else + close(fd); +#endif + + if (nread < 0) { + /* Read failed: buf holds no valid data. */ + goto label_error; + } + if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_default; + } else if (strncmp(buf,
sys_state_always, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_always; + } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) { + init_system_thp_mode = thp_mode_never; + } else { + goto label_error; + } + return; +label_error: + opt_thp = init_system_thp_mode = thp_mode_not_supported; +} + bool pages_boot(void) { os_page = os_page_detect(); @@ -418,5 +597,21 @@ pages_boot(void) { os_overcommits = false; #endif + init_thp_state(); + + /* Detect lazy purge runtime support. */ + if (pages_can_purge_lazy) { + bool committed = false; + void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed); + if (madv_free_page == NULL) { + return true; + } + assert(pages_can_purge_lazy_runtime); + if (pages_purge_lazy(madv_free_page, PAGE)) { + pages_can_purge_lazy_runtime = false; + } + os_pages_unmap(madv_free_page, PAGE); + } + return false; } diff --git a/contrib/jemalloc/src/prof.c b/contrib/jemalloc/src/prof.c index 975722c4c38..13df641a030 100644 --- a/contrib/jemalloc/src/prof.c +++ b/contrib/jemalloc/src/prof.c @@ -978,7 +978,7 @@ prof_dump_flush(bool propagate_err) { cassert(config_prof); - err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); + err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end); if (err == -1) { if (!propagate_err) { malloc_write(": write() failed during heap " @@ -1409,7 +1409,15 @@ prof_open_maps(const char *format, ...)
{ va_start(ap, format); malloc_vsnprintf(filename, sizeof(filename), format, ap); va_end(ap); + +#if defined(O_CLOEXEC) mfd = open(filename, O_RDONLY | O_CLOEXEC); +#else + mfd = open(filename, O_RDONLY); + if (mfd != -1) { + fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC); + } +#endif return mfd; } @@ -1463,8 +1471,9 @@ prof_dump_maps(bool propagate_err) { goto label_return; } } - nread = read(mfd, &prof_dump_buf[prof_dump_buf_end], - PROF_DUMP_BUFSIZE - prof_dump_buf_end); + nread = malloc_read_fd(mfd, + &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE + - prof_dump_buf_end); } while (nread > 0); } else { ret = true; @@ -1772,7 +1781,7 @@ prof_idump(tsdn_t *tsdn) { cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) { + if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { return; } tsd = tsdn_tsd(tsdn); @@ -1829,7 +1838,7 @@ prof_gdump(tsdn_t *tsdn) { cassert(config_prof); - if (!prof_booted || tsdn_null(tsdn)) { + if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) { return; } tsd = tsdn_tsd(tsdn); diff --git a/contrib/jemalloc/src/stats.c b/contrib/jemalloc/src/stats.c index 087df7676e9..08b9507cfe9 100644 --- a/contrib/jemalloc/src/stats.c +++ b/contrib/jemalloc/src/stats.c @@ -4,6 +4,7 @@ #include "jemalloc/internal/assert.h" #include "jemalloc/internal/ctl.h" +#include "jemalloc/internal/emitter.h" #include "jemalloc/internal/mutex.h" #include "jemalloc/internal/mutex_prof.h" @@ -84,41 +85,138 @@ gen_mutex_ctl_str(char *str, size_t buf_len, const char *prefix, } static void -read_arena_bin_mutex_stats(unsigned arena_ind, unsigned bin_ind, - uint64_t results[mutex_prof_num_counters]) { +mutex_stats_init_cols(emitter_row_t *row, const char *table_name, + emitter_col_t *name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; + mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; + + 
emitter_col_t *col; + + if (name != NULL) { + emitter_col_init(name, row); + name->justify = emitter_justify_left; + name->width = 21; + name->type = emitter_type_title; + name->str_val = table_name; + } + +#define WIDTH_uint32_t 12 +#define WIDTH_uint64_t 16 +#define OP(counter, counter_type, human) \ + col = &col_##counter_type[k_##counter_type]; \ + ++k_##counter_type; \ + emitter_col_init(col, row); \ + col->justify = emitter_justify_right; \ + col->width = WIDTH_##counter_type; \ + col->type = emitter_type_title; \ + col->str_val = human; + MUTEX_PROF_COUNTERS +#undef OP +#undef WIDTH_uint32_t +#undef WIDTH_uint64_t +} + +static void +mutex_stats_read_global(const char *name, emitter_col_t *col_name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + + col_name->str_val = name; + + emitter_col_t *dst; +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "mutexes", name, #counter); \ + CTL_GET(cmd, (counter_type *)&dst->bool_val, counter_type); + MUTEX_PROF_COUNTERS +#undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t +} + +static void +mutex_stats_read_arena(unsigned arena_ind, mutex_prof_arena_ind_t mutex_ind, + const char *name, emitter_col_t *col_name, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + char cmd[MUTEX_CTL_STR_MAX_LENGTH]; + + col_name->str_val = name; + + emitter_col_t *dst; +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + 
dst->type = EMITTER_TYPE_##counter_type; \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.mutexes", arena_mutex_names[mutex_ind], #counter);\ + CTL_M2_GET(cmd, arena_ind, \ + (counter_type *)&dst->bool_val, counter_type); + MUTEX_PROF_COUNTERS +#undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t +} + +static void +mutex_stats_read_arena_bin(unsigned arena_ind, unsigned bin_ind, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { char cmd[MUTEX_CTL_STR_MAX_LENGTH]; -#define OP(c, t) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.bins.0","mutex", #c); \ - CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ - (t *)&results[mutex_counter_##c], t); -MUTEX_PROF_COUNTERS + emitter_col_t *dst; + +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, counter_type, human) \ + dst = &col_##counter_type[mutex_counter_##counter]; \ + dst->type = EMITTER_TYPE_##counter_type; \ + gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ + "arenas.0.bins.0","mutex", #counter); \ + CTL_M2_M4_GET(cmd, arena_ind, bin_ind, \ + (counter_type *)&dst->bool_val, counter_type); + MUTEX_PROF_COUNTERS #undef OP +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t } +/* "row" can be NULL to avoid emitting in table mode. */ static void -mutex_stats_output_json(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[mutex_prof_num_counters], - const char *json_indent, bool last) { - malloc_cprintf(write_cb, cbopaque, "%s\"%s\": {\n", json_indent, name); - - mutex_prof_counter_ind_t k = 0; - char *fmt_str[2] = {"%s\t\"%s\": %"FMTu32"%s\n", - "%s\t\"%s\": %"FMTu64"%s\n"}; -#define OP(c, t) \ - malloc_cprintf(write_cb, cbopaque, \ - fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - json_indent, #c, (t)stats[mutex_counter_##c], \ - (++k == mutex_prof_num_counters) ? 
"" : ","); -MUTEX_PROF_COUNTERS +mutex_stats_emit(emitter_t *emitter, emitter_row_t *row, + emitter_col_t col_uint64_t[mutex_prof_num_uint64_t_counters], + emitter_col_t col_uint32_t[mutex_prof_num_uint32_t_counters]) { + if (row != NULL) { + emitter_table_row(emitter, row); + } + + mutex_prof_uint64_t_counter_ind_t k_uint64_t = 0; + mutex_prof_uint32_t_counter_ind_t k_uint32_t = 0; + + emitter_col_t *col; + +#define EMITTER_TYPE_uint32_t emitter_type_uint32 +#define EMITTER_TYPE_uint64_t emitter_type_uint64 +#define OP(counter, type, human) \ + col = &col_##type[k_##type]; \ + ++k_##type; \ + emitter_json_kv(emitter, #counter, EMITTER_TYPE_##type, \ + (const void *)&col->bool_val); + MUTEX_PROF_COUNTERS; #undef OP - malloc_cprintf(write_cb, cbopaque, "%s}%s\n", json_indent, - last ? "" : ","); +#undef EMITTER_TYPE_uint32_t +#undef EMITTER_TYPE_uint64_t } static void -stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool large, bool mutex, unsigned i) { +stats_arena_bins_print(emitter_t *emitter, bool mutex, unsigned i) { size_t page; bool in_gap, in_gap_prev; unsigned nbins, j; @@ -126,18 +224,71 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_GET("arenas.nbins", &nbins, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"bins\": [\n"); - } else { - char *mutex_counters = " n_lock_ops n_waiting" - " n_spin_acq total_wait_ns max_wait_ns\n"; - malloc_cprintf(write_cb, cbopaque, - "bins: size ind allocated nmalloc" - " ndalloc nrequests curregs curslabs regs" - " pgs util nfills nflushes newslabs" - " reslabs%s", mutex ? 
mutex_counters : "\n"); + + emitter_row_t header_row; + emitter_row_init(&header_row); + + emitter_row_t row; + emitter_row_init(&row); +#define COL(name, left_or_right, col_width, etype) \ + emitter_col_t col_##name; \ + emitter_col_init(&col_##name, &row); \ + col_##name.justify = emitter_justify_##left_or_right; \ + col_##name.width = col_width; \ + col_##name.type = emitter_type_##etype; \ + emitter_col_t header_col_##name; \ + emitter_col_init(&header_col_##name, &header_row); \ + header_col_##name.justify = emitter_justify_##left_or_right; \ + header_col_##name.width = col_width; \ + header_col_##name.type = emitter_type_title; \ + header_col_##name.str_val = #name; + + COL(size, right, 20, size) + COL(ind, right, 4, unsigned) + COL(allocated, right, 13, uint64) + COL(nmalloc, right, 13, uint64) + COL(ndalloc, right, 13, uint64) + COL(nrequests, right, 13, uint64) + COL(curregs, right, 13, size) + COL(curslabs, right, 13, size) + COL(regs, right, 5, unsigned) + COL(pgs, right, 4, size) + /* To buffer a right- and left-justified column. */ + COL(justify_spacer, right, 1, title) + COL(util, right, 6, title) + COL(nfills, right, 13, uint64) + COL(nflushes, right, 13, uint64) + COL(nslabs, right, 13, uint64) + COL(nreslabs, right, 13, uint64) +#undef COL + + /* Don't want to actually print the name. */ + header_col_justify_spacer.str_val = " "; + col_justify_spacer.str_val = " "; + + + emitter_col_t col_mutex64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col_mutex32[mutex_prof_num_uint32_t_counters]; + + emitter_col_t header_mutex64[mutex_prof_num_uint64_t_counters]; + emitter_col_t header_mutex32[mutex_prof_num_uint32_t_counters]; + + if (mutex) { + mutex_stats_init_cols(&row, NULL, NULL, col_mutex64, + col_mutex32); + mutex_stats_init_cols(&header_row, NULL, NULL, header_mutex64, + header_mutex32); } + + /* + * We print a "bins:" header as part of the table row; we need to adjust + * the header size column to compensate. 
+ */ + header_col_size.width -=5; + emitter_table_printf(emitter, "bins:"); + emitter_table_row(emitter, &header_row); + emitter_json_arr_begin(emitter, "bins"); + for (j = 0, in_gap = false; j < nbins; j++) { uint64_t nslabs; size_t reg_size, slab_size, curregs; @@ -151,8 +302,8 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, in_gap_prev = in_gap; in_gap = (nslabs == 0); - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, " ---\n"); } @@ -177,105 +328,127 @@ stats_arena_bins_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_M4_GET("stats.arenas.0.bins.0.curslabs", i, j, &curslabs, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"nmalloc\": %"FMTu64",\n" - "\t\t\t\t\t\t\"ndalloc\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curregs\": %zu,\n" - "\t\t\t\t\t\t\"nrequests\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nfills\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nflushes\": %"FMTu64",\n" - "\t\t\t\t\t\t\"nreslabs\": %"FMTu64",\n" - "\t\t\t\t\t\t\"curslabs\": %zu%s\n", - nmalloc, ndalloc, curregs, nrequests, nfills, - nflushes, nreslabs, curslabs, mutex ? "," : ""); - if (mutex) { - uint64_t mutex_stats[mutex_prof_num_counters]; - read_arena_bin_mutex_stats(i, j, mutex_stats); - mutex_stats_output_json(write_cb, cbopaque, - "mutex", mutex_stats, "\t\t\t\t\t\t", true); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t}%s\n", - (j + 1 < nbins) ? "," : ""); - } else if (!in_gap) { - size_t availregs = nregs * curslabs; - char util[6]; - if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, - util)) { - if (availregs == 0) { - malloc_snprintf(util, sizeof(util), - "1"); - } else if (curregs > availregs) { - /* - * Race detected: the counters were read - * in separate mallctl calls and - * concurrent operations happened in - * between. In this case no meaningful - * utilization can be computed. 
- */ - malloc_snprintf(util, sizeof(util), - " race"); - } else { - not_reached(); - } - } - uint64_t mutex_stats[mutex_prof_num_counters]; - if (mutex) { - read_arena_bin_mutex_stats(i, j, mutex_stats); - } + if (mutex) { + mutex_stats_read_arena_bin(i, j, col_mutex64, + col_mutex32); + } + + emitter_json_arr_obj_begin(emitter); + emitter_json_kv(emitter, "nmalloc", emitter_type_uint64, + &nmalloc); + emitter_json_kv(emitter, "ndalloc", emitter_type_uint64, + &ndalloc); + emitter_json_kv(emitter, "curregs", emitter_type_size, + &curregs); + emitter_json_kv(emitter, "nrequests", emitter_type_uint64, + &nrequests); + emitter_json_kv(emitter, "nfills", emitter_type_uint64, + &nfills); + emitter_json_kv(emitter, "nflushes", emitter_type_uint64, + &nflushes); + emitter_json_kv(emitter, "nreslabs", emitter_type_uint64, + &nreslabs); + emitter_json_kv(emitter, "curslabs", emitter_type_size, + &curslabs); + if (mutex) { + emitter_json_dict_begin(emitter, "mutex"); + mutex_stats_emit(emitter, NULL, col_mutex64, + col_mutex32); + emitter_json_dict_end(emitter); + } + emitter_json_arr_obj_end(emitter); - malloc_cprintf(write_cb, cbopaque, "%20zu %3u %12zu %12" - FMTu64" %12"FMTu64" %12"FMTu64" %12zu %12zu %4u" - " %3zu %-5s %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %12"FMTu64, reg_size, j, curregs * reg_size, - nmalloc, ndalloc, nrequests, curregs, curslabs, - nregs, slab_size / page, util, nfills, nflushes, - nslabs, nreslabs); - - /* Output less info for bin mutexes to save space. 
*/ - if (mutex) { - malloc_cprintf(write_cb, cbopaque, - " %12"FMTu64" %12"FMTu64" %12"FMTu64 - " %14"FMTu64" %12"FMTu64"\n", - mutex_stats[mutex_counter_num_ops], - mutex_stats[mutex_counter_num_wait], - mutex_stats[mutex_counter_num_spin_acq], - mutex_stats[mutex_counter_total_wait_time], - mutex_stats[mutex_counter_max_wait_time]); + size_t availregs = nregs * curslabs; + char util[6]; + if (get_rate_str((uint64_t)curregs, (uint64_t)availregs, util)) + { + if (availregs == 0) { + malloc_snprintf(util, sizeof(util), "1"); + } else if (curregs > availregs) { + /* + * Race detected: the counters were read in + * separate mallctl calls and concurrent + * operations happened in between. In this case + * no meaningful utilization can be computed. + */ + malloc_snprintf(util, sizeof(util), " race"); } else { - malloc_cprintf(write_cb, cbopaque, "\n"); + not_reached(); } } + + col_size.size_val = reg_size; + col_ind.unsigned_val = j; + col_allocated.size_val = curregs * reg_size; + col_nmalloc.uint64_val = nmalloc; + col_ndalloc.uint64_val = ndalloc; + col_nrequests.uint64_val = nrequests; + col_curregs.size_val = curregs; + col_curslabs.size_val = curslabs; + col_regs.unsigned_val = nregs; + col_pgs.size_val = slab_size / page; + col_util.str_val = util; + col_nfills.uint64_val = nfills; + col_nflushes.uint64_val = nflushes; + col_nslabs.uint64_val = nslabs; + col_nreslabs.uint64_val = nreslabs; + + /* + * Note that mutex columns were initialized above, if mutex == + * true. + */ + + emitter_table_row(emitter, &row); } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]%s\n", large ? "," : ""); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } + emitter_json_arr_end(emitter); /* Close "bins". 
*/ + + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); } } static void -stats_arena_lextents_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, unsigned i) { +stats_arena_lextents_print(emitter_t *emitter, unsigned i) { unsigned nbins, nlextents, j; bool in_gap, in_gap_prev; CTL_GET("arenas.nbins", &nbins, unsigned); CTL_GET("arenas.nlextents", &nlextents, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"lextents\": [\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "large: size ind allocated nmalloc" - " ndalloc nrequests curlextents\n"); - } + + emitter_row_t header_row; + emitter_row_init(&header_row); + emitter_row_t row; + emitter_row_init(&row); + +#define COL(name, left_or_right, col_width, etype) \ + emitter_col_t header_##name; \ + emitter_col_init(&header_##name, &header_row); \ + header_##name.justify = emitter_justify_##left_or_right; \ + header_##name.width = col_width; \ + header_##name.type = emitter_type_title; \ + header_##name.str_val = #name; \ + \ + emitter_col_t col_##name; \ + emitter_col_init(&col_##name, &row); \ + col_##name.justify = emitter_justify_##left_or_right; \ + col_##name.width = col_width; \ + col_##name.type = emitter_type_##etype; + + COL(size, right, 20, size) + COL(ind, right, 4, unsigned) + COL(allocated, right, 13, size) + COL(nmalloc, right, 13, uint64) + COL(ndalloc, right, 13, uint64) + COL(nrequests, right, 13, uint64) + COL(curlextents, right, 13, size) +#undef COL + + /* As with bins, we label the large extents table. 
*/ + header_size.width -= 6; + emitter_table_printf(emitter, "large:"); + emitter_table_row(emitter, &header_row); + emitter_json_arr_begin(emitter, "lextents"); + for (j = 0, in_gap = false; j < nlextents; j++) { uint64_t nmalloc, ndalloc, nrequests; size_t lextent_size, curlextents; @@ -289,119 +462,71 @@ stats_arena_lextents_print(void (*write_cb)(void *, const char *), in_gap_prev = in_gap; in_gap = (nrequests == 0); - if (!json && in_gap_prev && !in_gap) { - malloc_cprintf(write_cb, cbopaque, + if (in_gap_prev && !in_gap) { + emitter_table_printf(emitter, " ---\n"); } CTL_M2_GET("arenas.lextent.0.size", j, &lextent_size, size_t); CTL_M2_M4_GET("stats.arenas.0.lextents.0.curlextents", i, j, &curlextents, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t{\n" - "\t\t\t\t\t\t\"curlextents\": %zu\n" - "\t\t\t\t\t}%s\n", - curlextents, - (j + 1 < nlextents) ? "," : ""); - } else if (!in_gap) { - malloc_cprintf(write_cb, cbopaque, - "%20zu %3u %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64" %12zu\n", - lextent_size, nbins + j, - curlextents * lextent_size, nmalloc, ndalloc, - nrequests, curlextents); - } - } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t]\n"); - } else { - if (in_gap) { - malloc_cprintf(write_cb, cbopaque, - " ---\n"); - } - } -} -static void -read_arena_mutex_stats(unsigned arena_ind, - uint64_t results[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - - mutex_prof_arena_ind_t i; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { -#define OP(c, t) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "arenas.0.mutexes", arena_mutex_names[i], #c); \ - CTL_M2_GET(cmd, arena_ind, \ - (t *)&results[i][mutex_counter_##c], t); -MUTEX_PROF_COUNTERS -#undef OP + emitter_json_arr_obj_begin(emitter); + emitter_json_kv(emitter, "curlextents", emitter_type_size, + &curlextents); + emitter_json_arr_obj_end(emitter); + + col_size.size_val = lextent_size; + 
col_ind.unsigned_val = nbins + j; + col_allocated.size_val = curlextents * lextent_size; + col_nmalloc.uint64_val = nmalloc; + col_ndalloc.uint64_val = ndalloc; + col_nrequests.uint64_val = nrequests; + col_curlextents.size_val = curlextents; + + if (!in_gap) { + emitter_table_row(emitter, &row); + } } -} - -static void -mutex_stats_output(void (*write_cb)(void *, const char *), void *cbopaque, - const char *name, uint64_t stats[mutex_prof_num_counters], - bool first_mutex) { - if (first_mutex) { - /* Print title. */ - malloc_cprintf(write_cb, cbopaque, - " n_lock_ops n_waiting" - " n_spin_acq n_owner_switch total_wait_ns" - " max_wait_ns max_n_thds\n"); + emitter_json_arr_end(emitter); /* Close "lextents". */ + if (in_gap) { + emitter_table_printf(emitter, " ---\n"); } - - malloc_cprintf(write_cb, cbopaque, "%s", name); - malloc_cprintf(write_cb, cbopaque, ":%*c", - (int)(20 - strlen(name)), ' '); - - char *fmt_str[2] = {"%12"FMTu32, "%16"FMTu64}; -#define OP(c, t) \ - malloc_cprintf(write_cb, cbopaque, \ - fmt_str[sizeof(t) / sizeof(uint32_t) - 1], \ - (t)stats[mutex_counter_##c]); -MUTEX_PROF_COUNTERS -#undef OP - malloc_cprintf(write_cb, cbopaque, "\n"); } static void -stats_arena_mutexes_print(void (*write_cb)(void *, const char *), - void *cbopaque, bool json, bool json_end, unsigned arena_ind) { - uint64_t mutex_stats[mutex_prof_num_arena_mutexes][mutex_prof_num_counters]; - read_arena_mutex_stats(arena_ind, mutex_stats); - - /* Output mutex stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, "\t\t\t\t\"mutexes\": {\n"); - mutex_prof_arena_ind_t i, last_mutex; - last_mutex = mutex_prof_num_arena_mutexes - 1; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { - mutex_stats_output_json(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats[i], - "\t\t\t\t\t", (i == last_mutex)); - } - malloc_cprintf(write_cb, cbopaque, "\t\t\t\t}%s\n", - json_end ? 
"" : ","); - } else { - mutex_prof_arena_ind_t i; - for (i = 0; i < mutex_prof_num_arena_mutexes; i++) { - mutex_stats_output(write_cb, cbopaque, - arena_mutex_names[i], mutex_stats[i], i == 0); - } +stats_arena_mutexes_print(emitter_t *emitter, unsigned arena_ind) { + emitter_row_t row; + emitter_col_t col_name; + emitter_col_t col64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col32[mutex_prof_num_uint32_t_counters]; + + emitter_row_init(&row); + mutex_stats_init_cols(&row, "", &col_name, col64, col32); + + emitter_json_dict_begin(emitter, "mutexes"); + emitter_table_row(emitter, &row); + + for (mutex_prof_arena_ind_t i = 0; i < mutex_prof_num_arena_mutexes; + i++) { + const char *name = arena_mutex_names[i]; + emitter_json_dict_begin(emitter, name); + mutex_stats_read_arena(arena_ind, i, name, &col_name, col64, + col32); + mutex_stats_emit(emitter, &row, col64, col32); + emitter_json_dict_end(emitter); /* Close the mutex dict. */ } + emitter_json_dict_end(emitter); /* End "mutexes". 
*/ } static void -stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, unsigned i, bool bins, bool large, bool mutex) { +stats_arena_print(emitter_t *emitter, unsigned i, bool bins, bool large, + bool mutex) { unsigned nthreads; const char *dss; ssize_t dirty_decay_ms, muzzy_decay_ms; size_t page, pactive, pdirty, pmuzzy, mapped, retained; - size_t base, internal, resident; + size_t base, internal, resident, metadata_thp; uint64_t dirty_npurge, dirty_nmadvise, dirty_purged; uint64_t muzzy_npurge, muzzy_nmadvise, muzzy_purged; size_t small_allocated; @@ -414,31 +539,16 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_GET("arenas.page", &page, size_t); CTL_M2_GET("stats.arenas.0.nthreads", i, &nthreads, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"nthreads\": %u,\n", nthreads); - } else { - malloc_cprintf(write_cb, cbopaque, - "assigned threads: %u\n", nthreads); - } + emitter_kv(emitter, "nthreads", "assigned threads", + emitter_type_unsigned, &nthreads); CTL_M2_GET("stats.arenas.0.uptime", i, &uptime, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"uptime_ns\": %"FMTu64",\n", uptime); - } else { - malloc_cprintf(write_cb, cbopaque, - "uptime: %"FMTu64"\n", uptime); - } + emitter_kv(emitter, "uptime_ns", "uptime", emitter_type_uint64, + &uptime); CTL_M2_GET("stats.arenas.0.dss", i, &dss, const char *); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dss\": \"%s\",\n", dss); - } else { - malloc_cprintf(write_cb, cbopaque, - "dss allocation precedence: %s\n", dss); - } + emitter_kv(emitter, "dss", "dss allocation precedence", + emitter_type_string, &dss); CTL_M2_GET("stats.arenas.0.dirty_decay_ms", i, &dirty_decay_ms, ssize_t); @@ -455,205 +565,271 @@ stats_arena_print(void (*write_cb)(void *, const char *), void *cbopaque, CTL_M2_GET("stats.arenas.0.muzzy_nmadvise", i, &muzzy_nmadvise, uint64_t); 
CTL_M2_GET("stats.arenas.0.muzzy_purged", i, &muzzy_purged, uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_decay_ms\": %zd,\n", dirty_decay_ms); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_decay_ms\": %zd,\n", muzzy_decay_ms); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pactive\": %zu,\n", pactive); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pdirty\": %zu,\n", pdirty); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"pmuzzy\": %zu,\n", pmuzzy); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_npurge\": %"FMTu64",\n", dirty_npurge); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_nmadvise\": %"FMTu64",\n", dirty_nmadvise); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"dirty_purged\": %"FMTu64",\n", dirty_purged); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_npurge\": %"FMTu64",\n", muzzy_npurge); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_nmadvise\": %"FMTu64",\n", muzzy_nmadvise); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"muzzy_purged\": %"FMTu64",\n", muzzy_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - "decaying: time npages sweeps madvises" - " purged\n"); - if (dirty_decay_ms >= 0) { - malloc_cprintf(write_cb, cbopaque, - " dirty: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", dirty_decay_ms, pdirty, dirty_npurge, - dirty_nmadvise, dirty_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - " dirty: N/A %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", pdirty, dirty_npurge, dirty_nmadvise, - dirty_purged); - } - if (muzzy_decay_ms >= 0) { - malloc_cprintf(write_cb, cbopaque, - " muzzy: %5zd %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", muzzy_decay_ms, pmuzzy, muzzy_npurge, - muzzy_nmadvise, muzzy_purged); - } else { - malloc_cprintf(write_cb, cbopaque, - " muzzy: N/A %12zu %12"FMTu64" %12"FMTu64" %12" - FMTu64"\n", pmuzzy, muzzy_npurge, muzzy_nmadvise, - muzzy_purged); - } - } - CTL_M2_GET("stats.arenas.0.small.allocated", i, 
&small_allocated, - size_t); - CTL_M2_GET("stats.arenas.0.small.nmalloc", i, &small_nmalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.small.ndalloc", i, &small_ndalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.small.nrequests", i, &small_nrequests, - uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"small\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", small_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", small_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", small_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", small_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); + emitter_row_t decay_row; + emitter_row_init(&decay_row); + + /* JSON-style emission. */ + emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, + &dirty_decay_ms); + emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, + &muzzy_decay_ms); + + emitter_json_kv(emitter, "pactive", emitter_type_size, &pactive); + emitter_json_kv(emitter, "pdirty", emitter_type_size, &pdirty); + emitter_json_kv(emitter, "pmuzzy", emitter_type_size, &pmuzzy); + + emitter_json_kv(emitter, "dirty_npurge", emitter_type_uint64, + &dirty_npurge); + emitter_json_kv(emitter, "dirty_nmadvise", emitter_type_uint64, + &dirty_nmadvise); + emitter_json_kv(emitter, "dirty_purged", emitter_type_uint64, + &dirty_purged); + + emitter_json_kv(emitter, "muzzy_npurge", emitter_type_uint64, + &muzzy_npurge); + emitter_json_kv(emitter, "muzzy_nmadvise", emitter_type_uint64, + &muzzy_nmadvise); + emitter_json_kv(emitter, "muzzy_purged", emitter_type_uint64, + &muzzy_purged); + + /* Table-style emission. 
*/ + emitter_col_t decay_type; + emitter_col_init(&decay_type, &decay_row); + decay_type.justify = emitter_justify_right; + decay_type.width = 9; + decay_type.type = emitter_type_title; + decay_type.str_val = "decaying:"; + + emitter_col_t decay_time; + emitter_col_init(&decay_time, &decay_row); + decay_time.justify = emitter_justify_right; + decay_time.width = 6; + decay_time.type = emitter_type_title; + decay_time.str_val = "time"; + + emitter_col_t decay_npages; + emitter_col_init(&decay_npages, &decay_row); + decay_npages.justify = emitter_justify_right; + decay_npages.width = 13; + decay_npages.type = emitter_type_title; + decay_npages.str_val = "npages"; + + emitter_col_t decay_sweeps; + emitter_col_init(&decay_sweeps, &decay_row); + decay_sweeps.justify = emitter_justify_right; + decay_sweeps.width = 13; + decay_sweeps.type = emitter_type_title; + decay_sweeps.str_val = "sweeps"; + + emitter_col_t decay_madvises; + emitter_col_init(&decay_madvises, &decay_row); + decay_madvises.justify = emitter_justify_right; + decay_madvises.width = 13; + decay_madvises.type = emitter_type_title; + decay_madvises.str_val = "madvises"; + + emitter_col_t decay_purged; + emitter_col_init(&decay_purged, &decay_row); + decay_purged.justify = emitter_justify_right; + decay_purged.width = 13; + decay_purged.type = emitter_type_title; + decay_purged.str_val = "purged"; + + /* Title row. */ + emitter_table_row(emitter, &decay_row); + + /* Dirty row. 
*/ + decay_type.str_val = "dirty:"; + + if (dirty_decay_ms >= 0) { + decay_time.type = emitter_type_ssize; + decay_time.ssize_val = dirty_decay_ms; } else { - malloc_cprintf(write_cb, cbopaque, - " allocated nmalloc" - " ndalloc nrequests\n"); - malloc_cprintf(write_cb, cbopaque, - "small: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated, small_nmalloc, small_ndalloc, - small_nrequests); + decay_time.type = emitter_type_title; + decay_time.str_val = "N/A"; } - CTL_M2_GET("stats.arenas.0.large.allocated", i, &large_allocated, - size_t); - CTL_M2_GET("stats.arenas.0.large.nmalloc", i, &large_nmalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.large.ndalloc", i, &large_ndalloc, uint64_t); - CTL_M2_GET("stats.arenas.0.large.nrequests", i, &large_nrequests, - uint64_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"large\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"allocated\": %zu,\n", large_allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nmalloc\": %"FMTu64",\n", large_nmalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"ndalloc\": %"FMTu64",\n", large_ndalloc); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nrequests\": %"FMTu64"\n", large_nrequests); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t},\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "large: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - large_allocated, large_nmalloc, large_ndalloc, - large_nrequests); - malloc_cprintf(write_cb, cbopaque, - "total: %12zu %12"FMTu64" %12"FMTu64 - " %12"FMTu64"\n", - small_allocated + large_allocated, small_nmalloc + - large_nmalloc, small_ndalloc + large_ndalloc, - small_nrequests + large_nrequests); - } - if (!json) { - malloc_cprintf(write_cb, cbopaque, - "active: %12zu\n", pactive * page); - } + decay_npages.type = emitter_type_size; + decay_npages.size_val = pdirty; - CTL_M2_GET("stats.arenas.0.mapped", i, &mapped, size_t); - if (json) { - malloc_cprintf(write_cb, 
cbopaque, - "\t\t\t\t\"mapped\": %zu,\n", mapped); - } else { - malloc_cprintf(write_cb, cbopaque, - "mapped: %12zu\n", mapped); - } + decay_sweeps.type = emitter_type_uint64; + decay_sweeps.uint64_val = dirty_npurge; - CTL_M2_GET("stats.arenas.0.retained", i, &retained, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"retained\": %zu,\n", retained); - } else { - malloc_cprintf(write_cb, cbopaque, - "retained: %12zu\n", retained); - } + decay_madvises.type = emitter_type_uint64; + decay_madvises.uint64_val = dirty_nmadvise; - CTL_M2_GET("stats.arenas.0.base", i, &base, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"base\": %zu,\n", base); - } else { - malloc_cprintf(write_cb, cbopaque, - "base: %12zu\n", base); - } + decay_purged.type = emitter_type_uint64; + decay_purged.uint64_val = dirty_purged; - CTL_M2_GET("stats.arenas.0.internal", i, &internal, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"internal\": %zu,\n", internal); - } else { - malloc_cprintf(write_cb, cbopaque, - "internal: %12zu\n", internal); - } + emitter_table_row(emitter, &decay_row); - CTL_M2_GET("stats.arenas.0.tcache_bytes", i, &tcache_bytes, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"tcache\": %zu,\n", tcache_bytes); - } else { - malloc_cprintf(write_cb, cbopaque, - "tcache: %12zu\n", tcache_bytes); - } + /* Muzzy row. */ + decay_type.str_val = "muzzy:"; - CTL_M2_GET("stats.arenas.0.resident", i, &resident, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"resident\": %zu%s\n", resident, - (bins || large || mutex) ? 
"," : ""); + if (muzzy_decay_ms >= 0) { + decay_time.type = emitter_type_ssize; + decay_time.ssize_val = muzzy_decay_ms; } else { - malloc_cprintf(write_cb, cbopaque, - "resident: %12zu\n", resident); + decay_time.type = emitter_type_title; + decay_time.str_val = "N/A"; } + decay_npages.type = emitter_type_size; + decay_npages.size_val = pmuzzy; + + decay_sweeps.type = emitter_type_uint64; + decay_sweeps.uint64_val = muzzy_npurge; + + decay_madvises.type = emitter_type_uint64; + decay_madvises.uint64_val = muzzy_nmadvise; + + decay_purged.type = emitter_type_uint64; + decay_purged.uint64_val = muzzy_purged; + + emitter_table_row(emitter, &decay_row); + + /* Small / large / total allocation counts. */ + emitter_row_t alloc_count_row; + emitter_row_init(&alloc_count_row); + + emitter_col_t alloc_count_title; + emitter_col_init(&alloc_count_title, &alloc_count_row); + alloc_count_title.justify = emitter_justify_left; + alloc_count_title.width = 25; + alloc_count_title.type = emitter_type_title; + alloc_count_title.str_val = ""; + + emitter_col_t alloc_count_allocated; + emitter_col_init(&alloc_count_allocated, &alloc_count_row); + alloc_count_allocated.justify = emitter_justify_right; + alloc_count_allocated.width = 12; + alloc_count_allocated.type = emitter_type_title; + alloc_count_allocated.str_val = "allocated"; + + emitter_col_t alloc_count_nmalloc; + emitter_col_init(&alloc_count_nmalloc, &alloc_count_row); + alloc_count_nmalloc.justify = emitter_justify_right; + alloc_count_nmalloc.width = 12; + alloc_count_nmalloc.type = emitter_type_title; + alloc_count_nmalloc.str_val = "nmalloc"; + + emitter_col_t alloc_count_ndalloc; + emitter_col_init(&alloc_count_ndalloc, &alloc_count_row); + alloc_count_ndalloc.justify = emitter_justify_right; + alloc_count_ndalloc.width = 12; + alloc_count_ndalloc.type = emitter_type_title; + alloc_count_ndalloc.str_val = "ndalloc"; + + emitter_col_t alloc_count_nrequests; + emitter_col_init(&alloc_count_nrequests, &alloc_count_row); + 
alloc_count_nrequests.justify = emitter_justify_right; + alloc_count_nrequests.width = 12; + alloc_count_nrequests.type = emitter_type_title; + alloc_count_nrequests.str_val = "nrequests"; + + emitter_table_row(emitter, &alloc_count_row); + +#define GET_AND_EMIT_ALLOC_STAT(small_or_large, name, valtype) \ + CTL_M2_GET("stats.arenas.0." #small_or_large "." #name, i, \ + &small_or_large##_##name, valtype##_t); \ + emitter_json_kv(emitter, #name, emitter_type_##valtype, \ + &small_or_large##_##name); \ + alloc_count_##name.type = emitter_type_##valtype; \ + alloc_count_##name.valtype##_val = small_or_large##_##name; + + emitter_json_dict_begin(emitter, "small"); + alloc_count_title.str_val = "small:"; + + GET_AND_EMIT_ALLOC_STAT(small, allocated, size) + GET_AND_EMIT_ALLOC_STAT(small, nmalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(small, ndalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(small, nrequests, uint64) + + emitter_table_row(emitter, &alloc_count_row); + emitter_json_dict_end(emitter); /* Close "small". */ + + emitter_json_dict_begin(emitter, "large"); + alloc_count_title.str_val = "large:"; + + GET_AND_EMIT_ALLOC_STAT(large, allocated, size) + GET_AND_EMIT_ALLOC_STAT(large, nmalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(large, ndalloc, uint64) + GET_AND_EMIT_ALLOC_STAT(large, nrequests, uint64) + + emitter_table_row(emitter, &alloc_count_row); + emitter_json_dict_end(emitter); /* Close "large". */ + +#undef GET_AND_EMIT_ALLOC_STAT + + /* Aggregated small + large stats are emitted only in table mode.
*/ + alloc_count_title.str_val = "total:"; + alloc_count_allocated.size_val = small_allocated + large_allocated; + alloc_count_nmalloc.uint64_val = small_nmalloc + large_nmalloc; + alloc_count_ndalloc.uint64_val = small_ndalloc + large_ndalloc; + alloc_count_nrequests.uint64_val = small_nrequests + large_nrequests; + emitter_table_row(emitter, &alloc_count_row); + + emitter_row_t mem_count_row; + emitter_row_init(&mem_count_row); + + emitter_col_t mem_count_title; + emitter_col_init(&mem_count_title, &mem_count_row); + mem_count_title.justify = emitter_justify_left; + mem_count_title.width = 25; + mem_count_title.type = emitter_type_title; + mem_count_title.str_val = ""; + + emitter_col_t mem_count_val; + emitter_col_init(&mem_count_val, &mem_count_row); + mem_count_val.justify = emitter_justify_right; + mem_count_val.width = 12; + mem_count_val.type = emitter_type_title; + mem_count_val.str_val = ""; + + emitter_table_row(emitter, &mem_count_row); + mem_count_val.type = emitter_type_size; + + /* Active count in bytes is emitted only in table mode. 
*/ + mem_count_title.str_val = "active:"; + mem_count_val.size_val = pactive * page; + emitter_table_row(emitter, &mem_count_row); + +#define GET_AND_EMIT_MEM_STAT(stat) \ + CTL_M2_GET("stats.arenas.0."#stat, i, &stat, size_t); \ + emitter_json_kv(emitter, #stat, emitter_type_size, &stat); \ + mem_count_title.str_val = #stat":"; \ + mem_count_val.size_val = stat; \ + emitter_table_row(emitter, &mem_count_row); + + GET_AND_EMIT_MEM_STAT(mapped) + GET_AND_EMIT_MEM_STAT(retained) + GET_AND_EMIT_MEM_STAT(base) + GET_AND_EMIT_MEM_STAT(internal) + GET_AND_EMIT_MEM_STAT(metadata_thp) + GET_AND_EMIT_MEM_STAT(tcache_bytes) + GET_AND_EMIT_MEM_STAT(resident) +#undef GET_AND_EMIT_MEM_STAT + if (mutex) { - stats_arena_mutexes_print(write_cb, cbopaque, json, - !(bins || large), i); + stats_arena_mutexes_print(emitter, i); } if (bins) { - stats_arena_bins_print(write_cb, cbopaque, json, large, mutex, - i); + stats_arena_bins_print(emitter, mutex, i); } if (large) { - stats_arena_lextents_print(write_cb, cbopaque, json, i); + stats_arena_lextents_print(emitter, i); } } static void -stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool more) { +stats_general_print(emitter_t *emitter) { const char *cpv; - bool bv; + bool bv, bv2; unsigned uv; uint32_t u32v; uint64_t u64v; - ssize_t ssv; + ssize_t ssv, ssv2; size_t sv, bsz, usz, ssz, sssz, cpsz; bsz = sizeof(bool); @@ -663,365 +839,248 @@ stats_general_print(void (*write_cb)(void *, const char *), void *cbopaque, cpsz = sizeof(const char *); CTL_GET("version", &cpv, const char *); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"version\": \"%s\",\n", cpv); - } else { - malloc_cprintf(write_cb, cbopaque, "Version: %s\n", cpv); - } + emitter_kv(emitter, "version", "Version", emitter_type_string, &cpv); /* config. */ -#define CONFIG_WRITE_BOOL_JSON(n, c) \ - if (json) { \ - CTL_GET("config."#n, &bv, bool); \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? 
"true" : "false", \ - (c)); \ + emitter_dict_begin(emitter, "config", "Build-time option settings"); +#define CONFIG_WRITE_BOOL(name) \ + do { \ + CTL_GET("config."#name, &bv, bool); \ + emitter_kv(emitter, #name, "config."#name, \ + emitter_type_bool, &bv); \ + } while (0) + + CONFIG_WRITE_BOOL(cache_oblivious); + CONFIG_WRITE_BOOL(debug); + CONFIG_WRITE_BOOL(fill); + CONFIG_WRITE_BOOL(lazy_lock); + emitter_kv(emitter, "malloc_conf", "config.malloc_conf", + emitter_type_string, &config_malloc_conf); + + CONFIG_WRITE_BOOL(prof); + CONFIG_WRITE_BOOL(prof_libgcc); + CONFIG_WRITE_BOOL(prof_libunwind); + CONFIG_WRITE_BOOL(stats); + CONFIG_WRITE_BOOL(utrace); + CONFIG_WRITE_BOOL(xmalloc); +#undef CONFIG_WRITE_BOOL + emitter_dict_end(emitter); /* Close "config" dict. */ + + /* opt. */ +#define OPT_WRITE(name, var, size, emitter_type) \ + if (je_mallctl("opt."name, (void *)&var, &size, NULL, 0) == \ + 0) { \ + emitter_kv(emitter, name, "opt."name, emitter_type, \ + &var); \ } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"config\": {\n"); +#define OPT_WRITE_MUTABLE(name, var1, var2, size, emitter_type, \ + altname) \ + if (je_mallctl("opt."name, (void *)&var1, &size, NULL, 0) == \ + 0 && je_mallctl(altname, (void *)&var2, &size, NULL, 0) \ + == 0) { \ + emitter_kv_note(emitter, name, "opt."name, \ + emitter_type, &var1, altname, emitter_type, \ + &var2); \ } - CONFIG_WRITE_BOOL_JSON(cache_oblivious, ",") +#define OPT_WRITE_BOOL(name) OPT_WRITE(name, bv, bsz, emitter_type_bool) +#define OPT_WRITE_BOOL_MUTABLE(name, altname) \ + OPT_WRITE_MUTABLE(name, bv, bv2, bsz, emitter_type_bool, altname) + +#define OPT_WRITE_UNSIGNED(name) \ + OPT_WRITE(name, uv, usz, emitter_type_unsigned) + +#define OPT_WRITE_SSIZE_T(name) \ + OPT_WRITE(name, ssv, sssz, emitter_type_ssize) +#define OPT_WRITE_SSIZE_T_MUTABLE(name, altname) \ + OPT_WRITE_MUTABLE(name, ssv, ssv2, sssz, emitter_type_ssize, \ + altname) + +#define OPT_WRITE_CHAR_P(name) \ + OPT_WRITE(name, cpv, cpsz, 
emitter_type_string) + + emitter_dict_begin(emitter, "opt", "Run-time option settings"); + + OPT_WRITE_BOOL("abort") + OPT_WRITE_BOOL("abort_conf") + OPT_WRITE_BOOL("retain") + OPT_WRITE_CHAR_P("dss") + OPT_WRITE_UNSIGNED("narenas") + OPT_WRITE_CHAR_P("percpu_arena") + OPT_WRITE_CHAR_P("metadata_thp") + OPT_WRITE_BOOL_MUTABLE("background_thread", "background_thread") + OPT_WRITE_SSIZE_T_MUTABLE("dirty_decay_ms", "arenas.dirty_decay_ms") + OPT_WRITE_SSIZE_T_MUTABLE("muzzy_decay_ms", "arenas.muzzy_decay_ms") + OPT_WRITE_UNSIGNED("lg_extent_max_active_fit") + OPT_WRITE_CHAR_P("junk") + OPT_WRITE_BOOL("zero") + OPT_WRITE_BOOL("utrace") + OPT_WRITE_BOOL("xmalloc") + OPT_WRITE_BOOL("tcache") + OPT_WRITE_SSIZE_T("lg_tcache_max") + OPT_WRITE_CHAR_P("thp") + OPT_WRITE_BOOL("prof") + OPT_WRITE_CHAR_P("prof_prefix") + OPT_WRITE_BOOL_MUTABLE("prof_active", "prof.active") + OPT_WRITE_BOOL_MUTABLE("prof_thread_active_init", + "prof.thread_active_init") + OPT_WRITE_SSIZE_T_MUTABLE("lg_prof_sample", "prof.lg_sample") + OPT_WRITE_BOOL("prof_accum") + OPT_WRITE_SSIZE_T("lg_prof_interval") + OPT_WRITE_BOOL("prof_gdump") + OPT_WRITE_BOOL("prof_final") + OPT_WRITE_BOOL("prof_leak") + OPT_WRITE_BOOL("stats_print") + OPT_WRITE_CHAR_P("stats_print_opts") + + emitter_dict_end(emitter); + +#undef OPT_WRITE +#undef OPT_WRITE_MUTABLE +#undef OPT_WRITE_BOOL +#undef OPT_WRITE_BOOL_MUTABLE +#undef OPT_WRITE_UNSIGNED +#undef OPT_WRITE_SSIZE_T +#undef OPT_WRITE_SSIZE_T_MUTABLE +#undef OPT_WRITE_CHAR_P + + /* prof. */ + if (config_prof) { + emitter_dict_begin(emitter, "prof", "Profiling settings"); - CTL_GET("config.debug", &bv, bool); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"debug\": %s,\n", bv ? "true" : "false"); - } else { - malloc_cprintf(write_cb, cbopaque, "Assertions %s\n", - bv ? 
"enabled" : "disabled"); - } + CTL_GET("prof.thread_active_init", &bv, bool); + emitter_kv(emitter, "thread_active_init", + "prof.thread_active_init", emitter_type_bool, &bv); - CONFIG_WRITE_BOOL_JSON(fill, ",") - CONFIG_WRITE_BOOL_JSON(lazy_lock, ",") + CTL_GET("prof.active", &bv, bool); + emitter_kv(emitter, "active", "prof.active", emitter_type_bool, + &bv); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"malloc_conf\": \"%s\",\n", - config_malloc_conf); - } else { - malloc_cprintf(write_cb, cbopaque, - "config.malloc_conf: \"%s\"\n", config_malloc_conf); - } + CTL_GET("prof.gdump", &bv, bool); + emitter_kv(emitter, "gdump", "prof.gdump", emitter_type_bool, + &bv); - CONFIG_WRITE_BOOL_JSON(prof, ",") - CONFIG_WRITE_BOOL_JSON(prof_libgcc, ",") - CONFIG_WRITE_BOOL_JSON(prof_libunwind, ",") - CONFIG_WRITE_BOOL_JSON(stats, ",") - CONFIG_WRITE_BOOL_JSON(thp, ",") - CONFIG_WRITE_BOOL_JSON(utrace, ",") - CONFIG_WRITE_BOOL_JSON(xmalloc, "") - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); - } -#undef CONFIG_WRITE_BOOL_JSON + CTL_GET("prof.interval", &u64v, uint64_t); + emitter_kv(emitter, "interval", "prof.interval", + emitter_type_uint64, &u64v); - /* opt. */ -#define OPT_WRITE_BOOL(n, c) \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ - "false", (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s\n", bv ? "true" : "false"); \ - } \ - } -#define OPT_WRITE_BOOL_MUTABLE(n, m, c) { \ - bool bv2; \ - if (je_mallctl("opt."#n, (void *)&bv, &bsz, NULL, 0) == 0 && \ - je_mallctl(#m, (void *)&bv2, &bsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %s%s\n", bv ? "true" : \ - "false", (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %s ("#m": %s)\n", bv ? "true" \ - : "false", bv2 ? 
"true" : "false"); \ - } \ - } \ -} -#define OPT_WRITE_UNSIGNED(n, c) \ - if (je_mallctl("opt."#n, (void *)&uv, &usz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %u%s\n", uv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %u\n", uv); \ - } \ - } -#define OPT_WRITE_SSIZE_T(n, c) \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd\n", ssv); \ - } \ - } -#define OPT_WRITE_SSIZE_T_MUTABLE(n, m, c) { \ - ssize_t ssv2; \ - if (je_mallctl("opt."#n, (void *)&ssv, &sssz, NULL, 0) == 0 && \ - je_mallctl(#m, (void *)&ssv2, &sssz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": %zd%s\n", ssv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": %zd ("#m": %zd)\n", \ - ssv, ssv2); \ - } \ - } \ -} -#define OPT_WRITE_CHAR_P(n, c) \ - if (je_mallctl("opt."#n, (void *)&cpv, &cpsz, NULL, 0) == 0) { \ - if (json) { \ - malloc_cprintf(write_cb, cbopaque, \ - "\t\t\t\""#n"\": \"%s\"%s\n", cpv, (c)); \ - } else { \ - malloc_cprintf(write_cb, cbopaque, \ - " opt."#n": \"%s\"\n", cpv); \ - } \ - } + CTL_GET("prof.lg_sample", &ssv, ssize_t); + emitter_kv(emitter, "lg_sample", "prof.lg_sample", + emitter_type_ssize, &ssv); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"opt\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "Run-time option settings:\n"); - } - OPT_WRITE_BOOL(abort, ",") - OPT_WRITE_BOOL(abort_conf, ",") - OPT_WRITE_BOOL(retain, ",") - OPT_WRITE_CHAR_P(dss, ",") - OPT_WRITE_UNSIGNED(narenas, ",") - OPT_WRITE_CHAR_P(percpu_arena, ",") - OPT_WRITE_BOOL_MUTABLE(background_thread, background_thread, ",") - OPT_WRITE_SSIZE_T_MUTABLE(dirty_decay_ms, arenas.dirty_decay_ms, ",") - OPT_WRITE_SSIZE_T_MUTABLE(muzzy_decay_ms, 
arenas.muzzy_decay_ms, ",") - OPT_WRITE_CHAR_P(junk, ",") - OPT_WRITE_BOOL(zero, ",") - OPT_WRITE_BOOL(utrace, ",") - OPT_WRITE_BOOL(xmalloc, ",") - OPT_WRITE_BOOL(tcache, ",") - OPT_WRITE_SSIZE_T(lg_tcache_max, ",") - OPT_WRITE_BOOL(prof, ",") - OPT_WRITE_CHAR_P(prof_prefix, ",") - OPT_WRITE_BOOL_MUTABLE(prof_active, prof.active, ",") - OPT_WRITE_BOOL_MUTABLE(prof_thread_active_init, prof.thread_active_init, - ",") - OPT_WRITE_SSIZE_T_MUTABLE(lg_prof_sample, prof.lg_sample, ",") - OPT_WRITE_BOOL(prof_accum, ",") - OPT_WRITE_SSIZE_T(lg_prof_interval, ",") - OPT_WRITE_BOOL(prof_gdump, ",") - OPT_WRITE_BOOL(prof_final, ",") - OPT_WRITE_BOOL(prof_leak, ",") - OPT_WRITE_BOOL(stats_print, ",") - if (json || opt_stats_print) { - /* - * stats_print_opts is always emitted for JSON, so as long as it - * comes last it's safe to unconditionally omit the comma here - * (rather than having to conditionally omit it elsewhere - * depending on configuration). - */ - OPT_WRITE_CHAR_P(stats_print_opts, "") - } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t},\n"); + emitter_dict_end(emitter); /* Close "prof". */ } -#undef OPT_WRITE_BOOL -#undef OPT_WRITE_BOOL_MUTABLE -#undef OPT_WRITE_SSIZE_T -#undef OPT_WRITE_CHAR_P - /* arenas. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"arenas\": {\n"); - } + /* + * The json output sticks arena info into an "arenas" dict; the table + * output puts them at the top-level. 
+ */ + emitter_json_dict_begin(emitter, "arenas"); CTL_GET("arenas.narenas", &uv, unsigned); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"narenas\": %u,\n", uv); - } else { - malloc_cprintf(write_cb, cbopaque, "Arenas: %u\n", uv); - } + emitter_kv(emitter, "narenas", "Arenas", emitter_type_unsigned, &uv); - if (json) { - CTL_GET("arenas.dirty_decay_ms", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"dirty_decay_ms\": %zd,\n", ssv); + /* + * Decay settings are emitted only in json mode; in table mode, they're + * emitted as notes with the opt output, above. + */ + CTL_GET("arenas.dirty_decay_ms", &ssv, ssize_t); + emitter_json_kv(emitter, "dirty_decay_ms", emitter_type_ssize, &ssv); - CTL_GET("arenas.muzzy_decay_ms", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"muzzy_decay_ms\": %zd,\n", ssv); - } + CTL_GET("arenas.muzzy_decay_ms", &ssv, ssize_t); + emitter_json_kv(emitter, "muzzy_decay_ms", emitter_type_ssize, &ssv); CTL_GET("arenas.quantum", &sv, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"quantum\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, "Quantum size: %zu\n", sv); - } + emitter_kv(emitter, "quantum", "Quantum size", emitter_type_size, &sv); CTL_GET("arenas.page", &sv, size_t); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"page\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, "Page size: %zu\n", sv); - } + emitter_kv(emitter, "page", "Page size", emitter_type_size, &sv); if (je_mallctl("arenas.tcache_max", (void *)&sv, &ssz, NULL, 0) == 0) { - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"tcache_max\": %zu,\n", sv); - } else { - malloc_cprintf(write_cb, cbopaque, - "Maximum thread-cached size class: %zu\n", sv); - } + emitter_kv(emitter, "tcache_max", + "Maximum thread-cached size class", emitter_type_size, &sv); } - if (json) { - unsigned nbins, nlextents, i; - - CTL_GET("arenas.nbins", &nbins, unsigned); - 
malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nbins\": %u,\n", nbins); + unsigned nbins; + CTL_GET("arenas.nbins", &nbins, unsigned); + emitter_kv(emitter, "nbins", "Number of bin size classes", + emitter_type_unsigned, &nbins); - CTL_GET("arenas.nhbins", &uv, unsigned); - malloc_cprintf(write_cb, cbopaque, "\t\t\t\"nhbins\": %u,\n", - uv); + unsigned nhbins; + CTL_GET("arenas.nhbins", &nhbins, unsigned); + emitter_kv(emitter, "nhbins", "Number of thread-cache bin size classes", + emitter_type_unsigned, &nhbins); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"bin\": [\n"); - for (i = 0; i < nbins; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); + /* + * We do enough mallctls in a loop that we actually want to omit them + * (not just omit the printing). + */ + if (emitter->output == emitter_output_json) { + emitter_json_arr_begin(emitter, "bin"); + for (unsigned i = 0; i < nbins; i++) { + emitter_json_arr_obj_begin(emitter); CTL_M2_GET("arenas.bin.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu,\n", sv); + emitter_json_kv(emitter, "size", emitter_type_size, + &sv); CTL_M2_GET("arenas.bin.0.nregs", i, &u32v, uint32_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"nregs\": %"FMTu32",\n", u32v); + emitter_json_kv(emitter, "nregs", emitter_type_uint32, + &u32v); CTL_M2_GET("arenas.bin.0.slab_size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"slab_size\": %zu\n", sv); + emitter_json_kv(emitter, "slab_size", emitter_type_size, + &sv); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nbins) ? "," : ""); + emitter_json_arr_obj_end(emitter); } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t],\n"); + emitter_json_arr_end(emitter); /* Close "bin". 
*/ + } - CTL_GET("arenas.nlextents", &nlextents, unsigned); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"nlextents\": %u,\n", nlextents); + unsigned nlextents; + CTL_GET("arenas.nlextents", &nlextents, unsigned); + emitter_kv(emitter, "nlextents", "Number of large size classes", + emitter_type_unsigned, &nlextents); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lextent\": [\n"); - for (i = 0; i < nlextents; i++) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t{\n"); + if (emitter->output == emitter_output_json) { + emitter_json_arr_begin(emitter, "lextent"); + for (unsigned i = 0; i < nlextents; i++) { + emitter_json_arr_obj_begin(emitter); CTL_M2_GET("arenas.lextent.0.size", i, &sv, size_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\t\"size\": %zu\n", sv); + emitter_json_kv(emitter, "size", emitter_type_size, + &sv); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t}%s\n", (i + 1 < nlextents) ? "," : ""); + emitter_json_arr_obj_end(emitter); } - malloc_cprintf(write_cb, cbopaque, - "\t\t\t]\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (config_prof || more) ? "," : ""); - } - - /* prof. */ - if (config_prof && json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"prof\": {\n"); - - CTL_GET("prof.thread_active_init", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"thread_active_init\": %s,\n", bv ? "true" : - "false"); - - CTL_GET("prof.active", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"active\": %s,\n", bv ? "true" : "false"); - - CTL_GET("prof.gdump", &bv, bool); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"gdump\": %s,\n", bv ? "true" : "false"); - - CTL_GET("prof.interval", &u64v, uint64_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"interval\": %"FMTu64",\n", u64v); - - CTL_GET("prof.lg_sample", &ssv, ssize_t); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"lg_sample\": %zd\n", ssv); - - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", more ? 
"," : ""); + emitter_json_arr_end(emitter); /* Close "lextent". */ } -} - -static void -read_global_mutex_stats( - uint64_t results[mutex_prof_num_global_mutexes][mutex_prof_num_counters]) { - char cmd[MUTEX_CTL_STR_MAX_LENGTH]; - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { -#define OP(c, t) \ - gen_mutex_ctl_str(cmd, MUTEX_CTL_STR_MAX_LENGTH, \ - "mutexes", global_mutex_names[i], #c); \ - CTL_GET(cmd, (t *)&results[i][mutex_counter_##c], t); -MUTEX_PROF_COUNTERS -#undef OP - } + emitter_json_dict_end(emitter); /* Close "arenas" */ } static void -stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, - bool json, bool merged, bool destroyed, bool unmerged, bool bins, - bool large, bool mutex) { - size_t allocated, active, metadata, resident, mapped, retained; +stats_print_helper(emitter_t *emitter, bool merged, bool destroyed, + bool unmerged, bool bins, bool large, bool mutex) { + /* + * These should be deleted. We keep them around for a while, to aid in + * the transition to the emitter code. 
+ */ + size_t allocated, active, metadata, metadata_thp, resident, mapped, + retained; size_t num_background_threads; uint64_t background_thread_num_runs, background_thread_run_interval; CTL_GET("stats.allocated", &allocated, size_t); CTL_GET("stats.active", &active, size_t); CTL_GET("stats.metadata", &metadata, size_t); + CTL_GET("stats.metadata_thp", &metadata_thp, size_t); CTL_GET("stats.resident", &resident, size_t); CTL_GET("stats.mapped", &mapped, size_t); CTL_GET("stats.retained", &retained, size_t); - uint64_t mutex_stats[mutex_prof_num_global_mutexes][mutex_prof_num_counters]; - if (mutex) { - read_global_mutex_stats(mutex_stats); - } - if (have_background_thread) { CTL_GET("stats.background_thread.num_threads", &num_background_threads, size_t); @@ -1035,182 +1094,130 @@ stats_print_helper(void (*write_cb)(void *, const char *), void *cbopaque, background_thread_run_interval = 0; } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"stats\": {\n"); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"allocated\": %zu,\n", allocated); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"active\": %zu,\n", active); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"metadata\": %zu,\n", metadata); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"resident\": %zu,\n", resident); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"mapped\": %zu,\n", mapped); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"retained\": %zu,\n", retained); - - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"background_thread\": {\n"); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"num_threads\": %zu,\n", num_background_threads); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"num_runs\": %"FMTu64",\n", - background_thread_num_runs); - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\t\"run_interval\": %"FMTu64"\n", - background_thread_run_interval); - malloc_cprintf(write_cb, cbopaque, "\t\t\t}%s\n", - mutex ? "," : ""); + /* Generic global stats. 
*/ + emitter_json_dict_begin(emitter, "stats"); + emitter_json_kv(emitter, "allocated", emitter_type_size, &allocated); + emitter_json_kv(emitter, "active", emitter_type_size, &active); + emitter_json_kv(emitter, "metadata", emitter_type_size, &metadata); + emitter_json_kv(emitter, "metadata_thp", emitter_type_size, + &metadata_thp); + emitter_json_kv(emitter, "resident", emitter_type_size, &resident); + emitter_json_kv(emitter, "mapped", emitter_type_size, &mapped); + emitter_json_kv(emitter, "retained", emitter_type_size, &retained); + + emitter_table_printf(emitter, "Allocated: %zu, active: %zu, " + "metadata: %zu (n_thp %zu), resident: %zu, mapped: %zu, " + "retained: %zu\n", allocated, active, metadata, metadata_thp, + resident, mapped, retained); + + /* Background thread stats. */ + emitter_json_dict_begin(emitter, "background_thread"); + emitter_json_kv(emitter, "num_threads", emitter_type_size, + &num_background_threads); + emitter_json_kv(emitter, "num_runs", emitter_type_uint64, + &background_thread_num_runs); + emitter_json_kv(emitter, "run_interval", emitter_type_uint64, + &background_thread_run_interval); + emitter_json_dict_end(emitter); /* Close "background_thread". */ + + emitter_table_printf(emitter, "Background threads: %zu, " + "num_runs: %"FMTu64", run_interval: %"FMTu64" ns\n", + num_background_threads, background_thread_num_runs, + background_thread_run_interval); - if (mutex) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"mutexes\": {\n"); - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { - mutex_stats_output_json(write_cb, cbopaque, - global_mutex_names[i], mutex_stats[i], - "\t\t\t\t", - i == mutex_prof_num_global_mutexes - 1); - } - malloc_cprintf(write_cb, cbopaque, "\t\t\t}\n"); - } - malloc_cprintf(write_cb, cbopaque, - "\t\t}%s\n", (merged || unmerged || destroyed) ? 
"," : ""); - } else { - malloc_cprintf(write_cb, cbopaque, - "Allocated: %zu, active: %zu, metadata: %zu," - " resident: %zu, mapped: %zu, retained: %zu\n", - allocated, active, metadata, resident, mapped, retained); - - if (have_background_thread && num_background_threads > 0) { - malloc_cprintf(write_cb, cbopaque, - "Background threads: %zu, num_runs: %"FMTu64", " - "run_interval: %"FMTu64" ns\n", - num_background_threads, - background_thread_num_runs, - background_thread_run_interval); - } - if (mutex) { - mutex_prof_global_ind_t i; - for (i = 0; i < mutex_prof_num_global_mutexes; i++) { - mutex_stats_output(write_cb, cbopaque, - global_mutex_names[i], mutex_stats[i], - i == 0); - } + if (mutex) { + emitter_row_t row; + emitter_col_t name; + emitter_col_t col64[mutex_prof_num_uint64_t_counters]; + emitter_col_t col32[mutex_prof_num_uint32_t_counters]; + + emitter_row_init(&row); + mutex_stats_init_cols(&row, "", &name, col64, col32); + + emitter_table_row(emitter, &row); + emitter_json_dict_begin(emitter, "mutexes"); + + for (int i = 0; i < mutex_prof_num_global_mutexes; i++) { + mutex_stats_read_global(global_mutex_names[i], &name, + col64, col32); + emitter_json_dict_begin(emitter, global_mutex_names[i]); + mutex_stats_emit(emitter, &row, col64, col32); + emitter_json_dict_end(emitter); } + + emitter_json_dict_end(emitter); /* Close "mutexes". */ } + emitter_json_dict_end(emitter); /* Close "stats". 
*/ + if (merged || destroyed || unmerged) { unsigned narenas; - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\"stats.arenas\": {\n"); - } + emitter_json_dict_begin(emitter, "stats.arenas"); CTL_GET("arenas.narenas", &narenas, unsigned); - { - size_t mib[3]; - size_t miblen = sizeof(mib) / sizeof(size_t); - size_t sz; - VARIABLE_ARRAY(bool, initialized, narenas); - bool destroyed_initialized; - unsigned i, j, ninitialized; - - xmallctlnametomib("arena.0.initialized", mib, &miblen); - for (i = ninitialized = 0; i < narenas; i++) { - mib[1] = i; - sz = sizeof(bool); - xmallctlbymib(mib, miblen, &initialized[i], &sz, - NULL, 0); - if (initialized[i]) { - ninitialized++; - } - } - mib[1] = MALLCTL_ARENAS_DESTROYED; + size_t mib[3]; + size_t miblen = sizeof(mib) / sizeof(size_t); + size_t sz; + VARIABLE_ARRAY(bool, initialized, narenas); + bool destroyed_initialized; + unsigned i, j, ninitialized; + + xmallctlnametomib("arena.0.initialized", mib, &miblen); + for (i = ninitialized = 0; i < narenas; i++) { + mib[1] = i; sz = sizeof(bool); - xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, + xmallctlbymib(mib, miblen, &initialized[i], &sz, NULL, 0); - - /* Merged stats. */ - if (merged && (ninitialized > 1 || !unmerged)) { - /* Print merged arena stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"merged\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "\nMerged arenas stats:\n"); - } - stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_ALL, bins, large, mutex); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", - ((destroyed_initialized && - destroyed) || unmerged) ? "," : - ""); - } + if (initialized[i]) { + ninitialized++; } + } + mib[1] = MALLCTL_ARENAS_DESTROYED; + sz = sizeof(bool); + xmallctlbymib(mib, miblen, &destroyed_initialized, &sz, + NULL, 0); + + /* Merged stats. */ + if (merged && (ninitialized > 1 || !unmerged)) { + /* Print merged arena stats. 
*/ + emitter_table_printf(emitter, "Merged arenas stats:\n"); + emitter_json_dict_begin(emitter, "merged"); + stats_arena_print(emitter, MALLCTL_ARENAS_ALL, bins, + large, mutex); + emitter_json_dict_end(emitter); /* Close "merged". */ + } - /* Destroyed stats. */ - if (destroyed_initialized && destroyed) { - /* Print destroyed arena stats. */ - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t\"destroyed\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "\nDestroyed arenas stats:\n"); - } - stats_arena_print(write_cb, cbopaque, json, - MALLCTL_ARENAS_DESTROYED, bins, large, - mutex); - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t\t}%s\n", unmerged ? "," : - ""); - } - } + /* Destroyed stats. */ + if (destroyed_initialized && destroyed) { + /* Print destroyed arena stats. */ + emitter_table_printf(emitter, + "Destroyed arenas stats:\n"); + emitter_json_dict_begin(emitter, "destroyed"); + stats_arena_print(emitter, MALLCTL_ARENAS_DESTROYED, + bins, large, mutex); + emitter_json_dict_end(emitter); /* Close "destroyed". */ + } - /* Unmerged stats. */ - if (unmerged) { - for (i = j = 0; i < narenas; i++) { - if (initialized[i]) { - if (json) { - j++; - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t\"%u\": {\n", - i); - } else { - malloc_cprintf(write_cb, - cbopaque, - "\narenas[%u]:\n", - i); - } - stats_arena_print(write_cb, - cbopaque, json, i, bins, - large, mutex); - if (json) { - malloc_cprintf(write_cb, - cbopaque, - "\t\t\t}%s\n", (j < - ninitialized) ? "," - : ""); - } - } + /* Unmerged stats. */ + if (unmerged) { + for (i = j = 0; i < narenas; i++) { + if (initialized[i]) { + char arena_ind_str[20]; + malloc_snprintf(arena_ind_str, + sizeof(arena_ind_str), "%u", i); + emitter_json_dict_begin(emitter, + arena_ind_str); + emitter_table_printf(emitter, + "arenas[%s]:\n", arena_ind_str); + stats_arena_print(emitter, i, bins, + large, mutex); + /* Close "". 
*/ + emitter_json_dict_end(emitter); } } } - - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t\t}\n"); - } + emitter_json_dict_end(emitter); /* Close "stats.arenas". */ } } @@ -1257,29 +1264,23 @@ stats_print(void (*write_cb)(void *, const char *), void *cbopaque, } } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "{\n" - "\t\"jemalloc\": {\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "___ Begin jemalloc statistics ___\n"); - } + emitter_t emitter; + emitter_init(&emitter, + json ? emitter_output_json : emitter_output_table, write_cb, + cbopaque); + emitter_begin(&emitter); + emitter_table_printf(&emitter, "___ Begin jemalloc statistics ___\n"); + emitter_json_dict_begin(&emitter, "jemalloc"); if (general) { - stats_general_print(write_cb, cbopaque, json, config_stats); + stats_general_print(&emitter); } if (config_stats) { - stats_print_helper(write_cb, cbopaque, json, merged, destroyed, - unmerged, bins, large, mutex); + stats_print_helper(&emitter, merged, destroyed, unmerged, + bins, large, mutex); } - if (json) { - malloc_cprintf(write_cb, cbopaque, - "\t}\n" - "}\n"); - } else { - malloc_cprintf(write_cb, cbopaque, - "--- End jemalloc statistics ---\n"); - } + emitter_json_dict_end(&emitter); /* Closes the "jemalloc" dict. */ + emitter_table_printf(&emitter, "--- End jemalloc statistics ---\n"); + emitter_end(&emitter); } diff --git a/contrib/jemalloc/src/sz.c b/contrib/jemalloc/src/sz.c index 0986615f711..9de77e45fff 100644 --- a/contrib/jemalloc/src/sz.c +++ b/contrib/jemalloc/src/sz.c @@ -26,7 +26,8 @@ const size_t sz_index2size_tab[NSIZES] = { JEMALLOC_ALIGNED(CACHELINE) const uint8_t sz_size2index_tab[] = { #if LG_TINY_MIN == 0 -#warning "Dangerous LG_TINY_MIN" +/* The div module doesn't support division by 1. 
*/ +#error "Unsupported LG_TINY_MIN" #define S2B_0(i) i, #elif LG_TINY_MIN == 1 #warning "Dangerous LG_TINY_MIN" diff --git a/contrib/jemalloc/src/tcache.c b/contrib/jemalloc/src/tcache.c index 936ef3140d5..a769a6b17bc 100644 --- a/contrib/jemalloc/src/tcache.c +++ b/contrib/jemalloc/src/tcache.c @@ -12,7 +12,7 @@ bool opt_tcache = true; ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT; -tcache_bin_info_t *tcache_bin_info; +cache_bin_info_t *tcache_bin_info; static unsigned stack_nelms; /* Total stack elms per tcache. */ unsigned nhbins; @@ -40,7 +40,7 @@ void tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { szind_t binind = tcache->next_gc_bin; - tcache_bin_t *tbin; + cache_bin_t *tbin; if (binind < NBINS) { tbin = tcache_small_bin_get(tcache, binind); } else { @@ -58,7 +58,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { * Reduce fill count by 2X. Limit lg_fill_div such that * the fill count is always at least 1. */ - tcache_bin_info_t *tbin_info = &tcache_bin_info[binind]; + cache_bin_info_t *tbin_info = &tcache_bin_info[binind]; if ((tbin_info->ncached_max >> (tcache->lg_fill_div[binind] + 1)) >= 1) { tcache->lg_fill_div[binind]++; @@ -86,7 +86,7 @@ tcache_event_hard(tsd_t *tsd, tcache_t *tcache) { void * tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, - tcache_bin_t *tbin, szind_t binind, bool *tcache_success) { + cache_bin_t *tbin, szind_t binind, bool *tcache_success) { void *ret; assert(tcache->arena != NULL); @@ -95,18 +95,18 @@ tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache, if (config_prof) { tcache->prof_accumbytes = 0; } - ret = tcache_alloc_easy(tbin, tcache_success); + ret = cache_bin_alloc_easy(tbin, tcache_success); return ret; } void -tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, +tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin, szind_t binind, unsigned rem) { bool merged_stats = false; assert(binind < NBINS); - assert(rem <= 
tbin->ncached); + assert((cache_bin_sz_t)rem <= tbin->ncached); arena_t *arena = tcache->arena; assert(arena != NULL); @@ -121,7 +121,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, /* Lock the arena bin associated with the first object. */ extent_t *extent = item_extent[0]; arena_t *bin_arena = extent_arena_get(extent); - arena_bin_t *bin = &bin_arena->bins[binind]; + bin_t *bin = &bin_arena->bins[binind]; if (config_prof && bin_arena == arena) { if (arena_prof_accum(tsd_tsdn(tsd), arena, @@ -169,7 +169,7 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, * The flush loop didn't happen to flush to this thread's * arena, so the stats didn't get merged. Manually do so now. */ - arena_bin_t *bin = &arena->bins[binind]; + bin_t *bin = &arena->bins[binind]; malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock); bin->stats.nflushes++; bin->stats.nrequests += tbin->tstats.nrequests; @@ -180,18 +180,18 @@ tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, tcache_bin_t *tbin, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((low_water_t)tbin->ncached < tbin->low_water) { + if (tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } void -tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, +tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind, unsigned rem, tcache_t *tcache) { bool merged_stats = false; assert(binind < nhbins); - assert(rem <= tbin->ncached); + assert((cache_bin_sz_t)rem <= tbin->ncached); arena_t *arena = tcache->arena; assert(arena != NULL); @@ -278,7 +278,7 @@ tcache_bin_flush_large(tsd_t *tsd, tcache_bin_t *tbin, szind_t binind, memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem * sizeof(void *)); tbin->ncached = rem; - if ((low_water_t)tbin->ncached < tbin->low_water) { + if (tbin->ncached < tbin->low_water) { tbin->low_water = tbin->ncached; } } @@ -291,8 +291,15 @@ 
tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { if (config_stats) { /* Link into list of extant tcaches. */ malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx); + ql_elm_new(tcache, link); ql_tail_insert(&arena->tcache_ql, tcache, link); + cache_bin_array_descriptor_init( + &tcache->cache_bin_array_descriptor, tcache->bins_small, + tcache->bins_large); + ql_tail_insert(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); + malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } } @@ -316,6 +323,8 @@ tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) { assert(in_ql); } ql_remove(&arena->tcache_ql, tcache, link); + ql_remove(&arena->cache_bin_array_descriptor_ql, + &tcache->cache_bin_array_descriptor, link); tcache_stats_merge(tsdn, tcache, arena); malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx); } @@ -354,8 +363,8 @@ tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) { size_t stack_offset = 0; assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0); - memset(tcache->tbins_small, 0, sizeof(tcache_bin_t) * NBINS); - memset(tcache->tbins_large, 0, sizeof(tcache_bin_t) * (nhbins - NBINS)); + memset(tcache->bins_small, 0, sizeof(cache_bin_t) * NBINS); + memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - NBINS)); unsigned i = 0; for (; i < NBINS; i++) { tcache->lg_fill_div[i] = 1; @@ -450,7 +459,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { assert(tcache->arena != NULL); for (unsigned i = 0; i < NBINS; i++) { - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); tcache_bin_flush_small(tsd, tcache, tbin, i, 0); if (config_stats) { @@ -458,7 +467,7 @@ tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) { } } for (unsigned i = NBINS; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); tcache_bin_flush_large(tsd, tbin, i, 0, tcache); if (config_stats) { @@ -524,8 +533,8 @@ 
tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { /* Merge and reset tcache stats. */ for (i = 0; i < NBINS; i++) { - arena_bin_t *bin = &arena->bins[i]; - tcache_bin_t *tbin = tcache_small_bin_get(tcache, i); + bin_t *bin = &arena->bins[i]; + cache_bin_t *tbin = tcache_small_bin_get(tcache, i); malloc_mutex_lock(tsdn, &bin->lock); bin->stats.nrequests += tbin->tstats.nrequests; malloc_mutex_unlock(tsdn, &bin->lock); @@ -533,7 +542,7 @@ tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) { } for (; i < nhbins; i++) { - tcache_bin_t *tbin = tcache_large_bin_get(tcache, i); + cache_bin_t *tbin = tcache_large_bin_get(tcache, i); arena_stats_large_nrequests_add(tsdn, &arena->stats, i, tbin->tstats.nrequests); tbin->tstats.nrequests = 0; @@ -657,21 +666,21 @@ tcache_boot(tsdn_t *tsdn) { nhbins = sz_size2index(tcache_maxclass) + 1; /* Initialize tcache_bin_info. */ - tcache_bin_info = (tcache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins - * sizeof(tcache_bin_info_t), CACHELINE); + tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins + * sizeof(cache_bin_info_t), CACHELINE); if (tcache_bin_info == NULL) { return true; } stack_nelms = 0; unsigned i; for (i = 0; i < NBINS; i++) { - if ((arena_bin_info[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { + if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MIN; - } else if ((arena_bin_info[i].nregs << 1) <= + } else if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) { tcache_bin_info[i].ncached_max = - (arena_bin_info[i].nregs << 1); + (bin_infos[i].nregs << 1); } else { tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX; diff --git a/contrib/jemalloc/src/tsd.c b/contrib/jemalloc/src/tsd.c index f968992f2b5..c1430682dd5 100644 --- a/contrib/jemalloc/src/tsd.c +++ b/contrib/jemalloc/src/tsd.c @@ -71,6 +71,16 @@ tsd_data_init(tsd_t *tsd) { */ rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd)); + /* + * A 
nondeterministic seed based on the address of tsd reduces + * the likelihood of lockstep non-uniform cache index + * utilization among identical concurrent processes, but at the + * cost of test repeatability. For debug builds, instead use a + * deterministic seed. + */ + *tsd_offset_statep_get(tsd) = config_debug ? 0 : + (uint64_t)(uintptr_t)tsd; + return tsd_tcache_enabled_data_init(tsd); } diff --git a/lib/libc/stdlib/jemalloc/Makefile.inc b/lib/libc/stdlib/jemalloc/Makefile.inc index b9adf47b955..32c072f7ee8 100644 --- a/lib/libc/stdlib/jemalloc/Makefile.inc +++ b/lib/libc/stdlib/jemalloc/Makefile.inc @@ -2,10 +2,10 @@ .PATH: ${LIBC_SRCTOP}/stdlib/jemalloc -JEMALLOCSRCS:= jemalloc.c arena.c background_thread.c base.c bitmap.c ckh.c \ - ctl.c extent.c extent_dss.c extent_mmap.c hash.c hooks.c large.c \ - malloc_io.c mutex.c mutex_pool.c nstime.c pages.c prng.c prof.c \ - rtree.c stats.c sz.c tcache.c ticker.c tsd.c witness.c +JEMALLOCSRCS:= jemalloc.c arena.c background_thread.c base.c bin.c bitmap.c \ + ckh.c ctl.c div.c extent.c extent_dss.c extent_mmap.c hash.c hooks.c \ + large.c log.c malloc_io.c mutex.c mutex_pool.c nstime.c pages.c \ + prng.c prof.c rtree.c stats.c sz.c tcache.c ticker.c tsd.c witness.c SYM_MAPS+=${LIBC_SRCTOP}/stdlib/jemalloc/Symbol.map -- 2.45.0