#ifndef JEMALLOC_INTERNAL_TSD_H
#define JEMALLOC_INTERNAL_TSD_H

#include "jemalloc/internal/arena_types.h"
#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/jemalloc_internal_externs.h"
#include "jemalloc/internal/prof_types.h"
#include "jemalloc/internal/ql.h"
#include "jemalloc/internal/rtree_tsd.h"
#include "jemalloc/internal/tcache_types.h"
#include "jemalloc/internal/tcache_structs.h"
#include "jemalloc/internal/util.h"
#include "jemalloc/internal/witness.h"
/*
 * Thread-Specific-Data layout
 * --- data accessed on tcache fast path: state, rtree_ctx, stats, prof ---
 * s: state
 * e: tcache_enabled
 * m: thread_allocated (config_stats)
 * f: thread_deallocated (config_stats)
 * p: prof_tdata (config_prof)
 * c: rtree_ctx (rtree cache accessed on deallocation)
 * t: tcache
 * --- data not accessed on tcache fast path: arena-related fields ---
 * d: arenas_tdata_bypass
 * r: reentrancy_level
 * x: narenas_tdata
 * i: iarena
 * a: arena
 * o: arenas_tdata
 *
 * Loading TSD data is on the critical path of basically all malloc operations.
 * In particular, tcache and rtree_ctx rely on hot CPU cache to be effective.
 * Use a compact layout to reduce cache footprint.
 * +--- 64-bit and 64B cacheline; 1B each letter; First byte on the left. ---+
 * |---------------------------- 1st cacheline ----------------------------|
 * | sedrxxxx mmmmmmmm ffffffff pppppppp [c * 32 ........ ........ .......] |
 * |---------------------------- 2nd cacheline ----------------------------|
 * | [c * 64 ........ ........ ........ ........ ........ ........ .......] |
 * |---------------------------- 3rd cacheline ----------------------------|
 * | [c * 32 ........ ........ .......] iiiiiiii aaaaaaaa oooooooo [t...... |
 * +-------------------------------------------------------------------------+
 * Note: the entire tcache is embedded into TSD and spans multiple cachelines.
 *
 * The last 3 members (i, a and o) before tcache aren't really needed on the
 * tcache fast path.  However, we have a number of unused tcache bins and
 * witnesses (never touched unless config_debug) at the end of tcache, so we
 * place them there to avoid breaking the cachelines and possibly paging in an
 * extra page.
 */
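/*
 * A rough compile-time sketch of that intent (illustrative only; assumes a
 * C11 static_assert, <stddef.h> offsetof, and jemalloc's CACHELINE constant,
 * and uses the field name generated by the O() macro below):
 *
 *   static_assert(offsetof(tsd_t, use_a_getter_or_setter_instead_rtree_ctx)
 *       <= CACHELINE, "fast-path stats/prof fields stay in the 1st cacheline");
 */
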
#ifdef JEMALLOC_JET
typedef void (*test_callback_t)(int *);
# define MALLOC_TSD_TEST_DATA_INIT 0x72b65c10
# define MALLOC_TEST_TSD \
    O(test_data,         int,               int)                    \
    O(test_callback,     test_callback_t,   int)
# define MALLOC_TEST_TSD_INITIALIZER , MALLOC_TSD_TEST_DATA_INIT, NULL
#else
# define MALLOC_TEST_TSD
# define MALLOC_TEST_TSD_INITIALIZER
#endif

/*  O(name,                  type,              nullable type) */
#define MALLOC_TSD                                                  \
    O(tcache_enabled,        bool,              bool)               \
    O(arenas_tdata_bypass,   bool,              bool)               \
    O(reentrancy_level,      int8_t,            int8_t)             \
    O(narenas_tdata,         uint32_t,          uint32_t)           \
    O(offset_state,          uint64_t,          uint64_t)           \
    O(thread_allocated,      uint64_t,          uint64_t)           \
    O(thread_deallocated,    uint64_t,          uint64_t)           \
    O(prof_tdata,            prof_tdata_t *,    prof_tdata_t *)     \
    O(rtree_ctx,             rtree_ctx_t,       rtree_ctx_t)        \
    O(iarena,                arena_t *,         arena_t *)          \
    O(arena,                 arena_t *,         arena_t *)          \
    O(arenas_tdata,          arena_tdata_t *,   arena_tdata_t *)    \
    O(tcache,                tcache_t,          tcache_t)           \
    O(witness_tsd,           witness_tsd_t,     witness_tsdn_t)     \
    MALLOC_TEST_TSD
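
/*
 * Illustrative expansion (not compiled as such): with this x-macro pattern,
 * an entry like O(thread_allocated, uint64_t, uint64_t) turns into a struct
 * field plus a family of accessors defined further below, roughly:
 *
 *   uint64_t use_a_getter_or_setter_instead_thread_allocated;  (in tsd_t)
 *   uint64_t *tsd_thread_allocatedp_get_unsafe(tsd_t *tsd);
 *   uint64_t *tsd_thread_allocatedp_get(tsd_t *tsd);
 *   uint64_t *tsdn_thread_allocatedp_get(tsdn_t *tsdn);
 *   uint64_t tsd_thread_allocated_get(tsd_t *tsd);
 *   void tsd_thread_allocated_set(tsd_t *tsd, uint64_t val);
 */
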
#define TSD_INITIALIZER {                                           \
    tsd_state_uninitialized,                                        \
    TCACHE_ENABLED_ZERO_INITIALIZER,                                \
    false,          /* arenas_tdata_bypass */                       \
    0, 0, 0, 0, 0,  /* reentrancy_level .. thread_deallocated */    \
    NULL,           /* prof_tdata */                                \
    RTREE_CTX_ZERO_INITIALIZER,                                     \
    NULL, NULL, NULL,  /* iarena, arena, arenas_tdata */            \
    TCACHE_ZERO_INITIALIZER,                                        \
    WITNESS_TSD_INITIALIZER                                         \
    MALLOC_TEST_TSD_INITIALIZER                                     \
}

enum {
    tsd_state_nominal = 0, /* Common case --> jnz. */
    tsd_state_nominal_slow = 1, /* Initialized but on slow path. */
    /* the above 2 nominal states should be lower values. */
    tsd_state_nominal_max = 1, /* used for comparison only. */
    tsd_state_minimal_initialized = 2,
    tsd_state_purgatory = 3,
    tsd_state_reincarnated = 4,
    tsd_state_uninitialized = 5
};

/* Manually limit tsd_state_t to a single byte. */
typedef uint8_t tsd_state_t;

/* The actual tsd. */
struct tsd_s {
    /*
     * The contents should be treated as totally opaque outside the tsd
     * module.  Access any thread-local state through the getters and
     * setters below.
     */
    tsd_state_t state;
#define O(n, t, nt) \
    t use_a_getter_or_setter_instead_##n;
MALLOC_TSD
#undef O
/* AddressSanitizer requires TLS data to be aligned to at least 8 bytes. */
} JEMALLOC_ALIGNED(16);

/*
 * Wrapper around tsd_t that makes it possible to avoid implicit conversion
 * between tsd_t and tsdn_t, where tsdn_t is "nullable" and has to be
 * explicitly converted to tsd_t, which is non-nullable.
 */
struct tsdn_s {
    tsd_t tsd;
};
#define TSDN_NULL ((tsdn_t *)0)
JEMALLOC_ALWAYS_INLINE tsdn_t *
tsd_tsdn(tsd_t *tsd) {
    return (tsdn_t *)tsd;
}

JEMALLOC_ALWAYS_INLINE bool
tsdn_null(const tsdn_t *tsdn) {
    return tsdn == NULL;
}

JEMALLOC_ALWAYS_INLINE tsd_t *
tsdn_tsd(tsdn_t *tsdn) {
    assert(!tsdn_null(tsdn));
    return &tsdn->tsd;
}
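
/*
 * Typical conversion pattern (illustrative; "foo" is a hypothetical helper):
 * interfaces that may run before tsd is available take a tsdn_t *, check it
 * for NULL, and unwrap it once tsd is known to exist:
 *
 *   void foo(tsdn_t *tsdn) {
 *       if (tsdn_null(tsdn)) {
 *           return;
 *       }
 *       tsd_t *tsd = tsdn_tsd(tsdn);
 *       ...
 *   }
 *   ...
 *   foo(tsd_tsdn(tsd));
 */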

void *malloc_tsd_malloc(size_t size);
void malloc_tsd_dalloc(void *wrapper);
void malloc_tsd_cleanup_register(bool (*f)(void));
tsd_t *malloc_tsd_boot0(void);
void malloc_tsd_boot1(void);
void tsd_cleanup(void *arg);
tsd_t *tsd_fetch_slow(tsd_t *tsd, bool internal);
void tsd_slow_update(tsd_t *tsd);

/*
 * We put the platform-specific data declarations and inlines into their own
 * header files to avoid cluttering this file.  They define tsd_boot0,
 * tsd_boot1, tsd_boot, tsd_booted_get, tsd_get_allocates, tsd_get, and tsd_set.
 */
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
#include "jemalloc/internal/tsd_malloc_thread_cleanup.h"
#elif (defined(JEMALLOC_TLS))
#include "jemalloc/internal/tsd_tls.h"
#elif (defined(_WIN32))
#include "jemalloc/internal/tsd_win.h"
#else
#include "jemalloc/internal/tsd_generic.h"
#endif

/*
 * tsd_foop_get_unsafe(tsd) returns a pointer to the thread-local instance of
 * foo.  This omits some safety checks, and so can be used during tsd
 * initialization and cleanup.
 */
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE t * \
tsd_##n##p_get_unsafe(tsd_t *tsd) { \
    return &tsd->use_a_getter_or_setter_instead_##n; \
}
MALLOC_TSD
#undef O

/* tsd_foop_get(tsd) returns a pointer to the thread-local instance of foo. */
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE t * \
tsd_##n##p_get(tsd_t *tsd) { \
    assert(tsd->state == tsd_state_nominal || \
        tsd->state == tsd_state_nominal_slow || \
        tsd->state == tsd_state_reincarnated || \
        tsd->state == tsd_state_minimal_initialized); \
    return tsd_##n##p_get_unsafe(tsd); \
}
MALLOC_TSD
#undef O

/*
 * tsdn_foop_get(tsdn) returns either the thread-local instance of foo (if tsdn
 * isn't NULL), or NULL (if tsdn is NULL), cast to the nullable pointer type.
 */
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE nt * \
tsdn_##n##p_get(tsdn_t *tsdn) { \
    if (tsdn_null(tsdn)) { \
        return NULL; \
    } \
    tsd_t *tsd = tsdn_tsd(tsdn); \
    return (nt *)tsd_##n##p_get(tsd); \
}
MALLOC_TSD
#undef O

/* tsd_foo_get(tsd) returns the value of the thread-local instance of foo. */
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE t \
tsd_##n##_get(tsd_t *tsd) { \
    return *tsd_##n##p_get(tsd); \
}
MALLOC_TSD
#undef O

/* tsd_foo_set(tsd, val) updates the thread-local instance of foo to be val. */
#define O(n, t, nt) \
JEMALLOC_ALWAYS_INLINE void \
tsd_##n##_set(tsd_t *tsd, t val) { \
    assert(tsd->state != tsd_state_reincarnated && \
        tsd->state != tsd_state_minimal_initialized); \
    *tsd_##n##p_get(tsd) = val; \
}
MALLOC_TSD
#undef O
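
/*
 * Usage sketch (illustrative; "usize" is a hypothetical local): given the
 * MALLOC_TSD entry O(thread_allocated, uint64_t, uint64_t), the macros above
 * generate tsd_thread_allocated_get() and tsd_thread_allocated_set(), so the
 * per-thread allocation counter can be bumped as:
 *
 *   tsd_t *tsd = tsd_fetch();
 *   tsd_thread_allocated_set(tsd,
 *       tsd_thread_allocated_get(tsd) + usize);
 */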

JEMALLOC_ALWAYS_INLINE void
tsd_assert_fast(tsd_t *tsd) {
    assert(!malloc_slow && tsd_tcache_enabled_get(tsd) &&
        tsd_reentrancy_level_get(tsd) == 0);
}

JEMALLOC_ALWAYS_INLINE bool
tsd_fast(tsd_t *tsd) {
    bool fast = (tsd->state == tsd_state_nominal);
    if (fast) {
        tsd_assert_fast(tsd);
    }
    return fast;
}

JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch_impl(bool init, bool minimal) {
    tsd_t *tsd = tsd_get(init);

    if (!init && tsd_get_allocates() && tsd == NULL) {
        return NULL;
    }
    if (unlikely(tsd->state != tsd_state_nominal)) {
        return tsd_fetch_slow(tsd, minimal);
    }
    assert(tsd_fast(tsd));
    tsd_assert_fast(tsd);
    return tsd;
}

/* Get a minimal TSD that requires no cleanup.  See comments in free(). */
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch_min(void) {
    return tsd_fetch_impl(true, true);
}

/* For internal background threads use only. */
JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_internal_fetch(void) {
    tsd_t *tsd = tsd_fetch_min();
    /* Use reincarnated state to prevent full initialization. */
    tsd->state = tsd_state_reincarnated;
    return tsd;
}

JEMALLOC_ALWAYS_INLINE tsd_t *
tsd_fetch(void) {
    return tsd_fetch_impl(true, false);
}
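
/*
 * Illustrative call pattern (hypothetical caller): allocation entry points
 * typically fetch tsd once, then branch on whether the fast path applies:
 *
 *   tsd_t *tsd = tsd_fetch();
 *   if (tsd_fast(tsd)) {
 *       ... nominal state, tcache enabled, no reentrancy ...
 *   } else {
 *       ... take the slow path ...
 *   }
 */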

static inline bool
tsd_nominal(tsd_t *tsd) {
    return (tsd->state <= tsd_state_nominal_max);
}

JEMALLOC_ALWAYS_INLINE tsdn_t *
tsdn_fetch(void) {
    if (!tsd_booted_get()) {
        return NULL;
    }
    return tsd_tsdn(tsd_fetch_impl(false, false));
}

JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
tsd_rtree_ctx(tsd_t *tsd) {
    return tsd_rtree_ctxp_get(tsd);
}

JEMALLOC_ALWAYS_INLINE rtree_ctx_t *
tsdn_rtree_ctx(tsdn_t *tsdn, rtree_ctx_t *fallback) {
    /*
     * If tsd cannot be accessed, initialize the fallback rtree_ctx and
     * return a pointer to it.
     */
    if (unlikely(tsdn_null(tsdn))) {
        rtree_ctx_data_init(fallback);
        return fallback;
    }
    return tsd_rtree_ctx(tsdn_tsd(tsdn));
}
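
/*
 * Example (illustrative): callers that may run without tsd keep a fallback
 * rtree_ctx on the stack and let tsdn_rtree_ctx() choose between it and the
 * thread-local one:
 *
 *   rtree_ctx_t fallback;
 *   rtree_ctx_t *rtree_ctx = tsdn_rtree_ctx(tsdn, &fallback);
 */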

#endif /* JEMALLOC_INTERNAL_TSD_H */