#define JEMALLOC_TSD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/rtree.h"

/******************************************************************************/
/* Data. */

static unsigned ncleanups;
static malloc_tsd_cleanup_t cleanups[MALLOC_TSD_CLEANUPS_MAX];
/* TSD_INITIALIZER triggers "-Wmissing-field-initializers" */
JEMALLOC_DIAGNOSTIC_PUSH
JEMALLOC_DIAGNOSTIC_IGNORE_MISSING_STRUCT_FIELD_INITIALIZERS
#ifdef JEMALLOC_MALLOC_THREAD_CLEANUP
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
JEMALLOC_TSD_TYPE_ATTR(bool) JEMALLOC_TLS_MODEL tsd_initialized = false;
bool tsd_booted = false;
#elif (defined(JEMALLOC_TLS))
JEMALLOC_TSD_TYPE_ATTR(tsd_t) tsd_tls = TSD_INITIALIZER;
pthread_key_t tsd_tsd;
bool tsd_booted = false;
#elif (defined(_WIN32))
DWORD tsd_tsd;
tsd_wrapper_t tsd_boot_wrapper = {false, TSD_INITIALIZER};
bool tsd_booted = false;
#else

/*
 * This contains a mutex, but it's pretty convenient to allow the mutex code to
 * have a dependency on tsd.  So we define the struct here, and only refer to it
 * by pointer in the header.
 */
struct tsd_init_head_s {
        ql_head(tsd_init_block_t) blocks;
        malloc_mutex_t lock;
};

pthread_key_t tsd_tsd;
tsd_init_head_t tsd_init_head = {
        ql_head_initializer(blocks),
        MALLOC_MUTEX_INITIALIZER
};

tsd_wrapper_t tsd_boot_wrapper = {
        false,
        TSD_INITIALIZER
};
bool tsd_booted = false;
#endif
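/*
 * The pthread_getspecific()-based fallback branch above is the only
 * configuration that needs tsd_init_head: on that path the wrapper holding a
 * thread's tsd is itself allocated through the allocator, so the allocation
 * can recurse back into tsd initialization.  tsd_init_check_recursion()
 * (defined near the end of this file) walks tsd_init_head.blocks to detect
 * exactly that recursion.
 */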
JEMALLOC_DIAGNOSTIC_POP

/******************************************************************************/

/* A list of all the tsds in the nominal state. */
typedef ql_head(tsd_t) tsd_list_t;
static tsd_list_t tsd_nominal_tsds = ql_head_initializer(tsd_nominal_tsds);
static malloc_mutex_t tsd_nominal_tsds_lock;

/* How many slow-path-enabling features are turned on. */
static atomic_u32_t tsd_global_slow_count = ATOMIC_INIT(0);
static bool
tsd_in_nominal_list(tsd_t *tsd) {
        tsd_t *tsd_list;
        bool found = false;
        /*
         * We don't know that tsd is nominal; it might not be safe to get data
         * out of it here.
         */
        malloc_mutex_lock(TSDN_NULL, &tsd_nominal_tsds_lock);
        ql_foreach(tsd_list, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) {
                if (tsd == tsd_list) {
                        found = true;
                        break;
                }
        }
        malloc_mutex_unlock(TSDN_NULL, &tsd_nominal_tsds_lock);
        return found;
}
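/*
 * tsd_in_nominal_list() is a linear scan under the global list lock.  In this
 * file it is only reachable through assert() checks, so in practice its cost
 * is confined to debug builds.
 */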
static void
tsd_add_nominal(tsd_t *tsd) {
        assert(!tsd_in_nominal_list(tsd));
        assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
        ql_elm_new(tsd, TSD_MANGLE(tcache).tsd_link);
        malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
        ql_tail_insert(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link);
        malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
static void
tsd_remove_nominal(tsd_t *tsd) {
        assert(tsd_in_nominal_list(tsd));
        assert(tsd_state_get(tsd) <= tsd_state_nominal_max);
        malloc_mutex_lock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
        ql_remove(&tsd_nominal_tsds, tsd, TSD_MANGLE(tcache).tsd_link);
        malloc_mutex_unlock(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}
static void
tsd_force_recompute(tsdn_t *tsdn) {
        /*
         * The stores to tsd->state here need to synchronize with the exchange
         * in tsd_slow_update.
         */
        atomic_fence(ATOMIC_RELEASE);
        malloc_mutex_lock(tsdn, &tsd_nominal_tsds_lock);
        tsd_t *remote_tsd;
        ql_foreach(remote_tsd, &tsd_nominal_tsds, TSD_MANGLE(tcache).tsd_link) {
                assert(tsd_atomic_load(&remote_tsd->state, ATOMIC_RELAXED)
                    <= tsd_state_nominal_max);
                tsd_atomic_store(&remote_tsd->state,
                    tsd_state_nominal_recompute, ATOMIC_RELAXED);
        }
        malloc_mutex_unlock(tsdn, &tsd_nominal_tsds_lock);
}
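/*
 * Memory-ordering sketch: the ATOMIC_RELEASE fence above, combined with the
 * relaxed stores of the recompute marker, pairs with the ATOMIC_ACQUIRE
 * exchange in tsd_slow_update().  A thread that observes
 * tsd_state_nominal_recompute therefore also observes whatever global update
 * (e.g. a tsd_global_slow_count change) preceded the fence in the forcing
 * thread.
 */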
void
tsd_global_slow_inc(tsdn_t *tsdn) {
        atomic_fetch_add_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
        /*
         * We unconditionally force a recompute, even if the global slow count
         * was already positive.  If we didn't, then it would be possible for
         * us to return to the user, have the user synchronize externally with
         * some other thread, and then have that other thread not have picked
         * up the update yet (since the original incrementing thread might
         * still be making its way through the tsd list).
         */
        tsd_force_recompute(tsdn);
}
void
tsd_global_slow_dec(tsdn_t *tsdn) {
        atomic_fetch_sub_u32(&tsd_global_slow_count, 1, ATOMIC_RELAXED);
        /* See the note in ..._inc(). */
        tsd_force_recompute(tsdn);
}
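/*
 * Illustrative usage (hypothetical caller, not code from this file): a
 * feature that needs every thread on the allocator's slow path for its whole
 * lifetime brackets itself with the pair above, e.g.
 *
 *	tsd_global_slow_inc(tsdn);
 *	... enable the feature; all nominal tsds are marked for recompute ...
 *	tsd_global_slow_dec(tsdn);
 */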
static bool
tsd_local_slow(tsd_t *tsd) {
        return !tsd_tcache_enabled_get(tsd)
            || tsd_reentrancy_level_get(tsd) > 0;
}

bool
tsd_global_slow(void) {
        return atomic_load_u32(&tsd_global_slow_count, ATOMIC_RELAXED) > 0;
}
/******************************************************************************/

static uint8_t
tsd_state_compute(tsd_t *tsd) {
        if (!tsd_nominal(tsd)) {
                return tsd_state_get(tsd);
        }
        /* We're in *a* nominal state; but which one? */
        if (malloc_slow || tsd_local_slow(tsd) || tsd_global_slow()) {
                return tsd_state_nominal_slow;
        } else {
                return tsd_state_nominal;
        }
}
void
tsd_slow_update(tsd_t *tsd) {
        uint8_t old_state;
        do {
                uint8_t new_state = tsd_state_compute(tsd);
                old_state = tsd_atomic_exchange(&tsd->state, new_state,
                    ATOMIC_ACQUIRE);
        } while (old_state == tsd_state_nominal_recompute);
}
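/*
 * The retry loop above is what makes tsd_force_recompute() reliable: if
 * another thread posts tsd_state_nominal_recompute after we compute
 * new_state but before the exchange, the exchange returns the recompute
 * marker and we go around again.  A posted recompute can therefore never be
 * silently overwritten by a stale computed state.
 */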
void
tsd_state_set(tsd_t *tsd, uint8_t new_state) {
        /* Only the tsd module can change the state *to* recompute. */
        assert(new_state != tsd_state_nominal_recompute);
        uint8_t old_state = tsd_atomic_load(&tsd->state, ATOMIC_RELAXED);
        if (old_state > tsd_state_nominal_max) {
                /*
                 * Not currently in the nominal list, but it might need to be
                 * inserted there.
                 */
                assert(!tsd_in_nominal_list(tsd));
                tsd_atomic_store(&tsd->state, new_state, ATOMIC_RELAXED);
                if (new_state <= tsd_state_nominal_max) {
                        tsd_add_nominal(tsd);
                }
        } else {
                /*
                 * We're currently nominal.  If the new state is non-nominal,
                 * great; we take ourselves off the list and just enter the
                 * new state.
                 */
                assert(tsd_in_nominal_list(tsd));
                if (new_state > tsd_state_nominal_max) {
                        tsd_remove_nominal(tsd);
                        tsd_atomic_store(&tsd->state, new_state,
                            ATOMIC_RELAXED);
                } else {
                        /*
                         * This is the tricky case.  We're transitioning from
                         * one nominal state to another.  The caller can't
                         * know about any races that are occurring at the
                         * same time, so we always have to recompute no
                         * matter what.
                         */
                        tsd_slow_update(tsd);
                }
        }
}
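/*
 * Invariant maintained by tsd_state_set(): a tsd is on tsd_nominal_tsds if
 * and only if its state compares <= tsd_state_nominal_max, i.e. it is in one
 * of the nominal states.  Every transition across that boundary goes through
 * tsd_add_nominal()/tsd_remove_nominal() under the list lock, which is what
 * lets tsd_force_recompute() find exactly the threads whose state may need
 * refreshing.
 */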
static bool
tsd_data_init(tsd_t *tsd) {
        /*
         * We initialize the rtree context first (before the tcache), since
         * the tcache initialization depends on it.
         */
        rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));

        /*
         * A nondeterministic seed based on the address of tsd reduces
         * the likelihood of lockstep non-uniform cache index
         * utilization among identical concurrent processes, but at the
         * cost of test repeatability.  For debug builds, instead use a
         * deterministic seed.
         */
        *tsd_offset_statep_get(tsd) = config_debug ? 0 :
            (uint64_t)(uintptr_t)tsd;

        return tsd_tcache_enabled_data_init(tsd);
}
static void
assert_tsd_data_cleanup_done(tsd_t *tsd) {
        assert(!tsd_nominal(tsd));
        assert(!tsd_in_nominal_list(tsd));
        assert(*tsd_arenap_get_unsafe(tsd) == NULL);
        assert(*tsd_iarenap_get_unsafe(tsd) == NULL);
        assert(*tsd_arenas_tdata_bypassp_get_unsafe(tsd) == true);
        assert(*tsd_arenas_tdatap_get_unsafe(tsd) == NULL);
        assert(*tsd_tcache_enabledp_get_unsafe(tsd) == false);
        assert(*tsd_prof_tdatap_get_unsafe(tsd) == NULL);
}
static bool
tsd_data_init_nocleanup(tsd_t *tsd) {
        assert(tsd_state_get(tsd) == tsd_state_reincarnated ||
            tsd_state_get(tsd) == tsd_state_minimal_initialized);
        /*
         * During reincarnation, there is no guarantee that the cleanup
         * function will be called (deallocation may happen after all tsd
         * destructors).  We set up tsd in a way that no cleanup is needed.
         */
        rtree_ctx_data_init(tsd_rtree_ctxp_get_unsafe(tsd));
        *tsd_arenas_tdata_bypassp_get(tsd) = true;
        *tsd_tcache_enabledp_get_unsafe(tsd) = false;
        *tsd_reentrancy_levelp_get(tsd) = 1;
        assert_tsd_data_cleanup_done(tsd);

        return false;
}
tsd_t *
tsd_fetch_slow(tsd_t *tsd, bool minimal) {
        assert(!tsd_fast(tsd));

        if (tsd_state_get(tsd) == tsd_state_nominal_slow) {
                /*
                 * On slow path but no work needed.  Note that we can't
                 * necessarily *assert* that we're slow, because we might be
                 * slow because of an asynchronous modification to global
                 * state, which might be asynchronously modified *back*.
                 */
        } else if (tsd_state_get(tsd) == tsd_state_nominal_recompute) {
                tsd_slow_update(tsd);
        } else if (tsd_state_get(tsd) == tsd_state_uninitialized) {
                if (!minimal) {
                        if (tsd_booted) {
                                tsd_state_set(tsd, tsd_state_nominal);
                                tsd_slow_update(tsd);
                                /* Trigger cleanup handler registration. */
                                tsd_set(tsd);
                                tsd_data_init(tsd);
                        }
                } else {
                        tsd_state_set(tsd, tsd_state_minimal_initialized);
                        tsd_set(tsd);
                        tsd_data_init_nocleanup(tsd);
                }
        } else if (tsd_state_get(tsd) == tsd_state_minimal_initialized) {
                if (!minimal) {
                        /* Switch to fully initialized. */
                        tsd_state_set(tsd, tsd_state_nominal);
                        assert(*tsd_reentrancy_levelp_get(tsd) >= 1);
                        (*tsd_reentrancy_levelp_get(tsd))--;
                        tsd_slow_update(tsd);
                        tsd_data_init(tsd);
                } else {
                        assert_tsd_data_cleanup_done(tsd);
                }
        } else if (tsd_state_get(tsd) == tsd_state_purgatory) {
                tsd_state_set(tsd, tsd_state_reincarnated);
                tsd_set(tsd);
                tsd_data_init_nocleanup(tsd);
        } else {
                assert(tsd_state_get(tsd) == tsd_state_reincarnated);
        }

        return tsd;
}
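/*
 * Summary of the dispatch above: nominal_slow needs no work; a posted
 * recompute is resolved immediately; an uninitialized tsd is either fully
 * initialized (with cleanup registered via tsd_set()) or, for minimal
 * fetches such as a bare free(), set up so that no cleanup will ever be
 * required; and a tsd whose destructor has already run is reincarnated in
 * the same no-cleanup-needed fashion.
 */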
void *
malloc_tsd_malloc(size_t size) {
        return a0malloc(CACHELINE_CEILING(size));
}

void
malloc_tsd_dalloc(void *wrapper) {
        a0dalloc(wrapper);
}
#if defined(JEMALLOC_MALLOC_THREAD_CLEANUP) || defined(_WIN32)
#ifndef _WIN32
JEMALLOC_EXPORT
#endif
void
_malloc_thread_cleanup(void) {
        bool pending[MALLOC_TSD_CLEANUPS_MAX], again;
        unsigned i;

        for (i = 0; i < ncleanups; i++) {
                pending[i] = true;
        }

        do {
                again = false;
                for (i = 0; i < ncleanups; i++) {
                        if (pending[i]) {
                                pending[i] = cleanups[i]();
                                if (pending[i]) {
                                        again = true;
                                }
                        }
                }
        } while (again);
}
#endif
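/*
 * A cleanup callback returns true when it could not finish this pass (e.g.
 * it triggered further allocation and must run again), so the do/while keeps
 * sweeping the pending set until a full pass requests no more work.
 */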
void
malloc_tsd_cleanup_register(bool (*f)(void)) {
        assert(ncleanups < MALLOC_TSD_CLEANUPS_MAX);
        cleanups[ncleanups] = f;
        ncleanups++;
}
static void
tsd_do_data_cleanup(tsd_t *tsd) {
        prof_tdata_cleanup(tsd);
        iarena_cleanup(tsd);
        arena_cleanup(tsd);
        arenas_tdata_cleanup(tsd);
        tcache_cleanup(tsd);
        witnesses_cleanup(tsd_witness_tsdp_get_unsafe(tsd));
}
void
tsd_cleanup(void *arg) {
        tsd_t *tsd = (tsd_t *)arg;

        switch (tsd_state_get(tsd)) {
        case tsd_state_uninitialized:
                /* Do nothing. */
                break;
        case tsd_state_minimal_initialized:
                /* This implies the thread only did free() in its life time. */
                /* Fall through. */
        case tsd_state_reincarnated:
                /*
                 * Reincarnated means another destructor deallocated memory
                 * after the destructor was called.  Cleanup isn't required but
                 * is still called for testing and completeness.
                 */
                assert_tsd_data_cleanup_done(tsd);
                /* Fall through. */
        case tsd_state_nominal:
        case tsd_state_nominal_slow:
                tsd_do_data_cleanup(tsd);
                tsd_state_set(tsd, tsd_state_purgatory);
                tsd_set(tsd);
                break;
        case tsd_state_purgatory:
                /*
                 * The previous time this destructor was called, we set the
                 * state to tsd_state_purgatory so that other destructors
                 * wouldn't cause re-creation of the tsd.  This time, do
                 * nothing, and do not request another callback.
                 */
                break;
        default:
                not_reached();
        }
#ifdef JEMALLOC_JET
        test_callback_t test_callback = *tsd_test_callbackp_get_unsafe(tsd);
        int *data = tsd_test_datap_get_unsafe(tsd);
        if (test_callback != NULL) {
                test_callback(data);
        }
#endif
}
tsd_t *
malloc_tsd_boot0(void) {
        tsd_t *tsd;

        ncleanups = 0;
        if (malloc_mutex_init(&tsd_nominal_tsds_lock, "tsd_nominal_tsds_lock",
            WITNESS_RANK_OMIT, malloc_mutex_rank_exclusive)) {
                return NULL;
        }
        if (tsd_boot0()) {
                return NULL;
        }
        tsd = tsd_fetch();
        *tsd_arenas_tdata_bypassp_get(tsd) = true;
        return tsd;
}
void
malloc_tsd_boot1(void) {
        tsd_boot1();
        tsd_t *tsd = tsd_fetch();
        /* malloc_slow has been set properly.  Update tsd_slow. */
        tsd_slow_update(tsd);
        *tsd_arenas_tdata_bypassp_get(tsd) = false;
}
#ifdef _WIN32
static BOOL WINAPI
_tls_callback(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved) {
        switch (fdwReason) {
#ifdef JEMALLOC_LAZY_LOCK
        case DLL_THREAD_ATTACH:
                isthreaded = true;
                break;
#endif
        case DLL_THREAD_DETACH:
                _malloc_thread_cleanup();
                break;
        default:
                break;
        }
        return true;
}

/*
 * We need to be able to say "read" here (in the "pragma section"), but have
 * hooked "read".  We won't read for the rest of the file, so we can get away
 * with unhooking it.
 */
#ifdef read
#  undef read
#endif

#ifdef _MSC_VER
#  ifdef _M_IX86
#    pragma comment(linker, "/INCLUDE:__tls_used")
#    pragma comment(linker, "/INCLUDE:_tls_callback")
#  else
#    pragma comment(linker, "/INCLUDE:_tls_used")
#    pragma comment(linker, "/INCLUDE:" STRINGIFY(tls_callback) )
#  endif
#  pragma section(".CRT$XLY",long,read)
#endif
JEMALLOC_SECTION(".CRT$XLY") JEMALLOC_ATTR(used)
BOOL (WINAPI *const tls_callback)(HINSTANCE hinstDLL,
    DWORD fdwReason, LPVOID lpvReserved) = _tls_callback;
#endif
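/*
 * Why ".CRT$XLY" works: the MSVC runtime gathers TLS callbacks from the
 * sections .CRT$XLA through .CRT$XLZ into an array of function pointers, and
 * placing tls_callback in an "XL?" section between those bookends gets it
 * invoked on every thread attach/detach.  This is how per-thread cleanup runs
 * on Windows, where there are no pthread destructors.
 */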
#if (!defined(JEMALLOC_MALLOC_THREAD_CLEANUP) && !defined(JEMALLOC_TLS) && \
    !defined(_WIN32))
void *
tsd_init_check_recursion(tsd_init_head_t *head, tsd_init_block_t *block) {
        pthread_t self = pthread_self();
        tsd_init_block_t *iter;

        /* Check whether this thread has already inserted into the list. */
        malloc_mutex_lock(TSDN_NULL, &head->lock);
        ql_foreach(iter, &head->blocks, link) {
                if (iter->thread == self) {
                        malloc_mutex_unlock(TSDN_NULL, &head->lock);
                        return iter->data;
                }
        }
        /* Insert block into list. */
        ql_elm_new(block, link);
        block->thread = self;
        ql_tail_insert(&head->blocks, block, link);
        malloc_mutex_unlock(TSDN_NULL, &head->lock);
        return NULL;
}
void
tsd_init_finish(tsd_init_head_t *head, tsd_init_block_t *block) {
        malloc_mutex_lock(TSDN_NULL, &head->lock);
        ql_remove(&head->blocks, block, link);
        malloc_mutex_unlock(TSDN_NULL, &head->lock);
}
#endif
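/*
 * Illustrative caller pattern for the pair above (a sketch of how the
 * fallback tsd wrapper bootstrap in tsd.h uses it; details abbreviated):
 *
 *	tsd_init_block_t block;
 *	void *data = tsd_init_check_recursion(&tsd_init_head, &block);
 *	if (data != NULL) {
 *		return data;            (re-entered on this thread; reuse)
 *	}
 *	block.data = ... allocate and initialize the wrapper ...;
 *	tsd_init_finish(&tsd_init_head, &block);
 */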
void
tsd_prefork(tsd_t *tsd) {
        malloc_mutex_prefork(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_parent(tsd_t *tsd) {
        malloc_mutex_postfork_parent(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
}

void
tsd_postfork_child(tsd_t *tsd) {
        malloc_mutex_postfork_child(tsd_tsdn(tsd), &tsd_nominal_tsds_lock);
        ql_new(&tsd_nominal_tsds);

        if (tsd_state_get(tsd) <= tsd_state_nominal_max) {
                tsd_add_nominal(tsd);
        }
}