#define JEMALLOC_TCACHE_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/size_classes.h"

/******************************************************************************/
/* Data. */

bool opt_tcache = true;
ssize_t opt_lg_tcache_max = LG_TCACHE_MAXCLASS_DEFAULT;

cache_bin_info_t *tcache_bin_info;
static unsigned stack_nelms; /* Total stack elms per tcache. */

unsigned nhbins;
size_t tcache_maxclass;

tcaches_t *tcaches;

/* Index of first element within tcaches that has never been used. */
static unsigned tcaches_past;

/* Head of singly linked list tracking available tcaches elements. */
static tcaches_t *tcaches_avail;

/* Protects tcaches{,_past,_avail}. */
static malloc_mutex_t tcaches_mtx;
/******************************************************************************/

size_t
tcache_salloc(tsdn_t *tsdn, const void *ptr) {
    return arena_salloc(tsdn, ptr);
}
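/*
 * Event-driven GC: visit one cache bin per call.  A bin that stayed above its
 * low water mark since the last visit has part of its idle objects flushed
 * and, for small bins, its future fill count halved; a bin that ran dry gets
 * its fill count doubled.  low_water is then reset and the GC cursor advances.
 */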
void
tcache_event_hard(tsd_t *tsd, tcache_t *tcache) {
    szind_t binind = tcache->next_gc_bin;

    cache_bin_t *tbin;
    if (binind < NBINS) {
        tbin = tcache_small_bin_get(tcache, binind);
    } else {
        tbin = tcache_large_bin_get(tcache, binind);
    }
    if (tbin->low_water > 0) {
        /*
         * Flush (ceiling) 3/4 of the objects below the low water mark.
         */
        if (binind < NBINS) {
            tcache_bin_flush_small(tsd, tcache, tbin, binind,
                tbin->ncached - tbin->low_water + (tbin->low_water >> 2));
            /*
             * Reduce fill count by 2X.  Limit lg_fill_div such that
             * the fill count is always at least 1.
             */
            cache_bin_info_t *tbin_info = &tcache_bin_info[binind];
            if ((tbin_info->ncached_max >>
                (tcache->lg_fill_div[binind] + 1)) >= 1) {
                tcache->lg_fill_div[binind]++;
            }
        } else {
            tcache_bin_flush_large(tsd, tbin, binind, tbin->ncached
                - tbin->low_water + (tbin->low_water >> 2), tcache);
        }
    } else if (tbin->low_water < 0) {
        /*
         * Increase fill count by 2X for small bins.  Make sure
         * lg_fill_div stays greater than 0.
         */
        if (binind < NBINS && tcache->lg_fill_div[binind] > 1) {
            tcache->lg_fill_div[binind]--;
        }
    }
    tbin->low_water = tbin->ncached;

    tcache->next_gc_bin++;
    if (tcache->next_gc_bin == nhbins) {
        tcache->next_gc_bin = 0;
    }
}
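/* Cache-miss path for a small bin: refill it from the arena, then retry. */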
void *
tcache_alloc_small_hard(tsdn_t *tsdn, arena_t *arena, tcache_t *tcache,
    cache_bin_t *tbin, szind_t binind, bool *tcache_success) {
    void *ret;

    assert(tcache->arena != NULL);
    arena_tcache_fill_small(tsdn, arena, tcache, tbin, binind,
        config_prof ? tcache->prof_accumbytes : 0);
    if (config_prof) {
        tcache->prof_accumbytes = 0;
    }
    ret = cache_bin_alloc_easy(tbin, tcache_success);
    return ret;
}
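/*
 * Flush all but rem objects from a small cache bin.  Each pass locks the
 * arena bin that owns the first remaining object, frees every object that
 * belongs to it, and defers the rest to a later pass under a different lock.
 */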
void
tcache_bin_flush_small(tsd_t *tsd, tcache_t *tcache, cache_bin_t *tbin,
    szind_t binind, unsigned rem) {
    bool merged_stats = false;

    assert(binind < NBINS);
    assert((cache_bin_sz_t)rem <= tbin->ncached);

    arena_t *arena = tcache->arena;
    assert(arena != NULL);
    unsigned nflush = tbin->ncached - rem;
    VARIABLE_ARRAY(extent_t *, item_extent, nflush);
    /* Look up extent once per item. */
    for (unsigned i = 0; i < nflush; i++) {
        item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
    }

    while (nflush > 0) {
        /* Lock the arena bin associated with the first object. */
        extent_t *extent = item_extent[0];
        arena_t *bin_arena = extent_arena_get(extent);
        bin_t *bin = &bin_arena->bins[binind];

        if (config_prof && bin_arena == arena) {
            if (arena_prof_accum(tsd_tsdn(tsd), arena,
                tcache->prof_accumbytes)) {
                prof_idump(tsd_tsdn(tsd));
            }
            tcache->prof_accumbytes = 0;
        }

        malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
        if (config_stats && bin_arena == arena) {
            assert(!merged_stats);
            merged_stats = true;
            bin->stats.nflushes++;
            bin->stats.nrequests += tbin->tstats.nrequests;
            tbin->tstats.nrequests = 0;
        }
        unsigned ndeferred = 0;
        for (unsigned i = 0; i < nflush; i++) {
            void *ptr = *(tbin->avail - 1 - i);
            extent = item_extent[i];
            assert(ptr != NULL && extent != NULL);

            if (extent_arena_get(extent) == bin_arena) {
                arena_dalloc_bin_junked_locked(tsd_tsdn(tsd),
                    bin_arena, extent, ptr);
            } else {
                /*
                 * This object was allocated via a different
                 * arena bin than the one that is currently
                 * locked.  Stash the object, so that it can be
                 * handled in a future pass.
                 */
                *(tbin->avail - 1 - ndeferred) = ptr;
                item_extent[ndeferred] = extent;
                ndeferred++;
            }
        }
        malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
        arena_decay_ticks(tsd_tsdn(tsd), bin_arena, nflush - ndeferred);
        nflush = ndeferred;
    }
    if (config_stats && !merged_stats) {
        /*
         * The flush loop didn't happen to flush to this thread's
         * arena, so the stats didn't get merged.  Manually do so now.
         */
        bin_t *bin = &arena->bins[binind];
        malloc_mutex_lock(tsd_tsdn(tsd), &bin->lock);
        bin->stats.nflushes++;
        bin->stats.nrequests += tbin->tstats.nrequests;
        tbin->tstats.nrequests = 0;
        malloc_mutex_unlock(tsd_tsdn(tsd), &bin->lock);
    }

    memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
        sizeof(void *));
    tbin->ncached = rem;
    if (tbin->ncached < tbin->low_water) {
        tbin->low_water = tbin->ncached;
    }
}
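/*
 * Large-bin analogue of the small flush: objects are grouped by owning arena
 * and released under that arena's large_mtx, one arena per pass.
 */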
void
tcache_bin_flush_large(tsd_t *tsd, cache_bin_t *tbin, szind_t binind,
    unsigned rem, tcache_t *tcache) {
    bool merged_stats = false;

    assert(binind < nhbins);
    assert((cache_bin_sz_t)rem <= tbin->ncached);

    arena_t *arena = tcache->arena;
    assert(arena != NULL);
    unsigned nflush = tbin->ncached - rem;
    VARIABLE_ARRAY(extent_t *, item_extent, nflush);
    /* Look up extent once per item. */
    for (unsigned i = 0; i < nflush; i++) {
        item_extent[i] = iealloc(tsd_tsdn(tsd), *(tbin->avail - 1 - i));
    }

    while (nflush > 0) {
        /* Lock the arena associated with the first object. */
        extent_t *extent = item_extent[0];
        arena_t *locked_arena = extent_arena_get(extent);
        bool idump = false;

        malloc_mutex_lock(tsd_tsdn(tsd), &locked_arena->large_mtx);
        for (unsigned i = 0; i < nflush; i++) {
            void *ptr = *(tbin->avail - 1 - i);
            assert(ptr != NULL);
            extent = item_extent[i];
            if (extent_arena_get(extent) == locked_arena) {
                large_dalloc_prep_junked_locked(tsd_tsdn(tsd),
                    extent);
            }
        }
        if ((config_prof || config_stats) && locked_arena == arena) {
            if (config_prof) {
                idump = arena_prof_accum(tsd_tsdn(tsd), arena,
                    tcache->prof_accumbytes);
                tcache->prof_accumbytes = 0;
            }
            if (config_stats) {
                merged_stats = true;
                arena_stats_large_nrequests_add(tsd_tsdn(tsd),
                    &arena->stats, binind,
                    tbin->tstats.nrequests);
                tbin->tstats.nrequests = 0;
            }
        }
        malloc_mutex_unlock(tsd_tsdn(tsd), &locked_arena->large_mtx);

        unsigned ndeferred = 0;
        for (unsigned i = 0; i < nflush; i++) {
            void *ptr = *(tbin->avail - 1 - i);
            extent = item_extent[i];
            assert(ptr != NULL && extent != NULL);

            if (extent_arena_get(extent) == locked_arena) {
                large_dalloc_finish(tsd_tsdn(tsd), extent);
            } else {
                /*
                 * This object was allocated via a different
                 * arena than the one that is currently locked.
                 * Stash the object, so that it can be handled
                 * in a future pass.
                 */
                *(tbin->avail - 1 - ndeferred) = ptr;
                item_extent[ndeferred] = extent;
                ndeferred++;
            }
        }
        if (config_prof && idump) {
            prof_idump(tsd_tsdn(tsd));
        }
        arena_decay_ticks(tsd_tsdn(tsd), locked_arena, nflush - ndeferred);
        nflush = ndeferred;
    }
    if (config_stats && !merged_stats) {
        /*
         * The flush loop didn't happen to flush to this thread's
         * arena, so the stats didn't get merged.  Manually do so now.
         */
        arena_stats_large_nrequests_add(tsd_tsdn(tsd), &arena->stats,
            binind, tbin->tstats.nrequests);
        tbin->tstats.nrequests = 0;
    }

    memmove(tbin->avail - rem, tbin->avail - tbin->ncached, rem *
        sizeof(void *));
    tbin->ncached = rem;
    if (tbin->ncached < tbin->low_water) {
        tbin->low_water = tbin->ncached;
    }
}
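/*
 * Bind the tcache to an arena.  With stats enabled, also link it into the
 * arena's tcache list so its counters can be merged on dissociation.
 */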
void
tcache_arena_associate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
    assert(tcache->arena == NULL);
    tcache->arena = arena;

    if (config_stats) {
        /* Link into list of extant tcaches. */
        malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);

        ql_elm_new(tcache, link);
        ql_tail_insert(&arena->tcache_ql, tcache, link);
        cache_bin_array_descriptor_init(
            &tcache->cache_bin_array_descriptor, tcache->bins_small,
            tcache->bins_large);
        ql_tail_insert(&arena->cache_bin_array_descriptor_ql,
            &tcache->cache_bin_array_descriptor, link);

        malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
    }
}

static void
tcache_arena_dissociate(tsdn_t *tsdn, tcache_t *tcache) {
    arena_t *arena = tcache->arena;
    assert(arena != NULL);
    if (config_stats) {
        /* Unlink from list of extant tcaches. */
        malloc_mutex_lock(tsdn, &arena->tcache_ql_mtx);
        if (config_debug) {
            bool in_ql = false;
            tcache_t *iter;
            ql_foreach(iter, &arena->tcache_ql, link) {
                if (iter == tcache) {
                    in_ql = true;
                    break;
                }
            }
            assert(in_ql);
        }
        ql_remove(&arena->tcache_ql, tcache, link);
        ql_remove(&arena->cache_bin_array_descriptor_ql,
            &tcache->cache_bin_array_descriptor, link);
        tcache_stats_merge(tsdn, tcache, arena);
        malloc_mutex_unlock(tsdn, &arena->tcache_ql_mtx);
    }
    tcache->arena = NULL;
}
void
tcache_arena_reassociate(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
    tcache_arena_dissociate(tsdn, tcache);
    tcache_arena_associate(tsdn, tcache, arena);
}

bool
tsd_tcache_enabled_data_init(tsd_t *tsd) {
    /* Called upon tsd initialization. */
    tsd_tcache_enabled_set(tsd, opt_tcache);
    tsd_slow_update(tsd);
    if (opt_tcache) {
        /* Trigger tcache init. */
        tsd_tcache_data_init(tsd);
    }
    return false;
}

/* Initialize auto tcache (embedded in TSD). */
static void
tcache_init(tsd_t *tsd, tcache_t *tcache, void *avail_stack) {
    memset(&tcache->link, 0, sizeof(ql_elm(tcache_t)));
    tcache->prof_accumbytes = 0;
    tcache->next_gc_bin = 0;
    tcache->arena = NULL;

    ticker_init(&tcache->gc_ticker, TCACHE_GC_INCR);

    size_t stack_offset = 0;
    assert((TCACHE_NSLOTS_SMALL_MAX & 1U) == 0);
    memset(tcache->bins_small, 0, sizeof(cache_bin_t) * NBINS);
    memset(tcache->bins_large, 0, sizeof(cache_bin_t) * (nhbins - NBINS));
    unsigned i = 0;
    for (; i < NBINS; i++) {
        tcache->lg_fill_div[i] = 1;
        stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
        /*
         * avail points past the available space.  Allocations will
         * access the slots toward higher addresses (for the benefit of
         * prefetching).
         */
        tcache_small_bin_get(tcache, i)->avail =
            (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
    }
    for (; i < nhbins; i++) {
        stack_offset += tcache_bin_info[i].ncached_max * sizeof(void *);
        tcache_large_bin_get(tcache, i)->avail =
            (void **)((uintptr_t)avail_stack + (uintptr_t)stack_offset);
    }
    assert(stack_offset == stack_nelms * sizeof(void *));
}

388 /* Initialize auto tcache (embedded in TSD). */
390 tsd_tcache_data_init(tsd_t *tsd) {
391 tcache_t *tcache = tsd_tcachep_get_unsafe(tsd);
392 assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
393 size_t size = stack_nelms * sizeof(void *);
394 /* Avoid false cacheline sharing. */
395 size = sz_sa2u(size, CACHELINE);
397 void *avail_array = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true,
398 NULL, true, arena_get(TSDN_NULL, 0, true));
399 if (avail_array == NULL) {
403 tcache_init(tsd, tcache, avail_array);
405 * Initialization is a bit tricky here. After malloc init is done, all
406 * threads can rely on arena_choose and associate tcache accordingly.
407 * However, the thread that does actual malloc bootstrapping relies on
408 * functional tsd, and it can only rely on a0. In that case, we
409 * associate its tcache to a0 temporarily, and later on
410 * arena_choose_hard() will re-associate properly.
412 tcache->arena = NULL;
414 if (!malloc_initialized()) {
415 /* If in initialization, assign to a0. */
416 arena = arena_get(tsd_tsdn(tsd), 0, false);
417 tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
419 arena = arena_choose(tsd, NULL);
420 /* This may happen if thread.tcache.enabled is used. */
421 if (tcache->arena == NULL) {
422 tcache_arena_associate(tsd_tsdn(tsd), tcache, arena);
425 assert(arena == tcache->arena);
/* Create a manual tcache for the tcache.create mallctl. */
tcache_t *
tcache_create_explicit(tsd_t *tsd) {
    tcache_t *tcache;
    size_t size, stack_offset;

    size = sizeof(tcache_t);
    /* Naturally align the pointer stacks. */
    size = PTR_CEILING(size);
    stack_offset = size;
    size += stack_nelms * sizeof(void *);
    /* Avoid false cacheline sharing. */
    size = sz_sa2u(size, CACHELINE);

    tcache = ipallocztm(tsd_tsdn(tsd), size, CACHELINE, true, NULL, true,
        arena_get(TSDN_NULL, 0, true));
    if (tcache == NULL) {
        return NULL;
    }

    tcache_init(tsd, tcache,
        (void *)((uintptr_t)tcache + (uintptr_t)stack_offset));
    tcache_arena_associate(tsd_tsdn(tsd), tcache, arena_ichoose(tsd, NULL));
    return tcache;
}
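/*
 * Fully drain every small and large bin back to the arena, and flush any
 * accumulated profiling bytes.
 */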
static void
tcache_flush_cache(tsd_t *tsd, tcache_t *tcache) {
    assert(tcache->arena != NULL);

    for (unsigned i = 0; i < NBINS; i++) {
        cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
        tcache_bin_flush_small(tsd, tcache, tbin, i, 0);
        if (config_stats) {
            assert(tbin->tstats.nrequests == 0);
        }
    }
    for (unsigned i = NBINS; i < nhbins; i++) {
        cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
        tcache_bin_flush_large(tsd, tbin, i, 0, tcache);
        if (config_stats) {
            assert(tbin->tstats.nrequests == 0);
        }
    }

    if (config_prof && tcache->prof_accumbytes > 0 &&
        arena_prof_accum(tsd_tsdn(tsd), tcache->arena,
        tcache->prof_accumbytes)) {
        prof_idump(tsd_tsdn(tsd));
    }
}
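/* Flush the calling thread's own (TSD-embedded) tcache. */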
void
tcache_flush(tsd_t *tsd) {
    assert(tcache_available(tsd));
    tcache_flush_cache(tsd, tsd_tcachep_get(tsd));
}

static void
tcache_destroy(tsd_t *tsd, tcache_t *tcache, bool tsd_tcache) {
    tcache_flush_cache(tsd, tcache);
    tcache_arena_dissociate(tsd_tsdn(tsd), tcache);

    if (tsd_tcache) {
        /* Release the avail array for the TSD embedded auto tcache. */
        void *avail_array =
            (void *)((uintptr_t)tcache_small_bin_get(tcache, 0)->avail -
            (uintptr_t)tcache_bin_info[0].ncached_max * sizeof(void *));
        idalloctm(tsd_tsdn(tsd), avail_array, NULL, NULL, true, true);
    } else {
        /* Release both the tcache struct and avail array. */
        idalloctm(tsd_tsdn(tsd), tcache, NULL, NULL, true, true);
    }
}

/* For auto tcache (embedded in TSD) only. */
void
tcache_cleanup(tsd_t *tsd) {
    tcache_t *tcache = tsd_tcachep_get(tsd);
    if (!tcache_available(tsd)) {
        assert(tsd_tcache_enabled_get(tsd) == false);
        if (config_debug) {
            assert(tcache_small_bin_get(tcache, 0)->avail == NULL);
        }
        return;
    }
    assert(tsd_tcache_enabled_get(tsd));
    assert(tcache_small_bin_get(tcache, 0)->avail != NULL);

    tcache_destroy(tsd, tcache, true);
    if (config_debug) {
        tcache_small_bin_get(tcache, 0)->avail = NULL;
    }
}

void
tcache_stats_merge(tsdn_t *tsdn, tcache_t *tcache, arena_t *arena) {
    unsigned i;

    cassert(config_stats);

    /* Merge and reset tcache stats. */
    for (i = 0; i < NBINS; i++) {
        bin_t *bin = &arena->bins[i];
        cache_bin_t *tbin = tcache_small_bin_get(tcache, i);
        malloc_mutex_lock(tsdn, &bin->lock);
        bin->stats.nrequests += tbin->tstats.nrequests;
        malloc_mutex_unlock(tsdn, &bin->lock);
        tbin->tstats.nrequests = 0;
    }
    for (; i < nhbins; i++) {
        cache_bin_t *tbin = tcache_large_bin_get(tcache, i);
        arena_stats_large_nrequests_add(tsdn, &arena->stats, i,
            tbin->tstats.nrequests);
        tbin->tstats.nrequests = 0;
    }
}
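/*
 * Manual tcaches (the tcaches array) are created via the tcache.create
 * mallctl.  tcaches_create_prep lazily allocates the global slot array and
 * checks that a slot is still available.
 */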
static bool
tcaches_create_prep(tsd_t *tsd) {
    bool err = false;
    malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
    if (tcaches == NULL) {
        tcaches = base_alloc(tsd_tsdn(tsd), b0get(), sizeof(tcache_t *)
            * (MALLOCX_TCACHE_MAX+1), CACHELINE);
    }
    if (tcaches == NULL ||
        (tcaches_avail == NULL && tcaches_past > MALLOCX_TCACHE_MAX)) {
        err = true;
    }
    malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
    return err;
}

bool
tcaches_create(tsd_t *tsd, unsigned *r_ind) {
    witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
    if (tcaches_create_prep(tsd)) {
        return true;
    }
    tcache_t *tcache = tcache_create_explicit(tsd);
    if (tcache == NULL) {
        return true;
    }

    tcaches_t *elm;
    malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
    if (tcaches_avail != NULL) {
        /* Reuse a slot released by tcaches_destroy(). */
        elm = tcaches_avail;
        tcaches_avail = tcaches_avail->next;
        elm->tcache = tcache;
        *r_ind = (unsigned)(elm - tcaches);
    } else {
        elm = &tcaches[tcaches_past];
        elm->tcache = tcache;
        *r_ind = tcaches_past;
        tcaches_past++;
    }
    malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
    witness_assert_depth(tsdn_witness_tsdp_get(tsd_tsdn(tsd)), 0);
    return false;
}
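/*
 * Detach and return the tcache stored in a tcaches slot, or NULL if the slot
 * is already empty.  The caller must hold tcaches_mtx.
 */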
static tcache_t *
tcaches_elm_remove(tsd_t *tsd, tcaches_t *elm) {
    malloc_mutex_assert_owner(tsd_tsdn(tsd), &tcaches_mtx);
    if (elm->tcache == NULL) {
        return NULL;
    }
    tcache_t *tcache = elm->tcache;
    elm->tcache = NULL;
    return tcache;
}

void
tcaches_flush(tsd_t *tsd, unsigned ind) {
    malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
    tcache_t *tcache = tcaches_elm_remove(tsd, &tcaches[ind]);
    malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
    if (tcache != NULL) {
        tcache_destroy(tsd, tcache, false);
    }
}
void
tcaches_destroy(tsd_t *tsd, unsigned ind) {
    malloc_mutex_lock(tsd_tsdn(tsd), &tcaches_mtx);
    tcaches_t *elm = &tcaches[ind];
    tcache_t *tcache = tcaches_elm_remove(tsd, elm);
    /* Return the slot to the free list. */
    elm->next = tcaches_avail;
    tcaches_avail = elm;
    malloc_mutex_unlock(tsd_tsdn(tsd), &tcaches_mtx);
    if (tcache != NULL) {
        tcache_destroy(tsd, tcache, false);
    }
}
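/*
 * Boot-time setup: clamp tcache_maxclass, create tcaches_mtx, and size each
 * bin's cache (ncached_max) from the corresponding arena bin's nregs.
 */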
bool
tcache_boot(tsdn_t *tsdn) {
    /* If necessary, clamp opt_lg_tcache_max. */
    if (opt_lg_tcache_max < 0 || (ZU(1) << opt_lg_tcache_max) <
        SMALL_MAXCLASS) {
        tcache_maxclass = SMALL_MAXCLASS;
    } else {
        tcache_maxclass = (ZU(1) << opt_lg_tcache_max);
    }

    if (malloc_mutex_init(&tcaches_mtx, "tcaches", WITNESS_RANK_TCACHES,
        malloc_mutex_rank_exclusive)) {
        return true;
    }

    nhbins = sz_size2index(tcache_maxclass) + 1;

    /* Initialize tcache_bin_info. */
    tcache_bin_info = (cache_bin_info_t *)base_alloc(tsdn, b0get(), nhbins
        * sizeof(cache_bin_info_t), CACHELINE);
    if (tcache_bin_info == NULL) {
        return true;
    }
    stack_nelms = 0;
    unsigned i;
    for (i = 0; i < NBINS; i++) {
        if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MIN) {
            tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MIN;
        } else if ((bin_infos[i].nregs << 1) <= TCACHE_NSLOTS_SMALL_MAX) {
            tcache_bin_info[i].ncached_max = (bin_infos[i].nregs << 1);
        } else {
            tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_SMALL_MAX;
        }
        stack_nelms += tcache_bin_info[i].ncached_max;
    }
    for (; i < nhbins; i++) {
        tcache_bin_info[i].ncached_max = TCACHE_NSLOTS_LARGE;
        stack_nelms += tcache_bin_info[i].ncached_max;
    }

    return false;
}
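/* Fork hooks: pass tcaches_mtx through the prefork/postfork sequence. */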
void
tcache_prefork(tsdn_t *tsdn) {
    if (!config_prof && opt_tcache) {
        malloc_mutex_prefork(tsdn, &tcaches_mtx);
    }
}
void
tcache_postfork_parent(tsdn_t *tsdn) {
    if (!config_prof && opt_tcache) {
        malloc_mutex_postfork_parent(tsdn, &tcaches_mtx);
    }
}
void
tcache_postfork_child(tsdn_t *tsdn) {
    if (!config_prof && opt_tcache) {
        malloc_mutex_postfork_child(tsdn, &tcaches_mtx);
    }
}