1 #define JEMALLOC_PROF_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
5 #include "jemalloc/internal/assert.h"
6 #include "jemalloc/internal/ckh.h"
7 #include "jemalloc/internal/hash.h"
8 #include "jemalloc/internal/malloc_io.h"
9 #include "jemalloc/internal/mutex.h"
10 #include "jemalloc/internal/emitter.h"
12 /******************************************************************************/
14 #ifdef JEMALLOC_PROF_LIBUNWIND
15 #define UNW_LOCAL_ONLY
16 #include <libunwind.h>
19 #ifdef JEMALLOC_PROF_LIBGCC
21 * We have a circular dependency -- jemalloc_internal.h tells us if we should
22 * use libgcc's unwinding functionality, but after we've included that, we've
23 * already hooked _Unwind_Backtrace. We'll temporarily disable hooking.
25 #undef _Unwind_Backtrace
27 #define _Unwind_Backtrace JEMALLOC_HOOK(_Unwind_Backtrace, test_hooks_libc_hook)
30 /******************************************************************************/
33 bool opt_prof = false;
34 bool opt_prof_active = true;
35 bool opt_prof_thread_active_init = true;
36 size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
37 ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
38 bool opt_prof_gdump = false;
39 bool opt_prof_final = false;
40 bool opt_prof_leak = false;
41 bool opt_prof_accum = false;
42 bool opt_prof_log = false;
44 /* Minimize memory bloat for non-prof builds. */
51 * Initialized as opt_prof_active, and accessed via
52 * prof_active_[gs]et{_unlocked,}().
55 static malloc_mutex_t prof_active_mtx;
58 * Initialized as opt_prof_thread_active_init, and accessed via
59 * prof_thread_active_init_[gs]et().
61 static bool prof_thread_active_init;
62 static malloc_mutex_t prof_thread_active_init_mtx;
65 * Initialized as opt_prof_gdump, and accessed via
66 * prof_gdump_[gs]et{_unlocked,}().
69 static malloc_mutex_t prof_gdump_mtx;
71 uint64_t prof_interval = 0;
73 size_t lg_prof_sample;
75 typedef enum prof_logging_state_e prof_logging_state_t;
76 enum prof_logging_state_e {
77 prof_logging_state_stopped,
78 prof_logging_state_started,
79 prof_logging_state_dumping
83 * - stopped: log_start never called, or previous log_stop has completed.
84 * - started: log_start called, log_stop not called yet. Allocations are logged.
85 * - dumping: log_stop called but not finished; samples are not logged anymore.
87 prof_logging_state_t prof_logging_state = prof_logging_state_stopped;
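/*
 * Illustrative usage sketch (not part of the upstream source): the state
 * machine above is driven by prof_log_start() and prof_log_stop(), defined
 * later in this file.  Assuming the usual jemalloc convention that a true
 * return value signals failure:
 *
 *	tsd_t *tsd = tsd_fetch();
 *	if (!prof_log_start(tsd_tsdn(tsd), "app.prof.json")) {
 *		... allocations made here are sampled and logged ...
 *		prof_log_stop(tsd_tsdn(tsd));	(state: dumping, then stopped)
 *	}
 */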
90 static bool prof_log_dummy = false;
93 /* Incremented for every log file that is output. */
94 static uint64_t log_seq = 0;
95 static char log_filename[
96 /* Minimize memory bloat for non-prof builds. */
102 /* Timestamp for most recent call to log_start(). */
103 static nstime_t log_start_timestamp = NSTIME_ZERO_INITIALIZER;
105 /* Increment these when adding to the log_bt and log_thr linked lists. */
106 static size_t log_bt_index = 0;
107 static size_t log_thr_index = 0;
109 /* Linked list node definitions. These are only used in prof.c. */
110 typedef struct prof_bt_node_s prof_bt_node_t;
112 struct prof_bt_node_s {
113 prof_bt_node_t *next;
116 /* Variable size backtrace vector pointed to by bt. */
120 typedef struct prof_thr_node_s prof_thr_node_t;
122 struct prof_thr_node_s {
123 prof_thr_node_t *next;
126 /* Variable size based on thr_name_sz. */
130 typedef struct prof_alloc_node_s prof_alloc_node_t;
132 /* This is output when logging sampled allocations. */
133 struct prof_alloc_node_s {
134 prof_alloc_node_t *next;
135 /* Indices into an array of thread data. */
136 size_t alloc_thr_ind;
139 /* Indices into an array of backtraces. */
143 uint64_t alloc_time_ns;
144 uint64_t free_time_ns;
150 * Created on the first call to prof_log_start and deleted on prof_log_stop.
151 * These are the backtraces and threads that have already been logged by an
154 static bool log_tables_initialized = false;
155 static ckh_t log_bt_node_set;
156 static ckh_t log_thr_node_set;
158 /* Store linked lists for logged data. */
159 static prof_bt_node_t *log_bt_first = NULL;
160 static prof_bt_node_t *log_bt_last = NULL;
161 static prof_thr_node_t *log_thr_first = NULL;
162 static prof_thr_node_t *log_thr_last = NULL;
163 static prof_alloc_node_t *log_alloc_first = NULL;
164 static prof_alloc_node_t *log_alloc_last = NULL;
166 /* Protects the prof_logging_state and any log_{...} variable. */
167 static malloc_mutex_t log_mtx;
170 * Table of mutexes that are shared among gctx's. These are leaf locks, so
171 * there is no problem with using them for more than one gctx at the same time.
172  * The primary motivation for this sharing, though, is that gctx's are ephemeral,
173 * and destroying mutexes causes complications for systems that allocate when
174 * creating/destroying mutexes.
176 static malloc_mutex_t *gctx_locks;
177 static atomic_u_t cum_gctxs; /* Atomic counter. */
180 * Table of mutexes that are shared among tdata's. No operations require
181 * holding multiple tdata locks, so there is no problem with using them for more
182 * than one tdata at the same time, even though a gctx lock may be acquired
183 * while holding a tdata lock.
185 static malloc_mutex_t *tdata_locks;
188 * Global hash of (prof_bt_t *)-->(prof_gctx_t *). This is the master data
189 * structure that knows about all backtraces currently captured.
191 static ckh_t bt2gctx;
192 /* Non-static to enable profiling. */
193 malloc_mutex_t bt2gctx_mtx;
196 * Tree of all extant prof_tdata_t structures, regardless of state,
197 * {attached,detached,expired}.
199 static prof_tdata_tree_t tdatas;
200 static malloc_mutex_t tdatas_mtx;
202 static uint64_t next_thr_uid;
203 static malloc_mutex_t next_thr_uid_mtx;
205 static malloc_mutex_t prof_dump_seq_mtx;
206 static uint64_t prof_dump_seq;
207 static uint64_t prof_dump_iseq;
208 static uint64_t prof_dump_mseq;
209 static uint64_t prof_dump_useq;
212 * This buffer is rather large for stack allocation, so use a single buffer for
215 static malloc_mutex_t prof_dump_mtx;
216 static char prof_dump_buf[
217 /* Minimize memory bloat for non-prof builds. */
224 static size_t prof_dump_buf_end;
225 static int prof_dump_fd;
227 /* Do not dump any profiles until bootstrapping is complete. */
228 static bool prof_booted = false;
230 /******************************************************************************/
232 * Function prototypes for static functions that are referenced prior to
236 static bool prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx);
237 static void prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx);
238 static bool prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
239 bool even_if_attached);
240 static void prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata,
241 bool even_if_attached);
242 static char *prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name);
244 /* Hashtable functions for log_bt_node_set and log_thr_node_set. */
245 static void prof_thr_node_hash(const void *key, size_t r_hash[2]);
246 static bool prof_thr_node_keycomp(const void *k1, const void *k2);
247 static void prof_bt_node_hash(const void *key, size_t r_hash[2]);
248 static bool prof_bt_node_keycomp(const void *k1, const void *k2);
250 /******************************************************************************/
251 /* Red-black trees. */
254 prof_tctx_comp(const prof_tctx_t *a, const prof_tctx_t *b) {
255 uint64_t a_thr_uid = a->thr_uid;
256 uint64_t b_thr_uid = b->thr_uid;
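	/*
	 * The (a > b) - (a < b) expressions below evaluate to -1, 0, or 1,
	 * avoiding the truncation that casting a 64-bit difference to int
	 * would risk.
	 */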
257 int ret = (a_thr_uid > b_thr_uid) - (a_thr_uid < b_thr_uid);
259 uint64_t a_thr_discrim = a->thr_discrim;
260 uint64_t b_thr_discrim = b->thr_discrim;
261 ret = (a_thr_discrim > b_thr_discrim) - (a_thr_discrim <
264 uint64_t a_tctx_uid = a->tctx_uid;
265 uint64_t b_tctx_uid = b->tctx_uid;
266 ret = (a_tctx_uid > b_tctx_uid) - (a_tctx_uid <
273 rb_gen(static UNUSED, tctx_tree_, prof_tctx_tree_t, prof_tctx_t,
274 tctx_link, prof_tctx_comp)
277 prof_gctx_comp(const prof_gctx_t *a, const prof_gctx_t *b) {
278 unsigned a_len = a->bt.len;
279 unsigned b_len = b->bt.len;
280 unsigned comp_len = (a_len < b_len) ? a_len : b_len;
281 int ret = memcmp(a->bt.vec, b->bt.vec, comp_len * sizeof(void *));
283 ret = (a_len > b_len) - (a_len < b_len);
288 rb_gen(static UNUSED, gctx_tree_, prof_gctx_tree_t, prof_gctx_t, dump_link,
292 prof_tdata_comp(const prof_tdata_t *a, const prof_tdata_t *b) {
294 uint64_t a_uid = a->thr_uid;
295 uint64_t b_uid = b->thr_uid;
297 ret = ((a_uid > b_uid) - (a_uid < b_uid));
299 uint64_t a_discrim = a->thr_discrim;
300 uint64_t b_discrim = b->thr_discrim;
302 ret = ((a_discrim > b_discrim) - (a_discrim < b_discrim));
307 rb_gen(static UNUSED, tdata_tree_, prof_tdata_tree_t, prof_tdata_t, tdata_link,
310 /******************************************************************************/
313 prof_alloc_rollback(tsd_t *tsd, prof_tctx_t *tctx, bool updated) {
316 cassert(config_prof);
320 * Compute a new sample threshold. This isn't very important in
321 * practice, because this function is rarely executed, so the
322 * potential for sample bias is minimal except in contrived
325 tdata = prof_tdata_get(tsd, true);
327 prof_sample_threshold_update(tdata);
331 if ((uintptr_t)tctx > (uintptr_t)1U) {
332 malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
333 tctx->prepared = false;
334 if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
335 prof_tctx_destroy(tsd, tctx);
337 malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
343 prof_malloc_sample_object(tsdn_t *tsdn, const void *ptr, size_t usize,
345 prof_tctx_set(tsdn, ptr, usize, NULL, tctx);
347 /* Get the current time and set this in the extent_t. We'll read this
348 * when free() is called. */
349 nstime_t t = NSTIME_ZERO_INITIALIZER;
351 prof_alloc_time_set(tsdn, ptr, NULL, t);
353 malloc_mutex_lock(tsdn, tctx->tdata->lock);
354 tctx->cnts.curobjs++;
355 tctx->cnts.curbytes += usize;
356 if (opt_prof_accum) {
357 tctx->cnts.accumobjs++;
358 tctx->cnts.accumbytes += usize;
360 tctx->prepared = false;
361 malloc_mutex_unlock(tsdn, tctx->tdata->lock);
365 prof_log_bt_index(tsd_t *tsd, prof_bt_t *bt) {
366 assert(prof_logging_state == prof_logging_state_started);
367 malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx);
369 prof_bt_node_t dummy_node;
371 prof_bt_node_t *node;
373 /* See if this backtrace is already cached in the table. */
374 if (ckh_search(&log_bt_node_set, (void *)(&dummy_node),
375 (void **)(&node), NULL)) {
376 size_t sz = offsetof(prof_bt_node_t, vec) +
377 (bt->len * sizeof(void *));
378 prof_bt_node_t *new_node = (prof_bt_node_t *)
379 iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL,
380 true, arena_get(TSDN_NULL, 0, true), true);
381 if (log_bt_first == NULL) {
382 log_bt_first = new_node;
383 log_bt_last = new_node;
385 log_bt_last->next = new_node;
386 log_bt_last = new_node;
389 new_node->next = NULL;
390 new_node->index = log_bt_index;
392 * Copy the backtrace: bt is inside a tdata or gctx, which
393 * might die before prof_log_stop is called.
395 new_node->bt.len = bt->len;
396 memcpy(new_node->vec, bt->vec, bt->len * sizeof(void *));
397 new_node->bt.vec = new_node->vec;
400 ckh_insert(tsd, &log_bt_node_set, (void *)new_node, NULL);
401 return new_node->index;
407 prof_log_thr_index(tsd_t *tsd, uint64_t thr_uid, const char *name) {
408 assert(prof_logging_state == prof_logging_state_started);
409 malloc_mutex_assert_owner(tsd_tsdn(tsd), &log_mtx);
411 prof_thr_node_t dummy_node;
412 dummy_node.thr_uid = thr_uid;
413 prof_thr_node_t *node;
415 /* See if this thread is already cached in the table. */
416 if (ckh_search(&log_thr_node_set, (void *)(&dummy_node),
417 (void **)(&node), NULL)) {
418 size_t sz = offsetof(prof_thr_node_t, name) + strlen(name) + 1;
419 prof_thr_node_t *new_node = (prof_thr_node_t *)
420 iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL,
421 true, arena_get(TSDN_NULL, 0, true), true);
422 if (log_thr_first == NULL) {
423 log_thr_first = new_node;
424 log_thr_last = new_node;
426 log_thr_last->next = new_node;
427 log_thr_last = new_node;
430 new_node->next = NULL;
431 new_node->index = log_thr_index;
432 new_node->thr_uid = thr_uid;
433 strcpy(new_node->name, name);
436 ckh_insert(tsd, &log_thr_node_set, (void *)new_node, NULL);
437 return new_node->index;
444 prof_try_log(tsd_t *tsd, const void *ptr, size_t usize, prof_tctx_t *tctx) {
445 malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
447 prof_tdata_t *cons_tdata = prof_tdata_get(tsd, false);
448 if (cons_tdata == NULL) {
450 * We decide not to log these allocations. cons_tdata will be
451 * NULL only when the current thread is in a weird state (e.g.
452 * it's being destroyed).
457 malloc_mutex_lock(tsd_tsdn(tsd), &log_mtx);
459 if (prof_logging_state != prof_logging_state_started) {
463 if (!log_tables_initialized) {
464 bool err1 = ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
465 prof_bt_node_hash, prof_bt_node_keycomp);
466 bool err2 = ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
467 prof_thr_node_hash, prof_thr_node_keycomp);
471 log_tables_initialized = true;
474 nstime_t alloc_time = prof_alloc_time_get(tsd_tsdn(tsd), ptr,
475 (alloc_ctx_t *)NULL);
476 nstime_t free_time = NSTIME_ZERO_INITIALIZER;
477 nstime_update(&free_time);
479 size_t sz = sizeof(prof_alloc_node_t);
480 prof_alloc_node_t *new_node = (prof_alloc_node_t *)
481 iallocztm(tsd_tsdn(tsd), sz, sz_size2index(sz), false, NULL, true,
482 arena_get(TSDN_NULL, 0, true), true);
484 const char *prod_thr_name = (tctx->tdata->thread_name == NULL)?
485 "" : tctx->tdata->thread_name;
486 const char *cons_thr_name = prof_thread_name_get(tsd);
489 /* Initialize the backtrace, using the buffer in tdata to store it. */
490 bt_init(&bt, cons_tdata->vec);
492 prof_bt_t *cons_bt = &bt;
494 /* We haven't destroyed tctx yet, so gctx should be good to read. */
495 prof_bt_t *prod_bt = &tctx->gctx->bt;
497 new_node->next = NULL;
498 new_node->alloc_thr_ind = prof_log_thr_index(tsd, tctx->tdata->thr_uid,
500 new_node->free_thr_ind = prof_log_thr_index(tsd, cons_tdata->thr_uid,
502 new_node->alloc_bt_ind = prof_log_bt_index(tsd, prod_bt);
503 new_node->free_bt_ind = prof_log_bt_index(tsd, cons_bt);
504 new_node->alloc_time_ns = nstime_ns(&alloc_time);
505 new_node->free_time_ns = nstime_ns(&free_time);
506 new_node->usize = usize;
508 if (log_alloc_first == NULL) {
509 log_alloc_first = new_node;
510 log_alloc_last = new_node;
512 log_alloc_last->next = new_node;
513 log_alloc_last = new_node;
517 malloc_mutex_unlock(tsd_tsdn(tsd), &log_mtx);
521 prof_free_sampled_object(tsd_t *tsd, const void *ptr, size_t usize,
523 malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
525 assert(tctx->cnts.curobjs > 0);
526 assert(tctx->cnts.curbytes >= usize);
527 tctx->cnts.curobjs--;
528 tctx->cnts.curbytes -= usize;
530 prof_try_log(tsd, ptr, usize, tctx);
532 if (prof_tctx_should_destroy(tsd_tsdn(tsd), tctx)) {
533 prof_tctx_destroy(tsd, tctx);
535 malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
540 bt_init(prof_bt_t *bt, void **vec) {
541 cassert(config_prof);
548 prof_enter(tsd_t *tsd, prof_tdata_t *tdata) {
549 cassert(config_prof);
550 assert(tdata == prof_tdata_get(tsd, false));
557 malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
561 prof_leave(tsd_t *tsd, prof_tdata_t *tdata) {
562 cassert(config_prof);
563 assert(tdata == prof_tdata_get(tsd, false));
565 malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
572 idump = tdata->enq_idump;
573 tdata->enq_idump = false;
574 gdump = tdata->enq_gdump;
575 tdata->enq_gdump = false;
578 prof_idump(tsd_tsdn(tsd));
581 prof_gdump(tsd_tsdn(tsd));
586 #ifdef JEMALLOC_PROF_LIBUNWIND
588 prof_backtrace(prof_bt_t *bt) {
591 cassert(config_prof);
592 assert(bt->len == 0);
593 assert(bt->vec != NULL);
595 nframes = unw_backtrace(bt->vec, PROF_BT_MAX);
601 #elif (defined(JEMALLOC_PROF_LIBGCC))
602 static _Unwind_Reason_Code
603 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg) {
604 cassert(config_prof);
606 return _URC_NO_REASON;
609 static _Unwind_Reason_Code
610 prof_unwind_callback(struct _Unwind_Context *context, void *arg) {
611 prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
614 cassert(config_prof);
616 ip = (void *)_Unwind_GetIP(context);
618 return _URC_END_OF_STACK;
620 data->bt->vec[data->bt->len] = ip;
622 if (data->bt->len == data->max) {
623 return _URC_END_OF_STACK;
626 return _URC_NO_REASON;
630 prof_backtrace(prof_bt_t *bt) {
631 prof_unwind_data_t data = {bt, PROF_BT_MAX};
633 cassert(config_prof);
635 _Unwind_Backtrace(prof_unwind_callback, &data);
637 #elif (defined(JEMALLOC_PROF_GCC))
639 prof_backtrace(prof_bt_t *bt) {
640 #define BT_FRAME(i) \
641 if ((i) < PROF_BT_MAX) { \
643 if (__builtin_frame_address(i) == 0) { \
646 p = __builtin_return_address(i); \
656 cassert(config_prof);
802 prof_backtrace(prof_bt_t *bt) {
803 cassert(config_prof);
808 static malloc_mutex_t *
809 prof_gctx_mutex_choose(void) {
810 unsigned ngctxs = atomic_fetch_add_u(&cum_gctxs, 1, ATOMIC_RELAXED);
812 return &gctx_locks[(ngctxs - 1) % PROF_NCTX_LOCKS];
815 static malloc_mutex_t *
816 prof_tdata_mutex_choose(uint64_t thr_uid) {
817 return &tdata_locks[thr_uid % PROF_NTDATA_LOCKS];
821 prof_gctx_create(tsdn_t *tsdn, prof_bt_t *bt) {
823 * Create a single allocation that has space for vec of length bt->len.
825 size_t size = offsetof(prof_gctx_t, vec) + (bt->len * sizeof(void *));
826 prof_gctx_t *gctx = (prof_gctx_t *)iallocztm(tsdn, size,
827 sz_size2index(size), false, NULL, true, arena_get(TSDN_NULL, 0, true),
832 gctx->lock = prof_gctx_mutex_choose();
834 * Set nlimbo to 1, in order to avoid a race condition with
835 * prof_tctx_destroy()/prof_gctx_try_destroy().
838 tctx_tree_new(&gctx->tctxs);
840 memcpy(gctx->vec, bt->vec, bt->len * sizeof(void *));
841 gctx->bt.vec = gctx->vec;
842 gctx->bt.len = bt->len;
847 prof_gctx_try_destroy(tsd_t *tsd, prof_tdata_t *tdata_self, prof_gctx_t *gctx,
848 prof_tdata_t *tdata) {
849 cassert(config_prof);
852 * Check that gctx is still unused by any thread cache before destroying
853 * it. prof_lookup() increments gctx->nlimbo in order to avoid a race
854 * condition with this function, as does prof_tctx_destroy() in order to
855 * avoid a race between the main body of prof_tctx_destroy() and entry
856 * into this function.
858 prof_enter(tsd, tdata_self);
859 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
860 assert(gctx->nlimbo != 0);
861 if (tctx_tree_empty(&gctx->tctxs) && gctx->nlimbo == 1) {
862 /* Remove gctx from bt2gctx. */
863 if (ckh_remove(tsd, &bt2gctx, &gctx->bt, NULL, NULL)) {
866 prof_leave(tsd, tdata_self);
868 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
869 idalloctm(tsd_tsdn(tsd), gctx, NULL, NULL, true, true);
872 * Compensate for increment in prof_tctx_destroy() or
876 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
877 prof_leave(tsd, tdata_self);
882 prof_tctx_should_destroy(tsdn_t *tsdn, prof_tctx_t *tctx) {
883 malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
885 if (opt_prof_accum) {
888 if (tctx->cnts.curobjs != 0) {
891 if (tctx->prepared) {
898 prof_gctx_should_destroy(prof_gctx_t *gctx) {
899 if (opt_prof_accum) {
902 if (!tctx_tree_empty(&gctx->tctxs)) {
905 if (gctx->nlimbo != 0) {
912 prof_tctx_destroy(tsd_t *tsd, prof_tctx_t *tctx) {
913 prof_tdata_t *tdata = tctx->tdata;
914 prof_gctx_t *gctx = tctx->gctx;
915 bool destroy_tdata, destroy_tctx, destroy_gctx;
917 malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
919 assert(tctx->cnts.curobjs == 0);
920 assert(tctx->cnts.curbytes == 0);
921 assert(!opt_prof_accum);
922 assert(tctx->cnts.accumobjs == 0);
923 assert(tctx->cnts.accumbytes == 0);
925 ckh_remove(tsd, &tdata->bt2tctx, &gctx->bt, NULL, NULL);
926 destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata, false);
927 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
929 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
930 switch (tctx->state) {
931 case prof_tctx_state_nominal:
932 tctx_tree_remove(&gctx->tctxs, tctx);
934 if (prof_gctx_should_destroy(gctx)) {
936 * Increment gctx->nlimbo in order to keep another
937 * thread from winning the race to destroy gctx while
938 * this one has gctx->lock dropped. Without this, it
939 * would be possible for another thread to:
941 * 1) Sample an allocation associated with gctx.
942 * 2) Deallocate the sampled object.
943 * 3) Successfully prof_gctx_try_destroy(gctx).
945 * The result would be that gctx no longer exists by the
946 * time this thread accesses it in
947 * prof_gctx_try_destroy().
952 destroy_gctx = false;
955 case prof_tctx_state_dumping:
957 * A dumping thread needs tctx to remain valid until dumping
958 * has finished. Change state such that the dumping thread will
959 * complete destruction during a late dump iteration phase.
961 tctx->state = prof_tctx_state_purgatory;
962 destroy_tctx = false;
963 destroy_gctx = false;
967 destroy_tctx = false;
968 destroy_gctx = false;
970 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
972 prof_gctx_try_destroy(tsd, prof_tdata_get(tsd, false), gctx,
976 malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
979 prof_tdata_destroy(tsd, tdata, false);
983 idalloctm(tsd_tsdn(tsd), tctx, NULL, NULL, true, true);
988 prof_lookup_global(tsd_t *tsd, prof_bt_t *bt, prof_tdata_t *tdata,
989 void **p_btkey, prof_gctx_t **p_gctx, bool *p_new_gctx) {
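	/*
	 * Lookup-or-insert protocol: search bt2gctx while holding bt2gctx_mtx
	 * via prof_enter(); on a miss, drop the lock, allocate a candidate
	 * gctx, then re-enter and search again before inserting, since another
	 * thread may have inserted an equivalent gctx in the meantime.  The
	 * loser of that race frees its speculatively created gctx further
	 * below.
	 */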
1000 prof_enter(tsd, tdata);
1001 if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
1002 /* bt has never been seen before. Insert it. */
1003 prof_leave(tsd, tdata);
1004 tgctx.p = prof_gctx_create(tsd_tsdn(tsd), bt);
1005 if (tgctx.v == NULL) {
1008 prof_enter(tsd, tdata);
1009 if (ckh_search(&bt2gctx, bt, &btkey.v, &gctx.v)) {
1011 btkey.p = &gctx.p->bt;
1012 if (ckh_insert(tsd, &bt2gctx, btkey.v, gctx.v)) {
1014 prof_leave(tsd, tdata);
1015 idalloctm(tsd_tsdn(tsd), gctx.v, NULL, NULL,
1030 * Increment nlimbo, in order to avoid a race condition with
1031 * prof_tctx_destroy()/prof_gctx_try_destroy().
1033 malloc_mutex_lock(tsd_tsdn(tsd), gctx.p->lock);
1035 malloc_mutex_unlock(tsd_tsdn(tsd), gctx.p->lock);
1038 if (tgctx.v != NULL) {
1039 /* Lost race to insert. */
1040 idalloctm(tsd_tsdn(tsd), tgctx.v, NULL, NULL, true,
1044 prof_leave(tsd, tdata);
1048 *p_new_gctx = new_gctx;
1053 prof_lookup(tsd_t *tsd, prof_bt_t *bt) {
1058 prof_tdata_t *tdata;
1061 cassert(config_prof);
1063 tdata = prof_tdata_get(tsd, false);
1064 if (tdata == NULL) {
1068 malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
1069 not_found = ckh_search(&tdata->bt2tctx, bt, NULL, &ret.v);
1070 if (!not_found) { /* Note double negative! */
1071 ret.p->prepared = true;
1073 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
1077 bool new_gctx, error;
1080 * This thread's cache lacks bt. Look for it in the global
1083 if (prof_lookup_global(tsd, bt, tdata, &btkey, &gctx,
1088 /* Link a prof_tctx_t into gctx for this thread. */
1089 ret.v = iallocztm(tsd_tsdn(tsd), sizeof(prof_tctx_t),
1090 sz_size2index(sizeof(prof_tctx_t)), false, NULL, true,
1091 arena_ichoose(tsd, NULL), true);
1092 if (ret.p == NULL) {
1094 prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1098 ret.p->tdata = tdata;
1099 ret.p->thr_uid = tdata->thr_uid;
1100 ret.p->thr_discrim = tdata->thr_discrim;
1101 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
1103 ret.p->tctx_uid = tdata->tctx_uid_next++;
1104 ret.p->prepared = true;
1105 ret.p->state = prof_tctx_state_initializing;
1106 malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
1107 error = ckh_insert(tsd, &tdata->bt2tctx, btkey, ret.v);
1108 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
1111 prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1113 idalloctm(tsd_tsdn(tsd), ret.v, NULL, NULL, true, true);
1116 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
1117 ret.p->state = prof_tctx_state_nominal;
1118 tctx_tree_insert(&gctx->tctxs, ret.p);
1120 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1127 * The bodies of this function and prof_leakcheck() are compiled out unless heap
1128 * profiling is enabled, so that it is possible to compile jemalloc with
1129 * floating point support completely disabled. Avoiding floating point code is
1130 * important on memory-constrained systems, but it also enables a workaround for
1131 * versions of glibc that don't properly save/restore floating point registers
1132 * during dynamic lazy symbol loading (which internally calls into whatever
1133 * malloc implementation happens to be integrated into the application). Note
1134 * that some compilers (e.g. gcc 4.8) may use floating point registers for fast
1135 * memory moves, so jemalloc must be compiled with such optimizations disabled
1137 * -mno-sse) in order for the workaround to be complete.
1140 prof_sample_threshold_update(prof_tdata_t *tdata) {
1141 #ifdef JEMALLOC_PROF
1146 if (lg_prof_sample == 0) {
1147 tsd_bytes_until_sample_set(tsd_fetch(), 0);
1152 * Compute sample interval as a geometrically distributed random
1153 * variable with mean (2^lg_prof_sample).
1157	 *   tdata->bytes_until_sample = ceil(log(u) / log(1 - p)), where u is
1158	 *   drawn uniformly at random and p = 1 / 2^lg_prof_sample.
1161 * For more information on the math, see:
1163 * Non-Uniform Random Variate Generation
1165 * Springer-Verlag, New York, 1986
1167 * (http://luc.devroye.org/rnbookindex.html)
1169 uint64_t r = prng_lg_range_u64(&tdata->prng_state, 53);
1170 double u = (double)r * (1.0/9007199254740992.0L);
1171 uint64_t bytes_until_sample = (uint64_t)(log(u) /
1172 log(1.0 - (1.0 / (double)((uint64_t)1U << lg_prof_sample))))
1174 if (bytes_until_sample > SSIZE_MAX) {
1175 bytes_until_sample = SSIZE_MAX;
1177 tsd_bytes_until_sample_set(tsd_fetch(), bytes_until_sample);
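	/*
	 * Worked example: with lg_prof_sample = 19 (the default, i.e. an
	 * average of one sample per 512 KiB allocated), p = 2^-19, so a draw
	 * of u = 0.5 yields bytes_until_sample = log(0.5) / log(1 - 2^-19)
	 * ~= 363,409 bytes until the next sampled allocation.
	 */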
1183 static prof_tdata_t *
1184 prof_tdata_count_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1186 size_t *tdata_count = (size_t *)arg;
1194 prof_tdata_count(void) {
1195 size_t tdata_count = 0;
1198 tsdn = tsdn_fetch();
1199 malloc_mutex_lock(tsdn, &tdatas_mtx);
1200 tdata_tree_iter(&tdatas, NULL, prof_tdata_count_iter,
1201 (void *)&tdata_count);
1202 malloc_mutex_unlock(tsdn, &tdatas_mtx);
1208 prof_bt_count(void) {
1211 prof_tdata_t *tdata;
1214 tdata = prof_tdata_get(tsd, false);
1215 if (tdata == NULL) {
1219 malloc_mutex_lock(tsd_tsdn(tsd), &bt2gctx_mtx);
1220 bt_count = ckh_count(&bt2gctx);
1221 malloc_mutex_unlock(tsd_tsdn(tsd), &bt2gctx_mtx);
1228 prof_dump_open_impl(bool propagate_err, const char *filename) {
1231 fd = creat(filename, 0644);
1232 if (fd == -1 && !propagate_err) {
1233		malloc_printf("<jemalloc>: creat(\"%s\", 0644) failed\n",
1242 prof_dump_open_t *JET_MUTABLE prof_dump_open = prof_dump_open_impl;
1245 prof_dump_flush(bool propagate_err) {
1249 cassert(config_prof);
1251 err = malloc_write_fd(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
1253 if (!propagate_err) {
1254 malloc_write("<jemalloc>: write() failed during heap "
1262 prof_dump_buf_end = 0;
1268 prof_dump_close(bool propagate_err) {
1271 assert(prof_dump_fd != -1);
1272 ret = prof_dump_flush(propagate_err);
1273 close(prof_dump_fd);
1280 prof_dump_write(bool propagate_err, const char *s) {
1283 cassert(config_prof);
1288 /* Flush the buffer if it is full. */
1289 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1290 if (prof_dump_flush(propagate_err) && propagate_err) {
1295 if (prof_dump_buf_end + slen - i <= PROF_DUMP_BUFSIZE) {
1296 /* Finish writing. */
1299 /* Write as much of s as will fit. */
1300 n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
1302 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
1303 prof_dump_buf_end += n;
1311 JEMALLOC_FORMAT_PRINTF(2, 3)
1313 prof_dump_printf(bool propagate_err, const char *format, ...) {
1316 char buf[PROF_PRINTF_BUFSIZE];
1318 va_start(ap, format);
1319 malloc_vsnprintf(buf, sizeof(buf), format, ap);
1321 ret = prof_dump_write(propagate_err, buf);
1327 prof_tctx_merge_tdata(tsdn_t *tsdn, prof_tctx_t *tctx, prof_tdata_t *tdata) {
1328 malloc_mutex_assert_owner(tsdn, tctx->tdata->lock);
1330 malloc_mutex_lock(tsdn, tctx->gctx->lock);
1332 switch (tctx->state) {
1333 case prof_tctx_state_initializing:
1334 malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1336 case prof_tctx_state_nominal:
1337 tctx->state = prof_tctx_state_dumping;
1338 malloc_mutex_unlock(tsdn, tctx->gctx->lock);
1340 memcpy(&tctx->dump_cnts, &tctx->cnts, sizeof(prof_cnt_t));
1342 tdata->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1343 tdata->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1344 if (opt_prof_accum) {
1345 tdata->cnt_summed.accumobjs +=
1346 tctx->dump_cnts.accumobjs;
1347 tdata->cnt_summed.accumbytes +=
1348 tctx->dump_cnts.accumbytes;
1351 case prof_tctx_state_dumping:
1352 case prof_tctx_state_purgatory:
1358 prof_tctx_merge_gctx(tsdn_t *tsdn, prof_tctx_t *tctx, prof_gctx_t *gctx) {
1359 malloc_mutex_assert_owner(tsdn, gctx->lock);
1361 gctx->cnt_summed.curobjs += tctx->dump_cnts.curobjs;
1362 gctx->cnt_summed.curbytes += tctx->dump_cnts.curbytes;
1363 if (opt_prof_accum) {
1364 gctx->cnt_summed.accumobjs += tctx->dump_cnts.accumobjs;
1365 gctx->cnt_summed.accumbytes += tctx->dump_cnts.accumbytes;
1369 static prof_tctx_t *
1370 prof_tctx_merge_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
1371 tsdn_t *tsdn = (tsdn_t *)arg;
1373 malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1375 switch (tctx->state) {
1376 case prof_tctx_state_nominal:
1377 /* New since dumping started; ignore. */
1379 case prof_tctx_state_dumping:
1380 case prof_tctx_state_purgatory:
1381 prof_tctx_merge_gctx(tsdn, tctx, tctx->gctx);
1390 struct prof_tctx_dump_iter_arg_s {
1395 static prof_tctx_t *
1396 prof_tctx_dump_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *opaque) {
1397 struct prof_tctx_dump_iter_arg_s *arg =
1398 (struct prof_tctx_dump_iter_arg_s *)opaque;
1400 malloc_mutex_assert_owner(arg->tsdn, tctx->gctx->lock);
1402 switch (tctx->state) {
1403 case prof_tctx_state_initializing:
1404 case prof_tctx_state_nominal:
1405 /* Not captured by this dump. */
1407 case prof_tctx_state_dumping:
1408 case prof_tctx_state_purgatory:
1409 if (prof_dump_printf(arg->propagate_err,
1410 " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": "
1411 "%"FMTu64"]\n", tctx->thr_uid, tctx->dump_cnts.curobjs,
1412 tctx->dump_cnts.curbytes, tctx->dump_cnts.accumobjs,
1413 tctx->dump_cnts.accumbytes)) {
1423 static prof_tctx_t *
1424 prof_tctx_finish_iter(prof_tctx_tree_t *tctxs, prof_tctx_t *tctx, void *arg) {
1425 tsdn_t *tsdn = (tsdn_t *)arg;
1428 malloc_mutex_assert_owner(tsdn, tctx->gctx->lock);
1430 switch (tctx->state) {
1431 case prof_tctx_state_nominal:
1432 /* New since dumping started; ignore. */
1434 case prof_tctx_state_dumping:
1435 tctx->state = prof_tctx_state_nominal;
1437 case prof_tctx_state_purgatory:
1450 prof_dump_gctx_prep(tsdn_t *tsdn, prof_gctx_t *gctx, prof_gctx_tree_t *gctxs) {
1451 cassert(config_prof);
1453 malloc_mutex_lock(tsdn, gctx->lock);
1456 * Increment nlimbo so that gctx won't go away before dump.
1457 * Additionally, link gctx into the dump list so that it is included in
1458 * prof_dump()'s second pass.
1461 gctx_tree_insert(gctxs, gctx);
1463 memset(&gctx->cnt_summed, 0, sizeof(prof_cnt_t));
1465 malloc_mutex_unlock(tsdn, gctx->lock);
1468 struct prof_gctx_merge_iter_arg_s {
1473 static prof_gctx_t *
1474 prof_gctx_merge_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
1475 struct prof_gctx_merge_iter_arg_s *arg =
1476 (struct prof_gctx_merge_iter_arg_s *)opaque;
1478 malloc_mutex_lock(arg->tsdn, gctx->lock);
1479 tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_merge_iter,
1481 if (gctx->cnt_summed.curobjs != 0) {
1484 malloc_mutex_unlock(arg->tsdn, gctx->lock);
1490 prof_gctx_finish(tsd_t *tsd, prof_gctx_tree_t *gctxs) {
1491 prof_tdata_t *tdata = prof_tdata_get(tsd, false);
1495 * Standard tree iteration won't work here, because as soon as we
1496 * decrement gctx->nlimbo and unlock gctx, another thread can
1497 * concurrently destroy it, which will corrupt the tree. Therefore,
1498 * tear down the tree one node at a time during iteration.
1500 while ((gctx = gctx_tree_first(gctxs)) != NULL) {
1501 gctx_tree_remove(gctxs, gctx);
1502 malloc_mutex_lock(tsd_tsdn(tsd), gctx->lock);
1508 prof_tctx_t *to_destroy =
1509 tctx_tree_iter(&gctx->tctxs, next,
1510 prof_tctx_finish_iter,
1511 (void *)tsd_tsdn(tsd));
1512 if (to_destroy != NULL) {
1513 next = tctx_tree_next(&gctx->tctxs,
1515 tctx_tree_remove(&gctx->tctxs,
1517 idalloctm(tsd_tsdn(tsd), to_destroy,
1518 NULL, NULL, true, true);
1522 } while (next != NULL);
1525 if (prof_gctx_should_destroy(gctx)) {
1527 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1528 prof_gctx_try_destroy(tsd, tdata, gctx, tdata);
1530 malloc_mutex_unlock(tsd_tsdn(tsd), gctx->lock);
1535 struct prof_tdata_merge_iter_arg_s {
1540 static prof_tdata_t *
1541 prof_tdata_merge_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1543 struct prof_tdata_merge_iter_arg_s *arg =
1544 (struct prof_tdata_merge_iter_arg_s *)opaque;
1546 malloc_mutex_lock(arg->tsdn, tdata->lock);
1547 if (!tdata->expired) {
1554 tdata->dumping = true;
1555 memset(&tdata->cnt_summed, 0, sizeof(prof_cnt_t));
1556 for (tabind = 0; !ckh_iter(&tdata->bt2tctx, &tabind, NULL,
1558 prof_tctx_merge_tdata(arg->tsdn, tctx.p, tdata);
1561 arg->cnt_all.curobjs += tdata->cnt_summed.curobjs;
1562 arg->cnt_all.curbytes += tdata->cnt_summed.curbytes;
1563 if (opt_prof_accum) {
1564 arg->cnt_all.accumobjs += tdata->cnt_summed.accumobjs;
1565 arg->cnt_all.accumbytes += tdata->cnt_summed.accumbytes;
1568 tdata->dumping = false;
1570 malloc_mutex_unlock(arg->tsdn, tdata->lock);
1575 static prof_tdata_t *
1576 prof_tdata_dump_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
1578 bool propagate_err = *(bool *)arg;
1580 if (!tdata->dumping) {
1584 if (prof_dump_printf(propagate_err,
1585 " t%"FMTu64": %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]%s%s\n",
1586 tdata->thr_uid, tdata->cnt_summed.curobjs,
1587 tdata->cnt_summed.curbytes, tdata->cnt_summed.accumobjs,
1588 tdata->cnt_summed.accumbytes,
1589 (tdata->thread_name != NULL) ? " " : "",
1590 (tdata->thread_name != NULL) ? tdata->thread_name : "")) {
1597 prof_dump_header_impl(tsdn_t *tsdn, bool propagate_err,
1598 const prof_cnt_t *cnt_all) {
1601 if (prof_dump_printf(propagate_err,
1602 "heap_v2/%"FMTu64"\n"
1603 " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1604 ((uint64_t)1U << lg_prof_sample), cnt_all->curobjs,
1605 cnt_all->curbytes, cnt_all->accumobjs, cnt_all->accumbytes)) {
1609 malloc_mutex_lock(tsdn, &tdatas_mtx);
1610 ret = (tdata_tree_iter(&tdatas, NULL, prof_tdata_dump_iter,
1611 (void *)&propagate_err) != NULL);
1612 malloc_mutex_unlock(tsdn, &tdatas_mtx);
1615 prof_dump_header_t *JET_MUTABLE prof_dump_header = prof_dump_header_impl;
1618 prof_dump_gctx(tsdn_t *tsdn, bool propagate_err, prof_gctx_t *gctx,
1619 const prof_bt_t *bt, prof_gctx_tree_t *gctxs) {
1622 struct prof_tctx_dump_iter_arg_s prof_tctx_dump_iter_arg;
1624 cassert(config_prof);
1625 malloc_mutex_assert_owner(tsdn, gctx->lock);
1627	/* Avoid dumping gctx's that have no useful data. */
1628 if ((!opt_prof_accum && gctx->cnt_summed.curobjs == 0) ||
1629 (opt_prof_accum && gctx->cnt_summed.accumobjs == 0)) {
1630 assert(gctx->cnt_summed.curobjs == 0);
1631 assert(gctx->cnt_summed.curbytes == 0);
1632 assert(gctx->cnt_summed.accumobjs == 0);
1633 assert(gctx->cnt_summed.accumbytes == 0);
1638 if (prof_dump_printf(propagate_err, "@")) {
1642 for (i = 0; i < bt->len; i++) {
1643 if (prof_dump_printf(propagate_err, " %#"FMTxPTR,
1644 (uintptr_t)bt->vec[i])) {
1650 if (prof_dump_printf(propagate_err,
1652 " t*: %"FMTu64": %"FMTu64" [%"FMTu64": %"FMTu64"]\n",
1653 gctx->cnt_summed.curobjs, gctx->cnt_summed.curbytes,
1654 gctx->cnt_summed.accumobjs, gctx->cnt_summed.accumbytes)) {
1659 prof_tctx_dump_iter_arg.tsdn = tsdn;
1660 prof_tctx_dump_iter_arg.propagate_err = propagate_err;
1661 if (tctx_tree_iter(&gctx->tctxs, NULL, prof_tctx_dump_iter,
1662 (void *)&prof_tctx_dump_iter_arg) != NULL) {
1673 JEMALLOC_FORMAT_PRINTF(1, 2)
1675 prof_open_maps(const char *format, ...) {
1678 char filename[PATH_MAX + 1];
1680 va_start(ap, format);
1681 malloc_vsnprintf(filename, sizeof(filename), format, ap);
1684 #if defined(O_CLOEXEC)
1685 mfd = open(filename, O_RDONLY | O_CLOEXEC);
1687 mfd = open(filename, O_RDONLY);
1689 fcntl(mfd, F_SETFD, fcntl(mfd, F_GETFD) | FD_CLOEXEC);
1700 return GetCurrentProcessId();
1707 prof_dump_maps(bool propagate_err) {
1711 cassert(config_prof);
1713 mfd = prof_open_maps("/proc/curproc/map");
1714 #elif defined(_WIN32)
1715	mfd = -1; /* Not implemented. */
1718 int pid = prof_getpid();
1720 mfd = prof_open_maps("/proc/%d/task/%d/maps", pid, pid);
1722 mfd = prof_open_maps("/proc/%d/maps", pid);
1729 if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
1736 prof_dump_buf_end += nread;
1737 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
1738 /* Make space in prof_dump_buf before read(). */
1739 if (prof_dump_flush(propagate_err) &&
1745 nread = malloc_read_fd(mfd,
1746 &prof_dump_buf[prof_dump_buf_end], PROF_DUMP_BUFSIZE
1747 - prof_dump_buf_end);
1748 } while (nread > 0);
1763 * See prof_sample_threshold_update() comment for why the body of this function
1764 * is conditionally compiled.
1767 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_ngctx,
1768 const char *filename) {
1769 #ifdef JEMALLOC_PROF
1771	 * Scaling is equivalent to AdjustSamples() in jeprof, but the result may
1772 * differ slightly from what jeprof reports, because here we scale the
1773 * summary values, whereas jeprof scales each context individually and
1774 * reports the sums of the scaled values.
1776 if (cnt_all->curbytes != 0) {
1777 double sample_period = (double)((uint64_t)1 << lg_prof_sample);
1778 double ratio = (((double)cnt_all->curbytes) /
1779 (double)cnt_all->curobjs) / sample_period;
1780 double scale_factor = 1.0 / (1.0 - exp(-ratio));
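		/*
		 * Worked example: with a 512 KiB sample period and an average
		 * object size of 64 KiB, ratio = 0.125 and scale_factor =
		 * 1 / (1 - e^-0.125) ~= 8.5, i.e. each sampled object stands
		 * in for roughly 8.5 objects of that size.
		 */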
1781 uint64_t curbytes = (uint64_t)round(((double)cnt_all->curbytes)
1783 uint64_t curobjs = (uint64_t)round(((double)cnt_all->curobjs) *
1786 malloc_printf("<jemalloc>: Leak approximation summary: ~%"FMTu64
1787 " byte%s, ~%"FMTu64" object%s, >= %zu context%s\n",
1788 curbytes, (curbytes != 1) ? "s" : "", curobjs, (curobjs !=
1789 1) ? "s" : "", leak_ngctx, (leak_ngctx != 1) ? "s" : "");
1791 "<jemalloc>: Run jeprof on \"%s\" for leak detail\n",
1797 struct prof_gctx_dump_iter_arg_s {
1802 static prof_gctx_t *
1803 prof_gctx_dump_iter(prof_gctx_tree_t *gctxs, prof_gctx_t *gctx, void *opaque) {
1805 struct prof_gctx_dump_iter_arg_s *arg =
1806 (struct prof_gctx_dump_iter_arg_s *)opaque;
1808 malloc_mutex_lock(arg->tsdn, gctx->lock);
1810 if (prof_dump_gctx(arg->tsdn, arg->propagate_err, gctx, &gctx->bt,
1818 malloc_mutex_unlock(arg->tsdn, gctx->lock);
1823 prof_dump_prep(tsd_t *tsd, prof_tdata_t *tdata,
1824 struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
1825 struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
1826 prof_gctx_tree_t *gctxs) {
1833 prof_enter(tsd, tdata);
1836 * Put gctx's in limbo and clear their counters in preparation for
1839 gctx_tree_new(gctxs);
1840 for (tabind = 0; !ckh_iter(&bt2gctx, &tabind, NULL, &gctx.v);) {
1841 prof_dump_gctx_prep(tsd_tsdn(tsd), gctx.p, gctxs);
1845 * Iterate over tdatas, and for the non-expired ones snapshot their tctx
1846 * stats and merge them into the associated gctx's.
1848 prof_tdata_merge_iter_arg->tsdn = tsd_tsdn(tsd);
1849 memset(&prof_tdata_merge_iter_arg->cnt_all, 0, sizeof(prof_cnt_t));
1850 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
1851 tdata_tree_iter(&tdatas, NULL, prof_tdata_merge_iter,
1852 (void *)prof_tdata_merge_iter_arg);
1853 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
1855 /* Merge tctx stats into gctx's. */
1856 prof_gctx_merge_iter_arg->tsdn = tsd_tsdn(tsd);
1857 prof_gctx_merge_iter_arg->leak_ngctx = 0;
1858 gctx_tree_iter(gctxs, NULL, prof_gctx_merge_iter,
1859 (void *)prof_gctx_merge_iter_arg);
1861 prof_leave(tsd, tdata);
1865 prof_dump_file(tsd_t *tsd, bool propagate_err, const char *filename,
1866 bool leakcheck, prof_tdata_t *tdata,
1867 struct prof_tdata_merge_iter_arg_s *prof_tdata_merge_iter_arg,
1868 struct prof_gctx_merge_iter_arg_s *prof_gctx_merge_iter_arg,
1869 struct prof_gctx_dump_iter_arg_s *prof_gctx_dump_iter_arg,
1870 prof_gctx_tree_t *gctxs) {
1871 /* Create dump file. */
1872 if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1) {
1876 /* Dump profile header. */
1877 if (prof_dump_header(tsd_tsdn(tsd), propagate_err,
1878 &prof_tdata_merge_iter_arg->cnt_all)) {
1879 goto label_write_error;
1882 /* Dump per gctx profile stats. */
1883 prof_gctx_dump_iter_arg->tsdn = tsd_tsdn(tsd);
1884 prof_gctx_dump_iter_arg->propagate_err = propagate_err;
1885 if (gctx_tree_iter(gctxs, NULL, prof_gctx_dump_iter,
1886 (void *)prof_gctx_dump_iter_arg) != NULL) {
1887 goto label_write_error;
1890 /* Dump /proc/<pid>/maps if possible. */
1891 if (prof_dump_maps(propagate_err)) {
1892 goto label_write_error;
1895 if (prof_dump_close(propagate_err)) {
1901 prof_dump_close(propagate_err);
1906 prof_dump(tsd_t *tsd, bool propagate_err, const char *filename,
1908 cassert(config_prof);
1909 assert(tsd_reentrancy_level_get(tsd) == 0);
1911 prof_tdata_t * tdata = prof_tdata_get(tsd, true);
1912 if (tdata == NULL) {
1916 pre_reentrancy(tsd, NULL);
1917 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
1919 prof_gctx_tree_t gctxs;
1920 struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
1921 struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
1922 struct prof_gctx_dump_iter_arg_s prof_gctx_dump_iter_arg;
1923 prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
1924 &prof_gctx_merge_iter_arg, &gctxs);
1925 bool err = prof_dump_file(tsd, propagate_err, filename, leakcheck, tdata,
1926 &prof_tdata_merge_iter_arg, &prof_gctx_merge_iter_arg,
1927 &prof_gctx_dump_iter_arg, &gctxs);
1928 prof_gctx_finish(tsd, &gctxs);
1930 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
1931 post_reentrancy(tsd);
1938 prof_leakcheck(&prof_tdata_merge_iter_arg.cnt_all,
1939 prof_gctx_merge_iter_arg.leak_ngctx, filename);
1946 prof_cnt_all(uint64_t *curobjs, uint64_t *curbytes, uint64_t *accumobjs,
1947 uint64_t *accumbytes) {
1949 prof_tdata_t *tdata;
1950 struct prof_tdata_merge_iter_arg_s prof_tdata_merge_iter_arg;
1951 struct prof_gctx_merge_iter_arg_s prof_gctx_merge_iter_arg;
1952 prof_gctx_tree_t gctxs;
1955 tdata = prof_tdata_get(tsd, false);
1956 if (tdata == NULL) {
1957 if (curobjs != NULL) {
1960 if (curbytes != NULL) {
1963 if (accumobjs != NULL) {
1966 if (accumbytes != NULL) {
1972 prof_dump_prep(tsd, tdata, &prof_tdata_merge_iter_arg,
1973 &prof_gctx_merge_iter_arg, &gctxs);
1974 prof_gctx_finish(tsd, &gctxs);
1976 if (curobjs != NULL) {
1977 *curobjs = prof_tdata_merge_iter_arg.cnt_all.curobjs;
1979 if (curbytes != NULL) {
1980 *curbytes = prof_tdata_merge_iter_arg.cnt_all.curbytes;
1982 if (accumobjs != NULL) {
1983 *accumobjs = prof_tdata_merge_iter_arg.cnt_all.accumobjs;
1985 if (accumbytes != NULL) {
1986 *accumbytes = prof_tdata_merge_iter_arg.cnt_all.accumbytes;
1991 #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1)
1992 #define VSEQ_INVALID UINT64_C(0xffffffffffffffff)
1994 prof_dump_filename(char *filename, char v, uint64_t vseq) {
1995 cassert(config_prof);
1997 if (vseq != VSEQ_INVALID) {
1998		/* "<prefix>.<pid>.<seq>.<v><vseq>.heap" */
1999 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
2000 "%s.%d.%"FMTu64".%c%"FMTu64".heap",
2001 opt_prof_prefix, prof_getpid(), prof_dump_seq, v, vseq);
2003 /* "<prefix>.<pid>.<seq>.<v>.heap" */
2004 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
2005 "%s.%d.%"FMTu64".%c.heap",
2006 opt_prof_prefix, prof_getpid(), prof_dump_seq, v);
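	/*
	 * For example, with opt_prof_prefix "jeprof", pid 1234,
	 * prof_dump_seq 7, v 'i', and vseq 3, the two branches above produce
	 * "jeprof.1234.7.i3.heap" and "jeprof.1234.7.i.heap" respectively.
	 */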
2014 char filename[DUMP_FILENAME_BUFSIZE];
2016 cassert(config_prof);
2017 assert(opt_prof_final);
2018 assert(opt_prof_prefix[0] != '\0');
2024 assert(tsd_reentrancy_level_get(tsd) == 0);
2026 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2027 prof_dump_filename(filename, 'f', VSEQ_INVALID);
2028 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2029 prof_dump(tsd, false, filename, opt_prof_leak);
2033 prof_accum_init(tsdn_t *tsdn, prof_accum_t *prof_accum) {
2034 cassert(config_prof);
2036 #ifndef JEMALLOC_ATOMIC_U64
2037 if (malloc_mutex_init(&prof_accum->mtx, "prof_accum",
2038 WITNESS_RANK_PROF_ACCUM, malloc_mutex_rank_exclusive)) {
2041 prof_accum->accumbytes = 0;
2043 atomic_store_u64(&prof_accum->accumbytes, 0, ATOMIC_RELAXED);
2049 prof_idump(tsdn_t *tsdn) {
2051 prof_tdata_t *tdata;
2053 cassert(config_prof);
2055 if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
2058 tsd = tsdn_tsd(tsdn);
2059 if (tsd_reentrancy_level_get(tsd) > 0) {
2063 tdata = prof_tdata_get(tsd, false);
2064 if (tdata == NULL) {
2068 tdata->enq_idump = true;
2072 if (opt_prof_prefix[0] != '\0') {
2073 char filename[PATH_MAX + 1];
2074 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2075 prof_dump_filename(filename, 'i', prof_dump_iseq);
2077 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2078 prof_dump(tsd, false, filename, false);
2083 prof_mdump(tsd_t *tsd, const char *filename) {
2084 cassert(config_prof);
2085 assert(tsd_reentrancy_level_get(tsd) == 0);
2087 if (!opt_prof || !prof_booted) {
2090 char filename_buf[DUMP_FILENAME_BUFSIZE];
2091 if (filename == NULL) {
2092 /* No filename specified, so automatically generate one. */
2093 if (opt_prof_prefix[0] == '\0') {
2096 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2097 prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
2099 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_seq_mtx);
2100 filename = filename_buf;
2102 return prof_dump(tsd, true, filename, false);
2106 prof_gdump(tsdn_t *tsdn) {
2108 prof_tdata_t *tdata;
2110 cassert(config_prof);
2112 if (!prof_booted || tsdn_null(tsdn) || !prof_active_get_unlocked()) {
2115 tsd = tsdn_tsd(tsdn);
2116 if (tsd_reentrancy_level_get(tsd) > 0) {
2120 tdata = prof_tdata_get(tsd, false);
2121 if (tdata == NULL) {
2125 tdata->enq_gdump = true;
2129 if (opt_prof_prefix[0] != '\0') {
2130 char filename[DUMP_FILENAME_BUFSIZE];
2131 malloc_mutex_lock(tsdn, &prof_dump_seq_mtx);
2132 prof_dump_filename(filename, 'u', prof_dump_useq);
2134 malloc_mutex_unlock(tsdn, &prof_dump_seq_mtx);
2135 prof_dump(tsd, false, filename, false);
2140 prof_bt_hash(const void *key, size_t r_hash[2]) {
2141 prof_bt_t *bt = (prof_bt_t *)key;
2143 cassert(config_prof);
2145 hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
2149 prof_bt_keycomp(const void *k1, const void *k2) {
2150 const prof_bt_t *bt1 = (prof_bt_t *)k1;
2151 const prof_bt_t *bt2 = (prof_bt_t *)k2;
2153 cassert(config_prof);
2155 if (bt1->len != bt2->len) {
2158 return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
2162 prof_bt_node_hash(const void *key, size_t r_hash[2]) {
2163 const prof_bt_node_t *bt_node = (prof_bt_node_t *)key;
2164 prof_bt_hash((void *)(&bt_node->bt), r_hash);
2168 prof_bt_node_keycomp(const void *k1, const void *k2) {
2169 const prof_bt_node_t *bt_node1 = (prof_bt_node_t *)k1;
2170 const prof_bt_node_t *bt_node2 = (prof_bt_node_t *)k2;
2171 return prof_bt_keycomp((void *)(&bt_node1->bt),
2172 (void *)(&bt_node2->bt));
2176 prof_thr_node_hash(const void *key, size_t r_hash[2]) {
2177 const prof_thr_node_t *thr_node = (prof_thr_node_t *)key;
2178 hash(&thr_node->thr_uid, sizeof(uint64_t), 0x94122f35U, r_hash);
2182 prof_thr_node_keycomp(const void *k1, const void *k2) {
2183 const prof_thr_node_t *thr_node1 = (prof_thr_node_t *)k1;
2184 const prof_thr_node_t *thr_node2 = (prof_thr_node_t *)k2;
2185 return thr_node1->thr_uid == thr_node2->thr_uid;
2189 prof_thr_uid_alloc(tsdn_t *tsdn) {
2192 malloc_mutex_lock(tsdn, &next_thr_uid_mtx);
2193 thr_uid = next_thr_uid;
2195 malloc_mutex_unlock(tsdn, &next_thr_uid_mtx);
2200 static prof_tdata_t *
2201 prof_tdata_init_impl(tsd_t *tsd, uint64_t thr_uid, uint64_t thr_discrim,
2202 char *thread_name, bool active) {
2203 prof_tdata_t *tdata;
2205 cassert(config_prof);
2207 /* Initialize an empty cache for this thread. */
2208 tdata = (prof_tdata_t *)iallocztm(tsd_tsdn(tsd), sizeof(prof_tdata_t),
2209 sz_size2index(sizeof(prof_tdata_t)), false, NULL, true,
2210 arena_get(TSDN_NULL, 0, true), true);
2211 if (tdata == NULL) {
2215 tdata->lock = prof_tdata_mutex_choose(thr_uid);
2216 tdata->thr_uid = thr_uid;
2217 tdata->thr_discrim = thr_discrim;
2218 tdata->thread_name = thread_name;
2219 tdata->attached = true;
2220 tdata->expired = false;
2221 tdata->tctx_uid_next = 0;
2223 if (ckh_new(tsd, &tdata->bt2tctx, PROF_CKH_MINITEMS, prof_bt_hash,
2225 idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
2229 tdata->prng_state = (uint64_t)(uintptr_t)tdata;
2230 prof_sample_threshold_update(tdata);
2233 tdata->enq_idump = false;
2234 tdata->enq_gdump = false;
2236 tdata->dumping = false;
2237 tdata->active = active;
2239 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2240 tdata_tree_insert(&tdatas, tdata);
2241 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2247 prof_tdata_init(tsd_t *tsd) {
2248 return prof_tdata_init_impl(tsd, prof_thr_uid_alloc(tsd_tsdn(tsd)), 0,
2249 NULL, prof_thread_active_init_get(tsd_tsdn(tsd)));
2253 prof_tdata_should_destroy_unlocked(prof_tdata_t *tdata, bool even_if_attached) {
2254 if (tdata->attached && !even_if_attached) {
2257 if (ckh_count(&tdata->bt2tctx) != 0) {
2264 prof_tdata_should_destroy(tsdn_t *tsdn, prof_tdata_t *tdata,
2265 bool even_if_attached) {
2266 malloc_mutex_assert_owner(tsdn, tdata->lock);
2268 return prof_tdata_should_destroy_unlocked(tdata, even_if_attached);
2272 prof_tdata_destroy_locked(tsd_t *tsd, prof_tdata_t *tdata,
2273 bool even_if_attached) {
2274 malloc_mutex_assert_owner(tsd_tsdn(tsd), &tdatas_mtx);
2276 tdata_tree_remove(&tdatas, tdata);
2278 assert(prof_tdata_should_destroy_unlocked(tdata, even_if_attached));
2280 if (tdata->thread_name != NULL) {
2281 idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
2284 ckh_delete(tsd, &tdata->bt2tctx);
2285 idalloctm(tsd_tsdn(tsd), tdata, NULL, NULL, true, true);
2289 prof_tdata_destroy(tsd_t *tsd, prof_tdata_t *tdata, bool even_if_attached) {
2290 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2291 prof_tdata_destroy_locked(tsd, tdata, even_if_attached);
2292 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2296 prof_tdata_detach(tsd_t *tsd, prof_tdata_t *tdata) {
2299 malloc_mutex_lock(tsd_tsdn(tsd), tdata->lock);
2300 if (tdata->attached) {
2301 destroy_tdata = prof_tdata_should_destroy(tsd_tsdn(tsd), tdata,
2304 * Only detach if !destroy_tdata, because detaching would allow
2305 * another thread to win the race to destroy tdata.
2307 if (!destroy_tdata) {
2308 tdata->attached = false;
2310 tsd_prof_tdata_set(tsd, NULL);
2312 destroy_tdata = false;
2314 malloc_mutex_unlock(tsd_tsdn(tsd), tdata->lock);
2315 if (destroy_tdata) {
2316 prof_tdata_destroy(tsd, tdata, true);
2321 prof_tdata_reinit(tsd_t *tsd, prof_tdata_t *tdata) {
2322 uint64_t thr_uid = tdata->thr_uid;
2323 uint64_t thr_discrim = tdata->thr_discrim + 1;
2324 char *thread_name = (tdata->thread_name != NULL) ?
2325 prof_thread_name_alloc(tsd_tsdn(tsd), tdata->thread_name) : NULL;
2326 bool active = tdata->active;
2328 prof_tdata_detach(tsd, tdata);
2329 return prof_tdata_init_impl(tsd, thr_uid, thr_discrim, thread_name,
2334 prof_tdata_expire(tsdn_t *tsdn, prof_tdata_t *tdata) {
2337 malloc_mutex_lock(tsdn, tdata->lock);
2338 if (!tdata->expired) {
2339 tdata->expired = true;
2340 destroy_tdata = tdata->attached ? false :
2341 prof_tdata_should_destroy(tsdn, tdata, false);
2343 destroy_tdata = false;
2345 malloc_mutex_unlock(tsdn, tdata->lock);
2347 return destroy_tdata;
2350 static prof_tdata_t *
2351 prof_tdata_reset_iter(prof_tdata_tree_t *tdatas, prof_tdata_t *tdata,
2353 tsdn_t *tsdn = (tsdn_t *)arg;
2355 return (prof_tdata_expire(tsdn, tdata) ? tdata : NULL);
2359 prof_reset(tsd_t *tsd, size_t lg_sample) {
2362 assert(lg_sample < (sizeof(uint64_t) << 3));
2364 malloc_mutex_lock(tsd_tsdn(tsd), &prof_dump_mtx);
2365 malloc_mutex_lock(tsd_tsdn(tsd), &tdatas_mtx);
2367 lg_prof_sample = lg_sample;
2371 prof_tdata_t *to_destroy = tdata_tree_iter(&tdatas, next,
2372 prof_tdata_reset_iter, (void *)tsd);
2373 if (to_destroy != NULL) {
2374 next = tdata_tree_next(&tdatas, to_destroy);
2375 prof_tdata_destroy_locked(tsd, to_destroy, false);
2379 } while (next != NULL);
2381 malloc_mutex_unlock(tsd_tsdn(tsd), &tdatas_mtx);
2382 malloc_mutex_unlock(tsd_tsdn(tsd), &prof_dump_mtx);
2386 prof_tdata_cleanup(tsd_t *tsd) {
2387 prof_tdata_t *tdata;
2393 tdata = tsd_prof_tdata_get(tsd);
2394 if (tdata != NULL) {
2395 prof_tdata_detach(tsd, tdata);
2400 prof_active_get(tsdn_t *tsdn) {
2401 bool prof_active_current;
2403 malloc_mutex_lock(tsdn, &prof_active_mtx);
2404 prof_active_current = prof_active;
2405 malloc_mutex_unlock(tsdn, &prof_active_mtx);
2406 return prof_active_current;
2410 prof_active_set(tsdn_t *tsdn, bool active) {
2411 bool prof_active_old;
2413 malloc_mutex_lock(tsdn, &prof_active_mtx);
2414 prof_active_old = prof_active;
2415 prof_active = active;
2416 malloc_mutex_unlock(tsdn, &prof_active_mtx);
2417 return prof_active_old;
2422 prof_log_bt_count(void) {
2424 prof_bt_node_t *node = log_bt_first;
2425 while (node != NULL) {
2433 prof_log_alloc_count(void) {
2435 prof_alloc_node_t *node = log_alloc_first;
2436 while (node != NULL) {
2444 prof_log_thr_count(void) {
2446 prof_thr_node_t *node = log_thr_first;
2447 while (node != NULL) {
2455 prof_log_is_logging(void) {
2456 return prof_logging_state == prof_logging_state_started;
2460 prof_log_rep_check(void) {
2461 if (prof_logging_state == prof_logging_state_stopped
2462 && log_tables_initialized) {
2466 if (log_bt_last != NULL && log_bt_last->next != NULL) {
2469 if (log_thr_last != NULL && log_thr_last->next != NULL) {
2472 if (log_alloc_last != NULL && log_alloc_last->next != NULL) {
2476 size_t bt_count = prof_log_bt_count();
2477 size_t thr_count = prof_log_thr_count();
2478 size_t alloc_count = prof_log_alloc_count();
2481 if (prof_logging_state == prof_logging_state_stopped) {
2482		if (bt_count != 0 || thr_count != 0 || alloc_count != 0) {
2487 prof_alloc_node_t *node = log_alloc_first;
2488 while (node != NULL) {
2489 if (node->alloc_bt_ind >= bt_count) {
2492 if (node->free_bt_ind >= bt_count) {
2495 if (node->alloc_thr_ind >= thr_count) {
2498 if (node->free_thr_ind >= thr_count) {
2501 if (node->alloc_time_ns > node->free_time_ns) {
2511 prof_log_dummy_set(bool new_value) {
2512 prof_log_dummy = new_value;
2517 prof_log_start(tsdn_t *tsdn, const char *filename) {
2518 if (!opt_prof || !prof_booted) {
2523 size_t buf_size = PATH_MAX + 1;
2525 malloc_mutex_lock(tsdn, &log_mtx);
2527 if (prof_logging_state != prof_logging_state_stopped) {
2529 } else if (filename == NULL) {
2530 /* Make default name. */
2531 malloc_snprintf(log_filename, buf_size, "%s.%d.%"FMTu64".json",
2532 opt_prof_prefix, prof_getpid(), log_seq);
2534 prof_logging_state = prof_logging_state_started;
2535 } else if (strlen(filename) >= buf_size) {
2538 strcpy(log_filename, filename);
2539 prof_logging_state = prof_logging_state_started;
2543 nstime_update(&log_start_timestamp);
2546 malloc_mutex_unlock(tsdn, &log_mtx);
2551 /* Used as an atexit function to stop logging on exit. */
2553 prof_log_stop_final(void) {
2554 tsd_t *tsd = tsd_fetch();
2555 prof_log_stop(tsd_tsdn(tsd));
2558 struct prof_emitter_cb_arg_s {
2564 prof_emitter_write_cb(void *opaque, const char *to_write) {
2565 struct prof_emitter_cb_arg_s *arg =
2566 (struct prof_emitter_cb_arg_s *)opaque;
2567 size_t bytes = strlen(to_write);
2569 if (prof_log_dummy) {
2573 arg->ret = write(arg->fd, (void *)to_write, bytes);
2577  * prof_log_emit_{...} goes through the appropriate linked list, emitting each
2578  * node to the JSON output and deallocating it.
2581 prof_log_emit_threads(tsd_t *tsd, emitter_t *emitter) {
2582 emitter_json_array_kv_begin(emitter, "threads");
2583 prof_thr_node_t *thr_node = log_thr_first;
2584 prof_thr_node_t *thr_old_node;
2585 while (thr_node != NULL) {
2586 emitter_json_object_begin(emitter);
2588 emitter_json_kv(emitter, "thr_uid", emitter_type_uint64,
2589 &thr_node->thr_uid);
2591 char *thr_name = thr_node->name;
2593 emitter_json_kv(emitter, "thr_name", emitter_type_string,
2596 emitter_json_object_end(emitter);
2597 thr_old_node = thr_node;
2598 thr_node = thr_node->next;
2599 idalloc(tsd, thr_old_node);
2601 emitter_json_array_end(emitter);
2605 prof_log_emit_traces(tsd_t *tsd, emitter_t *emitter) {
2606 emitter_json_array_kv_begin(emitter, "stack_traces");
2607 prof_bt_node_t *bt_node = log_bt_first;
2608 prof_bt_node_t *bt_old_node;
2610	 * Calculate the buffer size we need: two hex digits per pointer byte,
2611	 * two for the "0x" prefix, and one more for the terminating '\0'.
2613 char buf[2 * sizeof(intptr_t) + 3];
2614 size_t buf_sz = sizeof(buf);
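	/*
	 * On a 64-bit target this works out to 2 * 8 + 3 = 19 bytes: sixteen
	 * hex digits, the "0x" prefix, and the trailing '\0'.
	 */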
2615 while (bt_node != NULL) {
2616 emitter_json_array_begin(emitter);
2618 for (i = 0; i < bt_node->bt.len; i++) {
2619 malloc_snprintf(buf, buf_sz, "%p", bt_node->bt.vec[i]);
2620 char *trace_str = buf;
2621 emitter_json_value(emitter, emitter_type_string,
2624 emitter_json_array_end(emitter);
2626 bt_old_node = bt_node;
2627 bt_node = bt_node->next;
2628 idalloc(tsd, bt_old_node);
2630 emitter_json_array_end(emitter);
2634 prof_log_emit_allocs(tsd_t *tsd, emitter_t *emitter) {
2635 emitter_json_array_kv_begin(emitter, "allocations");
2636 prof_alloc_node_t *alloc_node = log_alloc_first;
2637 prof_alloc_node_t *alloc_old_node;
2638 while (alloc_node != NULL) {
2639 emitter_json_object_begin(emitter);
2641 emitter_json_kv(emitter, "alloc_thread", emitter_type_size,
2642 &alloc_node->alloc_thr_ind);
2644 emitter_json_kv(emitter, "free_thread", emitter_type_size,
2645 &alloc_node->free_thr_ind);
2647 emitter_json_kv(emitter, "alloc_trace", emitter_type_size,
2648 &alloc_node->alloc_bt_ind);
2650 emitter_json_kv(emitter, "free_trace", emitter_type_size,
2651 &alloc_node->free_bt_ind);
2653 emitter_json_kv(emitter, "alloc_timestamp",
2654 emitter_type_uint64, &alloc_node->alloc_time_ns);
2656 emitter_json_kv(emitter, "free_timestamp", emitter_type_uint64,
2657 &alloc_node->free_time_ns);
2659 emitter_json_kv(emitter, "usize", emitter_type_uint64,
2660 &alloc_node->usize);
2662 emitter_json_object_end(emitter);
2664 alloc_old_node = alloc_node;
2665 alloc_node = alloc_node->next;
2666 idalloc(tsd, alloc_old_node);
2668 emitter_json_array_end(emitter);
2672 prof_log_emit_metadata(emitter_t *emitter) {
2673 emitter_json_object_kv_begin(emitter, "info");
2675 nstime_t now = NSTIME_ZERO_INITIALIZER;
2677 nstime_update(&now);
2678 uint64_t ns = nstime_ns(&now) - nstime_ns(&log_start_timestamp);
2679 emitter_json_kv(emitter, "duration", emitter_type_uint64, &ns);
2681 char *vers = JEMALLOC_VERSION;
2682 emitter_json_kv(emitter, "version",
2683 emitter_type_string, &vers);
2685 emitter_json_kv(emitter, "lg_sample_rate",
2686 emitter_type_int, &lg_prof_sample);
2688 int pid = prof_getpid();
2689 emitter_json_kv(emitter, "pid", emitter_type_int, &pid);
2691 emitter_json_object_end(emitter);
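/*
 * prof_log_stop() below stitches the emit functions together into a single
 * JSON document, roughly of the following shape (values are illustrative
 * only):
 *
 *	{
 *	  "info": {"duration": ..., "version": "...", "lg_sample_rate": ...,
 *	    "pid": ...},
 *	  "threads": [{"thr_uid": ..., "thr_name": "..."}, ...],
 *	  "stack_traces": [["0x...", "0x...", ...], ...],
 *	  "allocations": [{"alloc_thread": ..., "free_thread": ...,
 *	    "alloc_trace": ..., "free_trace": ..., "alloc_timestamp": ...,
 *	    "free_timestamp": ..., "usize": ...}, ...]
 *	}
 */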
2696 prof_log_stop(tsdn_t *tsdn) {
2697 if (!opt_prof || !prof_booted) {
2701 tsd_t *tsd = tsdn_tsd(tsdn);
2702 malloc_mutex_lock(tsdn, &log_mtx);
2704 if (prof_logging_state != prof_logging_state_started) {
2705 malloc_mutex_unlock(tsdn, &log_mtx);
2710 * Set the state to dumping. We'll set it back to stopped when we're done.
2711 * Since other threads can't start, stop, or log while the state is
2712 * dumping, we don't have to hold the lock for the whole function.
2714 prof_logging_state = prof_logging_state_dumping;
2715 malloc_mutex_unlock(tsdn, &log_mtx);
2720 /* Create a file. */
2722 int fd;
2723 #ifdef JEMALLOC_JET
2724 if (prof_log_dummy) {
2725 fd = 0;
2726 } else {
2727 fd = creat(log_filename, 0644);
2728 }
2729 #else
2730 fd = creat(log_filename, 0644);
2731 #endif
2734 malloc_printf("<jemalloc>: creat() for log file \"%s\" "
2735 "failed with %d\n", log_filename, errno);
2743 struct prof_emitter_cb_arg_s arg;
2745 emitter_init(&emitter, emitter_output_json, &prof_emitter_write_cb,
2748 emitter_begin(&emitter);
2749 prof_log_emit_metadata(&emitter);
2750 prof_log_emit_threads(tsd, &emitter);
2751 prof_log_emit_traces(tsd, &emitter);
2752 prof_log_emit_allocs(tsd, &emitter);
2753 emitter_end(&emitter);
2755 /* Reset global state. */
2756 if (log_tables_initialized) {
2757 ckh_delete(tsd, &log_bt_node_set);
2758 ckh_delete(tsd, &log_thr_node_set);
2760 log_tables_initialized = false;
2763 log_bt_first = NULL;
2765 log_thr_first = NULL;
2766 log_thr_last = NULL;
2767 log_alloc_first = NULL;
2768 log_alloc_last = NULL;
2770 malloc_mutex_lock(tsdn, &log_mtx);
2771 prof_logging_state = prof_logging_state_stopped;
2772 malloc_mutex_unlock(tsdn, &log_mtx);
2775 if (prof_log_dummy) {
2783 prof_thread_name_get(tsd_t *tsd) {
2784 prof_tdata_t *tdata;
2786 tdata = prof_tdata_get(tsd, true);
2787 if (tdata == NULL) {
2790 return (tdata->thread_name != NULL ? tdata->thread_name : "");
2794 prof_thread_name_alloc(tsdn_t *tsdn, const char *thread_name) {
2798 if (thread_name == NULL) {
2802 size = strlen(thread_name) + 1;
2807 ret = iallocztm(tsdn, size, sz_size2index(size), false, NULL, true,
2808 arena_get(TSDN_NULL, 0, true), true);
2812 memcpy(ret, thread_name, size);
2817 prof_thread_name_set(tsd_t *tsd, const char *thread_name) {
2818 prof_tdata_t *tdata;
2822 tdata = prof_tdata_get(tsd, true);
2823 if (tdata == NULL) {
2827 /* Validate input. */
2828 if (thread_name == NULL) {
2831 for (i = 0; thread_name[i] != '\0'; i++) {
2832 char c = thread_name[i];
2833 if (!isgraph(c) && !isblank(c)) {
2838 s = prof_thread_name_alloc(tsd_tsdn(tsd), thread_name);
2843 if (tdata->thread_name != NULL) {
2844 idalloctm(tsd_tsdn(tsd), tdata->thread_name, NULL, NULL, true,
2846 tdata->thread_name = NULL;
2848 if (strlen(s) > 0) {
2849 tdata->thread_name = s;
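/*
 * A minimal usage sketch, assuming <jemalloc/jemalloc.h> is included; the
 * thread name is only an example.  Applications normally reach
 * prof_thread_name_set() through the "thread.prof.name" mallctl:
 *
 *	const char *name = "worker-1";
 *	mallctl("thread.prof.name", NULL, NULL, (void *)&name, sizeof(name));
 *
 * Only graphic and blank characters are accepted, per the validation above.
 */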
2855 prof_thread_active_get(tsd_t *tsd) {
2856 prof_tdata_t *tdata;
2858 tdata = prof_tdata_get(tsd, true);
2859 if (tdata == NULL) {
2862 return tdata->active;
2866 prof_thread_active_set(tsd_t *tsd, bool active) {
2867 prof_tdata_t *tdata;
2869 tdata = prof_tdata_get(tsd, true);
2870 if (tdata == NULL) {
2873 tdata->active = active;
2878 prof_thread_active_init_get(tsdn_t *tsdn) {
2881 malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
2882 active_init = prof_thread_active_init;
2883 malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
2888 prof_thread_active_init_set(tsdn_t *tsdn, bool active_init) {
2889 bool active_init_old;
2891 malloc_mutex_lock(tsdn, &prof_thread_active_init_mtx);
2892 active_init_old = prof_thread_active_init;
2893 prof_thread_active_init = active_init;
2894 malloc_mutex_unlock(tsdn, &prof_thread_active_init_mtx);
2895 return active_init_old;
2899 prof_gdump_get(tsdn_t *tsdn) {
2900 bool prof_gdump_current;
2902 malloc_mutex_lock(tsdn, &prof_gdump_mtx);
2903 prof_gdump_current = prof_gdump_val;
2904 malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
2905 return prof_gdump_current;
2909 prof_gdump_set(tsdn_t *tsdn, bool gdump) {
2910 bool prof_gdump_old;
2912 malloc_mutex_lock(tsdn, &prof_gdump_mtx);
2913 prof_gdump_old = prof_gdump_val;
2914 prof_gdump_val = gdump;
2915 malloc_mutex_unlock(tsdn, &prof_gdump_mtx);
2916 return prof_gdump_old;
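/*
 * The getter/setter pairs above back runtime control of these flags (exposed
 * as the "prof.thread_active_init" and "prof.gdump" mallctls), which is why
 * the current values live behind their own mutexes instead of being read from
 * the opt_* variables directly.
 */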
2921 cassert(config_prof);
2923 memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
2924 sizeof(PROF_PREFIX_DEFAULT));
2929 cassert(config_prof);
2932 * opt_prof must be in its final state before any arenas are
2933 * initialized, so this function must be executed early.
2936 if (opt_prof_leak && !opt_prof) {
2938 * Enable opt_prof, but in such a way that profiles are never
2939 * automatically dumped.
2942 opt_prof_gdump = false;
2943 } else if (opt_prof) {
2944 if (opt_lg_prof_interval >= 0) {
2945 prof_interval = (((uint64_t)1U) <<
2946 opt_lg_prof_interval);
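/*
 * E.g. with opt_lg_prof_interval == 30, prof_interval is 1 << 30, i.e. an
 * interval-triggered dump on average once per GiB of allocation activity.
 */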
2952 prof_boot2(tsd_t *tsd) {
2953 cassert(config_prof);
2958 lg_prof_sample = opt_lg_prof_sample;
2960 prof_active = opt_prof_active;
2961 if (malloc_mutex_init(&prof_active_mtx, "prof_active",
2962 WITNESS_RANK_PROF_ACTIVE, malloc_mutex_rank_exclusive)) {
2966 prof_gdump_val = opt_prof_gdump;
2967 if (malloc_mutex_init(&prof_gdump_mtx, "prof_gdump",
2968 WITNESS_RANK_PROF_GDUMP, malloc_mutex_rank_exclusive)) {
2972 prof_thread_active_init = opt_prof_thread_active_init;
2973 if (malloc_mutex_init(&prof_thread_active_init_mtx,
2974 "prof_thread_active_init",
2975 WITNESS_RANK_PROF_THREAD_ACTIVE_INIT,
2976 malloc_mutex_rank_exclusive)) {
2980 if (ckh_new(tsd, &bt2gctx, PROF_CKH_MINITEMS, prof_bt_hash,
2984 if (malloc_mutex_init(&bt2gctx_mtx, "prof_bt2gctx",
2985 WITNESS_RANK_PROF_BT2GCTX, malloc_mutex_rank_exclusive)) {
2989 tdata_tree_new(&tdatas);
2990 if (malloc_mutex_init(&tdatas_mtx, "prof_tdatas",
2991 WITNESS_RANK_PROF_TDATAS, malloc_mutex_rank_exclusive)) {
2996 if (malloc_mutex_init(&next_thr_uid_mtx, "prof_next_thr_uid",
2997 WITNESS_RANK_PROF_NEXT_THR_UID, malloc_mutex_rank_exclusive)) {
3001 if (malloc_mutex_init(&prof_dump_seq_mtx, "prof_dump_seq",
3002 WITNESS_RANK_PROF_DUMP_SEQ, malloc_mutex_rank_exclusive)) {
3005 if (malloc_mutex_init(&prof_dump_mtx, "prof_dump",
3006 WITNESS_RANK_PROF_DUMP, malloc_mutex_rank_exclusive)) {
3010 if (opt_prof_final && opt_prof_prefix[0] != '\0' &&
3011 atexit(prof_fdump) != 0) {
3012 malloc_write("<jemalloc>: Error in atexit()\n");
3019 prof_log_start(tsd_tsdn(tsd), NULL);
3022 if (atexit(prof_log_stop_final) != 0) {
3023 malloc_write("<jemalloc>: Error in atexit() "
3030 if (malloc_mutex_init(&log_mtx, "prof_log",
3031 WITNESS_RANK_PROF_LOG, malloc_mutex_rank_exclusive)) {
3035 if (ckh_new(tsd, &log_bt_node_set, PROF_CKH_MINITEMS,
3036 prof_bt_node_hash, prof_bt_node_keycomp)) {
3040 if (ckh_new(tsd, &log_thr_node_set, PROF_CKH_MINITEMS,
3041 prof_thr_node_hash, prof_thr_node_keycomp)) {
3045 log_tables_initialized = true;
3047 gctx_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
3048 b0get(), PROF_NCTX_LOCKS * sizeof(malloc_mutex_t),
3050 if (gctx_locks == NULL) {
3053 for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3054 if (malloc_mutex_init(&gctx_locks[i], "prof_gctx",
3055 WITNESS_RANK_PROF_GCTX,
3056 malloc_mutex_rank_exclusive)) {
3061 tdata_locks = (malloc_mutex_t *)base_alloc(tsd_tsdn(tsd),
3062 b0get(), PROF_NTDATA_LOCKS * sizeof(malloc_mutex_t),
3064 if (tdata_locks == NULL) {
3067 for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3068 if (malloc_mutex_init(&tdata_locks[i], "prof_tdata",
3069 WITNESS_RANK_PROF_TDATA,
3070 malloc_mutex_rank_exclusive)) {
3074 #ifdef JEMALLOC_PROF_LIBGCC
3076 * Cause the backtracing machinery to allocate its internal
3077 * state before enabling profiling.
3079 _Unwind_Backtrace(prof_unwind_init_callback, NULL);
3088 prof_prefork0(tsdn_t *tsdn) {
3089 if (config_prof && opt_prof) {
3092 malloc_mutex_prefork(tsdn, &prof_dump_mtx);
3093 malloc_mutex_prefork(tsdn, &bt2gctx_mtx);
3094 malloc_mutex_prefork(tsdn, &tdatas_mtx);
3095 for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3096 malloc_mutex_prefork(tsdn, &tdata_locks[i]);
3098 for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3099 malloc_mutex_prefork(tsdn, &gctx_locks[i]);
3105 prof_prefork1(tsdn_t *tsdn) {
3106 if (config_prof && opt_prof) {
3107 malloc_mutex_prefork(tsdn, &prof_active_mtx);
3108 malloc_mutex_prefork(tsdn, &prof_dump_seq_mtx);
3109 malloc_mutex_prefork(tsdn, &prof_gdump_mtx);
3110 malloc_mutex_prefork(tsdn, &next_thr_uid_mtx);
3111 malloc_mutex_prefork(tsdn, &prof_thread_active_init_mtx);
3116 prof_postfork_parent(tsdn_t *tsdn) {
3117 if (config_prof && opt_prof) {
3120 malloc_mutex_postfork_parent(tsdn,
3121 &prof_thread_active_init_mtx);
3122 malloc_mutex_postfork_parent(tsdn, &next_thr_uid_mtx);
3123 malloc_mutex_postfork_parent(tsdn, &prof_gdump_mtx);
3124 malloc_mutex_postfork_parent(tsdn, &prof_dump_seq_mtx);
3125 malloc_mutex_postfork_parent(tsdn, &prof_active_mtx);
3126 for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3127 malloc_mutex_postfork_parent(tsdn, &gctx_locks[i]);
3129 for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3130 malloc_mutex_postfork_parent(tsdn, &tdata_locks[i]);
3132 malloc_mutex_postfork_parent(tsdn, &tdatas_mtx);
3133 malloc_mutex_postfork_parent(tsdn, &bt2gctx_mtx);
3134 malloc_mutex_postfork_parent(tsdn, &prof_dump_mtx);
3139 prof_postfork_child(tsdn_t *tsdn) {
3140 if (config_prof && opt_prof) {
3143 malloc_mutex_postfork_child(tsdn, &prof_thread_active_init_mtx);
3144 malloc_mutex_postfork_child(tsdn, &next_thr_uid_mtx);
3145 malloc_mutex_postfork_child(tsdn, &prof_gdump_mtx);
3146 malloc_mutex_postfork_child(tsdn, &prof_dump_seq_mtx);
3147 malloc_mutex_postfork_child(tsdn, &prof_active_mtx);
3148 for (i = 0; i < PROF_NCTX_LOCKS; i++) {
3149 malloc_mutex_postfork_child(tsdn, &gctx_locks[i]);
3151 for (i = 0; i < PROF_NTDATA_LOCKS; i++) {
3152 malloc_mutex_postfork_child(tsdn, &tdata_locks[i]);
3154 malloc_mutex_postfork_child(tsdn, &tdatas_mtx);
3155 malloc_mutex_postfork_child(tsdn, &bt2gctx_mtx);
3156 malloc_mutex_postfork_child(tsdn, &prof_dump_mtx);
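/*
 * The prefork hooks above acquire every profiling mutex so that fork() cannot
 * duplicate a lock held by another thread; the postfork hooks release them
 * again in the parent and the child, in the reverse order of acquisition.
 */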
3160 /******************************************************************************/