1 #define JEMALLOC_PROF_C_
2 #include "jemalloc/internal/jemalloc_internal.h"
3 /******************************************************************************/
5 #ifdef JEMALLOC_PROF_LIBUNWIND
10 #ifdef JEMALLOC_PROF_LIBGCC
14 /******************************************************************************/
17 malloc_tsd_data(, prof_tdata, prof_tdata_t *, NULL)
19 bool opt_prof = false;
20 bool opt_prof_active = true;
21 size_t opt_lg_prof_sample = LG_PROF_SAMPLE_DEFAULT;
22 ssize_t opt_lg_prof_interval = LG_PROF_INTERVAL_DEFAULT;
23 bool opt_prof_gdump = false;
24 bool opt_prof_final = true;
25 bool opt_prof_leak = false;
26 bool opt_prof_accum = false;
28 /* Minimize memory bloat for non-prof builds. */
34 uint64_t prof_interval = 0;
38 * Table of mutexes that are shared among ctx's. These are leaf locks, so
39 * there is no problem with using them for more than one ctx at the same time.
40 * The primary motivation for this sharing though is that ctx's are ephemeral,
41 * and destroying mutexes causes complications for systems that allocate when
42 * creating/destroying mutexes.
44 static malloc_mutex_t *ctx_locks;
45 static unsigned cum_ctxs; /* Atomic counter. */
48 * Global hash of (prof_bt_t *)-->(prof_ctx_t *). This is the master data
49 * structure that knows about all backtraces currently captured.
52 static malloc_mutex_t bt2ctx_mtx;
54 static malloc_mutex_t prof_dump_seq_mtx;
55 static uint64_t prof_dump_seq;
56 static uint64_t prof_dump_iseq;
57 static uint64_t prof_dump_mseq;
58 static uint64_t prof_dump_useq;
61 * This buffer is rather large for stack allocation, so use a single buffer for
64 static malloc_mutex_t prof_dump_mtx;
65 static char prof_dump_buf[
66 /* Minimize memory bloat for non-prof builds. */
73 static unsigned prof_dump_buf_end;
74 static int prof_dump_fd;
76 /* Do not dump any profiles until bootstrapping is complete. */
77 static bool prof_booted = false;
79 /******************************************************************************/
82 bt_init(prof_bt_t *bt, void **vec)
92 bt_destroy(prof_bt_t *bt)
101 bt_dup(prof_bt_t *bt)
105 cassert(config_prof);
108 * Create a single allocation that has space for vec immediately
109 * following the prof_bt_t structure. The backtraces that get
110 * stored in the backtrace caches are copied from stack-allocated
111 * temporary variables, so size is known at creation time. Making this
112 * a contiguous object improves cache locality.
114 ret = (prof_bt_t *)imalloc(QUANTUM_CEILING(sizeof(prof_bt_t)) +
115 (bt->len * sizeof(void *)));
118 ret->vec = (void **)((uintptr_t)ret +
119 QUANTUM_CEILING(sizeof(prof_bt_t)));
120 memcpy(ret->vec, bt->vec, bt->len * sizeof(void *));
127 prof_enter(prof_tdata_t *prof_tdata)
130 cassert(config_prof);
132 assert(prof_tdata->enq == false);
133 prof_tdata->enq = true;
135 malloc_mutex_lock(&bt2ctx_mtx);
139 prof_leave(prof_tdata_t *prof_tdata)
143 cassert(config_prof);
145 malloc_mutex_unlock(&bt2ctx_mtx);
147 assert(prof_tdata->enq);
148 prof_tdata->enq = false;
149 idump = prof_tdata->enq_idump;
150 prof_tdata->enq_idump = false;
151 gdump = prof_tdata->enq_gdump;
152 prof_tdata->enq_gdump = false;
160 #ifdef JEMALLOC_PROF_LIBUNWIND
162 prof_backtrace(prof_bt_t *bt, unsigned nignore)
169 cassert(config_prof);
170 assert(bt->len == 0);
171 assert(bt->vec != NULL);
174 unw_init_local(&cursor, &uc);
176 /* Throw away (nignore+1) stack frames, if that many exist. */
177 for (i = 0; i < nignore + 1; i++) {
178 err = unw_step(&cursor);
184 * Iterate over stack frames until there are no more, or until no space
187 for (i = 0; i < PROF_BT_MAX; i++) {
188 unw_get_reg(&cursor, UNW_REG_IP, (unw_word_t *)&bt->vec[i]);
190 err = unw_step(&cursor);
195 #elif (defined(JEMALLOC_PROF_LIBGCC))
196 static _Unwind_Reason_Code
197 prof_unwind_init_callback(struct _Unwind_Context *context, void *arg)
200 cassert(config_prof);
202 return (_URC_NO_REASON);
205 static _Unwind_Reason_Code
206 prof_unwind_callback(struct _Unwind_Context *context, void *arg)
208 prof_unwind_data_t *data = (prof_unwind_data_t *)arg;
210 cassert(config_prof);
212 if (data->nignore > 0)
215 data->bt->vec[data->bt->len] = (void *)_Unwind_GetIP(context);
217 if (data->bt->len == data->max)
218 return (_URC_END_OF_STACK);
221 return (_URC_NO_REASON);
225 prof_backtrace(prof_bt_t *bt, unsigned nignore)
227 prof_unwind_data_t data = {bt, nignore, PROF_BT_MAX};
229 cassert(config_prof);
231 _Unwind_Backtrace(prof_unwind_callback, &data);
233 #elif (defined(JEMALLOC_PROF_GCC))
235 prof_backtrace(prof_bt_t *bt, unsigned nignore)
237 #define BT_FRAME(i) \
238 if ((i) < nignore + PROF_BT_MAX) { \
240 if (__builtin_frame_address(i) == 0) \
242 p = __builtin_return_address(i); \
245 if (i >= nignore) { \
246 bt->vec[(i) - nignore] = p; \
247 bt->len = (i) - nignore + 1; \
252 cassert(config_prof);
253 assert(nignore <= 3);
396 /* Extras to compensate for nignore. */
404 prof_backtrace(prof_bt_t *bt, unsigned nignore)
407 cassert(config_prof);
412 static malloc_mutex_t *
413 prof_ctx_mutex_choose(void)
415 unsigned nctxs = atomic_add_u(&cum_ctxs, 1);
417 return (&ctx_locks[(nctxs - 1) % PROF_NCTX_LOCKS]);
421 prof_ctx_init(prof_ctx_t *ctx, prof_bt_t *bt)
425 ctx->lock = prof_ctx_mutex_choose();
427 * Set nlimbo to 1, in order to avoid a race condition with
428 * prof_ctx_merge()/prof_ctx_destroy().
431 ql_elm_new(ctx, dump_link);
432 memset(&ctx->cnt_merged, 0, sizeof(prof_cnt_t));
433 ql_new(&ctx->cnts_ql);
437 prof_ctx_destroy(prof_ctx_t *ctx)
439 prof_tdata_t *prof_tdata;
441 cassert(config_prof);
444 * Check that ctx is still unused by any thread cache before destroying
445 * it. prof_lookup() increments ctx->nlimbo in order to avoid a race
446 * condition with this function, as does prof_ctx_merge() in order to
447 * avoid a race between the main body of prof_ctx_merge() and entry
448 * into this function.
450 prof_tdata = prof_tdata_get(false);
451 assert((uintptr_t)prof_tdata > (uintptr_t)PROF_TDATA_STATE_MAX);
452 prof_enter(prof_tdata);
453 malloc_mutex_lock(ctx->lock);
454 if (ql_first(&ctx->cnts_ql) == NULL && ctx->cnt_merged.curobjs == 0 &&
456 assert(ctx->cnt_merged.curbytes == 0);
457 assert(ctx->cnt_merged.accumobjs == 0);
458 assert(ctx->cnt_merged.accumbytes == 0);
459 /* Remove ctx from bt2ctx. */
460 if (ckh_remove(&bt2ctx, ctx->bt, NULL, NULL))
462 prof_leave(prof_tdata);
464 malloc_mutex_unlock(ctx->lock);
469 * Compensate for increment in prof_ctx_merge() or
473 malloc_mutex_unlock(ctx->lock);
474 prof_leave(prof_tdata);
479 prof_ctx_merge(prof_ctx_t *ctx, prof_thr_cnt_t *cnt)
483 cassert(config_prof);
485 /* Merge cnt stats and detach from ctx. */
486 malloc_mutex_lock(ctx->lock);
487 ctx->cnt_merged.curobjs += cnt->cnts.curobjs;
488 ctx->cnt_merged.curbytes += cnt->cnts.curbytes;
489 ctx->cnt_merged.accumobjs += cnt->cnts.accumobjs;
490 ctx->cnt_merged.accumbytes += cnt->cnts.accumbytes;
491 ql_remove(&ctx->cnts_ql, cnt, cnts_link);
492 if (opt_prof_accum == false && ql_first(&ctx->cnts_ql) == NULL &&
493 ctx->cnt_merged.curobjs == 0 && ctx->nlimbo == 0) {
495 * Increment ctx->nlimbo in order to keep another thread from
496 * winning the race to destroy ctx while this one has ctx->lock
497 * dropped. Without this, it would be possible for another
500 * 1) Sample an allocation associated with ctx.
501 * 2) Deallocate the sampled object.
502 * 3) Successfully prof_ctx_destroy(ctx).
504 * The result would be that ctx no longer exists by the time
505 * this thread accesses it in prof_ctx_destroy().
511 malloc_mutex_unlock(ctx->lock);
513 prof_ctx_destroy(ctx);
517 prof_lookup_global(prof_bt_t *bt, prof_tdata_t *prof_tdata, void **p_btkey,
518 prof_ctx_t **p_ctx, bool *p_new_ctx)
530 prof_enter(prof_tdata);
531 if (ckh_search(&bt2ctx, bt, &btkey.v, &ctx.v)) {
532 /* bt has never been seen before. Insert it. */
533 ctx.v = imalloc(sizeof(prof_ctx_t));
535 prof_leave(prof_tdata);
538 btkey.p = bt_dup(bt);
539 if (btkey.v == NULL) {
540 prof_leave(prof_tdata);
544 prof_ctx_init(ctx.p, btkey.p);
545 if (ckh_insert(&bt2ctx, btkey.v, ctx.v)) {
547 prof_leave(prof_tdata);
555 * Increment nlimbo, in order to avoid a race condition with
556 * prof_ctx_merge()/prof_ctx_destroy().
558 malloc_mutex_lock(ctx.p->lock);
560 malloc_mutex_unlock(ctx.p->lock);
563 prof_leave(prof_tdata);
567 *p_new_ctx = new_ctx;
572 prof_lookup(prof_bt_t *bt)
578 prof_tdata_t *prof_tdata;
580 cassert(config_prof);
582 prof_tdata = prof_tdata_get(false);
583 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
586 if (ckh_search(&prof_tdata->bt2cnt, bt, NULL, &ret.v)) {
592 * This thread's cache lacks bt. Look for it in the global
595 if (prof_lookup_global(bt, prof_tdata, &btkey, &ctx, &new_ctx))
598 /* Link a prof_thr_cnt_t into ctx for this thread. */
599 if (ckh_count(&prof_tdata->bt2cnt) == PROF_TCMAX) {
600 assert(ckh_count(&prof_tdata->bt2cnt) > 0);
602 * Flush the least recently used cnt in order to keep
603 * bt2cnt from becoming too large.
605 ret.p = ql_last(&prof_tdata->lru_ql, lru_link);
606 assert(ret.v != NULL);
607 if (ckh_remove(&prof_tdata->bt2cnt, ret.p->ctx->bt,
610 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
611 prof_ctx_merge(ret.p->ctx, ret.p);
612 /* ret can now be re-used. */
614 assert(ckh_count(&prof_tdata->bt2cnt) < PROF_TCMAX);
615 /* Allocate and partially initialize a new cnt. */
616 ret.v = imalloc(sizeof(prof_thr_cnt_t));
619 prof_ctx_destroy(ctx);
622 ql_elm_new(ret.p, cnts_link);
623 ql_elm_new(ret.p, lru_link);
625 /* Finish initializing ret. */
628 memset(&ret.p->cnts, 0, sizeof(prof_cnt_t));
629 if (ckh_insert(&prof_tdata->bt2cnt, btkey, ret.v)) {
631 prof_ctx_destroy(ctx);
635 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
636 malloc_mutex_lock(ctx->lock);
637 ql_tail_insert(&ctx->cnts_ql, ret.p, cnts_link);
639 malloc_mutex_unlock(ctx->lock);
641 /* Move ret to the front of the LRU. */
642 ql_remove(&prof_tdata->lru_ql, ret.p, lru_link);
643 ql_head_insert(&prof_tdata->lru_ql, ret.p, lru_link);
654 prof_tdata_t *prof_tdata;
656 prof_tdata = prof_tdata_get(false);
657 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
660 prof_enter(prof_tdata);
661 bt_count = ckh_count(&bt2ctx);
662 prof_leave(prof_tdata);
669 #undef prof_dump_open
670 #define prof_dump_open JEMALLOC_N(prof_dump_open_impl)
673 prof_dump_open(bool propagate_err, const char *filename)
677 fd = creat(filename, 0644);
678 if (fd == -1 && propagate_err == false) {
679 malloc_printf("<jemalloc>: creat(\"%s\"), 0644) failed\n",
688 #undef prof_dump_open
689 #define prof_dump_open JEMALLOC_N(prof_dump_open)
690 prof_dump_open_t *prof_dump_open = JEMALLOC_N(prof_dump_open_impl);
694 prof_dump_flush(bool propagate_err)
699 cassert(config_prof);
701 err = write(prof_dump_fd, prof_dump_buf, prof_dump_buf_end);
703 if (propagate_err == false) {
704 malloc_write("<jemalloc>: write() failed during heap "
711 prof_dump_buf_end = 0;
717 prof_dump_close(bool propagate_err)
721 assert(prof_dump_fd != -1);
722 ret = prof_dump_flush(propagate_err);
730 prof_dump_write(bool propagate_err, const char *s)
734 cassert(config_prof);
739 /* Flush the buffer if it is full. */
740 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE)
741 if (prof_dump_flush(propagate_err) && propagate_err)
744 if (prof_dump_buf_end + slen <= PROF_DUMP_BUFSIZE) {
745 /* Finish writing. */
748 /* Write as much of s as will fit. */
749 n = PROF_DUMP_BUFSIZE - prof_dump_buf_end;
751 memcpy(&prof_dump_buf[prof_dump_buf_end], &s[i], n);
752 prof_dump_buf_end += n;
759 JEMALLOC_ATTR(format(printf, 2, 3))
761 prof_dump_printf(bool propagate_err, const char *format, ...)
765 char buf[PROF_PRINTF_BUFSIZE];
767 va_start(ap, format);
768 malloc_vsnprintf(buf, sizeof(buf), format, ap);
770 ret = prof_dump_write(propagate_err, buf);
776 prof_dump_ctx_prep(prof_ctx_t *ctx, prof_cnt_t *cnt_all, size_t *leak_nctx,
777 prof_ctx_list_t *ctx_ql)
779 prof_thr_cnt_t *thr_cnt;
782 cassert(config_prof);
784 malloc_mutex_lock(ctx->lock);
787 * Increment nlimbo so that ctx won't go away before dump.
788 * Additionally, link ctx into the dump list so that it is included in
789 * prof_dump()'s second pass.
792 ql_tail_insert(ctx_ql, ctx, dump_link);
794 memcpy(&ctx->cnt_summed, &ctx->cnt_merged, sizeof(prof_cnt_t));
795 ql_foreach(thr_cnt, &ctx->cnts_ql, cnts_link) {
796 volatile unsigned *epoch = &thr_cnt->epoch;
799 unsigned epoch0 = *epoch;
801 /* Make sure epoch is even. */
805 memcpy(&tcnt, &thr_cnt->cnts, sizeof(prof_cnt_t));
807 /* Terminate if epoch didn't change while reading. */
808 if (*epoch == epoch0)
812 ctx->cnt_summed.curobjs += tcnt.curobjs;
813 ctx->cnt_summed.curbytes += tcnt.curbytes;
814 if (opt_prof_accum) {
815 ctx->cnt_summed.accumobjs += tcnt.accumobjs;
816 ctx->cnt_summed.accumbytes += tcnt.accumbytes;
820 if (ctx->cnt_summed.curobjs != 0)
823 /* Add to cnt_all. */
824 cnt_all->curobjs += ctx->cnt_summed.curobjs;
825 cnt_all->curbytes += ctx->cnt_summed.curbytes;
826 if (opt_prof_accum) {
827 cnt_all->accumobjs += ctx->cnt_summed.accumobjs;
828 cnt_all->accumbytes += ctx->cnt_summed.accumbytes;
831 malloc_mutex_unlock(ctx->lock);
835 prof_dump_header(bool propagate_err, const prof_cnt_t *cnt_all)
838 if (opt_lg_prof_sample == 0) {
839 if (prof_dump_printf(propagate_err,
840 "heap profile: %"PRId64": %"PRId64
841 " [%"PRIu64": %"PRIu64"] @ heapprofile\n",
842 cnt_all->curobjs, cnt_all->curbytes,
843 cnt_all->accumobjs, cnt_all->accumbytes))
846 if (prof_dump_printf(propagate_err,
847 "heap profile: %"PRId64": %"PRId64
848 " [%"PRIu64": %"PRIu64"] @ heap_v2/%"PRIu64"\n",
849 cnt_all->curobjs, cnt_all->curbytes,
850 cnt_all->accumobjs, cnt_all->accumbytes,
851 ((uint64_t)1U << opt_lg_prof_sample)))
859 prof_dump_ctx_cleanup_locked(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
863 ql_remove(ctx_ql, ctx, dump_link);
867 prof_dump_ctx_cleanup(prof_ctx_t *ctx, prof_ctx_list_t *ctx_ql)
870 malloc_mutex_lock(ctx->lock);
871 prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
872 malloc_mutex_unlock(ctx->lock);
876 prof_dump_ctx(bool propagate_err, prof_ctx_t *ctx, const prof_bt_t *bt,
877 prof_ctx_list_t *ctx_ql)
882 cassert(config_prof);
885 * Current statistics can sum to 0 as a result of unmerged per thread
886 * statistics. Additionally, interval- and growth-triggered dumps can
887 * occur between the time a ctx is created and when its statistics are
888 * filled in. Avoid dumping any ctx that is an artifact of either
889 * implementation detail.
891 malloc_mutex_lock(ctx->lock);
892 if ((opt_prof_accum == false && ctx->cnt_summed.curobjs == 0) ||
893 (opt_prof_accum && ctx->cnt_summed.accumobjs == 0)) {
894 assert(ctx->cnt_summed.curobjs == 0);
895 assert(ctx->cnt_summed.curbytes == 0);
896 assert(ctx->cnt_summed.accumobjs == 0);
897 assert(ctx->cnt_summed.accumbytes == 0);
902 if (prof_dump_printf(propagate_err, "%"PRId64": %"PRId64
903 " [%"PRIu64": %"PRIu64"] @",
904 ctx->cnt_summed.curobjs, ctx->cnt_summed.curbytes,
905 ctx->cnt_summed.accumobjs, ctx->cnt_summed.accumbytes)) {
910 for (i = 0; i < bt->len; i++) {
911 if (prof_dump_printf(propagate_err, " %#"PRIxPTR,
912 (uintptr_t)bt->vec[i])) {
918 if (prof_dump_write(propagate_err, "\n")) {
925 prof_dump_ctx_cleanup_locked(ctx, ctx_ql);
926 malloc_mutex_unlock(ctx->lock);
931 prof_dump_maps(bool propagate_err)
935 char filename[PATH_MAX + 1];
937 cassert(config_prof);
939 malloc_snprintf(filename, sizeof(filename), "/proc/curproc/map");
941 malloc_snprintf(filename, sizeof(filename), "/proc/%d/maps",
944 mfd = open(filename, O_RDONLY);
948 if (prof_dump_write(propagate_err, "\nMAPPED_LIBRARIES:\n") &&
955 prof_dump_buf_end += nread;
956 if (prof_dump_buf_end == PROF_DUMP_BUFSIZE) {
957 /* Make space in prof_dump_buf before read(). */
958 if (prof_dump_flush(propagate_err) &&
964 nread = read(mfd, &prof_dump_buf[prof_dump_buf_end],
965 PROF_DUMP_BUFSIZE - prof_dump_buf_end);
980 prof_leakcheck(const prof_cnt_t *cnt_all, size_t leak_nctx,
981 const char *filename)
984 if (cnt_all->curbytes != 0) {
985 malloc_printf("<jemalloc>: Leak summary: %"PRId64" byte%s, %"
986 PRId64" object%s, %zu context%s\n",
987 cnt_all->curbytes, (cnt_all->curbytes != 1) ? "s" : "",
988 cnt_all->curobjs, (cnt_all->curobjs != 1) ? "s" : "",
989 leak_nctx, (leak_nctx != 1) ? "s" : "");
991 "<jemalloc>: Run pprof on \"%s\" for leak detail\n",
997 prof_dump(bool propagate_err, const char *filename, bool leakcheck)
999 prof_tdata_t *prof_tdata;
1007 prof_ctx_list_t ctx_ql;
1009 cassert(config_prof);
1011 prof_tdata = prof_tdata_get(false);
1012 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1015 malloc_mutex_lock(&prof_dump_mtx);
1017 /* Merge per thread profile stats, and sum them in cnt_all. */
1018 memset(&cnt_all, 0, sizeof(prof_cnt_t));
1021 prof_enter(prof_tdata);
1022 for (tabind = 0; ckh_iter(&bt2ctx, &tabind, NULL, &ctx.v) == false;)
1023 prof_dump_ctx_prep(ctx.p, &cnt_all, &leak_nctx, &ctx_ql);
1024 prof_leave(prof_tdata);
1026 /* Create dump file. */
1027 if ((prof_dump_fd = prof_dump_open(propagate_err, filename)) == -1)
1028 goto label_open_close_error;
1030 /* Dump profile header. */
1031 if (prof_dump_header(propagate_err, &cnt_all))
1032 goto label_write_error;
1034 /* Dump per ctx profile stats. */
1035 while ((ctx.p = ql_first(&ctx_ql)) != NULL) {
1036 if (prof_dump_ctx(propagate_err, ctx.p, ctx.p->bt, &ctx_ql))
1037 goto label_write_error;
1040 /* Dump /proc/<pid>/maps if possible. */
1041 if (prof_dump_maps(propagate_err))
1042 goto label_write_error;
1044 if (prof_dump_close(propagate_err))
1045 goto label_open_close_error;
1047 malloc_mutex_unlock(&prof_dump_mtx);
1050 prof_leakcheck(&cnt_all, leak_nctx, filename);
1054 prof_dump_close(propagate_err);
1055 label_open_close_error:
1056 while ((ctx.p = ql_first(&ctx_ql)) != NULL)
1057 prof_dump_ctx_cleanup(ctx.p, &ctx_ql);
1058 malloc_mutex_unlock(&prof_dump_mtx);
1062 #define DUMP_FILENAME_BUFSIZE (PATH_MAX + 1)
1063 #define VSEQ_INVALID UINT64_C(0xffffffffffffffff)
1065 prof_dump_filename(char *filename, char v, int64_t vseq)
1068 cassert(config_prof);
1070 if (vseq != VSEQ_INVALID) {
1071 /* "<prefix>.<pid>.<seq>.<v><vseq>.heap" */
1072 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1073 "%s.%d.%"PRIu64".%c%"PRId64".heap",
1074 opt_prof_prefix, (int)getpid(), prof_dump_seq, v, vseq);
1076 /* "<prefix>.<pid>.<seq>.<v>.heap" */
1077 malloc_snprintf(filename, DUMP_FILENAME_BUFSIZE,
1078 "%s.%d.%"PRIu64".%c.heap",
1079 opt_prof_prefix, (int)getpid(), prof_dump_seq, v);
1087 char filename[DUMP_FILENAME_BUFSIZE];
1089 cassert(config_prof);
1091 if (prof_booted == false)
1094 if (opt_prof_final && opt_prof_prefix[0] != '\0') {
1095 malloc_mutex_lock(&prof_dump_seq_mtx);
1096 prof_dump_filename(filename, 'f', VSEQ_INVALID);
1097 malloc_mutex_unlock(&prof_dump_seq_mtx);
1098 prof_dump(false, filename, opt_prof_leak);
1105 prof_tdata_t *prof_tdata;
1106 char filename[PATH_MAX + 1];
1108 cassert(config_prof);
1110 if (prof_booted == false)
1112 prof_tdata = prof_tdata_get(false);
1113 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1115 if (prof_tdata->enq) {
1116 prof_tdata->enq_idump = true;
1120 if (opt_prof_prefix[0] != '\0') {
1121 malloc_mutex_lock(&prof_dump_seq_mtx);
1122 prof_dump_filename(filename, 'i', prof_dump_iseq);
1124 malloc_mutex_unlock(&prof_dump_seq_mtx);
1125 prof_dump(false, filename, false);
1130 prof_mdump(const char *filename)
1132 char filename_buf[DUMP_FILENAME_BUFSIZE];
1134 cassert(config_prof);
1136 if (opt_prof == false || prof_booted == false)
1139 if (filename == NULL) {
1140 /* No filename specified, so automatically generate one. */
1141 if (opt_prof_prefix[0] == '\0')
1143 malloc_mutex_lock(&prof_dump_seq_mtx);
1144 prof_dump_filename(filename_buf, 'm', prof_dump_mseq);
1146 malloc_mutex_unlock(&prof_dump_seq_mtx);
1147 filename = filename_buf;
1149 return (prof_dump(true, filename, false));
1155 prof_tdata_t *prof_tdata;
1156 char filename[DUMP_FILENAME_BUFSIZE];
1158 cassert(config_prof);
1160 if (prof_booted == false)
1162 prof_tdata = prof_tdata_get(false);
1163 if ((uintptr_t)prof_tdata <= (uintptr_t)PROF_TDATA_STATE_MAX)
1165 if (prof_tdata->enq) {
1166 prof_tdata->enq_gdump = true;
1170 if (opt_prof_prefix[0] != '\0') {
1171 malloc_mutex_lock(&prof_dump_seq_mtx);
1172 prof_dump_filename(filename, 'u', prof_dump_useq);
1174 malloc_mutex_unlock(&prof_dump_seq_mtx);
1175 prof_dump(false, filename, false);
1180 prof_bt_hash(const void *key, size_t r_hash[2])
1182 prof_bt_t *bt = (prof_bt_t *)key;
1184 cassert(config_prof);
1186 hash(bt->vec, bt->len * sizeof(void *), 0x94122f33U, r_hash);
1190 prof_bt_keycomp(const void *k1, const void *k2)
1192 const prof_bt_t *bt1 = (prof_bt_t *)k1;
1193 const prof_bt_t *bt2 = (prof_bt_t *)k2;
1195 cassert(config_prof);
1197 if (bt1->len != bt2->len)
1199 return (memcmp(bt1->vec, bt2->vec, bt1->len * sizeof(void *)) == 0);
1203 prof_tdata_init(void)
1205 prof_tdata_t *prof_tdata;
1207 cassert(config_prof);
1209 /* Initialize an empty cache for this thread. */
1210 prof_tdata = (prof_tdata_t *)imalloc(sizeof(prof_tdata_t));
1211 if (prof_tdata == NULL)
1214 if (ckh_new(&prof_tdata->bt2cnt, PROF_CKH_MINITEMS,
1215 prof_bt_hash, prof_bt_keycomp)) {
1216 idalloc(prof_tdata);
1219 ql_new(&prof_tdata->lru_ql);
1221 prof_tdata->vec = imalloc(sizeof(void *) * PROF_BT_MAX);
1222 if (prof_tdata->vec == NULL) {
1223 ckh_delete(&prof_tdata->bt2cnt);
1224 idalloc(prof_tdata);
1228 prof_tdata->prng_state = 0;
1229 prof_tdata->threshold = 0;
1230 prof_tdata->accum = 0;
1232 prof_tdata->enq = false;
1233 prof_tdata->enq_idump = false;
1234 prof_tdata->enq_gdump = false;
1236 prof_tdata_tsd_set(&prof_tdata);
1238 return (prof_tdata);
1242 prof_tdata_cleanup(void *arg)
1244 prof_thr_cnt_t *cnt;
1245 prof_tdata_t *prof_tdata = *(prof_tdata_t **)arg;
1247 cassert(config_prof);
1249 if (prof_tdata == PROF_TDATA_STATE_REINCARNATED) {
1251 * Another destructor deallocated memory after this destructor
1252 * was called. Reset prof_tdata to PROF_TDATA_STATE_PURGATORY
1253 * in order to receive another callback.
1255 prof_tdata = PROF_TDATA_STATE_PURGATORY;
1256 prof_tdata_tsd_set(&prof_tdata);
1257 } else if (prof_tdata == PROF_TDATA_STATE_PURGATORY) {
1259 * The previous time this destructor was called, we set the key
1260 * to PROF_TDATA_STATE_PURGATORY so that other destructors
1261 * wouldn't cause re-creation of the prof_tdata. This time, do
1262 * nothing, so that the destructor will not be called again.
1264 } else if (prof_tdata != NULL) {
1266 * Delete the hash table. All of its contents can still be
1267 * iterated over via the LRU.
1269 ckh_delete(&prof_tdata->bt2cnt);
1271 * Iteratively merge cnt's into the global stats and delete
1274 while ((cnt = ql_last(&prof_tdata->lru_ql, lru_link)) != NULL) {
1275 ql_remove(&prof_tdata->lru_ql, cnt, lru_link);
1276 prof_ctx_merge(cnt->ctx, cnt);
1279 idalloc(prof_tdata->vec);
1280 idalloc(prof_tdata);
1281 prof_tdata = PROF_TDATA_STATE_PURGATORY;
1282 prof_tdata_tsd_set(&prof_tdata);
1290 cassert(config_prof);
1292 memcpy(opt_prof_prefix, PROF_PREFIX_DEFAULT,
1293 sizeof(PROF_PREFIX_DEFAULT));
1300 cassert(config_prof);
1303 * opt_prof and prof_promote must be in their final state before any
1304 * arenas are initialized, so this function must be executed early.
1307 if (opt_prof_leak && opt_prof == false) {
1309 * Enable opt_prof, but in such a way that profiles are never
1310 * automatically dumped.
1313 opt_prof_gdump = false;
1314 } else if (opt_prof) {
1315 if (opt_lg_prof_interval >= 0) {
1316 prof_interval = (((uint64_t)1U) <<
1317 opt_lg_prof_interval);
1321 prof_promote = (opt_prof && opt_lg_prof_sample > LG_PAGE);
1328 cassert(config_prof);
1333 if (ckh_new(&bt2ctx, PROF_CKH_MINITEMS, prof_bt_hash,
1336 if (malloc_mutex_init(&bt2ctx_mtx))
1338 if (prof_tdata_tsd_boot()) {
1340 "<jemalloc>: Error in pthread_key_create()\n");
1344 if (malloc_mutex_init(&prof_dump_seq_mtx))
1346 if (malloc_mutex_init(&prof_dump_mtx))
1349 if (atexit(prof_fdump) != 0) {
1350 malloc_write("<jemalloc>: Error in atexit()\n");
1355 ctx_locks = (malloc_mutex_t *)base_alloc(PROF_NCTX_LOCKS *
1356 sizeof(malloc_mutex_t));
1357 if (ctx_locks == NULL)
1359 for (i = 0; i < PROF_NCTX_LOCKS; i++) {
1360 if (malloc_mutex_init(&ctx_locks[i]))
1365 #ifdef JEMALLOC_PROF_LIBGCC
1367 * Cause the backtracing machinery to allocate its internal state
1368 * before enabling profiling.
1370 _Unwind_Backtrace(prof_unwind_init_callback, NULL);
1385 malloc_mutex_prefork(&bt2ctx_mtx);
1386 malloc_mutex_prefork(&prof_dump_seq_mtx);
1387 for (i = 0; i < PROF_NCTX_LOCKS; i++)
1388 malloc_mutex_prefork(&ctx_locks[i]);
1393 prof_postfork_parent(void)
1399 for (i = 0; i < PROF_NCTX_LOCKS; i++)
1400 malloc_mutex_postfork_parent(&ctx_locks[i]);
1401 malloc_mutex_postfork_parent(&prof_dump_seq_mtx);
1402 malloc_mutex_postfork_parent(&bt2ctx_mtx);
1407 prof_postfork_child(void)
1413 for (i = 0; i < PROF_NCTX_LOCKS; i++)
1414 malloc_mutex_postfork_child(&ctx_locks[i]);
1415 malloc_mutex_postfork_child(&prof_dump_seq_mtx);
1416 malloc_mutex_postfork_child(&bt2ctx_mtx);
1420 /******************************************************************************/