#define JEMALLOC_BACKGROUND_THREAD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"

JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS

/******************************************************************************/
/* Data. */

/* This option should be opt-in only. */
#define BACKGROUND_THREAD_DEFAULT false
/* Read-only after initialization. */
bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1;

/* Used for thread creation, termination and stats. */
malloc_mutex_t background_thread_lock;
/* Indicates global state.  Atomic because decay reads this w/o locking. */
atomic_b_t background_thread_enabled_state;
size_t n_background_threads;
size_t max_background_threads;
/* Thread info per-index. */
background_thread_info_t *background_thread_info;

/******************************************************************************/

#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER

static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
    void *(*)(void *), void *__restrict);

static void
pthread_create_wrapper_init(void) {
#ifdef JEMALLOC_LAZY_LOCK
        if (!isthreaded) {
                isthreaded = true;
        }
#endif
}

int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
    void *(*start_routine)(void *), void *__restrict arg) {
        pthread_create_wrapper_init();

        return pthread_create_fptr(thread, attr, start_routine, arg);
}
#endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */

#ifndef JEMALLOC_BACKGROUND_THREAD
#define NOT_REACHED { not_reached(); }
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) NOT_REACHED
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
bool background_thread_stats_read(tsdn_t *tsdn,
    background_thread_stats_t *stats) NOT_REACHED
void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
#undef NOT_REACHED
#else

static bool background_thread_enabled_at_fork;

static void
background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
        background_thread_wakeup_time_set(tsdn, info, 0);
        info->npages_to_purge_new = 0;
        if (config_stats) {
                info->tot_n_runs = 0;
                nstime_init(&info->tot_sleep_time, 0);
        }
}

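/*
 * Pin the calling thread to the given CPU if sched_setaffinity() is
 * available; returns true on failure.  Used by background_thread_entry() so
 * that, with opt.percpu_arena enabled, background thread i runs on CPU i.
 */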
static inline bool
set_current_thread_affinity(int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
        cpu_set_t cpuset;
        CPU_ZERO(&cpuset);
        CPU_SET(cpu, &cpuset);
        int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);

        return (ret != 0);
#else
        return false;
#endif
}

/* Threshold for determining when to wake up the background thread. */
#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
#define BILLION UINT64_C(1000000000)
/* Minimal sleep interval 100 ms. */
#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)

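/*
 * Estimate how many pages would become purgeable if the next wakeup were
 * `interval` decay epochs from now.  Aging the backlog by `interval` epochs
 * lowers slot i's smoothstep weight from h_steps[i] to h_steps[i - interval]
 * (zero once the slot ages out), so summing the weight deltas over the
 * backlog gives the expected drop in the dirty-page limit, i.e. roughly the
 * number of pages decay would purge (fixed point, hence the final shift by
 * SMOOTHSTEP_BFP).
 */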
static inline size_t
decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
        size_t i;
        uint64_t sum = 0;
        for (i = 0; i < interval; i++) {
                sum += decay->backlog[i] * h_steps[i];
        }
        for (; i < SMOOTHSTEP_NSTEPS; i++) {
                sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
        }

        return (size_t)(sum >> SMOOTHSTEP_BFP);
}

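/*
 * Decide how long the background thread may sleep for one decay state (dirty
 * or muzzy): long enough that roughly BACKGROUND_THREAD_NPAGES_THRESHOLD
 * pages become purgeable, but never less than the minimal interval.
 * Degenerate cases (contended decay mutex, decay disabled, empty backlog,
 * few dirty pages) short-circuit to the min / max / indefinite intervals;
 * otherwise a binary search over [lb, ub] epochs homes in on the interval
 * whose estimated purge count reaches the threshold.
 */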
static uint64_t
arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
    extents_t *extents) {
        if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
                /* Use minimal interval if decay is contended. */
                return BACKGROUND_THREAD_MIN_INTERVAL_NS;
        }

        uint64_t interval;
        ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
        if (decay_time <= 0) {
                /* Purging is eagerly done or disabled currently. */
                interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
                goto label_done;
        }

        uint64_t decay_interval_ns = nstime_ns(&decay->interval);
        assert(decay_interval_ns > 0);
        size_t npages = extents_npages_get(extents);
        if (npages == 0) {
                unsigned i;
                for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
                        if (decay->backlog[i] > 0) {
                                break;
                        }
                }
                if (i == SMOOTHSTEP_NSTEPS) {
                        /* No dirty pages recorded.  Sleep indefinitely. */
                        interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
                        goto label_done;
                }
        }
        if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
                /* Use max interval. */
                interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
                goto label_done;
        }

        size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
        size_t ub = SMOOTHSTEP_NSTEPS;
        /* Minimal 2 intervals to ensure reaching next epoch deadline. */
        lb = (lb < 2) ? 2 : lb;
        if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
            (lb + 2 > ub)) {
                interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
                goto label_done;
        }

        assert(lb + 2 <= ub);
        size_t npurge_lb, npurge_ub;
        npurge_lb = decay_npurge_after_interval(decay, lb);
        if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
                interval = decay_interval_ns * lb;
                goto label_done;
        }
        npurge_ub = decay_npurge_after_interval(decay, ub);
        if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
                interval = decay_interval_ns * ub;
                goto label_done;
        }

        unsigned n_search = 0;
        size_t target, npurge;
        while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
            && (lb + 2 < ub)) {
                target = (lb + ub) / 2;
                npurge = decay_npurge_after_interval(decay, target);
                if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
                        ub = target;
                        npurge_ub = npurge;
                } else {
                        lb = target;
                        npurge_lb = npurge;
                }
                assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
        }
        interval = decay_interval_ns * (ub + lb) / 2;
label_done:
        interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
            BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
        malloc_mutex_unlock(tsdn, &decay->mtx);

        return interval;
}

/* Compute purge interval for background threads. */
static uint64_t
arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
        uint64_t i1, i2;
        i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
            &arena->extents_dirty);
        if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
                return i1;
        }
        i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
            &arena->extents_muzzy);

        return i1 < i2 ? i1 : i2;
}

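/*
 * Sleep on info->cond; the caller holds info->mtx.  An indefinite sleep uses
 * pthread_cond_wait().  Otherwise the absolute wakeup time is published via
 * background_thread_wakeup_time_set(), so that
 * background_thread_interval_check() can tell whether an early signal is
 * needed, and pthread_cond_timedwait() is used with a deadline based on
 * gettimeofday().  Total sleep time is accumulated for stats.
 */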
static void
background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
    uint64_t interval) {
        if (config_stats) {
                info->tot_n_runs++;
        }
        info->npages_to_purge_new = 0;

        struct timeval tv;
        /* Specific clock required by timedwait. */
        gettimeofday(&tv, NULL);
        nstime_t before_sleep;
        nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);

        int ret;
        if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
                assert(background_thread_indefinite_sleep(info));
                ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
                assert(ret == 0);
        } else {
                assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
                    interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
                /* We need the malloc clock (which can differ from tv). */
                nstime_t next_wakeup;
                nstime_init(&next_wakeup, 0);
                nstime_update(&next_wakeup);
                nstime_iadd(&next_wakeup, interval);
                assert(nstime_ns(&next_wakeup) <
                    BACKGROUND_THREAD_INDEFINITE_SLEEP);
                background_thread_wakeup_time_set(tsdn, info,
                    nstime_ns(&next_wakeup));

                nstime_t ts_wakeup;
                nstime_copy(&ts_wakeup, &before_sleep);
                nstime_iadd(&ts_wakeup, interval);
                struct timespec ts;
                ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
                ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);

                assert(!background_thread_indefinite_sleep(info));
                ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
                assert(ret == ETIMEDOUT || ret == 0);
                background_thread_wakeup_time_set(tsdn, info,
                    BACKGROUND_THREAD_INDEFINITE_SLEEP);
        }
        if (config_stats) {
                gettimeofday(&tv, NULL);
                nstime_t after_sleep;
                nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
                if (nstime_compare(&after_sleep, &before_sleep) > 0) {
                        nstime_subtract(&after_sleep, &before_sleep);
                        nstime_add(&info->tot_sleep_time, &after_sleep);
                }
        }
}

static bool
background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
        if (unlikely(info->state == background_thread_paused)) {
                malloc_mutex_unlock(tsdn, &info->mtx);
                /* Wait on global lock to update status. */
                malloc_mutex_lock(tsdn, &background_thread_lock);
                malloc_mutex_unlock(tsdn, &background_thread_lock);
                malloc_mutex_lock(tsdn, &info->mtx);
                return true;
        }

        return false;
}

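/*
 * One round of background work for thread `ind`.  Arenas are striped across
 * threads (thread `ind` handles arenas ind, ind + max_background_threads,
 * ...): purge each assigned arena, take the smallest purge interval among
 * them, and sleep for that long (indefinitely if nothing is pending).
 */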
static inline void
background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) {
        uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
        unsigned narenas = narenas_total_get();

        for (unsigned i = ind; i < narenas; i += max_background_threads) {
                arena_t *arena = arena_get(tsdn, i, false);
                if (!arena) {
                        continue;
                }
                arena_decay(tsdn, arena, true, false);
                if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
                        /* Min interval will be used. */
                        continue;
                }
                uint64_t interval = arena_decay_compute_purge_interval(tsdn,
                    arena);
                assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
                if (min_interval > interval) {
                        min_interval = interval;
                }
        }
        background_thread_sleep(tsdn, info, min_interval);
}

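/*
 * Stop a single background thread: mark it stopped under info->mtx, wake it
 * through its condvar, then join it.  Returns true on failure.  Thread 0 is
 * stopped with background_thread_lock held; the other threads are stopped by
 * thread 0 itself during shutdown.
 */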
static bool
background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
        if (info == &background_thread_info[0]) {
                malloc_mutex_assert_owner(tsd_tsdn(tsd),
                    &background_thread_lock);
        } else {
                malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
                    &background_thread_lock);
        }

        pre_reentrancy(tsd, NULL);
        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
        bool has_thread;
        assert(info->state != background_thread_paused);
        if (info->state == background_thread_started) {
                has_thread = true;
                info->state = background_thread_stopped;
                pthread_cond_signal(&info->cond);
        } else {
                has_thread = false;
        }
        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

        if (!has_thread) {
                post_reentrancy(tsd);
                return false;
        }
        void *ret;
        if (pthread_join(info->thread, &ret)) {
                post_reentrancy(tsd);
                return true;
        }
        assert(ret == NULL);
        n_background_threads--;
        post_reentrancy(tsd);

        return false;
}

static void *background_thread_entry(void *ind_arg);

static int
background_thread_create_signals_masked(pthread_t *thread,
    const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
        /*
         * Mask signals during thread creation so that the thread inherits
         * an empty signal set.
         */
        sigset_t set;
        sigfillset(&set);
        sigset_t oldset;
        int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
        if (mask_err != 0) {
                return mask_err;
        }
        int create_err = pthread_create_wrapper(thread, attr, start_routine,
            arg);
        /*
         * Restore the signal mask.  Failure to restore the signal mask here
         * changes program behavior.
         */
        int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
        if (restore_err != 0) {
                malloc_printf("<jemalloc>: background thread creation "
                    "failed (%d), and signal mask restoration failed "
                    "(%d)\n", create_err, restore_err);
                if (opt_abort) {
                        abort();
                }
        }
        return create_err;
}

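/*
 * Thread 0 creates the remaining worker threads lazily: when an arena needs
 * a thread, background_thread_create_locked() flips the corresponding info
 * state to started and signals thread 0's condvar.  Scan for such entries
 * and create their pthreads.  Returns true after a creation attempt so that
 * the caller restarts its loop (info[0].mtx was dropped in the meantime).
 */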
static bool
check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
    bool *created_threads) {
        bool ret = false;
        if (likely(*n_created == n_background_threads)) {
                return ret;
        }

        tsdn_t *tsdn = tsd_tsdn(tsd);
        malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
        for (unsigned i = 1; i < max_background_threads; i++) {
                if (created_threads[i]) {
                        continue;
                }
                background_thread_info_t *info = &background_thread_info[i];
                malloc_mutex_lock(tsdn, &info->mtx);
                /*
                 * If the thread is in the background_thread_paused state
                 * because of an arena reset, delay the creation.
                 */
                bool create = (info->state == background_thread_started);
                malloc_mutex_unlock(tsdn, &info->mtx);
                if (!create) {
                        continue;
                }

                pre_reentrancy(tsd, NULL);
                int err = background_thread_create_signals_masked(&info->thread,
                    NULL, background_thread_entry, (void *)(uintptr_t)i);
                post_reentrancy(tsd);

                if (err == 0) {
                        (*n_created)++;
                        created_threads[i] = true;
                } else {
                        malloc_printf("<jemalloc>: background thread "
                            "creation failed (%d)\n", err);
                        if (opt_abort) {
                                abort();
                        }
                }
                /* Return to restart the loop since we unlocked. */
                ret = true;
                break;
        }
        malloc_mutex_lock(tsdn, &background_thread_info[0].mtx);

        return ret;
}

static void
background_thread0_work(tsd_t *tsd) {
        /* Thread0 is also responsible for launching / terminating threads. */
        VARIABLE_ARRAY(bool, created_threads, max_background_threads);
        unsigned i;
        for (i = 1; i < max_background_threads; i++) {
                created_threads[i] = false;
        }
        /* Start working, and create more threads when asked. */
        unsigned n_created = 1;
        while (background_thread_info[0].state != background_thread_stopped) {
                if (background_thread_pause_check(tsd_tsdn(tsd),
                    &background_thread_info[0])) {
                        continue;
                }
                if (check_background_thread_creation(tsd, &n_created,
                    (bool *)&created_threads)) {
                        continue;
                }
                background_work_sleep_once(tsd_tsdn(tsd),
                    &background_thread_info[0], 0);
        }

        /*
         * Shut down the other threads at exit.  Note that the ctl thread is
         * holding the global background_thread mutex and is waiting for us.
         */
        assert(!background_thread_enabled());
        for (i = 1; i < max_background_threads; i++) {
                background_thread_info_t *info = &background_thread_info[i];
                assert(info->state != background_thread_paused);
                if (created_threads[i]) {
                        background_threads_disable_single(tsd, info);
                } else {
                        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
                        if (info->state != background_thread_stopped) {
                                /* The thread was not created. */
                                assert(info->state ==
                                    background_thread_started);
                                n_background_threads--;
                                info->state = background_thread_stopped;
                        }
                        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
                }
        }
        background_thread_info[0].state = background_thread_stopped;
        assert(n_background_threads == 1);
}

static void
background_work(tsd_t *tsd, unsigned ind) {
        background_thread_info_t *info = &background_thread_info[ind];

        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
        background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
            BACKGROUND_THREAD_INDEFINITE_SLEEP);
        if (ind == 0) {
                background_thread0_work(tsd);
        } else {
                while (info->state != background_thread_stopped) {
                        if (background_thread_pause_check(tsd_tsdn(tsd),
                            info)) {
                                continue;
                        }
                        background_work_sleep_once(tsd_tsdn(tsd), info, ind);
                }
        }
        assert(info->state == background_thread_stopped);
        background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
}

static void *
background_thread_entry(void *ind_arg) {
        unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
        assert(thread_ind < max_background_threads);
#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
        pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
#elif defined(__FreeBSD__)
        pthread_set_name_np(pthread_self(), "jemalloc_bg_thd");
#endif
        if (opt_percpu_arena != percpu_arena_disabled) {
                set_current_thread_affinity((int)thread_ind);
        }
        /*
         * Start periodic background work.  We use internal tsd which avoids
         * side effects, for example triggering new arena creation (which in
         * turn triggers another background thread creation).
         */
        background_work(tsd_internal_fetch(), thread_ind);
        assert(pthread_equal(pthread_self(),
            background_thread_info[thread_ind].thread));

        return NULL;
}

static void
background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
        malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
        info->state = background_thread_started;
        background_thread_info_init(tsd_tsdn(tsd), info);
        n_background_threads++;
}

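/*
 * Mark the info slot for arena_ind (arena_ind % max_background_threads) as
 * started.  Only thread 0 is created synchronously here (when called for
 * arena 0); for any other arena we merely signal thread 0, which creates the
 * pthread asynchronously in check_background_thread_creation().
 */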
static bool
background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) {
        assert(have_background_thread);
        malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

        /* We create at most NCPUs threads. */
        size_t thread_ind = arena_ind % max_background_threads;
        background_thread_info_t *info = &background_thread_info[thread_ind];

        bool need_new_thread;
        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
        need_new_thread = background_thread_enabled() &&
            (info->state == background_thread_stopped);
        if (need_new_thread) {
                background_thread_init(tsd, info);
        }
        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
        if (!need_new_thread) {
                return false;
        }
        if (arena_ind != 0) {
                /* Threads are created asynchronously by Thread 0. */
                background_thread_info_t *t0 = &background_thread_info[0];
                malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
                assert(t0->state == background_thread_started);
                pthread_cond_signal(&t0->cond);
                malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);

                return false;
        }

        pre_reentrancy(tsd, NULL);
        /*
         * To avoid complications (besides reentrancy), create internal
         * background threads with the underlying pthread_create.
         */
        int err = background_thread_create_signals_masked(&info->thread, NULL,
            background_thread_entry, (void *)thread_ind);
        post_reentrancy(tsd);

        if (err != 0) {
                malloc_printf("<jemalloc>: arena 0 background thread creation "
                    "failed (%d)\n", err);
                malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
                info->state = background_thread_stopped;
                n_background_threads--;
                malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

                return true;
        }

        return false;
}

/* Create a new background thread if needed. */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
        assert(have_background_thread);

        bool ret;
        malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
        ret = background_thread_create_locked(tsd, arena_ind);
        malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);

        return ret;
}

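/*
 * Enable background threads: with background_thread_lock held, mark one info
 * slot per existing arena (modulo max_background_threads) as started, then
 * create thread 0, which spawns the remaining threads asynchronously.
 */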
bool
background_threads_enable(tsd_t *tsd) {
        assert(n_background_threads == 0);
        assert(background_thread_enabled());
        malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

        VARIABLE_ARRAY(bool, marked, max_background_threads);
        unsigned i, nmarked;
        for (i = 0; i < max_background_threads; i++) {
                marked[i] = false;
        }
        nmarked = 0;
        /* Thread 0 is required and created at the end. */
        marked[0] = true;
        /* Mark the threads we need to create for thread 0. */
        unsigned n = narenas_total_get();
        for (i = 1; i < n; i++) {
                if (marked[i % max_background_threads] ||
                    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
                        continue;
                }
                background_thread_info_t *info = &background_thread_info[
                    i % max_background_threads];
                malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
                assert(info->state == background_thread_stopped);
                background_thread_init(tsd, info);
                malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
                marked[i % max_background_threads] = true;
                if (++nmarked == max_background_threads) {
                        break;
                }
        }

        return background_thread_create_locked(tsd, 0);
}

bool
background_threads_disable(tsd_t *tsd) {
        assert(!background_thread_enabled());
        malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

        /* Thread 0 will be responsible for terminating other threads. */
        if (background_threads_disable_single(tsd,
            &background_thread_info[0])) {
                return true;
        }
        assert(n_background_threads == 0);

        return false;
}

/* Check if we need to signal the background thread early. */
void
background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) {
        background_thread_info_t *info = arena_background_thread_info_get(
            arena);
        if (malloc_mutex_trylock(tsdn, &info->mtx)) {
                /*
                 * Background thread may hold the mutex for a long period of
                 * time.  We'd like to avoid the variance on application
                 * threads.  So keep this non-blocking, and leave the work to a
                 * future epoch.
                 */
                return;
        }

        if (info->state != background_thread_started) {
                goto label_done;
        }
        if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
                goto label_done;
        }

        ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
        if (decay_time <= 0) {
                /* Purging is eagerly done or disabled currently. */
                goto label_done_unlock2;
        }
        uint64_t decay_interval_ns = nstime_ns(&decay->interval);
        assert(decay_interval_ns > 0);

        nstime_t diff;
        nstime_init(&diff, background_thread_wakeup_time_get(info));
        if (nstime_compare(&diff, &decay->epoch) <= 0) {
                goto label_done_unlock2;
        }
        nstime_subtract(&diff, &decay->epoch);
        if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
                goto label_done_unlock2;
        }

        if (npages_new > 0) {
                size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
                /*
                 * Compute how many new pages we would need to purge by the next
                 * wakeup, which is used to determine if we should signal the
                 * background thread.
                 */
                uint64_t npurge_new;
                if (n_epoch >= SMOOTHSTEP_NSTEPS) {
                        npurge_new = npages_new;
                } else {
                        uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
                        assert(h_steps_max >=
                            h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
                        npurge_new = npages_new * (h_steps_max -
                            h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
                        npurge_new >>= SMOOTHSTEP_BFP;
                }
                info->npages_to_purge_new += npurge_new;
        }

        bool should_signal;
        if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
                should_signal = true;
        } else if (unlikely(background_thread_indefinite_sleep(info)) &&
            (extents_npages_get(&arena->extents_dirty) > 0 ||
            extents_npages_get(&arena->extents_muzzy) > 0 ||
            info->npages_to_purge_new > 0)) {
                should_signal = true;
        } else {
                should_signal = false;
        }

        if (should_signal) {
                info->npages_to_purge_new = 0;
                pthread_cond_signal(&info->cond);
        }
label_done_unlock2:
        malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
        malloc_mutex_unlock(tsdn, &info->mtx);
}

void
background_thread_prefork0(tsdn_t *tsdn) {
        malloc_mutex_prefork(tsdn, &background_thread_lock);
        background_thread_enabled_at_fork = background_thread_enabled();
}

void
background_thread_prefork1(tsdn_t *tsdn) {
        for (unsigned i = 0; i < max_background_threads; i++) {
                malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
        }
}

void
background_thread_postfork_parent(tsdn_t *tsdn) {
        for (unsigned i = 0; i < max_background_threads; i++) {
                malloc_mutex_postfork_parent(tsdn,
                    &background_thread_info[i].mtx);
        }
        malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
}

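/*
 * Child-side fork handler.  Worker pthreads do not exist in the child, so if
 * background threads were enabled at fork time, reset all state back to
 * disabled: zero the thread count, mark every info slot stopped, and
 * reinitialize the condvars.
 */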
void
background_thread_postfork_child(tsdn_t *tsdn) {
        for (unsigned i = 0; i < max_background_threads; i++) {
                malloc_mutex_postfork_child(tsdn,
                    &background_thread_info[i].mtx);
        }
        malloc_mutex_postfork_child(tsdn, &background_thread_lock);
        if (!background_thread_enabled_at_fork) {
                return;
        }

        /* Clear background_thread state (reset to disabled for child). */
        malloc_mutex_lock(tsdn, &background_thread_lock);
        n_background_threads = 0;
        background_thread_enabled_set(tsdn, false);
        for (unsigned i = 0; i < max_background_threads; i++) {
                background_thread_info_t *info = &background_thread_info[i];
                malloc_mutex_lock(tsdn, &info->mtx);
                info->state = background_thread_stopped;
                int ret = pthread_cond_init(&info->cond, NULL);
                assert(ret == 0);
                background_thread_info_init(tsdn, info);
                malloc_mutex_unlock(tsdn, &info->mtx);
        }
        malloc_mutex_unlock(tsdn, &background_thread_lock);
}

bool
background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
        assert(config_stats);
        malloc_mutex_lock(tsdn, &background_thread_lock);
        if (!background_thread_enabled()) {
                malloc_mutex_unlock(tsdn, &background_thread_lock);
                return true;
        }

        stats->num_threads = n_background_threads;
        uint64_t num_runs = 0;
        nstime_init(&stats->run_interval, 0);
        for (unsigned i = 0; i < max_background_threads; i++) {
                background_thread_info_t *info = &background_thread_info[i];
                if (malloc_mutex_trylock(tsdn, &info->mtx)) {
                        /*
                         * Each background thread run may take a long time;
                         * avoid waiting on the stats if the thread is active.
                         */
                        continue;
                }
                if (info->state != background_thread_stopped) {
                        num_runs += info->tot_n_runs;
                        nstime_add(&stats->run_interval, &info->tot_sleep_time);
                }
                malloc_mutex_unlock(tsdn, &info->mtx);
        }
        stats->num_runs = num_runs;
        if (num_runs > 0) {
                nstime_idivide(&stats->run_interval, num_runs);
        }
        malloc_mutex_unlock(tsdn, &background_thread_lock);

        return false;
}

#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
#undef BILLION
#undef BACKGROUND_THREAD_MIN_INTERVAL_NS

#ifdef JEMALLOC_HAVE_DLSYM
#include <dlfcn.h>
#endif

static bool
pthread_create_fptr_init(void) {
        if (pthread_create_fptr != NULL) {
                return false;
        }
        /*
         * Try the next symbol first, because 1) when lazy_lock is used we have
         * a wrapper for pthread_create; and 2) the application may define its
         * own wrapper as well (and may call malloc within the wrapper).
         */
#ifdef JEMALLOC_HAVE_DLSYM
        pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
#else
        pthread_create_fptr = NULL;
#endif
        if (pthread_create_fptr == NULL) {
                if (config_lazy_lock) {
                        malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
                            "\"pthread_create\")\n");
                        abort();
                } else {
                        /* Fall back to the default symbol. */
                        pthread_create_fptr = pthread_create;
                }
        }

        return false;
}

/*
 * When lazy lock is enabled, we need to make sure isthreaded is set before
 * taking any background_thread locks.  This is called early in ctl (instead
 * of waiting for the pthread_create calls to trigger it) because the mutex is
 * required before creating background threads.
 */
void
background_thread_ctl_init(tsdn_t *tsdn) {
        malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
        pthread_create_fptr_init();
        pthread_create_wrapper_init();
#endif
}

#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */

bool
background_thread_boot0(void) {
        if (!have_background_thread && opt_background_thread) {
                malloc_printf("<jemalloc>: option background_thread currently "
                    "supports pthread only\n");
                return true;
        }
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
        if ((config_lazy_lock || opt_background_thread) &&
            pthread_create_fptr_init()) {
                return true;
        }
#endif
        return false;
}

bool
background_thread_boot1(tsdn_t *tsdn) {
#ifdef JEMALLOC_BACKGROUND_THREAD
        assert(have_background_thread);
        assert(narenas_total_get() > 0);

        if (opt_max_background_threads > MAX_BACKGROUND_THREAD_LIMIT) {
                opt_max_background_threads = DEFAULT_NUM_BACKGROUND_THREAD;
        }
        max_background_threads = opt_max_background_threads;

        background_thread_enabled_set(tsdn, opt_background_thread);
        if (malloc_mutex_init(&background_thread_lock,
            "background_thread_global",
            WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
            malloc_mutex_rank_exclusive)) {
                return true;
        }

        background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
            b0get(), opt_max_background_threads *
            sizeof(background_thread_info_t), CACHELINE);
        if (background_thread_info == NULL) {
                return true;
        }

        for (unsigned i = 0; i < max_background_threads; i++) {
                background_thread_info_t *info = &background_thread_info[i];
                /* Thread mutex is rank_inclusive because of thread0. */
                if (malloc_mutex_init(&info->mtx, "background_thread",
                    WITNESS_RANK_BACKGROUND_THREAD,
                    malloc_mutex_address_ordered)) {
                        return true;
                }
                if (pthread_cond_init(&info->cond, NULL)) {
                        return true;
                }
                malloc_mutex_lock(tsdn, &info->mtx);
                info->state = background_thread_stopped;
                background_thread_info_init(tsdn, info);
                malloc_mutex_unlock(tsdn, &info->mtx);
        }
#endif

        return false;
}