/*
 * contrib/jemalloc/src/background_thread.c
 * jemalloc 5.1.0, as imported into the FreeBSD source tree.
 */
#define JEMALLOC_BACKGROUND_THREAD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"

/******************************************************************************/
/* Data. */

/* This option should be opt-in only. */
#define BACKGROUND_THREAD_DEFAULT false
/* Read-only after initialization. */
bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT;

/* Used for thread creation, termination and stats. */
malloc_mutex_t background_thread_lock;
/* Indicates global state.  Atomic because decay reads this w/o locking. */
atomic_b_t background_thread_enabled_state;
size_t n_background_threads;
size_t max_background_threads;
/* Thread info per-index. */
background_thread_info_t *background_thread_info;

/* False if no necessary runtime support. */
bool can_enable_background_thread;

/******************************************************************************/

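/*
 * When JEMALLOC_PTHREAD_CREATE_WRAPPER is defined, pthread_create calls are
 * routed through pthread_create_wrapper so that lazy locking can flip
 * isthreaded on first thread creation; the real pthread_create is resolved
 * lazily via dlsym(RTLD_NEXT) in pthread_create_fptr_init() below.
 */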
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
#include <dlfcn.h>

static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
    void *(*)(void *), void *__restrict);

static void
pthread_create_wrapper_init(void) {
#ifdef JEMALLOC_LAZY_LOCK
        if (!isthreaded) {
                isthreaded = true;
        }
#endif
}

int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
    void *(*start_routine)(void *), void *__restrict arg) {
        pthread_create_wrapper_init();

        return pthread_create_fptr(thread, attr, start_routine, arg);
}
#endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */

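/*
 * Without JEMALLOC_BACKGROUND_THREAD support, the public entry points below
 * are stubbed out with not_reached(); they must never be called in such
 * builds.
 */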
#ifndef JEMALLOC_BACKGROUND_THREAD
#define NOT_REACHED { not_reached(); }
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) NOT_REACHED
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
bool background_thread_stats_read(tsdn_t *tsdn,
    background_thread_stats_t *stats) NOT_REACHED
void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
#undef NOT_REACHED
#else

static bool background_thread_enabled_at_fork;

static void
background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
        background_thread_wakeup_time_set(tsdn, info, 0);
        info->npages_to_purge_new = 0;
        if (config_stats) {
                info->tot_n_runs = 0;
                nstime_init(&info->tot_sleep_time, 0);
        }
}

static inline bool
set_current_thread_affinity(UNUSED int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
        cpu_set_t cpuset;
        CPU_ZERO(&cpuset);
        CPU_SET(cpu, &cpuset);
        int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);

        return (ret != 0);
#else
        return false;
#endif
}

/* Threshold for determining when to wake up the background thread. */
#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
#define BILLION UINT64_C(1000000000)
/* Minimal sleep interval 100 ms. */
#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)

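/*
 * Estimate how many dirty pages the smoothstep decay curve would release if
 * the thread slept for the given number of decay epochs, based on the
 * per-epoch backlog and the precomputed h_steps table.
 */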
static inline size_t
decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
        size_t i;
        uint64_t sum = 0;
        for (i = 0; i < interval; i++) {
                sum += decay->backlog[i] * h_steps[i];
        }
        for (; i < SMOOTHSTEP_NSTEPS; i++) {
                sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
        }

        return (size_t)(sum >> SMOOTHSTEP_BFP);
}

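/*
 * Compute how long to sleep (in ns) before roughly
 * BACKGROUND_THREAD_NPAGES_THRESHOLD pages become purgeable for one decay
 * state: short-circuit the easy cases, then binary search the number of
 * decay epochs between lb and ub using decay_npurge_after_interval().
 */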
static uint64_t
arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
    extents_t *extents) {
        if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
                /* Use minimal interval if decay is contended. */
                return BACKGROUND_THREAD_MIN_INTERVAL_NS;
        }

        uint64_t interval;
        ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
        if (decay_time <= 0) {
                /* Purging is eagerly done or disabled currently. */
                interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
                goto label_done;
        }

        uint64_t decay_interval_ns = nstime_ns(&decay->interval);
        assert(decay_interval_ns > 0);
        size_t npages = extents_npages_get(extents);
        if (npages == 0) {
                unsigned i;
                for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
                        if (decay->backlog[i] > 0) {
                                break;
                        }
                }
                if (i == SMOOTHSTEP_NSTEPS) {
                        /* No dirty pages recorded.  Sleep indefinitely. */
                        interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
                        goto label_done;
                }
        }
        if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
                /* Use max interval. */
                interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
                goto label_done;
        }

        size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
        size_t ub = SMOOTHSTEP_NSTEPS;
        /* Minimal 2 intervals to ensure reaching next epoch deadline. */
        lb = (lb < 2) ? 2 : lb;
        if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
            (lb + 2 > ub)) {
                interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
                goto label_done;
        }

        assert(lb + 2 <= ub);
        size_t npurge_lb, npurge_ub;
        npurge_lb = decay_npurge_after_interval(decay, lb);
        if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
                interval = decay_interval_ns * lb;
                goto label_done;
        }
        npurge_ub = decay_npurge_after_interval(decay, ub);
        if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
                interval = decay_interval_ns * ub;
                goto label_done;
        }

        unsigned n_search = 0;
        size_t target, npurge;
        while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
            && (lb + 2 < ub)) {
                target = (lb + ub) / 2;
                npurge = decay_npurge_after_interval(decay, target);
                if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
                        ub = target;
                        npurge_ub = npurge;
                } else {
                        lb = target;
                        npurge_lb = npurge;
                }
                assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
        }
        interval = decay_interval_ns * (ub + lb) / 2;
label_done:
        interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
            BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
        malloc_mutex_unlock(tsdn, &decay->mtx);

        return interval;
}

/* Compute purge interval for background threads. */
static uint64_t
arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
        uint64_t i1, i2;
        i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
            &arena->extents_dirty);
        if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
                return i1;
        }
        i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
            &arena->extents_muzzy);

        return i1 < i2 ? i1 : i2;
}

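/*
 * Sleep on the per-thread condition variable: indefinitely (plain
 * pthread_cond_wait) when the computed interval is
 * BACKGROUND_THREAD_INDEFINITE_SLEEP, otherwise a timed wait for the given
 * interval.  The published wakeup time and, when stats are enabled, the run
 * count and total sleep time are updated around the wait.
 */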
static void
background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
    uint64_t interval) {
        if (config_stats) {
                info->tot_n_runs++;
        }
        info->npages_to_purge_new = 0;

        struct timeval tv;
        /* Specific clock required by timedwait. */
        gettimeofday(&tv, NULL);
        nstime_t before_sleep;
        nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);

        int ret;
        if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
                assert(background_thread_indefinite_sleep(info));
                ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
                assert(ret == 0);
        } else {
                assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
                    interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
                /* We need malloc clock (can be different from tv). */
                nstime_t next_wakeup;
                nstime_init(&next_wakeup, 0);
                nstime_update(&next_wakeup);
                nstime_iadd(&next_wakeup, interval);
                assert(nstime_ns(&next_wakeup) <
                    BACKGROUND_THREAD_INDEFINITE_SLEEP);
                background_thread_wakeup_time_set(tsdn, info,
                    nstime_ns(&next_wakeup));

                nstime_t ts_wakeup;
                nstime_copy(&ts_wakeup, &before_sleep);
                nstime_iadd(&ts_wakeup, interval);
                struct timespec ts;
                ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
                ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);

                assert(!background_thread_indefinite_sleep(info));
                ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
                assert(ret == ETIMEDOUT || ret == 0);
                background_thread_wakeup_time_set(tsdn, info,
                    BACKGROUND_THREAD_INDEFINITE_SLEEP);
        }
        if (config_stats) {
                gettimeofday(&tv, NULL);
                nstime_t after_sleep;
                nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
                if (nstime_compare(&after_sleep, &before_sleep) > 0) {
                        nstime_subtract(&after_sleep, &before_sleep);
                        nstime_add(&info->tot_sleep_time, &after_sleep);
                }
        }
}

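/*
 * If the thread has been paused (e.g. for arena reset), drop the info mutex
 * and block on the global background_thread_lock until the pausing thread
 * releases it, then reacquire and report that the caller should re-check
 * state.
 */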
static bool
background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
        if (unlikely(info->state == background_thread_paused)) {
                malloc_mutex_unlock(tsdn, &info->mtx);
                /* Wait on global lock to update status. */
                malloc_mutex_lock(tsdn, &background_thread_lock);
                malloc_mutex_unlock(tsdn, &background_thread_lock);
                malloc_mutex_lock(tsdn, &info->mtx);
                return true;
        }

        return false;
}

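/*
 * One iteration of background work: decay every arena whose index maps to
 * this thread (ind, ind + max_background_threads, ...), compute the shortest
 * purge interval among them, and sleep for that long.
 */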
static inline void
background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) {
        uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
        unsigned narenas = narenas_total_get();

        for (unsigned i = ind; i < narenas; i += max_background_threads) {
                arena_t *arena = arena_get(tsdn, i, false);
                if (!arena) {
                        continue;
                }
                arena_decay(tsdn, arena, true, false);
                if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
                        /* Min interval will be used. */
                        continue;
                }
                uint64_t interval = arena_decay_compute_purge_interval(tsdn,
                    arena);
                assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
                if (min_interval > interval) {
                        min_interval = interval;
                }
        }
        background_thread_sleep(tsdn, info, min_interval);
}

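/*
 * Stop a single background thread: mark its state stopped, signal it, and
 * join it (with reentrancy guarded).  Returns true only if joining the
 * thread fails.
 */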
static bool
background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
        if (info == &background_thread_info[0]) {
                malloc_mutex_assert_owner(tsd_tsdn(tsd),
                    &background_thread_lock);
        } else {
                malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
                    &background_thread_lock);
        }

        pre_reentrancy(tsd, NULL);
        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
        bool has_thread;
        assert(info->state != background_thread_paused);
        if (info->state == background_thread_started) {
                has_thread = true;
                info->state = background_thread_stopped;
                pthread_cond_signal(&info->cond);
        } else {
                has_thread = false;
        }
        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

        if (!has_thread) {
                post_reentrancy(tsd);
                return false;
        }
        void *ret;
        if (pthread_join(info->thread, &ret)) {
                post_reentrancy(tsd);
                return true;
        }
        assert(ret == NULL);
        n_background_threads--;
        post_reentrancy(tsd);

        return false;
}

static void *background_thread_entry(void *ind_arg);

static int
background_thread_create_signals_masked(pthread_t *thread,
    const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
        /*
         * Mask signals during thread creation so that the thread inherits
         * an empty signal set.
         */
        sigset_t set;
        sigfillset(&set);
        sigset_t oldset;
        int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
        if (mask_err != 0) {
                return mask_err;
        }
        int create_err = pthread_create_wrapper(thread, attr, start_routine,
            arg);
        /*
         * Restore the signal mask.  Failure to restore the signal mask here
         * changes program behavior.
         */
        int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
        if (restore_err != 0) {
                malloc_printf("<jemalloc>: background thread creation "
                    "failed (%d), and signal mask restoration failed "
                    "(%d)\n", create_err, restore_err);
                if (opt_abort) {
                        abort();
                }
        }
        return create_err;
}

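/*
 * Called by thread 0 with background_thread_info[0].mtx held: if more threads
 * have been requested (n_background_threads grew), create one missing thread
 * with signals masked and return true so the caller restarts its loop; the
 * per-thread and thread-0 mutexes are dropped and reacquired along the way.
 */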
static bool
check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
    bool *created_threads) {
        bool ret = false;
        if (likely(*n_created == n_background_threads)) {
                return ret;
        }

        tsdn_t *tsdn = tsd_tsdn(tsd);
        malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
        for (unsigned i = 1; i < max_background_threads; i++) {
                if (created_threads[i]) {
                        continue;
                }
                background_thread_info_t *info = &background_thread_info[i];
                malloc_mutex_lock(tsdn, &info->mtx);
                /*
                 * In case of the background_thread_paused state because of
                 * arena reset, delay the creation.
                 */
                bool create = (info->state == background_thread_started);
                malloc_mutex_unlock(tsdn, &info->mtx);
                if (!create) {
                        continue;
                }

                pre_reentrancy(tsd, NULL);
                int err = background_thread_create_signals_masked(&info->thread,
                    NULL, background_thread_entry, (void *)(uintptr_t)i);
                post_reentrancy(tsd);

                if (err == 0) {
                        (*n_created)++;
                        created_threads[i] = true;
                } else {
                        malloc_printf("<jemalloc>: background thread "
                            "creation failed (%d)\n", err);
                        if (opt_abort) {
                                abort();
                        }
                }
                /* Return to restart the loop since we unlocked. */
                ret = true;
                break;
        }
        malloc_mutex_lock(tsdn, &background_thread_info[0].mtx);

        return ret;
}

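/*
 * Main loop for thread 0, which doubles as the launcher/terminator for the
 * other background threads: it services pause requests, creates newly
 * requested threads, and does its own share of decay work; on shutdown it
 * joins or unwinds every other thread while the ctl thread that disabled
 * background threads waits for it.
 */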
static void
background_thread0_work(tsd_t *tsd) {
        /* Thread0 is also responsible for launching / terminating threads. */
        VARIABLE_ARRAY(bool, created_threads, max_background_threads);
        unsigned i;
        for (i = 1; i < max_background_threads; i++) {
                created_threads[i] = false;
        }
        /* Start working, and create more threads when asked. */
        unsigned n_created = 1;
        while (background_thread_info[0].state != background_thread_stopped) {
                if (background_thread_pause_check(tsd_tsdn(tsd),
                    &background_thread_info[0])) {
                        continue;
                }
                if (check_background_thread_creation(tsd, &n_created,
                    (bool *)&created_threads)) {
                        continue;
                }
                background_work_sleep_once(tsd_tsdn(tsd),
                    &background_thread_info[0], 0);
        }

        /*
         * Shut down other threads at exit.  Note that the ctl thread is holding
         * the global background_thread mutex (and is waiting) for us.
         */
        assert(!background_thread_enabled());
        for (i = 1; i < max_background_threads; i++) {
                background_thread_info_t *info = &background_thread_info[i];
                assert(info->state != background_thread_paused);
                if (created_threads[i]) {
                        background_threads_disable_single(tsd, info);
                } else {
                        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
                        if (info->state != background_thread_stopped) {
                                /* The thread was not created. */
                                assert(info->state ==
                                    background_thread_started);
                                n_background_threads--;
                                info->state = background_thread_stopped;
                        }
                        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
                }
        }
        background_thread_info[0].state = background_thread_stopped;
        assert(n_background_threads == 1);
}

static void
background_work(tsd_t *tsd, unsigned ind) {
        background_thread_info_t *info = &background_thread_info[ind];

        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
        background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
            BACKGROUND_THREAD_INDEFINITE_SLEEP);
        if (ind == 0) {
                background_thread0_work(tsd);
        } else {
                while (info->state != background_thread_stopped) {
                        if (background_thread_pause_check(tsd_tsdn(tsd),
                            info)) {
                                continue;
                        }
                        background_work_sleep_once(tsd_tsdn(tsd), info, ind);
                }
        }
        assert(info->state == background_thread_stopped);
        background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
}

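/*
 * pthread entry point for all background threads.  Sets an identifiable
 * thread name where supported, optionally pins the thread to a CPU when
 * percpu arenas are in use, and runs background_work() on internal tsd so
 * that the worker cannot trigger arena (and thus further thread) creation.
 */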
static void *
background_thread_entry(void *ind_arg) {
        unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
        assert(thread_ind < max_background_threads);
#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
        pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
#endif
        if (opt_percpu_arena != percpu_arena_disabled) {
                set_current_thread_affinity((int)thread_ind);
        }
        /*
         * Start periodic background work.  We use internal tsd which avoids
         * side effects, for example triggering new arena creation (which in
         * turn triggers another background thread creation).
         */
        background_work(tsd_internal_fetch(), thread_ind);
        assert(pthread_equal(pthread_self(),
            background_thread_info[thread_ind].thread));

        return NULL;
}

static void
background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
        malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
        info->state = background_thread_started;
        background_thread_info_init(tsd_tsdn(tsd), info);
        n_background_threads++;
}

/* Create a new background thread if needed. */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
        assert(have_background_thread);
        malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

        /* We create at most NCPUs threads. */
        size_t thread_ind = arena_ind % max_background_threads;
        background_thread_info_t *info = &background_thread_info[thread_ind];

        bool need_new_thread;
        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
        need_new_thread = background_thread_enabled() &&
            (info->state == background_thread_stopped);
        if (need_new_thread) {
                background_thread_init(tsd, info);
        }
        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
        if (!need_new_thread) {
                return false;
        }
        if (arena_ind != 0) {
                /* Threads are created asynchronously by Thread 0. */
                background_thread_info_t *t0 = &background_thread_info[0];
                malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
                assert(t0->state == background_thread_started);
                pthread_cond_signal(&t0->cond);
                malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);

                return false;
        }

        pre_reentrancy(tsd, NULL);
        /*
         * To avoid complications (besides reentrancy), create internal
         * background threads with the underlying pthread_create.
         */
        int err = background_thread_create_signals_masked(&info->thread, NULL,
            background_thread_entry, (void *)thread_ind);
        post_reentrancy(tsd);

        if (err != 0) {
                malloc_printf("<jemalloc>: arena 0 background thread creation "
                    "failed (%d)\n", err);
                malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
                info->state = background_thread_stopped;
                n_background_threads--;
                malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

                return true;
        }

        return false;
}

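/*
 * Turn background threads on: mark the info slots that existing arenas map
 * to as started (thread 0 will spawn them asynchronously), then create
 * thread 0 itself via background_thread_create(tsd, 0).
 */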
bool
background_threads_enable(tsd_t *tsd) {
        assert(n_background_threads == 0);
        assert(background_thread_enabled());
        malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

        VARIABLE_ARRAY(bool, marked, max_background_threads);
        unsigned i, nmarked;
        for (i = 0; i < max_background_threads; i++) {
                marked[i] = false;
        }
        nmarked = 0;
        /* Thread 0 is required and created at the end. */
        marked[0] = true;
        /* Mark the threads we need to create for thread 0. */
        unsigned n = narenas_total_get();
        for (i = 1; i < n; i++) {
                if (marked[i % max_background_threads] ||
                    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
                        continue;
                }
                background_thread_info_t *info = &background_thread_info[
                    i % max_background_threads];
                malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
                assert(info->state == background_thread_stopped);
                background_thread_init(tsd, info);
                malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
                marked[i % max_background_threads] = true;
                if (++nmarked == max_background_threads) {
                        break;
                }
        }

        return background_thread_create(tsd, 0);
}

bool
background_threads_disable(tsd_t *tsd) {
        assert(!background_thread_enabled());
        malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

        /* Thread 0 will be responsible for terminating other threads. */
        if (background_threads_disable_single(tsd,
            &background_thread_info[0])) {
                return true;
        }
        assert(n_background_threads == 0);

        return false;
}

/* Check if we need to signal the background thread early. */
void
background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) {
        background_thread_info_t *info = arena_background_thread_info_get(
            arena);
        if (malloc_mutex_trylock(tsdn, &info->mtx)) {
                /*
                 * Background thread may hold the mutex for a long period of
                 * time.  We'd like to avoid the variance on application
                 * threads.  So keep this non-blocking, and leave the work to a
                 * future epoch.
                 */
                return;
        }

        if (info->state != background_thread_started) {
                goto label_done;
        }
        if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
                goto label_done;
        }

        ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
        if (decay_time <= 0) {
                /* Purging is eagerly done or disabled currently. */
                goto label_done_unlock2;
        }
        uint64_t decay_interval_ns = nstime_ns(&decay->interval);
        assert(decay_interval_ns > 0);

        nstime_t diff;
        nstime_init(&diff, background_thread_wakeup_time_get(info));
        if (nstime_compare(&diff, &decay->epoch) <= 0) {
                goto label_done_unlock2;
        }
        nstime_subtract(&diff, &decay->epoch);
        if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
                goto label_done_unlock2;
        }

        if (npages_new > 0) {
                size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
                /*
                 * Compute how many new pages we would need to purge by the next
                 * wakeup, which is used to determine if we should signal the
                 * background thread.
                 */
                uint64_t npurge_new;
                if (n_epoch >= SMOOTHSTEP_NSTEPS) {
                        npurge_new = npages_new;
                } else {
                        uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
                        assert(h_steps_max >=
                            h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
                        npurge_new = npages_new * (h_steps_max -
                            h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
                        npurge_new >>= SMOOTHSTEP_BFP;
                }
                info->npages_to_purge_new += npurge_new;
        }

        bool should_signal;
        if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
                should_signal = true;
        } else if (unlikely(background_thread_indefinite_sleep(info)) &&
            (extents_npages_get(&arena->extents_dirty) > 0 ||
            extents_npages_get(&arena->extents_muzzy) > 0 ||
            info->npages_to_purge_new > 0)) {
                should_signal = true;
        } else {
                should_signal = false;
        }

        if (should_signal) {
                info->npages_to_purge_new = 0;
                pthread_cond_signal(&info->cond);
        }
label_done_unlock2:
        malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
        malloc_mutex_unlock(tsdn, &info->mtx);
}

void
background_thread_prefork0(tsdn_t *tsdn) {
        malloc_mutex_prefork(tsdn, &background_thread_lock);
        background_thread_enabled_at_fork = background_thread_enabled();
}

void
background_thread_prefork1(tsdn_t *tsdn) {
        for (unsigned i = 0; i < max_background_threads; i++) {
                malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
        }
}

void
background_thread_postfork_parent(tsdn_t *tsdn) {
        for (unsigned i = 0; i < max_background_threads; i++) {
                malloc_mutex_postfork_parent(tsdn,
                    &background_thread_info[i].mtx);
        }
        malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
}

void
background_thread_postfork_child(tsdn_t *tsdn) {
        for (unsigned i = 0; i < max_background_threads; i++) {
                malloc_mutex_postfork_child(tsdn,
                    &background_thread_info[i].mtx);
        }
        malloc_mutex_postfork_child(tsdn, &background_thread_lock);
        if (!background_thread_enabled_at_fork) {
                return;
        }

        /* Clear background_thread state (reset to disabled for child). */
        malloc_mutex_lock(tsdn, &background_thread_lock);
        n_background_threads = 0;
        background_thread_enabled_set(tsdn, false);
        for (unsigned i = 0; i < max_background_threads; i++) {
                background_thread_info_t *info = &background_thread_info[i];
                malloc_mutex_lock(tsdn, &info->mtx);
                info->state = background_thread_stopped;
                int ret = pthread_cond_init(&info->cond, NULL);
                assert(ret == 0);
                background_thread_info_init(tsdn, info);
                malloc_mutex_unlock(tsdn, &info->mtx);
        }
        malloc_mutex_unlock(tsdn, &background_thread_lock);
}

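/*
 * Aggregate background thread stats under the global lock: number of
 * threads, total number of runs, and the average sleep interval per run.
 * Returns true if background threads are disabled (no stats to report).
 */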
bool
background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
        assert(config_stats);
        malloc_mutex_lock(tsdn, &background_thread_lock);
        if (!background_thread_enabled()) {
                malloc_mutex_unlock(tsdn, &background_thread_lock);
                return true;
        }

        stats->num_threads = n_background_threads;
        uint64_t num_runs = 0;
        nstime_init(&stats->run_interval, 0);
        for (unsigned i = 0; i < max_background_threads; i++) {
                background_thread_info_t *info = &background_thread_info[i];
                malloc_mutex_lock(tsdn, &info->mtx);
                if (info->state != background_thread_stopped) {
                        num_runs += info->tot_n_runs;
                        nstime_add(&stats->run_interval, &info->tot_sleep_time);
                }
                malloc_mutex_unlock(tsdn, &info->mtx);
        }
        stats->num_runs = num_runs;
        if (num_runs > 0) {
                nstime_idivide(&stats->run_interval, num_runs);
        }
        malloc_mutex_unlock(tsdn, &background_thread_lock);

        return false;
}

#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
#undef BILLION
#undef BACKGROUND_THREAD_MIN_INTERVAL_NS

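/*
 * Resolve the real pthread_create via dlsym(RTLD_NEXT) for the wrapper above.
 * On failure, background threads cannot be enabled; abort only if lazy lock
 * or background threads actually require the symbol.
 */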
static bool
pthread_create_fptr_init(void) {
        if (pthread_create_fptr != NULL) {
                return false;
        }
        pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
        if (pthread_create_fptr == NULL) {
                can_enable_background_thread = false;
                if (config_lazy_lock || opt_background_thread) {
                        malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
                            "\"pthread_create\")\n");
                        abort();
                }
        } else {
                can_enable_background_thread = true;
        }

        return false;
}

/*
 * When lazy lock is enabled, we need to make sure isthreaded is set before
 * taking any background_thread locks.  This is called early in ctl (instead of
 * waiting for pthread_create calls to trigger it) because the mutex is
 * required before creating background threads.
 */
void
background_thread_ctl_init(tsdn_t *tsdn) {
        malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
        pthread_create_fptr_init();
        pthread_create_wrapper_init();
#endif
}

#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */

bool
background_thread_boot0(void) {
        if (!have_background_thread && opt_background_thread) {
                malloc_printf("<jemalloc>: option background_thread currently "
                    "supports pthread only\n");
                return true;
        }
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
        if ((config_lazy_lock || opt_background_thread) &&
            pthread_create_fptr_init()) {
                return true;
        }
#endif
        return false;
}

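/*
 * Second-stage boot: if opt.max_background_threads was left at its default
 * limit, cap it at ncpus; then allocate the per-thread info array from the
 * base allocator and initialize each slot's mutex, condvar and stopped state.
 */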
bool
background_thread_boot1(tsdn_t *tsdn) {
#ifdef JEMALLOC_BACKGROUND_THREAD
        assert(have_background_thread);
        assert(narenas_total_get() > 0);

        if (opt_max_background_threads == MAX_BACKGROUND_THREAD_LIMIT &&
            ncpus < MAX_BACKGROUND_THREAD_LIMIT) {
                opt_max_background_threads = ncpus;
        }
        max_background_threads = opt_max_background_threads;

        background_thread_enabled_set(tsdn, opt_background_thread);
        if (malloc_mutex_init(&background_thread_lock,
            "background_thread_global",
            WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
            malloc_mutex_rank_exclusive)) {
                return true;
        }

        background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
            b0get(), opt_max_background_threads *
            sizeof(background_thread_info_t), CACHELINE);
        if (background_thread_info == NULL) {
                return true;
        }

        for (unsigned i = 0; i < max_background_threads; i++) {
                background_thread_info_t *info = &background_thread_info[i];
                /* Thread mutex is rank_inclusive because of thread0. */
                if (malloc_mutex_init(&info->mtx, "background_thread",
                    WITNESS_RANK_BACKGROUND_THREAD,
                    malloc_mutex_address_ordered)) {
                        return true;
                }
                if (pthread_cond_init(&info->cond, NULL)) {
                        return true;
                }
                malloc_mutex_lock(tsdn, &info->mtx);
                info->state = background_thread_stopped;
                background_thread_info_init(tsdn, info);
                malloc_mutex_unlock(tsdn, &info->mtx);
        }
#endif

        return false;
}