contrib/jemalloc/src/background_thread.c
1 #define JEMALLOC_BACKGROUND_THREAD_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4
5 #include "jemalloc/internal/assert.h"
6
7 /******************************************************************************/
8 /* Data. */
9
10 /* This option should be opt-in only. */
11 #define BACKGROUND_THREAD_DEFAULT false
12 /* Read-only after initialization. */
13 bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
14
15 /* Used for thread creation, termination and stats. */
16 malloc_mutex_t background_thread_lock;
17 /* Indicates global state.  Atomic because decay reads this w/o locking. */
18 atomic_b_t background_thread_enabled_state;
19 size_t n_background_threads;
20 /* Thread info per-index. */
21 background_thread_info_t *background_thread_info;
22
23 /* False if the necessary runtime support is unavailable. */
24 bool can_enable_background_thread;
25
26 /******************************************************************************/
27
28 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
29 #include <dlfcn.h>
30
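/*
 * pthread_create_fptr is resolved in background_thread_boot0() via
 * dlsym(RTLD_NEXT, "pthread_create"), i.e. the next definition of
 * pthread_create in the lookup order after this object, so the wrapper
 * below always forwards to the real implementation.
 */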
31 static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
32     void *(*)(void *), void *__restrict);
33 static pthread_once_t once_control = PTHREAD_ONCE_INIT;
34
35 static void
36 pthread_create_wrapper_once(void) {
37 #ifdef JEMALLOC_LAZY_LOCK
38         isthreaded = true;
39 #endif
40 }
41
42 int
43 pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
44     void *(*start_routine)(void *), void *__restrict arg) {
45         pthread_once(&once_control, pthread_create_wrapper_once);
46
47         return pthread_create_fptr(thread, attr, start_routine, arg);
48 }
49 #endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */
50
51 #ifndef JEMALLOC_BACKGROUND_THREAD
52 #define NOT_REACHED { not_reached(); }
53 bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
54 bool background_threads_enable(tsd_t *tsd) NOT_REACHED
55 bool background_threads_disable(tsd_t *tsd) NOT_REACHED
56 void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
57     arena_decay_t *decay, size_t npages_new) NOT_REACHED
58 void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
59 void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
60 void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
61 void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
62 bool background_thread_stats_read(tsdn_t *tsdn,
63     background_thread_stats_t *stats) NOT_REACHED
64 void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
65 #undef NOT_REACHED
66 #else
67
68 static bool background_thread_enabled_at_fork;
69
70 static void
71 background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
72         background_thread_wakeup_time_set(tsdn, info, 0);
73         info->npages_to_purge_new = 0;
74         if (config_stats) {
75                 info->tot_n_runs = 0;
76                 nstime_init(&info->tot_sleep_time, 0);
77         }
78 }
79
80 static inline bool
81 set_current_thread_affinity(UNUSED int cpu) {
82 #if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
83         cpu_set_t cpuset;
84         CPU_ZERO(&cpuset);
85         CPU_SET(cpu, &cpuset);
86         int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);
87
88         return (ret != 0);
89 #else
90         return false;
91 #endif
92 }
93
94 /* Threshold for determining when to wake up the background thread. */
95 #define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
96 #define BILLION UINT64_C(1000000000)
97 /* Minimal sleep interval 100 ms. */
98 #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
99
100 static inline size_t
101 decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
102         size_t i;
103         uint64_t sum = 0;
104         for (i = 0; i < interval; i++) {
105                 sum += decay->backlog[i] * h_steps[i];
106         }
107         for (; i < SMOOTHSTEP_NSTEPS; i++) {
108                 sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
109         }
110
111         return (size_t)(sum >> SMOOTHSTEP_BFP);
112 }
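/*
 * Illustrative reading of the sum above (not additional jemalloc logic):
 * h_steps[i] is the fixed-point fraction of backlog slot i that is still
 * allowed to remain unpurged, and advancing the decay clock by `interval`
 * epochs shifts slot i down to slot i - interval.  Slots below `interval`
 * fall off entirely (contributing backlog[i] * h_steps[i] pages to purge),
 * while the rest contribute the drop in their retained fraction,
 * backlog[i] * (h_steps[i] - h_steps[i - interval]).  For example, with a
 * hypothetical slot holding 1000 pages whose retained fraction falls from
 * roughly 0.75 to 0.30 over the interval, about 450 of those pages would
 * be purged.
 */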
113
114 static uint64_t
115 arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
116     extents_t *extents) {
117         if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
118                 /* Use minimal interval if decay is contended. */
119                 return BACKGROUND_THREAD_MIN_INTERVAL_NS;
120         }
121
122         uint64_t interval;
123         ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
124         if (decay_time <= 0) {
125                 /* Purging is currently done eagerly or disabled. */
126                 interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
127                 goto label_done;
128         }
129
130         uint64_t decay_interval_ns = nstime_ns(&decay->interval);
131         assert(decay_interval_ns > 0);
132         size_t npages = extents_npages_get(extents);
133         if (npages == 0) {
134                 unsigned i;
135                 for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
136                         if (decay->backlog[i] > 0) {
137                                 break;
138                         }
139                 }
140                 if (i == SMOOTHSTEP_NSTEPS) {
141                         /* No dirty pages recorded.  Sleep indefinitely. */
142                         interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
143                         goto label_done;
144                 }
145         }
146         if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
147                 /* Use max interval. */
148                 interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
149                 goto label_done;
150         }
151
152         size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
153         size_t ub = SMOOTHSTEP_NSTEPS;
154         /* At least 2 intervals to ensure reaching the next epoch deadline. */
155         lb = (lb < 2) ? 2 : lb;
156         if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
157             (lb + 2 > ub)) {
158                 interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
159                 goto label_done;
160         }
161
162         assert(lb + 2 <= ub);
163         size_t npurge_lb, npurge_ub;
164         npurge_lb = decay_npurge_after_interval(decay, lb);
165         if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
166                 interval = decay_interval_ns * lb;
167                 goto label_done;
168         }
169         npurge_ub = decay_npurge_after_interval(decay, ub);
170         if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
171                 interval = decay_interval_ns * ub;
172                 goto label_done;
173         }
174
175         unsigned n_search = 0;
176         size_t target, npurge;
177         while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
178             && (lb + 2 < ub)) {
179                 target = (lb + ub) / 2;
180                 npurge = decay_npurge_after_interval(decay, target);
181                 if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
182                         ub = target;
183                         npurge_ub = npurge;
184                 } else {
185                         lb = target;
186                         npurge_lb = npurge;
187                 }
188                 assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
189         }
190         interval = decay_interval_ns * (ub + lb) / 2;
191 label_done:
192         interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
193             BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
194         malloc_mutex_unlock(tsdn, &decay->mtx);
195
196         return interval;
197 }
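/*
 * Worked example with illustrative numbers: for a 10 s decay time spread
 * over 200 smoothstep epochs, decay_interval_ns is 50 ms, so lb starts at
 * 100 ms / 50 ms = 2 epochs and ub at 200 epochs (a 10 s maximum sleep).
 * The bisection then looks for the smallest interval whose estimated purge
 * count reaches BACKGROUND_THREAD_NPAGES_THRESHOLD (1024 pages), taking at
 * most about lg(200) + 1 iterations, and the result is finally clamped to
 * at least BACKGROUND_THREAD_MIN_INTERVAL_NS.
 */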
198
199 /* Compute purge interval for background threads. */
200 static uint64_t
201 arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
202         uint64_t i1, i2;
203         i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
204             &arena->extents_dirty);
205         if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
206                 return i1;
207         }
208         i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
209             &arena->extents_muzzy);
210
211         return i1 < i2 ? i1 : i2;
212 }
213
214 static void
215 background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
216     uint64_t interval) {
217         if (config_stats) {
218                 info->tot_n_runs++;
219         }
220         info->npages_to_purge_new = 0;
221
222         struct timeval tv;
223         /* pthread_cond_timedwait() requires the condvar's clock (realtime). */
224         gettimeofday(&tv, NULL);
225         nstime_t before_sleep;
226         nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);
227
228         int ret;
229         if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
230                 assert(background_thread_indefinite_sleep(info));
231                 ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
232                 assert(ret == 0);
233         } else {
234                 assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
235                     interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
236                 /* We need the malloc clock (it may differ from tv's clock). */
237                 nstime_t next_wakeup;
238                 nstime_init(&next_wakeup, 0);
239                 nstime_update(&next_wakeup);
240                 nstime_iadd(&next_wakeup, interval);
241                 assert(nstime_ns(&next_wakeup) <
242                     BACKGROUND_THREAD_INDEFINITE_SLEEP);
243                 background_thread_wakeup_time_set(tsdn, info,
244                     nstime_ns(&next_wakeup));
245
246                 nstime_t ts_wakeup;
247                 nstime_copy(&ts_wakeup, &before_sleep);
248                 nstime_iadd(&ts_wakeup, interval);
249                 struct timespec ts;
250                 ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
251                 ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);
252
253                 assert(!background_thread_indefinite_sleep(info));
254                 ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
255                 assert(ret == ETIMEDOUT || ret == 0);
256                 background_thread_wakeup_time_set(tsdn, info,
257                     BACKGROUND_THREAD_INDEFINITE_SLEEP);
258         }
259         if (config_stats) {
260                 gettimeofday(&tv, NULL);
261                 nstime_t after_sleep;
262                 nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
263                 if (nstime_compare(&after_sleep, &before_sleep) > 0) {
264                         nstime_subtract(&after_sleep, &before_sleep);
265                         nstime_add(&info->tot_sleep_time, &after_sleep);
266                 }
267         }
268 }
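/*
 * Illustrative sketch (not part of this file): the absolute deadline passed
 * to pthread_cond_timedwait() above is simply "now plus interval" measured
 * with gettimeofday(), which matches the realtime clock a condvar created
 * with a NULL attribute measures against.  A standalone equivalent (needs
 * <sys/time.h> and <stdint.h>), with a hypothetical helper name:
 *
 *	static struct timespec
 *	deadline_after_ns(uint64_t interval_ns) {
 *		struct timeval tv;
 *		gettimeofday(&tv, NULL);
 *		uint64_t ns = (uint64_t)tv.tv_sec * 1000000000 +
 *		    (uint64_t)tv.tv_usec * 1000 + interval_ns;
 *		struct timespec ts;
 *		ts.tv_sec = (time_t)(ns / 1000000000);
 *		ts.tv_nsec = (long)(ns % 1000000000);
 *		return ts;
 *	}
 */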
269
270 static bool
271 background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
272         if (unlikely(info->state == background_thread_paused)) {
273                 malloc_mutex_unlock(tsdn, &info->mtx);
274                 /* Wait on global lock to update status. */
275                 malloc_mutex_lock(tsdn, &background_thread_lock);
276                 malloc_mutex_unlock(tsdn, &background_thread_lock);
277                 malloc_mutex_lock(tsdn, &info->mtx);
278                 return true;
279         }
280
281         return false;
282 }
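/*
 * The unlock/lock/unlock/lock sequence above is a lock handoff: the thread
 * drops its own mutex, blocks on background_thread_lock until whichever
 * thread is updating the global state releases it, then reacquires
 * info->mtx and returns true.  Callers therefore use it as a retry
 * condition, matching the loops in background_work() and
 * background_thread0_work() below:
 *
 *	while (info->state != background_thread_stopped) {
 *		if (background_thread_pause_check(tsdn, info)) {
 *			continue;
 *		}
 *		background_work_sleep_once(tsdn, info, ind);
 *	}
 */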
283
284 static inline void
285 background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) {
286         uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
287         unsigned narenas = narenas_total_get();
288
289         for (unsigned i = ind; i < narenas; i += ncpus) {
290                 arena_t *arena = arena_get(tsdn, i, false);
291                 if (!arena) {
292                         continue;
293                 }
294                 arena_decay(tsdn, arena, true, false);
295                 if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
296                         /* Min interval will be used. */
297                         continue;
298                 }
299                 uint64_t interval = arena_decay_compute_purge_interval(tsdn,
300                     arena);
301                 assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
302                 if (min_interval > interval) {
303                         min_interval = interval;
304                 }
305         }
306         background_thread_sleep(tsdn, info, min_interval);
307 }
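/*
 * Arena-to-thread mapping example: background thread `ind` walks arenas
 * ind, ind + ncpus, ind + 2 * ncpus, ...  With ncpus = 4 and 8 arenas,
 * thread 0 covers arenas 0 and 4, thread 1 covers 1 and 5, and so on.  The
 * sleep length is the minimum interval computed over the arenas a thread
 * owns, never below BACKGROUND_THREAD_MIN_INTERVAL_NS.
 */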
308
309 static bool
310 background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
311         if (info == &background_thread_info[0]) {
312                 malloc_mutex_assert_owner(tsd_tsdn(tsd),
313                     &background_thread_lock);
314         } else {
315                 malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
316                     &background_thread_lock);
317         }
318
319         pre_reentrancy(tsd, NULL);
320         malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
321         bool has_thread;
322         assert(info->state != background_thread_paused);
323         if (info->state == background_thread_started) {
324                 has_thread = true;
325                 info->state = background_thread_stopped;
326                 pthread_cond_signal(&info->cond);
327         } else {
328                 has_thread = false;
329         }
330         malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
331
332         if (!has_thread) {
333                 post_reentrancy(tsd);
334                 return false;
335         }
336         void *ret;
337         if (pthread_join(info->thread, &ret)) {
338                 post_reentrancy(tsd);
339                 return true;
340         }
341         assert(ret == NULL);
342         n_background_threads--;
343         post_reentrancy(tsd);
344
345         return false;
346 }
347
348 static void *background_thread_entry(void *ind_arg);
349
350 static int
351 background_thread_create_signals_masked(pthread_t *thread,
352     const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
353         /*
354          * Block all signals during thread creation so that the new thread
355          * starts with every signal masked (it inherits the creator's mask).
356          */
357         sigset_t set;
358         sigfillset(&set);
359         sigset_t oldset;
360         int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
361         if (mask_err != 0) {
362                 return mask_err;
363         }
364         int create_err = pthread_create_wrapper(thread, attr, start_routine,
365             arg);
366         /*
367          * Restore the signal mask.  Failure to restore the signal mask here
368          * changes program behavior.
369          */
370         int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
371         if (restore_err != 0) {
372                 malloc_printf("<jemalloc>: background thread creation "
373                     "failed (%d), and signal mask restoration failed "
374                     "(%d)\n", create_err, restore_err);
375                 if (opt_abort) {
376                         abort();
377                 }
378         }
379         return create_err;
380 }
381
382 static void
383 check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
384     bool *created_threads) {
385         if (likely(*n_created == n_background_threads)) {
386                 return;
387         }
388
389         malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_info[0].mtx);
390 label_restart:
391         malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
392         for (unsigned i = 1; i < ncpus; i++) {
393                 if (created_threads[i]) {
394                         continue;
395                 }
396                 background_thread_info_t *info = &background_thread_info[i];
397                 malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
398                 assert(info->state != background_thread_paused);
399                 bool create = (info->state == background_thread_started);
400                 malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
401                 if (!create) {
402                         continue;
403                 }
404
405                 /*
406                  * To avoid deadlock with prefork handlers (which wait for the
407                  * mutex held here), unlock before calling pthread_create().
408                  */
409                 malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
410
411                 pre_reentrancy(tsd, NULL);
412                 int err = background_thread_create_signals_masked(&info->thread,
413                     NULL, background_thread_entry, (void *)(uintptr_t)i);
414                 post_reentrancy(tsd);
415
416                 if (err == 0) {
417                         (*n_created)++;
418                         created_threads[i] = true;
419                 } else {
420                         malloc_printf("<jemalloc>: background thread "
421                             "creation failed (%d)\n", err);
422                         if (opt_abort) {
423                                 abort();
424                         }
425                 }
426                 /* Restart since we unlocked. */
427                 goto label_restart;
428         }
429         malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_info[0].mtx);
430         malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
431 }
432
433 static void
434 background_thread0_work(tsd_t *tsd) {
435         /* Thread 0 is also responsible for launching / terminating threads. */
436         VARIABLE_ARRAY(bool, created_threads, ncpus);
437         unsigned i;
438         for (i = 1; i < ncpus; i++) {
439                 created_threads[i] = false;
440         }
441         /* Start working, and create more threads when asked. */
442         unsigned n_created = 1;
443         while (background_thread_info[0].state != background_thread_stopped) {
444                 if (background_thread_pause_check(tsd_tsdn(tsd),
445                     &background_thread_info[0])) {
446                         continue;
447                 }
448                 check_background_thread_creation(tsd, &n_created,
449                     (bool *)&created_threads);
450                 background_work_sleep_once(tsd_tsdn(tsd),
451                     &background_thread_info[0], 0);
452         }
453
454         /*
455          * Shut down other threads at exit.  Note that the ctl thread is
456          * holding the global background_thread mutex and waiting for us.
457          */
458         assert(!background_thread_enabled());
459         for (i = 1; i < ncpus; i++) {
460                 background_thread_info_t *info = &background_thread_info[i];
461                 assert(info->state != background_thread_paused);
462                 if (created_threads[i]) {
463                         background_threads_disable_single(tsd, info);
464                 } else {
465                         malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
466                         /* Clear in case the thread wasn't created. */
467                         info->state = background_thread_stopped;
468                         malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
469                 }
470         }
471         background_thread_info[0].state = background_thread_stopped;
472         assert(n_background_threads == 1);
473 }
474
475 static void
476 background_work(tsd_t *tsd, unsigned ind) {
477         background_thread_info_t *info = &background_thread_info[ind];
478
479         malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
480         background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
481             BACKGROUND_THREAD_INDEFINITE_SLEEP);
482         if (ind == 0) {
483                 background_thread0_work(tsd);
484         } else {
485                 while (info->state != background_thread_stopped) {
486                         if (background_thread_pause_check(tsd_tsdn(tsd),
487                             info)) {
488                                 continue;
489                         }
490                         background_work_sleep_once(tsd_tsdn(tsd), info, ind);
491                 }
492         }
493         assert(info->state == background_thread_stopped);
494         background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
495         malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
496 }
497
498 static void *
499 background_thread_entry(void *ind_arg) {
500         unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
501         assert(thread_ind < ncpus);
502 #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
503         pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
504 #endif
505         if (opt_percpu_arena != percpu_arena_disabled) {
506                 set_current_thread_affinity((int)thread_ind);
507         }
508         /*
509          * Start periodic background work.  We use internal tsd, which avoids
510          * side effects such as triggering new arena creation (which in turn
511          * would trigger creation of yet another background thread).
512          */
513         background_work(tsd_internal_fetch(), thread_ind);
514         assert(pthread_equal(pthread_self(),
515             background_thread_info[thread_ind].thread));
516
517         return NULL;
518 }
519
520 static void
521 background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
522         malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
523         info->state = background_thread_started;
524         background_thread_info_init(tsd_tsdn(tsd), info);
525         n_background_threads++;
526 }
527
528 /* Create a new background thread if needed. */
529 bool
530 background_thread_create(tsd_t *tsd, unsigned arena_ind) {
531         assert(have_background_thread);
532         malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
533
534         /* We create at most NCPUs threads. */
535         size_t thread_ind = arena_ind % ncpus;
536         background_thread_info_t *info = &background_thread_info[thread_ind];
537
538         bool need_new_thread;
539         malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
540         need_new_thread = background_thread_enabled() &&
541             (info->state == background_thread_stopped);
542         if (need_new_thread) {
543                 background_thread_init(tsd, info);
544         }
545         malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
546         if (!need_new_thread) {
547                 return false;
548         }
549         if (arena_ind != 0) {
550                 /* Threads are created asynchronously by Thread 0. */
551                 background_thread_info_t *t0 = &background_thread_info[0];
552                 malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
553                 assert(t0->state == background_thread_started);
554                 pthread_cond_signal(&t0->cond);
555                 malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);
556
557                 return false;
558         }
559
560         pre_reentrancy(tsd, NULL);
561         /*
562          * To avoid complications (besides reentrancy), create internal
563          * background threads with the underlying pthread_create.
564          */
565         int err = background_thread_create_signals_masked(&info->thread, NULL,
566             background_thread_entry, (void *)thread_ind);
567         post_reentrancy(tsd);
568
569         if (err != 0) {
570                 malloc_printf("<jemalloc>: arena 0 background thread creation "
571                     "failed (%d)\n", err);
572                 malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
573                 info->state = background_thread_stopped;
574                 n_background_threads--;
575                 malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
576
577                 return true;
578         }
579
580         return false;
581 }
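/*
 * Note on the creation protocol: only thread 0 is created directly here.
 * For arena_ind != 0 the state is flipped to background_thread_started and
 * thread 0 is signaled; thread 0 then notices that n_created lags
 * n_background_threads and performs the actual pthread_create() in
 * check_background_thread_creation().
 */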
582
583 bool
584 background_threads_enable(tsd_t *tsd) {
585         assert(n_background_threads == 0);
586         assert(background_thread_enabled());
587         malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
588
589         VARIABLE_ARRAY(bool, marked, ncpus);
590         unsigned i, nmarked;
591         for (i = 0; i < ncpus; i++) {
592                 marked[i] = false;
593         }
594         nmarked = 0;
595         /* Mark the threads we need to create for thread 0. */
596         unsigned n = narenas_total_get();
597         for (i = 1; i < n; i++) {
598                 if (marked[i % ncpus] ||
599                     arena_get(tsd_tsdn(tsd), i, false) == NULL) {
600                         continue;
601                 }
602                 background_thread_info_t *info = &background_thread_info[i];
603                 malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
604                 assert(info->state == background_thread_stopped);
605                 background_thread_init(tsd, info);
606                 malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
607                 marked[i % ncpus] = true;
608                 if (++nmarked == ncpus) {
609                         break;
610                 }
611         }
612
613         return background_thread_create(tsd, 0);
614 }
615
616 bool
617 background_threads_disable(tsd_t *tsd) {
618         assert(!background_thread_enabled());
619         malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
620
621         /* Thread 0 will be responsible for terminating other threads. */
622         if (background_threads_disable_single(tsd,
623             &background_thread_info[0])) {
624                 return true;
625         }
626         assert(n_background_threads == 0);
627
628         return false;
629 }
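/*
 * Illustrative application-side usage (not part of this file): background
 * threads are normally toggled through the "background_thread" mallctl,
 * which reaches background_threads_enable() / background_threads_disable()
 * above with background_thread_lock held by ctl.  A minimal sketch:
 *
 *	#include <stdbool.h>
 *	#include <jemalloc/jemalloc.h>
 *
 *	static int
 *	toggle_background_threads(bool enable) {
 *		return mallctl("background_thread", NULL, NULL, &enable,
 *		    sizeof(enable));
 *	}
 */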
630
631 /* Check if we need to signal the background thread early. */
632 void
633 background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
634     arena_decay_t *decay, size_t npages_new) {
635         background_thread_info_t *info = arena_background_thread_info_get(
636             arena);
637         if (malloc_mutex_trylock(tsdn, &info->mtx)) {
638                 /*
639                  * The background thread may hold the mutex for a long period
640                  * of time, and we'd like to avoid imposing that variance on
641                  * application threads, so keep this non-blocking and leave
642                  * the work to a future epoch.
643                  */
644                 return;
645         }
646
647         if (info->state != background_thread_started) {
648                 goto label_done;
649         }
650         if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
651                 goto label_done;
652         }
653
654         ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
655         if (decay_time <= 0) {
656                 /* Purging is currently done eagerly or disabled. */
657                 goto label_done_unlock2;
658         }
659         uint64_t decay_interval_ns = nstime_ns(&decay->interval);
660         assert(decay_interval_ns > 0);
661
662         nstime_t diff;
663         nstime_init(&diff, background_thread_wakeup_time_get(info));
664         if (nstime_compare(&diff, &decay->epoch) <= 0) {
665                 goto label_done_unlock2;
666         }
667         nstime_subtract(&diff, &decay->epoch);
668         if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
669                 goto label_done_unlock2;
670         }
671
672         if (npages_new > 0) {
673                 size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
674                 /*
675                  * Compute how many new pages we would need to purge by the next
676                  * wakeup, which is used to determine if we should signal the
677                  * background thread.
678                  */
679                 uint64_t npurge_new;
680                 if (n_epoch >= SMOOTHSTEP_NSTEPS) {
681                         npurge_new = npages_new;
682                 } else {
683                         uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
684                         assert(h_steps_max >=
685                             h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
686                         npurge_new = npages_new * (h_steps_max -
687                             h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
688                         npurge_new >>= SMOOTHSTEP_BFP;
689                 }
690                 info->npages_to_purge_new += npurge_new;
691         }
692
693         bool should_signal;
694         if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
695                 should_signal = true;
696         } else if (unlikely(background_thread_indefinite_sleep(info)) &&
697             (extents_npages_get(&arena->extents_dirty) > 0 ||
698             extents_npages_get(&arena->extents_muzzy) > 0 ||
699             info->npages_to_purge_new > 0)) {
700                 should_signal = true;
701         } else {
702                 should_signal = false;
703         }
704
705         if (should_signal) {
706                 info->npages_to_purge_new = 0;
707                 pthread_cond_signal(&info->cond);
708         }
709 label_done_unlock2:
710         malloc_mutex_unlock(tsdn, &decay->mtx);
711 label_done:
712         malloc_mutex_unlock(tsdn, &info->mtx);
713 }
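/*
 * Arithmetic sketch for the npurge_new estimate above (illustrative values):
 * n_epoch is how many decay epochs fit between the last decay epoch and the
 * already-scheduled wakeup, and h_steps[SMOOTHSTEP_NSTEPS - 1] -
 * h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch] is the fixed-point fraction of
 * the newly dirtied pages expected to decay by that wakeup.  If npages_new
 * were 2048 and that fraction worked out to about 0.6, roughly 1200 pages
 * would be added to npages_to_purge_new, which alone exceeds the 1024-page
 * threshold and would signal the background thread early.
 */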
714
715 void
716 background_thread_prefork0(tsdn_t *tsdn) {
717         malloc_mutex_prefork(tsdn, &background_thread_lock);
718         background_thread_enabled_at_fork = background_thread_enabled();
719 }
720
721 void
722 background_thread_prefork1(tsdn_t *tsdn) {
723         for (unsigned i = 0; i < ncpus; i++) {
724                 malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
725         }
726 }
727
728 void
729 background_thread_postfork_parent(tsdn_t *tsdn) {
730         for (unsigned i = 0; i < ncpus; i++) {
731                 malloc_mutex_postfork_parent(tsdn,
732                     &background_thread_info[i].mtx);
733         }
734         malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
735 }
736
737 void
738 background_thread_postfork_child(tsdn_t *tsdn) {
739         for (unsigned i = 0; i < ncpus; i++) {
740                 malloc_mutex_postfork_child(tsdn,
741                     &background_thread_info[i].mtx);
742         }
743         malloc_mutex_postfork_child(tsdn, &background_thread_lock);
744         if (!background_thread_enabled_at_fork) {
745                 return;
746         }
747
748         /* Clear background_thread state (reset to disabled for child). */
749         malloc_mutex_lock(tsdn, &background_thread_lock);
750         n_background_threads = 0;
751         background_thread_enabled_set(tsdn, false);
752         for (unsigned i = 0; i < ncpus; i++) {
753                 background_thread_info_t *info = &background_thread_info[i];
754                 malloc_mutex_lock(tsdn, &info->mtx);
755                 info->state = background_thread_stopped;
756                 int ret = pthread_cond_init(&info->cond, NULL);
757                 assert(ret == 0);
758                 background_thread_info_init(tsdn, info);
759                 malloc_mutex_unlock(tsdn, &info->mtx);
760         }
761         malloc_mutex_unlock(tsdn, &background_thread_lock);
762 }
763
764 bool
765 background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
766         assert(config_stats);
767         malloc_mutex_lock(tsdn, &background_thread_lock);
768         if (!background_thread_enabled()) {
769                 malloc_mutex_unlock(tsdn, &background_thread_lock);
770                 return true;
771         }
772
773         stats->num_threads = n_background_threads;
774         uint64_t num_runs = 0;
775         nstime_init(&stats->run_interval, 0);
776         for (unsigned i = 0; i < ncpus; i++) {
777                 background_thread_info_t *info = &background_thread_info[i];
778                 malloc_mutex_lock(tsdn, &info->mtx);
779                 if (info->state != background_thread_stopped) {
780                         num_runs += info->tot_n_runs;
781                         nstime_add(&stats->run_interval, &info->tot_sleep_time);
782                 }
783                 malloc_mutex_unlock(tsdn, &info->mtx);
784         }
785         stats->num_runs = num_runs;
786         if (num_runs > 0) {
787                 nstime_idivide(&stats->run_interval, num_runs);
788         }
789         malloc_mutex_unlock(tsdn, &background_thread_lock);
790
791         return false;
792 }
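/*
 * Illustrative application-side usage (not part of this file): the values
 * computed here are exposed through the stats.background_thread.* mallctls
 * (after refreshing the stats epoch).  A minimal sketch, assuming the usual
 * mallctl names:
 *
 *	uint64_t epoch = 1;
 *	size_t sz = sizeof(epoch);
 *	mallctl("epoch", &epoch, &sz, &epoch, sizeof(epoch));
 *
 *	size_t nthreads;
 *	uint64_t nruns, run_interval_ns;
 *	sz = sizeof(nthreads);
 *	mallctl("stats.background_thread.num_threads", &nthreads, &sz, NULL, 0);
 *	sz = sizeof(nruns);
 *	mallctl("stats.background_thread.num_runs", &nruns, &sz, NULL, 0);
 *	sz = sizeof(run_interval_ns);
 *	mallctl("stats.background_thread.run_interval", &run_interval_ns, &sz,
 *	    NULL, 0);
 */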
793
794 #undef BACKGROUND_THREAD_NPAGES_THRESHOLD
795 #undef BILLION
796 #undef BACKGROUND_THREAD_MIN_INTERVAL_NS
797
798 /*
799  * When lazy lock is enabled, we need to make sure isthreaded is set before
800  * taking any background_thread locks.  This is called early in ctl (instead
801  * of waiting for the pthread_create calls to trigger it) because the mutex
802  * is required before creating background threads.
803  */
804 void
805 background_thread_ctl_init(tsdn_t *tsdn) {
806         malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
807 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
808         pthread_once(&once_control, pthread_create_wrapper_once);
809 #endif
810 }
811
812 #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */
813
814 bool
815 background_thread_boot0(void) {
816         if (!have_background_thread && opt_background_thread) {
817                 malloc_printf("<jemalloc>: option background_thread currently "
818                     "supports pthread only\n");
819                 return true;
820         }
821
822 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
823         pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
824         if (pthread_create_fptr == NULL) {
825                 can_enable_background_thread = false;
826                 if (config_lazy_lock || opt_background_thread) {
827                         malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
828                             "\"pthread_create\")\n");
829                         abort();
830                 }
831         } else {
832                 can_enable_background_thread = true;
833         }
834 #endif
835         return false;
836 }
837
838 bool
839 background_thread_boot1(tsdn_t *tsdn) {
840 #ifdef JEMALLOC_BACKGROUND_THREAD
841         assert(have_background_thread);
842         assert(narenas_total_get() > 0);
843
844         background_thread_enabled_set(tsdn, opt_background_thread);
845         if (malloc_mutex_init(&background_thread_lock,
846             "background_thread_global",
847             WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
848             malloc_mutex_rank_exclusive)) {
849                 return true;
850         }
851         if (opt_background_thread) {
852                 background_thread_ctl_init(tsdn);
853         }
854
855         background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
856             b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE);
857         if (background_thread_info == NULL) {
858                 return true;
859         }
860
861         for (unsigned i = 0; i < ncpus; i++) {
862                 background_thread_info_t *info = &background_thread_info[i];
863                 /* Thread mutex is rank_inclusive because of thread0. */
864                 if (malloc_mutex_init(&info->mtx, "background_thread",
865                     WITNESS_RANK_BACKGROUND_THREAD,
866                     malloc_mutex_address_ordered)) {
867                         return true;
868                 }
869                 if (pthread_cond_init(&info->cond, NULL)) {
870                         return true;
871                 }
872                 malloc_mutex_lock(tsdn, &info->mtx);
873                 info->state = background_thread_stopped;
874                 background_thread_info_init(tsdn, info);
875                 malloc_mutex_unlock(tsdn, &info->mtx);
876         }
877 #endif
878
879         return false;
880 }