2 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice(s), this list of conditions and the following disclaimer as
10 * the first lines of this file unmodified other than the possible
11 * addition of one or more copyright notices.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice(s), this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
20 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
29 #include "opt_witness.h"
30 #include "opt_hwpmc_hooks.h"
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
39 #include <sys/mutex.h>
41 #include <sys/resourcevar.h>
43 #include <sys/sysctl.h>
44 #include <sys/sched.h>
45 #include <sys/sleepqueue.h>
46 #include <sys/selinfo.h>
47 #include <sys/turnstile.h>
50 #include <sys/cpuset.h>
52 #include <sys/pmckern.h>
55 #include <security/audit/audit.h>
58 #include <vm/vm_extern.h>
60 #include <sys/eventhandler.h>
63 * thread related storage.
/* UMA zone that thread structures are allocated from and freed to. */
65 static uma_zone_t thread_zone;
/* Root of the kern.threads sysctl subtree. */
67 SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");
/* Per-process thread limit, exported read-write under kern.threads. */
69 int max_threads_per_proc = 1500;
70 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
71 &max_threads_per_proc, 0, "Limit on threads per proc");
/* Read-only export of max_threads_hits; its definition is not visible in this listing. */
74 SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
75 &max_threads_hits, 0, "");
/*
 * List of threads waiting for final release.  Entries are linked through
 * td_slpq and the list is protected by the zombie_lock spin mutex.
 */
77 TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
78 static struct mtx zombie_lock;
79 MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN);
81 static void thread_zombie(struct thread *);
/* Allocator that td_tid values are drawn from (alloc_unr()) and returned to (free_unr()). */
84 static struct unrhdr *tid_unrhdr;
87 * Prepare a thread for use.
/*
 * Passed to uma_zcreate() as the thread zone's constructor hook.  The
 * visible lines mark the thread inactive, assign it a thread ID and run
 * the thread_ctor event handlers plus the audit and umtx per-thread
 * allocation steps.
 */
90 thread_ctor(void *mem, int size, void *arg, int flags)
94 td = (struct thread *)mem;
95 td->td_state = TDS_INACTIVE;
/* Take a thread ID from the tid allocator; thread_dtor() hands it back with free_unr(). */
98 td->td_tid = alloc_unr(tid_unrhdr);
102 * Note that td_critnest begins life as 1 because the thread is not
103 * running and is thereby implicitly waiting to be on the receiving
104 * end of a context switch.
107 EVENTHANDLER_INVOKE(thread_ctor, td);
109 audit_thread_alloc(td);
111 umtx_thread_alloc(td);
116 * Reclaim a thread after use.
/*
 * Passed to uma_zcreate() as the thread zone's destructor hook.  Checks
 * that the thread is in a state that may be freed, then releases audit
 * state, runs the thread_dtor event handlers and returns the thread ID
 * to the tid allocator.
 */
119 thread_dtor(void *mem, int size, void *arg)
123 td = (struct thread *)mem;
126 /* Verify that this thread is in a safe state to free. */
/* NOTE(review): the case labels of this switch are missing from this listing. */
127 switch (td->td_state) {
133 * We must never unlink a thread that is in one of
134 * these states, because it is currently active.
136 panic("bad state for thread unlinking");
141 panic("bad thread state");
146 audit_thread_free(td);
148 /* Free all OSD associated to this thread. */
151 EVENTHANDLER_INVOKE(thread_dtor, td);
/* Counterpart of the alloc_unr() call in thread_ctor(). */
152 free_unr(tid_unrhdr, td->td_tid);
156 * Initialize type-stable parts of a thread (when newly created).
/*
 * Passed to uma_zcreate() as the thread zone's init hook.  Allocates the
 * sleep queue and turnstile that thread_fini() later frees, runs the
 * thread_init event handlers and sets up the scheduler and umtx state.
 */
159 thread_init(void *mem, int size, int flags)
163 td = (struct thread *)mem;
165 td->td_sleepqueue = sleepq_alloc();
166 td->td_turnstile = turnstile_alloc();
167 EVENTHANDLER_INVOKE(thread_init, td);
/*
 * td_sched points at the memory directly following struct thread; the
 * zone item size is sched_sizeof_thread(), not sizeof(struct thread).
 */
168 td->td_sched = (struct td_sched *)&td[1];
169 umtx_thread_init(td);
175 * Tear down type-stable parts of a thread (just before being discarded).
/*
 * Passed to uma_zcreate() as the thread zone's fini hook.  Runs the
 * thread_fini event handlers and frees the turnstile, sleep queue and
 * umtx state set up by thread_init().
 */
178 thread_fini(void *mem, int size)
182 td = (struct thread *)mem;
183 EVENTHANDLER_INVOKE(thread_fini, td);
184 turnstile_free(td->td_turnstile);
185 sleepq_free(td->td_sleepqueue);
186 umtx_thread_fini(td);
191 * For a newly created process,
192 * link up all the structures and its initial threads etc.
194 * {arch}/{arch}/machdep.c ia64_init(), init386() etc.
195 * proc_dtor() (should go away)
/*
 * Starts the process off with an empty thread list.
 * NOTE(review): only the list initialization is visible in this listing;
 * any further statements of this function are not shown here.
 */
199 proc_linkup0(struct proc *p, struct thread *td)
201 TAILQ_INIT(&p->p_threads); /* all threads in proc */
/*
 * Sets up the per-process signal queue, a ksiginfo held in p_ksi and the
 * p_mqnotifier list for p.
 */
206 proc_linkup(struct proc *p, struct thread *td)
209 sigqueue_init(&p->p_sigqueue, p);
/* The allocation may return NULL (see the XXX note), so the flags are set only on success. */
210 p->p_ksi = ksiginfo_alloc(1);
211 if (p->p_ksi != NULL) {
212 /* XXX p_ksi may be null if ksiginfo zone is not ready */
213 p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
215 LIST_INIT(&p->p_mqnotifier);
221 * Initialize global thread allocation resources.
/*
 * Sets up the tid allocator and creates the thread UMA zone.
 * NOTE(review): the function header and the tail of the uma_zcreate()
 * call are missing from this listing.
 */
227 mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
228 /* leave one number for thread0 */
/* Thread IDs are allocated from PID_MAX + 2 up to INT_MAX, with tid_lock as the allocator's mutex. */
229 tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock);
/* Hook arguments are, in order: ctor, dtor, init, fini. */
231 thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
232 thread_ctor, thread_dtor, thread_init, thread_fini,
237 * Place an unused thread on the zombie list.
238 * Use the slpq as that must be unused by now.
/*
 * Inserts td at the head of zombie_threads, linked through td_slpq.
 * The insert is done with the zombie_lock spin mutex held.
 */
241 thread_zombie(struct thread *td)
243 mtx_lock_spin(&zombie_lock);
244 TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq);
245 mtx_unlock_spin(&zombie_lock);
249 * Release a thread that has exited after cpu_throw().
/*
 * Drops this thread's count from p_exitthreads, undoing the
 * atomic_add_int() made in the exit path.  thread_wait() spins until
 * that counter reaches zero.
 */
252 thread_stash(struct thread *td)
254 atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
259 * Reap zombie resources.
/*
 * Takes the contents of zombie_threads while holding zombie_lock, resets
 * the list to empty, and then, after dropping the lock, releases each
 * thread's credential reference (if any) and frees the thread.
 * NOTE(review): the function header and the loop construct around the
 * free sequence are missing from this listing.
 */
264 struct thread *td_first, *td_next;
267 * Don't even bother to lock if none at this instant,
268 * we really don't care about the next instant..
270 if (!TAILQ_EMPTY(&zombie_threads)) {
271 mtx_lock_spin(&zombie_lock);
272 td_first = TAILQ_FIRST(&zombie_threads);
/* Re-initializing the head empties the shared list; td_first keeps the detached chain. */
274 TAILQ_INIT(&zombie_threads);
275 mtx_unlock_spin(&zombie_lock);
/* Fetch the successor before td_first is freed. */
277 td_next = TAILQ_NEXT(td_first, td_slpq);
278 if (td_first->td_ucred)
279 crfree(td_first->td_ucred);
280 thread_free(td_first);
/*
 * Allocates a thread from thread_zone and asks vm_thread_new() for a
 * kernel stack using the caller's 'pages' argument.  If the stack cannot
 * be created the thread is returned to the zone.
 * NOTE(review): the return statements are missing from this listing.
 */
290 thread_alloc(int pages)
/* Opportunistically release any queued zombie threads before allocating. */
294 thread_reap(); /* check if any zombies to get */
296 td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK);
297 KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
298 if (!vm_thread_new(td, pages)) {
299 uma_zfree(thread_zone, td);
302 cpu_thread_alloc(td);
/*
 * Gives an existing thread a kernel stack via vm_thread_new() and then
 * runs cpu_thread_alloc().  The thread must not already have a stack
 * (asserted).
 * NOTE(review): the return statements are missing from this listing.
 */
307 thread_alloc_stack(struct thread *td, int pages)
310 KASSERT(td->td_kstack == 0,
311 ("thread_alloc_stack called on a thread with kstack"));
312 if (!vm_thread_new(td, pages))
314 cpu_thread_alloc(td);
319 * Deallocate a thread.
/*
 * Releases what the thread still holds (lock-profiling state, its cpuset
 * reference and, if present, its kernel stack) and returns the structure
 * to thread_zone.
 */
322 thread_free(struct thread *td)
325 lock_profile_thread_exit(td);
327 cpuset_rel(td->td_cpuset);
328 td->td_cpuset = NULL;
/* A thread may reach here without a stack, e.g. when none was ever attached. */
330 if (td->td_kstack != 0)
331 vm_thread_dispose(td);
332 uma_zfree(thread_zone, td);
336 * Discard the current thread and exit from its context.
337 * Always called with scheduler locked.
339 * Because we can't free a thread while we're operating under its context,
340 * push the current thread into our CPU's deadthread holder. This means
341 * we needn't worry about someone else grabbing our context before we
/*
 * Exit path for the current thread: asserts the required locks, releases
 * per-thread audit/umtx/CPU state, folds run time and resource usage into
 * the process, and records the thread in the per-CPU deadthread slot.
 * NOTE(review): the function header and a number of statements are
 * missing from this listing; the added comments cover visible lines only.
 */
347 uint64_t new_switchtime;
356 PROC_SLOCK_ASSERT(p, MA_OWNED);
357 mtx_assert(&Giant, MA_NOTOWNED);
359 PROC_LOCK_ASSERT(p, MA_OWNED);
360 KASSERT(p != NULL, ("thread exiting without a process"));
361 CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
362 (long)p->p_pid, td->td_name);
363 KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));
366 AUDIT_SYSCALL_EXIT(0, td);
368 umtx_thread_exit(td);
370 * drop FPU & debug register state storage, or any other
371 * architecture specific resources that
372 * would not be on a new untouched process.
374 cpu_thread_exit(td); /* XXXSMP */
376 /* Do the same timestamp bookkeeping that mi_switch() would do. */
377 new_switchtime = cpu_ticks();
378 p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
379 PCPU_SET(switchtime, new_switchtime);
380 PCPU_SET(switchticks, ticks);
381 PCPU_INC(cnt.v_swtch);
382 /* Save our resource usage in our process. */
383 td->td_ru.ru_nvcsw++;
384 rucollect(&p->p_ru, &td->td_ru);
386 * The last thread is left attached to the process
387 * So that the whole bundle gets recycled. Skip
388 * all this stuff if we never had threads.
389 * EXIT clears all sign of other threads when
390 * it goes to single threading, so the last thread always
391 * takes the short path.
393 if (p->p_flag & P_HADTHREADS) {
394 if (p->p_numthreads > 1) {
396 td2 = FIRST_THREAD_IN_PROC(p);
397 sched_exit_thread(td2, td);
400 * The test below is NOT true if we are the
401 * sole exiting thread. P_STOPPED_SINGLE is unset
402 * in exit1() after it is the only survivor.
/* If a single-threading request is active and every thread is now suspended, wake the single-threader. */
404 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
405 if (p->p_numthreads == p->p_suspcount) {
406 thread_lock(p->p_singlethread);
407 wakeup_swapper = thread_unsuspend_one(
409 thread_unlock(p->p_singlethread);
/* Counted down again by thread_stash(); thread_wait() spins until the counter is zero. */
415 atomic_add_int(&td->td_proc->p_exitthreads, 1);
416 PCPU_SET(deadthread, td);
419 * The last thread is exiting.. but not through exit()
421 panic ("thread_exit: Last thread exiting on its own");
426 * If this thread is part of a process that is being tracked by hwpmc(4),
427 * inform the module of the thread's impending exit.
429 if (PMC_PROC_IS_USING_PMCS(td->td_proc))
430 PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
436 td->td_state = TDS_INACTIVE;
438 witness_thread_exit(td);
440 CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
/* Presumably unreachable: the trace line above announces a cpu_throw(), which should not return -- confirm. */
442 panic("I'm a teapot!");
447 * Do any thread-specific cleanups that may be needed in wait().
448 * Called with Giant, proc and schedlock not held.
/*
 * Cleans up the one remaining thread of p (asserted): waits until no
 * thread of the process is still counted in p_exitthreads, then drops the
 * thread's lock-profiling state, cpuset reference, machine-dependent
 * state and credential reference, and finally reaps zombie threads.
 */
451 thread_wait(struct proc *p)
455 mtx_assert(&Giant, MA_NOTOWNED);
456 KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
457 td = FIRST_THREAD_IN_PROC(p);
458 /* Lock the last thread so we spin until it exits cpu_throw(). */
461 /* Wait for any remaining threads to exit cpu_throw(). */
/* p_exitthreads is decremented by thread_stash(); give up the CPU until it drains. */
462 while (p->p_exitthreads)
463 sched_relinquish(curthread);
464 lock_profile_thread_exit(td);
465 cpuset_rel(td->td_cpuset);
466 td->td_cpuset = NULL;
467 cpu_thread_clean(td);
468 crfree(td->td_ucred);
469 thread_reap(); /* check for zombie threads etc. */
473 * Link a thread to a process.
474 * set up anything that needs to be initialized for it to
475 * be used by the process.
/*
 * Resets td to an inactive state with only TDF_INMEM set, initializes its
 * contested-lock list, lock-profiling lists, signal queue and sleep
 * callout, and inserts it at the head of p's thread list.
 */
478 thread_link(struct thread *td, struct proc *p)
482 * XXX This can't be enabled because it's called for proc0 before
483 * its lock has been created.
484 * PROC_LOCK_ASSERT(p, MA_OWNED);
486 td->td_state = TDS_INACTIVE;
/* Plain assignment: any previously set flag bits are discarded. */
488 td->td_flags = TDF_INMEM;
490 LIST_INIT(&td->td_contested);
491 LIST_INIT(&td->td_lprof[0]);
492 LIST_INIT(&td->td_lprof[1]);
493 sigqueue_init(&td->td_sigqueue, p);
494 callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
495 TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
500 * Convert a process with one thread to an unthreaded process.
/*
 * Clears P_HADTHREADS on td's process.  The process must be down to a
 * single thread (asserted).
 */
503 thread_unthread(struct thread *td)
505 struct proc *p = td->td_proc;
507 KASSERT((p->p_numthreads == 1), ("Unthreading with >1 threads"));
508 p->p_flag &= ~P_HADTHREADS;
/*
 * Removes td from its process's thread list.  The proc lock must be held
 * (asserted); td->td_proc is left intact.
 */
516 thread_unlink(struct thread *td)
518 struct proc *p = td->td_proc;
520 PROC_LOCK_ASSERT(p, MA_OWNED);
521 TAILQ_REMOVE(&p->p_threads, td, td_plist);
523 /* could clear a few other things here */
524 /* Must NOT clear links to proc! */
/*
 * Computes, for the given single-threading mode, the thread count that
 * thread_single() compares against 1 to decide whether it must keep
 * waiting.  An unknown mode panics.
 */
528 calc_remaining(struct proc *p, int mode)
/* SINGLE_EXIT: every thread in the process counts. */
532 if (mode == SINGLE_EXIT)
533 remaining = p->p_numthreads;
/* SINGLE_BOUNDARY: threads counted in p_boundary_count are excluded. */
534 else if (mode == SINGLE_BOUNDARY)
535 remaining = p->p_numthreads - p->p_boundary_count;
/* SINGLE_NO_EXIT: suspended threads (p_suspcount) are excluded. */
536 else if (mode == SINGLE_NO_EXIT)
537 remaining = p->p_numthreads - p->p_suspcount;
539 panic("calc_remaining: wrong mode %d", mode);
544 * Enforce single-threading.
546 * Returns 1 if the caller must abort (another thread is waiting to
547 * exit the process or similar). Process is locked!
548 * Returns 0 when you are successfully the only thread running.
549 * A process has successfully single threaded in the suspend mode when
550 * there are no threads in user mode. Threads in the kernel must be
551 * allowed to continue until they get to the user boundary. They may even
552 * copy out their return values and data before suspending. They may however be
553 * accelerated in reaching the user boundary as we will wake up
554 * any sleeping threads that are interruptible (PCATCH).
/*
 * Makes the calling thread the process's single-threader: records the
 * requested mode in the process flags, then repeatedly prods the other
 * threads and suspends itself until calc_remaining() reports 1.
 * NOTE(review): return statements, case labels and lock calls are
 * missing from this listing; the added comments cover visible lines only.
 */
557 thread_single(int mode)
562 int remaining, wakeup_swapper;
566 mtx_assert(&Giant, MA_NOTOWNED);
567 PROC_LOCK_ASSERT(p, MA_OWNED);
568 KASSERT((td != NULL), ("curthread is NULL"));
570 if ((p->p_flag & P_HADTHREADS) == 0)
573 /* Is someone already single threading? */
574 if (p->p_singlethread != NULL && p->p_singlethread != td)
/* Exactly one of P_SINGLE_EXIT / P_SINGLE_BOUNDARY, or neither, is left set to reflect 'mode'. */
577 if (mode == SINGLE_EXIT) {
578 p->p_flag |= P_SINGLE_EXIT;
579 p->p_flag &= ~P_SINGLE_BOUNDARY;
581 p->p_flag &= ~P_SINGLE_EXIT;
582 if (mode == SINGLE_BOUNDARY)
583 p->p_flag |= P_SINGLE_BOUNDARY;
585 p->p_flag &= ~P_SINGLE_BOUNDARY;
587 p->p_flag |= P_STOPPED_SINGLE;
589 p->p_singlethread = td;
590 remaining = calc_remaining(p, mode);
/* Keep going until only one thread is left to account for. */
591 while (remaining != 1) {
592 if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
595 FOREACH_THREAD_IN_PROC(p, td2) {
/* Presumably makes td2 run the suspend check on its next AST -- confirm against the AST handling code. */
599 td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
600 if (TD_IS_INHIBITED(td2)) {
603 if (TD_IS_SUSPENDED(td2))
605 thread_unsuspend_one(td2);
/* Interruptible sleepers (TDF_SINTR) are aborted with EINTR here; the branches below use ERESTART. */
606 if (TD_ON_SLEEPQ(td2) &&
607 (td2->td_flags & TDF_SINTR))
609 sleepq_abort(td2, EINTR);
611 case SINGLE_BOUNDARY:
/* Threads already flagged TDF_BOUNDARY are left suspended. */
612 if (TD_IS_SUSPENDED(td2) &&
613 !(td2->td_flags & TDF_BOUNDARY))
615 thread_unsuspend_one(td2);
616 if (TD_ON_SLEEPQ(td2) &&
617 (td2->td_flags & TDF_SINTR))
619 sleepq_abort(td2, ERESTART);
622 if (TD_IS_SUSPENDED(td2) &&
623 !(td2->td_flags & TDF_BOUNDARY))
625 thread_unsuspend_one(td2);
626 if (TD_ON_SLEEPQ(td2) &&
627 (td2->td_flags & TDF_SINTR))
629 sleepq_abort(td2, ERESTART);
636 else if (TD_IS_RUNNING(td2) && td != td2) {
644 remaining = calc_remaining(p, mode);
647 * Maybe we suspended some threads.. was it enough?
654 * Wake us up when everyone else has suspended.
655 * In the mean time we suspend as well.
657 thread_suspend_switch(td);
658 remaining = calc_remaining(p, mode);
660 if (mode == SINGLE_EXIT) {
662 * We have gotten rid of all the other threads and we
663 * are about to either exit or exec. In either case,
664 * we try our utmost to revert to being a non-threaded
667 p->p_singlethread = NULL;
668 p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
676 * Called in from locations that can safely check to see
677 * whether we have to suspend or at least throttle for a
678 * single-thread event (e.g. fork).
680 * Such locations include userret().
681 * If the "return_instead" argument is non zero, the thread must be able to
682 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
684 * The 'return_instead' argument tells the function if it may do a
685 * thread_exit() or suspend, or whether the caller must abort and back
688 * If the thread that set the single_threading request has set the
689 * P_SINGLE_EXIT bit in the process flags then this call will never return
690 * if 'return_instead' is false, but will exit.
692 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
693 *---------------+--------------------+---------------------
694 * 0 | returns 0 | returns 0 or 1
695 * | when ST ends | immediately
696 *---------------+--------------------+---------------------
697 * 1 | thread exits | returns 1
699 * 0 = thread_exit() or suspension ok,
700 * other = return error instead of stopping the thread.
702 * While a full suspension is under effect, even a single threading
703 * thread would be suspended if it made this call (but it shouldn't).
704 * This call should only be made from places where
705 * thread_exit() would be safe as that may be the outcome unless
706 * return_instead is set.
/*
 * Loops while the process should stop (or the thread is flagged for a
 * debugger suspend), either returning to the caller, exiting, or
 * suspending the calling thread, as selected by 'return_instead' and the
 * process single-threading flags.
 * NOTE(review): return statements, lock calls and the thread_exit() call
 * are missing from this listing; the added comments cover visible lines
 * only.
 */
709 thread_suspend_check(int return_instead)
717 mtx_assert(&Giant, MA_NOTOWNED);
718 PROC_LOCK_ASSERT(p, MA_OWNED);
719 while (P_SHOULDSTOP(p) ||
720 ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
721 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
722 KASSERT(p->p_singlethread != NULL,
723 ("singlethread not set"));
725 * The only suspension in action is a
726 * single-threading. Single threader need not stop.
727 * XXX Should be safe to access unlocked
728 * as it can only be set to be true by us.
730 if (p->p_singlethread == td)
731 return (0); /* Exempt from stopping. */
733 if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
736 /* Should we goto user boundary if we didn't come from there? */
737 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
738 (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
741 /* If thread will exit, flush its pending signals */
742 if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
743 sigqueue_flush(&td->td_sigqueue);
748 * If the process is waiting for us to exit,
749 * this thread should just suicide.
750 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
752 if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
754 if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
/* Presumably the +1 stands for this thread, which is not yet counted in p_suspcount -- confirm. */
755 if (p->p_numthreads == p->p_suspcount + 1) {
756 thread_lock(p->p_singlethread);
758 thread_unsuspend_one(p->p_singlethread);
759 thread_unlock(p->p_singlethread);
767 * When a thread suspends, it just
768 * gets taken off all queues.
770 thread_suspend_one(td);
/*
 * With return_instead == 0 the thread is flagged TDF_BOUNDARY and counted
 * in p_boundary_count, which calc_remaining() subtracts in
 * SINGLE_BOUNDARY mode; both are undone after the switch returns.
 */
771 if (return_instead == 0) {
772 p->p_boundary_count++;
773 td->td_flags |= TDF_BOUNDARY;
776 mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
777 if (return_instead == 0)
778 td->td_flags &= ~TDF_BOUNDARY;
781 if (return_instead == 0)
782 p->p_boundary_count--;
/*
 * Marks td suspended and switches away voluntarily (SW_VOL).  The proc
 * lock and proc spin lock must be held on entry, and td must not already
 * be suspended (all asserted).
 * NOTE(review): the lock hand-off statements between the visible lines
 * are missing from this listing.
 */
788 thread_suspend_switch(struct thread *td)
793 KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
794 PROC_LOCK_ASSERT(p, MA_OWNED);
795 PROC_SLOCK_ASSERT(p, MA_OWNED);
797 * We implement thread_suspend_one in stages here to avoid
798 * dropping the proc lock while the thread lock is owned.
/* Same two state changes that thread_suspend_one() makes. */
804 td->td_flags &= ~TDF_NEEDSUSPCHK;
805 TD_SET_SUSPENDED(td);
809 mi_switch(SW_VOL | SWT_SUSPEND, NULL);
/*
 * Marks td suspended and clears its TDF_NEEDSUSPCHK flag.  The proc spin
 * lock and the thread lock must be held, and td must not already be
 * suspended (all asserted).
 */
817 thread_suspend_one(struct thread *td)
819 struct proc *p = td->td_proc;
821 PROC_SLOCK_ASSERT(p, MA_OWNED);
822 THREAD_LOCK_ASSERT(td, MA_OWNED);
823 KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
825 td->td_flags &= ~TDF_NEEDSUSPCHK;
826 TD_SET_SUSPENDED(td);
/*
 * Clears td's suspended state and makes it runnable.  The proc spin lock
 * and the thread lock must be held, and td must be suspended (all
 * asserted).  Returns setrunnable()'s result, which callers in this file
 * store in 'wakeup_swapper'.
 */
831 thread_unsuspend_one(struct thread *td)
833 struct proc *p = td->td_proc;
835 PROC_SLOCK_ASSERT(p, MA_OWNED);
836 THREAD_LOCK_ASSERT(td, MA_OWNED);
837 KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
838 TD_CLR_SUSPENDED(td);
840 return (setrunnable(td));
844 * Allow all threads blocked by single threading to continue running.
/*
 * If the process no longer needs to stop, resumes every suspended
 * thread.  Otherwise, if a single-threading request is active and all
 * threads are suspended, resumes only the single-threader.  The proc
 * lock and proc spin lock must be held (asserted).
 * NOTE(review): per-thread lock calls and the handling of the
 * accumulated wakeup_swapper value are missing from this listing.
 */
847 thread_unsuspend(struct proc *p)
852 PROC_LOCK_ASSERT(p, MA_OWNED);
853 PROC_SLOCK_ASSERT(p, MA_OWNED);
855 if (!P_SHOULDSTOP(p)) {
856 FOREACH_THREAD_IN_PROC(p, td) {
858 if (TD_IS_SUSPENDED(td)) {
/* OR-accumulate so one positive result is not lost to a later zero. */
859 wakeup_swapper |= thread_unsuspend_one(td);
863 } else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
864 (p->p_numthreads == p->p_suspcount)) {
866 * Stopping everything also did the job for the single
867 * threading request. Now we've downgraded to single-threaded,
870 thread_lock(p->p_singlethread);
871 wakeup_swapper = thread_unsuspend_one(p->p_singlethread);
872 thread_unlock(p->p_singlethread);
879 * End the single threading mode..
/*
 * Ends single-threading: clears the single-threading flags and
 * p_singlethread, then resumes every suspended thread unless the process
 * has only one thread or is still required to stop.  The proc lock must
 * be held (asserted).
 * NOTE(review): lock calls and the handling of the accumulated
 * wakeup_swapper value are missing from this listing.
 */
882 thread_single_end(void)
890 PROC_LOCK_ASSERT(p, MA_OWNED);
891 p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
893 p->p_singlethread = NULL;
896 * If there are other threads they may now run,
897 * unless of course there is a blanket 'stop order'
898 * on the process. The single threader must be allowed
899 * to continue however as this is a bad place to stop.
901 if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
902 FOREACH_THREAD_IN_PROC(p, td) {
904 if (TD_IS_SUSPENDED(td)) {
905 wakeup_swapper |= thread_unsuspend_one(td);
916 thread_find(struct proc *p, lwpid_t tid)
920 PROC_LOCK_ASSERT(p, MA_OWNED);
921 FOREACH_THREAD_IN_PROC(p, td) {
922 if (td->td_tid == tid)