/*-
 * Copyright (C) 2001 Julian Elischer <julian@freebsd.org>.
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice(s), this list of conditions and the following disclaimer as
 *    the first lines of this file unmodified other than the possible
 *    addition of one or more copyright notices.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice(s), this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "opt_witness.h"
#include "opt_hwpmc_hooks.h"

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
#include <sys/selinfo.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>
#include <sys/umtx.h>
#include <sys/cpuset.h>
#ifdef HWPMC_HOOKS
#include <sys/pmckern.h>
#endif

#include <security/audit/audit.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

#include <sys/eventhandler.h>

/*
 * thread related storage.
 */
static uma_zone_t thread_zone;

SYSCTL_NODE(_kern, OID_AUTO, threads, CTLFLAG_RW, 0, "thread allocation");

int max_threads_per_proc = 1500;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_per_proc, CTLFLAG_RW,
	&max_threads_per_proc, 0, "Limit on threads per proc");

int max_threads_hits;
SYSCTL_INT(_kern_threads, OID_AUTO, max_threads_hits, CTLFLAG_RD,
	&max_threads_hits, 0, "Number of times the thread limit was hit");
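
/*
 * Both knobs live under the kern.threads sysctl node declared above and
 * the writable one can be tuned at runtime, e.g. (illustrative):
 *
 *	sysctl kern.threads.max_threads_per_proc=4096
 */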

TAILQ_HEAD(, thread) zombie_threads = TAILQ_HEAD_INITIALIZER(zombie_threads);
static struct mtx zombie_lock;
MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN);
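
/*
 * A spin mutex protects the zombie list because threads are stashed on
 * it from context-switch paths (see thread_stash() below), where a
 * regular sleep mutex may not be acquired.
 */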

static void thread_zombie(struct thread *);

struct mtx tid_lock;
static struct unrhdr *tid_unrhdr;

/*
 * Prepare a thread for use.
 */
static int
thread_ctor(void *mem, int size, void *arg, int flags)
{
	struct thread *td;

	td = (struct thread *)mem;
	td->td_state = TDS_INACTIVE;
	td->td_oncpu = NOCPU;

	td->td_tid = alloc_unr(tid_unrhdr);

	/*
	 * Note that td_critnest begins life as 1 because the thread is not
	 * running and is thereby implicitly waiting to be on the receiving
	 * end of a context switch.
	 */
	td->td_critnest = 1;
	EVENTHANDLER_INVOKE(thread_ctor, td);
#ifdef AUDIT
	audit_thread_alloc(td);
#endif
	umtx_thread_alloc(td);
	return (0);
}

/*
 * Reclaim a thread after use.
 */
static void
thread_dtor(void *mem, int size, void *arg)
{
	struct thread *td;

	td = (struct thread *)mem;

#ifdef INVARIANTS
	/* Verify that this thread is in a safe state to free. */
	switch (td->td_state) {
	case TDS_INHIBITED:
	case TDS_RUNNING:
	case TDS_RUNQ:
	case TDS_CAN_RUN:
		/*
		 * We must never unlink a thread that is in one of
		 * these states, because it is currently active.
		 */
		panic("bad state for thread unlinking");
		/* NOTREACHED */
	case TDS_INACTIVE:
		break;
	default:
		panic("bad thread state");
		/* NOTREACHED */
	}
#endif
#ifdef AUDIT
	audit_thread_free(td);
#endif
	/* Free all OSD associated to this thread. */
	osd_thread_exit(td);

	EVENTHANDLER_INVOKE(thread_dtor, td);
	free_unr(tid_unrhdr, td->td_tid);
}

/*
 * Initialize type-stable parts of a thread (when newly created).
 */
static int
thread_init(void *mem, int size, int flags)
{
	struct thread *td;

	td = (struct thread *)mem;

	td->td_sleepqueue = sleepq_alloc();
	td->td_turnstile = turnstile_alloc();
	EVENTHANDLER_INVOKE(thread_init, td);
	td->td_sched = (struct td_sched *)&td[1];
	umtx_thread_init(td);
	td->td_kstack = 0;
	return (0);
}

/*
 * Tear down type-stable parts of a thread (just before being discarded).
 */
static void
thread_fini(void *mem, int size)
{
	struct thread *td;

	td = (struct thread *)mem;
	EVENTHANDLER_INVOKE(thread_fini, td);
	turnstile_free(td->td_turnstile);
	sleepq_free(td->td_sleepqueue);
	umtx_thread_fini(td);
	seltdfini(td);
}
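
/*
 * Note on the four UMA callbacks above: thread_init()/thread_fini() run
 * only when an item moves between the zone and the VM, so state set up
 * there (sleep queue, turnstile) is type-stable and survives in cached
 * items, while thread_ctor()/thread_dtor() run on every
 * uma_zalloc()/uma_zfree() pair.
 */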

/*
 * For a newly created process,
 * link up all the structures and its initial threads etc.
 * called from:
 * {arch}/{arch}/machdep.c   ia64_init(), init386() etc.
 * proc_dtor() (should go away)
 * proc_init()
 */
void
proc_linkup0(struct proc *p, struct thread *td)
{

	TAILQ_INIT(&p->p_threads);	/* all threads in proc */
	proc_linkup(p, td);
}

void
proc_linkup(struct proc *p, struct thread *td)
{

	sigqueue_init(&p->p_sigqueue, p);
	p->p_ksi = ksiginfo_alloc(1);
	if (p->p_ksi != NULL) {
		/* XXX p_ksi may be null if ksiginfo zone is not ready */
		p->p_ksi->ksi_flags = KSI_EXT | KSI_INS;
	}
	LIST_INIT(&p->p_mqnotifier);
	p->p_numthreads = 0;
	thread_link(td, p);
}

/*
 * Initialize global thread allocation resources.
 */
void
threadinit(void)
{

	mtx_init(&tid_lock, "TID lock", NULL, MTX_DEF);
	/* leave one number for thread0 */
	tid_unrhdr = new_unrhdr(PID_MAX + 2, INT_MAX, &tid_lock);
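	/*
	 * Starting the unr range at PID_MAX + 2 keeps TIDs disjoint from
	 * PIDs; PID_MAX + 1 is left for thread0, which is never allocated
	 * from the zone.
	 */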

	thread_zone = uma_zcreate("THREAD", sched_sizeof_thread(),
	    thread_ctor, thread_dtor, thread_init, thread_fini,
	    16 - 1, 0);
}

/*
 * Place an unused thread on the zombie list.
 * Use the slpq as that must be unused by now.
 */
void
thread_zombie(struct thread *td)
{
	mtx_lock_spin(&zombie_lock);
	TAILQ_INSERT_HEAD(&zombie_threads, td, td_slpq);
	mtx_unlock_spin(&zombie_lock);
}

/*
 * Release a thread that has exited after cpu_throw().
 */
void
thread_stash(struct thread *td)
{

	atomic_subtract_rel_int(&td->td_proc->p_exitthreads, 1);
	thread_zombie(td);
}

/*
 * Reap zombie resources.
 */
void
thread_reap(void)
{
	struct thread *td_first, *td_next;

	/*
	 * Don't even bother to lock if none at this instant,
	 * we really don't care about the next instant..
	 */
	if (!TAILQ_EMPTY(&zombie_threads)) {
		mtx_lock_spin(&zombie_lock);
		td_first = TAILQ_FIRST(&zombie_threads);
		if (td_first)
			TAILQ_INIT(&zombie_threads);
		mtx_unlock_spin(&zombie_lock);
		while (td_first) {
			td_next = TAILQ_NEXT(td_first, td_slpq);
			if (td_first->td_ucred)
				crfree(td_first->td_ucred);
			thread_free(td_first);
			td_first = td_next;
		}
	}
}
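
/*
 * The list above is detached under the spin lock and only walked after
 * the lock is dropped: crfree() and uma_zfree() must not be called with
 * a spin mutex held.
 */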

/*
 * Allocate a thread.
 */
struct thread *
thread_alloc(int pages)
{
	struct thread *td;

	thread_reap();	/* check if any zombies to get */

	td = (struct thread *)uma_zalloc(thread_zone, M_WAITOK);
	KASSERT(td->td_kstack == 0, ("thread_alloc got thread with kstack"));
	if (!vm_thread_new(td, pages)) {
		uma_zfree(thread_zone, td);
		return (NULL);
	}
	cpu_thread_alloc(td);
	return (td);
}

int
thread_alloc_stack(struct thread *td, int pages)
{

	KASSERT(td->td_kstack == 0,
	    ("thread_alloc_stack called on a thread with kstack"));
	if (!vm_thread_new(td, pages))
		return (0);
	cpu_thread_alloc(td);
	return (1);
}
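
/*
 * Illustrative pairing of the two allocation paths above (a sketch, not
 * code from this file): a caller such as thr_new() either obtains a
 * fully formed thread with thread_alloc(0) -- where 0 selects the
 * default kstack size -- or allocates the stack separately and attaches
 * it with thread_alloc_stack(td, pages), undoing its work with
 * thread_free(td) on error.
 */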

/*
 * Deallocate a thread.
 */
void
thread_free(struct thread *td)
{

	lock_profile_thread_exit(td);

	cpuset_rel(td->td_cpuset);
	td->td_cpuset = NULL;
	cpu_thread_free(td);
	if (td->td_kstack != 0)
		vm_thread_dispose(td);
	uma_zfree(thread_zone, td);
}

/*
 * Discard the current thread and exit from its context.
 * Always called with scheduler locked.
 *
 * Because we can't free a thread while we're operating under its context,
 * push the current thread into our CPU's deadthread holder. This means
 * we needn't worry about someone else grabbing our context before we
 * do a cpu_throw().
 */
void
thread_exit(void)
{
	uint64_t new_switchtime;
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int wakeup_swapper;

	td = curthread;
	p = td->td_proc;

	PROC_SLOCK_ASSERT(p, MA_OWNED);
	mtx_assert(&Giant, MA_NOTOWNED);

	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(p != NULL, ("thread exiting without a process"));
	CTR3(KTR_PROC, "thread_exit: thread %p (pid %ld, %s)", td,
	    (long)p->p_pid, td->td_name);
	KASSERT(TAILQ_EMPTY(&td->td_sigqueue.sq_list), ("signal pending"));

#ifdef AUDIT
	AUDIT_SYSCALL_EXIT(0, td);
#endif
	umtx_thread_exit(td);
	/*
	 * Drop FPU & debug register state storage, or any other
	 * architecture specific resources that
	 * would not be on a new untouched process.
	 */
	cpu_thread_exit(td);	/* XXXSMP */

	/* Do the same timestamp bookkeeping that mi_switch() would do. */
	new_switchtime = cpu_ticks();
	p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
	PCPU_SET(switchtime, new_switchtime);
	PCPU_SET(switchticks, ticks);
	PCPU_INC(cnt.v_swtch);
	/* Save our resource usage in our process. */
	td->td_ru.ru_nvcsw++;
	rucollect(&p->p_ru, &td->td_ru);
	/*
	 * The last thread is left attached to the process
	 * so that the whole bundle gets recycled. Skip
	 * all this stuff if we never had threads.
	 * EXIT clears all sign of other threads when
	 * it goes to single threading, so the last thread always
	 * takes the short path.
	 */
	if (p->p_flag & P_HADTHREADS) {
		if (p->p_numthreads > 1) {
			thread_unlink(td);
			td2 = FIRST_THREAD_IN_PROC(p);
			sched_exit_thread(td2, td);

			/*
			 * The test below is NOT true if we are the
			 * sole exiting thread. P_STOPPED_SINGLE is unset
			 * in exit1() after it is the only survivor.
			 */
			if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
				if (p->p_numthreads == p->p_suspcount) {
					thread_lock(p->p_singlethread);
					wakeup_swapper = thread_unsuspend_one(
					    p->p_singlethread);
					thread_unlock(p->p_singlethread);
					if (wakeup_swapper)
						kick_proc0();
				}
			}

			atomic_add_int(&td->td_proc->p_exitthreads, 1);
			PCPU_SET(deadthread, td);
		} else {
			/*
			 * The last thread is exiting.. but not through exit().
			 */
			panic("thread_exit: Last thread exiting on its own");
		}
	}
#ifdef HWPMC_HOOKS
	/*
	 * If this thread is part of a process that is being tracked by
	 * hwpmc(4), inform the module of the thread's impending exit.
	 */
	if (PMC_PROC_IS_USING_PMCS(td->td_proc))
		PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
#endif
	PROC_UNLOCK(p);
	thread_lock(td);
	/* Save our tick information with both the thread and proc locked */
	ruxagg(&p->p_rux, td);
	PROC_SUNLOCK(p);
	td->td_state = TDS_INACTIVE;
#ifdef WITNESS
	witness_thread_exit(td);
#endif
	CTR1(KTR_PROC, "thread_exit: cpu_throw() thread %p", td);
	sched_throw(td);
	panic("I'm a teapot!");
	/* NOTREACHED */
}
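
/*
 * Note that thread_exit() never frees the exiting thread itself: the
 * thread is parked in the per-CPU deadthread slot and reclaimed later,
 * from another context, via thread_stash()/thread_reap().
 */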

/*
 * Do any thread specific cleanups that may be needed in wait()
 * called with Giant, proc and schedlock not held.
 */
void
thread_wait(struct proc *p)
{
	struct thread *td;

	mtx_assert(&Giant, MA_NOTOWNED);
	KASSERT((p->p_numthreads == 1), ("Multiple threads in wait1()"));
	td = FIRST_THREAD_IN_PROC(p);
	/* Lock the last thread so we spin until it exits cpu_throw(). */
	thread_lock(td);
	thread_unlock(td);
	/* Wait for any remaining threads to exit cpu_throw(). */
	while (p->p_exitthreads)
		sched_relinquish(curthread);
	lock_profile_thread_exit(td);
	cpuset_rel(td->td_cpuset);
	td->td_cpuset = NULL;
	cpu_thread_clean(td);
	crfree(td->td_ucred);
	thread_reap();	/* check for zombie threads etc. */
}

/*
 * Link a thread to a process.
 * Set up anything that needs to be initialized for it to
 * be used by the process.
 */
void
thread_link(struct thread *td, struct proc *p)
{

	/*
	 * XXX This can't be enabled because it's called for proc0 before
	 * its lock has been created.
	 * PROC_LOCK_ASSERT(p, MA_OWNED);
	 */
	td->td_state = TDS_INACTIVE;
	td->td_proc = p;
	td->td_flags = TDF_INMEM;

	LIST_INIT(&td->td_contested);
	LIST_INIT(&td->td_lprof[0]);
	LIST_INIT(&td->td_lprof[1]);
	sigqueue_init(&td->td_sigqueue, p);
	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
	TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
	p->p_numthreads++;
}

/*
 * Convert a process with one thread to an unthreaded process.
 */
void
thread_unthread(struct thread *td)
{
	struct proc *p = td->td_proc;

	KASSERT((p->p_numthreads == 1), ("Unthreading with >1 threads"));
	p->p_flag &= ~P_HADTHREADS;
}

/*
 * Called from:
 *  thread_exit()
 */
void
thread_unlink(struct thread *td)
{
	struct proc *p = td->td_proc;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	TAILQ_REMOVE(&p->p_threads, td, td_plist);
	p->p_numthreads--;
	/* could clear a few other things here */
	/* Must NOT clear links to proc! */
}

static int
calc_remaining(struct proc *p, int mode)
{
	int remaining;

	if (mode == SINGLE_EXIT)
		remaining = p->p_numthreads;
	else if (mode == SINGLE_BOUNDARY)
		remaining = p->p_numthreads - p->p_boundary_count;
	else if (mode == SINGLE_NO_EXIT)
		remaining = p->p_numthreads - p->p_suspcount;
	else
		panic("calc_remaining: wrong mode %d", mode);
	return (remaining);
}
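
/*
 * The mode chooses who still counts as "remaining" above: for
 * SINGLE_EXIT every other thread must go away entirely, while in the
 * suspend modes a thread stops counting once it is parked at the user
 * boundary or suspended.
 */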

/*
 * Enforce single-threading.
 *
 * Returns 1 if the caller must abort (another thread is waiting to
 * exit the process or similar). Process is locked!
 * Returns 0 when you are successfully the only thread running.
 * A process has successfully single threaded in the suspend mode when
 * there are no threads in user mode. Threads in the kernel must be
 * allowed to continue until they get to the user boundary. They may even
 * copy out their return values and data before suspending. They may however be
 * accelerated in reaching the user boundary as we will wake up
 * any sleeping threads that are interruptible (PCATCH).
 */
int
thread_single(int mode)
{
	struct thread *td;
	struct thread *td2;
	struct proc *p;
	int remaining, wakeup_swapper;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT((td != NULL), ("curthread is NULL"));

	if ((p->p_flag & P_HADTHREADS) == 0)
		return (0);

	/* Is someone already single threading? */
	if (p->p_singlethread != NULL && p->p_singlethread != td)
		return (1);

	if (mode == SINGLE_EXIT) {
		p->p_flag |= P_SINGLE_EXIT;
		p->p_flag &= ~P_SINGLE_BOUNDARY;
	} else {
		p->p_flag &= ~P_SINGLE_EXIT;
		if (mode == SINGLE_BOUNDARY)
			p->p_flag |= P_SINGLE_BOUNDARY;
		else
			p->p_flag &= ~P_SINGLE_BOUNDARY;
	}
	p->p_flag |= P_STOPPED_SINGLE;
	PROC_SLOCK(p);
	p->p_singlethread = td;
	remaining = calc_remaining(p, mode);
	while (remaining != 1) {
		if (P_SHOULDSTOP(p) != P_STOPPED_SINGLE)
			goto stopme;
		wakeup_swapper = 0;
		FOREACH_THREAD_IN_PROC(p, td2) {
			if (td2 == td)
				continue;
			thread_lock(td2);
			td2->td_flags |= TDF_ASTPENDING | TDF_NEEDSUSPCHK;
			if (TD_IS_INHIBITED(td2)) {
				switch (mode) {
				case SINGLE_EXIT:
					if (TD_IS_SUSPENDED(td2))
						wakeup_swapper |=
						    thread_unsuspend_one(td2);
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR))
						wakeup_swapper |=
						    sleepq_abort(td2, EINTR);
					break;
				case SINGLE_BOUNDARY:
					if (TD_IS_SUSPENDED(td2) &&
					    !(td2->td_flags & TDF_BOUNDARY))
						wakeup_swapper |=
						    thread_unsuspend_one(td2);
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR))
						wakeup_swapper |=
						    sleepq_abort(td2, ERESTART);
					break;
				case SINGLE_NO_EXIT:
					if (TD_IS_SUSPENDED(td2) &&
					    !(td2->td_flags & TDF_BOUNDARY))
						wakeup_swapper |=
						    thread_unsuspend_one(td2);
					if (TD_ON_SLEEPQ(td2) &&
					    (td2->td_flags & TDF_SINTR))
						wakeup_swapper |=
						    sleepq_abort(td2, ERESTART);
					break;
				default:
					break;
				}
			}
#ifdef SMP
			else if (TD_IS_RUNNING(td2) && td != td2) {
				forward_signal(td2);
			}
#endif
			thread_unlock(td2);
		}
		if (wakeup_swapper)
			kick_proc0();
		remaining = calc_remaining(p, mode);

		/*
		 * Maybe we suspended some threads.. was it enough?
		 */
		if (remaining == 1)
			break;

stopme:
		/*
		 * Wake us up when everyone else has suspended.
		 * In the meantime we suspend as well.
		 */
		thread_suspend_switch(td);
		remaining = calc_remaining(p, mode);
	}
	if (mode == SINGLE_EXIT) {
		/*
		 * We have gotten rid of all the other threads and we
		 * are about to either exit or exec. In either case,
		 * we try our utmost to revert to being a non-threaded
		 * process.
		 */
		p->p_singlethread = NULL;
		p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT);
		thread_unthread(td);
	}
	PROC_SUNLOCK(p);
	return (0);
}
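
/*
 * Illustrative caller pattern (a sketch, not a verbatim quote of
 * exit1()/execve()):
 *
 *	PROC_LOCK(p);
 *	if (thread_single(SINGLE_EXIT)) {
 *		// Another thread is already single-threading the
 *		// process; this caller must back out.
 *	}
 */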

/*
 * Called in from locations that can safely check to see
 * whether we have to suspend or at least throttle for a
 * single-thread event (e.g. fork).
 *
 * Such locations include userret().
 * If the "return_instead" argument is non-zero, the thread must be able to
 * accept 0 (caller may continue), or 1 (caller must abort) as a result.
 *
 * The 'return_instead' argument tells the function if it may do a
 * thread_exit() or suspend, or whether the caller must abort and back
 * out instead.
 *
 * If the thread that set the single_threading request has set the
 * P_SINGLE_EXIT bit in the process flags then this call will never return
 * if 'return_instead' is false, but will exit.
 *
 * P_SINGLE_EXIT | return_instead == 0| return_instead != 0
 *---------------+--------------------+---------------------
 *       0       | returns 0          |   returns 0 or 1
 *               | when ST ends       |   immediately
 *---------------+--------------------+---------------------
 *       1       | thread exits       |   returns 1
 *               |                    |   immediately
 * 0 = thread_exit() or suspension ok,
 * other = return error instead of stopping the thread.
 *
 * While a full suspension is in effect, even a single threading
 * thread would be suspended if it made this call (but it shouldn't).
 * This call should only be made from places where
 * thread_exit() would be safe as that may be the outcome unless
 * return_instead is set.
 */
int
thread_suspend_check(int return_instead)
{
	struct thread *td;
	struct proc *p;
	int wakeup_swapper;

	td = curthread;
	p = td->td_proc;
	mtx_assert(&Giant, MA_NOTOWNED);
	PROC_LOCK_ASSERT(p, MA_OWNED);
	while (P_SHOULDSTOP(p) ||
	    ((p->p_flag & P_TRACED) && (td->td_dbgflags & TDB_SUSPEND))) {
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			KASSERT(p->p_singlethread != NULL,
			    ("singlethread not set"));
			/*
			 * The only suspension in action is a
			 * single-threading. Single threader need not stop.
			 * XXX Should be safe to access unlocked
			 * as it can only be set to be true by us.
			 */
			if (p->p_singlethread == td)
				return (0);	/* Exempt from stopping. */
		}
		if ((p->p_flag & P_SINGLE_EXIT) && return_instead)
			return (EINTR);

		/* Should we goto user boundary if we didn't come from there? */
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE &&
		    (p->p_flag & P_SINGLE_BOUNDARY) && return_instead)
			return (ERESTART);

		/* If thread will exit, flush its pending signals */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
			sigqueue_flush(&td->td_sigqueue);

		PROC_SLOCK(p);
		thread_stopped(p);
		/*
		 * If the process is waiting for us to exit,
		 * this thread should just suicide.
		 * Assumes that P_SINGLE_EXIT implies P_STOPPED_SINGLE.
		 */
		if ((p->p_flag & P_SINGLE_EXIT) && (p->p_singlethread != td))
			thread_exit();
		if (P_SHOULDSTOP(p) == P_STOPPED_SINGLE) {
			if (p->p_numthreads == p->p_suspcount + 1) {
				thread_lock(p->p_singlethread);
				wakeup_swapper =
				    thread_unsuspend_one(p->p_singlethread);
				thread_unlock(p->p_singlethread);
				if (wakeup_swapper)
					kick_proc0();
			}
		}
		PROC_UNLOCK(p);
		thread_lock(td);
		/*
		 * When a thread suspends, it just
		 * gets taken off all queues.
		 */
		thread_suspend_one(td);
		if (return_instead == 0) {
			p->p_boundary_count++;
			td->td_flags |= TDF_BOUNDARY;
		}
		PROC_SUNLOCK(p);
		mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
		if (return_instead == 0)
			td->td_flags &= ~TDF_BOUNDARY;
		thread_unlock(td);
		PROC_LOCK(p);
		if (return_instead == 0)
			p->p_boundary_count--;
	}
	return (0);
}
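
/*
 * Typical shape of a caller that must not block while a single-threading
 * request is pending (a sketch under the table above, not code from
 * this file):
 *
 *	PROC_LOCK(p);
 *	if ((error = thread_suspend_check(1)) != 0) {
 *		PROC_UNLOCK(p);
 *		return (error);		// EINTR or ERESTART
 *	}
 */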

void
thread_suspend_switch(struct thread *td)
{
	struct proc *p;

	p = td->td_proc;
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	/*
	 * We implement thread_suspend_one in stages here to avoid
	 * dropping the proc lock while the thread lock is owned.
	 */
	thread_stopped(p);
	p->p_suspcount++;
	PROC_UNLOCK(p);
	thread_lock(td);
	td->td_flags &= ~TDF_NEEDSUSPCHK;
	TD_SET_SUSPENDED(td);
	sched_sleep(td, 0);
	PROC_SUNLOCK(p);
	DROP_GIANT();
	mi_switch(SW_VOL | SWT_SUSPEND, NULL);
	thread_unlock(td);
	PICKUP_GIANT();
	PROC_LOCK(p);
	PROC_SLOCK(p);
}

void
thread_suspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	PROC_SLOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
	p->p_suspcount++;
	td->td_flags &= ~TDF_NEEDSUSPCHK;
	TD_SET_SUSPENDED(td);
	sched_sleep(td, 0);
}

int
thread_unsuspend_one(struct thread *td)
{
	struct proc *p = td->td_proc;

	PROC_SLOCK_ASSERT(p, MA_OWNED);
	THREAD_LOCK_ASSERT(td, MA_OWNED);
	KASSERT(TD_IS_SUSPENDED(td), ("Thread not suspended"));
	TD_CLR_SUSPENDED(td);
	p->p_suspcount--;
	return (setrunnable(td));
}
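
/*
 * thread_unsuspend_one() propagates setrunnable()'s return value so the
 * caller knows whether proc0 (the swapper) must be woken to swap the
 * thread's stack back in; callers defer the kick_proc0() until after
 * the relevant locks have been dropped.
 */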

/*
 * Allow all threads blocked by single threading to continue running.
 */
void
thread_unsuspend(struct proc *p)
{
	struct thread *td;
	int wakeup_swapper;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	PROC_SLOCK_ASSERT(p, MA_OWNED);
	wakeup_swapper = 0;
	if (!P_SHOULDSTOP(p)) {
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			if (TD_IS_SUSPENDED(td)) {
				wakeup_swapper |= thread_unsuspend_one(td);
			}
			thread_unlock(td);
		}
	} else if ((P_SHOULDSTOP(p) == P_STOPPED_SINGLE) &&
	    (p->p_numthreads == p->p_suspcount)) {
		/*
		 * Stopping everything also did the job for the single
		 * threading request. Now we've downgraded to single-threaded,
		 * let it continue.
		 */
		thread_lock(p->p_singlethread);
		wakeup_swapper = thread_unsuspend_one(p->p_singlethread);
		thread_unlock(p->p_singlethread);
	}
	if (wakeup_swapper)
		kick_proc0();
}

/*
 * End the single threading mode.
 */
void
thread_single_end(void)
{
	struct thread *td;
	struct proc *p;
	int wakeup_swapper;

	td = curthread;
	p = td->td_proc;
	PROC_LOCK_ASSERT(p, MA_OWNED);
	p->p_flag &= ~(P_STOPPED_SINGLE | P_SINGLE_EXIT | P_SINGLE_BOUNDARY);
	PROC_SLOCK(p);
	p->p_singlethread = NULL;
	wakeup_swapper = 0;
	/*
	 * If there are other threads they may now run,
	 * unless of course there is a blanket 'stop order'
	 * on the process. The single threader must be allowed
	 * to continue however as this is a bad place to stop.
	 */
	if ((p->p_numthreads != 1) && (!P_SHOULDSTOP(p))) {
		FOREACH_THREAD_IN_PROC(p, td) {
			thread_lock(td);
			if (TD_IS_SUSPENDED(td)) {
				wakeup_swapper |= thread_unsuspend_one(td);
			}
			thread_unlock(td);
		}
	}
	PROC_SUNLOCK(p);
	if (wakeup_swapper)
		kick_proc0();
}

struct thread *
thread_find(struct proc *p, lwpid_t tid)
{
	struct thread *td;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	FOREACH_THREAD_IN_PROC(p, td) {
		if (td->td_tid == tid)
			break;
	}
	return (td);
}