 * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
If there are N processors, then there are at most N KSEs (kernel
schedulable entities) working to process threads that belong to a
KSEGROUP (kg). If there are X of these KSEs actually running at the
moment in question, then there are at most M = (N - X) of these KSEs on
the run queue, as running KSEs are not on the queue.
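(For example, with N = 4 processors and X = 3 KSEs actually running,
at most M = 1 KSE belonging to the group is sitting on the run queue.)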
Runnable threads are queued off the KSEGROUP in priority order.
If there are M or more threads runnable, the top M threads
(by priority) are 'preassigned' to the M KSEs not running. The KSEs take
their priority from those threads and are put on the run queue.
The last thread that had a priority high enough to have a KSE associated
with it, AND IS ON THE RUN QUEUE, is pointed to by
kg->kg_last_assigned. If no threads queued off the KSEGROUP have KSEs
assigned, either because all the available KSEs are actively running or
because there are no threads queued, that pointer is NULL.
When a KSE is removed from the run queue to become runnable, we know
it was associated with the highest priority thread in the queue (at the head
of the queue). If it is also the last assigned we know M was 1 and must
now be 0. Since the thread is no longer queued, that pointer must be
removed from it. Since we know there were no more KSEs available,
(M was 1 and is now 0) and since we are not FREEING our KSE
but using it, we know there are STILL no more KSEs available, so we can prove
that the next thread in the ksegrp list will not have a KSE to assign to
it, and thus the pointer must be made 'invalid' (NULL).
The pointer exists so that when a new thread is made runnable, it can
have its priority compared with the last assigned thread to see if
it should 'steal' its KSE or not.. i.e. is it 'earlier'
on the list than that thread or later.. If it's earlier, then the KSE is
removed from the last assigned (which is now not assigned a KSE)
and reassigned to the new thread, which is placed earlier in the list.
The pointer is then backed up to the previous thread (which may or may not
When a thread sleeps or is removed, the KSE becomes available and if there
are queued threads that are not assigned KSEs, the highest priority one of
them is assigned the KSE, which is then placed back on the run queue at
the appropriate place, and the kg->kg_last_assigned pointer is adjusted down
The following diagram shows 2 KSEs and 3 threads from a single process.

RUNQ: --->KSE---KSE--...    (KSEs queued at priorities from threads)

    KSEGROUP---thread--thread--thread    (queued in priority order)
The result of this scheme is that the M available KSEs are always
queued at the priorities they have inherited from the M highest priority
threads for that KSEGROUP. If this situation changes, the KSEs are
reassigned to keep this true.
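As an illustration (hypothetical numbers): with 2 KSEs and 3 queued threads
at priorities 4, 8 and 12, the KSEs sit on the system run queue at
priorities 4 and 8 and kg_last_assigned points at the priority 8 thread,
leaving the priority 12 thread unassigned. If a new thread at priority 6
becomes runnable, it steals the KSE from the priority 8 thread, the KSEs are
then queued at priorities 4 and 6, and kg_last_assigned is backed up to
point at the new thread.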
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_sched.h"
#ifndef KERN_SWITCH_INCLUDE
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sched.h>
#else /* KERN_SWITCH_INCLUDE */
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
#if defined(SMP) && defined(SCHED_4BSD)
#include <sys/sysctl.h>
/* Uncomment this to enable logging of critical_enter/exit. */
#define KTR_CRITICAL KTR_SCHED
#define KTR_CRITICAL 0
#ifdef FULL_PREEMPTION
#error "The FULL_PREEMPTION option requires the PREEMPTION option"
CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
#define td_kse td_sched
 * kern.sched.preemption allows user space to determine if preemption support
 * is compiled in or not. It is not currently a boot or runtime flag that
static int kern_sched_preemption = 1;
static int kern_sched_preemption = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
&kern_sched_preemption, 0, "Kernel preemption enabled");
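/*
 * (Illustrative usage, not part of the code path: from user space,
 * "sysctl kern.sched.preemption" reads this read-only value to tell
 * whether the kernel was built with the PREEMPTION option.)
 */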
/************************************************************************
 * Functions that manipulate runnability from a thread perspective.    *
 ************************************************************************/
 * Select the KSE that will be run next. From that find the thread, and
 * remove it from the KSEGRP's run queue. If there is thread clustering,
 * this will be what does it.
 * Select the thread that will be run next.
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
/* Shutting down, run idlethread on APs */
td = PCPU_GET(idlethread);
CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
ke->ke_flags |= KEF_DIDRUN;
td->td_kse->ke_flags |= KEF_DIDRUN;
KASSERT((td->td_kse == ke), ("kse/thread mismatch"));
if (td->td_proc->p_flag & P_HADTHREADS) {
if (kg->kg_last_assigned == td) {
kg->kg_last_assigned = TAILQ_PREV(td,
threadqueue, td_runq);
TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
td, td->td_priority);
/* Simulate runq_choose() having returned the idle thread */
td = PCPU_GET(idlethread);
CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
ke->ke_flags |= KEF_DIDRUN;
td->td_kse->ke_flags |= KEF_DIDRUN;
 * If we are in panic, only allow system threads,
 * plus the one we are running in, to be run.
if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
(td->td_flags & TDF_INPANIC) == 0)) {
/* note that it is no longer on the run queue */
 * Given a surplus system slot, try to assign a new runnable thread to it.
 *  sched_thread_exit() (local)
 *  sched_switch() (local)
 *  sched_thread_exit() (local)
 *  remrunqueue() (local) (not at the moment)
slot_fill(struct ksegrp *kg)
mtx_assert(&sched_lock, MA_OWNED);
while (kg->kg_avail_opennings > 0) {
 * Find the first unassigned thread
if ((td = kg->kg_last_assigned) != NULL)
td = TAILQ_NEXT(td, td_runq);
td = TAILQ_FIRST(&kg->kg_runq);
 * If we found one, send it to the system scheduler.
kg->kg_last_assigned = td;
sched_add(td, SRQ_YIELDING);
CTR2(KTR_RUNQ, "slot_fill: td%p -> kg%p", td, kg);
/* no threads to use up the slots. quit now */
 * Remove a thread from its KSEGRP's run queue.
 * This in turn may remove it from a KSE if it was already assigned
 * to one, possibly causing a new thread to be assigned to the KSE
 * and the KSE getting a new priority.
remrunqueue(struct thread *td)
struct thread *td2, *td3;
mtx_assert(&sched_lock, MA_OWNED);
KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
 * If it is not a threaded process, take the shortcut.
if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
/* remove from sys run queue and free up a slot */
td3 = TAILQ_PREV(td, threadqueue, td_runq);
TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
if (ke->ke_state == KES_ONRUNQ) {
 * This thread has been assigned to the system run queue.
 * We need to dissociate it and try to assign the
 * KSE to the next available thread. Then, we should
 * see if we need to move the KSE in the run queues.
td2 = kg->kg_last_assigned;
KASSERT((td2 != NULL), ("last assigned has wrong value"));
kg->kg_last_assigned = td3;
/* slot_fill(kg); */ /* will replace it with another */
 * Change the priority of a thread that is on the run queue.
adjustrunqueue(struct thread *td, int newpri)
mtx_assert(&sched_lock, MA_OWNED);
KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
 * If it is not a threaded process, take the shortcut.
if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
/* We only care about the kse in the run queue. */
td->td_priority = newpri;
if (ke->ke_rqindex != (newpri / RQ_PPQ))
if (ke->ke_rqindex != newpri)
sched_add(td, SRQ_BORING);
/* It is a threaded process */
if (ke->ke_state == KES_ONRUNQ
|| ((ke->ke_flags & KEF_ASSIGNED) != 0 &&
(ke->ke_flags & KEF_REMOVED) == 0)
if (kg->kg_last_assigned == td) {
kg->kg_last_assigned =
TAILQ_PREV(td, threadqueue, td_runq);
TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
td->td_priority = newpri;
setrunqueue(td, SRQ_BORING);
/* We only care about the kse in the run queue. */
td->td_priority = newpri;
if (ke->ke_rqindex != (newpri / RQ_PPQ))
if (ke->ke_rqindex != newpri)
sched_add(td, SRQ_BORING);
 * This function is called when a thread is about to be put on a
 * ksegrp run queue because it has been made runnable or its
 * priority has been adjusted and the ksegrp does not have a
 * free kse slot. It determines if a thread from the same ksegrp
 * should be preempted. If so, it tries to switch threads
 * if the thread is on the same cpu or notifies another cpu that
 * it should switch threads.
maybe_preempt_in_ksegrp(struct thread *td)
struct thread *running_thread;
mtx_assert(&sched_lock, MA_OWNED);
running_thread = curthread;
if (running_thread->td_ksegrp != td->td_ksegrp)
if (td->td_priority >= running_thread->td_priority)
#ifndef FULL_PREEMPTION
if (td->td_priority > PRI_MAX_ITHD) {
running_thread->td_flags |= TDF_NEEDRESCHED;
#endif /* FULL_PREEMPTION */
if (running_thread->td_critnest > 1)
running_thread->td_owepreempt = 1;
mi_switch(SW_INVOL, NULL);
#else /* PREEMPTION */
running_thread->td_flags |= TDF_NEEDRESCHED;
#endif /* PREEMPTION */
struct thread *running_thread;
cpumask_t cpumask, dontuse;
struct pcpu *best_pcpu;
struct thread *cputhread;
mtx_assert(&sched_lock, MA_OWNED);
running_thread = curthread;
#if !defined(KSEG_PEEMPT_BEST_CPU)
if (running_thread->td_ksegrp != td->td_ksegrp) {
/* if someone is ahead of this thread, wait our turn */
if (td != TAILQ_FIRST(&kg->kg_runq))
worst_pri = td->td_priority;
dontuse = stopped_cpus | idle_cpus_mask;
 * Find a cpu with the worst priority that runs a thread from
 * the same ksegrp - if multiple exist, prefer first the cpu this
 * thread last ran on and then the current cpu.
SLIST_FOREACH(pc, &cpuhead, pc_allcpu) {
cpumask = pc->pc_cpumask;
cputhread = pc->pc_curthread;
if ((cpumask & dontuse) ||
cputhread->td_ksegrp != kg)
if (cputhread->td_priority > worst_pri) {
worst_pri = cputhread->td_priority;
if (cputhread->td_priority == worst_pri &&
(td->td_lastcpu == pc->pc_cpuid ||
(PCPU_GET(cpumask) == cpumask &&
td->td_lastcpu != best_pcpu->pc_cpuid)))
/* Check if we need to preempt someone */
if (best_pcpu == NULL)
#if defined(IPI_PREEMPTION) && defined(PREEMPTION)
#if !defined(FULL_PREEMPTION)
if (td->td_priority <= PRI_MAX_ITHD)
#endif /* ! FULL_PREEMPTION */
ipi_selected(best_pcpu->pc_cpumask, IPI_PREEMPT);
#endif /* defined(IPI_PREEMPTION) && defined(PREEMPTION) */
if (PCPU_GET(cpuid) != best_pcpu->pc_cpuid) {
best_pcpu->pc_curthread->td_flags |= TDF_NEEDRESCHED;
ipi_selected(best_pcpu->pc_cpumask, IPI_AST);
#if !defined(KSEG_PEEMPT_BEST_CPU)
if (td->td_priority >= running_thread->td_priority)
#if !defined(FULL_PREEMPTION)
if (td->td_priority > PRI_MAX_ITHD) {
running_thread->td_flags |= TDF_NEEDRESCHED;
#endif /* ! FULL_PREEMPTION */
if (running_thread->td_critnest > 1)
running_thread->td_owepreempt = 1;
mi_switch(SW_INVOL, NULL);
#else /* PREEMPTION */
running_thread->td_flags |= TDF_NEEDRESCHED;
#endif /* PREEMPTION */
setrunqueue(struct thread *td, int flags)
CTR3(KTR_RUNQ, "setrunqueue: td:%p kg:%p pid:%d",
td, td->td_ksegrp, td->td_proc->p_pid);
CTR2(KTR_RUNQ, "setrunqueue: td:%p pid:%d",
td, td->td_proc->p_pid);
CTR5(KTR_SCHED, "setrunqueue: %p(%s) prio %d by %p(%s)",
td, td->td_proc->p_comm, td->td_priority, curthread,
curthread->td_proc->p_comm);
mtx_assert(&sched_lock, MA_OWNED);
KASSERT((td->td_inhibitors == 0),
("setrunqueue: trying to run inhibited thread"));
KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
("setrunqueue: bad thread state"));
if ((td->td_proc->p_flag & P_HADTHREADS) == 0) {
 * Common path optimisation: Only one of everything
 * and the KSE is always already attached.
 * Totally ignore the ksegrp run queue.
if (kg->kg_avail_opennings != 1) {
if (limitcount < 1) {
printf("pid %d: corrected slot count (%d->1)\n",
td->td_proc->p_pid, kg->kg_avail_opennings);
kg->kg_avail_opennings = 1;
sched_add(td, flags);
 * If the concurrency has been reduced, and we would go in the
 * assigned section, then keep removing entries from the
 * system run queue, until we are not in that section
 * or there is room for us to be put in that section.
 * What we MUST avoid is the case where there are threads of lower
 * priority than the new one already scheduled, but the new one can not
 * be scheduled itself. That would lead to a non-contiguous set
 * of scheduled threads, and everything would break.
tda = kg->kg_last_assigned;
while ((kg->kg_avail_opennings <= 0) &&
(tda && (tda->td_priority > td->td_priority))) {
 * None free, but there is one we can commandeer.
"setrunqueue: kg:%p: take slot from td: %p", kg, tda);
tda = kg->kg_last_assigned =
TAILQ_PREV(tda, threadqueue, td_runq);
 * Add the thread to the ksegrp's run queue at
 * the appropriate place.
TAILQ_FOREACH(td2, &kg->kg_runq, td_runq) {
if (td2->td_priority > td->td_priority) {
TAILQ_INSERT_BEFORE(td2, td, td_runq);
/* We ran off the end of the TAILQ or it was empty. */
TAILQ_INSERT_TAIL(&kg->kg_runq, td, td_runq);
 * If we have a slot to use, then put the thread on the system
 * run queue and if needed, readjust the last_assigned pointer.
 * It may be that we need to schedule something anyhow
 * even if the available slots are negative so that
 * all the items < last_assigned are scheduled.
if (kg->kg_avail_opennings > 0) {
 * No pre-existing last assigned so whoever is first
 * gets the slot.. (maybe us)
td2 = TAILQ_FIRST(&kg->kg_runq);
kg->kg_last_assigned = td2;
} else if (tda->td_priority > td->td_priority) {
 * We are past last_assigned, so
 * give the next slot to whatever is next,
 * which may or may not be us.
td2 = TAILQ_NEXT(tda, td_runq);
kg->kg_last_assigned = td2;
sched_add(td2, flags);
CTR3(KTR_RUNQ, "setrunqueue: held: td%p kg%p pid%d",
td, td->td_ksegrp, td->td_proc->p_pid);
if ((flags & SRQ_YIELDING) == 0)
maybe_preempt_in_ksegrp(td);
sched_add(td, flags);
 * Kernel thread preemption implementation. Critical sections mark
 * regions of code in which preemptions are not allowed.
CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
(long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
KASSERT(td->td_critnest != 0,
("critical_exit: td_critnest == 0"));
if (td->td_critnest == 1) {
mtx_assert(&sched_lock, MA_NOTOWNED);
if (td->td_owepreempt) {
mtx_lock_spin(&sched_lock);
mi_switch(SW_INVOL, NULL);
mtx_unlock_spin(&sched_lock);
CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
(long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
 * This function is called when a thread is about to be put on the run queue
 * because it has been made runnable or its priority has been adjusted. It
 * determines whether the current thread should immediately be preempted in
 * favor of the new thread. If so, it switches to it and eventually returns
 * true. If not, it returns false so that the caller may place the thread
 * on an appropriate run queue.
maybe_preempt(struct thread *td)
mtx_assert(&sched_lock, MA_OWNED);
 * The new thread should not preempt the current thread if any of the
 * following conditions are true:
 *  - The kernel is in the throes of crashing (panicstr).
 *  - The current thread has a higher (numerically lower) or
 *    equivalent priority. Note that this prevents curthread from
 *    trying to preempt to itself.
 *  - It is too early in the boot for context switches (cold is set).
 *  - The current thread has an inhibitor set or is in the process of
 *    exiting. In this case, the current thread is about to switch
 *    out anyways, so there's no point in preempting. If we did,
 *    the current thread would not be properly resumed as well, so
 *    just avoid that whole landmine.
 *  - If the new thread's priority is not a realtime priority and
 *    the current thread's priority is not an idle priority and
 *    FULL_PREEMPTION is disabled.
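 *    (Concretely: with FULL_PREEMPTION disabled, a thread outside the
 *    interrupt-priority range (pri > PRI_MAX_ITHD) only preempts a thread
 *    that is running at an idle-class priority; see the check below.)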
 * If all of these conditions are false, but the current thread is in
 * a nested critical section, then we have to defer the preemption
 * until we exit the critical section. Otherwise, switch immediately
KASSERT((ctd->td_kse != NULL && ctd->td_kse->ke_thread == ctd),
("thread has no (or wrong) sched-private part."));
KASSERT((td->td_inhibitors == 0),
("maybe_preempt: trying to run inhibited thread"));
pri = td->td_priority;
cpri = ctd->td_priority;
if (panicstr != NULL || pri >= cpri || cold /* || dumping */ ||
TD_IS_INHIBITED(ctd) || td->td_kse->ke_state != KES_THREAD)
#ifndef FULL_PREEMPTION
if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
if (ctd->td_critnest > 1) {
CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
ctd->td_owepreempt = 1;
 * Thread is runnable but not yet put on system run queue.
MPASS(TD_ON_RUNQ(td));
MPASS(td->td_sched->ke_state != KES_ONRUNQ);
if (td->td_proc->p_flag & P_HADTHREADS) {
 * If this is a threaded process we actually ARE on the
 * ksegrp run queue so take it off that first.
 * Also undo any damage done to the last_assigned pointer.
 * XXX Fix setrunqueue so this isn't needed
if (kg->kg_last_assigned == td)
kg->kg_last_assigned =
TAILQ_PREV(td, threadqueue, td_runq);
TAILQ_REMOVE(&kg->kg_runq, td, td_runq);
CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
td->td_proc->p_pid, td->td_proc->p_comm);
mi_switch(SW_INVOL|SW_PREEMPT, td);
/* XXX: There should be a non-static version of this. */
printf_caddr_t(void *data)
printf("%s", (char *)data);
static char preempt_warning[] =
"WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
/************************************************************************
 * SYSTEM RUN QUEUE manipulations and tests                             *
 ************************************************************************/
 * Initialize a run structure.
runq_init(struct runq *rq)
bzero(rq, sizeof *rq);
for (i = 0; i < RQ_NQS; i++)
TAILQ_INIT(&rq->rq_queues[i]);
 * Clear the status bit of the queue corresponding to priority level pri,
 * indicating that it is empty.
runq_clrbit(struct runq *rq, int pri)
rqb = &rq->rq_status;
CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
rqb->rqb_bits[RQB_WORD(pri)],
rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
RQB_BIT(pri), RQB_WORD(pri));
rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
 * Find the index of the first non-empty run queue. This is done by
 * scanning the status bits; a set bit indicates a non-empty queue.
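 * (Worked example, assuming 32-bit status words (RQB_BPW == 32): queue
 * index 37 lives in word RQB_WORD(37) == 1 as bit RQB_BIT(37) == 1 << 5,
 * and RQB_FFS() on a word recovers its lowest set bit, i.e. the highest
 * priority non-empty queue, without walking all RQ_NQS queues.)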
runq_findbit(struct runq *rq)
rqb = &rq->rq_status;
for (i = 0; i < RQB_LEN; i++)
if (rqb->rqb_bits[i]) {
pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
rqb->rqb_bits[i], i, pri);
 * Set the status bit of the queue corresponding to priority level pri,
 * indicating that it is non-empty.
runq_setbit(struct runq *rq, int pri)
rqb = &rq->rq_status;
CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
rqb->rqb_bits[RQB_WORD(pri)],
rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
RQB_BIT(pri), RQB_WORD(pri));
rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
 * Add the KSE to the queue specified by its priority, and set the
 * corresponding status bit.
runq_add(struct runq *rq, struct kse *ke, int flags)
pri = ke->ke_thread->td_priority / RQ_PPQ;
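/*
 * (Hypothetical figures: with RQ_PPQ priorities collapsed per queue
 * (4 in the stock runq.h), a thread priority of 130 selects queue index 32.)
 */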
ke->ke_rqindex = pri;
runq_setbit(rq, pri);
rqh = &rq->rq_queues[pri];
CTR5(KTR_RUNQ, "runq_add: td=%p ke=%p pri=%d %d rqh=%p",
ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
if (flags & SRQ_PREEMPTED) {
TAILQ_INSERT_HEAD(rqh, ke, ke_procq);
TAILQ_INSERT_TAIL(rqh, ke, ke_procq);
 * Return true if there are runnable processes of any priority on the run
 * queue, false otherwise. Has no side effects, does not modify the run
runq_check(struct runq *rq)
rqb = &rq->rq_status;
for (i = 0; i < RQB_LEN; i++)
if (rqb->rqb_bits[i]) {
CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
rqb->rqb_bits[i], i);
CTR0(KTR_RUNQ, "runq_check: empty");
#if defined(SMP) && defined(SCHED_4BSD)
SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
 * Find the highest priority process on the run queue.
runq_choose(struct runq *rq)
mtx_assert(&sched_lock, MA_OWNED);
while ((pri = runq_findbit(rq)) != -1) {
rqh = &rq->rq_queues[pri];
#if defined(SMP) && defined(SCHED_4BSD)
/* fuzz == 1 is normal.. 0 or less are ignored */
 * In the first couple of entries, check if
 * there is one for our CPU as a preference.
int count = runq_fuzz;
int cpu = PCPU_GET(cpuid);
ke2 = ke = TAILQ_FIRST(rqh);
while (count-- && ke2) {
if (ke2->ke_thread->td_lastcpu == cpu) {
ke2 = TAILQ_NEXT(ke2, ke_procq);
ke = TAILQ_FIRST(rqh);
KASSERT(ke != NULL, ("runq_choose: no proc on busy queue"));
"runq_choose: pri=%d kse=%p rqh=%p", pri, ke, rqh);
CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
 * Remove the KSE from the queue specified by its priority, and clear the
 * corresponding status bit if the queue becomes empty.
 * Caller must set ke->ke_state afterwards.
runq_remove(struct runq *rq, struct kse *ke)
KASSERT(ke->ke_proc->p_sflag & PS_INMEM,
KASSERT(ke->ke_thread->td_proc->p_sflag & PS_INMEM,
("runq_remove: process swapped out"));
pri = ke->ke_rqindex;
rqh = &rq->rq_queues[pri];
CTR5(KTR_RUNQ, "runq_remove: td=%p, ke=%p pri=%d %d rqh=%p",
ke->ke_thread, ke, ke->ke_thread->td_priority, pri, rqh);
KASSERT(ke != NULL, ("runq_remove: no proc on busy queue"));
TAILQ_REMOVE(rqh, ke, ke_procq);
if (TAILQ_EMPTY(rqh)) {
CTR0(KTR_RUNQ, "runq_remove: empty");
runq_clrbit(rq, pri);
/****** functions that are temporarily here ***********/
extern struct mtx kse_zombie_lock;
 * Allocate scheduler specific per-process resources.
 * The thread and ksegrp have already been linked in.
 * In this case just set the default concurrency value.
 * proc_init() (UMA init method)
sched_newproc(struct proc *p, struct ksegrp *kg, struct thread *td)
/* This can go in sched_fork */
sched_init_concurrency(kg);
 * thread is being either created or recycled.
 * Fix up the per-scheduler resources associated with it.
 * sched_fork_thread()
 * thread_dtor() (*may go away)
 * thread_init() (*may go away)
sched_newthread(struct thread *td)
struct td_sched *ke;
ke = (struct td_sched *) (td + 1);
bzero(ke, sizeof(*ke));
ke->ke_state = KES_THREAD;
 * Set up an initial concurrency of 1
 * and set the given thread (if given) to be using that
 * May be used "offline"..before the ksegrp is attached to the world
 * and thus wouldn't need schedlock in that case.
 * proc_init() (UMA) via sched_newproc()
sched_init_concurrency(struct ksegrp *kg)
CTR1(KTR_RUNQ, "kg %p init slots and concurrency to 1", kg);
kg->kg_concurrency = 1;
kg->kg_avail_opennings = 1;
 * Change the concurrency of an existing ksegrp to N
sched_set_concurrency(struct ksegrp *kg, int concurrency)
CTR4(KTR_RUNQ, "kg %p set concurrency to %d, slots %d -> %d",
kg->kg_avail_opennings,
kg->kg_avail_opennings + (concurrency - kg->kg_concurrency));
kg->kg_avail_opennings += (concurrency - kg->kg_concurrency);
kg->kg_concurrency = concurrency;
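/*
 * (Worked example: raising the concurrency from 1 to 3 adds 2 to
 * kg_avail_opennings; dropping it back to 1 subtracts 2, possibly leaving
 * the count negative until threads drain from the system run queue.)
 */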
 * Called from thread_exit() for all exiting threads.
 * Not to be confused with sched_exit_thread(), which is only called from
 * thread_exit() for threads exiting without the rest of the process exiting,
 * because sched_exit_thread() is also called from sched_exit() and we
 * wouldn't want to call it twice.
 * XXX This can probably be fixed.
sched_thread_exit(struct thread *td)
SLOT_RELEASE(td->td_ksegrp);
slot_fill(td->td_ksegrp);
#endif /* KERN_SWITCH_INCLUDE */