sys/kern/kern_switch.c

   1 /*-
   2  * Copyright (c) 2001 Jake Burkholder <jake@FreeBSD.org>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  */
  26
  27
  28 #include <sys/cdefs.h>
  29 __FBSDID("$FreeBSD$");
  30
  31 #include "opt_sched.h"
  32
  33 #ifndef KERN_SWITCH_INCLUDE
  34 #include <sys/param.h>
  35 #include <sys/systm.h>
  36 #include <sys/kdb.h>
  37 #include <sys/kernel.h>
  38 #include <sys/ktr.h>
  39 #include <sys/lock.h>
  40 #include <sys/mutex.h>
  41 #include <sys/proc.h>
  42 #include <sys/queue.h>
  43 #include <sys/sched.h>
  44 #else  /* KERN_SWITCH_INCLUDE */
  45 #if defined(SMP) && (defined(__i386__) || defined(__amd64__))
  46 #include <sys/smp.h>
  47 #endif
  48 #if defined(SMP) && defined(SCHED_4BSD)
  49 #include <sys/sysctl.h>
  50 #endif
  51
  52 /* Uncomment this to enable logging of critical_enter/exit. */
  53 #if 0
  54 #define KTR_CRITICAL    KTR_SCHED
  55 #else
  56 #define KTR_CRITICAL    0
  57 #endif
  58
  59 #ifdef FULL_PREEMPTION
  60 #ifndef PREEMPTION
  61 #error "The FULL_PREEMPTION option requires the PREEMPTION option"
  62 #endif
  63 #endif
  64
  65 CTASSERT((RQB_BPW * RQB_LEN) == RQ_NQS);
  66
  67 /*
  68  * kern.sched.preemption allows user space to determine if preemption support
  69  * is compiled in or not.  It is not currently a boot or runtime flag that
  70  * can be changed.
  71  */
  72 #ifdef PREEMPTION
  73 static int kern_sched_preemption = 1;
  74 #else
  75 static int kern_sched_preemption = 0;
  76 #endif
  77 SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
  78     &kern_sched_preemption, 0, "Kernel preemption enabled");
  79
  80 /************************************************************************
  81  * Functions that manipulate runnability from a thread perspective.     *
  82  ************************************************************************/
  83 /*
  84  * Select the thread that will be run next.
  85  */
  86 struct thread *
  87 choosethread(void)
  88 {
  89         struct td_sched *ts;
  90         struct thread *td;
  91
  92 #if defined(SMP) && (defined(__i386__) || defined(__amd64__))
  93         if (smp_active == 0 && PCPU_GET(cpuid) != 0) {
  94                 /* Shutting down, run idlethread on AP's */
  95                 td = PCPU_GET(idlethread);
  96                 ts = td->td_sched;
  97                 CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
  98                 ts->ts_flags |= TSF_DIDRUN;
  99                 TD_SET_RUNNING(td);
 100                 return (td);
 101         }
 102 #endif
 103
 104 retry:
 105         ts = sched_choose();
 106         if (ts) {
 107                 td = ts->ts_thread;
 108                 CTR2(KTR_RUNQ, "choosethread: td=%p pri=%d",
 109                     td, td->td_priority);
 110         } else {
 111                 /* Simulate runq_choose() having returned the idle thread */
 112                 td = PCPU_GET(idlethread);
 113                 ts = td->td_sched;
 114                 CTR1(KTR_RUNQ, "choosethread: td=%p (idle)", td);
 115         }
 116         ts->ts_flags |= TSF_DIDRUN;
 117
 118         /*
 119          * If we are in panic, only allow system threads,
 120          * plus the one we are running in, to be run.
 121          */
 122         if (panicstr && ((td->td_proc->p_flag & P_SYSTEM) == 0 &&
 123             (td->td_flags & TDF_INPANIC) == 0)) {
 124                 /* note that it is no longer on the run queue */
 125                 TD_SET_CAN_RUN(td);
 126                 goto retry;
 127         }
 128
 129         TD_SET_RUNNING(td);
 130         return (td);
 131 }
 132
 133
 134 #if 0
 135 /*
 136  * currently not used.. threads remove themselves from the
 137  * run queue by running.
 138  */
 139 static void
 140 remrunqueue(struct thread *td)
 141 {
 142         mtx_assert(&sched_lock, MA_OWNED);
 143         KASSERT((TD_ON_RUNQ(td)), ("remrunqueue: Bad state on run queue"));
 144         CTR1(KTR_RUNQ, "remrunqueue: td%p", td);
 145         TD_SET_CAN_RUN(td);
 146         /* remove from sys run queue */
 147         sched_rem(td);
 148         return;
 149 }
 150 #endif
 151
 152 /*
 153  * Change the priority of a thread that is on the run queue.
 154  */
 155 void
 156 adjustrunqueue( struct thread *td, int newpri)
 157 {
 158         struct td_sched *ts;
 159
 160         mtx_assert(&sched_lock, MA_OWNED);
 161         KASSERT((TD_ON_RUNQ(td)), ("adjustrunqueue: Bad state on run queue"));
 162
 163         ts = td->td_sched;
 164         CTR1(KTR_RUNQ, "adjustrunqueue: td%p", td);
 165                 /* We only care about the td_sched in the run queue. */
 166         td->td_priority = newpri;
 167 #ifndef SCHED_CORE
 168         if (ts->ts_rqindex != (newpri / RQ_PPQ))
 169 #else
 170         if (ts->ts_rqindex != newpri)
 171 #endif
 172         {
 173                 sched_rem(td);
 174                 sched_add(td, SRQ_BORING);
 175         }
 176 }
 177
 178 void
 179 setrunqueue(struct thread *td, int flags)
 180 {
 181
 182         CTR2(KTR_RUNQ, "setrunqueue: td:%p pid:%d",
 183             td, td->td_proc->p_pid);
 184         CTR5(KTR_SCHED, "setrunqueue: %p(%s) prio %d by %p(%s)",
 185             td, td->td_proc->p_comm, td->td_priority, curthread,
 186             curthread->td_proc->p_comm);
 187         mtx_assert(&sched_lock, MA_OWNED);
 188         KASSERT((td->td_inhibitors == 0),
 189                         ("setrunqueue: trying to run inhibitted thread"));
 190         KASSERT((TD_CAN_RUN(td) || TD_IS_RUNNING(td)),
 191             ("setrunqueue: bad thread state"));
 192         TD_SET_RUNQ(td);
 193         sched_add(td, flags);
 194 }
 195
 196 /*
 197  * Kernel thread preemption implementation.  Critical sections mark
 198  * regions of code in which preemptions are not allowed.
 199  */
 200 void
 201 critical_enter(void)
 202 {
 203         struct thread *td;
 204
 205         td = curthread;
 206         td->td_critnest++;
 207         CTR4(KTR_CRITICAL, "critical_enter by thread %p (%ld, %s) to %d", td,
 208             (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
 209 }
 210
/*
 * Leave one level of critical section for the current thread.
 *
 * Asserts that the thread is inside a critical section, then lowers
 * td_critnest by one.  With PREEMPTION compiled in, leaving the outermost
 * level (td_critnest == 1) also performs an involuntary context switch if
 * td_owepreempt is set.
 */
void
critical_exit(void)
{
        struct thread *td;

        td = curthread;
        KASSERT(td->td_critnest != 0,
            ("critical_exit: td_critnest == 0"));
#ifdef PREEMPTION
        /*
         * Outermost level: drop the count to zero first, then check for a
         * pending preemption.  sched_lock must not be held at this point.
         */
        if (td->td_critnest == 1) {
                td->td_critnest = 0;
                mtx_assert(&sched_lock, MA_NOTOWNED);
                if (td->td_owepreempt) {
                        /*
                         * The count is raised back to 1 while sched_lock is
                         * being acquired and lowered again once it is held;
                         * mi_switch() then runs with sched_lock held.
                         * NOTE(review): presumably the temporary bump keeps
                         * a preemption from being attempted while the lock
                         * is taken -- confirm.  Keep this statement order.
                         */
                        td->td_critnest = 1;
                        mtx_lock_spin(&sched_lock);
                        td->td_critnest--;
                        mi_switch(SW_INVOL, NULL);
                        mtx_unlock_spin(&sched_lock);
                }
        } else
#endif
                /* Nested level (or no PREEMPTION): just drop one level. */
                td->td_critnest--;

        CTR4(KTR_CRITICAL, "critical_exit by thread %p (%ld, %s) to %d", td,
            (long)td->td_proc->p_pid, td->td_proc->p_comm, td->td_critnest);
}
 237
/*
 * This function is called when a thread is about to be put on run queue
 * because it has been made runnable or its priority has been adjusted.  It
 * determines if the new thread should be immediately preempted to.  If so,
 * it switches to it and eventually returns true.  If not, it returns false
 * so that the caller may place the thread on an appropriate run queue.
 *
 * td is the thread that has just become runnable.  sched_lock must be held
 * by the caller.  Returns 1 if a switch to td was performed and 0
 * otherwise; without the PREEMPTION option the function always returns 0.
 */
int
maybe_preempt(struct thread *td)
{
#ifdef PREEMPTION
        struct thread *ctd;
        int cpri, pri;
#endif

        mtx_assert(&sched_lock, MA_OWNED);
#ifdef PREEMPTION
        /*
         * The new thread should not preempt the current thread if any of the
         * following conditions are true:
         *
         *  - The kernel is in the throes of crashing (panicstr).
         *  - The current thread has a higher (numerically lower) or
         *    equivalent priority.  Note that this prevents curthread from
         *    trying to preempt to itself.
         *  - It is too early in the boot for context switches (cold is set).
         *  - The current thread has an inhibitor set or is in the process of
         *    exiting.  In this case, the current thread is about to switch
         *    out anyways, so there's no point in preempting.  If we did,
         *    the current thread would not be properly resumed as well, so
         *    just avoid that whole landmine.
         *  - The new thread's scheduler state (ts_state) is not TSS_THREAD.
         *  - If the new thread's priority is not a realtime priority and
         *    the current thread's priority is not an idle priority and
         *    FULL_PREEMPTION is disabled.
         *
         * If all of these conditions are false, but the current thread is in
         * a nested critical section, then we have to defer the preemption
         * until we exit the critical section.  Otherwise, switch immediately
         * to the new thread.
         */
        ctd = curthread;
        KASSERT ((ctd->td_sched != NULL && ctd->td_sched->ts_thread == ctd),
          ("thread has no (or wrong) sched-private part."));
        KASSERT((td->td_inhibitors == 0),
                        ("maybe_preempt: trying to run inhibitted thread"));
        pri = td->td_priority;
        cpri = ctd->td_priority;
        if (panicstr != NULL || pri >= cpri || cold /* || dumping */ ||
            TD_IS_INHIBITED(ctd) || td->td_sched->ts_state != TSS_THREAD)
                return (0);
#ifndef FULL_PREEMPTION
        if (pri > PRI_MAX_ITHD && cpri < PRI_MIN_IDLE)
                return (0);
#endif

        /*
         * Defer: record the owed preemption in td_owepreempt and let
         * critical_exit() perform the switch later.
         * NOTE(review): the threshold is "> 1" rather than "> 0",
         * presumably because holding sched_lock already accounts for one
         * nesting level -- confirm.
         */
        if (ctd->td_critnest > 1) {
                CTR1(KTR_PROC, "maybe_preempt: in critical section %d",
                    ctd->td_critnest);
                ctd->td_owepreempt = 1;
                return (0);
        }

        /*
         * Thread is runnable but not yet put on system run queue.
         */
        MPASS(TD_ON_RUNQ(td));
        MPASS(td->td_sched->ts_state != TSS_ONRUNQ);
        TD_SET_RUNNING(td);
        CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
            td->td_proc->p_pid, td->td_proc->p_comm);
        /* Switch away now; control comes back here once ctd runs again. */
        mi_switch(SW_INVOL|SW_PREEMPT, td);
        return (1);
#else
        /* Preemption not compiled in: the caller always queues the thread. */
        return (0);
#endif
}
 314
#if 0
#ifndef PREEMPTION
/*
 * Compiled out (#if 0).  When enabled in a kernel built without PREEMPTION,
 * this registers a SYSINIT at SI_SUB_COPYRIGHT that prints preempt_warning
 * through printf_caddr_t().
 */
/* XXX: There should be a non-static version of this. */
static void
printf_caddr_t(void *data)
{
        /* data is treated as a NUL-terminated string and printed as is. */
        printf("%s", (char *)data);
}
static char preempt_warning[] =
    "WARNING: Kernel preemption is disabled, expect reduced performance.\n";
SYSINIT(preempt_warning, SI_SUB_COPYRIGHT, SI_ORDER_ANY, printf_caddr_t,
    preempt_warning)
#endif
#endif
 329
 330 /************************************************************************
 331  * SYSTEM RUN QUEUE manipulations and tests                             *
 332  ************************************************************************/
 333 /*
 334  * Initialize a run structure.
 335  */
 336 void
 337 runq_init(struct runq *rq)
 338 {
 339         int i;
 340
 341         bzero(rq, sizeof *rq);
 342         for (i = 0; i < RQ_NQS; i++)
 343                 TAILQ_INIT(&rq->rq_queues[i]);
 344 }
 345
 346 /*
 347  * Clear the status bit of the queue corresponding to priority level pri,
 348  * indicating that it is empty.
 349  */
 350 static __inline void
 351 runq_clrbit(struct runq *rq, int pri)
 352 {
 353         struct rqbits *rqb;
 354
 355         rqb = &rq->rq_status;
 356         CTR4(KTR_RUNQ, "runq_clrbit: bits=%#x %#x bit=%#x word=%d",
 357             rqb->rqb_bits[RQB_WORD(pri)],
 358             rqb->rqb_bits[RQB_WORD(pri)] & ~RQB_BIT(pri),
 359             RQB_BIT(pri), RQB_WORD(pri));
 360         rqb->rqb_bits[RQB_WORD(pri)] &= ~RQB_BIT(pri);
 361 }
 362
 363 /*
 364  * Find the index of the first non-empty run queue.  This is done by
 365  * scanning the status bits, a set bit indicates a non-empty queue.
 366  */
 367 static __inline int
 368 runq_findbit(struct runq *rq)
 369 {
 370         struct rqbits *rqb;
 371         int pri;
 372         int i;
 373
 374         rqb = &rq->rq_status;
 375         for (i = 0; i < RQB_LEN; i++)
 376                 if (rqb->rqb_bits[i]) {
 377                         pri = RQB_FFS(rqb->rqb_bits[i]) + (i << RQB_L2BPW);
 378                         CTR3(KTR_RUNQ, "runq_findbit: bits=%#x i=%d pri=%d",
 379                             rqb->rqb_bits[i], i, pri);
 380                         return (pri);
 381                 }
 382
 383         return (-1);
 384 }
 385
 386 /*
 387  * Set the status bit of the queue corresponding to priority level pri,
 388  * indicating that it is non-empty.
 389  */
 390 static __inline void
 391 runq_setbit(struct runq *rq, int pri)
 392 {
 393         struct rqbits *rqb;
 394
 395         rqb = &rq->rq_status;
 396         CTR4(KTR_RUNQ, "runq_setbit: bits=%#x %#x bit=%#x word=%d",
 397             rqb->rqb_bits[RQB_WORD(pri)],
 398             rqb->rqb_bits[RQB_WORD(pri)] | RQB_BIT(pri),
 399             RQB_BIT(pri), RQB_WORD(pri));
 400         rqb->rqb_bits[RQB_WORD(pri)] |= RQB_BIT(pri);
 401 }
 402
 403 /*
 404  * Add the thread to the queue specified by its priority, and set the
 405  * corresponding status bit.
 406  */
 407 void
 408 runq_add(struct runq *rq, struct td_sched *ts, int flags)
 409 {
 410         struct rqhead *rqh;
 411         int pri;
 412
 413         pri = ts->ts_thread->td_priority / RQ_PPQ;
 414         ts->ts_rqindex = pri;
 415         runq_setbit(rq, pri);
 416         rqh = &rq->rq_queues[pri];
 417         CTR5(KTR_RUNQ, "runq_add: td=%p ts=%p pri=%d %d rqh=%p",
 418             ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
 419         if (flags & SRQ_PREEMPTED) {
 420                 TAILQ_INSERT_HEAD(rqh, ts, ts_procq);
 421         } else {
 422                 TAILQ_INSERT_TAIL(rqh, ts, ts_procq);
 423         }
 424 }
 425
 426 /*
 427  * Return true if there are runnable processes of any priority on the run
 428  * queue, false otherwise.  Has no side effects, does not modify the run
 429  * queue structure.
 430  */
 431 int
 432 runq_check(struct runq *rq)
 433 {
 434         struct rqbits *rqb;
 435         int i;
 436
 437         rqb = &rq->rq_status;
 438         for (i = 0; i < RQB_LEN; i++)
 439                 if (rqb->rqb_bits[i]) {
 440                         CTR2(KTR_RUNQ, "runq_check: bits=%#x i=%d",
 441                             rqb->rqb_bits[i], i);
 442                         return (1);
 443                 }
 444         CTR0(KTR_RUNQ, "runq_check: empty");
 445
 446         return (0);
 447 }
 448
 449 #if defined(SMP) && defined(SCHED_4BSD)
 450 int runq_fuzz = 1;
 451 SYSCTL_INT(_kern_sched, OID_AUTO, runq_fuzz, CTLFLAG_RW, &runq_fuzz, 0, "");
 452 #endif
 453
 454 /*
 455  * Find the highest priority process on the run queue.
 456  */
 457 struct td_sched *
 458 runq_choose(struct runq *rq)
 459 {
 460         struct rqhead *rqh;
 461         struct td_sched *ts;
 462         int pri;
 463
 464         mtx_assert(&sched_lock, MA_OWNED);
 465         while ((pri = runq_findbit(rq)) != -1) {
 466                 rqh = &rq->rq_queues[pri];
 467 #if defined(SMP) && defined(SCHED_4BSD)
 468                 /* fuzz == 1 is normal.. 0 or less are ignored */
 469                 if (runq_fuzz > 1) {
 470                         /*
 471                          * In the first couple of entries, check if
 472                          * there is one for our CPU as a preference.
 473                          */
 474                         int count = runq_fuzz;
 475                         int cpu = PCPU_GET(cpuid);
 476                         struct td_sched *ts2;
 477                         ts2 = ts = TAILQ_FIRST(rqh);
 478
 479                         while (count-- && ts2) {
 480                                 if (ts->ts_thread->td_lastcpu == cpu) {
 481                                         ts = ts2;
 482                                         break;
 483                                 }
 484                                 ts2 = TAILQ_NEXT(ts2, ts_procq);
 485                         }
 486                 } else
 487 #endif
 488                         ts = TAILQ_FIRST(rqh);
 489                 KASSERT(ts != NULL, ("runq_choose: no proc on busy queue"));
 490                 CTR3(KTR_RUNQ,
 491                     "runq_choose: pri=%d td_sched=%p rqh=%p", pri, ts, rqh);
 492                 return (ts);
 493         }
 494         CTR1(KTR_RUNQ, "runq_choose: idleproc pri=%d", pri);
 495
 496         return (NULL);
 497 }
 498
 499 /*
 500  * Remove the thread from the queue specified by its priority, and clear the
 501  * corresponding status bit if the queue becomes empty.
 502  * Caller must set ts->ts_state afterwards.
 503  */
 504 void
 505 runq_remove(struct runq *rq, struct td_sched *ts)
 506 {
 507         struct rqhead *rqh;
 508         int pri;
 509
 510         KASSERT(ts->ts_thread->td_proc->p_sflag & PS_INMEM,
 511                 ("runq_remove: process swapped out"));
 512         pri = ts->ts_rqindex;
 513         rqh = &rq->rq_queues[pri];
 514         CTR5(KTR_RUNQ, "runq_remove: td=%p, ts=%p pri=%d %d rqh=%p",
 515             ts->ts_thread, ts, ts->ts_thread->td_priority, pri, rqh);
 516         KASSERT(ts != NULL, ("runq_remove: no proc on busy queue"));
 517         TAILQ_REMOVE(rqh, ts, ts_procq);
 518         if (TAILQ_EMPTY(rqh)) {
 519                 CTR0(KTR_RUNQ, "runq_remove: empty");
 520                 runq_clrbit(rq, pri);
 521         }
 522 }
 523
 524 /****** functions that are temporarily here ***********/
 525 #include <vm/uma.h>
 526 extern struct mtx kse_zombie_lock;
 527
/*
 *  Allocate scheduler specific per-process resources.
 * The thread and proc have already been linked in.
 *
 * In this file the function is an empty stub: neither p nor td is touched.
 *
 * Called from:
 *  proc_init() (UMA init method)
 */
void
sched_newproc(struct proc *p, struct thread *td)
{
}
 539
 540 /*
 541  * thread is being either created or recycled.
 542  * Fix up the per-scheduler resources associated with it.
 543  * Called from:
 544  *  sched_fork_thread()
 545  *  thread_dtor()  (*may go away)
 546  *  thread_init()  (*may go away)
 547  */
 548 void
 549 sched_newthread(struct thread *td)
 550 {
 551         struct td_sched *ts;
 552
 553         ts = (struct td_sched *) (td + 1);
 554         bzero(ts, sizeof(*ts));
 555         td->td_sched     = ts;
 556         ts->ts_thread   = td;
 557         ts->ts_state    = TSS_THREAD;
 558 }
 559
/*
 * Scheduler hook for initializing a process's concurrency.
 * In this file the function is an empty stub: p is not touched.
 *
 * Called from:
 *  thr_create()
 *  proc_init() (UMA) via sched_newproc()
 */
void
sched_init_concurrency(struct proc *p)
{
}
 569
/*
 * Change the concurrency of an existing proc to N
 * (N being the concurrency argument).
 * In this file the function is an empty stub: both arguments are ignored.
 *
 * Called from:
 *  kse_create()
 *  kse_exit()
 *  thread_exit()
 *  thread_single()
 */
void
sched_set_concurrency(struct proc *p, int concurrency)
{
}
 582
/*
 * Called from thread_exit() for every exiting thread.
 *
 * Not to be confused with sched_exit_thread(), which thread_exit() calls
 * only for threads that exit while the rest of the process lives on:
 * sched_exit_thread() is also called from sched_exit(), and we would not
 * want to call it twice.
 * XXX This can probably be fixed.
 *
 * In this file the function is an empty stub: td is not touched.
 */
void
sched_thread_exit(struct thread *td)
{
}
 596
 597 #endif /* KERN_SWITCH_INCLUDE */