sys/kern/subr_sleepqueue.c

   1 /*-
   2  * Copyright (c) 2004 John Baldwin <jhb@FreeBSD.org>
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. Neither the name of the author nor the names of any co-contributors
  14  *    may be used to endorse or promote products derived from this software
  15  *    without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  */
  29
  30 /*
  31  * Implementation of sleep queues used to hold queue of threads blocked on
  32  * a wait channel.  Sleep queues differ from turnstiles in that wait
  33  * channels are not owned by anyone, so there is no priority propagation.
  34  * Sleep queues can also provide a timeout and can also be interrupted by
  35  * signals.  That said, there are several similarities between the turnstile
  36  * and sleep queue implementations.  (Note: turnstiles were implemented
  37  * first.)  For example, both use a hash table of the same size where each
  38  * bucket is referred to as a "chain" that contains both a spin lock and
  39  * a linked list of queues.  An individual queue is located by using a hash
  40  * to pick a chain, locking the chain, and then walking the chain searching
  41  * for the queue.  This means that a wait channel object does not need to
  42  * embed its queue head just as locks do not embed their turnstile queue
  43  * head.  Threads also carry around a sleep queue that they lend to the
  44  * wait channel when blocking.  Just as in turnstiles, the queue includes
  45  * a free list of the sleep queues of other threads blocked on the same
  46  * wait channel in the case of multiple waiters.
  47  *
  48  * Some additional functionality provided by sleep queues includes the
  49  * ability to set a timeout.  The timeout is managed using a per-thread
  50  * callout that resumes a thread if it is asleep.  A thread may also
  51  * catch signals while it is asleep (aka an interruptible sleep).  The
  52  * signal code uses sleepq_abort() to interrupt a sleeping thread.  Finally,
  53  * sleep queues also provide some extra assertions.  One is not allowed to
  54  * mix the sleep/wakeup and cv APIs for a given wait channel.  Also, one
  55  * must consistently use the same lock to synchronize with a wait channel,
  56  * though this check is currently only a warning for sleep/wakeup due to
  57  * pre-existing abuse of that API.  The same lock must also be held when
  58  * awakening threads, though that is currently only enforced for condition
  59  * variables.
  60  */
  61
  62 #include <sys/cdefs.h>
  63 __FBSDID("$FreeBSD$");
  64
  65 #include "opt_sleepqueue_profiling.h"
  66 #include "opt_ddb.h"
  67
  68 #include <sys/param.h>
  69 #include <sys/systm.h>
  70 #include <sys/lock.h>
  71 #include <sys/kernel.h>
  72 #include <sys/ktr.h>
  73 #include <sys/malloc.h>
  74 #include <sys/mutex.h>
  75 #include <sys/proc.h>
  76 #include <sys/sched.h>
  77 #include <sys/signalvar.h>
  78 #include <sys/sleepqueue.h>
  79 #include <sys/sysctl.h>
  80
  81 #ifdef DDB
  82 #include <ddb/ddb.h>
  83 #endif
  84
  85 /*
  86  * Constants for the hash table of sleep queue chains.  These constants are
  87  * the same ones that 4BSD (and possibly earlier versions of BSD) used.
  88  * Basically, we ignore the lower 8 bits of the address since most wait
  89  * channel pointers are aligned and only look at the next 7 bits for the
  90  * hash.  SC_TABLESIZE must be a power of two for SC_MASK to work properly.
  91  */
  92 #define SC_TABLESIZE    128                     /* Must be power of 2. */
  93 #define SC_MASK         (SC_TABLESIZE - 1)
  94 #define SC_SHIFT        8
  95 #define SC_HASH(wc)     (((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
  96 #define SC_LOOKUP(wc)   &sleepq_chains[SC_HASH(wc)]
  97 #define NR_SLEEPQS      2
  98 /*
  99  * There two different lists of sleep queues.  Both lists are connected
 100  * via the sq_hash entries.  The first list is the sleep queue chain list
 101  * that a sleep queue is on when it is attached to a wait channel.  The
 102  * second list is the free list hung off of a sleep queue that is attached
 103  * to a wait channel.
 104  *
 105  * Each sleep queue also contains the wait channel it is attached to, the
 106  * list of threads blocked on that wait channel, flags specific to the
 107  * wait channel, and the lock used to synchronize with a wait channel.
 108  * The flags are used to catch mismatches between the various consumers
 109  * of the sleep queue API (e.g. sleep/wakeup and condition variables).
 110  * The lock pointer is only used when invariants are enabled for various
 111  * debugging checks.
 112  *
 113  * Locking key:
 114  *  c - sleep queue chain lock
 115  */
 116 struct sleepqueue {
 117         TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS];    /* (c) Blocked threads. */
 118         LIST_ENTRY(sleepqueue) sq_hash;         /* (c) Chain and free list. */
 119         LIST_HEAD(, sleepqueue) sq_free;        /* (c) Free queues. */
 120         void    *sq_wchan;                      /* (c) Wait channel. */
 121 #ifdef INVARIANTS
 122         int     sq_type;                        /* (c) Queue type. */
 123         struct lock_object *sq_lock;            /* (c) Associated lock. */
 124 #endif
 125 };
 126
 127 struct sleepqueue_chain {
 128         LIST_HEAD(, sleepqueue) sc_queues;      /* List of sleep queues. */
 129         struct mtx sc_lock;                     /* Spin lock for this chain. */
 130 #ifdef SLEEPQUEUE_PROFILING
 131         u_int   sc_depth;                       /* Length of sc_queues. */
 132         u_int   sc_max_depth;                   /* Max length of sc_queues. */
 133 #endif
 134 };
 135
 136 #ifdef SLEEPQUEUE_PROFILING
 137 u_int sleepq_max_depth;
 138 SYSCTL_NODE(_debug, OID_AUTO, sleepq, CTLFLAG_RD, 0, "sleepq profiling");
 139 SYSCTL_NODE(_debug_sleepq, OID_AUTO, chains, CTLFLAG_RD, 0,
 140     "sleepq chain stats");
 141 SYSCTL_UINT(_debug_sleepq, OID_AUTO, max_depth, CTLFLAG_RD, &sleepq_max_depth,
 142     0, "maxmimum depth achieved of a single chain");
 143 #endif
 144 static struct sleepqueue_chain sleepq_chains[SC_TABLESIZE];
 145
 146 static MALLOC_DEFINE(M_SLEEPQUEUE, "sleepqueue", "sleep queues");
 147
 148 /*
 149  * Prototypes for non-exported routines.
 150  */
 151 static int      sleepq_catch_signals(void *wchan);
 152 static int      sleepq_check_signals(void);
 153 static int      sleepq_check_timeout(void);
 154 static void     sleepq_switch(void *wchan);
 155 static void     sleepq_timeout(void *arg);
 156 static void     sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri);
 157
 158 /*
 159  * Early initialization of sleep queues that is called from the sleepinit()
 160  * SYSINIT.
 161  */
 162 void
 163 init_sleepqueues(void)
 164 {
 165 #ifdef SLEEPQUEUE_PROFILING
 166         struct sysctl_oid *chain_oid;
 167         char chain_name[10];
 168 #endif
 169         int i;
 170
 171         for (i = 0; i < SC_TABLESIZE; i++) {
 172                 LIST_INIT(&sleepq_chains[i].sc_queues);
 173                 mtx_init(&sleepq_chains[i].sc_lock, "sleepq chain", NULL,
 174                     MTX_SPIN);
 175 #ifdef SLEEPQUEUE_PROFILING
 176                 snprintf(chain_name, sizeof(chain_name), "%d", i);
 177                 chain_oid = SYSCTL_ADD_NODE(NULL,
 178                     SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO,
 179                     chain_name, CTLFLAG_RD, NULL, "sleepq chain stats");
 180                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
 181                     "depth", CTLFLAG_RD, &sleepq_chains[i].sc_depth, 0, NULL);
 182                 SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO,
 183                     "max_depth", CTLFLAG_RD, &sleepq_chains[i].sc_max_depth, 0,
 184                     NULL);
 185 #endif
 186         }
 187         thread0.td_sleepqueue = sleepq_alloc();
 188 }
 189
 190 /*
 191  * Malloc and initialize a new sleep queue for a new thread.
 192  */
 193 struct sleepqueue *
 194 sleepq_alloc(void)
 195 {
 196         struct sleepqueue *sq;
 197         int i;
 198
 199         sq = malloc(sizeof(struct sleepqueue), M_SLEEPQUEUE, M_WAITOK | M_ZERO);
 200         for (i = 0; i < NR_SLEEPQS; i++)
 201                 TAILQ_INIT(&sq->sq_blocked[i]);
 202         LIST_INIT(&sq->sq_free);
 203         return (sq);
 204 }
 205
 206 /*
 207  * Free a sleep queue when a thread is destroyed.
 208  */
 209 void
 210 sleepq_free(struct sleepqueue *sq)
 211 {
 212         int i;
 213
 214         MPASS(sq != NULL);
 215         for (i = 0; i < NR_SLEEPQS; i++)
 216                 MPASS(TAILQ_EMPTY(&sq->sq_blocked[i]));
 217         free(sq, M_SLEEPQUEUE);
 218 }
 219
 220 /*
 221  * Lock the sleep queue chain associated with the specified wait channel.
 222  */
 223 void
 224 sleepq_lock(void *wchan)
 225 {
 226         struct sleepqueue_chain *sc;
 227
 228         sc = SC_LOOKUP(wchan);
 229         mtx_lock_spin(&sc->sc_lock);
 230 }
 231
 232 /*
 233  * Look up the sleep queue associated with a given wait channel in the hash
 234  * table locking the associated sleep queue chain.  If no queue is found in
 235  * the table, NULL is returned.
 236  */
 237 struct sleepqueue *
 238 sleepq_lookup(void *wchan)
 239 {
 240         struct sleepqueue_chain *sc;
 241         struct sleepqueue *sq;
 242
 243         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 244         sc = SC_LOOKUP(wchan);
 245         mtx_assert(&sc->sc_lock, MA_OWNED);
 246         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 247                 if (sq->sq_wchan == wchan)
 248                         return (sq);
 249         return (NULL);
 250 }
 251
 252 /*
 253  * Unlock the sleep queue chain associated with a given wait channel.
 254  */
 255 void
 256 sleepq_release(void *wchan)
 257 {
 258         struct sleepqueue_chain *sc;
 259
 260         sc = SC_LOOKUP(wchan);
 261         mtx_unlock_spin(&sc->sc_lock);
 262 }
 263
 264 /*
 265  * Places the current thread on the sleep queue for the specified wait
 266  * channel.  If INVARIANTS is enabled, then it associates the passed in
 267  * lock with the sleepq to make sure it is held when that sleep queue is
 268  * woken up.
 269  */
 270 void
 271 sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
 272     int queue)
 273 {
 274         struct sleepqueue_chain *sc;
 275         struct sleepqueue *sq;
 276         struct thread *td;
 277
 278         td = curthread;
 279         sc = SC_LOOKUP(wchan);
 280         mtx_assert(&sc->sc_lock, MA_OWNED);
 281         MPASS(td->td_sleepqueue != NULL);
 282         MPASS(wchan != NULL);
 283         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 284
 285         /* If this thread is not allowed to sleep, die a horrible death. */
 286         KASSERT(!(td->td_pflags & TDP_NOSLEEPING),
 287             ("Trying sleep, but thread marked as sleeping prohibited"));
 288
 289         /* Look up the sleep queue associated with the wait channel 'wchan'. */
 290         sq = sleepq_lookup(wchan);
 291
 292         /*
 293          * If the wait channel does not already have a sleep queue, use
 294          * this thread's sleep queue.  Otherwise, insert the current thread
 295          * into the sleep queue already in use by this wait channel.
 296          */
 297         if (sq == NULL) {
 298 #ifdef INVARIANTS
 299                 int i;
 300
 301                 sq = td->td_sleepqueue;
 302                 for (i = 0; i < NR_SLEEPQS; i++)
 303                         KASSERT(TAILQ_EMPTY(&sq->sq_blocked[i]),
 304                                 ("thread's sleep queue %d is not empty", i));
 305                 KASSERT(LIST_EMPTY(&sq->sq_free),
 306                     ("thread's sleep queue has a non-empty free list"));
 307                 KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
 308                 sq->sq_lock = lock;
 309                 sq->sq_type = flags & SLEEPQ_TYPE;
 310 #endif
 311 #ifdef SLEEPQUEUE_PROFILING
 312                 sc->sc_depth++;
 313                 if (sc->sc_depth > sc->sc_max_depth) {
 314                         sc->sc_max_depth = sc->sc_depth;
 315                         if (sc->sc_max_depth > sleepq_max_depth)
 316                                 sleepq_max_depth = sc->sc_max_depth;
 317                 }
 318 #endif
 319                 sq = td->td_sleepqueue;
 320                 LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
 321                 sq->sq_wchan = wchan;
 322         } else {
 323                 MPASS(wchan == sq->sq_wchan);
 324                 MPASS(lock == sq->sq_lock);
 325                 MPASS((flags & SLEEPQ_TYPE) == sq->sq_type);
 326                 LIST_INSERT_HEAD(&sq->sq_free, td->td_sleepqueue, sq_hash);
 327         }
 328         TAILQ_INSERT_TAIL(&sq->sq_blocked[queue], td, td_slpq);
 329         td->td_sleepqueue = NULL;
 330         mtx_lock_spin(&sched_lock);
 331         td->td_sqqueue = queue;
 332         td->td_wchan = wchan;
 333         td->td_wmesg = wmesg;
 334         if (flags & SLEEPQ_INTERRUPTIBLE) {
 335                 td->td_flags |= TDF_SINTR;
 336                 td->td_flags &= ~TDF_SLEEPABORT;
 337         }
 338         mtx_unlock_spin(&sched_lock);
 339 }
 340
 341 /*
 342  * Sets a timeout that will remove the current thread from the specified
 343  * sleep queue after timo ticks if the thread has not already been awakened.
 344  */
 345 void
 346 sleepq_set_timeout(void *wchan, int timo)
 347 {
 348         struct sleepqueue_chain *sc;
 349         struct thread *td;
 350
 351         td = curthread;
 352         sc = SC_LOOKUP(wchan);
 353         mtx_assert(&sc->sc_lock, MA_OWNED);
 354         MPASS(TD_ON_SLEEPQ(td));
 355         MPASS(td->td_sleepqueue == NULL);
 356         MPASS(wchan != NULL);
 357         callout_reset(&td->td_slpcallout, timo, sleepq_timeout, td);
 358 }
 359
 360 /*
 361  * Marks the pending sleep of the current thread as interruptible and
 362  * makes an initial check for pending signals before putting a thread
 363  * to sleep. Return with sleep queue and scheduler lock held.
 364  */
 365 static int
 366 sleepq_catch_signals(void *wchan)
 367 {
 368         struct sleepqueue_chain *sc;
 369         struct sleepqueue *sq;
 370         struct thread *td;
 371         struct proc *p;
 372         struct sigacts *ps;
 373         int sig, ret;
 374
 375         td = curthread;
 376         p = curproc;
 377         sc = SC_LOOKUP(wchan);
 378         mtx_assert(&sc->sc_lock, MA_OWNED);
 379         MPASS(wchan != NULL);
 380         CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)",
 381                 (void *)td, (long)p->p_pid, p->p_comm);
 382
 383         MPASS(td->td_flags & TDF_SINTR);
 384         mtx_unlock_spin(&sc->sc_lock);
 385
 386         /* See if there are any pending signals for this thread. */
 387         PROC_LOCK(p);
 388         ps = p->p_sigacts;
 389         mtx_lock(&ps->ps_mtx);
 390         sig = cursig(td);
 391         if (sig == 0) {
 392                 mtx_unlock(&ps->ps_mtx);
 393                 ret = thread_suspend_check(1);
 394                 MPASS(ret == 0 || ret == EINTR || ret == ERESTART);
 395         } else {
 396                 if (SIGISMEMBER(ps->ps_sigintr, sig))
 397                         ret = EINTR;
 398                 else
 399                         ret = ERESTART;
 400                 mtx_unlock(&ps->ps_mtx);
 401         }
 402
 403         if (ret == 0) {
 404                 mtx_lock_spin(&sc->sc_lock);
 405                 /*
 406                  * Lock sched_lock before unlocking proc lock,
 407                  * without this, we could lose a race.
 408                  */
 409                 mtx_lock_spin(&sched_lock);
 410                 PROC_UNLOCK(p);
 411                 if (!(td->td_flags & TDF_INTERRUPT))
 412                         return (0);
 413                 /* KSE threads tried unblocking us. */
 414                 ret = td->td_intrval;
 415                 mtx_unlock_spin(&sched_lock);
 416                 MPASS(ret == EINTR || ret == ERESTART);
 417         } else {
 418                 PROC_UNLOCK(p);
 419                 mtx_lock_spin(&sc->sc_lock);
 420         }
 421         /*
 422          * There were pending signals and this thread is still
 423          * on the sleep queue, remove it from the sleep queue.
 424          */
 425         sq = sleepq_lookup(wchan);
 426         mtx_lock_spin(&sched_lock);
 427         if (TD_ON_SLEEPQ(td))
 428                 sleepq_resume_thread(sq, td, -1);
 429         return (ret);
 430 }
 431
 432 /*
 433  * Switches to another thread if we are still asleep on a sleep queue and
 434  * drop the lock on the sleep queue chain.  Returns with sched_lock held.
 435  */
 436 static void
 437 sleepq_switch(void *wchan)
 438 {
 439         struct sleepqueue_chain *sc;
 440         struct thread *td;
 441
 442         td = curthread;
 443         sc = SC_LOOKUP(wchan);
 444         mtx_assert(&sc->sc_lock, MA_OWNED);
 445         mtx_assert(&sched_lock, MA_OWNED);
 446
 447         /*
 448          * If we have a sleep queue, then we've already been woken up, so
 449          * just return.
 450          */
 451         if (td->td_sleepqueue != NULL) {
 452                 MPASS(!TD_ON_SLEEPQ(td));
 453                 mtx_unlock_spin(&sc->sc_lock);
 454                 return;
 455         }
 456
 457         /*
 458          * Otherwise, actually go to sleep.
 459          */
 460         mtx_unlock_spin(&sc->sc_lock);
 461         sched_sleep(td);
 462         TD_SET_SLEEPING(td);
 463         mi_switch(SW_VOL, NULL);
 464         KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
 465         CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
 466             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_proc->p_comm);
 467 }
 468
 469 /*
 470  * Check to see if we timed out.
 471  */
 472 static int
 473 sleepq_check_timeout(void)
 474 {
 475         struct thread *td;
 476
 477         mtx_assert(&sched_lock, MA_OWNED);
 478         td = curthread;
 479
 480         /*
 481          * If TDF_TIMEOUT is set, we timed out.
 482          */
 483         if (td->td_flags & TDF_TIMEOUT) {
 484                 td->td_flags &= ~TDF_TIMEOUT;
 485                 return (EWOULDBLOCK);
 486         }
 487
 488         /*
 489          * If TDF_TIMOFAIL is set, the timeout ran after we had
 490          * already been woken up.
 491          */
 492         if (td->td_flags & TDF_TIMOFAIL)
 493                 td->td_flags &= ~TDF_TIMOFAIL;
 494
 495         /*
 496          * If callout_stop() fails, then the timeout is running on
 497          * another CPU, so synchronize with it to avoid having it
 498          * accidentally wake up a subsequent sleep.
 499          */
 500         else if (callout_stop(&td->td_slpcallout) == 0) {
 501                 td->td_flags |= TDF_TIMEOUT;
 502                 TD_SET_SLEEPING(td);
 503                 mi_switch(SW_INVOL, NULL);
 504         }
 505         return (0);
 506 }
 507
 508 /*
 509  * Check to see if we were awoken by a signal.
 510  */
 511 static int
 512 sleepq_check_signals(void)
 513 {
 514         struct thread *td;
 515
 516         mtx_assert(&sched_lock, MA_OWNED);
 517         td = curthread;
 518
 519         /* We are no longer in an interruptible sleep. */
 520         if (td->td_flags & TDF_SINTR)
 521                 td->td_flags &= ~TDF_SINTR;
 522
 523         if (td->td_flags & TDF_SLEEPABORT) {
 524                 td->td_flags &= ~TDF_SLEEPABORT;
 525                 return (td->td_intrval);
 526         }
 527
 528         if (td->td_flags & TDF_INTERRUPT)
 529                 return (td->td_intrval);
 530
 531         return (0);
 532 }
 533
 534 /*
 535  * Block the current thread until it is awakened from its sleep queue.
 536  */
 537 void
 538 sleepq_wait(void *wchan)
 539 {
 540
 541         MPASS(!(curthread->td_flags & TDF_SINTR));
 542         mtx_lock_spin(&sched_lock);
 543         sleepq_switch(wchan);
 544         mtx_unlock_spin(&sched_lock);
 545 }
 546
 547 /*
 548  * Block the current thread until it is awakened from its sleep queue
 549  * or it is interrupted by a signal.
 550  */
 551 int
 552 sleepq_wait_sig(void *wchan)
 553 {
 554         int rcatch;
 555         int rval;
 556
 557         rcatch = sleepq_catch_signals(wchan);
 558         if (rcatch == 0)
 559                 sleepq_switch(wchan);
 560         else
 561                 sleepq_release(wchan);
 562         rval = sleepq_check_signals();
 563         mtx_unlock_spin(&sched_lock);
 564         if (rcatch)
 565                 return (rcatch);
 566         return (rval);
 567 }
 568
 569 /*
 570  * Block the current thread until it is awakened from its sleep queue
 571  * or it times out while waiting.
 572  */
 573 int
 574 sleepq_timedwait(void *wchan)
 575 {
 576         int rval;
 577
 578         MPASS(!(curthread->td_flags & TDF_SINTR));
 579         mtx_lock_spin(&sched_lock);
 580         sleepq_switch(wchan);
 581         rval = sleepq_check_timeout();
 582         mtx_unlock_spin(&sched_lock);
 583         return (rval);
 584 }
 585
 586 /*
 587  * Block the current thread until it is awakened from its sleep queue,
 588  * it is interrupted by a signal, or it times out waiting to be awakened.
 589  */
 590 int
 591 sleepq_timedwait_sig(void *wchan)
 592 {
 593         int rcatch, rvalt, rvals;
 594
 595         rcatch = sleepq_catch_signals(wchan);
 596         if (rcatch == 0)
 597                 sleepq_switch(wchan);
 598         else
 599                 sleepq_release(wchan);
 600         rvalt = sleepq_check_timeout();
 601         rvals = sleepq_check_signals();
 602         mtx_unlock_spin(&sched_lock);
 603         if (rcatch)
 604                 return (rcatch);
 605         if (rvals)
 606                 return (rvals);
 607         return (rvalt);
 608 }
 609
 610 /*
 611  * Removes a thread from a sleep queue and makes it
 612  * runnable.
 613  */
 614 static void
 615 sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
 616 {
 617         struct sleepqueue_chain *sc;
 618
 619         MPASS(td != NULL);
 620         MPASS(sq->sq_wchan != NULL);
 621         MPASS(td->td_wchan == sq->sq_wchan);
 622         MPASS(td->td_sqqueue < NR_SLEEPQS && td->td_sqqueue >= 0);
 623         sc = SC_LOOKUP(sq->sq_wchan);
 624         mtx_assert(&sc->sc_lock, MA_OWNED);
 625         mtx_assert(&sched_lock, MA_OWNED);
 626
 627         /* Remove the thread from the queue. */
 628         TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq);
 629
 630         /*
 631          * Get a sleep queue for this thread.  If this is the last waiter,
 632          * use the queue itself and take it out of the chain, otherwise,
 633          * remove a queue from the free list.
 634          */
 635         if (LIST_EMPTY(&sq->sq_free)) {
 636                 td->td_sleepqueue = sq;
 637 #ifdef INVARIANTS
 638                 sq->sq_wchan = NULL;
 639 #endif
 640 #ifdef SLEEPQUEUE_PROFILING
 641                 sc->sc_depth--;
 642 #endif
 643         } else
 644                 td->td_sleepqueue = LIST_FIRST(&sq->sq_free);
 645         LIST_REMOVE(td->td_sleepqueue, sq_hash);
 646
 647         td->td_wmesg = NULL;
 648         td->td_wchan = NULL;
 649         td->td_flags &= ~TDF_SINTR;
 650
 651         /*
 652          * Note that thread td might not be sleeping if it is running
 653          * sleepq_catch_signals() on another CPU or is blocked on
 654          * its proc lock to check signals.  It doesn't hurt to clear
 655          * the sleeping flag if it isn't set though, so we just always
 656          * do it.  However, we can't assert that it is set.
 657          */
 658         CTR3(KTR_PROC, "sleepq_wakeup: thread %p (pid %ld, %s)",
 659             (void *)td, (long)td->td_proc->p_pid, td->td_proc->p_comm);
 660         TD_CLR_SLEEPING(td);
 661
 662         /* Adjust priority if requested. */
 663         MPASS(pri == -1 || (pri >= PRI_MIN && pri <= PRI_MAX));
 664         if (pri != -1 && td->td_priority > pri)
 665                 sched_prio(td, pri);
 666         setrunnable(td);
 667 }
 668
 669 /*
 670  * Find the highest priority thread sleeping on a wait channel and resume it.
 671  */
 672 void
 673 sleepq_signal(void *wchan, int flags, int pri, int queue)
 674 {
 675         struct sleepqueue *sq;
 676         struct thread *td, *besttd;
 677
 678         CTR2(KTR_PROC, "sleepq_signal(%p, %d)", wchan, flags);
 679         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 680         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 681         sq = sleepq_lookup(wchan);
 682         if (sq == NULL) {
 683                 sleepq_release(wchan);
 684                 return;
 685         }
 686         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 687             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
 688
 689         /*
 690          * Find the highest priority thread on the queue.  If there is a
 691          * tie, use the thread that first appears in the queue as it has
 692          * been sleeping the longest since threads are always added to
 693          * the tail of sleep queues.
 694          */
 695         besttd = NULL;
 696         TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) {
 697                 if (besttd == NULL || td->td_priority < besttd->td_priority)
 698                         besttd = td;
 699         }
 700         MPASS(besttd != NULL);
 701         mtx_lock_spin(&sched_lock);
 702         sleepq_resume_thread(sq, besttd, pri);
 703         mtx_unlock_spin(&sched_lock);
 704         sleepq_release(wchan);
 705 }
 706
 707 /*
 708  * Resume all threads sleeping on a specified wait channel.
 709  */
 710 void
 711 sleepq_broadcast(void *wchan, int flags, int pri, int queue)
 712 {
 713         struct sleepqueue *sq;
 714
 715         CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags);
 716         KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 717         MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 718         sq = sleepq_lookup(wchan);
 719         if (sq == NULL) {
 720                 sleepq_release(wchan);
 721                 return;
 722         }
 723         KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 724             ("%s: mismatch between sleep/wakeup and cv_*", __func__));
 725
 726         /* Resume all blocked threads on the sleep queue. */
 727         mtx_lock_spin(&sched_lock);
 728         while (!TAILQ_EMPTY(&sq->sq_blocked[queue]))
 729                 sleepq_resume_thread(sq, TAILQ_FIRST(&sq->sq_blocked[queue]),
 730                     pri);
 731         mtx_unlock_spin(&sched_lock);
 732         sleepq_release(wchan);
 733 }
 734
 735 /*
 736  * Time sleeping threads out.  When the timeout expires, the thread is
 737  * removed from the sleep queue and made runnable if it is still asleep.
 738  */
 739 static void
 740 sleepq_timeout(void *arg)
 741 {
 742         struct sleepqueue *sq;
 743         struct thread *td;
 744         void *wchan;
 745
 746         td = arg;
 747         CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
 748             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_proc->p_comm);
 749
 750         /*
 751          * First, see if the thread is asleep and get the wait channel if
 752          * it is.
 753          */
 754         mtx_lock_spin(&sched_lock);
 755         if (TD_ON_SLEEPQ(td)) {
 756                 wchan = td->td_wchan;
 757                 mtx_unlock_spin(&sched_lock);
 758                 sleepq_lock(wchan);
 759                 sq = sleepq_lookup(wchan);
 760                 mtx_lock_spin(&sched_lock);
 761         } else {
 762                 wchan = NULL;
 763                 sq = NULL;
 764         }
 765
 766         /*
 767          * At this point, if the thread is still on the sleep queue,
 768          * we have that sleep queue locked as it cannot migrate sleep
 769          * queues while we dropped sched_lock.  If it had resumed and
 770          * was on another CPU while the lock was dropped, it would have
 771          * seen that TDF_TIMEOUT and TDF_TIMOFAIL are clear and the
 772          * call to callout_stop() to stop this routine would have failed
 773          * meaning that it would have already set TDF_TIMEOUT to
 774          * synchronize with this function.
 775          */
 776         if (TD_ON_SLEEPQ(td)) {
 777                 MPASS(td->td_wchan == wchan);
 778                 MPASS(sq != NULL);
 779                 td->td_flags |= TDF_TIMEOUT;
 780                 sleepq_resume_thread(sq, td, -1);
 781                 mtx_unlock_spin(&sched_lock);
 782                 sleepq_release(wchan);
 783                 return;
 784         } else if (wchan != NULL)
 785                 sleepq_release(wchan);
 786
 787         /*
 788          * Now check for the edge cases.  First, if TDF_TIMEOUT is set,
 789          * then the other thread has already yielded to us, so clear
 790          * the flag and resume it.  If TDF_TIMEOUT is not set, then the
 791          * we know that the other thread is not on a sleep queue, but it
 792          * hasn't resumed execution yet.  In that case, set TDF_TIMOFAIL
 793          * to let it know that the timeout has already run and doesn't
 794          * need to be canceled.
 795          */
 796         if (td->td_flags & TDF_TIMEOUT) {
 797                 MPASS(TD_IS_SLEEPING(td));
 798                 td->td_flags &= ~TDF_TIMEOUT;
 799                 TD_CLR_SLEEPING(td);
 800                 setrunnable(td);
 801         } else
 802                 td->td_flags |= TDF_TIMOFAIL;
 803         mtx_unlock_spin(&sched_lock);
 804 }
 805
 806 /*
 807  * Resumes a specific thread from the sleep queue associated with a specific
 808  * wait channel if it is on that queue.
 809  */
 810 void
 811 sleepq_remove(struct thread *td, void *wchan)
 812 {
 813         struct sleepqueue *sq;
 814
 815         /*
 816          * Look up the sleep queue for this wait channel, then re-check
 817          * that the thread is asleep on that channel, if it is not, then
 818          * bail.
 819          */
 820         MPASS(wchan != NULL);
 821         sleepq_lock(wchan);
 822         sq = sleepq_lookup(wchan);
 823         mtx_lock_spin(&sched_lock);
 824         if (!TD_ON_SLEEPQ(td) || td->td_wchan != wchan) {
 825                 mtx_unlock_spin(&sched_lock);
 826                 sleepq_release(wchan);
 827                 return;
 828         }
 829         MPASS(sq != NULL);
 830
 831         /* Thread is asleep on sleep queue sq, so wake it up. */
 832         sleepq_resume_thread(sq, td, -1);
 833         sleepq_release(wchan);
 834         mtx_unlock_spin(&sched_lock);
 835 }
 836
 837 /*
 838  * Abort a thread as if an interrupt had occurred.  Only abort
 839  * interruptible waits (unfortunately it isn't safe to abort others).
 840  *
 841  * XXX: What in the world does the comment below mean?
 842  * Also, whatever the signal code does...
 843  */
 844 void
 845 sleepq_abort(struct thread *td, int intrval)
 846 {
 847         void *wchan;
 848
 849         mtx_assert(&sched_lock, MA_OWNED);
 850         MPASS(TD_ON_SLEEPQ(td));
 851         MPASS(td->td_flags & TDF_SINTR);
 852         MPASS(intrval == EINTR || intrval == ERESTART);
 853
 854         /*
 855          * If the TDF_TIMEOUT flag is set, just leave. A
 856          * timeout is scheduled anyhow.
 857          */
 858         if (td->td_flags & TDF_TIMEOUT)
 859                 return;
 860
 861         CTR3(KTR_PROC, "sleepq_abort: thread %p (pid %ld, %s)",
 862             (void *)td, (long)td->td_proc->p_pid, (void *)td->td_proc->p_comm);
 863         wchan = td->td_wchan;
 864         if (wchan != NULL) {
 865                 td->td_intrval = intrval;
 866                 td->td_flags |= TDF_SLEEPABORT;
 867         }
 868         mtx_unlock_spin(&sched_lock);
 869         sleepq_remove(td, wchan);
 870         mtx_lock_spin(&sched_lock);
 871 }
 872
 873 #ifdef DDB
 874 DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
 875 {
 876         struct sleepqueue_chain *sc;
 877         struct sleepqueue *sq;
 878 #ifdef INVARIANTS
 879         struct lock_object *lock;
 880 #endif
 881         struct thread *td;
 882         void *wchan;
 883         int i;
 884
 885         if (!have_addr)
 886                 return;
 887
 888         /*
 889          * First, see if there is an active sleep queue for the wait channel
 890          * indicated by the address.
 891          */
 892         wchan = (void *)addr;
 893         sc = SC_LOOKUP(wchan);
 894         LIST_FOREACH(sq, &sc->sc_queues, sq_hash)
 895                 if (sq->sq_wchan == wchan)
 896                         goto found;
 897
 898         /*
 899          * Second, see if there is an active sleep queue at the address
 900          * indicated.
 901          */
 902         for (i = 0; i < SC_TABLESIZE; i++)
 903                 LIST_FOREACH(sq, &sleepq_chains[i].sc_queues, sq_hash) {
 904                         if (sq == (struct sleepqueue *)addr)
 905                                 goto found;
 906                 }
 907
 908         db_printf("Unable to locate a sleep queue via %p\n", (void *)addr);
 909         return;
 910 found:
 911         db_printf("Wait channel: %p\n", sq->sq_wchan);
 912 #ifdef INVARIANTS
 913         db_printf("Queue type: %d\n", sq->sq_type);
 914         if (sq->sq_lock) {
 915                 lock = sq->sq_lock;
 916                 db_printf("Associated Interlock: %p - (%s) %s\n", lock,
 917                     LOCK_CLASS(lock)->lc_name, lock->lo_name);
 918         }
 919 #endif
 920         db_printf("Blocked threads:\n");
 921         for (i = 0; i < NR_SLEEPQS; i++) {
 922                 db_printf("\nQueue[%d]:\n", i);
 923                 if (TAILQ_EMPTY(&sq->sq_blocked[i]))
 924                         db_printf("\tempty\n");
 925                 else
 926                         TAILQ_FOREACH(td, &sq->sq_blocked[0],
 927                                       td_slpq) {
 928                                 db_printf("\t%p (tid %d, pid %d, \"%s\")\n", td,
 929                                           td->td_tid, td->td_proc->p_pid,
 930                                           td->td_name[i] != '\0' ? td->td_name :
 931                                           td->td_proc->p_comm);
 932                         }
 933         }
 934 }
 935
 936 /* Alias 'show sleepqueue' to 'show sleepq'. */
 937 DB_SET(sleepqueue, db_show_sleepqueue, db_show_cmd_set, 0, NULL);
 938 #endif