sys/kern/kern_synch.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-3-Clause
   3  *
   4  * Copyright (c) 1982, 1986, 1990, 1991, 1993
   5  *      The Regents of the University of California.  All rights reserved.
   6  * (c) UNIX System Laboratories, Inc.
   7  * All or some portions of this file are derived from material licensed
   8  * to the University of California by American Telephone and Telegraph
   9  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
  10  * the permission of UNIX System Laboratories, Inc.
  11  *
  12  * Redistribution and use in source and binary forms, with or without
  13  * modification, are permitted provided that the following conditions
  14  * are met:
  15  * 1. Redistributions of source code must retain the above copyright
  16  *    notice, this list of conditions and the following disclaimer.
  17  * 2. Redistributions in binary form must reproduce the above copyright
  18  *    notice, this list of conditions and the following disclaimer in the
  19  *    documentation and/or other materials provided with the distribution.
  20  * 3. Neither the name of the University nor the names of its contributors
  21  *    may be used to endorse or promote products derived from this software
  22  *    without specific prior written permission.
  23  *
  24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  34  * SUCH DAMAGE.
  35  *
  36  *      @(#)kern_synch.c        8.9 (Berkeley) 5/19/95
  37  */
  38
  39 #include <sys/cdefs.h>
  40 __FBSDID("$FreeBSD$");
  41
  42 #include "opt_ktrace.h"
  43 #include "opt_sched.h"
  44
  45 #include <sys/param.h>
  46 #include <sys/systm.h>
  47 #include <sys/condvar.h>
  48 #include <sys/kdb.h>
  49 #include <sys/kernel.h>
  50 #include <sys/ktr.h>
  51 #include <sys/lock.h>
  52 #include <sys/mutex.h>
  53 #include <sys/proc.h>
  54 #include <sys/resourcevar.h>
  55 #include <sys/refcount.h>
  56 #include <sys/sched.h>
  57 #include <sys/sdt.h>
  58 #include <sys/signalvar.h>
  59 #include <sys/sleepqueue.h>
  60 #include <sys/smp.h>
  61 #include <sys/sx.h>
  62 #include <sys/sysctl.h>
  63 #include <sys/sysproto.h>
  64 #include <sys/vmmeter.h>
  65 #ifdef KTRACE
  66 #include <sys/uio.h>
  67 #include <sys/ktrace.h>
  68 #endif
  69
  70 #include <machine/cpu.h>
  71
/*
 * synch_setup() is registered to run at SI_SUB_KICK_SCHEDULER,
 * SI_ORDER_FIRST; it initialises the load-average callout and triggers
 * the first load-average computation.
 */
static void synch_setup(void *dummy);
SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup,
    NULL);

/*
 * Tick threshold consulted by should_yield(); sleepinit() installs the
 * default value.
 */
int     hogticks;
/*
 * Wait channels used by pause_sbt(), indexed by CPU.  _sleep() treats any
 * address inside this array as a pause and tags the sleep SLEEPQ_PAUSE.
 */
static uint8_t pause_wchan[MAXCPU];

/* Callout through which loadav() reschedules itself. */
static struct callout loadav_callout;

struct loadavg averunnable =
        { {0, 0, 0}, FSCALE };  /* load average, of runnable procs */
/*
 * Constants for averages over 1, 5, and 15 minutes
 * when sampling at 5 second intervals.  Each entry is the decay factor
 * scaled by FSCALE; loadav() indexes this table in step with
 * averunnable.ldavg[].
 */
static fixpt_t cexp[3] = {
        0.9200444146293232 * FSCALE,    /* exp(-1/12) */
        0.9834714538216174 * FSCALE,    /* exp(-1/60) */
        0.9944598480048967 * FSCALE,    /* exp(-1/180) */
};

/* kernel uses `FSCALE', userland (SHOULD) use kern.fscale */
SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, FSCALE, "");

static void     loadav(void *arg);

/* SDT probe "preempt" of provider "sched"; fired from mi_switch(). */
SDT_PROVIDER_DECLARE(sched);
SDT_PROBE_DEFINE(sched, , , preempt);
 100
 101 static void
 102 sleepinit(void *unused)
 103 {
 104
 105         hogticks = (hz / 10) * 2;       /* Default only. */
 106         init_sleepqueues();
 107 }
 108
/*
 * vmem tries to lock the sleepq mutexes when freeing kva, so make sure
 * they are available: sleepinit() is therefore registered at SI_SUB_KMEM.
 */
SYSINIT(sleepinit, SI_SUB_KMEM, SI_ORDER_ANY, sleepinit, NULL);
 114
/*
 * General sleep call.  Suspends the current thread until a wakeup is
 * performed on the specified identifier.  The thread will then be made
 * runnable with the specified priority.  Sleeps at most sbt units of time
 * (0 means no timeout); sbt, pr and flags are handed unchanged to
 * sleepq_set_timeout_sbt().  If priority includes the PCATCH flag, let
 * signals interrupt the sleep, otherwise ignore them while sleeping.
 * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.  If PCATCH is
 * set and a signal becomes pending, ERESTART is returned if the current
 * system call should be restarted if possible, and EINTR is returned if
 * the system call should be interrupted by the signal.
 *
 * The lock argument is unlocked before the caller is suspended, and
 * re-locked before _sleep() returns.  If priority includes the PDROP
 * flag the lock is not re-locked before returning; PDROP requires a
 * non-NULL lock other than Giant.
 *
 * If the scheduler is stopped for the calling thread no sleep takes
 * place: the lock is released only when PDROP is set, and 0 is returned.
 */
int
_sleep(void *ident, struct lock_object *lock, int priority,
    const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
{
        struct thread *td;
        struct lock_class *class;
        uintptr_t lock_state;
        int catch, pri, rval, sleepq_flags;
        WITNESS_SAVE_DECL(lock_witness);

        td = curthread;
#ifdef KTRACE
        /* Emit a KTR_CSW record before sleeping (first argument 1). */
        if (KTRPOINT(td, KTR_CSW))
                ktrcsw(1, 0, wmesg);
#endif
        WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, lock,
            "Sleeping on \"%s\"", wmesg);
        /* An untimed sleep needs either Giant or an interlock. */
        KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL,
            ("sleeping without a lock"));
        KASSERT(ident != NULL, ("_sleep: NULL ident"));
        KASSERT(TD_IS_RUNNING(td), ("_sleep: curthread not running"));
        KASSERT(td->td_epochnest == 0, ("sleeping in an epoch section"));
        if (priority & PDROP)
                KASSERT(lock != NULL && lock != &Giant.lock_object,
                    ("PDROP requires a non-Giant lock"));
        /* class stays NULL when there is no interlock; guarded by lock != NULL. */
        if (lock != NULL)
                class = LOCK_CLASS(lock);
        else
                class = NULL;

        /*
         * With the scheduler stopped we cannot sleep.  Honour PDROP so the
         * caller's locking expectations still hold, then report success.
         */
        if (SCHEDULER_STOPPED_TD(td)) {
                if (lock != NULL && priority & PDROP)
                        class->lc_unlock(lock);
                return (0);
        }
        /* Split priority into the PCATCH flag and the numeric priority. */
        catch = priority & PCATCH;
        pri = priority & PRIMASK;

        KASSERT(!TD_ON_SLEEPQ(td), ("recursive sleep"));

        /*
         * A wait channel inside pause_wchan[] marks this as a pause
         * rather than an ordinary sleep.
         */
        if ((uint8_t *)ident >= &pause_wchan[0] &&
            (uint8_t *)ident <= &pause_wchan[MAXCPU - 1])
                sleepq_flags = SLEEPQ_PAUSE;
        else
                sleepq_flags = SLEEPQ_SLEEP;
        if (catch)
                sleepq_flags |= SLEEPQ_INTERRUPTIBLE;

        sleepq_lock(ident);
        CTR5(KTR_PROC, "sleep: thread %ld (pid %ld, %s) on %s (%p)",
            td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident);

        if (lock == &Giant.lock_object)
                mtx_assert(&Giant, MA_OWNED);
        DROP_GIANT();
        /*
         * A non-sleepable interlock (other than Giant) is released here,
         * with the sleepqueue lock already held.  Sleepable lock classes
         * are released further down, after sleepq_add().
         */
        if (lock != NULL && lock != &Giant.lock_object &&
            !(class->lc_flags & LC_SLEEPABLE)) {
                WITNESS_SAVE(lock, lock_witness);
                lock_state = class->lc_unlock(lock);
        } else
                /*
                 * GCC needs to follow the Yellow Brick Road: this value
                 * is never consumed (it is either overwritten below for
                 * sleepable locks, or lc_lock() is not called at all),
                 * the assignment only keeps the compiler quiet.
                 */
                lock_state = -1;

        /*
         * We put ourselves on the sleep queue and start our timeout
         * before calling thread_suspend_check, as we could stop there,
         * and a wakeup or a SIGCONT (or both) could occur while we were
         * stopped without resuming us.  Thus, we must be ready for sleep
         * when cursig() is called.  If the wakeup happens while we're
         * stopped, then td will no longer be on a sleep queue upon
         * return from cursig().
         */
        sleepq_add(ident, lock, wmesg, sleepq_flags, 0);
        if (sbt != 0)
                sleepq_set_timeout_sbt(ident, sbt, pr, flags);
        /*
         * Sleepable interlocks are released only now that the thread is
         * queued; the sleepqueue lock is dropped around lc_unlock() and
         * re-taken before waiting.
         */
        if (lock != NULL && class->lc_flags & LC_SLEEPABLE) {
                sleepq_release(ident);
                WITNESS_SAVE(lock, lock_witness);
                lock_state = class->lc_unlock(lock);
                sleepq_lock(ident);
        }
        /* Pick the wait primitive matching timeout and PCATCH. */
        if (sbt != 0 && catch)
                rval = sleepq_timedwait_sig(ident, pri);
        else if (sbt != 0)
                rval = sleepq_timedwait(ident, pri);
        else if (catch)
                rval = sleepq_wait_sig(ident, pri);
        else {
                /* sleepq_wait() yields no status; report success. */
                sleepq_wait(ident, pri);
                rval = 0;
        }
#ifdef KTRACE
        /* Matching KTR_CSW record after the sleep (first argument 0). */
        if (KTRPOINT(td, KTR_CSW))
                ktrcsw(0, 0, wmesg);
#endif
        PICKUP_GIANT();
        /* Re-acquire the interlock unless the caller asked for PDROP. */
        if (lock != NULL && lock != &Giant.lock_object && !(priority & PDROP)) {
                class->lc_lock(lock, lock_state);
                WITNESS_RESTORE(lock, lock_witness);
        }
        return (rval);
}
 232
/*
 * Sleep on ident while holding the spin mutex mtx.  The mutex is released
 * with mtx_unlock_spin() before the thread is queued and is always
 * re-acquired with mtx_lock_spin() before returning.  sbt, pr and flags
 * are handed to sleepq_set_timeout_sbt() when sbt is non-zero (0 means no
 * timeout).  Only the non-signal wait primitives are used here, with a
 * priority argument of 0.
 *
 * Returns the result of sleepq_timedwait() for timed sleeps and 0 for
 * untimed ones.  If the scheduler is stopped for the calling thread, 0 is
 * returned immediately with mtx still held.
 */
int
msleep_spin_sbt(void *ident, struct mtx *mtx, const char *wmesg,
    sbintime_t sbt, sbintime_t pr, int flags)
{
        struct thread *td;
        int rval;
        WITNESS_SAVE_DECL(mtx);

        td = curthread;
        KASSERT(mtx != NULL, ("sleeping without a mutex"));
        KASSERT(ident != NULL, ("msleep_spin_sbt: NULL ident"));
        KASSERT(TD_IS_RUNNING(td), ("msleep_spin_sbt: curthread not running"));

        /* Cannot sleep with the scheduler stopped; mtx is left locked. */
        if (SCHEDULER_STOPPED_TD(td))
                return (0);

        sleepq_lock(ident);
        CTR5(KTR_PROC, "msleep_spin: thread %ld (pid %ld, %s) on %s (%p)",
            td->td_tid, td->td_proc->p_pid, td->td_name, wmesg, ident);

        DROP_GIANT();
        /* The mutex must be owned and not recursed before it is dropped. */
        mtx_assert(mtx, MA_OWNED | MA_NOTRECURSED);
        WITNESS_SAVE(&mtx->lock_object, mtx);
        mtx_unlock_spin(mtx);

        /*
         * We put ourselves on the sleep queue and start our timeout.
         */
        sleepq_add(ident, &mtx->lock_object, wmesg, SLEEPQ_SLEEP, 0);
        if (sbt != 0)
                sleepq_set_timeout_sbt(ident, sbt, pr, flags);

        /*
         * Can't call ktrace with any spin locks held so it can lock the
         * ktrace_mtx lock, and WITNESS_WARN considers it an error to hold
         * any spin lock.  Thus, we have to drop the sleepq spin lock while
         * we handle those requests.  This is safe since we have placed our
         * thread on the sleep queue already.
         */
#ifdef KTRACE
        if (KTRPOINT(td, KTR_CSW)) {
                sleepq_release(ident);
                ktrcsw(1, 0, wmesg);
                sleepq_lock(ident);
        }
#endif
#ifdef WITNESS
        sleepq_release(ident);
        WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "Sleeping on \"%s\"",
            wmesg);
        sleepq_lock(ident);
#endif
        /* Timed or untimed wait; neither variant is the signal-aware one. */
        if (sbt != 0)
                rval = sleepq_timedwait(ident, 0);
        else {
                sleepq_wait(ident, 0);
                rval = 0;
        }
#ifdef KTRACE
        if (KTRPOINT(td, KTR_CSW))
                ktrcsw(0, 0, wmesg);
#endif
        PICKUP_GIANT();
        /* Unlike _sleep() there is no PDROP: the mutex is always retaken. */
        mtx_lock_spin(mtx);
        WITNESS_RESTORE(&mtx->lock_object, mtx);
        return (rval);
}
 300
 301 /*
 302  * pause_sbt() delays the calling thread by the given signed binary
 303  * time. During cold bootup, pause_sbt() uses the DELAY() function
 304  * instead of the _sleep() function to do the waiting. The "sbt"
 305  * argument must be greater than or equal to zero. A "sbt" value of
 306  * zero is equivalent to a "sbt" value of one tick.
 307  */
 308 int
 309 pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
 310 {
 311         KASSERT(sbt >= 0, ("pause_sbt: timeout must be >= 0"));
 312
 313         /* silently convert invalid timeouts */
 314         if (sbt == 0)
 315                 sbt = tick_sbt;
 316
 317         if ((cold && curthread == &thread0) || kdb_active ||
 318             SCHEDULER_STOPPED()) {
 319                 /*
 320                  * We delay one second at a time to avoid overflowing the
 321                  * system specific DELAY() function(s):
 322                  */
 323                 while (sbt >= SBT_1S) {
 324                         DELAY(1000000);
 325                         sbt -= SBT_1S;
 326                 }
 327                 /* Do the delay remainder, if any */
 328                 sbt = howmany(sbt, SBT_1US);
 329                 if (sbt > 0)
 330                         DELAY(sbt);
 331                 return (EWOULDBLOCK);
 332         }
 333         return (_sleep(&pause_wchan[curcpu], NULL,
 334             (flags & C_CATCH) ? PCATCH : 0, wmesg, sbt, pr, flags));
 335 }
 336
 337 /*
 338  * Potentially release the last reference for refcount.  Check for
 339  * unlikely conditions and signal the caller as to whether it was
 340  * the final ref.
 341  */
 342 bool
 343 refcount_release_last(volatile u_int *count, u_int n, u_int old)
 344 {
 345         u_int waiter;
 346
 347         waiter = old & REFCOUNT_WAITER;
 348         old = REFCOUNT_COUNT(old);
 349         if (__predict_false(n > old || REFCOUNT_SATURATED(old))) {
 350                 /*
 351                  * Avoid multiple destructor invocations if underflow occurred.
 352                  * This is not perfect since the memory backing the containing
 353                  * object may already have been reallocated.
 354                  */
 355                 _refcount_update_saturated(count);
 356                 return (false);
 357         }
 358
 359         /*
 360          * Attempt to atomically clear the waiter bit.  Wakeup waiters
 361          * if we are successful.
 362          */
 363         if (waiter != 0 && atomic_cmpset_int(count, REFCOUNT_WAITER, 0))
 364                 wakeup(__DEVOLATILE(u_int *, count));
 365
 366         /*
 367          * Last reference.  Signal the user to call the destructor.
 368          *
 369          * Ensure that the destructor sees all updates.  The fence_rel
 370          * at the start of refcount_releasen synchronizes with this fence.
 371          */
 372         atomic_thread_fence_acq();
 373         return (true);
 374 }
 375
 376 /*
 377  * Wait for a refcount wakeup.  This does not guarantee that the ref is still
 378  * zero on return and may be subject to transient wakeups.  Callers wanting
 379  * a precise answer should use refcount_wait().
 380  */
 381 void
 382 refcount_sleep(volatile u_int *count, const char *wmesg, int pri)
 383 {
 384         void *wchan;
 385         u_int old;
 386
 387         if (REFCOUNT_COUNT(*count) == 0)
 388                 return;
 389         wchan = __DEVOLATILE(void *, count);
 390         sleepq_lock(wchan);
 391         old = *count;
 392         for (;;) {
 393                 if (REFCOUNT_COUNT(old) == 0) {
 394                         sleepq_release(wchan);
 395                         return;
 396                 }
 397                 if (old & REFCOUNT_WAITER)
 398                         break;
 399                 if (atomic_fcmpset_int(count, &old, old | REFCOUNT_WAITER))
 400                         break;
 401         }
 402         sleepq_add(wchan, NULL, wmesg, 0, 0);
 403         sleepq_wait(wchan, pri);
 404 }
 405
 406 /*
 407  * Make all threads sleeping on the specified identifier runnable.
 408  */
 409 void
 410 wakeup(void *ident)
 411 {
 412         int wakeup_swapper;
 413
 414         sleepq_lock(ident);
 415         wakeup_swapper = sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0);
 416         sleepq_release(ident);
 417         if (wakeup_swapper) {
 418                 KASSERT(ident != &proc0,
 419                     ("wakeup and wakeup_swapper and proc0"));
 420                 kick_proc0();
 421         }
 422 }
 423
 424 /*
 425  * Make a thread sleeping on the specified identifier runnable.
 426  * May wake more than one thread if a target thread is currently
 427  * swapped out.
 428  */
 429 void
 430 wakeup_one(void *ident)
 431 {
 432         int wakeup_swapper;
 433
 434         sleepq_lock(ident);
 435         wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0);
 436         sleepq_release(ident);
 437         if (wakeup_swapper)
 438                 kick_proc0();
 439 }
 440
 441 void
 442 wakeup_any(void *ident)
 443 {
 444         int wakeup_swapper;
 445
 446         sleepq_lock(ident);
 447         wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_UNFAIR,
 448             0, 0);
 449         sleepq_release(ident);
 450         if (wakeup_swapper)
 451                 kick_proc0();
 452 }
 453
 454 static void
 455 kdb_switch(void)
 456 {
 457         thread_unlock(curthread);
 458         kdb_backtrace();
 459         kdb_reenter();
 460         panic("%s: did not reenter debugger", __func__);
 461 }
 462
/*
 * The machine independent parts of context switching.
 *
 * flags must contain SW_VOL or SW_INVOL (asserted below) and is passed on
 * to sched_switch() together with newtd, which must not be curthread.
 * The caller must hold curthread's thread lock, unrecursed.  Nothing is
 * switched when the scheduler is stopped for the current thread.
 */
void
mi_switch(int flags, struct thread *newtd)
{
        uint64_t runtime, new_switchtime;
        struct thread *td;

        td = curthread;                 /* XXX */
        THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
        KASSERT(!TD_ON_RUNQ(td), ("mi_switch: called by old code"));
#ifdef INVARIANTS
        /* Giant may only be held if the thread is running or on a lock. */
        if (!TD_ON_LOCK(td) && !TD_IS_RUNNING(td))
                mtx_assert(&Giant, MA_NOTOWNED);
#endif
        /*
         * Exactly one critical-section level is expected here, presumably
         * the one contributed by the held thread lock -- TODO confirm.
         * The check is waived once the system has panicked.
         */
        KASSERT(td->td_critnest == 1 || panicstr,
            ("mi_switch: switch in a critical section"));
        KASSERT((flags & (SW_INVOL | SW_VOL)) != 0,
            ("mi_switch: switch must be voluntary or involuntary"));
        KASSERT(newtd != curthread, ("mi_switch: preempting back to ourself"));

        /*
         * Don't perform context switches from the debugger.
         * kdb_switch() ends in panic().
         */
        if (kdb_active)
                kdb_switch();
        if (SCHEDULER_STOPPED_TD(td))
                return;
        /* Account the switch and remember the tick at which it happened. */
        if (flags & SW_VOL) {
                td->td_ru.ru_nvcsw++;
                td->td_swvoltick = ticks;
        } else {
                td->td_ru.ru_nivcsw++;
                td->td_swinvoltick = ticks;
        }
#ifdef SCHED_STATS
        SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);
#endif
        /*
         * Compute the amount of time during which the current
         * thread was running, and add that to its total so far.
         * The per-CPU switchtime is then reset to the new timestamp.
         */
        new_switchtime = cpu_ticks();
        runtime = new_switchtime - PCPU_GET(switchtime);
        td->td_runtime += runtime;
        td->td_incruntime += runtime;
        PCPU_SET(switchtime, new_switchtime);
        td->td_generation++;    /* bump preempt-detect counter */
        VM_CNT_INC(v_swtch);
        PCPU_SET(switchticks, ticks);
        CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)",
            td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name);
#ifdef KDTRACE_HOOKS
        /*
         * Fire sched:::preempt for explicit preemptions and for
         * involuntary switches of type SWT_NEEDRESCHED.
         */
        if (SDT_PROBES_ENABLED() &&
            ((flags & SW_PREEMPT) != 0 || ((flags & SW_INVOL) != 0 &&
            (flags & SW_TYPE_MASK) == SWT_NEEDRESCHED)))
                SDT_PROBE0(sched, , , preempt);
#endif
        /* Hand over to the scheduler; the actual switch happens in there. */
        sched_switch(td, newtd, flags);
        CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)",
            td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name);

        /*
         * If the last thread was exiting, finish cleaning it up.
         * Note that td is reused here for the per-CPU deadthread pointer.
         */
        if ((td = PCPU_GET(deadthread))) {
                PCPU_SET(deadthread, NULL);
                thread_stash(td);
        }
}
 534
 535 /*
 536  * Change thread state to be runnable, placing it on the run queue if
 537  * it is in memory.  If it is swapped out, return true so our caller
 538  * will know to awaken the swapper.
 539  */
 540 int
 541 setrunnable(struct thread *td)
 542 {
 543
 544         THREAD_LOCK_ASSERT(td, MA_OWNED);
 545         KASSERT(td->td_proc->p_state != PRS_ZOMBIE,
 546             ("setrunnable: pid %d is a zombie", td->td_proc->p_pid));
 547         switch (td->td_state) {
 548         case TDS_RUNNING:
 549         case TDS_RUNQ:
 550                 return (0);
 551         case TDS_INHIBITED:
 552                 /*
 553                  * If we are only inhibited because we are swapped out
 554                  * then arange to swap in this process. Otherwise just return.
 555                  */
 556                 if (td->td_inhibitors != TDI_SWAPPED)
 557                         return (0);
 558                 /* FALLTHROUGH */
 559         case TDS_CAN_RUN:
 560                 break;
 561         default:
 562                 printf("state is 0x%x", td->td_state);
 563                 panic("setrunnable(2)");
 564         }
 565         if ((td->td_flags & TDF_INMEM) == 0) {
 566                 if ((td->td_flags & TDF_SWAPINREQ) == 0) {
 567                         td->td_flags |= TDF_SWAPINREQ;
 568                         return (1);
 569                 }
 570         } else
 571                 sched_wakeup(td);
 572         return (0);
 573 }
 574
 575 /*
 576  * Compute a tenex style load average of a quantity on
 577  * 1, 5 and 15 minute intervals.
 578  */
 579 static void
 580 loadav(void *arg)
 581 {
 582         int i, nrun;
 583         struct loadavg *avg;
 584
 585         nrun = sched_load();
 586         avg = &averunnable;
 587
 588         for (i = 0; i < 3; i++)
 589                 avg->ldavg[i] = (cexp[i] * avg->ldavg[i] +
 590                     nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
 591
 592         /*
 593          * Schedule the next update to occur after 5 seconds, but add a
 594          * random variation to avoid synchronisation with processes that
 595          * run at regular intervals.
 596          */
 597         callout_reset_sbt(&loadav_callout,
 598             SBT_1US * (4000000 + (int)(random() % 2000001)), SBT_1US,
 599             loadav, NULL, C_DIRECT_EXEC | C_PREL(32));
 600 }
 601
 602 /* ARGSUSED */
 603 static void
 604 synch_setup(void *dummy)
 605 {
 606         callout_init(&loadav_callout, 1);
 607
 608         /* Kick off timeout driven events by calling first time. */
 609         loadav(NULL);
 610 }
 611
 612 int
 613 should_yield(void)
 614 {
 615
 616         return ((u_int)ticks - (u_int)curthread->td_swvoltick >= hogticks);
 617 }
 618
 619 void
 620 maybe_yield(void)
 621 {
 622
 623         if (should_yield())
 624                 kern_yield(PRI_USER);
 625 }
 626
 627 void
 628 kern_yield(int prio)
 629 {
 630         struct thread *td;
 631
 632         td = curthread;
 633         DROP_GIANT();
 634         thread_lock(td);
 635         if (prio == PRI_USER)
 636                 prio = td->td_user_pri;
 637         if (prio >= 0)
 638                 sched_prio(td, prio);
 639         mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
 640         thread_unlock(td);
 641         PICKUP_GIANT();
 642 }
 643
 644 /*
 645  * General purpose yield system call.
 646  */
 647 int
 648 sys_yield(struct thread *td, struct yield_args *uap)
 649 {
 650
 651         thread_lock(td);
 652         if (PRI_BASE(td->td_pri_class) == PRI_TIMESHARE)
 653                 sched_prio(td, PRI_MAX_TIMESHARE);
 654         mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
 655         thread_unlock(td);
 656         td->td_retval[0] = 0;
 657         return (0);
 658 }