sys/kern/kern_timeout.c

   1 /*-
   2  * Copyright (c) 1982, 1986, 1991, 1993
   3  *      The Regents of the University of California.  All rights reserved.
   4  * (c) UNIX System Laboratories, Inc.
   5  * All or some portions of this file are derived from material licensed
   6  * to the University of California by American Telephone and Telegraph
   7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   8  * the permission of UNIX System Laboratories, Inc.
   9  *
  10  * Redistribution and use in source and binary forms, with or without
  11  * modification, are permitted provided that the following conditions
  12  * are met:
  13  * 1. Redistributions of source code must retain the above copyright
  14  *    notice, this list of conditions and the following disclaimer.
  15  * 2. Redistributions in binary form must reproduce the above copyright
  16  *    notice, this list of conditions and the following disclaimer in the
  17  *    documentation and/or other materials provided with the distribution.
  18  * 4. Neither the name of the University nor the names of its contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  32  * SUCH DAMAGE.
  33  *
  34  *      From: @(#)kern_clock.c  8.5 (Berkeley) 1/21/94
  35  */
  36
  37 #include <sys/cdefs.h>
  38 __FBSDID("$FreeBSD$");
  39
  40 #include "opt_kdtrace.h"
  41
  42 #include <sys/param.h>
  43 #include <sys/systm.h>
  44 #include <sys/bus.h>
  45 #include <sys/callout.h>
  46 #include <sys/condvar.h>
  47 #include <sys/interrupt.h>
  48 #include <sys/kernel.h>
  49 #include <sys/ktr.h>
  50 #include <sys/lock.h>
  51 #include <sys/malloc.h>
  52 #include <sys/mutex.h>
  53 #include <sys/proc.h>
  54 #include <sys/sdt.h>
  55 #include <sys/sleepqueue.h>
  56 #include <sys/sysctl.h>
  57 #include <sys/smp.h>
  58
  59 #ifdef SMP
  60 #include <machine/cpu.h>
  61 #endif
  62
  63 SDT_PROVIDER_DEFINE(callout_execute);
  64 SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start,
  65     "struct callout *");
  66 SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end,
  67     "struct callout *");
     /*
      * The two probes above are fired by softclock_call_cc() immediately
      * before and after each callout handler is invoked.
      */
  68
     /*
      * Statistics exported read-only under the debug sysctl tree.  At the end
      * of every softclock() pass the per-pass counters are folded into these
      * as running averages scaled by 1000.
      */
  69 static int avg_depth;
  70 SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
  71     "Average number of items examined per softclock call. Units = 1/1000");
  72 static int avg_gcalls;
  73 SYSCTL_INT(_debug, OID_AUTO, to_avg_gcalls, CTLFLAG_RD, &avg_gcalls, 0,
  74     "Average number of Giant callouts made per softclock call. Units = 1/1000");
  75 static int avg_lockcalls;
  76 SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
  77     "Average number of lock callouts made per softclock call. Units = 1/1000");
  78 static int avg_mpcalls;
  79 SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
  80     "Average number of MP callouts made per softclock call. Units = 1/1000");
  81 /*
  82  * TODO:
  83  *      allocate more timeout table slots when table overflows.
  84  */
     /*
      * Callwheel geometry, computed in kern_timeout_callwheel_alloc():
      * callwheelsize is the smallest power of two >= ncallout, callwheelbits
      * is its log2, and callwheelmask (callwheelsize - 1) maps a tick value
      * to a wheel bucket index.
      */
  85 int callwheelsize, callwheelbits, callwheelmask;
  86
  87 /*
  88  * The callout cpu migration entity holds the information needed to
  89  * describe a migrating callout to its new callout cpu.
  90  * The cached information is what allows a migration to be deferred when
  91  * the migrating callout is already running.
  92  */
  93 struct cc_mig_ent {
  94 #ifdef SMP
  95         void    (*ce_migration_func)(void *);   /* handler to re-arm with */
  96         void    *ce_migration_arg;              /* argument for the handler */
  97         int     ce_migration_cpu;               /* target cpu; CPUBLOCK if none */
  98         int     ce_migration_ticks;             /* delay to re-arm with */
  99 #endif
 100 };
 101
 102 /*
 103  * There is one struct callout_cpu per cpu, holding all relevant
 104  * state for the callout processing thread on the individual CPU.
 105  * In particular:
 106  *      cc_ticks is set from the global 'ticks' on every call to
 107  *      callout_tick().  It tracks the global 'ticks' but in a way that
 108  *      the individual threads should not worry about races in the order
 109  *      in which hardclock() and hardclock_cpu() run on the various CPUs.
 110  *      cc_softticks is advanced in callout_tick() to point to the
 111  *      first entry in cc_callwheel that may need handling. In turn,
 112  *      a softclock() is scheduled so it can serve the various entries i
 113  *      such that cc_softticks <= i <= cc_ticks .
 114  *      XXX maybe cc_softticks and cc_ticks should be volatile ?
 115  *
 116  *      cc_ticks is also read in callout_cc_add() when computing the
 117  *      delay reported through callout_new_inserted.
 118  */
 119 struct callout_cpu {
 120         struct cc_mig_ent       cc_migrating_entity;    /* deferred migration */
 121         struct mtx              cc_lock;        /* spin mutex for this state */
 122         struct callout          *cc_callout;    /* timeout(9) pool, or NULL */
 123         struct callout_tailq    *cc_callwheel;  /* callwheelsize buckets */
 124         struct callout_list     cc_callfree;    /* free timeout(9) callouts */
 125         struct callout          *cc_next;       /* next for softclock() */
 126         struct callout          *cc_curr;       /* callout being run */
 127         void                    *cc_cookie;     /* handle from swi_add() */
 128         int                     cc_ticks;       /* 'ticks' at last callout_tick() */
 129         int                     cc_softticks;   /* next tick softclock() serves */
 130         int                     cc_cancel;      /* cancel state of cc_curr */
 131         int                     cc_waiting;     /* someone waits for cc_curr */
 132         int                     cc_firsttick;   /* earliest tick expected to have work */
 133 };
 134
 135 #ifdef SMP
     /* Shorthands for the deferred-migration fields of a struct callout_cpu. */
 136 #define cc_migration_func       cc_migrating_entity.ce_migration_func
 137 #define cc_migration_arg        cc_migrating_entity.ce_migration_arg
 138 #define cc_migration_cpu        cc_migrating_entity.ce_migration_cpu
 139 #define cc_migration_ticks      cc_migrating_entity.ce_migration_ticks
 140
     /*
      * One callout_cpu per possible cpu.  CPUBLOCK is one past the last valid
      * index and serves as the "no cpu" marker.
      */
 141 struct callout_cpu cc_cpu[MAXCPU];
 142 #define CPUBLOCK        MAXCPU
 143 #define CC_CPU(cpu)     (&cc_cpu[(cpu)])
 144 #define CC_SELF()       CC_CPU(PCPU_GET(cpuid))
 145 #else
     /*
      * Uniprocessor: a single callout_cpu.  The expansions are parenthesized
      * so that postfix uses such as CC_CPU(x)->field bind to the whole address
      * expression, exactly as they do in the SMP variant.
      */
 146 struct callout_cpu cc_cpu;
 147 #define CC_CPU(cpu)     (&cc_cpu)
 148 #define CC_SELF()       (&cc_cpu)
 149 #endif
     /* Spin-lock helpers operating on a callout_cpu's cc_lock. */
 150 #define CC_LOCK(cc)     mtx_lock_spin(&(cc)->cc_lock)
 151 #define CC_UNLOCK(cc)   mtx_unlock_spin(&(cc)->cc_lock)
 152 #define CC_LOCK_ASSERT(cc)      mtx_assert(&(cc)->cc_lock, MA_OWNED)
 153
     /*
      * timeout_cpu is the cpu recorded by kern_timeout_callwheel_alloc(); its
      * callout_cpu is the one timeout() allocates callouts from.
      * callout_new_inserted, when non-NULL, is called by callout_cc_add() for
      * a callout that expires before the target cpu's cc_firsttick.
      */
 154 static int timeout_cpu;
 155 void (*callout_new_inserted)(int cpu, int ticks) = NULL;
 156
 157 static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
 158
 159 /**
 160  * Locked by cc_lock:
 161  *   cc_curr         - If a callout is in progress, it is cc_curr.
 162  *                     If cc_curr is non-NULL, threads waiting in
 163  *                     callout_drain() will be woken up as soon as the
 164  *                     relevant callout completes.
 165  *   cc_cancel       - Changing to 1 with both cc_lock and c_lock held
 166  *                     guarantees that the current callout will not run.
 167  *                     The softclock() function sets this to 0 before it
 168  *                     drops cc_lock to acquire c_lock, and it calls
 169  *                     the handler only if cc_cancel is still 0 after
 170  *                     c_lock is successfully acquired.
 171  *   cc_waiting      - If a thread is waiting in callout_drain(), then
 172  *                     cc_waiting is nonzero.  Set only when
 173  *                     cc_curr is non-NULL.
 174  */
 175
 176 /* Forget any deferred migration request recorded on this callout cpu. */
 177 static void
 178 cc_cme_cleanup(struct callout_cpu *cc)
 179 {
 180 #ifdef SMP
 181         struct cc_mig_ent *cme;
 182
 183         cme = &cc->cc_migrating_entity;
 184         cme->ce_migration_func = NULL;
 185         cme->ce_migration_arg = NULL;
 186         cme->ce_migration_ticks = 0;
 187         cme->ce_migration_cpu = CPUBLOCK;
 188 #endif
 189 }
 190
 191 /*
 192  * Report whether a deferred migration is recorded on this callout cpu.
 193  */
 194 static int
 195 cc_cme_migrating(struct callout_cpu *cc)
 196 {
 197
 198 #ifndef SMP
 199         return (0);
 200 #else
 201         return (cc->cc_migration_cpu == CPUBLOCK ? 0 : 1);
 202 #endif
 203 }
 204
 205 /*
 206  * kern_timeout_callwheel_alloc() - size the callwheel and carve its storage
 207  *
 208  *      Called very early during kernel initialization, possibly more than
 209  *      once.  Records the calling cpu as timeout_cpu, sizes the wheel, and
 210  *      lays the callout array and the wheel buckets out back to back
 211  *      starting at v.  Returns the first address past the carved region.
 212  */
 213 caddr_t
 214 kern_timeout_callwheel_alloc(caddr_t v)
 215 {
 216         struct callout_cpu *cc;
 217
 218         timeout_cpu = PCPU_GET(cpuid);
 219         cc = CC_CPU(timeout_cpu);
 220         /* Smallest power of two not below ncallout, and its log2. */
 221         callwheelsize = 1;
 222         callwheelbits = 0;
 223         while (callwheelsize < ncallout) {
 224                 callwheelsize <<= 1;
 225                 ++callwheelbits;
 226         }
 227         callwheelmask = callwheelsize - 1;
 228
 229         cc->cc_callout = (struct callout *)v;
 230         cc->cc_callwheel = (struct callout_tailq *)(cc->cc_callout + ncallout);
 231         return ((caddr_t)(cc->cc_callwheel + callwheelsize));
 232 }
 233
     /*
      * Initialize one callout_cpu: its spin lock, its free list and every
      * wheel bucket, and clear any migration state.  cc->cc_callwheel must
      * already point at callwheelsize buckets.  If the cpu owns a preallocated
      * callout array (cc_callout != NULL), each of its ncallout entries is
      * initialized, flagged CALLOUT_LOCAL_ALLOC and pushed on the free list.
      */
 234 static void
 235 callout_cpu_init(struct callout_cpu *cc)
 236 {
 237         struct callout *c;
 238         int i;
 239
 240         mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
 241         SLIST_INIT(&cc->cc_callfree);
 242         for (i = 0; i < callwheelsize; i++) {
 243                 TAILQ_INIT(&cc->cc_callwheel[i]);
 244         }
 245         cc_cme_cleanup(cc);
             /* Cpus without a timeout(9) pool have nothing more to set up. */
 246         if (cc->cc_callout == NULL)
 247                 return;
 248         for (i = 0; i < ncallout; i++) {
 249                 c = &cc->cc_callout[i];
 250                 callout_init(c, 0);
 251                 c->c_flags = CALLOUT_LOCAL_ALLOC;
 252                 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
 253         }
 254 }
 255
 256 #ifdef SMP
 257 /*
 258  * Switches the cpu tied to a specific callout.
 259  * The function expects the current callout cpu (cc) to be locked on entry
 260  * and returns the new callout cpu, locked, with cc unlocked.
 261  */
 262 static struct callout_cpu *
 263 callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
 264 {
 265         struct callout_cpu *new_cc;
 266
 267         MPASS(c != NULL && cc != NULL);
 268         CC_LOCK_ASSERT(cc);
 269
 270         /*
 271          * Avoid interrupts and preemption firing after the callout cpu
 272          * is blocked in order to avoid deadlocks as the new thread
 273          * may be willing to acquire the callout cpu lock.
 274          */
             /* While c_cpu is CPUBLOCK, callout_lock() spins instead of locking. */
 275         c->c_cpu = CPUBLOCK;
 276         spinlock_enter();
 277         CC_UNLOCK(cc);
 278         new_cc = CC_CPU(new_cpu);
 279         CC_LOCK(new_cc);
 280         spinlock_exit();
             /* Publish the new owner only once its lock is held. */
 281         c->c_cpu = new_cpu;
 282         return (new_cc);
 283 }
 284 #endif
 285
 286 /*
 287  * kern_timeout_callwheel_init() - initialize previously reserved callwheel
 288  *                                 space.
 289  *
 290  *      This code is called just once, after the space reserved for the
 291  *      callout wheel has been finalized.
      *
      *      It initializes the callout_cpu belonging to timeout_cpu, the cpu
      *      recorded by kern_timeout_callwheel_alloc().
 292  */
 293 void
 294 kern_timeout_callwheel_init(void)
 295 {
 296         callout_cpu_init(CC_CPU(timeout_cpu));
 297 }
 298
 299 /*
 300  * Start standard softclock thread.
      *
      * Registers softclock() as an SWI_CLOCK software interrupt handler for
      * timeout_cpu and, on SMP, for every other cpu as well.  Each additional
      * cpu gets a freshly allocated callwheel and no timeout(9) pool before
      * its callout_cpu is initialized.  Panics if a handler cannot be added.
      * The dummy argument is unused.
 301  */
 302 static void
 303 start_softclock(void *dummy)
 304 {
 305         struct callout_cpu *cc;
 306 #ifdef SMP
 307         int cpu;
 308 #endif
 309
 310         cc = CC_CPU(timeout_cpu);
 311         if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK,
 312             INTR_MPSAFE, &cc->cc_cookie))
 313                 panic("died while creating standard software ithreads");
 314 #ifdef SMP
 315         CPU_FOREACH(cpu) {
                     /* timeout_cpu was handled above. */
 316                 if (cpu == timeout_cpu)
 317                         continue;
 318                 cc = CC_CPU(cpu);
 319                 if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK,
 320                     INTR_MPSAFE, &cc->cc_cookie))
 321                         panic("died while creating standard software ithreads");
 322                 cc->cc_callout = NULL;  /* Only cpu0 handles timeout(). */
 323                 cc->cc_callwheel = malloc(
 324                     sizeof(struct callout_tailq) * callwheelsize, M_CALLOUT,
 325                     M_WAITOK);
 326                 callout_cpu_init(cc);
 327         }
 328 #endif
 329 }
 330
     /* Run start_softclock() first within the SI_SUB_SOFTINTR boot stage. */
 331 SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);
 332
     /*
      * Clock-tick processing for the current cpu's callout state.  Records the
      * current value of 'ticks' in cc_ticks and cc_firsttick, advances
      * cc_softticks past empty wheel buckets up to that tick, and schedules
      * the softclock software interrupt if a non-empty bucket is found.
      */
 333 void
 334 callout_tick(void)
 335 {
 336         struct callout_cpu *cc;
 337         int need_softclock;
 338         int bucket;
 339
 340         /*
 341          * Process callouts at a very low cpu priority, so we don't keep the
 342          * relatively high clock interrupt priority any longer than necessary.
 343          */
 344         need_softclock = 0;
 345         cc = CC_SELF();
 346         mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
 347         cc->cc_firsttick = cc->cc_ticks = ticks;
             /*
              * The signed difference keeps the comparison meaningful across
              * tick counter wraparound.  Stop at the first bucket with work
              * and leave cc_softticks pointing at it.
              */
 348         for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) {
 349                 bucket = cc->cc_softticks & callwheelmask;
 350                 if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
 351                         need_softclock = 1;
 352                         break;
 353                 }
 354         }
 355         mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
 356         /*
 357          * swi_sched acquires the thread lock, so we don't want to call it
 358          * with cc_lock held; incorrect locking order.
 359          */
 360         if (need_softclock)
 361                 swi_sched(cc->cc_cookie, 0);
 362 }
 363
     /*
      * Return the number of ticks, counted from the current cpu's cc_ticks,
      * until the first wheel bucket holding a callout whose c_time lies within
      * ncallout ticks of cc_ticks.  Scanning starts one tick ahead and stops
      * once the offset reaches ncallout or limit; in that case the offset
      * reached is returned.  The result is never less than 1, and
      * cc_firsttick is set to cc_ticks plus the result.
      */
 364 int
 365 callout_tickstofirst(int limit)
 366 {
 367         struct callout_cpu *cc;
 368         struct callout *c;
 369         struct callout_tailq *sc;
 370         int curticks;
 371         int skip = 1;
 372
 373         cc = CC_SELF();
 374         mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
 375         curticks = cc->cc_ticks;
 376         while( skip < ncallout && skip < limit ) {
 377                 sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ];
 378                 /* search scanning ticks */
 379                 TAILQ_FOREACH( c, sc, c_links.tqe ){
                             /*
                              * A bucket can also hold callouts that are due
                              * on a later pass around the wheel; only entries
                              * due within ncallout ticks count.
                              */
 380                         if (c->c_time - curticks <= ncallout)
 381                                 goto out;
 382                 }
 383                 skip++;
 384         }
 385 out:
 386         cc->cc_firsttick = curticks + skip;
 387         mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
 388         return (skip);
 389 }
 390
     /*
      * Lock and return the callout_cpu that callout c is currently assigned
      * to.  c->c_cpu may change while we wait for the lock, so it is checked
      * again once the lock is held and the whole sequence is retried on a
      * mismatch.  On SMP, a c_cpu of CPUBLOCK means the callout is being moved
      * between cpus; spin until a real cpu id is published.
      */
 391 static struct callout_cpu *
 392 callout_lock(struct callout *c)
 393 {
 394         struct callout_cpu *cc;
 395         int cpu;
 396
 397         for (;;) {
 398                 cpu = c->c_cpu;
 399 #ifdef SMP
 400                 if (cpu == CPUBLOCK) {
 401                         while (c->c_cpu == CPUBLOCK)
 402                                 cpu_spinwait();
 403                         continue;
 404                 }
 405 #endif
 406                 cc = CC_CPU(cpu);
 407                 CC_LOCK(cc);
                     /* Still assigned to the cpu we locked?  Then we are done. */
 408                 if (cpu == c->c_cpu)
 409                         break;
 410                 CC_UNLOCK(cc);
 411         }
 412         return (cc);
 413 }
 414
     /*
      * Arm callout c on callout cpu cc, which must be locked, to run func(arg)
      * to_ticks ticks from now; non-positive delays are raised to one tick.
      * The callout is marked active and pending and appended to the wheel
      * bucket selected by its expiry tick.  If that expiry is earlier than
      * cc_firsttick and a callout_new_inserted hook is registered,
      * cc_firsttick is updated and the hook is called with cpu and the delay
      * measured from cc_ticks.
      */
 415 static void
 416 callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
 417     void (*func)(void *), void *arg, int cpu)
 418 {
 419
 420         CC_LOCK_ASSERT(cc);
 421
 422         if (to_ticks <= 0)
 423                 to_ticks = 1;
 424         c->c_arg = arg;
 425         c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
 426         c->c_func = func;
 427         c->c_time = ticks + to_ticks;
 428         TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
 429             c, c_links.tqe);
 430         if ((c->c_time - cc->cc_firsttick) < 0 &&
 431             callout_new_inserted != NULL) {
 432                 cc->cc_firsttick = c->c_time;
 433                 (*callout_new_inserted)(cpu,
 434                     to_ticks + (ticks - cc->cc_ticks));
 435         }
 436 }
 437
 438 static void
 439 callout_cc_del(struct callout *c, struct callout_cpu *cc)
 440 {
 441         /* Only the preallocated timeout(9) callouts are recycled here. */
 442         if ((c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) {
 443                 c->c_func = NULL;
 444                 SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
 445         }
 446 }
 447
     /*
      * Run one callout that softclock() has already removed from the wheel.
      * Called with cc locked and returns with cc locked; the lock is dropped
      * while the callout's own lock (if any) is acquired and the handler runs.
      * One of *mpcalls, *lockcalls or *gcalls is incremented according to the
      * kind of lock the callout uses.  After the handler returns (or was
      * cancelled before starting) this wakes a waiter recorded in cc_waiting,
      * performs or drops a deferred migration, and puts CALLOUT_LOCAL_ALLOC
      * callouts back on the free list.
      */
 448 static void
 449 softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
 450     int *lockcalls, int *gcalls)
 451 {
 452         void (*c_func)(void *);
 453         void *c_arg;
 454         struct lock_class *class;
 455         struct lock_object *c_lock;
 456         int c_flags, sharedlock;
 457 #ifdef SMP
 458         struct callout_cpu *new_cc;
 459         void (*new_func)(void *);
 460         void *new_arg;
 461         int new_cpu, new_ticks;
 462 #endif
 463 #ifdef DIAGNOSTIC
 464         struct bintime bt1, bt2;
 465         struct timespec ts2;
 466         static uint64_t maxdt = 36893488147419102LL;    /* 2 msec */
 467         static timeout_t *lastfunc;
 468 #endif
 469
 470         KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) ==
 471             (CALLOUT_PENDING | CALLOUT_ACTIVE),
 472             ("softclock_call_cc: pend|act %p %x", c, c->c_flags));
 473         class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
             /*
              * NOTE(review): despite its name, sharedlock is 0 when
              * CALLOUT_SHAREDLOCK is set.  It is passed as the second argument
              * of lc_lock(); presumably 0 selects a shared acquisition there.
              * Confirm against the lock class implementations.
              */
 474         sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
             /* Cache everything needed after cc_lock is dropped. */
 475         c_lock = c->c_lock;
 476         c_func = c->c_func;
 477         c_arg = c->c_arg;
 478         c_flags = c->c_flags;
 479         if (c->c_flags & CALLOUT_LOCAL_ALLOC)
 480                 c->c_flags = CALLOUT_LOCAL_ALLOC;
 481         else
 482                 c->c_flags &= ~CALLOUT_PENDING;
 483         cc->cc_curr = c;
 484         cc->cc_cancel = 0;
 485         CC_UNLOCK(cc);
 486         if (c_lock != NULL) {
 487                 class->lc_lock(c_lock, sharedlock);
 488                 /*
 489                  * The callout may have been cancelled
 490                  * while we switched locks.
 491                  */
 492                 if (cc->cc_cancel) {
 493                         class->lc_unlock(c_lock);
 494                         goto skip;
 495                 }
 496                 /* The callout cannot be stopped now. */
 497                 cc->cc_cancel = 1;
 498
 499                 if (c_lock == &Giant.lock_object) {
 500                         (*gcalls)++;
 501                         CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
 502                             c, c_func, c_arg);
 503                 } else {
 504                         (*lockcalls)++;
 505                         CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
 506                             c, c_func, c_arg);
 507                 }
 508         } else {
 509                 (*mpcalls)++;
 510                 CTR3(KTR_CALLOUT, "callout mpsafe %p func %p arg %p",
 511                     c, c_func, c_arg);
 512         }
 513 #ifdef DIAGNOSTIC
 514         binuptime(&bt1);
 515 #endif
 516         THREAD_NO_SLEEPING();
 517         SDT_PROBE1(callout_execute, kernel, , callout__start, c);
 518         c_func(c_arg);
 519         SDT_PROBE1(callout_execute, kernel, , callout__end, c);
 520         THREAD_SLEEPING_OK();
 521 #ifdef DIAGNOSTIC
             /*
              * Report handlers that run longer than the longest one seen so
              * far (initially 2 msec), then raise the threshold to the new
              * maximum.
              */
 522         binuptime(&bt2);
 523         bintime_sub(&bt2, &bt1);
 524         if (bt2.frac > maxdt) {
 525                 if (lastfunc != c_func || bt2.frac > maxdt * 2) {
 526                         bintime2timespec(&bt2, &ts2);
 527                         printf(
 528                 "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
 529                             c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
 530                 }
 531                 maxdt = bt2.frac;
 532                 lastfunc = c_func;
 533         }
 534 #endif
 535         CTR1(KTR_CALLOUT, "callout %p finished", c);
             /* Use the cached flags: c may no longer be safe to dereference. */
 536         if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
 537                 class->lc_unlock(c_lock);
 538 skip:
 539         CC_LOCK(cc);
 540         KASSERT(cc->cc_curr == c, ("mishandled cc_curr"));
 541         cc->cc_curr = NULL;
 542         if (cc->cc_waiting) {
 543                 /*
 544                  * There is someone waiting for the
 545                  * callout to complete.
 546                  * If the callout was scheduled for
 547                  * migration just cancel it.
 548                  */
 549                 if (cc_cme_migrating(cc)) {
 550                         cc_cme_cleanup(cc);
 551
 552                         /*
 553                          * It should be asserted here that the callout is
 554                          * not destroyed, but that is not easy.
 555                          */
 556                         c->c_flags &= ~CALLOUT_DFRMIGRATION;
 557                 }
 558                 cc->cc_waiting = 0;
                     /* Drop cc_lock around wakeup() and retake it afterwards. */
 559                 CC_UNLOCK(cc);
 560                 wakeup(&cc->cc_waiting);
 561                 CC_LOCK(cc);
 562         } else if (cc_cme_migrating(cc)) {
 563                 KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0,
 564                     ("Migrating legacy callout %p", c));
 565 #ifdef SMP
 566                 /*
 567                  * If the callout was scheduled for
 568                  * migration just perform it now.
 569                  */
 570                 new_cpu = cc->cc_migration_cpu;
 571                 new_ticks = cc->cc_migration_ticks;
 572                 new_func = cc->cc_migration_func;
 573                 new_arg = cc->cc_migration_arg;
 574                 cc_cme_cleanup(cc);
 575
 576                 /*
 577                  * It should be asserted here that the callout is not
 578                  * destroyed, but that is not easy.
 579                  *
 580                  * As first thing, handle deferred callout stops.
 581                  */
 582                 if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) {
 583                         CTR3(KTR_CALLOUT,
 584                              "deferred cancelled %p func %p arg %p",
 585                              c, new_func, new_arg);
 586                         callout_cc_del(c, cc);
 587                         return;
 588                 }
 589                 c->c_flags &= ~CALLOUT_DFRMIGRATION;
 590
                     /*
                      * callout_cpu_switch() releases cc and returns with new_cc
                      * locked; re-arm there, then return to cc so the caller
                      * finds it locked as on entry.
                      */
 591                 new_cc = callout_cpu_switch(c, cc, new_cpu);
 592                 callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
 593                     new_cpu);
 594                 CC_UNLOCK(new_cc);
 595                 CC_LOCK(cc);
 596 #else
 597                 panic("migration should not happen");
 598 #endif
 599         }
 600         /*
 601          * If the current callout is locally allocated (from
 602          * timeout(9)) then put it on the freelist.
 603          *
 604          * Note: we need to check the cached copy of c_flags because
 605          * if it was not local, then it's not safe to deref the
 606          * callout pointer.
 607          */
 608         KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 ||
 609             c->c_flags == CALLOUT_LOCAL_ALLOC,
 610             ("corrupted callout"));
 611         if (c_flags & CALLOUT_LOCAL_ALLOC)
 612                 callout_cc_del(c, cc);
 613 }
 614
 615 /*
 616  * The callout mechanism is based on the work of Adam M. Costello and
 617  * George Varghese, published in a technical report entitled "Redesigning
 618  * the BSD Callout and Timer Facilities" and modified slightly for inclusion
 619  * in FreeBSD by Justin T. Gibbs.  The original work on the data structures
 620  * used in this implementation was published by G. Varghese and T. Lauck in
 621  * the paper "Hashed and Hierarchical Timing Wheels: Data Structures for
 622  * the Efficient Implementation of a Timer Facility" in the Proceedings of
 623  * the 11th ACM Annual Symposium on Operating Systems Principles,
 624  * Austin, Texas Nov 1987.
 625  */
 626
 627 /*
 628  * Software (low priority) clock interrupt.
 629  * Run periodic events from timeout queue.
      *
      * arg is the struct callout_cpu to service.  Every tick from cc_softticks
      * up to and including cc_ticks is processed: the matching wheel bucket is
      * walked and each callout whose c_time equals that tick is unlinked and
      * run through softclock_call_cc().  Per-pass counters are folded into the
      * avg_* statistics before returning.
 630  */
 631 void
 632 softclock(void *arg)
 633 {
 634         struct callout_cpu *cc;
 635         struct callout *c;
 636         struct callout_tailq *bucket;
 637         int curticks;
 638         int steps;      /* #steps since we last allowed interrupts */
 639         int depth;
 640         int mpcalls;
 641         int lockcalls;
 642         int gcalls;
 643
 644 #ifndef MAX_SOFTCLOCK_STEPS
 645 #define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
 646 #endif /* MAX_SOFTCLOCK_STEPS */
 647
 648         mpcalls = 0;
 649         lockcalls = 0;
 650         gcalls = 0;
 651         depth = 0;
 652         steps = 0;
 653         cc = (struct callout_cpu *)arg;
 654         CC_LOCK(cc);
             /* Stop once cc_softticks is one past cc_ticks. */
 655         while (cc->cc_softticks - 1 != cc->cc_ticks) {
 656                 /*
 657                  * cc_softticks may be modified by hard clock, so cache
 658                  * it while we work on a given bucket.
 659                  */
 660                 curticks = cc->cc_softticks;
 661                 cc->cc_softticks++;
 662                 bucket = &cc->cc_callwheel[curticks & callwheelmask];
 663                 c = TAILQ_FIRST(bucket);
 664                 while (c != NULL) {
 665                         depth++;
 666                         if (c->c_time != curticks) {
                                     /* Not due on this tick; skip it. */
 667                                 c = TAILQ_NEXT(c, c_links.tqe);
 668                                 ++steps;
 669                                 if (steps >= MAX_SOFTCLOCK_STEPS) {
                                             /*
                                              * Publish our position in cc_next
                                              * so that callout_reset_on() can
                                              * advance it if it removes that
                                              * entry while the lock is dropped.
                                              */
 670                                         cc->cc_next = c;
 671                                         /* Give interrupts a chance. */
 672                                         CC_UNLOCK(cc);
 673                                         ;       /* nothing */
 674                                         CC_LOCK(cc);
 675                                         c = cc->cc_next;
 676                                         steps = 0;
 677                                 }
 678                         } else {
                                     /*
                                      * softclock_call_cc() drops cc_lock, so
                                      * the successor is kept in cc_next rather
                                      * than in a local.
                                      */
 679                                 cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
 680                                 TAILQ_REMOVE(bucket, c, c_links.tqe);
 681                                 softclock_call_cc(c, cc, &mpcalls,
 682                                     &lockcalls, &gcalls);
 683                                 steps = 0;
 684                                 c = cc->cc_next;
 685                         }
 686                 }
 687         }
             /* Running averages scaled by 1000; each pass has weight 1/256. */
 688         avg_depth += (depth * 1000 - avg_depth) >> 8;
 689         avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
 690         avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
 691         avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
 692         cc->cc_next = NULL;
 693         CC_UNLOCK(cc);
 694 }
 695
 696 /*
 697  * timeout --
 698  *      Execute a function after a specified length of time.
 699  *
 700  * untimeout --
 701  *      Cancel previous timeout function call.
 702  *
 703  * callout_handle_init --
 704  *      Initialize a handle so that using it with untimeout is benign.
 705  *
 706  *      See AT&T BCI Driver Reference Manual for specification.  This
 707  *      implementation differs from that one in that although an
 708  *      identification value is returned from timeout, the original
 709  *      arguments to timeout as well as the identifier are used to
 710  *      identify entries for untimeout.
 711  *
 712  *      timeout() takes a preallocated callout from timeout_cpu's free
 713  *      list, panics if none is left, and returns a handle naming it.
 714  */
 715 struct callout_handle
 716 timeout(timeout_t *ftn, void *arg, int to_ticks)
 717 {
 718         struct callout_cpu *cc;
 719         struct callout *new;
 720         struct callout_handle handle;
 721
 722         cc = CC_CPU(timeout_cpu);
 723         CC_LOCK(cc);
 724         /* Fill in the next free callout structure. */
 725         new = SLIST_FIRST(&cc->cc_callfree);
 726         if (new == NULL)
 727                 /* XXX Attempt to malloc first */
 728                 panic("timeout table full");
 729         SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle);
 730         callout_reset(new, to_ticks, ftn, arg);
 731         handle.callout = new;
 732         CC_UNLOCK(cc);
 733
 734         return (handle);
 735 }
 736
 737 /*
 738  * Stop the callout behind handle, but only if it still carries ftn and arg.
 739  */
 740 void
 741 untimeout(timeout_t *ftn, void *arg, struct callout_handle handle)
 742 {
 743         struct callout_cpu *cc;
 744
 745         /*
 746          * Check for a handle that was initialized
 747          * by callout_handle_init, but never used
 748          * for a real timeout.
 749          */
 750         if (handle.callout == NULL)
 751                 return;
 752
 753         cc = callout_lock(handle.callout);
 754         if (handle.callout->c_func == ftn && handle.callout->c_arg == arg)
 755                 callout_stop(handle.callout);
 756         CC_UNLOCK(cc);
 757 }
 758
     /*
      * Reset a handle to "no callout".  untimeout() returns immediately when
      * it is given a handle in this state.
      */
 759 void
 760 callout_handle_init(struct callout_handle *handle)
 761 {
 762         handle->callout = NULL;
 763 }
 764
 765 /*
 766  * New interface; clients allocate their own callout structures.
 767  *
 768  * callout_reset() - establish or change a timeout
 769  * callout_stop() - disestablish a timeout
 770  * callout_init() - initialize a callout structure so that it can
 771  *      safely be passed to callout_reset() and callout_stop()
 772  *
 773  * <sys/callout.h> defines three convenience macros:
 774  *
 775  * callout_active() - returns truth if callout has not been stopped,
 776  *      drained, or deactivated since the last time the callout was
 777  *      reset.
 778  * callout_pending() - returns truth if callout is still waiting for timeout
 779  * callout_deactivate() - marks the callout as having been serviced
      *
      * callout_reset_on() arms c to call ftn(arg) after to_ticks ticks on the
      * given cpu.  It returns 1 if a pending instance was removed from the
      * wheel or an in-progress instance was cancelled before its handler
      * started, and 0 otherwise.  Nothing is scheduled when a thread is
      * waiting for the in-progress callout to finish; if the callout is
      * running and must change cpu, the request is recorded and carried out
      * by softclock_call_cc() once the handler returns.
 780  */
 781 int
 782 callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
 783     void *arg, int cpu)
 784 {
 785         struct callout_cpu *cc;
 786         int cancelled = 0;
 787
 788         /*
 789          * Don't allow migration of pre-allocated callouts lest they
 790          * become unbalanced.
 791          */
 792         if (c->c_flags & CALLOUT_LOCAL_ALLOC)
 793                 cpu = c->c_cpu;
 794         cc = callout_lock(c);
 795         if (cc->cc_curr == c) {
 796                 /*
 797                  * We're being asked to reschedule a callout which is
 798                  * currently in progress.  If there is a lock then we
 799                  * can cancel the callout if it has not really started.
 800                  */
 801                 if (c->c_lock != NULL && !cc->cc_cancel)
 802                         cancelled = cc->cc_cancel = 1;
 803                 if (cc->cc_waiting) {
 804                         /*
 805                          * Someone has called callout_drain to kill this
 806                          * callout.  Don't reschedule.
 807                          */
 808                         CTR4(KTR_CALLOUT, "%s %p func %p arg %p",
 809                             cancelled ? "cancelled" : "failed to cancel",
 810                             c, c->c_func, c->c_arg);
 811                         CC_UNLOCK(cc);
 812                         return (cancelled);
 813                 }
 814         }
 815         if (c->c_flags & CALLOUT_PENDING) {
                     /* Keep softclock()'s saved iterator off the entry we unlink. */
 816                 if (cc->cc_next == c) {
 817                         cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
 818                 }
 819                 TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
 820                     c_links.tqe);
 821
 822                 cancelled = 1;
 823                 c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
 824         }
 825
 826 #ifdef SMP
 827         /*
 828          * If the callout must migrate try to perform it immediately.
 829          * If the callout is currently running, just defer the migration
 830          * to a more appropriate moment.
 831          */
 832         if (c->c_cpu != cpu) {
 833                 if (cc->cc_curr == c) {
 834                         cc->cc_migration_cpu = cpu;
 835                         cc->cc_migration_ticks = to_ticks;
 836                         cc->cc_migration_func = ftn;
 837                         cc->cc_migration_arg = arg;
 838                         c->c_flags |= CALLOUT_DFRMIGRATION;
 839                         CTR5(KTR_CALLOUT,
 840                     "migration of %p func %p arg %p in %d to %u deferred",
 841                             c, c->c_func, c->c_arg, to_ticks, cpu);
 842                         CC_UNLOCK(cc);
 843                         return (cancelled);
 844                 }
                     /* Returns with the new cpu's callout_cpu locked instead. */
 845                 cc = callout_cpu_switch(c, cc, cpu);
 846         }
 847 #endif
 848
 849         callout_cc_add(c, cc, to_ticks, ftn, arg, cpu);
 850         CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
 851             cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
 852         CC_UNLOCK(cc);
 853
 854         return (cancelled);
 855 }
 856
 857 /*
 858  * Common idioms that can be optimized in the future.
 859  */
 860 int
 861 callout_schedule_on(struct callout *c, int to_ticks, int cpu)
 862 {
 863         return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, cpu);
 864 }
 865
 866 int
 867 callout_schedule(struct callout *c, int to_ticks)
 868 {
 869         return callout_reset_on(c, to_ticks, c->c_func, c->c_arg, c->c_cpu);
 870 }
 871
 872 int
 873 _callout_stop_safe(c, safe)
 874         struct  callout *c;
 875         int     safe;
 876 {
 877         struct callout_cpu *cc, *old_cc;
 878         struct lock_class *class;
 879         int use_lock, sq_locked;
 880
 881         /*
 882          * Some old subsystems don't hold Giant while running a callout_stop(),
 883          * so just discard this check for the moment.
 884          */
 885         if (!safe && c->c_lock != NULL) {
 886                 if (c->c_lock == &Giant.lock_object)
 887                         use_lock = mtx_owned(&Giant);
 888                 else {
 889                         use_lock = 1;
 890                         class = LOCK_CLASS(c->c_lock);
 891                         class->lc_assert(c->c_lock, LA_XLOCKED);
 892                 }
 893         } else
 894                 use_lock = 0;
 895
 896         sq_locked = 0;
 897         old_cc = NULL;
 898 again:
 899         cc = callout_lock(c);
 900
 901         /*
 902          * If the callout was migrating while the callout cpu lock was
 903          * dropped,  just drop the sleepqueue lock and check the states
 904          * again.
 905          */
 906         if (sq_locked != 0 && cc != old_cc) {
 907 #ifdef SMP
 908                 CC_UNLOCK(cc);
 909                 sleepq_release(&old_cc->cc_waiting);
 910                 sq_locked = 0;
 911                 old_cc = NULL;
 912                 goto again;
 913 #else
 914                 panic("migration should not happen");
 915 #endif
 916         }
 917
 918         /*
 919          * If the callout isn't pending, it's not on the queue, so
 920          * don't attempt to remove it from the queue.  We can try to
 921          * stop it by other means however.
 922          */
 923         if (!(c->c_flags & CALLOUT_PENDING)) {
 924                 c->c_flags &= ~CALLOUT_ACTIVE;
 925
 926                 /*
 927                  * If it wasn't on the queue and it isn't the current
 928                  * callout, then we can't stop it, so just bail.
 929                  */
 930                 if (cc->cc_curr != c) {
 931                         CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
 932                             c, c->c_func, c->c_arg);
 933                         CC_UNLOCK(cc);
 934                         if (sq_locked)
 935                                 sleepq_release(&cc->cc_waiting);
 936                         return (0);
 937                 }
 938
 939                 if (safe) {
 940                         /*
 941                          * The current callout is running (or just
 942                          * about to run) and blocking is allowed, so
 943                          * just wait for the current invocation to
 944                          * finish.
 945                          */
 946                         while (cc->cc_curr == c) {
 947
 948                                 /*
 949                                  * Use direct calls to sleepqueue interface
 950                                  * instead of cv/msleep in order to avoid
 951                                  * a LOR between cc_lock and sleepqueue
 952                                  * chain spinlocks.  This piece of code
 953                                  * emulates a msleep_spin() call actually.
 954                                  *
 955                                  * If we already have the sleepqueue chain
 956                                  * locked, then we can safely block.  If we
 957                                  * don't already have it locked, however,
 958                                  * we have to drop the cc_lock to lock
 959                                  * it.  This opens several races, so we
 960                                  * restart at the beginning once we have
 961                                  * both locks.  If nothing has changed, then
 962                                  * we will end up back here with sq_locked
 963                                  * set.
 964                                  */
 965                                 if (!sq_locked) {
 966                                         CC_UNLOCK(cc);
 967                                         sleepq_lock(&cc->cc_waiting);
 968                                         sq_locked = 1;
 969                                         old_cc = cc;
 970                                         goto again;
 971                                 }
 972
 973                                 /*
 974                                  * Migration could be cancelled here, but
 975                                  * as long as it is still not sure when it
 976                                  * will be packed up, just let softclock()
 977                                  * take care of it.
 978                                  */
 979                                 cc->cc_waiting = 1;
 980                                 DROP_GIANT();
 981                                 CC_UNLOCK(cc);
 982                                 sleepq_add(&cc->cc_waiting,
 983                                     &cc->cc_lock.lock_object, "codrain",
 984                                     SLEEPQ_SLEEP, 0);
 985                                 sleepq_wait(&cc->cc_waiting, 0);
 986                                 sq_locked = 0;
 987                                 old_cc = NULL;
 988
 989                                 /* Reacquire locks previously released. */
 990                                 PICKUP_GIANT();
 991                                 CC_LOCK(cc);
 992                         }
 993                 } else if (use_lock && !cc->cc_cancel) {
 994                         /*
 995                          * The current callout is waiting for its
 996                          * lock which we hold.  Cancel the callout
 997                          * and return.  After our caller drops the
 998                          * lock, the callout will be skipped in
 999                          * softclock().
1000                          */
1001                         cc->cc_cancel = 1;
1002                         CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
1003                             c, c->c_func, c->c_arg);
1004                         KASSERT(!cc_cme_migrating(cc),
1005                             ("callout wrongly scheduled for migration"));
1006                         CC_UNLOCK(cc);
1007                         KASSERT(!sq_locked, ("sleepqueue chain locked"));
1008                         return (1);
1009                 } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) {
1010                         c->c_flags &= ~CALLOUT_DFRMIGRATION;
1011                         CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
1012                             c, c->c_func, c->c_arg);
1013                         CC_UNLOCK(cc);
1014                         return (1);
1015                 }
1016                 CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
1017                     c, c->c_func, c->c_arg);
1018                 CC_UNLOCK(cc);
1019                 KASSERT(!sq_locked, ("sleepqueue chain still locked"));
1020                 return (0);
1021         }
1022         if (sq_locked)
1023                 sleepq_release(&cc->cc_waiting);
1024
1025         c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
1026
1027         CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
1028             c, c->c_func, c->c_arg);
1029         if (cc->cc_next == c)
1030                 cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
1031         TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
1032             c_links.tqe);
1033         callout_cc_del(c, cc);
1034
1035         CC_UNLOCK(cc);
1036         return (1);
1037 }
1038
1039 void
1040 callout_init(c, mpsafe)
1041         struct  callout *c;
1042         int mpsafe;
1043 {
1044         bzero(c, sizeof *c);
1045         if (mpsafe) {
1046                 c->c_lock = NULL;
1047                 c->c_flags = CALLOUT_RETURNUNLOCKED;
1048         } else {
1049                 c->c_lock = &Giant.lock_object;
1050                 c->c_flags = 0;
1051         }
1052         c->c_cpu = timeout_cpu;
1053 }
1054
1055 void
1056 _callout_init_lock(c, lock, flags)
1057         struct  callout *c;
1058         struct  lock_object *lock;
1059         int flags;
1060 {
1061         bzero(c, sizeof *c);
1062         c->c_lock = lock;
1063         KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0,
1064             ("callout_init_lock: bad flags %d", flags));
1065         KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0,
1066             ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock"));
1067         KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags &
1068             (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class",
1069             __func__));
1070         c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK);
1071         c->c_cpu = timeout_cpu;
1072 }
1073
1074 #ifdef APM_FIXUP_CALLTODO
1075 /*
1076  * Adjust the kernel calltodo timeout list.  This routine is used after
1077  * an APM resume to recalculate the calltodo timer list values with the
1078  * number of hz's we have been sleeping.  The next hardclock() will detect
1079  * that there are fired timers and run softclock() to execute them.
1080  *
1081  * Please note, I have not done an exhaustive analysis of what code this
1082  * might break.  I am motivated to have my select()'s and alarm()'s that
1083  * have expired during suspend firing upon resume so that the applications
1084  * which set the timer can do the maintanence the timer was for as close
1085  * as possible to the originally intended time.  Testing this code for a
1086  * week showed that resuming from a suspend resulted in 22 to 25 timers
1087  * firing, which seemed independant on whether the suspend was 2 hours or
1088  * 2 days.  Your milage may vary.   - Ken Key <key@cs.utk.edu>
1089  */
1090 void
1091 adjust_timeout_calltodo(time_change)
1092     struct timeval *time_change;
1093 {
1094         register struct callout *p;
1095         unsigned long delta_ticks;
1096
1097         /*
1098          * How many ticks were we asleep?
1099          * (stolen from tvtohz()).
1100          */
1101
1102         /* Don't do anything */
1103         if (time_change->tv_sec < 0)
1104                 return;
1105         else if (time_change->tv_sec <= LONG_MAX / 1000000)
1106                 delta_ticks = (time_change->tv_sec * 1000000 +
1107                                time_change->tv_usec + (tick - 1)) / tick + 1;
1108         else if (time_change->tv_sec <= LONG_MAX / hz)
1109                 delta_ticks = time_change->tv_sec * hz +
1110                               (time_change->tv_usec + (tick - 1)) / tick + 1;
1111         else
1112                 delta_ticks = LONG_MAX;
1113
1114         if (delta_ticks > INT_MAX)
1115                 delta_ticks = INT_MAX;
1116
1117         /*
1118          * Now rip through the timer calltodo list looking for timers
1119          * to expire.
1120          */
1121
1122         /* don't collide with softclock() */
1123         CC_LOCK(cc);
1124         for (p = calltodo.c_next; p != NULL; p = p->c_next) {
1125                 p->c_time -= delta_ticks;
1126
1127                 /* Break if the timer had more time on it than delta_ticks */
1128                 if (p->c_time > 0)
1129                         break;
1130
1131                 /* take back the ticks the timer didn't use (p->c_time <= 0) */
1132                 delta_ticks = -p->c_time;
1133         }
1134         CC_UNLOCK(cc);
1135
1136         return;
1137 }
1138 #endif /* APM_FIXUP_CALLTODO */