sys/kern/subr_epoch.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
   3  *
   4  * Copyright (c) 2018, Matthew Macy <mmacy@freebsd.org>
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions
   8  * are met:
   9  * 1. Redistributions of source code must retain the above copyright
  10  *    notice, this list of conditions and the following disclaimer.
  11  * 2. Redistributions in binary form must reproduce the above copyright
  12  *    notice, this list of conditions and the following disclaimer in the
  13  *    documentation and/or other materials provided with the distribution.
  14  *
  15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  25  * SUCH DAMAGE.
  26  *
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 #include <sys/param.h>
  33 #include <sys/types.h>
  34 #include <sys/systm.h>
  35 #include <sys/counter.h>
  36 #include <sys/epoch.h>
  37 #include <sys/gtaskqueue.h>
  38 #include <sys/kernel.h>
  39 #include <sys/limits.h>
  40 #include <sys/lock.h>
  41 #include <sys/malloc.h>
  42 #include <sys/mutex.h>
  43 #include <sys/pcpu.h>
  44 #include <sys/proc.h>
  45 #include <sys/sched.h>
  46 #include <sys/smp.h>
  47 #include <sys/sysctl.h>
  48 #include <sys/turnstile.h>
  49 #include <vm/vm.h>
  50 #include <vm/vm_extern.h>
  51 #include <vm/vm_kern.h>
  52
  53 #include <ck_epoch.h>
  54
  55 static MALLOC_DEFINE(M_EPOCH, "epoch", "epoch based reclamation");
  56
  57 /* arbitrary --- needs benchmarking */
  58 #define MAX_ADAPTIVE_SPIN 1000
  59 #define MAX_EPOCHS 64
  60
  61 #ifdef __amd64__
  62 #define EPOCH_ALIGN CACHE_LINE_SIZE*2
  63 #else
  64 #define EPOCH_ALIGN CACHE_LINE_SIZE
  65 #endif
  66
  67 CTASSERT(sizeof(epoch_section_t) == sizeof(ck_epoch_section_t));
  68 CTASSERT(sizeof(ck_epoch_entry_t) == sizeof(struct epoch_context));
  69 SYSCTL_NODE(_kern, OID_AUTO, epoch, CTLFLAG_RW, 0, "epoch information");
  70 SYSCTL_NODE(_kern_epoch, OID_AUTO, stats, CTLFLAG_RW, 0, "epoch stats");
  71
  72
  73 /* Stats. */
  74 static counter_u64_t block_count;
  75
  76 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, nblocked, CTLFLAG_RW,
  77     &block_count, "# of times a thread was in an epoch when epoch_wait was called");
  78 static counter_u64_t migrate_count;
  79
  80 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, migrations, CTLFLAG_RW,
  81     &migrate_count, "# of times thread was migrated to another CPU in epoch_wait");
  82 static counter_u64_t turnstile_count;
  83
  84 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, ncontended, CTLFLAG_RW,
  85     &turnstile_count, "# of times a thread was blocked on a lock in an epoch during an epoch_wait");
  86 static counter_u64_t switch_count;
  87
  88 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, switches, CTLFLAG_RW,
  89     &switch_count, "# of times a thread voluntarily context switched in epoch_wait");
  90 static counter_u64_t epoch_call_count;
  91
  92 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_calls, CTLFLAG_RW,
  93     &epoch_call_count, "# of times a callback was deferred");
  94 static counter_u64_t epoch_call_task_count;
  95
  96 SYSCTL_COUNTER_U64(_kern_epoch_stats, OID_AUTO, epoch_call_tasks, CTLFLAG_RW,
  97     &epoch_call_task_count, "# of times a callback task was run");
  98
  99 TAILQ_HEAD (threadlist, thread);
 100
 101 CK_STACK_CONTAINER(struct ck_epoch_entry, stack_entry,
 102     ck_epoch_entry_container)
 103 typedef struct epoch_record {
 104         ck_epoch_record_t er_record;
 105         volatile struct threadlist er_tdlist;
 106         volatile uint32_t er_gen;
 107         uint32_t er_cpuid;
 108 }      *epoch_record_t;
 109
 110 struct epoch_pcpu_state {
 111         struct epoch_record eps_record;
 112 }       __aligned(EPOCH_ALIGN);
 113
 114 struct epoch {
 115         struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
 116         struct epoch_pcpu_state *e_pcpu_dom[MAXMEMDOM] __aligned(EPOCH_ALIGN);
 117         int     e_idx;
 118         int     e_flags;
 119         struct epoch_pcpu_state *e_pcpu[0];
 120 };
 121
 122 epoch_t allepochs[MAX_EPOCHS];
 123
 124 DPCPU_DEFINE(struct grouptask, epoch_cb_task);
 125 DPCPU_DEFINE(int, epoch_cb_count);
 126
 127 static __read_mostly int domcount[MAXMEMDOM];
 128 static __read_mostly int domoffsets[MAXMEMDOM];
 129 static __read_mostly int inited;
 130 static __read_mostly int epoch_count;
 131 __read_mostly epoch_t global_epoch;
 132 __read_mostly epoch_t global_epoch_preempt;
 133
 134 static void epoch_call_task(void *context __unused);
 135
 136 #if defined(__powerpc64__) || defined(__powerpc__) || !defined(NUMA)
 137 static bool usedomains = false;
 138 #else
 139 static bool usedomains = true;
 140 #endif
 141 static void
 142 epoch_init(void *arg __unused)
 143 {
 144         int domain, cpu;
 145
 146         block_count = counter_u64_alloc(M_WAITOK);
 147         migrate_count = counter_u64_alloc(M_WAITOK);
 148         turnstile_count = counter_u64_alloc(M_WAITOK);
 149         switch_count = counter_u64_alloc(M_WAITOK);
 150         epoch_call_count = counter_u64_alloc(M_WAITOK);
 151         epoch_call_task_count = counter_u64_alloc(M_WAITOK);
 152         if (usedomains == false)
 153                 goto done;
 154         domain = 0;
 155         domoffsets[0] = 0;
 156         for (domain = 0; domain < vm_ndomains; domain++) {
 157                 domcount[domain] = CPU_COUNT(&cpuset_domain[domain]);
 158                 if (bootverbose)
 159                         printf("domcount[%d] %d\n", domain, domcount[domain]);
 160         }
 161         for (domain = 1; domain < vm_ndomains; domain++)
 162                 domoffsets[domain] = domoffsets[domain - 1] + domcount[domain - 1];
 163
 164         for (domain = 0; domain < vm_ndomains; domain++) {
 165                 if (domcount[domain] == 0) {
 166                         usedomains = false;
 167                         break;
 168                 }
 169         }
 170 done:
 171         CPU_FOREACH(cpu) {
 172                 GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0, epoch_call_task, NULL);
 173                 taskqgroup_attach_cpu(qgroup_softirq, DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, -1, "epoch call task");
 174         }
 175         inited = 1;
 176         global_epoch = epoch_alloc(0);
 177         global_epoch_preempt = epoch_alloc(EPOCH_PREEMPT);
 178 }
 179
 180 SYSINIT(epoch, SI_SUB_TASKQ + 1, SI_ORDER_FIRST, epoch_init, NULL);
 181
 182 static void
 183 epoch_init_numa(epoch_t epoch)
 184 {
 185         int domain, cpu_offset;
 186         struct epoch_pcpu_state *eps;
 187         epoch_record_t er;
 188
 189         for (domain = 0; domain < vm_ndomains; domain++) {
 190                 eps = malloc_domain(sizeof(*eps) * domcount[domain], M_EPOCH,
 191                     domain, M_ZERO | M_WAITOK);
 192                 epoch->e_pcpu_dom[domain] = eps;
 193                 cpu_offset = domoffsets[domain];
 194                 for (int i = 0; i < domcount[domain]; i++, eps++) {
 195                         epoch->e_pcpu[cpu_offset + i] = eps;
 196                         er = &eps->eps_record;
 197                         ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
 198                         TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
 199                         er->er_cpuid = cpu_offset + i;
 200                 }
 201         }
 202 }
 203
 204 static void
 205 epoch_init_legacy(epoch_t epoch)
 206 {
 207         struct epoch_pcpu_state *eps;
 208         epoch_record_t er;
 209
 210         eps = malloc(sizeof(*eps) * mp_ncpus, M_EPOCH, M_ZERO | M_WAITOK);
 211         epoch->e_pcpu_dom[0] = eps;
 212         for (int i = 0; i < mp_ncpus; i++, eps++) {
 213                 epoch->e_pcpu[i] = eps;
 214                 er = &eps->eps_record;
 215                 ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
 216                 TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
 217                 er->er_cpuid = i;
 218         }
 219 }
 220
 221 epoch_t
 222 epoch_alloc(int flags)
 223 {
 224         epoch_t epoch;
 225
 226         if (__predict_false(!inited))
 227                 panic("%s called too early in boot", __func__);
 228         epoch = malloc(sizeof(struct epoch) + mp_ncpus * sizeof(void *),
 229             M_EPOCH, M_ZERO | M_WAITOK);
 230         ck_epoch_init(&epoch->e_epoch);
 231         if (usedomains)
 232                 epoch_init_numa(epoch);
 233         else
 234                 epoch_init_legacy(epoch);
 235         MPASS(epoch_count < MAX_EPOCHS - 2);
 236         epoch->e_flags = flags;
 237         epoch->e_idx = epoch_count;
 238         allepochs[epoch_count++] = epoch;
 239         return (epoch);
 240 }
 241
 242 void
 243 epoch_free(epoch_t epoch)
 244 {
 245         int domain;
 246 #ifdef INVARIANTS
 247         struct epoch_pcpu_state *eps;
 248         int cpu;
 249
 250         CPU_FOREACH(cpu) {
 251                 eps = epoch->e_pcpu[cpu];
 252                 MPASS(TAILQ_EMPTY(&eps->eps_record.er_tdlist));
 253         }
 254 #endif
 255         allepochs[epoch->e_idx] = NULL;
 256         epoch_wait(global_epoch);
 257         if (usedomains)
 258                 for (domain = 0; domain < vm_ndomains; domain++)
 259                         free_domain(epoch->e_pcpu_dom[domain], M_EPOCH);
 260         else
 261                 free(epoch->e_pcpu_dom[0], M_EPOCH);
 262         free(epoch, M_EPOCH);
 263 }
 264
 265 #define INIT_CHECK(epoch)                                                               \
 266         do {                                                                                    \
 267                 if (__predict_false((epoch) == NULL))           \
 268                         return;                                                                 \
 269         } while (0)
 270
 271 void
 272 epoch_enter_preempt_internal(epoch_t epoch, struct thread *td)
 273 {
 274         struct epoch_pcpu_state *eps;
 275
 276         MPASS(cold || epoch != NULL);
 277         INIT_CHECK(epoch);
 278         MPASS(epoch->e_flags & EPOCH_PREEMPT);
 279         critical_enter();
 280         td->td_pre_epoch_prio = td->td_priority;
 281         eps = epoch->e_pcpu[curcpu];
 282 #ifdef INVARIANTS
 283         MPASS(td->td_epochnest < UCHAR_MAX - 2);
 284         if (td->td_epochnest > 1) {
 285                 struct thread *curtd;
 286                 int found = 0;
 287
 288                 TAILQ_FOREACH(curtd, &eps->eps_record.er_tdlist, td_epochq)
 289                     if (curtd == td)
 290                         found = 1;
 291                 KASSERT(found, ("recursing on a second epoch"));
 292                 critical_exit();
 293                 return;
 294         }
 295 #endif
 296         TAILQ_INSERT_TAIL(&eps->eps_record.er_tdlist, td, td_epochq);
 297         sched_pin();
 298         ck_epoch_begin(&eps->eps_record.er_record, (ck_epoch_section_t *)&td->td_epoch_section);
 299         critical_exit();
 300 }
 301
 302
 303 void
 304 epoch_enter(epoch_t epoch)
 305 {
 306         ck_epoch_record_t *record;
 307         struct thread *td;
 308
 309         MPASS(cold || epoch != NULL);
 310         td = curthread;
 311
 312         critical_enter();
 313         td->td_epochnest++;
 314         record = &epoch->e_pcpu[curcpu]->eps_record.er_record;
 315         ck_epoch_begin(record, NULL);
 316 }
 317
 318 void
 319 epoch_exit_preempt_internal(epoch_t epoch, struct thread *td)
 320 {
 321         struct epoch_pcpu_state *eps;
 322
 323         MPASS(td->td_epochnest == 0);
 324         INIT_CHECK(epoch);
 325         critical_enter();
 326         eps = epoch->e_pcpu[curcpu];
 327
 328         MPASS(epoch->e_flags & EPOCH_PREEMPT);
 329         ck_epoch_end(&eps->eps_record.er_record, (ck_epoch_section_t *)&td->td_epoch_section);
 330         TAILQ_REMOVE(&eps->eps_record.er_tdlist, td, td_epochq);
 331         eps->eps_record.er_gen++;
 332         sched_unpin();
 333         if (__predict_false(td->td_pre_epoch_prio != td->td_priority)) {
 334                 thread_lock(td);
 335                 sched_prio(td, td->td_pre_epoch_prio);
 336                 thread_unlock(td);
 337         }
 338         critical_exit();
 339 }
 340
 341 void
 342 epoch_exit(epoch_t epoch)
 343 {
 344         ck_epoch_record_t *record;
 345         struct thread *td;
 346
 347         td = curthread;
 348         td->td_epochnest--;
 349         record = &epoch->e_pcpu[curcpu]->eps_record.er_record;
 350         ck_epoch_end(record, NULL);
 351         critical_exit();
 352 }
 353
 354 /*
 355  * epoch_block_handler_preempt is a callback from the ck code when another thread is
 356  * currently in an epoch section.
 357  */
 358 static void
 359 epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t *cr,
 360     void *arg __unused)
 361 {
 362         epoch_record_t record;
 363         struct thread *td, *tdwait, *owner;
 364         struct turnstile *ts;
 365         struct lock_object *lock;
 366         int spincount, gen;
 367
 368         record = __containerof(cr, struct epoch_record, er_record);
 369         td = curthread;
 370         spincount = 0;
 371         counter_u64_add(block_count, 1);
 372         if (record->er_cpuid != curcpu) {
 373                 /*
 374                  * If the head of the list is running, we can wait for it
 375                  * to remove itself from the list and thus save us the
 376                  * overhead of a migration
 377                  */
 378                 if ((tdwait = TAILQ_FIRST(&record->er_tdlist)) != NULL &&
 379                     TD_IS_RUNNING(tdwait)) {
 380                         gen = record->er_gen;
 381                         thread_unlock(td);
 382                         do {
 383                                 cpu_spinwait();
 384                         } while (tdwait == TAILQ_FIRST(&record->er_tdlist) &&
 385                             gen == record->er_gen && TD_IS_RUNNING(tdwait) &&
 386                             spincount++ < MAX_ADAPTIVE_SPIN);
 387                         thread_lock(td);
 388                         return;
 389                 }
 390                 /*
 391                  * Being on the same CPU as that of the record on which
 392                  * we need to wait allows us access to the thread
 393                  * list associated with that CPU. We can then examine the
 394                  * oldest thread in the queue and wait on its turnstile
 395                  * until it resumes and so on until a grace period
 396                  * elapses.
 397                  *
 398                  */
 399                 counter_u64_add(migrate_count, 1);
 400                 sched_bind(td, record->er_cpuid);
 401                 /*
 402                  * At this point we need to return to the ck code
 403                  * to scan to see if a grace period has elapsed.
 404                  * We can't move on to check the thread list, because
 405                  * in the meantime new threads may have arrived that
 406                  * in fact belong to a different epoch.
 407                  */
 408                 return;
 409         }
 410         /*
 411          * Try to find a thread in an epoch section on this CPU
 412          * waiting on a turnstile. Otherwise find the lowest
 413          * priority thread (highest prio value) and drop our priority
 414          * to match to allow it to run.
 415          */
 416         TAILQ_FOREACH(tdwait, &record->er_tdlist, td_epochq) {
 417                 /*
 418                  * Propagate our priority to any other waiters to prevent us
 419                  * from starving them. They will have their original priority
 420                  * restore on exit from epoch_wait().
 421                  */
 422                 if (!TD_IS_INHIBITED(tdwait) && tdwait->td_priority > td->td_priority) {
 423                         critical_enter();
 424                         thread_unlock(td);
 425                         thread_lock(tdwait);
 426                         sched_prio(tdwait, td->td_priority);
 427                         thread_unlock(tdwait);
 428                         thread_lock(td);
 429                         critical_exit();
 430                 }
 431                 if (TD_IS_INHIBITED(tdwait) && TD_ON_LOCK(tdwait) &&
 432                     ((ts = tdwait->td_blocked) != NULL)) {
 433                         /*
 434                          * We unlock td to allow turnstile_wait to reacquire the
 435                          * the thread lock. Before unlocking it we enter a critical
 436                          * section to prevent preemption after we reenable interrupts
 437                          * by dropping the thread lock in order to prevent tdwait
 438                          * from getting to run.
 439                          */
 440                         critical_enter();
 441                         thread_unlock(td);
 442                         owner = turnstile_lock(ts, &lock);
 443                         /*
 444                          * The owner pointer indicates that the lock succeeded. Only
 445                          * in case we hold the lock and the turnstile we locked is still
 446                          * the one that tdwait is blocked on can we continue. Otherwise
 447                          * The turnstile pointer has been changed out from underneath
 448                          * us, as in the case where the lock holder has signalled tdwait,
 449                          * and we need to continue.
 450                          */
 451                         if (owner != NULL && ts == tdwait->td_blocked) {
 452                                 MPASS(TD_IS_INHIBITED(tdwait) && TD_ON_LOCK(tdwait));
 453                                 critical_exit();
 454                                 turnstile_wait(ts, owner, tdwait->td_tsqueue);
 455                                 counter_u64_add(turnstile_count, 1);
 456                                 thread_lock(td);
 457                                 return;
 458                         } else if (owner != NULL)
 459                                 turnstile_unlock(ts, lock);
 460                         thread_lock(td);
 461                         critical_exit();
 462                         KASSERT(td->td_locks == 0,
 463                             ("%d locks held", td->td_locks));
 464                 }
 465         }
 466         /*
 467          * We didn't find any threads actually blocked on a lock
 468          * so we have nothing to do except context switch away.
 469          */
 470         counter_u64_add(switch_count, 1);
 471         mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
 472
 473         /*
 474          * Release the thread lock while yielding to
 475          * allow other threads to acquire the lock
 476          * pointed to by TDQ_LOCKPTR(td). Else a
 477          * deadlock like situation might happen. (HPS)
 478          */
 479         thread_unlock(td);
 480         thread_lock(td);
 481 }
 482
 483 void
 484 epoch_wait_preempt(epoch_t epoch)
 485 {
 486         struct thread *td;
 487         int was_bound;
 488         int old_cpu;
 489         int old_pinned;
 490         u_char old_prio;
 491 #ifdef INVARIANTS
 492         int locks;
 493
 494         locks = curthread->td_locks;
 495 #endif
 496
 497         MPASS(cold || epoch != NULL);
 498         INIT_CHECK(epoch);
 499
 500         MPASS(epoch->e_flags & EPOCH_PREEMPT);
 501         WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
 502             "epoch_wait() can sleep");
 503
 504         td = curthread;
 505         KASSERT(td->td_epochnest == 0, ("epoch_wait() in the middle of an epoch section"));
 506         thread_lock(td);
 507
 508         DROP_GIANT();
 509
 510         old_cpu = PCPU_GET(cpuid);
 511         old_pinned = td->td_pinned;
 512         old_prio = td->td_priority;
 513         was_bound = sched_is_bound(td);
 514         sched_unbind(td);
 515         td->td_pinned = 0;
 516         sched_bind(td, old_cpu);
 517
 518         ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt, NULL);
 519
 520         /* restore CPU binding, if any */
 521         if (was_bound != 0) {
 522                 sched_bind(td, old_cpu);
 523         } else {
 524                 /* get thread back to initial CPU, if any */
 525                 if (old_pinned != 0)
 526                         sched_bind(td, old_cpu);
 527                 sched_unbind(td);
 528         }
 529         /* restore pinned after bind */
 530         td->td_pinned = old_pinned;
 531
 532         /* restore thread priority */
 533         sched_prio(td, old_prio);
 534         thread_unlock(td);
 535         PICKUP_GIANT();
 536         KASSERT(td->td_locks == locks,
 537             ("%d residual locks held", td->td_locks - locks));
 538 }
 539
 540 static void
 541 epoch_block_handler(struct ck_epoch *g __unused, ck_epoch_record_t *c __unused,
 542     void *arg __unused)
 543 {
 544         cpu_spinwait();
 545 }
 546
 547 void
 548 epoch_wait(epoch_t epoch)
 549 {
 550
 551         MPASS(cold || epoch != NULL);
 552         INIT_CHECK(epoch);
 553         MPASS(epoch->e_flags == 0);
 554         critical_enter();
 555         ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler, NULL);
 556         critical_exit();
 557 }
 558
 559 void
 560 epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t))
 561 {
 562         struct epoch_pcpu_state *eps;
 563         ck_epoch_entry_t *cb;
 564
 565         cb = (void *)ctx;
 566
 567         MPASS(callback);
 568         /* too early in boot to have epoch set up */
 569         if (__predict_false(epoch == NULL))
 570                 goto boottime;
 571
 572         critical_enter();
 573         *DPCPU_PTR(epoch_cb_count) += 1;
 574         eps = epoch->e_pcpu[curcpu];
 575         ck_epoch_call(&eps->eps_record.er_record, cb, (ck_epoch_cb_t *)callback);
 576         critical_exit();
 577         return;
 578 boottime:
 579         callback(ctx);
 580 }
 581
 582 static void
 583 epoch_call_task(void *arg __unused)
 584 {
 585         ck_stack_entry_t *cursor, *head, *next;
 586         ck_epoch_record_t *record;
 587         epoch_t epoch;
 588         ck_stack_t cb_stack;
 589         int i, npending, total;
 590
 591         ck_stack_init(&cb_stack);
 592         critical_enter();
 593         epoch_enter(global_epoch);
 594         for (total = i = 0; i < epoch_count; i++) {
 595                 if (__predict_false((epoch = allepochs[i]) == NULL))
 596                         continue;
 597                 record = &epoch->e_pcpu[curcpu]->eps_record.er_record;
 598                 if ((npending = record->n_pending) == 0)
 599                         continue;
 600                 ck_epoch_poll_deferred(record, &cb_stack);
 601                 total += npending - record->n_pending;
 602         }
 603         epoch_exit(global_epoch);
 604         *DPCPU_PTR(epoch_cb_count) -= total;
 605         critical_exit();
 606
 607         counter_u64_add(epoch_call_count, total);
 608         counter_u64_add(epoch_call_task_count, 1);
 609
 610         head = ck_stack_batch_pop_npsc(&cb_stack);
 611         for (cursor = head; cursor != NULL; cursor = next) {
 612                 struct ck_epoch_entry *entry =
 613                 ck_epoch_entry_container(cursor);
 614
 615                 next = CK_STACK_NEXT(cursor);
 616                 entry->function(entry);
 617         }
 618 }
 619
 620 int
 621 in_epoch(void)
 622 {
 623         return (curthread->td_epochnest != 0);
 624 }