sys/kern/subr_gtaskqueue.c

   1 /*-
   2  * Copyright (c) 2000 Doug Rabson
   3  * Copyright (c) 2014 Jeff Roberson
   4  * Copyright (c) 2016 Matthew Macy
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 #include <sys/param.h>
  33 #include <sys/systm.h>
  34 #include <sys/bus.h>
  35 #include <sys/cpuset.h>
  36 #include <sys/kernel.h>
  37 #include <sys/kthread.h>
  38 #include <sys/libkern.h>
  39 #include <sys/limits.h>
  40 #include <sys/lock.h>
  41 #include <sys/malloc.h>
  42 #include <sys/mutex.h>
  43 #include <sys/proc.h>
  44 #include <sys/epoch.h>
  45 #include <sys/sched.h>
  46 #include <sys/smp.h>
  47 #include <sys/gtaskqueue.h>
  48 #include <sys/unistd.h>
  49 #include <machine/stdarg.h>
  50
/* Malloc type used for every allocation and free made in this file. */
static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
/* Forward declarations of static helpers defined further down. */
static void     gtaskqueue_thread_enqueue(void *);
static void     gtaskqueue_thread_loop(void *arg);
static int      task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
static void     gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);

/*
 * Task queue groups instantiated by this file.  TASKQGROUP_DEFINE is
 * defined outside this file; NOTE(review): the arguments look like
 * (name, count, stride) -- confirm against sys/gtaskqueue.h.
 */
TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
TASKQGROUP_DEFINE(config, 1, 1);
  59
/*
 * Per-worker record linked on a queue's tq_active list while
 * gtaskqueue_run_locked() is executing tasks.
 */
struct gtaskqueue_busy {
        struct gtask            *tb_running;    /* task being run, or NULL */
        u_int                    tb_seq;        /* tq_seq value assigned at dispatch */
        LIST_ENTRY(gtaskqueue_busy) tb_link;    /* linkage on tq_active */
};
  65
/* Hook invoked after a task has been queued; receives tq_context. */
typedef void (*gtaskqueue_enqueue_fn)(void *context);

/*
 * A group task queue: a list of pending tasks, the set of workers that
 * are currently running tasks, and the threads that service the queue.
 */
struct gtaskqueue {
        STAILQ_HEAD(, gtask)    tq_queue;       /* pending tasks, FIFO */
        LIST_HEAD(, gtaskqueue_busy) tq_active; /* busy records of running workers */
        u_int                   tq_seq;         /* bumped for every task dispatched */
        int                     tq_callouts;    /* nonzero holds off gtaskqueue_terminate() */
        struct mtx_padalign     tq_mutex;       /* protects the queue; spin or sleep mutex */
        gtaskqueue_enqueue_fn   tq_enqueue;     /* called after a task is queued */
        void                    *tq_context;    /* argument for tq_enqueue */
        char                    *tq_name;       /* malloc'ed name, also the mutex name */
        struct thread           **tq_threads;   /* worker threads; also a sleep channel */
        int                     tq_tcount;      /* number of worker threads created */
        int                     tq_spin;        /* nonzero if tq_mutex is a spin mutex */
        int                     tq_flags;       /* TQ_FLAGS_* */
        /* Callbacks and their arguments, indexed by callback type. */
        taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
        void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
};
  84
/* Bits stored in gtaskqueue.tq_flags. */
#define TQ_FLAGS_ACTIVE         (1 << 0)        /* cleared by gtaskqueue_free() to stop workers */
#define TQ_FLAGS_BLOCKED        (1 << 1)        /* set: queued tasks do not trigger tq_enqueue */
#define TQ_FLAGS_UNLOCKED_ENQUEUE       (1 << 2)        /* set when tq_enqueue is gtaskqueue_thread_enqueue */

/* Not referenced in the part of the file visible here. */
#define DT_CALLOUT_ARMED        (1 << 0)
  90
/*
 * Acquire the queue mutex, using the spin or the default mutex primitive
 * depending on how the queue was created (tq_spin).
 */
#define TQ_LOCK(tq)                                                     \
        do {                                                            \
                if ((tq)->tq_spin)                                      \
                        mtx_lock_spin(&(tq)->tq_mutex);                 \
                else                                                    \
                        mtx_lock(&(tq)->tq_mutex);                      \
        } while (0)
/* Assert that the current thread owns the queue mutex. */
#define TQ_ASSERT_LOCKED(tq)    mtx_assert(&(tq)->tq_mutex, MA_OWNED)

/* Release the queue mutex; counterpart of TQ_LOCK(). */
#define TQ_UNLOCK(tq)                                                   \
        do {                                                            \
                if ((tq)->tq_spin)                                      \
                        mtx_unlock_spin(&(tq)->tq_mutex);               \
                else                                                    \
                        mtx_unlock(&(tq)->tq_mutex);                    \
        } while (0)
/* Assert that the current thread does not own the queue mutex. */
#define TQ_ASSERT_UNLOCKED(tq)  mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
 108
 109 #ifdef INVARIANTS
 110 static void
 111 gtask_dump(struct gtask *gtask)
 112 {
 113         printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
 114                gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context);
 115 }
 116 #endif
 117
 118 static __inline int
 119 TQ_SLEEP(struct gtaskqueue *tq, void *p, const char *wm)
 120 {
 121         if (tq->tq_spin)
 122                 return (msleep_spin(p, (struct mtx *)&tq->tq_mutex, wm, 0));
 123         return (msleep(p, &tq->tq_mutex, 0, wm, 0));
 124 }
 125
 126 static struct gtaskqueue *
 127 _gtaskqueue_create(const char *name, int mflags,
 128                  taskqueue_enqueue_fn enqueue, void *context,
 129                  int mtxflags, const char *mtxname __unused)
 130 {
 131         struct gtaskqueue *queue;
 132         char *tq_name;
 133
 134         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
 135         if (!tq_name)
 136                 return (NULL);
 137
 138         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
 139
 140         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
 141         if (!queue) {
 142                 free(tq_name, M_GTASKQUEUE);
 143                 return (NULL);
 144         }
 145
 146         STAILQ_INIT(&queue->tq_queue);
 147         LIST_INIT(&queue->tq_active);
 148         queue->tq_enqueue = enqueue;
 149         queue->tq_context = context;
 150         queue->tq_name = tq_name;
 151         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
 152         queue->tq_flags |= TQ_FLAGS_ACTIVE;
 153         if (enqueue == gtaskqueue_thread_enqueue)
 154                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
 155         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
 156
 157         return (queue);
 158 }
 159
 160 /*
 161  * Signal a taskqueue thread to terminate.
 162  */
 163 static void
 164 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
 165 {
 166
 167         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
 168                 wakeup(tq);
 169                 TQ_SLEEP(tq, pp, "gtq_destroy");
 170         }
 171 }
 172
 173 static void
 174 gtaskqueue_free(struct gtaskqueue *queue)
 175 {
 176
 177         TQ_LOCK(queue);
 178         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
 179         gtaskqueue_terminate(queue->tq_threads, queue);
 180         KASSERT(LIST_EMPTY(&queue->tq_active), ("Tasks still running?"));
 181         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
 182         mtx_destroy(&queue->tq_mutex);
 183         free(queue->tq_threads, M_GTASKQUEUE);
 184         free(queue->tq_name, M_GTASKQUEUE);
 185         free(queue, M_GTASKQUEUE);
 186 }
 187
 188 /*
 189  * Wait for all to complete, then prevent it from being enqueued
 190  */
 191 void
 192 grouptask_block(struct grouptask *grouptask)
 193 {
 194         struct gtaskqueue *queue = grouptask->gt_taskqueue;
 195         struct gtask *gtask = &grouptask->gt_task;
 196
 197 #ifdef INVARIANTS
 198         if (queue == NULL) {
 199                 gtask_dump(gtask);
 200                 panic("queue == NULL");
 201         }
 202 #endif
 203         TQ_LOCK(queue);
 204         gtask->ta_flags |= TASK_NOENQUEUE;
 205         gtaskqueue_drain_locked(queue, gtask);
 206         TQ_UNLOCK(queue);
 207 }
 208
 209 void
 210 grouptask_unblock(struct grouptask *grouptask)
 211 {
 212         struct gtaskqueue *queue = grouptask->gt_taskqueue;
 213         struct gtask *gtask = &grouptask->gt_task;
 214
 215 #ifdef INVARIANTS
 216         if (queue == NULL) {
 217                 gtask_dump(gtask);
 218                 panic("queue == NULL");
 219         }
 220 #endif
 221         TQ_LOCK(queue);
 222         gtask->ta_flags &= ~TASK_NOENQUEUE;
 223         TQ_UNLOCK(queue);
 224 }
 225
 226 int
 227 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
 228 {
 229 #ifdef INVARIANTS
 230         if (queue == NULL) {
 231                 gtask_dump(gtask);
 232                 panic("queue == NULL");
 233         }
 234 #endif
 235         TQ_LOCK(queue);
 236         if (gtask->ta_flags & TASK_ENQUEUED) {
 237                 TQ_UNLOCK(queue);
 238                 return (0);
 239         }
 240         if (gtask->ta_flags & TASK_NOENQUEUE) {
 241                 TQ_UNLOCK(queue);
 242                 return (EAGAIN);
 243         }
 244         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
 245         gtask->ta_flags |= TASK_ENQUEUED;
 246         TQ_UNLOCK(queue);
 247         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
 248                 queue->tq_enqueue(queue->tq_context);
 249         return (0);
 250 }
 251
 252 static void
 253 gtaskqueue_task_nop_fn(void *context)
 254 {
 255 }
 256
 257 /*
 258  * Block until all currently queued tasks in this taskqueue
 259  * have begun execution.  Tasks queued during execution of
 260  * this function are ignored.
 261  */
 262 static void
 263 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
 264 {
 265         struct gtask t_barrier;
 266
 267         if (STAILQ_EMPTY(&queue->tq_queue))
 268                 return;
 269
 270         /*
 271          * Enqueue our barrier after all current tasks, but with
 272          * the highest priority so that newly queued tasks cannot
 273          * pass it.  Because of the high priority, we can not use
 274          * taskqueue_enqueue_locked directly (which drops the lock
 275          * anyway) so just insert it at tail while we have the
 276          * queue lock.
 277          */
 278         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
 279         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
 280         t_barrier.ta_flags |= TASK_ENQUEUED;
 281
 282         /*
 283          * Once the barrier has executed, all previously queued tasks
 284          * have completed or are currently executing.
 285          */
 286         while (t_barrier.ta_flags & TASK_ENQUEUED)
 287                 TQ_SLEEP(queue, &t_barrier, "gtq_qdrain");
 288 }
 289
 290 /*
 291  * Block until all currently executing tasks for this taskqueue
 292  * complete.  Tasks that begin execution during the execution
 293  * of this function are ignored.
 294  */
 295 static void
 296 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
 297 {
 298         struct gtaskqueue_busy *tb;
 299         u_int seq;
 300
 301         if (LIST_EMPTY(&queue->tq_active))
 302                 return;
 303
 304         /* Block taskq_terminate().*/
 305         queue->tq_callouts++;
 306
 307         /* Wait for any active task with sequence from the past. */
 308         seq = queue->tq_seq;
 309 restart:
 310         LIST_FOREACH(tb, &queue->tq_active, tb_link) {
 311                 if ((int)(tb->tb_seq - seq) <= 0) {
 312                         TQ_SLEEP(queue, tb->tb_running, "gtq_adrain");
 313                         goto restart;
 314                 }
 315         }
 316
 317         /* Release taskqueue_terminate(). */
 318         queue->tq_callouts--;
 319         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 320                 wakeup_one(queue->tq_threads);
 321 }
 322
 323 void
 324 gtaskqueue_block(struct gtaskqueue *queue)
 325 {
 326
 327         TQ_LOCK(queue);
 328         queue->tq_flags |= TQ_FLAGS_BLOCKED;
 329         TQ_UNLOCK(queue);
 330 }
 331
 332 void
 333 gtaskqueue_unblock(struct gtaskqueue *queue)
 334 {
 335
 336         TQ_LOCK(queue);
 337         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
 338         if (!STAILQ_EMPTY(&queue->tq_queue))
 339                 queue->tq_enqueue(queue->tq_context);
 340         TQ_UNLOCK(queue);
 341 }
 342
 343 static void
 344 gtaskqueue_run_locked(struct gtaskqueue *queue)
 345 {
 346         struct epoch_tracker et;
 347         struct gtaskqueue_busy tb;
 348         struct gtask *gtask;
 349         bool in_net_epoch;
 350
 351         KASSERT(queue != NULL, ("tq is NULL"));
 352         TQ_ASSERT_LOCKED(queue);
 353         tb.tb_running = NULL;
 354         LIST_INSERT_HEAD(&queue->tq_active, &tb, tb_link);
 355         in_net_epoch = false;
 356
 357         while ((gtask = STAILQ_FIRST(&queue->tq_queue)) != NULL) {
 358                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
 359                 gtask->ta_flags &= ~TASK_ENQUEUED;
 360                 tb.tb_running = gtask;
 361                 tb.tb_seq = ++queue->tq_seq;
 362                 TQ_UNLOCK(queue);
 363
 364                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
 365                 if (!in_net_epoch && TASK_IS_NET(gtask)) {
 366                         in_net_epoch = true;
 367                         NET_EPOCH_ENTER(et);
 368                 } else if (in_net_epoch && !TASK_IS_NET(gtask)) {
 369                         NET_EPOCH_EXIT(et);
 370                         in_net_epoch = false;
 371                 }
 372                 gtask->ta_func(gtask->ta_context);
 373
 374                 TQ_LOCK(queue);
 375                 wakeup(gtask);
 376         }
 377         if (in_net_epoch)
 378                 NET_EPOCH_EXIT(et);
 379         LIST_REMOVE(&tb, tb_link);
 380 }
 381
 382 static int
 383 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
 384 {
 385         struct gtaskqueue_busy *tb;
 386
 387         TQ_ASSERT_LOCKED(queue);
 388         LIST_FOREACH(tb, &queue->tq_active, tb_link) {
 389                 if (tb->tb_running == gtask)
 390                         return (1);
 391         }
 392         return (0);
 393 }
 394
 395 static int
 396 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
 397 {
 398
 399         if (gtask->ta_flags & TASK_ENQUEUED)
 400                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
 401         gtask->ta_flags &= ~TASK_ENQUEUED;
 402         return (task_is_running(queue, gtask) ? EBUSY : 0);
 403 }
 404
 405 int
 406 gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
 407 {
 408         int error;
 409
 410         TQ_LOCK(queue);
 411         error = gtaskqueue_cancel_locked(queue, gtask);
 412         TQ_UNLOCK(queue);
 413
 414         return (error);
 415 }
 416
 417 static void
 418 gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
 419 {
 420         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
 421                 TQ_SLEEP(queue, gtask, "gtq_drain");
 422 }
 423
 424 void
 425 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
 426 {
 427
 428         if (!queue->tq_spin)
 429                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 430
 431         TQ_LOCK(queue);
 432         gtaskqueue_drain_locked(queue, gtask);
 433         TQ_UNLOCK(queue);
 434 }
 435
 436 void
 437 gtaskqueue_drain_all(struct gtaskqueue *queue)
 438 {
 439
 440         if (!queue->tq_spin)
 441                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 442
 443         TQ_LOCK(queue);
 444         gtaskqueue_drain_tq_queue(queue);
 445         gtaskqueue_drain_tq_active(queue);
 446         TQ_UNLOCK(queue);
 447 }
 448
 449 static int
 450 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 451     cpuset_t *mask, const char *name, va_list ap)
 452 {
 453         char ktname[MAXCOMLEN + 1];
 454         struct thread *td;
 455         struct gtaskqueue *tq;
 456         int i, error;
 457
 458         if (count <= 0)
 459                 return (EINVAL);
 460
 461         vsnprintf(ktname, sizeof(ktname), name, ap);
 462         tq = *tqp;
 463
 464         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
 465             M_NOWAIT | M_ZERO);
 466         if (tq->tq_threads == NULL) {
 467                 printf("%s: no memory for %s threads\n", __func__, ktname);
 468                 return (ENOMEM);
 469         }
 470
 471         for (i = 0; i < count; i++) {
 472                 if (count == 1)
 473                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 474                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
 475                 else
 476                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 477                             &tq->tq_threads[i], RFSTOPPED, 0,
 478                             "%s_%d", ktname, i);
 479                 if (error) {
 480                         /* should be ok to continue, taskqueue_free will dtrt */
 481                         printf("%s: kthread_add(%s): error %d", __func__,
 482                             ktname, error);
 483                         tq->tq_threads[i] = NULL;               /* paranoid */
 484                 } else
 485                         tq->tq_tcount++;
 486         }
 487         for (i = 0; i < count; i++) {
 488                 if (tq->tq_threads[i] == NULL)
 489                         continue;
 490                 td = tq->tq_threads[i];
 491                 if (mask) {
 492                         error = cpuset_setthread(td->td_tid, mask);
 493                         /*
 494                          * Failing to pin is rarely an actual fatal error;
 495                          * it'll just affect performance.
 496                          */
 497                         if (error)
 498                                 printf("%s: curthread=%llu: can't pin; "
 499                                     "error=%d\n",
 500                                     __func__,
 501                                     (unsigned long long) td->td_tid,
 502                                     error);
 503                 }
 504                 thread_lock(td);
 505                 sched_prio(td, pri);
 506                 sched_add(td, SRQ_BORING);
 507         }
 508
 509         return (0);
 510 }
 511
 512 static int
 513 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 514     const char *name, ...)
 515 {
 516         va_list ap;
 517         int error;
 518
 519         va_start(ap, name);
 520         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
 521         va_end(ap);
 522         return (error);
 523 }
 524
 525 static inline void
 526 gtaskqueue_run_callback(struct gtaskqueue *tq,
 527     enum taskqueue_callback_type cb_type)
 528 {
 529         taskqueue_callback_fn tq_callback;
 530
 531         TQ_ASSERT_UNLOCKED(tq);
 532         tq_callback = tq->tq_callbacks[cb_type];
 533         if (tq_callback != NULL)
 534                 tq_callback(tq->tq_cb_contexts[cb_type]);
 535 }
 536
 537 static void
 538 gtaskqueue_thread_loop(void *arg)
 539 {
 540         struct gtaskqueue **tqp, *tq;
 541
 542         tqp = arg;
 543         tq = *tqp;
 544         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
 545         TQ_LOCK(tq);
 546         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
 547                 /* XXX ? */
 548                 gtaskqueue_run_locked(tq);
 549                 /*
 550                  * Because taskqueue_run() can drop tq_mutex, we need to
 551                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
 552                  * meantime, which means we missed a wakeup.
 553                  */
 554                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 555                         break;
 556                 TQ_SLEEP(tq, tq, "-");
 557         }
 558         gtaskqueue_run_locked(tq);
 559         /*
 560          * This thread is on its way out, so just drop the lock temporarily
 561          * in order to call the shutdown callback.  This allows the callback
 562          * to look at the taskqueue, even just before it dies.
 563          */
 564         TQ_UNLOCK(tq);
 565         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
 566         TQ_LOCK(tq);
 567
 568         /* rendezvous with thread that asked us to terminate */
 569         tq->tq_tcount--;
 570         wakeup_one(tq->tq_threads);
 571         TQ_UNLOCK(tq);
 572         kthread_exit();
 573 }
 574
 575 static void
 576 gtaskqueue_thread_enqueue(void *context)
 577 {
 578         struct gtaskqueue **tqp, *tq;
 579
 580         tqp = context;
 581         tq = *tqp;
 582         wakeup_any(tq);
 583 }
 584
 585 static struct gtaskqueue *
 586 gtaskqueue_create_fast(const char *name, int mflags,
 587                  taskqueue_enqueue_fn enqueue, void *context)
 588 {
 589         return _gtaskqueue_create(name, mflags, enqueue, context,
 590                         MTX_SPIN, "fast_taskqueue");
 591 }
 592
/* One slot of a task queue group: a queue plus the tasks attached to it. */
struct taskqgroup_cpu {
        LIST_HEAD(, grouptask)  tgc_tasks;      /* group tasks attached to this slot */
        struct gtaskqueue       *tgc_taskq;     /* queue servicing this slot */
        int     tgc_cnt;                        /* number of entries on tgc_tasks */
        int     tgc_cpu;                        /* CPU id recorded for this slot */
};
 599
/* A group of per-slot task queues among which group tasks are spread. */
struct taskqgroup {
        struct taskqgroup_cpu tqg_queue[MAXCPU];        /* slots; first tqg_cnt are in use */
        struct mtx      tqg_lock;       /* held while slots and task lists are modified */
        const char *    tqg_name;       /* prefix for worker thread names */
        int             tqg_adjusting;  /* not referenced in the visible part of this file */
        int             tqg_stride;     /* set to 1 by taskqgroup_config_init() */
        int             tqg_cnt;        /* number of slots in use */
};
 608
/*
 * Heap-allocated one-shot task used by taskqgroup_bind(); it is freed by
 * its handler, taskqgroup_binder().
 */
struct taskq_bind_task {
        struct gtask bt_task;   /* the task queued on the target queue */
        int     bt_cpuid;       /* CPU the executing thread should bind to */
};
 613
 614 static void
 615 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
 616 {
 617         struct taskqgroup_cpu *qcpu;
 618
 619         qcpu = &qgroup->tqg_queue[idx];
 620         LIST_INIT(&qcpu->tgc_tasks);
 621         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
 622             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
 623         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
 624             "%s_%d", qgroup->tqg_name, idx);
 625         qcpu->tgc_cpu = cpu;
 626 }
 627
 628 static void
 629 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
 630 {
 631
 632         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
 633 }
 634
 635 /*
 636  * Find the taskq with least # of tasks that doesn't currently have any
 637  * other queues from the uniq identifier.
 638  */
 639 static int
 640 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
 641 {
 642         struct grouptask *n;
 643         int i, idx, mincnt;
 644         int strict;
 645
 646         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 647         if (qgroup->tqg_cnt == 0)
 648                 return (0);
 649         idx = -1;
 650         mincnt = INT_MAX;
 651         /*
 652          * Two passes;  First scan for a queue with the least tasks that
 653          * does not already service this uniq id.  If that fails simply find
 654          * the queue with the least total tasks;
 655          */
 656         for (strict = 1; mincnt == INT_MAX; strict = 0) {
 657                 for (i = 0; i < qgroup->tqg_cnt; i++) {
 658                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
 659                                 continue;
 660                         if (strict) {
 661                                 LIST_FOREACH(n,
 662                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 663                                         if (n->gt_uniq == uniq)
 664                                                 break;
 665                                 if (n != NULL)
 666                                         continue;
 667                         }
 668                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
 669                         idx = i;
 670                 }
 671         }
 672         if (idx == -1)
 673                 panic("%s: failed to pick a qid.", __func__);
 674
 675         return (idx);
 676 }
 677
 678 /*
 679  * smp_started is unusable since it is not set for UP kernels or even for
 680  * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 681  * (mp_ncpus == 1) test, but that would be broken here since we need to
 682  * to synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 683  * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 684  *
 685  * So maintain our own flag.  It must be set after all CPUs are started
 686  * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 687  * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 688  * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 689  * simpler for adjustment to pass a flag indicating if it is delayed.
 690  */
 691
 692 static int tqg_smp_started;
 693
 694 static void
 695 tqg_record_smp_started(void *arg)
 696 {
 697         tqg_smp_started = 1;
 698 }
 699
 700 SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
 701         tqg_record_smp_started, NULL);
 702
 703 void
 704 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
 705     void *uniq, device_t dev, struct resource *irq, const char *name)
 706 {
 707         int cpu, qid, error;
 708
 709         gtask->gt_uniq = uniq;
 710         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
 711         gtask->gt_dev = dev;
 712         gtask->gt_irq = irq;
 713         gtask->gt_cpu = -1;
 714         mtx_lock(&qgroup->tqg_lock);
 715         qid = taskqgroup_find(qgroup, uniq);
 716         qgroup->tqg_queue[qid].tgc_cnt++;
 717         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 718         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 719         if (dev != NULL && irq != NULL && tqg_smp_started) {
 720                 cpu = qgroup->tqg_queue[qid].tgc_cpu;
 721                 gtask->gt_cpu = cpu;
 722                 mtx_unlock(&qgroup->tqg_lock);
 723                 error = bus_bind_intr(dev, irq, cpu);
 724                 if (error)
 725                         printf("%s: binding interrupt failed for %s: %d\n",
 726                             __func__, gtask->gt_name, error);
 727         } else
 728                 mtx_unlock(&qgroup->tqg_lock);
 729 }
 730
 731 static void
 732 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 733 {
 734         int qid, cpu, error;
 735
 736         mtx_lock(&qgroup->tqg_lock);
 737         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
 738         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 739         if (gtask->gt_dev != NULL && gtask->gt_irq != NULL) {
 740                 mtx_unlock(&qgroup->tqg_lock);
 741                 error = bus_bind_intr(gtask->gt_dev, gtask->gt_irq, cpu);
 742                 mtx_lock(&qgroup->tqg_lock);
 743                 if (error)
 744                         printf("%s: binding interrupt failed for %s: %d\n",
 745                             __func__, gtask->gt_name, error);
 746
 747         }
 748         qgroup->tqg_queue[qid].tgc_cnt++;
 749         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 750         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 751         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 752         mtx_unlock(&qgroup->tqg_lock);
 753 }
 754
 755 int
 756 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
 757     void *uniq, int cpu, device_t dev, struct resource *irq, const char *name)
 758 {
 759         int i, qid, error;
 760
 761         qid = -1;
 762         gtask->gt_uniq = uniq;
 763         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
 764         gtask->gt_dev = dev;
 765         gtask->gt_irq = irq;
 766         gtask->gt_cpu = cpu;
 767         mtx_lock(&qgroup->tqg_lock);
 768         if (tqg_smp_started) {
 769                 for (i = 0; i < qgroup->tqg_cnt; i++)
 770                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 771                                 qid = i;
 772                                 break;
 773                         }
 774                 if (qid == -1) {
 775                         mtx_unlock(&qgroup->tqg_lock);
 776                         printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
 777                         return (EINVAL);
 778                 }
 779         } else
 780                 qid = 0;
 781         qgroup->tqg_queue[qid].tgc_cnt++;
 782         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 783         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 784         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 785         mtx_unlock(&qgroup->tqg_lock);
 786
 787         if (dev != NULL && irq != NULL && tqg_smp_started) {
 788                 error = bus_bind_intr(dev, irq, cpu);
 789                 if (error)
 790                         printf("%s: binding interrupt failed for %s: %d\n",
 791                             __func__, gtask->gt_name, error);
 792         }
 793         return (0);
 794 }
 795
 796 static int
 797 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 798 {
 799         device_t dev;
 800         struct resource *irq;
 801         int cpu, error, i, qid;
 802
 803         qid = -1;
 804         dev = gtask->gt_dev;
 805         irq = gtask->gt_irq;
 806         cpu = gtask->gt_cpu;
 807         MPASS(tqg_smp_started);
 808         mtx_lock(&qgroup->tqg_lock);
 809         for (i = 0; i < qgroup->tqg_cnt; i++)
 810                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 811                         qid = i;
 812                         break;
 813                 }
 814         if (qid == -1) {
 815                 mtx_unlock(&qgroup->tqg_lock);
 816                 printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
 817                 return (EINVAL);
 818         }
 819         qgroup->tqg_queue[qid].tgc_cnt++;
 820         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 821         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 822         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 823         mtx_unlock(&qgroup->tqg_lock);
 824
 825         if (dev != NULL && irq != NULL) {
 826                 error = bus_bind_intr(dev, irq, cpu);
 827                 if (error)
 828                         printf("%s: binding interrupt failed for %s: %d\n",
 829                             __func__, gtask->gt_name, error);
 830         }
 831         return (0);
 832 }
 833
 834 void
 835 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
 836 {
 837         int i;
 838
 839         grouptask_block(gtask);
 840         mtx_lock(&qgroup->tqg_lock);
 841         for (i = 0; i < qgroup->tqg_cnt; i++)
 842                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
 843                         break;
 844         if (i == qgroup->tqg_cnt)
 845                 panic("%s: task %s not in group", __func__, gtask->gt_name);
 846         qgroup->tqg_queue[i].tgc_cnt--;
 847         LIST_REMOVE(gtask, gt_list);
 848         mtx_unlock(&qgroup->tqg_lock);
 849         gtask->gt_taskqueue = NULL;
 850         gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
 851 }
 852
 853 static void
 854 taskqgroup_binder(void *ctx)
 855 {
 856         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
 857         cpuset_t mask;
 858         int error;
 859
 860         CPU_ZERO(&mask);
 861         CPU_SET(gtask->bt_cpuid, &mask);
 862         error = cpuset_setthread(curthread->td_tid, &mask);
 863         thread_lock(curthread);
 864         sched_bind(curthread, gtask->bt_cpuid);
 865         thread_unlock(curthread);
 866
 867         if (error)
 868                 printf("%s: binding curthread failed: %d\n", __func__, error);
 869         free(gtask, M_DEVBUF);
 870 }
 871
 872 static void
 873 taskqgroup_bind(struct taskqgroup *qgroup)
 874 {
 875         struct taskq_bind_task *gtask;
 876         int i;
 877
 878         /*
 879          * Bind taskqueue threads to specific CPUs, if they have been assigned
 880          * one.
 881          */
 882         if (qgroup->tqg_cnt == 1)
 883                 return;
 884
 885         for (i = 0; i < qgroup->tqg_cnt; i++) {
 886                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
 887                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
 888                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
 889                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
 890                     &gtask->bt_task);
 891         }
 892 }
 893
 894 static void
 895 taskqgroup_config_init(void *arg)
 896 {
 897         struct taskqgroup *qgroup = qgroup_config;
 898         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 899
 900         LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
 901             grouptask, gt_list);
 902         qgroup->tqg_queue[0].tgc_cnt = 0;
 903         taskqgroup_cpu_create(qgroup, 0, 0);
 904
 905         qgroup->tqg_cnt = 1;
 906         qgroup->tqg_stride = 1;
 907 }
 908
 909 SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
 910         taskqgroup_config_init, NULL);
 911
 912 static int
 913 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
 914 {
 915         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 916         struct grouptask *gtask;
 917         int i, k, old_cnt, old_cpu, cpu;
 918
 919         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 920
 921         if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
 922                 printf("%s: failed cnt: %d stride: %d "
 923                     "mp_ncpus: %d tqg_smp_started: %d\n",
 924                     __func__, cnt, stride, mp_ncpus, tqg_smp_started);
 925                 return (EINVAL);
 926         }
 927         if (qgroup->tqg_adjusting) {
 928                 printf("%s failed: adjusting\n", __func__);
 929                 return (EBUSY);
 930         }
 931         qgroup->tqg_adjusting = 1;
 932         old_cnt = qgroup->tqg_cnt;
 933         old_cpu = 0;
 934         if (old_cnt < cnt)
 935                 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
 936         mtx_unlock(&qgroup->tqg_lock);
 937         /*
 938          * Set up queue for tasks added before boot.
 939          */
 940         if (old_cnt == 0) {
 941                 LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
 942                     grouptask, gt_list);
 943                 qgroup->tqg_queue[0].tgc_cnt = 0;
 944         }
 945
 946         /*
 947          * If new taskq threads have been added.
 948          */
 949         cpu = old_cpu;
 950         for (i = old_cnt; i < cnt; i++) {
 951                 taskqgroup_cpu_create(qgroup, i, cpu);
 952
 953                 for (k = 0; k < stride; k++)
 954                         cpu = CPU_NEXT(cpu);
 955         }
 956         mtx_lock(&qgroup->tqg_lock);
 957         qgroup->tqg_cnt = cnt;
 958         qgroup->tqg_stride = stride;
 959
 960         /*
 961          * Adjust drivers to use new taskqs.
 962          */
 963         for (i = 0; i < old_cnt; i++) {
 964                 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
 965                         LIST_REMOVE(gtask, gt_list);
 966                         qgroup->tqg_queue[i].tgc_cnt--;
 967                         LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
 968                 }
 969         }
 970         mtx_unlock(&qgroup->tqg_lock);
 971
 972         while ((gtask = LIST_FIRST(&gtask_head))) {
 973                 LIST_REMOVE(gtask, gt_list);
 974                 if (gtask->gt_cpu == -1)
 975                         taskqgroup_attach_deferred(qgroup, gtask);
 976                 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
 977                         taskqgroup_attach_deferred(qgroup, gtask);
 978         }
 979
 980 #ifdef INVARIANTS
 981         mtx_lock(&qgroup->tqg_lock);
 982         for (i = 0; i < qgroup->tqg_cnt; i++) {
 983                 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
 984                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 985                         MPASS(gtask->gt_taskqueue != NULL);
 986         }
 987         mtx_unlock(&qgroup->tqg_lock);
 988 #endif
 989         /*
 990          * If taskq thread count has been reduced.
 991          */
 992         for (i = cnt; i < old_cnt; i++)
 993                 taskqgroup_cpu_remove(qgroup, i);
 994
 995         taskqgroup_bind(qgroup);
 996
 997         mtx_lock(&qgroup->tqg_lock);
 998         qgroup->tqg_adjusting = 0;
 999
1000         return (0);
1001 }
1002
1003 int
1004 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
1005 {
1006         int error;
1007
1008         mtx_lock(&qgroup->tqg_lock);
1009         error = _taskqgroup_adjust(qgroup, cnt, stride);
1010         mtx_unlock(&qgroup->tqg_lock);
1011
1012         return (error);
1013 }
1014
1015 struct taskqgroup *
1016 taskqgroup_create(const char *name)
1017 {
1018         struct taskqgroup *qgroup;
1019
1020         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
1021         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
1022         qgroup->tqg_name = name;
1023         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
1024
1025         return (qgroup);
1026 }
1027
/*
 * Presumably the counterpart of taskqgroup_create().  The body is empty:
 * calling this releases nothing, so the allocation and the mutex set up by
 * taskqgroup_create() are left in place.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}
1033
1034 void
1035 taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
1036     const char *name)
1037 {
1038
1039         GROUPTASK_INIT(gtask, 0, fn, ctx);
1040         taskqgroup_attach(qgroup_config, gtask, gtask, NULL, NULL, name);
1041 }
1042
/*
 * Detach a grouptask from the config taskqgroup (qgroup_config) by
 * forwarding to taskqgroup_detach().
 */
void
taskqgroup_config_gtask_deinit(struct grouptask *gtask)
{

        taskqgroup_detach(qgroup_config, gtask);
}