sys/kern/subr_gtaskqueue.c

   1 /*-
   2  * Copyright (c) 2000 Doug Rabson
   3  * Copyright (c) 2014 Jeff Roberson
   4  * Copyright (c) 2016 Matthew Macy
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 #include <sys/param.h>
  33 #include <sys/systm.h>
  34 #include <sys/bus.h>
  35 #include <sys/cpuset.h>
  36 #include <sys/interrupt.h>
  37 #include <sys/kernel.h>
  38 #include <sys/kthread.h>
  39 #include <sys/libkern.h>
  40 #include <sys/limits.h>
  41 #include <sys/lock.h>
  42 #include <sys/malloc.h>
  43 #include <sys/mutex.h>
  44 #include <sys/proc.h>
  45 #include <sys/sched.h>
  46 #include <sys/smp.h>
  47 #include <sys/gtaskqueue.h>
  48 #include <sys/unistd.h>
  49 #include <machine/stdarg.h>
  50
/* Malloc type used for the allocations made by the gtaskqueue code below. */
static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
/* Forward declarations for static helpers defined later in this file. */
static void     gtaskqueue_thread_enqueue(void *);
static void     gtaskqueue_thread_loop(void *arg);
static int      task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
static void     gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);

/*
 * Task queue groups defined by this file.
 * NOTE(review): argument meaning is set by TASKQGROUP_DEFINE in
 * <sys/gtaskqueue.h>; confirm there.
 */
TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
TASKQGROUP_DEFINE(config, 1, 1);
  59
/*
 * Entry on a queue's tq_active list.  gtaskqueue_run_locked() links one
 * while it executes a task; gtaskqueue_drain_tq_active() links one as a
 * drain marker.
 */
struct gtaskqueue_busy {
        struct gtask    *tb_running;    /* task being run, NULL, or TB_DRAIN_WAITER */
        TAILQ_ENTRY(gtaskqueue_busy) tb_link;   /* linkage on tq_active */
};
  64
/*
 * Sentinel stored in tb_running to mark a tq_active entry that belongs to a
 * thread waiting in gtaskqueue_drain_tq_active() rather than to a running
 * task.
 */
static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;

/* Hook called with tq_context after a task has been queued. */
typedef void (*gtaskqueue_enqueue_fn)(void *context);
  68
/*
 * A single group task queue: a list of pending tasks, the threads that
 * service it, and the mutex protecting both.
 */
struct gtaskqueue {
        STAILQ_HEAD(, gtask)    tq_queue;       /* tasks waiting to run */
        gtaskqueue_enqueue_fn   tq_enqueue;     /* hook called after a task is queued */
        void                    *tq_context;    /* argument passed to tq_enqueue */
        char                    *tq_name;       /* allocated name, also used as mutex name */
        TAILQ_HEAD(, gtaskqueue_busy) tq_active; /* running tasks and drain markers */
        struct mtx              tq_mutex;       /* protects the queue; spin if tq_spin */
        struct thread           **tq_threads;   /* worker thread array */
        int                     tq_tcount;      /* worker threads that have not exited */
        int                     tq_spin;        /* nonzero if tq_mutex is a spin mutex */
        int                     tq_flags;       /* TQ_FLAGS_* bits */
        int                     tq_callouts;    /* nonzero holds off gtaskqueue_terminate() */
        taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];  /* per-type thread callbacks */
        void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS]; /* arguments for tq_callbacks */
};
  84
/*
 * Bits in gtaskqueue.tq_flags.
 *   ACTIVE:            set at creation, cleared by gtaskqueue_free(); worker
 *                      threads leave their loop once it is clear.
 *   BLOCKED:           set by gtaskqueue_block(); while set, enqueueing does
 *                      not call the tq_enqueue hook.
 *   UNLOCKED_ENQUEUE:  set at creation when the hook is
 *                      gtaskqueue_thread_enqueue.
 */
#define TQ_FLAGS_ACTIVE         (1 << 0)
#define TQ_FLAGS_BLOCKED        (1 << 1)
#define TQ_FLAGS_UNLOCKED_ENQUEUE       (1 << 2)

/* NOTE(review): not referenced in the visible part of this file. */
#define DT_CALLOUT_ARMED        (1 << 0)

/* Acquire the queue mutex using the primitive matching its type. */
#define TQ_LOCK(tq)                                                     \
        do {                                                            \
                if ((tq)->tq_spin)                                      \
                        mtx_lock_spin(&(tq)->tq_mutex);                 \
                else                                                    \
                        mtx_lock(&(tq)->tq_mutex);                      \
        } while (0)
#define TQ_ASSERT_LOCKED(tq)    mtx_assert(&(tq)->tq_mutex, MA_OWNED)

/* Release the queue mutex using the primitive matching its type. */
#define TQ_UNLOCK(tq)                                                   \
        do {                                                            \
                if ((tq)->tq_spin)                                      \
                        mtx_unlock_spin(&(tq)->tq_mutex);               \
                else                                                    \
                        mtx_unlock(&(tq)->tq_mutex);                    \
        } while (0)
#define TQ_ASSERT_UNLOCKED(tq)  mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
 108
 109 #ifdef INVARIANTS
 110 static void
 111 gtask_dump(struct gtask *gtask)
 112 {
 113         printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
 114                gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context);
 115 }
 116 #endif
 117
 118 static __inline int
 119 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
 120     int t)
 121 {
 122         if (tq->tq_spin)
 123                 return (msleep_spin(p, m, wm, t));
 124         return (msleep(p, m, pri, wm, t));
 125 }
 126
 127 static struct gtaskqueue *
 128 _gtaskqueue_create(const char *name, int mflags,
 129                  taskqueue_enqueue_fn enqueue, void *context,
 130                  int mtxflags, const char *mtxname __unused)
 131 {
 132         struct gtaskqueue *queue;
 133         char *tq_name;
 134
 135         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
 136         if (!tq_name)
 137                 return (NULL);
 138
 139         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
 140
 141         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
 142         if (!queue) {
 143                 free(tq_name, M_GTASKQUEUE);
 144                 return (NULL);
 145         }
 146
 147         STAILQ_INIT(&queue->tq_queue);
 148         TAILQ_INIT(&queue->tq_active);
 149         queue->tq_enqueue = enqueue;
 150         queue->tq_context = context;
 151         queue->tq_name = tq_name;
 152         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
 153         queue->tq_flags |= TQ_FLAGS_ACTIVE;
 154         if (enqueue == gtaskqueue_thread_enqueue)
 155                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
 156         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
 157
 158         return (queue);
 159 }
 160
 161
 162 /*
 163  * Signal a taskqueue thread to terminate.
 164  */
 165 static void
 166 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
 167 {
 168
 169         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
 170                 wakeup(tq);
 171                 TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
 172         }
 173 }
 174
 175 static void
 176 gtaskqueue_free(struct gtaskqueue *queue)
 177 {
 178
 179         TQ_LOCK(queue);
 180         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
 181         gtaskqueue_terminate(queue->tq_threads, queue);
 182         KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
 183         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
 184         mtx_destroy(&queue->tq_mutex);
 185         free(queue->tq_threads, M_GTASKQUEUE);
 186         free(queue->tq_name, M_GTASKQUEUE);
 187         free(queue, M_GTASKQUEUE);
 188 }
 189
 190 /*
 191  * Wait for all to complete, then prevent it from being enqueued
 192  */
 193 void
 194 grouptask_block(struct grouptask *grouptask)
 195 {
 196         struct gtaskqueue *queue = grouptask->gt_taskqueue;
 197         struct gtask *gtask = &grouptask->gt_task;
 198
 199 #ifdef INVARIANTS
 200         if (queue == NULL) {
 201                 gtask_dump(gtask);
 202                 panic("queue == NULL");
 203         }
 204 #endif
 205         TQ_LOCK(queue);
 206         gtask->ta_flags |= TASK_NOENQUEUE;
 207         gtaskqueue_drain_locked(queue, gtask);
 208         TQ_UNLOCK(queue);
 209 }
 210
 211 void
 212 grouptask_unblock(struct grouptask *grouptask)
 213 {
 214         struct gtaskqueue *queue = grouptask->gt_taskqueue;
 215         struct gtask *gtask = &grouptask->gt_task;
 216
 217 #ifdef INVARIANTS
 218         if (queue == NULL) {
 219                 gtask_dump(gtask);
 220                 panic("queue == NULL");
 221         }
 222 #endif
 223         TQ_LOCK(queue);
 224         gtask->ta_flags &= ~TASK_NOENQUEUE;
 225         TQ_UNLOCK(queue);
 226 }
 227
 228 int
 229 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
 230 {
 231 #ifdef INVARIANTS
 232         if (queue == NULL) {
 233                 gtask_dump(gtask);
 234                 panic("queue == NULL");
 235         }
 236 #endif
 237         TQ_LOCK(queue);
 238         if (gtask->ta_flags & TASK_ENQUEUED) {
 239                 TQ_UNLOCK(queue);
 240                 return (0);
 241         }
 242         if (gtask->ta_flags & TASK_NOENQUEUE) {
 243                 TQ_UNLOCK(queue);
 244                 return (EAGAIN);
 245         }
 246         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
 247         gtask->ta_flags |= TASK_ENQUEUED;
 248         TQ_UNLOCK(queue);
 249         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
 250                 queue->tq_enqueue(queue->tq_context);
 251         return (0);
 252 }
 253
 254 static void
 255 gtaskqueue_task_nop_fn(void *context)
 256 {
 257 }
 258
 259 /*
 260  * Block until all currently queued tasks in this taskqueue
 261  * have begun execution.  Tasks queued during execution of
 262  * this function are ignored.
 263  */
 264 static void
 265 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
 266 {
 267         struct gtask t_barrier;
 268
 269         if (STAILQ_EMPTY(&queue->tq_queue))
 270                 return;
 271
 272         /*
 273          * Enqueue our barrier after all current tasks, but with
 274          * the highest priority so that newly queued tasks cannot
 275          * pass it.  Because of the high priority, we can not use
 276          * taskqueue_enqueue_locked directly (which drops the lock
 277          * anyway) so just insert it at tail while we have the
 278          * queue lock.
 279          */
 280         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
 281         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
 282         t_barrier.ta_flags |= TASK_ENQUEUED;
 283
 284         /*
 285          * Once the barrier has executed, all previously queued tasks
 286          * have completed or are currently executing.
 287          */
 288         while (t_barrier.ta_flags & TASK_ENQUEUED)
 289                 TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
 290 }
 291
 292 /*
 293  * Block until all currently executing tasks for this taskqueue
 294  * complete.  Tasks that begin execution during the execution
 295  * of this function are ignored.
 296  */
 297 static void
 298 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
 299 {
 300         struct gtaskqueue_busy tb_marker, *tb_first;
 301
 302         if (TAILQ_EMPTY(&queue->tq_active))
 303                 return;
 304
 305         /* Block taskq_terminate().*/
 306         queue->tq_callouts++;
 307
 308         /*
 309          * Wait for all currently executing taskqueue threads
 310          * to go idle.
 311          */
 312         tb_marker.tb_running = TB_DRAIN_WAITER;
 313         TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
 314         while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
 315                 TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
 316         TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
 317
 318         /*
 319          * Wakeup any other drain waiter that happened to queue up
 320          * without any intervening active thread.
 321          */
 322         tb_first = TAILQ_FIRST(&queue->tq_active);
 323         if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
 324                 wakeup(tb_first);
 325
 326         /* Release taskqueue_terminate(). */
 327         queue->tq_callouts--;
 328         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 329                 wakeup_one(queue->tq_threads);
 330 }
 331
 332 void
 333 gtaskqueue_block(struct gtaskqueue *queue)
 334 {
 335
 336         TQ_LOCK(queue);
 337         queue->tq_flags |= TQ_FLAGS_BLOCKED;
 338         TQ_UNLOCK(queue);
 339 }
 340
 341 void
 342 gtaskqueue_unblock(struct gtaskqueue *queue)
 343 {
 344
 345         TQ_LOCK(queue);
 346         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
 347         if (!STAILQ_EMPTY(&queue->tq_queue))
 348                 queue->tq_enqueue(queue->tq_context);
 349         TQ_UNLOCK(queue);
 350 }
 351
 352 static void
 353 gtaskqueue_run_locked(struct gtaskqueue *queue)
 354 {
 355         struct gtaskqueue_busy tb;
 356         struct gtaskqueue_busy *tb_first;
 357         struct gtask *gtask;
 358
 359         KASSERT(queue != NULL, ("tq is NULL"));
 360         TQ_ASSERT_LOCKED(queue);
 361         tb.tb_running = NULL;
 362
 363         while (STAILQ_FIRST(&queue->tq_queue)) {
 364                 TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
 365
 366                 /*
 367                  * Carefully remove the first task from the queue and
 368                  * clear its TASK_ENQUEUED flag
 369                  */
 370                 gtask = STAILQ_FIRST(&queue->tq_queue);
 371                 KASSERT(gtask != NULL, ("task is NULL"));
 372                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
 373                 gtask->ta_flags &= ~TASK_ENQUEUED;
 374                 tb.tb_running = gtask;
 375                 TQ_UNLOCK(queue);
 376
 377                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
 378                 gtask->ta_func(gtask->ta_context);
 379
 380                 TQ_LOCK(queue);
 381                 tb.tb_running = NULL;
 382                 wakeup(gtask);
 383
 384                 TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
 385                 tb_first = TAILQ_FIRST(&queue->tq_active);
 386                 if (tb_first != NULL &&
 387                     tb_first->tb_running == TB_DRAIN_WAITER)
 388                         wakeup(tb_first);
 389         }
 390 }
 391
 392 static int
 393 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
 394 {
 395         struct gtaskqueue_busy *tb;
 396
 397         TQ_ASSERT_LOCKED(queue);
 398         TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
 399                 if (tb->tb_running == gtask)
 400                         return (1);
 401         }
 402         return (0);
 403 }
 404
 405 static int
 406 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
 407 {
 408
 409         if (gtask->ta_flags & TASK_ENQUEUED)
 410                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
 411         gtask->ta_flags &= ~TASK_ENQUEUED;
 412         return (task_is_running(queue, gtask) ? EBUSY : 0);
 413 }
 414
 415 int
 416 gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
 417 {
 418         int error;
 419
 420         TQ_LOCK(queue);
 421         error = gtaskqueue_cancel_locked(queue, gtask);
 422         TQ_UNLOCK(queue);
 423
 424         return (error);
 425 }
 426
 427 static void
 428 gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
 429 {
 430         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
 431                 TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
 432 }
 433
 434 void
 435 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
 436 {
 437
 438         if (!queue->tq_spin)
 439                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 440
 441         TQ_LOCK(queue);
 442         gtaskqueue_drain_locked(queue, gtask);
 443         TQ_UNLOCK(queue);
 444 }
 445
 446 void
 447 gtaskqueue_drain_all(struct gtaskqueue *queue)
 448 {
 449
 450         if (!queue->tq_spin)
 451                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 452
 453         TQ_LOCK(queue);
 454         gtaskqueue_drain_tq_queue(queue);
 455         gtaskqueue_drain_tq_active(queue);
 456         TQ_UNLOCK(queue);
 457 }
 458
 459 static int
 460 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 461     cpuset_t *mask, const char *name, va_list ap)
 462 {
 463         char ktname[MAXCOMLEN + 1];
 464         struct thread *td;
 465         struct gtaskqueue *tq;
 466         int i, error;
 467
 468         if (count <= 0)
 469                 return (EINVAL);
 470
 471         vsnprintf(ktname, sizeof(ktname), name, ap);
 472         tq = *tqp;
 473
 474         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
 475             M_NOWAIT | M_ZERO);
 476         if (tq->tq_threads == NULL) {
 477                 printf("%s: no memory for %s threads\n", __func__, ktname);
 478                 return (ENOMEM);
 479         }
 480
 481         for (i = 0; i < count; i++) {
 482                 if (count == 1)
 483                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 484                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
 485                 else
 486                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 487                             &tq->tq_threads[i], RFSTOPPED, 0,
 488                             "%s_%d", ktname, i);
 489                 if (error) {
 490                         /* should be ok to continue, taskqueue_free will dtrt */
 491                         printf("%s: kthread_add(%s): error %d", __func__,
 492                             ktname, error);
 493                         tq->tq_threads[i] = NULL;               /* paranoid */
 494                 } else
 495                         tq->tq_tcount++;
 496         }
 497         for (i = 0; i < count; i++) {
 498                 if (tq->tq_threads[i] == NULL)
 499                         continue;
 500                 td = tq->tq_threads[i];
 501                 if (mask) {
 502                         error = cpuset_setthread(td->td_tid, mask);
 503                         /*
 504                          * Failing to pin is rarely an actual fatal error;
 505                          * it'll just affect performance.
 506                          */
 507                         if (error)
 508                                 printf("%s: curthread=%llu: can't pin; "
 509                                     "error=%d\n",
 510                                     __func__,
 511                                     (unsigned long long) td->td_tid,
 512                                     error);
 513                 }
 514                 thread_lock(td);
 515                 sched_prio(td, pri);
 516                 sched_add(td, SRQ_BORING);
 517                 thread_unlock(td);
 518         }
 519
 520         return (0);
 521 }
 522
 523 static int
 524 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 525     const char *name, ...)
 526 {
 527         va_list ap;
 528         int error;
 529
 530         va_start(ap, name);
 531         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
 532         va_end(ap);
 533         return (error);
 534 }
 535
 536 static inline void
 537 gtaskqueue_run_callback(struct gtaskqueue *tq,
 538     enum taskqueue_callback_type cb_type)
 539 {
 540         taskqueue_callback_fn tq_callback;
 541
 542         TQ_ASSERT_UNLOCKED(tq);
 543         tq_callback = tq->tq_callbacks[cb_type];
 544         if (tq_callback != NULL)
 545                 tq_callback(tq->tq_cb_contexts[cb_type]);
 546 }
 547
 548 static void
 549 gtaskqueue_thread_loop(void *arg)
 550 {
 551         struct gtaskqueue **tqp, *tq;
 552
 553         tqp = arg;
 554         tq = *tqp;
 555         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
 556         TQ_LOCK(tq);
 557         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
 558                 /* XXX ? */
 559                 gtaskqueue_run_locked(tq);
 560                 /*
 561                  * Because taskqueue_run() can drop tq_mutex, we need to
 562                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
 563                  * meantime, which means we missed a wakeup.
 564                  */
 565                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 566                         break;
 567                 TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
 568         }
 569         gtaskqueue_run_locked(tq);
 570         /*
 571          * This thread is on its way out, so just drop the lock temporarily
 572          * in order to call the shutdown callback.  This allows the callback
 573          * to look at the taskqueue, even just before it dies.
 574          */
 575         TQ_UNLOCK(tq);
 576         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
 577         TQ_LOCK(tq);
 578
 579         /* rendezvous with thread that asked us to terminate */
 580         tq->tq_tcount--;
 581         wakeup_one(tq->tq_threads);
 582         TQ_UNLOCK(tq);
 583         kthread_exit();
 584 }
 585
 586 static void
 587 gtaskqueue_thread_enqueue(void *context)
 588 {
 589         struct gtaskqueue **tqp, *tq;
 590
 591         tqp = context;
 592         tq = *tqp;
 593         wakeup_one(tq);
 594 }
 595
 596
 597 static struct gtaskqueue *
 598 gtaskqueue_create_fast(const char *name, int mflags,
 599                  taskqueue_enqueue_fn enqueue, void *context)
 600 {
 601         return _gtaskqueue_create(name, mflags, enqueue, context,
 602                         MTX_SPIN, "fast_taskqueue");
 603 }
 604
 605
/* One slot of a task queue group: a queue plus the tasks attached to it. */
struct taskqgroup_cpu {
        LIST_HEAD(, grouptask)  tgc_tasks;      /* grouptasks attached to this slot */
        struct gtaskqueue       *tgc_taskq;     /* queue servicing this slot */
        int     tgc_cnt;                        /* number of attached grouptasks */
        int     tgc_cpu;                        /* CPU associated with this slot */
};
 612
/* A group of task queues among which grouptasks are distributed. */
struct taskqgroup {
        struct taskqgroup_cpu tqg_queue[MAXCPU]; /* slots; the first tqg_cnt are in use */
        struct mtx      tqg_lock;       /* protects the slots and their task lists */
        const char *    tqg_name;       /* group name, used to name the threads */
        int             tqg_adjusting;  /* NOTE(review): not used in the visible code */
        int             tqg_stride;     /* NOTE(review): not used in the visible code */
        int             tqg_cnt;        /* number of slots in use */
};
 621
/* Heap-allocated argument for taskqgroup_binder(), which frees it. */
struct taskq_bind_task {
        struct gtask bt_task;   /* task that runs taskqgroup_binder() */
        int     bt_cpuid;       /* CPU the executing thread is bound to */
};
 626
 627 static void
 628 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
 629 {
 630         struct taskqgroup_cpu *qcpu;
 631
 632         qcpu = &qgroup->tqg_queue[idx];
 633         LIST_INIT(&qcpu->tgc_tasks);
 634         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
 635             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
 636         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
 637             "%s_%d", qgroup->tqg_name, idx);
 638         qcpu->tgc_cpu = cpu;
 639 }
 640
 641 static void
 642 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
 643 {
 644
 645         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
 646 }
 647
 648 /*
 649  * Find the taskq with least # of tasks that doesn't currently have any
 650  * other queues from the uniq identifier.
 651  */
 652 static int
 653 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
 654 {
 655         struct grouptask *n;
 656         int i, idx, mincnt;
 657         int strict;
 658
 659         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 660         if (qgroup->tqg_cnt == 0)
 661                 return (0);
 662         idx = -1;
 663         mincnt = INT_MAX;
 664         /*
 665          * Two passes;  First scan for a queue with the least tasks that
 666          * does not already service this uniq id.  If that fails simply find
 667          * the queue with the least total tasks;
 668          */
 669         for (strict = 1; mincnt == INT_MAX; strict = 0) {
 670                 for (i = 0; i < qgroup->tqg_cnt; i++) {
 671                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
 672                                 continue;
 673                         if (strict) {
 674                                 LIST_FOREACH(n,
 675                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 676                                         if (n->gt_uniq == uniq)
 677                                                 break;
 678                                 if (n != NULL)
 679                                         continue;
 680                         }
 681                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
 682                         idx = i;
 683                 }
 684         }
 685         if (idx == -1)
 686                 panic("%s: failed to pick a qid.", __func__);
 687
 688         return (idx);
 689 }
 690
 691 /*
 692  * smp_started is unusable since it is not set for UP kernels or even for
 693  * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 694  * (mp_ncpus == 1) test, but that would be broken here since we need to
  695  * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 696  * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 697  *
 698  * So maintain our own flag.  It must be set after all CPUs are started
 699  * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 700  * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 701  * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 702  * simpler for adjustment to pass a flag indicating if it is delayed.
 703  */
 704
 705 static int tqg_smp_started;
 706
 707 static void
 708 tqg_record_smp_started(void *arg)
 709 {
 710         tqg_smp_started = 1;
 711 }
 712
 713 SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
 714         tqg_record_smp_started, NULL);
 715
 716 void
 717 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
 718     void *uniq, int irq, const char *name)
 719 {
 720         cpuset_t mask;
 721         int qid, error;
 722
 723         gtask->gt_uniq = uniq;
 724         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
 725         gtask->gt_irq = irq;
 726         gtask->gt_cpu = -1;
 727         mtx_lock(&qgroup->tqg_lock);
 728         qid = taskqgroup_find(qgroup, uniq);
 729         qgroup->tqg_queue[qid].tgc_cnt++;
 730         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 731         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 732         if (irq != -1 && tqg_smp_started) {
 733                 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
 734                 CPU_ZERO(&mask);
 735                 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
 736                 mtx_unlock(&qgroup->tqg_lock);
 737                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
 738                 if (error)
 739                         printf("%s: binding interrupt failed for %s: %d\n",
 740                             __func__, gtask->gt_name, error);
 741         } else
 742                 mtx_unlock(&qgroup->tqg_lock);
 743 }
 744
 745 static void
 746 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 747 {
 748         cpuset_t mask;
 749         int qid, cpu, error;
 750
 751         mtx_lock(&qgroup->tqg_lock);
 752         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
 753         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 754         if (gtask->gt_irq != -1) {
 755                 mtx_unlock(&qgroup->tqg_lock);
 756
 757                 CPU_ZERO(&mask);
 758                 CPU_SET(cpu, &mask);
 759                 error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
 760                 mtx_lock(&qgroup->tqg_lock);
 761                 if (error)
 762                         printf("%s: binding interrupt failed for %s: %d\n",
 763                             __func__, gtask->gt_name, error);
 764
 765         }
 766         qgroup->tqg_queue[qid].tgc_cnt++;
 767         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 768         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 769         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 770         mtx_unlock(&qgroup->tqg_lock);
 771 }
 772
 773 int
 774 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
 775     void *uniq, int cpu, int irq, const char *name)
 776 {
 777         cpuset_t mask;
 778         int i, qid, error;
 779
 780         qid = -1;
 781         gtask->gt_uniq = uniq;
 782         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
 783         gtask->gt_irq = irq;
 784         gtask->gt_cpu = cpu;
 785         mtx_lock(&qgroup->tqg_lock);
 786         if (tqg_smp_started) {
 787                 for (i = 0; i < qgroup->tqg_cnt; i++)
 788                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 789                                 qid = i;
 790                                 break;
 791                         }
 792                 if (qid == -1) {
 793                         mtx_unlock(&qgroup->tqg_lock);
 794                         printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
 795                         return (EINVAL);
 796                 }
 797         } else
 798                 qid = 0;
 799         qgroup->tqg_queue[qid].tgc_cnt++;
 800         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 801         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 802         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 803         mtx_unlock(&qgroup->tqg_lock);
 804
 805         CPU_ZERO(&mask);
 806         CPU_SET(cpu, &mask);
 807         if (irq != -1 && tqg_smp_started) {
 808                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
 809                 if (error)
 810                         printf("%s: binding interrupt failed for %s: %d\n",
 811                             __func__, gtask->gt_name, error);
 812         }
 813         return (0);
 814 }
 815
 816 static int
 817 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 818 {
 819         cpuset_t mask;
 820         int i, qid, irq, cpu, error;
 821
 822         qid = -1;
 823         irq = gtask->gt_irq;
 824         cpu = gtask->gt_cpu;
 825         MPASS(tqg_smp_started);
 826         mtx_lock(&qgroup->tqg_lock);
 827         for (i = 0; i < qgroup->tqg_cnt; i++)
 828                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 829                         qid = i;
 830                         break;
 831                 }
 832         if (qid == -1) {
 833                 mtx_unlock(&qgroup->tqg_lock);
 834                 printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
 835                 return (EINVAL);
 836         }
 837         qgroup->tqg_queue[qid].tgc_cnt++;
 838         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 839         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 840         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 841         mtx_unlock(&qgroup->tqg_lock);
 842
 843         CPU_ZERO(&mask);
 844         CPU_SET(cpu, &mask);
 845
 846         if (irq != -1) {
 847                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
 848                 if (error)
 849                         printf("%s: binding interrupt failed for %s: %d\n",
 850                             __func__, gtask->gt_name, error);
 851         }
 852         return (0);
 853 }
 854
 855 void
 856 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
 857 {
 858         int i;
 859
 860         grouptask_block(gtask);
 861         mtx_lock(&qgroup->tqg_lock);
 862         for (i = 0; i < qgroup->tqg_cnt; i++)
 863                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
 864                         break;
 865         if (i == qgroup->tqg_cnt)
 866                 panic("%s: task %s not in group", __func__, gtask->gt_name);
 867         qgroup->tqg_queue[i].tgc_cnt--;
 868         LIST_REMOVE(gtask, gt_list);
 869         mtx_unlock(&qgroup->tqg_lock);
 870         gtask->gt_taskqueue = NULL;
 871         gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
 872 }
 873
 874 static void
 875 taskqgroup_binder(void *ctx)
 876 {
 877         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
 878         cpuset_t mask;
 879         int error;
 880
 881         CPU_ZERO(&mask);
 882         CPU_SET(gtask->bt_cpuid, &mask);
 883         error = cpuset_setthread(curthread->td_tid, &mask);
 884         thread_lock(curthread);
 885         sched_bind(curthread, gtask->bt_cpuid);
 886         thread_unlock(curthread);
 887
 888         if (error)
 889                 printf("%s: binding curthread failed: %d\n", __func__, error);
 890         free(gtask, M_DEVBUF);
 891 }
 892
 893 static void
 894 taskqgroup_bind(struct taskqgroup *qgroup)
 895 {
 896         struct taskq_bind_task *gtask;
 897         int i;
 898
 899         /*
 900          * Bind taskqueue threads to specific CPUs, if they have been assigned
 901          * one.
 902          */
 903         if (qgroup->tqg_cnt == 1)
 904                 return;
 905
 906         for (i = 0; i < qgroup->tqg_cnt; i++) {
 907                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
 908                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
 909                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
 910                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
 911                     &gtask->bt_task);
 912         }
 913 }
 914
 915 static void
 916 taskqgroup_config_init(void *arg)
 917 {
 918         struct taskqgroup *qgroup = qgroup_config;
 919         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 920
 921         LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
 922             grouptask, gt_list);
 923         qgroup->tqg_queue[0].tgc_cnt = 0;
 924         taskqgroup_cpu_create(qgroup, 0, 0);
 925
 926         qgroup->tqg_cnt = 1;
 927         qgroup->tqg_stride = 1;
 928 }
 929
 930 SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
 931         taskqgroup_config_init, NULL);
 932
 933 static int
 934 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
 935 {
 936         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 937         struct grouptask *gtask;
 938         int i, k, old_cnt, old_cpu, cpu;
 939
 940         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 941
 942         if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
 943                 printf("%s: failed cnt: %d stride: %d "
 944                     "mp_ncpus: %d tqg_smp_started: %d\n",
 945                     __func__, cnt, stride, mp_ncpus, tqg_smp_started);
 946                 return (EINVAL);
 947         }
 948         if (qgroup->tqg_adjusting) {
 949                 printf("%s failed: adjusting\n", __func__);
 950                 return (EBUSY);
 951         }
 952         qgroup->tqg_adjusting = 1;
 953         old_cnt = qgroup->tqg_cnt;
 954         old_cpu = 0;
 955         if (old_cnt < cnt)
 956                 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
 957         mtx_unlock(&qgroup->tqg_lock);
 958         /*
 959          * Set up queue for tasks added before boot.
 960          */
 961         if (old_cnt == 0) {
 962                 LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
 963                     grouptask, gt_list);
 964                 qgroup->tqg_queue[0].tgc_cnt = 0;
 965         }
 966
 967         /*
 968          * If new taskq threads have been added.
 969          */
 970         cpu = old_cpu;
 971         for (i = old_cnt; i < cnt; i++) {
 972                 taskqgroup_cpu_create(qgroup, i, cpu);
 973
 974                 for (k = 0; k < stride; k++)
 975                         cpu = CPU_NEXT(cpu);
 976         }
 977         mtx_lock(&qgroup->tqg_lock);
 978         qgroup->tqg_cnt = cnt;
 979         qgroup->tqg_stride = stride;
 980
 981         /*
 982          * Adjust drivers to use new taskqs.
 983          */
 984         for (i = 0; i < old_cnt; i++) {
 985                 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
 986                         LIST_REMOVE(gtask, gt_list);
 987                         qgroup->tqg_queue[i].tgc_cnt--;
 988                         LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
 989                 }
 990         }
 991         mtx_unlock(&qgroup->tqg_lock);
 992
 993         while ((gtask = LIST_FIRST(&gtask_head))) {
 994                 LIST_REMOVE(gtask, gt_list);
 995                 if (gtask->gt_cpu == -1)
 996                         taskqgroup_attach_deferred(qgroup, gtask);
 997                 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
 998                         taskqgroup_attach_deferred(qgroup, gtask);
 999         }
1000
1001 #ifdef INVARIANTS
1002         mtx_lock(&qgroup->tqg_lock);
1003         for (i = 0; i < qgroup->tqg_cnt; i++) {
1004                 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
1005                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
1006                         MPASS(gtask->gt_taskqueue != NULL);
1007         }
1008         mtx_unlock(&qgroup->tqg_lock);
1009 #endif
1010         /*
1011          * If taskq thread count has been reduced.
1012          */
1013         for (i = cnt; i < old_cnt; i++)
1014                 taskqgroup_cpu_remove(qgroup, i);
1015
1016         taskqgroup_bind(qgroup);
1017
1018         mtx_lock(&qgroup->tqg_lock);
1019         qgroup->tqg_adjusting = 0;
1020
1021         return (0);
1022 }
1023
1024 int
1025 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
1026 {
1027         int error;
1028
1029         mtx_lock(&qgroup->tqg_lock);
1030         error = _taskqgroup_adjust(qgroup, cnt, stride);
1031         mtx_unlock(&qgroup->tqg_lock);
1032
1033         return (error);
1034 }
1035
1036 struct taskqgroup *
1037 taskqgroup_create(const char *name)
1038 {
1039         struct taskqgroup *qgroup;
1040
1041         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
1042         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
1043         qgroup->tqg_name = name;
1044         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
1045
1046         return (qgroup);
1047 }
1048
/*
 * Destroy a taskqgroup.
 *
 * Currently a no-op: the body is empty, so neither the mutex initialised
 * nor the memory allocated by taskqgroup_create() is released here.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{
}
1054
1055 void
1056 taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
1057     const char *name)
1058 {
1059
1060         GROUPTASK_INIT(gtask, 0, fn, ctx);
1061         taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
1062 }
1063
1064 void
1065 taskqgroup_config_gtask_deinit(struct grouptask *gtask)
1066 {
1067
1068         taskqgroup_detach(qgroup_config, gtask);
1069 }