sys/kern/subr_gtaskqueue.c

   1 /*-
   2  * Copyright (c) 2000 Doug Rabson
   3  * Copyright (c) 2014 Jeff Roberson
   4  * Copyright (c) 2016 Matthew Macy
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 #include <sys/param.h>
  33 #include <sys/systm.h>
  34 #include <sys/bus.h>
  35 #include <sys/cpuset.h>
  36 #include <sys/interrupt.h>
  37 #include <sys/kernel.h>
  38 #include <sys/kthread.h>
  39 #include <sys/libkern.h>
  40 #include <sys/limits.h>
  41 #include <sys/lock.h>
  42 #include <sys/malloc.h>
  43 #include <sys/mutex.h>
  44 #include <sys/proc.h>
  45 #include <sys/sched.h>
  46 #include <sys/smp.h>
  47 #include <sys/gtaskqueue.h>
  48 #include <sys/unistd.h>
  49 #include <machine/stdarg.h>
  50
  51 static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues");
  52 static void     gtaskqueue_thread_enqueue(void *);
  53 static void     gtaskqueue_thread_loop(void *arg);
  54
  55 TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
  56
  57 struct gtaskqueue_busy {
  58         struct gtask    *tb_running;
  59         TAILQ_ENTRY(gtaskqueue_busy) tb_link;
  60 };
  61
  /*
   * Sentinel stored in gtaskqueue_busy.tb_running to mark a tq_active entry
   * that belongs to a drain waiter rather than to a running task.
   */
  static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)1;
  63
  64 struct gtaskqueue {
  65         STAILQ_HEAD(, gtask)    tq_queue;
  66         gtaskqueue_enqueue_fn   tq_enqueue;
  67         void                    *tq_context;
  68         char                    *tq_name;
  69         TAILQ_HEAD(, gtaskqueue_busy) tq_active;
  70         struct mtx              tq_mutex;
  71         struct thread           **tq_threads;
  72         int                     tq_tcount;
  73         int                     tq_spin;
  74         int                     tq_flags;
  75         int                     tq_callouts;
  76         taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
  77         void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
  78 };
  79
  /*
   * Bits kept in gtaskqueue.tq_flags.
   *
   * TQ_FLAGS_ACTIVE            set at creation, cleared by gtaskqueue_free()
   *                            to make the worker threads exit.
   * TQ_FLAGS_BLOCKED           while set, queuing a task does not invoke the
   *                            tq_enqueue hook.
   * TQ_FLAGS_UNLOCKED_ENQUEUE  set at creation when the enqueue hook is
   *                            gtaskqueue_thread_enqueue().
   */
  #define TQ_FLAGS_ACTIVE                 0x01
  #define TQ_FLAGS_BLOCKED                0x02
  #define TQ_FLAGS_UNLOCKED_ENQUEUE       0x04

  /* NOTE(review): no user of this bit is visible in this file. */
  #define DT_CALLOUT_ARMED                0x01
  85
  /*
   * Acquire the queue mutex, using the default or the spin lock primitive
   * according to the queue's tq_spin setting.
   */
  #define TQ_LOCK(tq)                                                     \
          do {                                                            \
                  if (!(tq)->tq_spin)                                     \
                          mtx_lock(&(tq)->tq_mutex);                      \
                  else                                                    \
                          mtx_lock_spin(&(tq)->tq_mutex);                 \
          } while (0)

  /* Assert that the queue mutex is owned (MA_OWNED). */
  #define TQ_ASSERT_LOCKED(tq)    mtx_assert(&(tq)->tq_mutex, MA_OWNED)
  94
  /*
   * Release the queue mutex, using the default or the spin unlock primitive
   * according to the queue's tq_spin setting.
   */
  #define TQ_UNLOCK(tq)                                                   \
          do {                                                            \
                  if (!(tq)->tq_spin)                                     \
                          mtx_unlock(&(tq)->tq_mutex);                    \
                  else                                                    \
                          mtx_unlock_spin(&(tq)->tq_mutex);               \
          } while (0)

  /* Assert that the queue mutex is not owned (MA_NOTOWNED). */
  #define TQ_ASSERT_UNLOCKED(tq)  mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
 103
 #ifdef INVARIANTS
 /*
  * Debug helper: print a task's address together with its flags, priority,
  * handler and handler argument.  Only compiled when INVARIANTS is defined.
  */
 static void
 gtask_dump(struct gtask *gtask)
 {
         printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
             gtask,
             gtask->ta_flags,
             gtask->ta_priority,
             gtask->ta_func,
             gtask->ta_context);
 }
 #endif
 112
 113 static __inline int
 114 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
 115     int t)
 116 {
 117         if (tq->tq_spin)
 118                 return (msleep_spin(p, m, wm, t));
 119         return (msleep(p, m, pri, wm, t));
 120 }
 121
 122 static struct gtaskqueue *
 123 _gtaskqueue_create(const char *name, int mflags,
 124                  taskqueue_enqueue_fn enqueue, void *context,
 125                  int mtxflags, const char *mtxname __unused)
 126 {
 127         struct gtaskqueue *queue;
 128         char *tq_name;
 129
 130         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
 131         if (!tq_name)
 132                 return (NULL);
 133
 134         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
 135
 136         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
 137         if (!queue)
 138                 return (NULL);
 139
 140         STAILQ_INIT(&queue->tq_queue);
 141         TAILQ_INIT(&queue->tq_active);
 142         queue->tq_enqueue = enqueue;
 143         queue->tq_context = context;
 144         queue->tq_name = tq_name;
 145         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
 146         queue->tq_flags |= TQ_FLAGS_ACTIVE;
 147         if (enqueue == gtaskqueue_thread_enqueue)
 148                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
 149         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
 150
 151         return (queue);
 152 }
 153
 154
 155 /*
 156  * Signal a taskqueue thread to terminate.
 157  */
 158 static void
 159 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
 160 {
 161
 162         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
 163                 wakeup(tq);
 164                 TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
 165         }
 166 }
 167
 168 static void
 169 gtaskqueue_free(struct gtaskqueue *queue)
 170 {
 171
 172         TQ_LOCK(queue);
 173         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
 174         gtaskqueue_terminate(queue->tq_threads, queue);
 175         KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
 176         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
 177         mtx_destroy(&queue->tq_mutex);
 178         free(queue->tq_threads, M_GTASKQUEUE);
 179         free(queue->tq_name, M_GTASKQUEUE);
 180         free(queue, M_GTASKQUEUE);
 181 }
 182
 183 int
 184 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
 185 {
 186 #ifdef INVARIANTS
 187         if (queue == NULL) {
 188                 gtask_dump(gtask);
 189                 panic("queue == NULL");
 190         }
 191 #endif
 192         TQ_LOCK(queue);
 193         if (gtask->ta_flags & TASK_ENQUEUED) {
 194                 TQ_UNLOCK(queue);
 195                 return (0);
 196         }
 197         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
 198         gtask->ta_flags |= TASK_ENQUEUED;
 199         TQ_UNLOCK(queue);
 200         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
 201                 queue->tq_enqueue(queue->tq_context);
 202         return (0);
 203 }
 204
 /* Task handler that does nothing; used by the drain barrier task. */
 static void
 gtaskqueue_task_nop_fn(void *context)
 {
         /* Intentionally empty. */
 }
 209
 210 /*
 211  * Block until all currently queued tasks in this taskqueue
 212  * have begun execution.  Tasks queued during execution of
 213  * this function are ignored.
 214  */
 215 static void
 216 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
 217 {
 218         struct gtask t_barrier;
 219
 220         if (STAILQ_EMPTY(&queue->tq_queue))
 221                 return;
 222
 223         /*
 224          * Enqueue our barrier after all current tasks, but with
 225          * the highest priority so that newly queued tasks cannot
 226          * pass it.  Because of the high priority, we can not use
 227          * taskqueue_enqueue_locked directly (which drops the lock
 228          * anyway) so just insert it at tail while we have the
 229          * queue lock.
 230          */
 231         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
 232         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
 233         t_barrier.ta_flags |= TASK_ENQUEUED;
 234
 235         /*
 236          * Once the barrier has executed, all previously queued tasks
 237          * have completed or are currently executing.
 238          */
 239         while (t_barrier.ta_flags & TASK_ENQUEUED)
 240                 TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
 241 }
 242
 243 /*
 244  * Block until all currently executing tasks for this taskqueue
 245  * complete.  Tasks that begin execution during the execution
 246  * of this function are ignored.
 247  */
 248 static void
 249 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
 250 {
 251         struct gtaskqueue_busy tb_marker, *tb_first;
 252
 253         if (TAILQ_EMPTY(&queue->tq_active))
 254                 return;
 255
 256         /* Block taskq_terminate().*/
 257         queue->tq_callouts++;
 258
 259         /*
 260          * Wait for all currently executing taskqueue threads
 261          * to go idle.
 262          */
 263         tb_marker.tb_running = TB_DRAIN_WAITER;
 264         TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
 265         while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
 266                 TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
 267         TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
 268
 269         /*
 270          * Wakeup any other drain waiter that happened to queue up
 271          * without any intervening active thread.
 272          */
 273         tb_first = TAILQ_FIRST(&queue->tq_active);
 274         if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
 275                 wakeup(tb_first);
 276
 277         /* Release taskqueue_terminate(). */
 278         queue->tq_callouts--;
 279         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 280                 wakeup_one(queue->tq_threads);
 281 }
 282
 283 void
 284 gtaskqueue_block(struct gtaskqueue *queue)
 285 {
 286
 287         TQ_LOCK(queue);
 288         queue->tq_flags |= TQ_FLAGS_BLOCKED;
 289         TQ_UNLOCK(queue);
 290 }
 291
 292 void
 293 gtaskqueue_unblock(struct gtaskqueue *queue)
 294 {
 295
 296         TQ_LOCK(queue);
 297         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
 298         if (!STAILQ_EMPTY(&queue->tq_queue))
 299                 queue->tq_enqueue(queue->tq_context);
 300         TQ_UNLOCK(queue);
 301 }
 302
 303 static void
 304 gtaskqueue_run_locked(struct gtaskqueue *queue)
 305 {
 306         struct gtaskqueue_busy tb;
 307         struct gtaskqueue_busy *tb_first;
 308         struct gtask *gtask;
 309
 310         KASSERT(queue != NULL, ("tq is NULL"));
 311         TQ_ASSERT_LOCKED(queue);
 312         tb.tb_running = NULL;
 313
 314         while (STAILQ_FIRST(&queue->tq_queue)) {
 315                 TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
 316
 317                 /*
 318                  * Carefully remove the first task from the queue and
 319                  * clear its TASK_ENQUEUED flag
 320                  */
 321                 gtask = STAILQ_FIRST(&queue->tq_queue);
 322                 KASSERT(gtask != NULL, ("task is NULL"));
 323                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
 324                 gtask->ta_flags &= ~TASK_ENQUEUED;
 325                 tb.tb_running = gtask;
 326                 TQ_UNLOCK(queue);
 327
 328                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
 329                 gtask->ta_func(gtask->ta_context);
 330
 331                 TQ_LOCK(queue);
 332                 tb.tb_running = NULL;
 333                 wakeup(gtask);
 334
 335                 TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
 336                 tb_first = TAILQ_FIRST(&queue->tq_active);
 337                 if (tb_first != NULL &&
 338                     tb_first->tb_running == TB_DRAIN_WAITER)
 339                         wakeup(tb_first);
 340         }
 341 }
 342
 343 static int
 344 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
 345 {
 346         struct gtaskqueue_busy *tb;
 347
 348         TQ_ASSERT_LOCKED(queue);
 349         TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
 350                 if (tb->tb_running == gtask)
 351                         return (1);
 352         }
 353         return (0);
 354 }
 355
 356 static int
 357 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
 358 {
 359
 360         if (gtask->ta_flags & TASK_ENQUEUED)
 361                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
 362         gtask->ta_flags &= ~TASK_ENQUEUED;
 363         return (task_is_running(queue, gtask) ? EBUSY : 0);
 364 }
 365
 /*
  * Locked wrapper around gtaskqueue_cancel_locked(): cancel a pending task.
  * Returns EBUSY if the task is running at the moment, 0 otherwise.
  */
 int
 gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
 {
         int rv;

         TQ_LOCK(queue);
         rv = gtaskqueue_cancel_locked(queue, gtask);
         TQ_UNLOCK(queue);

         return (rv);
 }
 377
 378 void
 379 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
 380 {
 381
 382         if (!queue->tq_spin)
 383                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 384
 385         TQ_LOCK(queue);
 386         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
 387                 TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
 388         TQ_UNLOCK(queue);
 389 }
 390
 391 void
 392 gtaskqueue_drain_all(struct gtaskqueue *queue)
 393 {
 394
 395         if (!queue->tq_spin)
 396                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 397
 398         TQ_LOCK(queue);
 399         gtaskqueue_drain_tq_queue(queue);
 400         gtaskqueue_drain_tq_active(queue);
 401         TQ_UNLOCK(queue);
 402 }
 403
 404 static int
 405 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 406     cpuset_t *mask, const char *name, va_list ap)
 407 {
 408         char ktname[MAXCOMLEN + 1];
 409         struct thread *td;
 410         struct gtaskqueue *tq;
 411         int i, error;
 412
 413         if (count <= 0)
 414                 return (EINVAL);
 415
 416         vsnprintf(ktname, sizeof(ktname), name, ap);
 417         tq = *tqp;
 418
 419         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
 420             M_NOWAIT | M_ZERO);
 421         if (tq->tq_threads == NULL) {
 422                 printf("%s: no memory for %s threads\n", __func__, ktname);
 423                 return (ENOMEM);
 424         }
 425
 426         for (i = 0; i < count; i++) {
 427                 if (count == 1)
 428                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 429                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
 430                 else
 431                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 432                             &tq->tq_threads[i], RFSTOPPED, 0,
 433                             "%s_%d", ktname, i);
 434                 if (error) {
 435                         /* should be ok to continue, taskqueue_free will dtrt */
 436                         printf("%s: kthread_add(%s): error %d", __func__,
 437                             ktname, error);
 438                         tq->tq_threads[i] = NULL;               /* paranoid */
 439                 } else
 440                         tq->tq_tcount++;
 441         }
 442         for (i = 0; i < count; i++) {
 443                 if (tq->tq_threads[i] == NULL)
 444                         continue;
 445                 td = tq->tq_threads[i];
 446                 if (mask) {
 447                         error = cpuset_setthread(td->td_tid, mask);
 448                         /*
 449                          * Failing to pin is rarely an actual fatal error;
 450                          * it'll just affect performance.
 451                          */
 452                         if (error)
 453                                 printf("%s: curthread=%llu: can't pin; "
 454                                     "error=%d\n",
 455                                     __func__,
 456                                     (unsigned long long) td->td_tid,
 457                                     error);
 458                 }
 459                 thread_lock(td);
 460                 sched_prio(td, pri);
 461                 sched_add(td, SRQ_BORING);
 462                 thread_unlock(td);
 463         }
 464
 465         return (0);
 466 }
 467
 468 static int
 469 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 470     const char *name, ...)
 471 {
 472         va_list ap;
 473         int error;
 474
 475         va_start(ap, name);
 476         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
 477         va_end(ap);
 478         return (error);
 479 }
 480
 481 static inline void
 482 gtaskqueue_run_callback(struct gtaskqueue *tq,
 483     enum taskqueue_callback_type cb_type)
 484 {
 485         taskqueue_callback_fn tq_callback;
 486
 487         TQ_ASSERT_UNLOCKED(tq);
 488         tq_callback = tq->tq_callbacks[cb_type];
 489         if (tq_callback != NULL)
 490                 tq_callback(tq->tq_cb_contexts[cb_type]);
 491 }
 492
 493 static void
 494 gtaskqueue_thread_loop(void *arg)
 495 {
 496         struct gtaskqueue **tqp, *tq;
 497
 498         tqp = arg;
 499         tq = *tqp;
 500         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
 501         TQ_LOCK(tq);
 502         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
 503                 /* XXX ? */
 504                 gtaskqueue_run_locked(tq);
 505                 /*
 506                  * Because taskqueue_run() can drop tq_mutex, we need to
 507                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
 508                  * meantime, which means we missed a wakeup.
 509                  */
 510                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 511                         break;
 512                 TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
 513         }
 514         gtaskqueue_run_locked(tq);
 515         /*
 516          * This thread is on its way out, so just drop the lock temporarily
 517          * in order to call the shutdown callback.  This allows the callback
 518          * to look at the taskqueue, even just before it dies.
 519          */
 520         TQ_UNLOCK(tq);
 521         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
 522         TQ_LOCK(tq);
 523
 524         /* rendezvous with thread that asked us to terminate */
 525         tq->tq_tcount--;
 526         wakeup_one(tq->tq_threads);
 527         TQ_UNLOCK(tq);
 528         kthread_exit();
 529 }
 530
 /*
  * Enqueue hook for thread-backed queues.  "context" is the address of the
  * queue pointer; one thread sleeping on the queue is woken up.
  */
 static void
 gtaskqueue_thread_enqueue(void *context)
 {
         struct gtaskqueue *tq;

         tq = *(struct gtaskqueue **)context;
         wakeup_one(tq);
 }
 540
 541
 542 static struct gtaskqueue *
 543 gtaskqueue_create_fast(const char *name, int mflags,
 544                  taskqueue_enqueue_fn enqueue, void *context)
 545 {
 546         return _gtaskqueue_create(name, mflags, enqueue, context,
 547                         MTX_SPIN, "fast_taskqueue");
 548 }
 549
 550
 551 struct taskqgroup_cpu {
 552         LIST_HEAD(, grouptask)  tgc_tasks;
 553         struct gtaskqueue       *tgc_taskq;
 554         int     tgc_cnt;
 555         int     tgc_cpu;
 556 };
 557
 558 struct taskqgroup {
 559         struct taskqgroup_cpu tqg_queue[MAXCPU];
 560         struct mtx      tqg_lock;
 561         char *          tqg_name;
 562         int             tqg_adjusting;
 563         int             tqg_stride;
 564         int             tqg_cnt;
 565 };
 566
 567 struct taskq_bind_task {
 568         struct gtask bt_task;
 569         int     bt_cpuid;
 570 };
 571
 572 static void
 573 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
 574 {
 575         struct taskqgroup_cpu *qcpu;
 576
 577         qcpu = &qgroup->tqg_queue[idx];
 578         LIST_INIT(&qcpu->tgc_tasks);
 579         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
 580             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
 581         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
 582             "%s_%d", qgroup->tqg_name, idx);
 583         qcpu->tgc_cpu = cpu;
 584 }
 585
 586 static void
 587 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
 588 {
 589
 590         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
 591 }
 592
 593 /*
 594  * Find the taskq with least # of tasks that doesn't currently have any
 595  * other queues from the uniq identifier.
 596  */
 597 static int
 598 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
 599 {
 600         struct grouptask *n;
 601         int i, idx, mincnt;
 602         int strict;
 603
 604         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 605         if (qgroup->tqg_cnt == 0)
 606                 return (0);
 607         idx = -1;
 608         mincnt = INT_MAX;
 609         /*
 610          * Two passes;  First scan for a queue with the least tasks that
 611          * does not already service this uniq id.  If that fails simply find
 612          * the queue with the least total tasks;
 613          */
 614         for (strict = 1; mincnt == INT_MAX; strict = 0) {
 615                 for (i = 0; i < qgroup->tqg_cnt; i++) {
 616                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
 617                                 continue;
 618                         if (strict) {
 619                                 LIST_FOREACH(n,
 620                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 621                                         if (n->gt_uniq == uniq)
 622                                                 break;
 623                                 if (n != NULL)
 624                                         continue;
 625                         }
 626                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
 627                         idx = i;
 628                 }
 629         }
 630         if (idx == -1)
 631                 panic("taskqgroup_find: Failed to pick a qid.");
 632
 633         return (idx);
 634 }
 635
 /*
  * smp_started is unusable since it is not set for UP kernels or even for
  * SMP kernels when there is 1 CPU.  This is usually handled by adding a
  * (mp_ncpus == 1) test, but that would be broken here since we need to
  * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
  * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
  *
  * So maintain our own flag.  It must be set after all CPUs are started
  * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
  * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
  * SI_ORDER_ANY, but it is less clear that it is after the CPUs are
  * started.  It would be simpler for adjustment to pass a flag indicating
  * whether it is delayed.
  */

 static int tqg_smp_started;
 651
 652 static void
 653 tqg_record_smp_started(void *arg)
 654 {
 655         tqg_smp_started = 1;
 656 }
 657
 658 SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
 659         tqg_record_smp_started, NULL);
 660
 661 void
 662 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
 663     void *uniq, int irq, char *name)
 664 {
 665         cpuset_t mask;
 666         int qid;
 667
 668         gtask->gt_uniq = uniq;
 669         gtask->gt_name = name;
 670         gtask->gt_irq = irq;
 671         gtask->gt_cpu = -1;
 672         mtx_lock(&qgroup->tqg_lock);
 673         qid = taskqgroup_find(qgroup, uniq);
 674         qgroup->tqg_queue[qid].tgc_cnt++;
 675         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 676         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 677         if (irq != -1 && tqg_smp_started) {
 678                 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
 679                 CPU_ZERO(&mask);
 680                 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
 681                 mtx_unlock(&qgroup->tqg_lock);
 682                 intr_setaffinity(irq, &mask);
 683         } else
 684                 mtx_unlock(&qgroup->tqg_lock);
 685 }
 686
 687 static void
 688 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 689 {
 690         cpuset_t mask;
 691         int qid, cpu;
 692
 693         mtx_lock(&qgroup->tqg_lock);
 694         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
 695         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 696         if (gtask->gt_irq != -1) {
 697                 mtx_unlock(&qgroup->tqg_lock);
 698
 699                 CPU_ZERO(&mask);
 700                 CPU_SET(cpu, &mask);
 701                 intr_setaffinity(gtask->gt_irq, &mask);
 702
 703                 mtx_lock(&qgroup->tqg_lock);
 704         }
 705         qgroup->tqg_queue[qid].tgc_cnt++;
 706
 707         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
 708                          gt_list);
 709         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 710         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 711         mtx_unlock(&qgroup->tqg_lock);
 712 }
 713
 714 int
 715 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
 716         void *uniq, int cpu, int irq, char *name)
 717 {
 718         cpuset_t mask;
 719         int i, qid;
 720
 721         qid = -1;
 722         gtask->gt_uniq = uniq;
 723         gtask->gt_name = name;
 724         gtask->gt_irq = irq;
 725         gtask->gt_cpu = cpu;
 726         mtx_lock(&qgroup->tqg_lock);
 727         if (tqg_smp_started) {
 728                 for (i = 0; i < qgroup->tqg_cnt; i++)
 729                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 730                                 qid = i;
 731                                 break;
 732                         }
 733                 if (qid == -1) {
 734                         mtx_unlock(&qgroup->tqg_lock);
 735                         return (EINVAL);
 736                 }
 737         } else
 738                 qid = 0;
 739         qgroup->tqg_queue[qid].tgc_cnt++;
 740         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 741         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 742         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 743         mtx_unlock(&qgroup->tqg_lock);
 744
 745         CPU_ZERO(&mask);
 746         CPU_SET(cpu, &mask);
 747         if (irq != -1 && tqg_smp_started)
 748                 intr_setaffinity(irq, &mask);
 749         return (0);
 750 }
 751
 752 static int
 753 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 754 {
 755         cpuset_t mask;
 756         int i, qid, irq, cpu;
 757
 758         qid = -1;
 759         irq = gtask->gt_irq;
 760         cpu = gtask->gt_cpu;
 761         MPASS(tqg_smp_started);
 762         mtx_lock(&qgroup->tqg_lock);
 763         for (i = 0; i < qgroup->tqg_cnt; i++)
 764                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 765                         qid = i;
 766                         break;
 767                 }
 768         if (qid == -1) {
 769                 mtx_unlock(&qgroup->tqg_lock);
 770                 return (EINVAL);
 771         }
 772         qgroup->tqg_queue[qid].tgc_cnt++;
 773         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 774         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 775         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 776         mtx_unlock(&qgroup->tqg_lock);
 777
 778         CPU_ZERO(&mask);
 779         CPU_SET(cpu, &mask);
 780
 781         if (irq != -1)
 782                 intr_setaffinity(irq, &mask);
 783         return (0);
 784 }
 785
 786 void
 787 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
 788 {
 789         int i;
 790
 791         mtx_lock(&qgroup->tqg_lock);
 792         for (i = 0; i < qgroup->tqg_cnt; i++)
 793                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
 794                         break;
 795         if (i == qgroup->tqg_cnt)
 796                 panic("taskqgroup_detach: task not in group\n");
 797         qgroup->tqg_queue[i].tgc_cnt--;
 798         LIST_REMOVE(gtask, gt_list);
 799         mtx_unlock(&qgroup->tqg_lock);
 800         gtask->gt_taskqueue = NULL;
 801 }
 802
 803 static void
 804 taskqgroup_binder(void *ctx)
 805 {
 806         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
 807         cpuset_t mask;
 808         int error;
 809
 810         CPU_ZERO(&mask);
 811         CPU_SET(gtask->bt_cpuid, &mask);
 812         error = cpuset_setthread(curthread->td_tid, &mask);
 813         thread_lock(curthread);
 814         sched_bind(curthread, gtask->bt_cpuid);
 815         thread_unlock(curthread);
 816
 817         if (error)
 818                 printf("taskqgroup_binder: setaffinity failed: %d\n",
 819                     error);
 820         free(gtask, M_DEVBUF);
 821 }
 822
 823 static void
 824 taskqgroup_bind(struct taskqgroup *qgroup)
 825 {
 826         struct taskq_bind_task *gtask;
 827         int i;
 828
 829         /*
 830          * Bind taskqueue threads to specific CPUs, if they have been assigned
 831          * one.
 832          */
 833         if (qgroup->tqg_cnt == 1)
 834                 return;
 835
 836         for (i = 0; i < qgroup->tqg_cnt; i++) {
 837                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
 838                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
 839                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
 840                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
 841                     &gtask->bt_task);
 842         }
 843 }
 844
 845 static int
 846 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
 847 {
 848         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 849         struct grouptask *gtask;
 850         int i, k, old_cnt, old_cpu, cpu;
 851
 852         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 853
 854         if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
 855                 printf("%s: failed cnt: %d stride: %d "
 856                     "mp_ncpus: %d tqg_smp_started: %d\n",
 857                     __func__, cnt, stride, mp_ncpus, tqg_smp_started);
 858                 return (EINVAL);
 859         }
 860         if (qgroup->tqg_adjusting) {
 861                 printf("taskqgroup_adjust failed: adjusting\n");
 862                 return (EBUSY);
 863         }
 864         qgroup->tqg_adjusting = 1;
 865         old_cnt = qgroup->tqg_cnt;
 866         old_cpu = 0;
 867         if (old_cnt < cnt)
 868                 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
 869         mtx_unlock(&qgroup->tqg_lock);
 870         /*
 871          * Set up queue for tasks added before boot.
 872          */
 873         if (old_cnt == 0) {
 874                 LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
 875                     grouptask, gt_list);
 876                 qgroup->tqg_queue[0].tgc_cnt = 0;
 877         }
 878
 879         /*
 880          * If new taskq threads have been added.
 881          */
 882         cpu = old_cpu;
 883         for (i = old_cnt; i < cnt; i++) {
 884                 taskqgroup_cpu_create(qgroup, i, cpu);
 885
 886                 for (k = 0; k < stride; k++)
 887                         cpu = CPU_NEXT(cpu);
 888         }
 889         mtx_lock(&qgroup->tqg_lock);
 890         qgroup->tqg_cnt = cnt;
 891         qgroup->tqg_stride = stride;
 892
 893         /*
 894          * Adjust drivers to use new taskqs.
 895          */
 896         for (i = 0; i < old_cnt; i++) {
 897                 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
 898                         LIST_REMOVE(gtask, gt_list);
 899                         qgroup->tqg_queue[i].tgc_cnt--;
 900                         LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
 901                 }
 902         }
 903         mtx_unlock(&qgroup->tqg_lock);
 904
 905         while ((gtask = LIST_FIRST(&gtask_head))) {
 906                 LIST_REMOVE(gtask, gt_list);
 907                 if (gtask->gt_cpu == -1)
 908                         taskqgroup_attach_deferred(qgroup, gtask);
 909                 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
 910                         taskqgroup_attach_deferred(qgroup, gtask);
 911         }
 912
 913 #ifdef INVARIANTS
 914         mtx_lock(&qgroup->tqg_lock);
 915         for (i = 0; i < qgroup->tqg_cnt; i++) {
 916                 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
 917                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 918                         MPASS(gtask->gt_taskqueue != NULL);
 919         }
 920         mtx_unlock(&qgroup->tqg_lock);
 921 #endif
 922         /*
 923          * If taskq thread count has been reduced.
 924          */
 925         for (i = cnt; i < old_cnt; i++)
 926                 taskqgroup_cpu_remove(qgroup, i);
 927
 928         taskqgroup_bind(qgroup);
 929
 930         mtx_lock(&qgroup->tqg_lock);
 931         qgroup->tqg_adjusting = 0;
 932
 933         return (0);
 934 }
 935
 936 int
 937 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
 938 {
 939         int error;
 940
 941         mtx_lock(&qgroup->tqg_lock);
 942         error = _taskqgroup_adjust(qgroup, cnt, stride);
 943         mtx_unlock(&qgroup->tqg_lock);
 944
 945         return (error);
 946 }
 947
 948 struct taskqgroup *
 949 taskqgroup_create(char *name)
 950 {
 951         struct taskqgroup *qgroup;
 952
 953         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
 954         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
 955         qgroup->tqg_name = name;
 956         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
 957
 958         return (qgroup);
 959 }
 960
 /*
  * Currently a no-op: nothing set up by taskqgroup_create() or by later
  * adjustments is released here.
  */
 void
 taskqgroup_destroy(struct taskqgroup *qgroup)
 {
         /* Not implemented. */
 }