sys/kern/subr_gtaskqueue.c

   1 /*-
   2  * Copyright (c) 2000 Doug Rabson
   3  * Copyright (c) 2014 Jeff Roberson
   4  * Copyright (c) 2016 Matthew Macy
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 #include <sys/param.h>
  33 #include <sys/systm.h>
  34 #include <sys/bus.h>
  35 #include <sys/cpuset.h>
  36 #include <sys/interrupt.h>
  37 #include <sys/kernel.h>
  38 #include <sys/kthread.h>
  39 #include <sys/libkern.h>
  40 #include <sys/limits.h>
  41 #include <sys/lock.h>
  42 #include <sys/malloc.h>
  43 #include <sys/mutex.h>
  44 #include <sys/proc.h>
  45 #include <sys/sched.h>
  46 #include <sys/smp.h>
  47 #include <sys/gtaskqueue.h>
  48 #include <sys/unistd.h>
  49 #include <machine/stdarg.h>
  50
  51 static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues");
  52 static void     gtaskqueue_thread_enqueue(void *);
  53 static void     gtaskqueue_thread_loop(void *arg);
  54
/*
 * Entry on a queue's tq_active list.  gtaskqueue_run_locked() links one in
 * for the duration of each task it runs; gtaskqueue_drain_tq_active() links
 * one in as a marker with tb_running set to TB_DRAIN_WAITER.
 */
struct gtaskqueue_busy {
        struct gtask    *tb_running;    /* task being run, NULL, or TB_DRAIN_WAITER */
        TAILQ_ENTRY(gtaskqueue_busy) tb_link;   /* linkage on tq_active */
};
  59
/*
 * Sentinel stored in gtaskqueue_busy.tb_running to mark an entry that was
 * inserted by a drain waiter rather than by a thread running a task.
 */
static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
  61
/*
 * A group taskqueue: a list of pending tasks, the threads servicing it and
 * the mutex protecting both.
 */
struct gtaskqueue {
        STAILQ_HEAD(, gtask)    tq_queue;       /* tasks waiting to run */
        gtaskqueue_enqueue_fn   tq_enqueue;     /* hook called after a task is queued */
        void                    *tq_context;    /* argument passed to tq_enqueue */
        char                    *tq_name;       /* malloc'ed name, also used as mutex name */
        TAILQ_HEAD(, gtaskqueue_busy) tq_active; /* running tasks and drain markers */
        struct mtx              tq_mutex;       /* spin or default mutex, see tq_spin */
        struct thread           **tq_threads;   /* array of service threads */
        int                     tq_tcount;      /* number of service threads still alive */
        int                     tq_spin;        /* non-zero if tq_mutex is a spin mutex */
        int                     tq_flags;       /* TQ_FLAGS_* */
        int                     tq_callouts;    /* holds off gtaskqueue_terminate() while > 0 */
        taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];  /* per-thread init/shutdown hooks */
        void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS]; /* arguments for tq_callbacks */
};
  77
/* Bits kept in gtaskqueue.tq_flags. */
#define TQ_FLAGS_ACTIVE         (1 << 0)        /* set at creation, cleared by gtaskqueue_free() */
#define TQ_FLAGS_BLOCKED        (1 << 1)        /* enqueue must not call tq_enqueue */
#define TQ_FLAGS_UNLOCKED_ENQUEUE       (1 << 2)        /* set when tq_enqueue is gtaskqueue_thread_enqueue */

/* NOTE(review): not referenced anywhere in the visible part of this file. */
#define DT_CALLOUT_ARMED        (1 << 0)
  83
  84 #define TQ_LOCK(tq)                                                     \
  85         do {                                                            \
  86                 if ((tq)->tq_spin)                                      \
  87                         mtx_lock_spin(&(tq)->tq_mutex);                 \
  88                 else                                                    \
  89                         mtx_lock(&(tq)->tq_mutex);                      \
  90         } while (0)
  91 #define TQ_ASSERT_LOCKED(tq)    mtx_assert(&(tq)->tq_mutex, MA_OWNED)
  92
  93 #define TQ_UNLOCK(tq)                                                   \
  94         do {                                                            \
  95                 if ((tq)->tq_spin)                                      \
  96                         mtx_unlock_spin(&(tq)->tq_mutex);               \
  97                 else                                                    \
  98                         mtx_unlock(&(tq)->tq_mutex);                    \
  99         } while (0)
 100 #define TQ_ASSERT_UNLOCKED(tq)  mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
 101
 102 #ifdef INVARIANTS
 103 static void
 104 gtask_dump(struct gtask *gtask)
 105 {
 106         printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
 107                gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context);
 108 }
 109 #endif
 110
 111 static __inline int
 112 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
 113     int t)
 114 {
 115         if (tq->tq_spin)
 116                 return (msleep_spin(p, m, wm, t));
 117         return (msleep(p, m, pri, wm, t));
 118 }
 119
 120 static struct gtaskqueue *
 121 _gtaskqueue_create(const char *name, int mflags,
 122                  taskqueue_enqueue_fn enqueue, void *context,
 123                  int mtxflags, const char *mtxname __unused)
 124 {
 125         struct gtaskqueue *queue;
 126         char *tq_name;
 127
 128         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
 129         if (!tq_name)
 130                 return (NULL);
 131
 132         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
 133
 134         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
 135         if (!queue)
 136                 return (NULL);
 137
 138         STAILQ_INIT(&queue->tq_queue);
 139         TAILQ_INIT(&queue->tq_active);
 140         queue->tq_enqueue = enqueue;
 141         queue->tq_context = context;
 142         queue->tq_name = tq_name;
 143         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
 144         queue->tq_flags |= TQ_FLAGS_ACTIVE;
 145         if (enqueue == gtaskqueue_thread_enqueue)
 146                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
 147         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
 148
 149         return (queue);
 150 }
 151
 152
 153 /*
 154  * Signal a taskqueue thread to terminate.
 155  */
 156 static void
 157 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
 158 {
 159
 160         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
 161                 wakeup(tq);
 162                 TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
 163         }
 164 }
 165
 166 static void
 167 gtaskqueue_free(struct gtaskqueue *queue)
 168 {
 169
 170         TQ_LOCK(queue);
 171         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
 172         gtaskqueue_terminate(queue->tq_threads, queue);
 173         KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
 174         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
 175         mtx_destroy(&queue->tq_mutex);
 176         free(queue->tq_threads, M_GTASKQUEUE);
 177         free(queue->tq_name, M_GTASKQUEUE);
 178         free(queue, M_GTASKQUEUE);
 179 }
 180
 181 int
 182 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
 183 {
 184 #ifdef INVARIANTS
 185         if (queue == NULL) {
 186                 gtask_dump(gtask);
 187                 panic("queue == NULL");
 188         }
 189 #endif
 190         TQ_LOCK(queue);
 191         if (gtask->ta_flags & TASK_ENQUEUED) {
 192                 TQ_UNLOCK(queue);
 193                 return (0);
 194         }
 195         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
 196         gtask->ta_flags |= TASK_ENQUEUED;
 197         TQ_UNLOCK(queue);
 198         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
 199                 queue->tq_enqueue(queue->tq_context);
 200         return (0);
 201 }
 202
 203 static void
 204 gtaskqueue_task_nop_fn(void *context)
 205 {
 206 }
 207
 208 /*
 209  * Block until all currently queued tasks in this taskqueue
 210  * have begun execution.  Tasks queued during execution of
 211  * this function are ignored.
 212  */
 213 static void
 214 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
 215 {
 216         struct gtask t_barrier;
 217
 218         if (STAILQ_EMPTY(&queue->tq_queue))
 219                 return;
 220
 221         /*
 222          * Enqueue our barrier after all current tasks, but with
 223          * the highest priority so that newly queued tasks cannot
 224          * pass it.  Because of the high priority, we can not use
 225          * taskqueue_enqueue_locked directly (which drops the lock
 226          * anyway) so just insert it at tail while we have the
 227          * queue lock.
 228          */
 229         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
 230         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
 231         t_barrier.ta_flags |= TASK_ENQUEUED;
 232
 233         /*
 234          * Once the barrier has executed, all previously queued tasks
 235          * have completed or are currently executing.
 236          */
 237         while (t_barrier.ta_flags & TASK_ENQUEUED)
 238                 TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
 239 }
 240
 241 /*
 242  * Block until all currently executing tasks for this taskqueue
 243  * complete.  Tasks that begin execution during the execution
 244  * of this function are ignored.
 245  */
 246 static void
 247 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
 248 {
 249         struct gtaskqueue_busy tb_marker, *tb_first;
 250
 251         if (TAILQ_EMPTY(&queue->tq_active))
 252                 return;
 253
 254         /* Block taskq_terminate().*/
 255         queue->tq_callouts++;
 256
 257         /*
 258          * Wait for all currently executing taskqueue threads
 259          * to go idle.
 260          */
 261         tb_marker.tb_running = TB_DRAIN_WAITER;
 262         TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
 263         while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
 264                 TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
 265         TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
 266
 267         /*
 268          * Wakeup any other drain waiter that happened to queue up
 269          * without any intervening active thread.
 270          */
 271         tb_first = TAILQ_FIRST(&queue->tq_active);
 272         if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
 273                 wakeup(tb_first);
 274
 275         /* Release taskqueue_terminate(). */
 276         queue->tq_callouts--;
 277         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 278                 wakeup_one(queue->tq_threads);
 279 }
 280
 281 void
 282 gtaskqueue_block(struct gtaskqueue *queue)
 283 {
 284
 285         TQ_LOCK(queue);
 286         queue->tq_flags |= TQ_FLAGS_BLOCKED;
 287         TQ_UNLOCK(queue);
 288 }
 289
 290 void
 291 gtaskqueue_unblock(struct gtaskqueue *queue)
 292 {
 293
 294         TQ_LOCK(queue);
 295         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
 296         if (!STAILQ_EMPTY(&queue->tq_queue))
 297                 queue->tq_enqueue(queue->tq_context);
 298         TQ_UNLOCK(queue);
 299 }
 300
 301 static void
 302 gtaskqueue_run_locked(struct gtaskqueue *queue)
 303 {
 304         struct gtaskqueue_busy tb;
 305         struct gtaskqueue_busy *tb_first;
 306         struct gtask *gtask;
 307
 308         KASSERT(queue != NULL, ("tq is NULL"));
 309         TQ_ASSERT_LOCKED(queue);
 310         tb.tb_running = NULL;
 311
 312         while (STAILQ_FIRST(&queue->tq_queue)) {
 313                 TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
 314
 315                 /*
 316                  * Carefully remove the first task from the queue and
 317                  * clear its TASK_ENQUEUED flag
 318                  */
 319                 gtask = STAILQ_FIRST(&queue->tq_queue);
 320                 KASSERT(gtask != NULL, ("task is NULL"));
 321                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
 322                 gtask->ta_flags &= ~TASK_ENQUEUED;
 323                 tb.tb_running = gtask;
 324                 TQ_UNLOCK(queue);
 325
 326                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
 327                 gtask->ta_func(gtask->ta_context);
 328
 329                 TQ_LOCK(queue);
 330                 tb.tb_running = NULL;
 331                 wakeup(gtask);
 332
 333                 TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
 334                 tb_first = TAILQ_FIRST(&queue->tq_active);
 335                 if (tb_first != NULL &&
 336                     tb_first->tb_running == TB_DRAIN_WAITER)
 337                         wakeup(tb_first);
 338         }
 339 }
 340
 341 static int
 342 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
 343 {
 344         struct gtaskqueue_busy *tb;
 345
 346         TQ_ASSERT_LOCKED(queue);
 347         TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
 348                 if (tb->tb_running == gtask)
 349                         return (1);
 350         }
 351         return (0);
 352 }
 353
 354 static int
 355 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
 356 {
 357
 358         if (gtask->ta_flags & TASK_ENQUEUED)
 359                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
 360         gtask->ta_flags &= ~TASK_ENQUEUED;
 361         return (task_is_running(queue, gtask) ? EBUSY : 0);
 362 }
 363
 364 int
 365 gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
 366 {
 367         int error;
 368
 369         TQ_LOCK(queue);
 370         error = gtaskqueue_cancel_locked(queue, gtask);
 371         TQ_UNLOCK(queue);
 372
 373         return (error);
 374 }
 375
 376 void
 377 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
 378 {
 379
 380         if (!queue->tq_spin)
 381                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 382
 383         TQ_LOCK(queue);
 384         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
 385                 TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
 386         TQ_UNLOCK(queue);
 387 }
 388
 389 void
 390 gtaskqueue_drain_all(struct gtaskqueue *queue)
 391 {
 392
 393         if (!queue->tq_spin)
 394                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 395
 396         TQ_LOCK(queue);
 397         gtaskqueue_drain_tq_queue(queue);
 398         gtaskqueue_drain_tq_active(queue);
 399         TQ_UNLOCK(queue);
 400 }
 401
 402 static int
 403 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 404     cpuset_t *mask, const char *name, va_list ap)
 405 {
 406         char ktname[MAXCOMLEN + 1];
 407         struct thread *td;
 408         struct gtaskqueue *tq;
 409         int i, error;
 410
 411         if (count <= 0)
 412                 return (EINVAL);
 413
 414         vsnprintf(ktname, sizeof(ktname), name, ap);
 415         tq = *tqp;
 416
 417         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
 418             M_NOWAIT | M_ZERO);
 419         if (tq->tq_threads == NULL) {
 420                 printf("%s: no memory for %s threads\n", __func__, ktname);
 421                 return (ENOMEM);
 422         }
 423
 424         for (i = 0; i < count; i++) {
 425                 if (count == 1)
 426                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 427                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
 428                 else
 429                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 430                             &tq->tq_threads[i], RFSTOPPED, 0,
 431                             "%s_%d", ktname, i);
 432                 if (error) {
 433                         /* should be ok to continue, taskqueue_free will dtrt */
 434                         printf("%s: kthread_add(%s): error %d", __func__,
 435                             ktname, error);
 436                         tq->tq_threads[i] = NULL;               /* paranoid */
 437                 } else
 438                         tq->tq_tcount++;
 439         }
 440         for (i = 0; i < count; i++) {
 441                 if (tq->tq_threads[i] == NULL)
 442                         continue;
 443                 td = tq->tq_threads[i];
 444                 if (mask) {
 445                         error = cpuset_setthread(td->td_tid, mask);
 446                         /*
 447                          * Failing to pin is rarely an actual fatal error;
 448                          * it'll just affect performance.
 449                          */
 450                         if (error)
 451                                 printf("%s: curthread=%llu: can't pin; "
 452                                     "error=%d\n",
 453                                     __func__,
 454                                     (unsigned long long) td->td_tid,
 455                                     error);
 456                 }
 457                 thread_lock(td);
 458                 sched_prio(td, pri);
 459                 sched_add(td, SRQ_BORING);
 460                 thread_unlock(td);
 461         }
 462
 463         return (0);
 464 }
 465
 466 static int
 467 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 468     const char *name, ...)
 469 {
 470         va_list ap;
 471         int error;
 472
 473         va_start(ap, name);
 474         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
 475         va_end(ap);
 476         return (error);
 477 }
 478
 479 static inline void
 480 gtaskqueue_run_callback(struct gtaskqueue *tq,
 481     enum taskqueue_callback_type cb_type)
 482 {
 483         taskqueue_callback_fn tq_callback;
 484
 485         TQ_ASSERT_UNLOCKED(tq);
 486         tq_callback = tq->tq_callbacks[cb_type];
 487         if (tq_callback != NULL)
 488                 tq_callback(tq->tq_cb_contexts[cb_type]);
 489 }
 490
 491 static void
 492 gtaskqueue_thread_loop(void *arg)
 493 {
 494         struct gtaskqueue **tqp, *tq;
 495
 496         tqp = arg;
 497         tq = *tqp;
 498         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
 499         TQ_LOCK(tq);
 500         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
 501                 /* XXX ? */
 502                 gtaskqueue_run_locked(tq);
 503                 /*
 504                  * Because taskqueue_run() can drop tq_mutex, we need to
 505                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
 506                  * meantime, which means we missed a wakeup.
 507                  */
 508                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 509                         break;
 510                 TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
 511         }
 512         gtaskqueue_run_locked(tq);
 513         /*
 514          * This thread is on its way out, so just drop the lock temporarily
 515          * in order to call the shutdown callback.  This allows the callback
 516          * to look at the taskqueue, even just before it dies.
 517          */
 518         TQ_UNLOCK(tq);
 519         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
 520         TQ_LOCK(tq);
 521
 522         /* rendezvous with thread that asked us to terminate */
 523         tq->tq_tcount--;
 524         wakeup_one(tq->tq_threads);
 525         TQ_UNLOCK(tq);
 526         kthread_exit();
 527 }
 528
 529 static void
 530 gtaskqueue_thread_enqueue(void *context)
 531 {
 532         struct gtaskqueue **tqp, *tq;
 533
 534         tqp = context;
 535         tq = *tqp;
 536         wakeup_one(tq);
 537 }
 538
 539
 540 static struct gtaskqueue *
 541 gtaskqueue_create_fast(const char *name, int mflags,
 542                  taskqueue_enqueue_fn enqueue, void *context)
 543 {
 544         return _gtaskqueue_create(name, mflags, enqueue, context,
 545                         MTX_SPIN, "fast_taskqueue");
 546 }
 547
 548
/* One slot of a taskqgroup: a queue and the grouptasks attached to it. */
struct taskqgroup_cpu {
        LIST_HEAD(, grouptask)  tgc_tasks;      /* grouptasks attached to this slot */
        struct gtaskqueue       *tgc_taskq;     /* queue servicing this slot */
        int     tgc_cnt;                        /* number of entries on tgc_tasks */
        int     tgc_cpu;                        /* CPU id associated with this slot */
};
 555
/* A set of up to MAXCPU queues among which grouptasks are distributed. */
struct taskqgroup {
        struct taskqgroup_cpu tqg_queue[MAXCPU]; /* slots; the first tqg_cnt are in use */
        struct mtx      tqg_lock;               /* protects the slot lists and counters */
        char *          tqg_name;               /* caller-supplied name, used in thread names */
        int             tqg_adjusting;          /* non-zero while _taskqgroup_adjust() runs */
        int             tqg_stride;             /* CPU stride recorded by the last adjust */
        int             tqg_cnt;                /* number of slots in use */
};
 564
/*
 * One-shot task queued by taskqgroup_bind(); taskqgroup_binder() receives
 * it as its context and frees it.
 */
struct taskq_bind_task {
        struct gtask bt_task;   /* the task queued on the target queue */
        int     bt_cpuid;       /* CPU the servicing thread should bind to */
};
 569
 570 static void
 571 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
 572 {
 573         struct taskqgroup_cpu *qcpu;
 574
 575         qcpu = &qgroup->tqg_queue[idx];
 576         LIST_INIT(&qcpu->tgc_tasks);
 577         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
 578             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
 579         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
 580             "%s_%d", qgroup->tqg_name, idx);
 581         qcpu->tgc_cpu = cpu;
 582 }
 583
 584 static void
 585 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
 586 {
 587
 588         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
 589 }
 590
 591 /*
 592  * Find the taskq with least # of tasks that doesn't currently have any
 593  * other queues from the uniq identifier.
 594  */
 595 static int
 596 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
 597 {
 598         struct grouptask *n;
 599         int i, idx, mincnt;
 600         int strict;
 601
 602         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 603         if (qgroup->tqg_cnt == 0)
 604                 return (0);
 605         idx = -1;
 606         mincnt = INT_MAX;
 607         /*
 608          * Two passes;  First scan for a queue with the least tasks that
 609          * does not already service this uniq id.  If that fails simply find
 610          * the queue with the least total tasks;
 611          */
 612         for (strict = 1; mincnt == INT_MAX; strict = 0) {
 613                 for (i = 0; i < qgroup->tqg_cnt; i++) {
 614                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
 615                                 continue;
 616                         if (strict) {
 617                                 LIST_FOREACH(n,
 618                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 619                                         if (n->gt_uniq == uniq)
 620                                                 break;
 621                                 if (n != NULL)
 622                                         continue;
 623                         }
 624                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
 625                         idx = i;
 626                 }
 627         }
 628         if (idx == -1)
 629                 panic("taskqgroup_find: Failed to pick a qid.");
 630
 631         return (idx);
 632 }
 633
 634 /*
 635  * smp_started is unusable since it is not set for UP kernels or even for
 636  * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 639  * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 640  *
 641  * So maintain our own flag.  It must be set after all CPUs are started
 642  * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 643  * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 644  * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 645  * simpler for adjustment to pass a flag indicating if it is delayed.
 646  */
 647
 648 static int tqg_smp_started;
 649
 650 static void
 651 tqg_record_smp_started(void *arg)
 652 {
 653         tqg_smp_started = 1;
 654 }
 655
 656 SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
 657         tqg_record_smp_started, NULL);
 658
 659 void
 660 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
 661     void *uniq, int irq, char *name)
 662 {
 663         cpuset_t mask;
 664         int qid;
 665
 666         gtask->gt_uniq = uniq;
 667         gtask->gt_name = name;
 668         gtask->gt_irq = irq;
 669         gtask->gt_cpu = -1;
 670         mtx_lock(&qgroup->tqg_lock);
 671         qid = taskqgroup_find(qgroup, uniq);
 672         qgroup->tqg_queue[qid].tgc_cnt++;
 673         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 674         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 675         if (irq != -1 && tqg_smp_started) {
 676                 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
 677                 CPU_ZERO(&mask);
 678                 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
 679                 mtx_unlock(&qgroup->tqg_lock);
 680                 intr_setaffinity(irq, &mask);
 681         } else
 682                 mtx_unlock(&qgroup->tqg_lock);
 683 }
 684
 685 static void
 686 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 687 {
 688         cpuset_t mask;
 689         int qid, cpu;
 690
 691         mtx_lock(&qgroup->tqg_lock);
 692         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
 693         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 694         if (gtask->gt_irq != -1) {
 695                 mtx_unlock(&qgroup->tqg_lock);
 696
 697                 CPU_ZERO(&mask);
 698                 CPU_SET(cpu, &mask);
 699                 intr_setaffinity(gtask->gt_irq, &mask);
 700
 701                 mtx_lock(&qgroup->tqg_lock);
 702         }
 703         qgroup->tqg_queue[qid].tgc_cnt++;
 704
 705         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
 706                          gt_list);
 707         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 708         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 709         mtx_unlock(&qgroup->tqg_lock);
 710 }
 711
 712 int
 713 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
 714         void *uniq, int cpu, int irq, char *name)
 715 {
 716         cpuset_t mask;
 717         int i, qid;
 718
 719         qid = -1;
 720         gtask->gt_uniq = uniq;
 721         gtask->gt_name = name;
 722         gtask->gt_irq = irq;
 723         gtask->gt_cpu = cpu;
 724         mtx_lock(&qgroup->tqg_lock);
 725         if (tqg_smp_started) {
 726                 for (i = 0; i < qgroup->tqg_cnt; i++)
 727                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 728                                 qid = i;
 729                                 break;
 730                         }
 731                 if (qid == -1) {
 732                         mtx_unlock(&qgroup->tqg_lock);
 733                         return (EINVAL);
 734                 }
 735         } else
 736                 qid = 0;
 737         qgroup->tqg_queue[qid].tgc_cnt++;
 738         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 739         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 740         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 741         mtx_unlock(&qgroup->tqg_lock);
 742
 743         CPU_ZERO(&mask);
 744         CPU_SET(cpu, &mask);
 745         if (irq != -1 && tqg_smp_started)
 746                 intr_setaffinity(irq, &mask);
 747         return (0);
 748 }
 749
 750 static int
 751 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 752 {
 753         cpuset_t mask;
 754         int i, qid, irq, cpu;
 755
 756         qid = -1;
 757         irq = gtask->gt_irq;
 758         cpu = gtask->gt_cpu;
 759         MPASS(tqg_smp_started);
 760         mtx_lock(&qgroup->tqg_lock);
 761         for (i = 0; i < qgroup->tqg_cnt; i++)
 762                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 763                         qid = i;
 764                         break;
 765                 }
 766         if (qid == -1) {
 767                 mtx_unlock(&qgroup->tqg_lock);
 768                 return (EINVAL);
 769         }
 770         qgroup->tqg_queue[qid].tgc_cnt++;
 771         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 772         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 773         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 774         mtx_unlock(&qgroup->tqg_lock);
 775
 776         CPU_ZERO(&mask);
 777         CPU_SET(cpu, &mask);
 778
 779         if (irq != -1)
 780                 intr_setaffinity(irq, &mask);
 781         return (0);
 782 }
 783
 784 void
 785 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
 786 {
 787         int i;
 788
 789         mtx_lock(&qgroup->tqg_lock);
 790         for (i = 0; i < qgroup->tqg_cnt; i++)
 791                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
 792                         break;
 793         if (i == qgroup->tqg_cnt)
 794                 panic("taskqgroup_detach: task not in group\n");
 795         qgroup->tqg_queue[i].tgc_cnt--;
 796         LIST_REMOVE(gtask, gt_list);
 797         mtx_unlock(&qgroup->tqg_lock);
 798         gtask->gt_taskqueue = NULL;
 799 }
 800
 801 static void
 802 taskqgroup_binder(void *ctx)
 803 {
 804         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
 805         cpuset_t mask;
 806         int error;
 807
 808         CPU_ZERO(&mask);
 809         CPU_SET(gtask->bt_cpuid, &mask);
 810         error = cpuset_setthread(curthread->td_tid, &mask);
 811         thread_lock(curthread);
 812         sched_bind(curthread, gtask->bt_cpuid);
 813         thread_unlock(curthread);
 814
 815         if (error)
 816                 printf("taskqgroup_binder: setaffinity failed: %d\n",
 817                     error);
 818         free(gtask, M_DEVBUF);
 819 }
 820
 821 static void
 822 taskqgroup_bind(struct taskqgroup *qgroup)
 823 {
 824         struct taskq_bind_task *gtask;
 825         int i;
 826
 827         /*
 828          * Bind taskqueue threads to specific CPUs, if they have been assigned
 829          * one.
 830          */
 831         if (qgroup->tqg_cnt == 1)
 832                 return;
 833
 834         for (i = 0; i < qgroup->tqg_cnt; i++) {
 835                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
 836                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
 837                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
 838                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
 839                     &gtask->bt_task);
 840         }
 841 }
 842
 843 static int
 844 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
 845 {
 846         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 847         struct grouptask *gtask;
 848         int i, k, old_cnt, old_cpu, cpu;
 849
 850         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 851
 852         if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
 853                 printf("%s: failed cnt: %d stride: %d "
 854                     "mp_ncpus: %d tqg_smp_started: %d\n",
 855                     __func__, cnt, stride, mp_ncpus, tqg_smp_started);
 856                 return (EINVAL);
 857         }
 858         if (qgroup->tqg_adjusting) {
 859                 printf("taskqgroup_adjust failed: adjusting\n");
 860                 return (EBUSY);
 861         }
 862         qgroup->tqg_adjusting = 1;
 863         old_cnt = qgroup->tqg_cnt;
 864         old_cpu = 0;
 865         if (old_cnt < cnt)
 866                 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
 867         mtx_unlock(&qgroup->tqg_lock);
 868         /*
 869          * Set up queue for tasks added before boot.
 870          */
 871         if (old_cnt == 0) {
 872                 LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
 873                     grouptask, gt_list);
 874                 qgroup->tqg_queue[0].tgc_cnt = 0;
 875         }
 876
 877         /*
 878          * If new taskq threads have been added.
 879          */
 880         cpu = old_cpu;
 881         for (i = old_cnt; i < cnt; i++) {
 882                 taskqgroup_cpu_create(qgroup, i, cpu);
 883
 884                 for (k = 0; k < stride; k++)
 885                         cpu = CPU_NEXT(cpu);
 886         }
 887         mtx_lock(&qgroup->tqg_lock);
 888         qgroup->tqg_cnt = cnt;
 889         qgroup->tqg_stride = stride;
 890
 891         /*
 892          * Adjust drivers to use new taskqs.
 893          */
 894         for (i = 0; i < old_cnt; i++) {
 895                 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
 896                         LIST_REMOVE(gtask, gt_list);
 897                         qgroup->tqg_queue[i].tgc_cnt--;
 898                         LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
 899                 }
 900         }
 901         mtx_unlock(&qgroup->tqg_lock);
 902
 903         while ((gtask = LIST_FIRST(&gtask_head))) {
 904                 LIST_REMOVE(gtask, gt_list);
 905                 if (gtask->gt_cpu == -1)
 906                         taskqgroup_attach_deferred(qgroup, gtask);
 907                 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
 908                         taskqgroup_attach_deferred(qgroup, gtask);
 909         }
 910
 911 #ifdef INVARIANTS
 912         mtx_lock(&qgroup->tqg_lock);
 913         for (i = 0; i < qgroup->tqg_cnt; i++) {
 914                 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
 915                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 916                         MPASS(gtask->gt_taskqueue != NULL);
 917         }
 918         mtx_unlock(&qgroup->tqg_lock);
 919 #endif
 920         /*
 921          * If taskq thread count has been reduced.
 922          */
 923         for (i = cnt; i < old_cnt; i++)
 924                 taskqgroup_cpu_remove(qgroup, i);
 925
 926         taskqgroup_bind(qgroup);
 927
 928         mtx_lock(&qgroup->tqg_lock);
 929         qgroup->tqg_adjusting = 0;
 930
 931         return (0);
 932 }
 933
 934 int
 935 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
 936 {
 937         int error;
 938
 939         mtx_lock(&qgroup->tqg_lock);
 940         error = _taskqgroup_adjust(qgroup, cnt, stride);
 941         mtx_unlock(&qgroup->tqg_lock);
 942
 943         return (error);
 944 }
 945
 946 struct taskqgroup *
 947 taskqgroup_create(char *name)
 948 {
 949         struct taskqgroup *qgroup;
 950
 951         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
 952         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
 953         qgroup->tqg_name = name;
 954         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
 955
 956         return (qgroup);
 957 }
 958
/*
 * Currently a no-op: nothing set up by taskqgroup_create() or
 * _taskqgroup_adjust() is released here.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}