sys/kern/subr_gtaskqueue.c

   1 /*-
   2  * Copyright (c) 2000 Doug Rabson
   3  * Copyright (c) 2014 Jeff Roberson
   4  * Copyright (c) 2016 Matthew Macy
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 #include <sys/param.h>
  33 #include <sys/systm.h>
  34 #include <sys/bus.h>
  35 #include <sys/cpuset.h>
  36 #include <sys/interrupt.h>
  37 #include <sys/kernel.h>
  38 #include <sys/kthread.h>
  39 #include <sys/libkern.h>
  40 #include <sys/limits.h>
  41 #include <sys/lock.h>
  42 #include <sys/malloc.h>
  43 #include <sys/mutex.h>
  44 #include <sys/proc.h>
  45 #include <sys/sched.h>
  46 #include <sys/smp.h>
  47 #include <sys/gtaskqueue.h>
  48 #include <sys/unistd.h>
  49 #include <machine/stdarg.h>
  50
/* Malloc type used for the queue, queue-name and thread-array allocations. */
static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
/* Forward declarations of static helpers defined later in this file. */
static void     gtaskqueue_thread_enqueue(void *);
static void     gtaskqueue_thread_loop(void *arg);
static int      task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
static void     gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);

/*
 * The two task queue groups defined by this file: "softirq" and "config".
 * NOTE(review): the numeric arguments are presumably the queue count and
 * the stride -- confirm against the TASKQGROUP_DEFINE macro definition.
 */
TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
TASKQGROUP_DEFINE(config, 1, 1);
  59
/*
 * Record that gtaskqueue_run_locked() keeps on its stack and links onto
 * the queue's tq_active list for as long as it is dispatching tasks.
 */
struct gtaskqueue_busy {
        /* Task currently being run; NULL until the first task is dequeued. */
        struct gtask            *tb_running;
        /* Value of tq_seq assigned when tb_running was dequeued. */
        u_int                    tb_seq;
        /* Linkage on gtaskqueue.tq_active. */
        LIST_ENTRY(gtaskqueue_busy) tb_link;
};
  65
/*
 * Notification hook stored in gtaskqueue.tq_enqueue; it is invoked with
 * the queue's tq_context when work has been queued.
 */
typedef void (*gtaskqueue_enqueue_fn)(void *context);
  67
/*
 * A single group task queue: a list of pending tasks, the set of runners
 * currently dispatching from it, and the worker threads serving it.
 */
struct gtaskqueue {
        /* Pending tasks; inserted at the tail, dispatched from the head. */
        STAILQ_HEAD(, gtask)    tq_queue;
        /* Busy records of the runners currently inside gtaskqueue_run_locked(). */
        LIST_HEAD(, gtaskqueue_busy) tq_active;
        /* Incremented once for every task that is dequeued for execution. */
        u_int                   tq_seq;
        /* Raised by gtaskqueue_drain_tq_active() to hold off gtaskqueue_terminate(). */
        int                     tq_callouts;
        /* Protects the queue; a spin or a default mutex depending on tq_spin. */
        struct mtx_padalign     tq_mutex;
        /* Hook called (with tq_context) after a task has been queued. */
        gtaskqueue_enqueue_fn   tq_enqueue;
        void                    *tq_context;
        /* Allocated copy of the queue name (TASKQUEUE_NAMELEN bytes). */
        char                    *tq_name;
        /* Array of worker threads; its address also serves as a sleep/wakeup channel. */
        struct thread           **tq_threads;
        /* Number of worker threads created successfully that have not exited yet. */
        int                     tq_tcount;
        /* Non-zero when tq_mutex was initialised with MTX_SPIN. */
        int                     tq_spin;
        /* TQ_FLAGS_* bits. */
        int                     tq_flags;
        /* Per-type callbacks and their arguments, run by gtaskqueue_run_callback(). */
        taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
        void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
};
  84
/*
 * Bits kept in gtaskqueue.tq_flags.
 *
 * TQ_FLAGS_ACTIVE is set at creation and cleared by gtaskqueue_free() to
 * make the worker threads exit.  While TQ_FLAGS_BLOCKED is set,
 * grouptaskqueue_enqueue() queues tasks without calling tq_enqueue.
 * TQ_FLAGS_UNLOCKED_ENQUEUE is set at creation when the enqueue hook is
 * gtaskqueue_thread_enqueue; nothing in the visible part of this file
 * reads it.
 */
#define TQ_FLAGS_ACTIVE         (1 << 0)
#define TQ_FLAGS_BLOCKED        (1 << 1)
#define TQ_FLAGS_UNLOCKED_ENQUEUE       (1 << 2)

/* NOTE(review): not referenced in the visible part of this file. */
#define DT_CALLOUT_ARMED        (1 << 0)
  90
/*
 * Acquire the queue mutex, using the spin or the default mutex primitive
 * according to tq_spin.
 */
#define TQ_LOCK(tq)                                                     \
        do {                                                            \
                if ((tq)->tq_spin)                                      \
                        mtx_lock_spin(&(tq)->tq_mutex);                 \
                else                                                    \
                        mtx_lock(&(tq)->tq_mutex);                      \
        } while (0)
/* Assert that the current thread owns the queue mutex. */
#define TQ_ASSERT_LOCKED(tq)    mtx_assert(&(tq)->tq_mutex, MA_OWNED)

/* Release the queue mutex; the counterpart of TQ_LOCK(). */
#define TQ_UNLOCK(tq)                                                   \
        do {                                                            \
                if ((tq)->tq_spin)                                      \
                        mtx_unlock_spin(&(tq)->tq_mutex);               \
                else                                                    \
                        mtx_unlock(&(tq)->tq_mutex);                    \
        } while (0)
/* Assert that the current thread does not own the queue mutex. */
#define TQ_ASSERT_UNLOCKED(tq)  mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
 108
 109 #ifdef INVARIANTS
 110 static void
 111 gtask_dump(struct gtask *gtask)
 112 {
 113         printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
 114                gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context);
 115 }
 116 #endif
 117
 118 static __inline int
 119 TQ_SLEEP(struct gtaskqueue *tq, void *p, const char *wm)
 120 {
 121         if (tq->tq_spin)
 122                 return (msleep_spin(p, (struct mtx *)&tq->tq_mutex, wm, 0));
 123         return (msleep(p, &tq->tq_mutex, 0, wm, 0));
 124 }
 125
 126 static struct gtaskqueue *
 127 _gtaskqueue_create(const char *name, int mflags,
 128                  taskqueue_enqueue_fn enqueue, void *context,
 129                  int mtxflags, const char *mtxname __unused)
 130 {
 131         struct gtaskqueue *queue;
 132         char *tq_name;
 133
 134         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
 135         if (!tq_name)
 136                 return (NULL);
 137
 138         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
 139
 140         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
 141         if (!queue) {
 142                 free(tq_name, M_GTASKQUEUE);
 143                 return (NULL);
 144         }
 145
 146         STAILQ_INIT(&queue->tq_queue);
 147         LIST_INIT(&queue->tq_active);
 148         queue->tq_enqueue = enqueue;
 149         queue->tq_context = context;
 150         queue->tq_name = tq_name;
 151         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
 152         queue->tq_flags |= TQ_FLAGS_ACTIVE;
 153         if (enqueue == gtaskqueue_thread_enqueue)
 154                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
 155         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
 156
 157         return (queue);
 158 }
 159
 160
 161 /*
 162  * Signal a taskqueue thread to terminate.
 163  */
 164 static void
 165 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
 166 {
 167
 168         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
 169                 wakeup(tq);
 170                 TQ_SLEEP(tq, pp, "gtq_destroy");
 171         }
 172 }
 173
 174 static void
 175 gtaskqueue_free(struct gtaskqueue *queue)
 176 {
 177
 178         TQ_LOCK(queue);
 179         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
 180         gtaskqueue_terminate(queue->tq_threads, queue);
 181         KASSERT(LIST_EMPTY(&queue->tq_active), ("Tasks still running?"));
 182         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
 183         mtx_destroy(&queue->tq_mutex);
 184         free(queue->tq_threads, M_GTASKQUEUE);
 185         free(queue->tq_name, M_GTASKQUEUE);
 186         free(queue, M_GTASKQUEUE);
 187 }
 188
 189 /*
 190  * Wait for all to complete, then prevent it from being enqueued
 191  */
 192 void
 193 grouptask_block(struct grouptask *grouptask)
 194 {
 195         struct gtaskqueue *queue = grouptask->gt_taskqueue;
 196         struct gtask *gtask = &grouptask->gt_task;
 197
 198 #ifdef INVARIANTS
 199         if (queue == NULL) {
 200                 gtask_dump(gtask);
 201                 panic("queue == NULL");
 202         }
 203 #endif
 204         TQ_LOCK(queue);
 205         gtask->ta_flags |= TASK_NOENQUEUE;
 206         gtaskqueue_drain_locked(queue, gtask);
 207         TQ_UNLOCK(queue);
 208 }
 209
 210 void
 211 grouptask_unblock(struct grouptask *grouptask)
 212 {
 213         struct gtaskqueue *queue = grouptask->gt_taskqueue;
 214         struct gtask *gtask = &grouptask->gt_task;
 215
 216 #ifdef INVARIANTS
 217         if (queue == NULL) {
 218                 gtask_dump(gtask);
 219                 panic("queue == NULL");
 220         }
 221 #endif
 222         TQ_LOCK(queue);
 223         gtask->ta_flags &= ~TASK_NOENQUEUE;
 224         TQ_UNLOCK(queue);
 225 }
 226
 227 int
 228 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
 229 {
 230 #ifdef INVARIANTS
 231         if (queue == NULL) {
 232                 gtask_dump(gtask);
 233                 panic("queue == NULL");
 234         }
 235 #endif
 236         TQ_LOCK(queue);
 237         if (gtask->ta_flags & TASK_ENQUEUED) {
 238                 TQ_UNLOCK(queue);
 239                 return (0);
 240         }
 241         if (gtask->ta_flags & TASK_NOENQUEUE) {
 242                 TQ_UNLOCK(queue);
 243                 return (EAGAIN);
 244         }
 245         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
 246         gtask->ta_flags |= TASK_ENQUEUED;
 247         TQ_UNLOCK(queue);
 248         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
 249                 queue->tq_enqueue(queue->tq_context);
 250         return (0);
 251 }
 252
 253 static void
 254 gtaskqueue_task_nop_fn(void *context)
 255 {
 256 }
 257
 258 /*
 259  * Block until all currently queued tasks in this taskqueue
 260  * have begun execution.  Tasks queued during execution of
 261  * this function are ignored.
 262  */
 263 static void
 264 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
 265 {
 266         struct gtask t_barrier;
 267
 268         if (STAILQ_EMPTY(&queue->tq_queue))
 269                 return;
 270
 271         /*
 272          * Enqueue our barrier after all current tasks, but with
 273          * the highest priority so that newly queued tasks cannot
 274          * pass it.  Because of the high priority, we can not use
 275          * taskqueue_enqueue_locked directly (which drops the lock
 276          * anyway) so just insert it at tail while we have the
 277          * queue lock.
 278          */
 279         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
 280         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
 281         t_barrier.ta_flags |= TASK_ENQUEUED;
 282
 283         /*
 284          * Once the barrier has executed, all previously queued tasks
 285          * have completed or are currently executing.
 286          */
 287         while (t_barrier.ta_flags & TASK_ENQUEUED)
 288                 TQ_SLEEP(queue, &t_barrier, "gtq_qdrain");
 289 }
 290
 291 /*
 292  * Block until all currently executing tasks for this taskqueue
 293  * complete.  Tasks that begin execution during the execution
 294  * of this function are ignored.
 295  */
 296 static void
 297 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
 298 {
 299         struct gtaskqueue_busy *tb;
 300         u_int seq;
 301
 302         if (LIST_EMPTY(&queue->tq_active))
 303                 return;
 304
 305         /* Block taskq_terminate().*/
 306         queue->tq_callouts++;
 307
 308         /* Wait for any active task with sequence from the past. */
 309         seq = queue->tq_seq;
 310 restart:
 311         LIST_FOREACH(tb, &queue->tq_active, tb_link) {
 312                 if ((int)(tb->tb_seq - seq) <= 0) {
 313                         TQ_SLEEP(queue, tb->tb_running, "gtq_adrain");
 314                         goto restart;
 315                 }
 316         }
 317
 318         /* Release taskqueue_terminate(). */
 319         queue->tq_callouts--;
 320         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 321                 wakeup_one(queue->tq_threads);
 322 }
 323
 324 void
 325 gtaskqueue_block(struct gtaskqueue *queue)
 326 {
 327
 328         TQ_LOCK(queue);
 329         queue->tq_flags |= TQ_FLAGS_BLOCKED;
 330         TQ_UNLOCK(queue);
 331 }
 332
 333 void
 334 gtaskqueue_unblock(struct gtaskqueue *queue)
 335 {
 336
 337         TQ_LOCK(queue);
 338         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
 339         if (!STAILQ_EMPTY(&queue->tq_queue))
 340                 queue->tq_enqueue(queue->tq_context);
 341         TQ_UNLOCK(queue);
 342 }
 343
 344 static void
 345 gtaskqueue_run_locked(struct gtaskqueue *queue)
 346 {
 347         struct gtaskqueue_busy tb;
 348         struct gtask *gtask;
 349
 350         KASSERT(queue != NULL, ("tq is NULL"));
 351         TQ_ASSERT_LOCKED(queue);
 352         tb.tb_running = NULL;
 353         LIST_INSERT_HEAD(&queue->tq_active, &tb, tb_link);
 354
 355         while ((gtask = STAILQ_FIRST(&queue->tq_queue)) != NULL) {
 356                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
 357                 gtask->ta_flags &= ~TASK_ENQUEUED;
 358                 tb.tb_running = gtask;
 359                 tb.tb_seq = ++queue->tq_seq;
 360                 TQ_UNLOCK(queue);
 361
 362                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
 363                 gtask->ta_func(gtask->ta_context);
 364
 365                 TQ_LOCK(queue);
 366                 wakeup(gtask);
 367         }
 368         LIST_REMOVE(&tb, tb_link);
 369 }
 370
 371 static int
 372 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
 373 {
 374         struct gtaskqueue_busy *tb;
 375
 376         TQ_ASSERT_LOCKED(queue);
 377         LIST_FOREACH(tb, &queue->tq_active, tb_link) {
 378                 if (tb->tb_running == gtask)
 379                         return (1);
 380         }
 381         return (0);
 382 }
 383
 384 static int
 385 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
 386 {
 387
 388         if (gtask->ta_flags & TASK_ENQUEUED)
 389                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
 390         gtask->ta_flags &= ~TASK_ENQUEUED;
 391         return (task_is_running(queue, gtask) ? EBUSY : 0);
 392 }
 393
 394 int
 395 gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
 396 {
 397         int error;
 398
 399         TQ_LOCK(queue);
 400         error = gtaskqueue_cancel_locked(queue, gtask);
 401         TQ_UNLOCK(queue);
 402
 403         return (error);
 404 }
 405
 406 static void
 407 gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
 408 {
 409         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
 410                 TQ_SLEEP(queue, gtask, "gtq_drain");
 411 }
 412
 413 void
 414 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
 415 {
 416
 417         if (!queue->tq_spin)
 418                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 419
 420         TQ_LOCK(queue);
 421         gtaskqueue_drain_locked(queue, gtask);
 422         TQ_UNLOCK(queue);
 423 }
 424
 425 void
 426 gtaskqueue_drain_all(struct gtaskqueue *queue)
 427 {
 428
 429         if (!queue->tq_spin)
 430                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 431
 432         TQ_LOCK(queue);
 433         gtaskqueue_drain_tq_queue(queue);
 434         gtaskqueue_drain_tq_active(queue);
 435         TQ_UNLOCK(queue);
 436 }
 437
 438 static int
 439 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 440     cpuset_t *mask, const char *name, va_list ap)
 441 {
 442         char ktname[MAXCOMLEN + 1];
 443         struct thread *td;
 444         struct gtaskqueue *tq;
 445         int i, error;
 446
 447         if (count <= 0)
 448                 return (EINVAL);
 449
 450         vsnprintf(ktname, sizeof(ktname), name, ap);
 451         tq = *tqp;
 452
 453         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
 454             M_NOWAIT | M_ZERO);
 455         if (tq->tq_threads == NULL) {
 456                 printf("%s: no memory for %s threads\n", __func__, ktname);
 457                 return (ENOMEM);
 458         }
 459
 460         for (i = 0; i < count; i++) {
 461                 if (count == 1)
 462                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 463                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
 464                 else
 465                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 466                             &tq->tq_threads[i], RFSTOPPED, 0,
 467                             "%s_%d", ktname, i);
 468                 if (error) {
 469                         /* should be ok to continue, taskqueue_free will dtrt */
 470                         printf("%s: kthread_add(%s): error %d", __func__,
 471                             ktname, error);
 472                         tq->tq_threads[i] = NULL;               /* paranoid */
 473                 } else
 474                         tq->tq_tcount++;
 475         }
 476         for (i = 0; i < count; i++) {
 477                 if (tq->tq_threads[i] == NULL)
 478                         continue;
 479                 td = tq->tq_threads[i];
 480                 if (mask) {
 481                         error = cpuset_setthread(td->td_tid, mask);
 482                         /*
 483                          * Failing to pin is rarely an actual fatal error;
 484                          * it'll just affect performance.
 485                          */
 486                         if (error)
 487                                 printf("%s: curthread=%llu: can't pin; "
 488                                     "error=%d\n",
 489                                     __func__,
 490                                     (unsigned long long) td->td_tid,
 491                                     error);
 492                 }
 493                 thread_lock(td);
 494                 sched_prio(td, pri);
 495                 sched_add(td, SRQ_BORING);
 496                 thread_unlock(td);
 497         }
 498
 499         return (0);
 500 }
 501
 502 static int
 503 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 504     const char *name, ...)
 505 {
 506         va_list ap;
 507         int error;
 508
 509         va_start(ap, name);
 510         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
 511         va_end(ap);
 512         return (error);
 513 }
 514
 515 static inline void
 516 gtaskqueue_run_callback(struct gtaskqueue *tq,
 517     enum taskqueue_callback_type cb_type)
 518 {
 519         taskqueue_callback_fn tq_callback;
 520
 521         TQ_ASSERT_UNLOCKED(tq);
 522         tq_callback = tq->tq_callbacks[cb_type];
 523         if (tq_callback != NULL)
 524                 tq_callback(tq->tq_cb_contexts[cb_type]);
 525 }
 526
 527 static void
 528 gtaskqueue_thread_loop(void *arg)
 529 {
 530         struct gtaskqueue **tqp, *tq;
 531
 532         tqp = arg;
 533         tq = *tqp;
 534         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
 535         TQ_LOCK(tq);
 536         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
 537                 /* XXX ? */
 538                 gtaskqueue_run_locked(tq);
 539                 /*
 540                  * Because taskqueue_run() can drop tq_mutex, we need to
 541                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
 542                  * meantime, which means we missed a wakeup.
 543                  */
 544                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 545                         break;
 546                 TQ_SLEEP(tq, tq, "-");
 547         }
 548         gtaskqueue_run_locked(tq);
 549         /*
 550          * This thread is on its way out, so just drop the lock temporarily
 551          * in order to call the shutdown callback.  This allows the callback
 552          * to look at the taskqueue, even just before it dies.
 553          */
 554         TQ_UNLOCK(tq);
 555         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
 556         TQ_LOCK(tq);
 557
 558         /* rendezvous with thread that asked us to terminate */
 559         tq->tq_tcount--;
 560         wakeup_one(tq->tq_threads);
 561         TQ_UNLOCK(tq);
 562         kthread_exit();
 563 }
 564
 565 static void
 566 gtaskqueue_thread_enqueue(void *context)
 567 {
 568         struct gtaskqueue **tqp, *tq;
 569
 570         tqp = context;
 571         tq = *tqp;
 572         wakeup_any(tq);
 573 }
 574
 575
 576 static struct gtaskqueue *
 577 gtaskqueue_create_fast(const char *name, int mflags,
 578                  taskqueue_enqueue_fn enqueue, void *context)
 579 {
 580         return _gtaskqueue_create(name, mflags, enqueue, context,
 581                         MTX_SPIN, "fast_taskqueue");
 582 }
 583
 584
/* One slot of a task queue group: a task queue and the tasks attached to it. */
struct taskqgroup_cpu {
        /* Group tasks attached to this slot. */
        LIST_HEAD(, grouptask)  tgc_tasks;
        /* The task queue serving this slot. */
        struct gtaskqueue       *tgc_taskq;
        /* Number of group tasks attached to this slot. */
        int     tgc_cnt;
        /* CPU assigned to this slot. */
        int     tgc_cpu;
};
 591
/* A group of task queues among which group tasks are distributed. */
struct taskqgroup {
        /* Queue slots; only the first tqg_cnt entries are searched. */
        struct taskqgroup_cpu tqg_queue[MAXCPU];
        /* Protects the slots' task lists and counters. */
        struct mtx      tqg_lock;
        /* Group name; used as the prefix of the worker thread names. */
        const char *    tqg_name;
        /* NOTE(review): not referenced in the visible part of this file. */
        int             tqg_adjusting;
        /* NOTE(review): only assigned (to 1) in the visible part of this file. */
        int             tqg_stride;
        /* Number of slots of tqg_queue in use. */
        int             tqg_cnt;
};
 600
/*
 * One-shot task queued by taskqgroup_bind(); taskqgroup_binder() binds
 * the executing thread to bt_cpuid and then frees the structure.
 */
struct taskq_bind_task {
        struct gtask bt_task;
        /* CPU the executing thread is bound to. */
        int     bt_cpuid;
};
 605
 606 static void
 607 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
 608 {
 609         struct taskqgroup_cpu *qcpu;
 610
 611         qcpu = &qgroup->tqg_queue[idx];
 612         LIST_INIT(&qcpu->tgc_tasks);
 613         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
 614             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
 615         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
 616             "%s_%d", qgroup->tqg_name, idx);
 617         qcpu->tgc_cpu = cpu;
 618 }
 619
 620 static void
 621 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
 622 {
 623
 624         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
 625 }
 626
 627 /*
 628  * Find the taskq with least # of tasks that doesn't currently have any
 629  * other queues from the uniq identifier.
 630  */
 631 static int
 632 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
 633 {
 634         struct grouptask *n;
 635         int i, idx, mincnt;
 636         int strict;
 637
 638         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 639         if (qgroup->tqg_cnt == 0)
 640                 return (0);
 641         idx = -1;
 642         mincnt = INT_MAX;
 643         /*
 644          * Two passes;  First scan for a queue with the least tasks that
 645          * does not already service this uniq id.  If that fails simply find
 646          * the queue with the least total tasks;
 647          */
 648         for (strict = 1; mincnt == INT_MAX; strict = 0) {
 649                 for (i = 0; i < qgroup->tqg_cnt; i++) {
 650                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
 651                                 continue;
 652                         if (strict) {
 653                                 LIST_FOREACH(n,
 654                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 655                                         if (n->gt_uniq == uniq)
 656                                                 break;
 657                                 if (n != NULL)
 658                                         continue;
 659                         }
 660                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
 661                         idx = i;
 662                 }
 663         }
 664         if (idx == -1)
 665                 panic("%s: failed to pick a qid.", __func__);
 666
 667         return (idx);
 668 }
 669
 670 /*
 671  * smp_started is unusable since it is not set for UP kernels or even for
 672  * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 675  * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 676  *
 677  * So maintain our own flag.  It must be set after all CPUs are started
 678  * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 679  * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 680  * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 681  * simpler for adjustment to pass a flag indicating if it is delayed.
 682  */
 683
 684 static int tqg_smp_started;
 685
 686 static void
 687 tqg_record_smp_started(void *arg)
 688 {
 689         tqg_smp_started = 1;
 690 }
 691
 692 SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
 693         tqg_record_smp_started, NULL);
 694
 695 void
 696 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
 697     void *uniq, int irq, const char *name)
 698 {
 699         cpuset_t mask;
 700         int qid, error;
 701
 702         gtask->gt_uniq = uniq;
 703         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
 704         gtask->gt_irq = irq;
 705         gtask->gt_cpu = -1;
 706         mtx_lock(&qgroup->tqg_lock);
 707         qid = taskqgroup_find(qgroup, uniq);
 708         qgroup->tqg_queue[qid].tgc_cnt++;
 709         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 710         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 711         if (irq != -1 && tqg_smp_started) {
 712                 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
 713                 CPU_ZERO(&mask);
 714                 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
 715                 mtx_unlock(&qgroup->tqg_lock);
 716                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
 717                 if (error)
 718                         printf("%s: binding interrupt failed for %s: %d\n",
 719                             __func__, gtask->gt_name, error);
 720         } else
 721                 mtx_unlock(&qgroup->tqg_lock);
 722 }
 723
 724 static void
 725 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 726 {
 727         cpuset_t mask;
 728         int qid, cpu, error;
 729
 730         mtx_lock(&qgroup->tqg_lock);
 731         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
 732         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 733         if (gtask->gt_irq != -1) {
 734                 mtx_unlock(&qgroup->tqg_lock);
 735
 736                 CPU_ZERO(&mask);
 737                 CPU_SET(cpu, &mask);
 738                 error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
 739                 mtx_lock(&qgroup->tqg_lock);
 740                 if (error)
 741                         printf("%s: binding interrupt failed for %s: %d\n",
 742                             __func__, gtask->gt_name, error);
 743
 744         }
 745         qgroup->tqg_queue[qid].tgc_cnt++;
 746         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 747         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 748         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 749         mtx_unlock(&qgroup->tqg_lock);
 750 }
 751
 752 int
 753 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
 754     void *uniq, int cpu, int irq, const char *name)
 755 {
 756         cpuset_t mask;
 757         int i, qid, error;
 758
 759         qid = -1;
 760         gtask->gt_uniq = uniq;
 761         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
 762         gtask->gt_irq = irq;
 763         gtask->gt_cpu = cpu;
 764         mtx_lock(&qgroup->tqg_lock);
 765         if (tqg_smp_started) {
 766                 for (i = 0; i < qgroup->tqg_cnt; i++)
 767                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 768                                 qid = i;
 769                                 break;
 770                         }
 771                 if (qid == -1) {
 772                         mtx_unlock(&qgroup->tqg_lock);
 773                         printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
 774                         return (EINVAL);
 775                 }
 776         } else
 777                 qid = 0;
 778         qgroup->tqg_queue[qid].tgc_cnt++;
 779         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 780         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 781         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 782         mtx_unlock(&qgroup->tqg_lock);
 783
 784         CPU_ZERO(&mask);
 785         CPU_SET(cpu, &mask);
 786         if (irq != -1 && tqg_smp_started) {
 787                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
 788                 if (error)
 789                         printf("%s: binding interrupt failed for %s: %d\n",
 790                             __func__, gtask->gt_name, error);
 791         }
 792         return (0);
 793 }
 794
 795 static int
 796 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 797 {
 798         cpuset_t mask;
 799         int i, qid, irq, cpu, error;
 800
 801         qid = -1;
 802         irq = gtask->gt_irq;
 803         cpu = gtask->gt_cpu;
 804         MPASS(tqg_smp_started);
 805         mtx_lock(&qgroup->tqg_lock);
 806         for (i = 0; i < qgroup->tqg_cnt; i++)
 807                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 808                         qid = i;
 809                         break;
 810                 }
 811         if (qid == -1) {
 812                 mtx_unlock(&qgroup->tqg_lock);
 813                 printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
 814                 return (EINVAL);
 815         }
 816         qgroup->tqg_queue[qid].tgc_cnt++;
 817         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 818         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 819         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 820         mtx_unlock(&qgroup->tqg_lock);
 821
 822         CPU_ZERO(&mask);
 823         CPU_SET(cpu, &mask);
 824
 825         if (irq != -1) {
 826                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
 827                 if (error)
 828                         printf("%s: binding interrupt failed for %s: %d\n",
 829                             __func__, gtask->gt_name, error);
 830         }
 831         return (0);
 832 }
 833
 834 void
 835 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
 836 {
 837         int i;
 838
 839         grouptask_block(gtask);
 840         mtx_lock(&qgroup->tqg_lock);
 841         for (i = 0; i < qgroup->tqg_cnt; i++)
 842                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
 843                         break;
 844         if (i == qgroup->tqg_cnt)
 845                 panic("%s: task %s not in group", __func__, gtask->gt_name);
 846         qgroup->tqg_queue[i].tgc_cnt--;
 847         LIST_REMOVE(gtask, gt_list);
 848         mtx_unlock(&qgroup->tqg_lock);
 849         gtask->gt_taskqueue = NULL;
 850         gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
 851 }
 852
 853 static void
 854 taskqgroup_binder(void *ctx)
 855 {
 856         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
 857         cpuset_t mask;
 858         int error;
 859
 860         CPU_ZERO(&mask);
 861         CPU_SET(gtask->bt_cpuid, &mask);
 862         error = cpuset_setthread(curthread->td_tid, &mask);
 863         thread_lock(curthread);
 864         sched_bind(curthread, gtask->bt_cpuid);
 865         thread_unlock(curthread);
 866
 867         if (error)
 868                 printf("%s: binding curthread failed: %d\n", __func__, error);
 869         free(gtask, M_DEVBUF);
 870 }
 871
 872 static void
 873 taskqgroup_bind(struct taskqgroup *qgroup)
 874 {
 875         struct taskq_bind_task *gtask;
 876         int i;
 877
 878         /*
 879          * Bind taskqueue threads to specific CPUs, if they have been assigned
 880          * one.
 881          */
 882         if (qgroup->tqg_cnt == 1)
 883                 return;
 884
 885         for (i = 0; i < qgroup->tqg_cnt; i++) {
 886                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
 887                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
 888                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
 889                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
 890                     &gtask->bt_task);
 891         }
 892 }
 893
 894 static void
 895 taskqgroup_config_init(void *arg)
 896 {
 897         struct taskqgroup *qgroup = qgroup_config;
 898         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 899
 900         LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
 901             grouptask, gt_list);
 902         qgroup->tqg_queue[0].tgc_cnt = 0;
 903         taskqgroup_cpu_create(qgroup, 0, 0);
 904
 905         qgroup->tqg_cnt = 1;
 906         qgroup->tqg_stride = 1;
 907 }
 908
 909 SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
 910         taskqgroup_config_init, NULL);
 911
 912 static int
 913 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
 914 {
 915         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 916         struct grouptask *gtask;
 917         int i, k, old_cnt, old_cpu, cpu;
 918
 919         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 920
 921         if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
 922                 printf("%s: failed cnt: %d stride: %d "
 923                     "mp_ncpus: %d tqg_smp_started: %d\n",
 924                     __func__, cnt, stride, mp_ncpus, tqg_smp_started);
 925                 return (EINVAL);
 926         }
 927         if (qgroup->tqg_adjusting) {
 928                 printf("%s failed: adjusting\n", __func__);
 929                 return (EBUSY);
 930         }
 931         qgroup->tqg_adjusting = 1;
 932         old_cnt = qgroup->tqg_cnt;
 933         old_cpu = 0;
 934         if (old_cnt < cnt)
 935                 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
 936         mtx_unlock(&qgroup->tqg_lock);
 937         /*
 938          * Set up queue for tasks added before boot.
 939          */
 940         if (old_cnt == 0) {
 941                 LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
 942                     grouptask, gt_list);
 943                 qgroup->tqg_queue[0].tgc_cnt = 0;
 944         }
 945
 946         /*
 947          * If new taskq threads have been added.
 948          */
 949         cpu = old_cpu;
 950         for (i = old_cnt; i < cnt; i++) {
 951                 taskqgroup_cpu_create(qgroup, i, cpu);
 952
 953                 for (k = 0; k < stride; k++)
 954                         cpu = CPU_NEXT(cpu);
 955         }
 956         mtx_lock(&qgroup->tqg_lock);
 957         qgroup->tqg_cnt = cnt;
 958         qgroup->tqg_stride = stride;
 959
 960         /*
 961          * Adjust drivers to use new taskqs.
 962          */
 963         for (i = 0; i < old_cnt; i++) {
 964                 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
 965                         LIST_REMOVE(gtask, gt_list);
 966                         qgroup->tqg_queue[i].tgc_cnt--;
 967                         LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
 968                 }
 969         }
 970         mtx_unlock(&qgroup->tqg_lock);
 971
 972         while ((gtask = LIST_FIRST(&gtask_head))) {
 973                 LIST_REMOVE(gtask, gt_list);
 974                 if (gtask->gt_cpu == -1)
 975                         taskqgroup_attach_deferred(qgroup, gtask);
 976                 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
 977                         taskqgroup_attach_deferred(qgroup, gtask);
 978         }
 979
 980 #ifdef INVARIANTS
 981         mtx_lock(&qgroup->tqg_lock);
 982         for (i = 0; i < qgroup->tqg_cnt; i++) {
 983                 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
 984                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 985                         MPASS(gtask->gt_taskqueue != NULL);
 986         }
 987         mtx_unlock(&qgroup->tqg_lock);
 988 #endif
 989         /*
 990          * If taskq thread count has been reduced.
 991          */
 992         for (i = cnt; i < old_cnt; i++)
 993                 taskqgroup_cpu_remove(qgroup, i);
 994
 995         taskqgroup_bind(qgroup);
 996
 997         mtx_lock(&qgroup->tqg_lock);
 998         qgroup->tqg_adjusting = 0;
 999
1000         return (0);
1001 }
1002
1003 int
1004 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
1005 {
1006         int error;
1007
1008         mtx_lock(&qgroup->tqg_lock);
1009         error = _taskqgroup_adjust(qgroup, cnt, stride);
1010         mtx_unlock(&qgroup->tqg_lock);
1011
1012         return (error);
1013 }
1014
1015 struct taskqgroup *
1016 taskqgroup_create(const char *name)
1017 {
1018         struct taskqgroup *qgroup;
1019
1020         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
1021         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
1022         qgroup->tqg_name = name;
1023         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
1024
1025         return (qgroup);
1026 }
1027
/*
 * Counterpart of taskqgroup_create().
 *
 * The body is empty: nothing owned by "qgroup" is released and the group
 * itself is not freed.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}
1033
1034 void
1035 taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
1036     const char *name)
1037 {
1038
1039         GROUPTASK_INIT(gtask, 0, fn, ctx);
1040         taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
1041 }
1042
1043 void
1044 taskqgroup_config_gtask_deinit(struct grouptask *gtask)
1045 {
1046
1047         taskqgroup_detach(qgroup_config, gtask);
1048 }