sys/kern/subr_gtaskqueue.c

   1 /*-
   2  * Copyright (c) 2000 Doug Rabson
   3  * Copyright (c) 2014 Jeff Roberson
   4  * Copyright (c) 2016 Matthew Macy
   5  * All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  *
  16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  26  * SUCH DAMAGE.
  27  */
  28
  29 #include <sys/cdefs.h>
  30 __FBSDID("$FreeBSD$");
  31
  32 #include <sys/param.h>
  33 #include <sys/systm.h>
  34 #include <sys/bus.h>
  35 #include <sys/cpuset.h>
  36 #include <sys/kernel.h>
  37 #include <sys/kthread.h>
  38 #include <sys/libkern.h>
  39 #include <sys/limits.h>
  40 #include <sys/lock.h>
  41 #include <sys/malloc.h>
  42 #include <sys/mutex.h>
  43 #include <sys/proc.h>
  44 #include <sys/sched.h>
  45 #include <sys/smp.h>
  46 #include <sys/gtaskqueue.h>
  47 #include <sys/unistd.h>
  48 #include <machine/stdarg.h>
  49
  50 static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
  51 static void     gtaskqueue_thread_enqueue(void *);
  52 static void     gtaskqueue_thread_loop(void *arg);
  53 static int      task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
  54 static void     gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);
  55
  56 TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
  57 TASKQGROUP_DEFINE(config, 1, 1);
  58
  59 struct gtaskqueue_busy {
  60         struct gtask            *tb_running;
  61         u_int                    tb_seq;
  62         LIST_ENTRY(gtaskqueue_busy) tb_link;
  63 };
  64
  65 typedef void (*gtaskqueue_enqueue_fn)(void *context);
  66
  67 struct gtaskqueue {
  68         STAILQ_HEAD(, gtask)    tq_queue;
  69         LIST_HEAD(, gtaskqueue_busy) tq_active;
  70         u_int                   tq_seq;
  71         int                     tq_callouts;
  72         struct mtx_padalign     tq_mutex;
  73         gtaskqueue_enqueue_fn   tq_enqueue;
  74         void                    *tq_context;
  75         char                    *tq_name;
  76         struct thread           **tq_threads;
  77         int                     tq_tcount;
  78         int                     tq_spin;
  79         int                     tq_flags;
  80         taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
  81         void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
  82 };
  83
  84 #define TQ_FLAGS_ACTIVE         (1 << 0)
  85 #define TQ_FLAGS_BLOCKED        (1 << 1)
  86 #define TQ_FLAGS_UNLOCKED_ENQUEUE       (1 << 2)
  87
  88 #define DT_CALLOUT_ARMED        (1 << 0)
  89
  90 #define TQ_LOCK(tq)                                                     \
  91         do {                                                            \
  92                 if ((tq)->tq_spin)                                      \
  93                         mtx_lock_spin(&(tq)->tq_mutex);                 \
  94                 else                                                    \
  95                         mtx_lock(&(tq)->tq_mutex);                      \
  96         } while (0)
  97 #define TQ_ASSERT_LOCKED(tq)    mtx_assert(&(tq)->tq_mutex, MA_OWNED)
  98
  99 #define TQ_UNLOCK(tq)                                                   \
 100         do {                                                            \
 101                 if ((tq)->tq_spin)                                      \
 102                         mtx_unlock_spin(&(tq)->tq_mutex);               \
 103                 else                                                    \
 104                         mtx_unlock(&(tq)->tq_mutex);                    \
 105         } while (0)
 106 #define TQ_ASSERT_UNLOCKED(tq)  mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
 107
 108 #ifdef INVARIANTS
 109 static void
 110 gtask_dump(struct gtask *gtask)
 111 {
 112         printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
 113                gtask, gtask->ta_flags, gtask->ta_priority, gtask->ta_func, gtask->ta_context);
 114 }
 115 #endif
 116
 117 static __inline int
 118 TQ_SLEEP(struct gtaskqueue *tq, void *p, const char *wm)
 119 {
 120         if (tq->tq_spin)
 121                 return (msleep_spin(p, (struct mtx *)&tq->tq_mutex, wm, 0));
 122         return (msleep(p, &tq->tq_mutex, 0, wm, 0));
 123 }
 124
 125 static struct gtaskqueue *
 126 _gtaskqueue_create(const char *name, int mflags,
 127                  taskqueue_enqueue_fn enqueue, void *context,
 128                  int mtxflags, const char *mtxname __unused)
 129 {
 130         struct gtaskqueue *queue;
 131         char *tq_name;
 132
 133         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
 134         if (!tq_name)
 135                 return (NULL);
 136
 137         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
 138
 139         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
 140         if (!queue) {
 141                 free(tq_name, M_GTASKQUEUE);
 142                 return (NULL);
 143         }
 144
 145         STAILQ_INIT(&queue->tq_queue);
 146         LIST_INIT(&queue->tq_active);
 147         queue->tq_enqueue = enqueue;
 148         queue->tq_context = context;
 149         queue->tq_name = tq_name;
 150         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
 151         queue->tq_flags |= TQ_FLAGS_ACTIVE;
 152         if (enqueue == gtaskqueue_thread_enqueue)
 153                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
 154         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
 155
 156         return (queue);
 157 }
 158
 159 /*
 160  * Signal a taskqueue thread to terminate.
 161  */
 162 static void
 163 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
 164 {
 165
 166         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
 167                 wakeup(tq);
 168                 TQ_SLEEP(tq, pp, "gtq_destroy");
 169         }
 170 }
 171
 172 static void
 173 gtaskqueue_free(struct gtaskqueue *queue)
 174 {
 175
 176         TQ_LOCK(queue);
 177         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
 178         gtaskqueue_terminate(queue->tq_threads, queue);
 179         KASSERT(LIST_EMPTY(&queue->tq_active), ("Tasks still running?"));
 180         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
 181         mtx_destroy(&queue->tq_mutex);
 182         free(queue->tq_threads, M_GTASKQUEUE);
 183         free(queue->tq_name, M_GTASKQUEUE);
 184         free(queue, M_GTASKQUEUE);
 185 }
 186
 187 /*
 188  * Wait for all to complete, then prevent it from being enqueued
 189  */
 190 void
 191 grouptask_block(struct grouptask *grouptask)
 192 {
 193         struct gtaskqueue *queue = grouptask->gt_taskqueue;
 194         struct gtask *gtask = &grouptask->gt_task;
 195
 196 #ifdef INVARIANTS
 197         if (queue == NULL) {
 198                 gtask_dump(gtask);
 199                 panic("queue == NULL");
 200         }
 201 #endif
 202         TQ_LOCK(queue);
 203         gtask->ta_flags |= TASK_NOENQUEUE;
 204         gtaskqueue_drain_locked(queue, gtask);
 205         TQ_UNLOCK(queue);
 206 }
 207
 208 void
 209 grouptask_unblock(struct grouptask *grouptask)
 210 {
 211         struct gtaskqueue *queue = grouptask->gt_taskqueue;
 212         struct gtask *gtask = &grouptask->gt_task;
 213
 214 #ifdef INVARIANTS
 215         if (queue == NULL) {
 216                 gtask_dump(gtask);
 217                 panic("queue == NULL");
 218         }
 219 #endif
 220         TQ_LOCK(queue);
 221         gtask->ta_flags &= ~TASK_NOENQUEUE;
 222         TQ_UNLOCK(queue);
 223 }
 224
 225 int
 226 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
 227 {
 228 #ifdef INVARIANTS
 229         if (queue == NULL) {
 230                 gtask_dump(gtask);
 231                 panic("queue == NULL");
 232         }
 233 #endif
 234         TQ_LOCK(queue);
 235         if (gtask->ta_flags & TASK_ENQUEUED) {
 236                 TQ_UNLOCK(queue);
 237                 return (0);
 238         }
 239         if (gtask->ta_flags & TASK_NOENQUEUE) {
 240                 TQ_UNLOCK(queue);
 241                 return (EAGAIN);
 242         }
 243         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
 244         gtask->ta_flags |= TASK_ENQUEUED;
 245         TQ_UNLOCK(queue);
 246         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
 247                 queue->tq_enqueue(queue->tq_context);
 248         return (0);
 249 }
 250
 251 static void
 252 gtaskqueue_task_nop_fn(void *context)
 253 {
 254 }
 255
 256 /*
 257  * Block until all currently queued tasks in this taskqueue
 258  * have begun execution.  Tasks queued during execution of
 259  * this function are ignored.
 260  */
 261 static void
 262 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
 263 {
 264         struct gtask t_barrier;
 265
 266         if (STAILQ_EMPTY(&queue->tq_queue))
 267                 return;
 268
 269         /*
 270          * Enqueue our barrier after all current tasks, but with
 271          * the highest priority so that newly queued tasks cannot
 272          * pass it.  Because of the high priority, we can not use
 273          * taskqueue_enqueue_locked directly (which drops the lock
 274          * anyway) so just insert it at tail while we have the
 275          * queue lock.
 276          */
 277         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
 278         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
 279         t_barrier.ta_flags |= TASK_ENQUEUED;
 280
 281         /*
 282          * Once the barrier has executed, all previously queued tasks
 283          * have completed or are currently executing.
 284          */
 285         while (t_barrier.ta_flags & TASK_ENQUEUED)
 286                 TQ_SLEEP(queue, &t_barrier, "gtq_qdrain");
 287 }
 288
 289 /*
 290  * Block until all currently executing tasks for this taskqueue
 291  * complete.  Tasks that begin execution during the execution
 292  * of this function are ignored.
 293  */
 294 static void
 295 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
 296 {
 297         struct gtaskqueue_busy *tb;
 298         u_int seq;
 299
 300         if (LIST_EMPTY(&queue->tq_active))
 301                 return;
 302
 303         /* Block taskq_terminate().*/
 304         queue->tq_callouts++;
 305
 306         /* Wait for any active task with sequence from the past. */
 307         seq = queue->tq_seq;
 308 restart:
 309         LIST_FOREACH(tb, &queue->tq_active, tb_link) {
 310                 if ((int)(tb->tb_seq - seq) <= 0) {
 311                         TQ_SLEEP(queue, tb->tb_running, "gtq_adrain");
 312                         goto restart;
 313                 }
 314         }
 315
 316         /* Release taskqueue_terminate(). */
 317         queue->tq_callouts--;
 318         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 319                 wakeup_one(queue->tq_threads);
 320 }
 321
 322 void
 323 gtaskqueue_block(struct gtaskqueue *queue)
 324 {
 325
 326         TQ_LOCK(queue);
 327         queue->tq_flags |= TQ_FLAGS_BLOCKED;
 328         TQ_UNLOCK(queue);
 329 }
 330
 331 void
 332 gtaskqueue_unblock(struct gtaskqueue *queue)
 333 {
 334
 335         TQ_LOCK(queue);
 336         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
 337         if (!STAILQ_EMPTY(&queue->tq_queue))
 338                 queue->tq_enqueue(queue->tq_context);
 339         TQ_UNLOCK(queue);
 340 }
 341
 342 static void
 343 gtaskqueue_run_locked(struct gtaskqueue *queue)
 344 {
 345         struct gtaskqueue_busy tb;
 346         struct gtask *gtask;
 347
 348         KASSERT(queue != NULL, ("tq is NULL"));
 349         TQ_ASSERT_LOCKED(queue);
 350         tb.tb_running = NULL;
 351         LIST_INSERT_HEAD(&queue->tq_active, &tb, tb_link);
 352
 353         while ((gtask = STAILQ_FIRST(&queue->tq_queue)) != NULL) {
 354                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
 355                 gtask->ta_flags &= ~TASK_ENQUEUED;
 356                 tb.tb_running = gtask;
 357                 tb.tb_seq = ++queue->tq_seq;
 358                 TQ_UNLOCK(queue);
 359
 360                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
 361                 gtask->ta_func(gtask->ta_context);
 362
 363                 TQ_LOCK(queue);
 364                 wakeup(gtask);
 365         }
 366         LIST_REMOVE(&tb, tb_link);
 367 }
 368
 369 static int
 370 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
 371 {
 372         struct gtaskqueue_busy *tb;
 373
 374         TQ_ASSERT_LOCKED(queue);
 375         LIST_FOREACH(tb, &queue->tq_active, tb_link) {
 376                 if (tb->tb_running == gtask)
 377                         return (1);
 378         }
 379         return (0);
 380 }
 381
 382 static int
 383 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
 384 {
 385
 386         if (gtask->ta_flags & TASK_ENQUEUED)
 387                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
 388         gtask->ta_flags &= ~TASK_ENQUEUED;
 389         return (task_is_running(queue, gtask) ? EBUSY : 0);
 390 }
 391
 392 int
 393 gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
 394 {
 395         int error;
 396
 397         TQ_LOCK(queue);
 398         error = gtaskqueue_cancel_locked(queue, gtask);
 399         TQ_UNLOCK(queue);
 400
 401         return (error);
 402 }
 403
 404 static void
 405 gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
 406 {
 407         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
 408                 TQ_SLEEP(queue, gtask, "gtq_drain");
 409 }
 410
 411 void
 412 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
 413 {
 414
 415         if (!queue->tq_spin)
 416                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 417
 418         TQ_LOCK(queue);
 419         gtaskqueue_drain_locked(queue, gtask);
 420         TQ_UNLOCK(queue);
 421 }
 422
 423 void
 424 gtaskqueue_drain_all(struct gtaskqueue *queue)
 425 {
 426
 427         if (!queue->tq_spin)
 428                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
 429
 430         TQ_LOCK(queue);
 431         gtaskqueue_drain_tq_queue(queue);
 432         gtaskqueue_drain_tq_active(queue);
 433         TQ_UNLOCK(queue);
 434 }
 435
 436 static int
 437 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 438     cpuset_t *mask, const char *name, va_list ap)
 439 {
 440         char ktname[MAXCOMLEN + 1];
 441         struct thread *td;
 442         struct gtaskqueue *tq;
 443         int i, error;
 444
 445         if (count <= 0)
 446                 return (EINVAL);
 447
 448         vsnprintf(ktname, sizeof(ktname), name, ap);
 449         tq = *tqp;
 450
 451         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
 452             M_NOWAIT | M_ZERO);
 453         if (tq->tq_threads == NULL) {
 454                 printf("%s: no memory for %s threads\n", __func__, ktname);
 455                 return (ENOMEM);
 456         }
 457
 458         for (i = 0; i < count; i++) {
 459                 if (count == 1)
 460                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 461                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
 462                 else
 463                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
 464                             &tq->tq_threads[i], RFSTOPPED, 0,
 465                             "%s_%d", ktname, i);
 466                 if (error) {
 467                         /* should be ok to continue, taskqueue_free will dtrt */
 468                         printf("%s: kthread_add(%s): error %d", __func__,
 469                             ktname, error);
 470                         tq->tq_threads[i] = NULL;               /* paranoid */
 471                 } else
 472                         tq->tq_tcount++;
 473         }
 474         for (i = 0; i < count; i++) {
 475                 if (tq->tq_threads[i] == NULL)
 476                         continue;
 477                 td = tq->tq_threads[i];
 478                 if (mask) {
 479                         error = cpuset_setthread(td->td_tid, mask);
 480                         /*
 481                          * Failing to pin is rarely an actual fatal error;
 482                          * it'll just affect performance.
 483                          */
 484                         if (error)
 485                                 printf("%s: curthread=%llu: can't pin; "
 486                                     "error=%d\n",
 487                                     __func__,
 488                                     (unsigned long long) td->td_tid,
 489                                     error);
 490                 }
 491                 thread_lock(td);
 492                 sched_prio(td, pri);
 493                 sched_add(td, SRQ_BORING);
 494         }
 495
 496         return (0);
 497 }
 498
 499 static int
 500 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
 501     const char *name, ...)
 502 {
 503         va_list ap;
 504         int error;
 505
 506         va_start(ap, name);
 507         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
 508         va_end(ap);
 509         return (error);
 510 }
 511
 512 static inline void
 513 gtaskqueue_run_callback(struct gtaskqueue *tq,
 514     enum taskqueue_callback_type cb_type)
 515 {
 516         taskqueue_callback_fn tq_callback;
 517
 518         TQ_ASSERT_UNLOCKED(tq);
 519         tq_callback = tq->tq_callbacks[cb_type];
 520         if (tq_callback != NULL)
 521                 tq_callback(tq->tq_cb_contexts[cb_type]);
 522 }
 523
 524 static void
 525 gtaskqueue_thread_loop(void *arg)
 526 {
 527         struct gtaskqueue **tqp, *tq;
 528
 529         tqp = arg;
 530         tq = *tqp;
 531         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
 532         TQ_LOCK(tq);
 533         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
 534                 /* XXX ? */
 535                 gtaskqueue_run_locked(tq);
 536                 /*
 537                  * Because taskqueue_run() can drop tq_mutex, we need to
 538                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
 539                  * meantime, which means we missed a wakeup.
 540                  */
 541                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
 542                         break;
 543                 TQ_SLEEP(tq, tq, "-");
 544         }
 545         gtaskqueue_run_locked(tq);
 546         /*
 547          * This thread is on its way out, so just drop the lock temporarily
 548          * in order to call the shutdown callback.  This allows the callback
 549          * to look at the taskqueue, even just before it dies.
 550          */
 551         TQ_UNLOCK(tq);
 552         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
 553         TQ_LOCK(tq);
 554
 555         /* rendezvous with thread that asked us to terminate */
 556         tq->tq_tcount--;
 557         wakeup_one(tq->tq_threads);
 558         TQ_UNLOCK(tq);
 559         kthread_exit();
 560 }
 561
 562 static void
 563 gtaskqueue_thread_enqueue(void *context)
 564 {
 565         struct gtaskqueue **tqp, *tq;
 566
 567         tqp = context;
 568         tq = *tqp;
 569         wakeup_any(tq);
 570 }
 571
 572 static struct gtaskqueue *
 573 gtaskqueue_create_fast(const char *name, int mflags,
 574                  taskqueue_enqueue_fn enqueue, void *context)
 575 {
 576         return _gtaskqueue_create(name, mflags, enqueue, context,
 577                         MTX_SPIN, "fast_taskqueue");
 578 }
 579
 580 struct taskqgroup_cpu {
 581         LIST_HEAD(, grouptask)  tgc_tasks;
 582         struct gtaskqueue       *tgc_taskq;
 583         int     tgc_cnt;
 584         int     tgc_cpu;
 585 };
 586
 587 struct taskqgroup {
 588         struct taskqgroup_cpu tqg_queue[MAXCPU];
 589         struct mtx      tqg_lock;
 590         const char *    tqg_name;
 591         int             tqg_adjusting;
 592         int             tqg_stride;
 593         int             tqg_cnt;
 594 };
 595
 596 struct taskq_bind_task {
 597         struct gtask bt_task;
 598         int     bt_cpuid;
 599 };
 600
 601 static void
 602 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
 603 {
 604         struct taskqgroup_cpu *qcpu;
 605
 606         qcpu = &qgroup->tqg_queue[idx];
 607         LIST_INIT(&qcpu->tgc_tasks);
 608         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
 609             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
 610         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
 611             "%s_%d", qgroup->tqg_name, idx);
 612         qcpu->tgc_cpu = cpu;
 613 }
 614
 615 static void
 616 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
 617 {
 618
 619         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
 620 }
 621
 622 /*
 623  * Find the taskq with least # of tasks that doesn't currently have any
 624  * other queues from the uniq identifier.
 625  */
 626 static int
 627 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
 628 {
 629         struct grouptask *n;
 630         int i, idx, mincnt;
 631         int strict;
 632
 633         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 634         if (qgroup->tqg_cnt == 0)
 635                 return (0);
 636         idx = -1;
 637         mincnt = INT_MAX;
 638         /*
 639          * Two passes;  First scan for a queue with the least tasks that
 640          * does not already service this uniq id.  If that fails simply find
 641          * the queue with the least total tasks;
 642          */
 643         for (strict = 1; mincnt == INT_MAX; strict = 0) {
 644                 for (i = 0; i < qgroup->tqg_cnt; i++) {
 645                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
 646                                 continue;
 647                         if (strict) {
 648                                 LIST_FOREACH(n,
 649                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 650                                         if (n->gt_uniq == uniq)
 651                                                 break;
 652                                 if (n != NULL)
 653                                         continue;
 654                         }
 655                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
 656                         idx = i;
 657                 }
 658         }
 659         if (idx == -1)
 660                 panic("%s: failed to pick a qid.", __func__);
 661
 662         return (idx);
 663 }
 664
 665 /*
 666  * smp_started is unusable since it is not set for UP kernels or even for
 667  * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 670  * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 671  *
 672  * So maintain our own flag.  It must be set after all CPUs are started
 673  * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 674  * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 675  * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 676  * simpler for adjustment to pass a flag indicating if it is delayed.
 677  */
 678
 679 static int tqg_smp_started;
 680
 681 static void
 682 tqg_record_smp_started(void *arg)
 683 {
 684         tqg_smp_started = 1;
 685 }
 686
 687 SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
 688         tqg_record_smp_started, NULL);
 689
 690 void
 691 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
 692     void *uniq, device_t dev, struct resource *irq, const char *name)
 693 {
 694         int cpu, qid, error;
 695
 696         gtask->gt_uniq = uniq;
 697         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
 698         gtask->gt_dev = dev;
 699         gtask->gt_irq = irq;
 700         gtask->gt_cpu = -1;
 701         mtx_lock(&qgroup->tqg_lock);
 702         qid = taskqgroup_find(qgroup, uniq);
 703         qgroup->tqg_queue[qid].tgc_cnt++;
 704         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 705         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 706         if (dev != NULL && irq != NULL && tqg_smp_started) {
 707                 cpu = qgroup->tqg_queue[qid].tgc_cpu;
 708                 gtask->gt_cpu = cpu;
 709                 mtx_unlock(&qgroup->tqg_lock);
 710                 error = bus_bind_intr(dev, irq, cpu);
 711                 if (error)
 712                         printf("%s: binding interrupt failed for %s: %d\n",
 713                             __func__, gtask->gt_name, error);
 714         } else
 715                 mtx_unlock(&qgroup->tqg_lock);
 716 }
 717
 718 static void
 719 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 720 {
 721         int qid, cpu, error;
 722
 723         mtx_lock(&qgroup->tqg_lock);
 724         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
 725         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 726         if (gtask->gt_dev != NULL && gtask->gt_irq != NULL) {
 727                 mtx_unlock(&qgroup->tqg_lock);
 728                 error = bus_bind_intr(gtask->gt_dev, gtask->gt_irq, cpu);
 729                 mtx_lock(&qgroup->tqg_lock);
 730                 if (error)
 731                         printf("%s: binding interrupt failed for %s: %d\n",
 732                             __func__, gtask->gt_name, error);
 733
 734         }
 735         qgroup->tqg_queue[qid].tgc_cnt++;
 736         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 737         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 738         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 739         mtx_unlock(&qgroup->tqg_lock);
 740 }
 741
 742 int
 743 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
 744     void *uniq, int cpu, device_t dev, struct resource *irq, const char *name)
 745 {
 746         int i, qid, error;
 747
 748         qid = -1;
 749         gtask->gt_uniq = uniq;
 750         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
 751         gtask->gt_dev = dev;
 752         gtask->gt_irq = irq;
 753         gtask->gt_cpu = cpu;
 754         mtx_lock(&qgroup->tqg_lock);
 755         if (tqg_smp_started) {
 756                 for (i = 0; i < qgroup->tqg_cnt; i++)
 757                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 758                                 qid = i;
 759                                 break;
 760                         }
 761                 if (qid == -1) {
 762                         mtx_unlock(&qgroup->tqg_lock);
 763                         printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
 764                         return (EINVAL);
 765                 }
 766         } else
 767                 qid = 0;
 768         qgroup->tqg_queue[qid].tgc_cnt++;
 769         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 770         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 771         cpu = qgroup->tqg_queue[qid].tgc_cpu;
 772         mtx_unlock(&qgroup->tqg_lock);
 773
 774         if (dev != NULL && irq != NULL && tqg_smp_started) {
 775                 error = bus_bind_intr(dev, irq, cpu);
 776                 if (error)
 777                         printf("%s: binding interrupt failed for %s: %d\n",
 778                             __func__, gtask->gt_name, error);
 779         }
 780         return (0);
 781 }
 782
 783 static int
 784 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
 785 {
 786         device_t dev;
 787         struct resource *irq;
 788         int cpu, error, i, qid;
 789
 790         qid = -1;
 791         dev = gtask->gt_dev;
 792         irq = gtask->gt_irq;
 793         cpu = gtask->gt_cpu;
 794         MPASS(tqg_smp_started);
 795         mtx_lock(&qgroup->tqg_lock);
 796         for (i = 0; i < qgroup->tqg_cnt; i++)
 797                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
 798                         qid = i;
 799                         break;
 800                 }
 801         if (qid == -1) {
 802                 mtx_unlock(&qgroup->tqg_lock);
 803                 printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
 804                 return (EINVAL);
 805         }
 806         qgroup->tqg_queue[qid].tgc_cnt++;
 807         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
 808         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
 809         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
 810         mtx_unlock(&qgroup->tqg_lock);
 811
 812         if (dev != NULL && irq != NULL) {
 813                 error = bus_bind_intr(dev, irq, cpu);
 814                 if (error)
 815                         printf("%s: binding interrupt failed for %s: %d\n",
 816                             __func__, gtask->gt_name, error);
 817         }
 818         return (0);
 819 }
 820
 821 void
 822 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
 823 {
 824         int i;
 825
 826         grouptask_block(gtask);
 827         mtx_lock(&qgroup->tqg_lock);
 828         for (i = 0; i < qgroup->tqg_cnt; i++)
 829                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
 830                         break;
 831         if (i == qgroup->tqg_cnt)
 832                 panic("%s: task %s not in group", __func__, gtask->gt_name);
 833         qgroup->tqg_queue[i].tgc_cnt--;
 834         LIST_REMOVE(gtask, gt_list);
 835         mtx_unlock(&qgroup->tqg_lock);
 836         gtask->gt_taskqueue = NULL;
 837         gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
 838 }
 839
 840 static void
 841 taskqgroup_binder(void *ctx)
 842 {
 843         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
 844         cpuset_t mask;
 845         int error;
 846
 847         CPU_ZERO(&mask);
 848         CPU_SET(gtask->bt_cpuid, &mask);
 849         error = cpuset_setthread(curthread->td_tid, &mask);
 850         thread_lock(curthread);
 851         sched_bind(curthread, gtask->bt_cpuid);
 852         thread_unlock(curthread);
 853
 854         if (error)
 855                 printf("%s: binding curthread failed: %d\n", __func__, error);
 856         free(gtask, M_DEVBUF);
 857 }
 858
 859 static void
 860 taskqgroup_bind(struct taskqgroup *qgroup)
 861 {
 862         struct taskq_bind_task *gtask;
 863         int i;
 864
 865         /*
 866          * Bind taskqueue threads to specific CPUs, if they have been assigned
 867          * one.
 868          */
 869         if (qgroup->tqg_cnt == 1)
 870                 return;
 871
 872         for (i = 0; i < qgroup->tqg_cnt; i++) {
 873                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
 874                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
 875                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
 876                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
 877                     &gtask->bt_task);
 878         }
 879 }
 880
 881 static void
 882 taskqgroup_config_init(void *arg)
 883 {
 884         struct taskqgroup *qgroup = qgroup_config;
 885         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 886
 887         LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
 888             grouptask, gt_list);
 889         qgroup->tqg_queue[0].tgc_cnt = 0;
 890         taskqgroup_cpu_create(qgroup, 0, 0);
 891
 892         qgroup->tqg_cnt = 1;
 893         qgroup->tqg_stride = 1;
 894 }
 895
 896 SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
 897         taskqgroup_config_init, NULL);
 898
 899 static int
 900 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
 901 {
 902         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
 903         struct grouptask *gtask;
 904         int i, k, old_cnt, old_cpu, cpu;
 905
 906         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
 907
 908         if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
 909                 printf("%s: failed cnt: %d stride: %d "
 910                     "mp_ncpus: %d tqg_smp_started: %d\n",
 911                     __func__, cnt, stride, mp_ncpus, tqg_smp_started);
 912                 return (EINVAL);
 913         }
 914         if (qgroup->tqg_adjusting) {
 915                 printf("%s failed: adjusting\n", __func__);
 916                 return (EBUSY);
 917         }
 918         qgroup->tqg_adjusting = 1;
 919         old_cnt = qgroup->tqg_cnt;
 920         old_cpu = 0;
 921         if (old_cnt < cnt)
 922                 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
 923         mtx_unlock(&qgroup->tqg_lock);
 924         /*
 925          * Set up queue for tasks added before boot.
 926          */
 927         if (old_cnt == 0) {
 928                 LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
 929                     grouptask, gt_list);
 930                 qgroup->tqg_queue[0].tgc_cnt = 0;
 931         }
 932
 933         /*
 934          * If new taskq threads have been added.
 935          */
 936         cpu = old_cpu;
 937         for (i = old_cnt; i < cnt; i++) {
 938                 taskqgroup_cpu_create(qgroup, i, cpu);
 939
 940                 for (k = 0; k < stride; k++)
 941                         cpu = CPU_NEXT(cpu);
 942         }
 943         mtx_lock(&qgroup->tqg_lock);
 944         qgroup->tqg_cnt = cnt;
 945         qgroup->tqg_stride = stride;
 946
 947         /*
 948          * Adjust drivers to use new taskqs.
 949          */
 950         for (i = 0; i < old_cnt; i++) {
 951                 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
 952                         LIST_REMOVE(gtask, gt_list);
 953                         qgroup->tqg_queue[i].tgc_cnt--;
 954                         LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
 955                 }
 956         }
 957         mtx_unlock(&qgroup->tqg_lock);
 958
 959         while ((gtask = LIST_FIRST(&gtask_head))) {
 960                 LIST_REMOVE(gtask, gt_list);
 961                 if (gtask->gt_cpu == -1)
 962                         taskqgroup_attach_deferred(qgroup, gtask);
 963                 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
 964                         taskqgroup_attach_deferred(qgroup, gtask);
 965         }
 966
 967 #ifdef INVARIANTS
 968         mtx_lock(&qgroup->tqg_lock);
 969         for (i = 0; i < qgroup->tqg_cnt; i++) {
 970                 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
 971                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
 972                         MPASS(gtask->gt_taskqueue != NULL);
 973         }
 974         mtx_unlock(&qgroup->tqg_lock);
 975 #endif
 976         /*
 977          * If taskq thread count has been reduced.
 978          */
 979         for (i = cnt; i < old_cnt; i++)
 980                 taskqgroup_cpu_remove(qgroup, i);
 981
 982         taskqgroup_bind(qgroup);
 983
 984         mtx_lock(&qgroup->tqg_lock);
 985         qgroup->tqg_adjusting = 0;
 986
 987         return (0);
 988 }
 989
 990 int
 991 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
 992 {
 993         int error;
 994
 995         mtx_lock(&qgroup->tqg_lock);
 996         error = _taskqgroup_adjust(qgroup, cnt, stride);
 997         mtx_unlock(&qgroup->tqg_lock);
 998
 999         return (error);
1000 }
1001
1002 struct taskqgroup *
1003 taskqgroup_create(const char *name)
1004 {
1005         struct taskqgroup *qgroup;
1006
1007         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
1008         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
1009         qgroup->tqg_name = name;
1010         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
1011
1012         return (qgroup);
1013 }
1014
/*
 * Destroy a taskqueue group.
 *
 * Currently a no-op: the argument is not used and nothing is released.
 * NOTE(review): whatever taskqgroup_create() set up appears to stay
 * allocated -- confirm whether teardown is expected here.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

        (void)qgroup;
}
1020
1021 void
1022 taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
1023     const char *name)
1024 {
1025
1026         GROUPTASK_INIT(gtask, 0, fn, ctx);
1027         taskqgroup_attach(qgroup_config, gtask, gtask, NULL, NULL, name);
1028 }
1029
1030 void
1031 taskqgroup_config_gtask_deinit(struct grouptask *gtask)
1032 {
1033
1034         taskqgroup_detach(qgroup_config, gtask);
1035 }