]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/subr_gtaskqueue.c
MFC r362033:
[FreeBSD/FreeBSD.git] / sys / kern / subr_gtaskqueue.c
1 /*-
2  * Copyright (c) 2000 Doug Rabson
3  * Copyright (c) 2014 Jeff Roberson
4  * Copyright (c) 2016 Matthew Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/cpuset.h>
36 #include <sys/interrupt.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/libkern.h>
40 #include <sys/limits.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/gtaskqueue.h>
48 #include <sys/unistd.h>
49 #include <machine/stdarg.h>
50
/* Malloc type for the queue structures, queue names and thread arrays allocated in this file. */
51 static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
/* Forward declarations for helpers that are referenced before their definitions. */
52 static void     gtaskqueue_thread_enqueue(void *);
53 static void     gtaskqueue_thread_loop(void *arg);
54 static int      task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
55 static void     gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);
56
/*
 * NOTE(review): TASKQGROUP_DEFINE is defined outside this file; it presumably
 * instantiates the "softirq" and "config" groups (taskqgroup_config_init()
 * below refers to qgroup_config) -- confirm against sys/gtaskqueue.h.
 */
57 TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
58 TASKQGROUP_DEFINE(config, 1, 1);
59
/*
 * Record linked on a queue's tq_active list by gtaskqueue_run_locked()
 * for as long as that call is processing tasks.
 */
60 struct gtaskqueue_busy {
61         struct gtask            *tb_running;	/* task most recently dequeued by this runner, NULL before the first */
62         u_int                    tb_seq;	/* tq_seq value assigned when tb_running was dequeued */
63         LIST_ENTRY(gtaskqueue_busy) tb_link;	/* linkage on tq_active */
64 };
65
/* Type of the tq_enqueue hook; it is called with the queue's tq_context. */
66 typedef void (*gtaskqueue_enqueue_fn)(void *context);
67
/*
 * A single task queue.  Fields are manipulated under tq_mutex
 * (see TQ_LOCK()/TQ_UNLOCK()).
 */
68 struct gtaskqueue {
69         STAILQ_HEAD(, gtask)    tq_queue;	/* pending tasks: inserted at tail, removed from head */
70         LIST_HEAD(, gtaskqueue_busy) tq_active;	/* one record per gtaskqueue_run_locked() in progress */
71         u_int                   tq_seq;	/* incremented each time a task is dequeued to run */
72         int                     tq_callouts;	/* while > 0, gtaskqueue_terminate() keeps waiting */
73         struct mtx_padalign     tq_mutex;	/* queue lock; spin or sleep mutex depending on tq_spin */
74         gtaskqueue_enqueue_fn   tq_enqueue;	/* hook called after a task has been queued */
75         void                    *tq_context;	/* argument passed to tq_enqueue */
76         char                    *tq_name;	/* allocated name; also used as the mutex name */
77         struct thread           **tq_threads;	/* worker thread array; also the termination sleep channel */
78         int                     tq_tcount;	/* worker threads created and not yet exited */
79         int                     tq_spin;	/* non-zero when tq_mutex was created with MTX_SPIN */
80         int                     tq_flags;	/* TQ_FLAGS_* */
81         taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];	/* per-type callbacks, may be NULL */
82         void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];	/* argument for the matching callback */
83 };
84
/* tq_flags bits. */
/* Set at creation; cleared by gtaskqueue_free() so that worker loops exit. */
85 #define TQ_FLAGS_ACTIVE         (1 << 0)
/* While set, grouptaskqueue_enqueue() queues tasks but does not call tq_enqueue. */
86 #define TQ_FLAGS_BLOCKED        (1 << 1)
/* Set at creation when the enqueue hook is gtaskqueue_thread_enqueue(); not read in the code visible here. */
87 #define TQ_FLAGS_UNLOCKED_ENQUEUE       (1 << 2)
88
/* NOTE(review): not referenced in the portion of the file reviewed -- confirm whether it is still needed. */
89 #define DT_CALLOUT_ARMED        (1 << 0)
90
/* Acquire the queue mutex using the spin or the default variant, selected by tq_spin. */
91 #define TQ_LOCK(tq)                                                     \
92         do {                                                            \
93                 if ((tq)->tq_spin)                                      \
94                         mtx_lock_spin(&(tq)->tq_mutex);                 \
95                 else                                                    \
96                         mtx_lock(&(tq)->tq_mutex);                      \
97         } while (0)
/* Assert that the current thread owns the queue mutex. */
98 #define TQ_ASSERT_LOCKED(tq)    mtx_assert(&(tq)->tq_mutex, MA_OWNED)
99
/* Release the queue mutex; counterpart of TQ_LOCK(). */
100 #define TQ_UNLOCK(tq)                                                   \
101         do {                                                            \
102                 if ((tq)->tq_spin)                                      \
103                         mtx_unlock_spin(&(tq)->tq_mutex);               \
104                 else                                                    \
105                         mtx_unlock(&(tq)->tq_mutex);                    \
106         } while (0)
/* Assert that the current thread does not own the queue mutex. */
107 #define TQ_ASSERT_UNLOCKED(tq)  mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
108
#ifdef INVARIANTS
/*
 * Debug aid, compiled only under INVARIANTS: print the address of a
 * gtask together with its flags, priority, handler and context.
 */
static void
gtask_dump(struct gtask *gtask)
{

	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	    gtask,
	    gtask->ta_flags,
	    gtask->ta_priority,
	    gtask->ta_func,
	    gtask->ta_context);
}
#endif
117
118 static __inline int
119 TQ_SLEEP(struct gtaskqueue *tq, void *p, const char *wm)
120 {
121         if (tq->tq_spin)
122                 return (msleep_spin(p, (struct mtx *)&tq->tq_mutex, wm, 0));
123         return (msleep(p, &tq->tq_mutex, 0, wm, 0));
124 }
125
126 static struct gtaskqueue *
127 _gtaskqueue_create(const char *name, int mflags,
128                  taskqueue_enqueue_fn enqueue, void *context,
129                  int mtxflags, const char *mtxname __unused)
130 {
131         struct gtaskqueue *queue;
132         char *tq_name;
133
134         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
135         if (!tq_name)
136                 return (NULL);
137
138         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
139
140         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
141         if (!queue) {
142                 free(tq_name, M_GTASKQUEUE);
143                 return (NULL);
144         }
145
146         STAILQ_INIT(&queue->tq_queue);
147         LIST_INIT(&queue->tq_active);
148         queue->tq_enqueue = enqueue;
149         queue->tq_context = context;
150         queue->tq_name = tq_name;
151         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
152         queue->tq_flags |= TQ_FLAGS_ACTIVE;
153         if (enqueue == gtaskqueue_thread_enqueue)
154                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
155         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
156
157         return (queue);
158 }
159
160
161 /*
162  * Signal a taskqueue thread to terminate.
163  */
164 static void
165 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
166 {
167
168         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
169                 wakeup(tq);
170                 TQ_SLEEP(tq, pp, "gtq_destroy");
171         }
172 }
173
174 static void
175 gtaskqueue_free(struct gtaskqueue *queue)
176 {
177
178         TQ_LOCK(queue);
179         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
180         gtaskqueue_terminate(queue->tq_threads, queue);
181         KASSERT(LIST_EMPTY(&queue->tq_active), ("Tasks still running?"));
182         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
183         mtx_destroy(&queue->tq_mutex);
184         free(queue->tq_threads, M_GTASKQUEUE);
185         free(queue->tq_name, M_GTASKQUEUE);
186         free(queue, M_GTASKQUEUE);
187 }
188
189 /*
190  * Wait for all to complete, then prevent it from being enqueued
191  */
192 void
193 grouptask_block(struct grouptask *grouptask)
194 {
195         struct gtaskqueue *queue = grouptask->gt_taskqueue;
196         struct gtask *gtask = &grouptask->gt_task;
197
198 #ifdef INVARIANTS
199         if (queue == NULL) {
200                 gtask_dump(gtask);
201                 panic("queue == NULL");
202         }
203 #endif
204         TQ_LOCK(queue);
205         gtask->ta_flags |= TASK_NOENQUEUE;
206         gtaskqueue_drain_locked(queue, gtask);
207         TQ_UNLOCK(queue);
208 }
209
210 void
211 grouptask_unblock(struct grouptask *grouptask)
212 {
213         struct gtaskqueue *queue = grouptask->gt_taskqueue;
214         struct gtask *gtask = &grouptask->gt_task;
215
216 #ifdef INVARIANTS
217         if (queue == NULL) {
218                 gtask_dump(gtask);
219                 panic("queue == NULL");
220         }
221 #endif
222         TQ_LOCK(queue);
223         gtask->ta_flags &= ~TASK_NOENQUEUE;
224         TQ_UNLOCK(queue);
225 }
226
227 int
228 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
229 {
230 #ifdef INVARIANTS
231         if (queue == NULL) {
232                 gtask_dump(gtask);
233                 panic("queue == NULL");
234         }
235 #endif
236         TQ_LOCK(queue);
237         if (gtask->ta_flags & TASK_ENQUEUED) {
238                 TQ_UNLOCK(queue);
239                 return (0);
240         }
241         if (gtask->ta_flags & TASK_NOENQUEUE) {
242                 TQ_UNLOCK(queue);
243                 return (EAGAIN);
244         }
245         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
246         gtask->ta_flags |= TASK_ENQUEUED;
247         TQ_UNLOCK(queue);
248         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
249                 queue->tq_enqueue(queue->tq_context);
250         return (0);
251 }
252
/*
 * Task handler that does nothing; used as the body of the barrier task
 * in gtaskqueue_drain_tq_queue().
 */
static void
gtaskqueue_task_nop_fn(void *context)
{

	(void)context;
}
257
258 /*
259  * Block until all currently queued tasks in this taskqueue
260  * have begun execution.  Tasks queued during execution of
261  * this function are ignored.
 *
 * Returns immediately when the pending list is empty.  The queue lock
 * must be held by the caller: TQ_SLEEP() sleeps on tq_mutex.
262  */
263 static void
264 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
265 {
266         struct gtask t_barrier;	/* lives on this stack frame; must be dequeued before we return */
267
268         if (STAILQ_EMPTY(&queue->tq_queue))
269                 return;
270
271         /*
272          * Enqueue our barrier after all current tasks, but with
273          * the highest priority so that newly queued tasks cannot
274          * pass it.  Because of the high priority, we can not use
275          * taskqueue_enqueue_locked directly (which drops the lock
276          * anyway) so just insert it at tail while we have the
277          * queue lock.
278          */
279         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
280         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
281         t_barrier.ta_flags |= TASK_ENQUEUED;
282
283         /*
284          * Once the barrier has executed, all previously queued tasks
285          * have completed or are currently executing.
286          */
        /*
         * gtaskqueue_run_locked() clears TASK_ENQUEUED when it dequeues the
         * barrier and calls wakeup() on the task address after running it,
         * which is the channel slept on here.
         */
287         while (t_barrier.ta_flags & TASK_ENQUEUED)
288                 TQ_SLEEP(queue, &t_barrier, "gtq_qdrain");
289 }
290
291 /*
292  * Block until all currently executing tasks for this taskqueue
293  * complete.  Tasks that begin execution during the execution
294  * of this function are ignored.
 *
 * Returns immediately when nothing is on tq_active.  The queue lock
 * must be held by the caller: TQ_SLEEP() sleeps on tq_mutex.
295  */
296 static void
297 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
298 {
299         struct gtaskqueue_busy *tb;
300         u_int seq;
301
302         if (LIST_EMPTY(&queue->tq_active))
303                 return;
304
305         /* Block taskq_terminate().*/
        /* (gtaskqueue_terminate() keeps sleeping while tq_callouts > 0.) */
306         queue->tq_callouts++;
307
308         /* Wait for any active task with sequence from the past. */
309         seq = queue->tq_seq;
310 restart:
311         LIST_FOREACH(tb, &queue->tq_active, tb_link) {
                /*
                 * The unsigned difference is interpreted as signed, so a
                 * runner whose task was dequeued at or before `seq' is
                 * waited for; runners with later sequence numbers are skipped.
                 */
312                 if ((int)(tb->tb_seq - seq) <= 0) {
                        /* Channel matches the wakeup(gtask) in gtaskqueue_run_locked(). */
313                         TQ_SLEEP(queue, tb->tb_running, "gtq_adrain");
                        /* The list may have changed while asleep; rescan from the head. */
314                         goto restart;
315                 }
316         }
317
318         /* Release taskqueue_terminate(). */
319         queue->tq_callouts--;
        /* If the queue is being torn down, wake one sleeper on the tq_threads channel. */
320         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
321                 wakeup_one(queue->tq_threads);
322 }
323
324 void
325 gtaskqueue_block(struct gtaskqueue *queue)
326 {
327
328         TQ_LOCK(queue);
329         queue->tq_flags |= TQ_FLAGS_BLOCKED;
330         TQ_UNLOCK(queue);
331 }
332
333 void
334 gtaskqueue_unblock(struct gtaskqueue *queue)
335 {
336
337         TQ_LOCK(queue);
338         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
339         if (!STAILQ_EMPTY(&queue->tq_queue))
340                 queue->tq_enqueue(queue->tq_context);
341         TQ_UNLOCK(queue);
342 }
343
/*
 * Run every pending task on the queue.  Called with the queue lock held;
 * the lock is dropped around each task handler and is held again on
 * return.  For the duration of the call a gtaskqueue_busy record for this
 * runner is linked on tq_active.
 */
344 static void
345 gtaskqueue_run_locked(struct gtaskqueue *queue)
346 {
347         struct gtaskqueue_busy tb;
348         struct gtask *gtask;
349
350         KASSERT(queue != NULL, ("tq is NULL"));
351         TQ_ASSERT_LOCKED(queue);
352         tb.tb_running = NULL;
353         LIST_INSERT_HEAD(&queue->tq_active, &tb, tb_link);
354
355         while ((gtask = STAILQ_FIRST(&queue->tq_queue)) != NULL) {
356                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
                /* Cleared before the handler runs, so the task can be queued again while running. */
357                 gtask->ta_flags &= ~TASK_ENQUEUED;
358                 tb.tb_running = gtask;
                /* tb_seq is always assigned before the lock is dropped with tb on tq_active. */
359                 tb.tb_seq = ++queue->tq_seq;
360                 TQ_UNLOCK(queue);
361
362                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
363                 gtask->ta_func(gtask->ta_context);
364
365                 TQ_LOCK(queue);
                /* Wake threads sleeping on this task in the drain functions. */
366                 wakeup(gtask);
367         }
368         LIST_REMOVE(&tb, tb_link);
369 }
370
371 static int
372 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
373 {
374         struct gtaskqueue_busy *tb;
375
376         TQ_ASSERT_LOCKED(queue);
377         LIST_FOREACH(tb, &queue->tq_active, tb_link) {
378                 if (tb->tb_running == gtask)
379                         return (1);
380         }
381         return (0);
382 }
383
384 static int
385 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
386 {
387
388         if (gtask->ta_flags & TASK_ENQUEUED)
389                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
390         gtask->ta_flags &= ~TASK_ENQUEUED;
391         return (task_is_running(queue, gtask) ? EBUSY : 0);
392 }
393
/*
 * Locked wrapper around gtaskqueue_cancel_locked(): returns EBUSY when
 * the task is currently running, 0 otherwise.
 */
int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int rv;

	TQ_LOCK(queue);
	rv = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);
	return (rv);
}
405
406 static void
407 gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
408 {
409         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
410                 TQ_SLEEP(queue, gtask, "gtq_drain");
411 }
412
413 void
414 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
415 {
416
417         if (!queue->tq_spin)
418                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
419
420         TQ_LOCK(queue);
421         gtaskqueue_drain_locked(queue, gtask);
422         TQ_UNLOCK(queue);
423 }
424
425 void
426 gtaskqueue_drain_all(struct gtaskqueue *queue)
427 {
428
429         if (!queue->tq_spin)
430                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
431
432         TQ_LOCK(queue);
433         gtaskqueue_drain_tq_queue(queue);
434         gtaskqueue_drain_tq_active(queue);
435         TQ_UNLOCK(queue);
436 }
437
438 static int
439 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
440     cpuset_t *mask, const char *name, va_list ap)
441 {
442         char ktname[MAXCOMLEN + 1];
443         struct thread *td;
444         struct gtaskqueue *tq;
445         int i, error;
446
447         if (count <= 0)
448                 return (EINVAL);
449
450         vsnprintf(ktname, sizeof(ktname), name, ap);
451         tq = *tqp;
452
453         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
454             M_NOWAIT | M_ZERO);
455         if (tq->tq_threads == NULL) {
456                 printf("%s: no memory for %s threads\n", __func__, ktname);
457                 return (ENOMEM);
458         }
459
460         for (i = 0; i < count; i++) {
461                 if (count == 1)
462                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
463                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
464                 else
465                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
466                             &tq->tq_threads[i], RFSTOPPED, 0,
467                             "%s_%d", ktname, i);
468                 if (error) {
469                         /* should be ok to continue, taskqueue_free will dtrt */
470                         printf("%s: kthread_add(%s): error %d", __func__,
471                             ktname, error);
472                         tq->tq_threads[i] = NULL;               /* paranoid */
473                 } else
474                         tq->tq_tcount++;
475         }
476         for (i = 0; i < count; i++) {
477                 if (tq->tq_threads[i] == NULL)
478                         continue;
479                 td = tq->tq_threads[i];
480                 if (mask) {
481                         error = cpuset_setthread(td->td_tid, mask);
482                         /*
483                          * Failing to pin is rarely an actual fatal error;
484                          * it'll just affect performance.
485                          */
486                         if (error)
487                                 printf("%s: curthread=%llu: can't pin; "
488                                     "error=%d\n",
489                                     __func__,
490                                     (unsigned long long) td->td_tid,
491                                     error);
492                 }
493                 thread_lock(td);
494                 sched_prio(td, pri);
495                 sched_add(td, SRQ_BORING);
496                 thread_unlock(td);
497         }
498
499         return (0);
500 }
501
502 static int
503 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
504     const char *name, ...)
505 {
506         va_list ap;
507         int error;
508
509         va_start(ap, name);
510         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
511         va_end(ap);
512         return (error);
513 }
514
515 static inline void
516 gtaskqueue_run_callback(struct gtaskqueue *tq,
517     enum taskqueue_callback_type cb_type)
518 {
519         taskqueue_callback_fn tq_callback;
520
521         TQ_ASSERT_UNLOCKED(tq);
522         tq_callback = tq->tq_callbacks[cb_type];
523         if (tq_callback != NULL)
524                 tq_callback(tq->tq_cb_contexts[cb_type]);
525 }
526
/*
 * Body of a queue worker thread.  `arg' points at the location holding
 * the queue pointer.  Runs the INIT callback, then alternates between
 * running pending tasks and sleeping on the queue until TQ_FLAGS_ACTIVE
 * is cleared; finally runs any remaining tasks and the SHUTDOWN
 * callback, decrements tq_tcount, wakes the terminating thread and exits.
 */
527 static void
528 gtaskqueue_thread_loop(void *arg)
529 {
530         struct gtaskqueue **tqp, *tq;
531
532         tqp = arg;
533         tq = *tqp;
534         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
535         TQ_LOCK(tq);
536         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
537                 /* XXX ? */
538                 gtaskqueue_run_locked(tq);
539                 /*
540                  * Because taskqueue_run() can drop tq_mutex, we need to
541                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
542                  * meantime, which means we missed a wakeup.
543                  */
544                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
545                         break;
                /* Sleep on the queue address; woken by the enqueue hook or gtaskqueue_terminate(). */
546                 TQ_SLEEP(tq, tq, "-");
547         }
        /* Final pass: run whatever is still pending before exiting. */
548         gtaskqueue_run_locked(tq);
549         /*
550          * This thread is on its way out, so just drop the lock temporarily
551          * in order to call the shutdown callback.  This allows the callback
552          * to look at the taskqueue, even just before it dies.
553          */
554         TQ_UNLOCK(tq);
555         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
556         TQ_LOCK(tq);
557
558         /* rendezvous with thread that asked us to terminate */
559         tq->tq_tcount--;
        /* gtaskqueue_terminate() sleeps on the tq_threads channel. */
560         wakeup_one(tq->tq_threads);
561         TQ_UNLOCK(tq);
562         kthread_exit();
563 }
564
/*
 * Enqueue hook for thread-backed queues.  `context' points at the
 * location holding the queue pointer; a thread sleeping on the queue
 * address is woken.
 */
static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue **slot;

	slot = context;
	wakeup_any(*slot);
}
574
575
576 static struct gtaskqueue *
577 gtaskqueue_create_fast(const char *name, int mflags,
578                  taskqueue_enqueue_fn enqueue, void *context)
579 {
580         return _gtaskqueue_create(name, mflags, enqueue, context,
581                         MTX_SPIN, "fast_taskqueue");
582 }
583
584
/* One slot of a task queue group: a queue plus the grouptasks attached to it. */
585 struct taskqgroup_cpu {
586         LIST_HEAD(, grouptask)  tgc_tasks;	/* grouptasks attached to this slot */
587         struct gtaskqueue       *tgc_taskq;	/* queue serving this slot */
588         int     tgc_cnt;	/* number of attached grouptasks */
589         int     tgc_cpu;	/* CPU id used when building affinity masks for this slot */
590 };
591
/* A group of per-slot task queues. */
592 struct taskqgroup {
593         struct taskqgroup_cpu tqg_queue[MAXCPU];	/* slots; the first tqg_cnt are in use */
594         struct mtx      tqg_lock;	/* held while slots and attachment lists are examined or changed */
595         const char *    tqg_name;	/* group name, used to name worker threads */
596         int             tqg_adjusting;	/* set while _taskqgroup_adjust() is in progress */
597         int             tqg_stride;	/* NOTE(review): presumably CPU spacing between slots -- confirm in _taskqgroup_adjust() */
598         int             tqg_cnt;	/* number of slots in use */
599 };
600
/* One-shot task used by taskqgroup_bind() to bind a worker thread to a CPU. */
601 struct taskq_bind_task {
602         struct gtask bt_task;	/* the task itself; handler is taskqgroup_binder() */
603         int     bt_cpuid;	/* CPU the executing thread binds itself to */
604 };
605
606 static void
607 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
608 {
609         struct taskqgroup_cpu *qcpu;
610
611         qcpu = &qgroup->tqg_queue[idx];
612         LIST_INIT(&qcpu->tgc_tasks);
613         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
614             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
615         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
616             "%s_%d", qgroup->tqg_name, idx);
617         qcpu->tgc_cpu = cpu;
618 }
619
620 static void
621 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
622 {
623
624         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
625 }
626
627 /*
628  * Find the taskq with least # of tasks that doesn't currently have any
629  * other queues from the uniq identifier.
630  */
631 static int
632 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
633 {
634         struct grouptask *n;
635         int i, idx, mincnt;
636         int strict;
637
638         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
639         if (qgroup->tqg_cnt == 0)
640                 return (0);
641         idx = -1;
642         mincnt = INT_MAX;
643         /*
644          * Two passes;  First scan for a queue with the least tasks that
645          * does not already service this uniq id.  If that fails simply find
646          * the queue with the least total tasks;
647          */
648         for (strict = 1; mincnt == INT_MAX; strict = 0) {
649                 for (i = 0; i < qgroup->tqg_cnt; i++) {
650                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
651                                 continue;
652                         if (strict) {
653                                 LIST_FOREACH(n,
654                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
655                                         if (n->gt_uniq == uniq)
656                                                 break;
657                                 if (n != NULL)
658                                         continue;
659                         }
660                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
661                         idx = i;
662                 }
663         }
664         if (idx == -1)
665                 panic("%s: failed to pick a qid.", __func__);
666
667         return (idx);
668 }
669
670 /*
671  * smp_started is unusable since it is not set for UP kernels or even for
672  * SMP kernels when there is 1 CPU.  This is usually handled by adding a
673  * (mp_ncpus == 1) test, but that would be broken here since we need to
674  * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
675  * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
676  *
677  * So maintain our own flag.  It must be set after all CPUs are started
678  * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
679  * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
680  * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
681  * simpler for adjustment to pass a flag indicating if it is delayed.
682  */ 
683
684 static int tqg_smp_started;
685
686 static void
687 tqg_record_smp_started(void *arg)
688 {
689         tqg_smp_started = 1;
690 }
691
692 SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
693         tqg_record_smp_started, NULL);
694
695 void
696 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
697     void *uniq, int irq, const char *name)
698 {
699         cpuset_t mask;
700         int qid, error;
701
702         gtask->gt_uniq = uniq;
703         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
704         gtask->gt_irq = irq;
705         gtask->gt_cpu = -1;
706         mtx_lock(&qgroup->tqg_lock);
707         qid = taskqgroup_find(qgroup, uniq);
708         qgroup->tqg_queue[qid].tgc_cnt++;
709         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
710         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
711         if (irq != -1 && tqg_smp_started) {
712                 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
713                 CPU_ZERO(&mask);
714                 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
715                 mtx_unlock(&qgroup->tqg_lock);
716                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
717                 if (error)
718                         printf("%s: binding interrupt failed for %s: %d\n",
719                             __func__, gtask->gt_name, error);
720         } else
721                 mtx_unlock(&qgroup->tqg_lock);
722 }
723
724 static void
725 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
726 {
727         cpuset_t mask;
728         int qid, cpu, error;
729
730         mtx_lock(&qgroup->tqg_lock);
731         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
732         cpu = qgroup->tqg_queue[qid].tgc_cpu;
733         if (gtask->gt_irq != -1) {
734                 mtx_unlock(&qgroup->tqg_lock);
735
736                 CPU_ZERO(&mask);
737                 CPU_SET(cpu, &mask);
738                 error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
739                 mtx_lock(&qgroup->tqg_lock);
740                 if (error)
741                         printf("%s: binding interrupt failed for %s: %d\n",
742                             __func__, gtask->gt_name, error);
743
744         }
745         qgroup->tqg_queue[qid].tgc_cnt++;
746         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
747         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
748         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
749         mtx_unlock(&qgroup->tqg_lock);
750 }
751
752 int
753 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
754     void *uniq, int cpu, int irq, const char *name)
755 {
756         cpuset_t mask;
757         int i, qid, error;
758
759         qid = -1;
760         gtask->gt_uniq = uniq;
761         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
762         gtask->gt_irq = irq;
763         gtask->gt_cpu = cpu;
764         mtx_lock(&qgroup->tqg_lock);
765         if (tqg_smp_started) {
766                 for (i = 0; i < qgroup->tqg_cnt; i++)
767                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
768                                 qid = i;
769                                 break;
770                         }
771                 if (qid == -1) {
772                         mtx_unlock(&qgroup->tqg_lock);
773                         printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
774                         return (EINVAL);
775                 }
776         } else
777                 qid = 0;
778         qgroup->tqg_queue[qid].tgc_cnt++;
779         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
780         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
781         cpu = qgroup->tqg_queue[qid].tgc_cpu;
782         mtx_unlock(&qgroup->tqg_lock);
783
784         CPU_ZERO(&mask);
785         CPU_SET(cpu, &mask);
786         if (irq != -1 && tqg_smp_started) {
787                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
788                 if (error)
789                         printf("%s: binding interrupt failed for %s: %d\n",
790                             __func__, gtask->gt_name, error);
791         }
792         return (0);
793 }
794
795 static int
796 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
797 {
798         cpuset_t mask;
799         int i, qid, irq, cpu, error;
800
801         qid = -1;
802         irq = gtask->gt_irq;
803         cpu = gtask->gt_cpu;
804         MPASS(tqg_smp_started);
805         mtx_lock(&qgroup->tqg_lock);
806         for (i = 0; i < qgroup->tqg_cnt; i++)
807                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
808                         qid = i;
809                         break;
810                 }
811         if (qid == -1) {
812                 mtx_unlock(&qgroup->tqg_lock);
813                 printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
814                 return (EINVAL);
815         }
816         qgroup->tqg_queue[qid].tgc_cnt++;
817         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
818         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
819         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
820         mtx_unlock(&qgroup->tqg_lock);
821
822         CPU_ZERO(&mask);
823         CPU_SET(cpu, &mask);
824
825         if (irq != -1) {
826                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
827                 if (error)
828                         printf("%s: binding interrupt failed for %s: %d\n",
829                             __func__, gtask->gt_name, error);
830         }
831         return (0);
832 }
833
834 void
835 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
836 {
837         int i;
838
839         grouptask_block(gtask);
840         mtx_lock(&qgroup->tqg_lock);
841         for (i = 0; i < qgroup->tqg_cnt; i++)
842                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
843                         break;
844         if (i == qgroup->tqg_cnt)
845                 panic("%s: task %s not in group", __func__, gtask->gt_name);
846         qgroup->tqg_queue[i].tgc_cnt--;
847         LIST_REMOVE(gtask, gt_list);
848         mtx_unlock(&qgroup->tqg_lock);
849         gtask->gt_taskqueue = NULL;
850         gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
851 }
852
853 static void
854 taskqgroup_binder(void *ctx)
855 {
856         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
857         cpuset_t mask;
858         int error;
859
860         CPU_ZERO(&mask);
861         CPU_SET(gtask->bt_cpuid, &mask);
862         error = cpuset_setthread(curthread->td_tid, &mask);
863         thread_lock(curthread);
864         sched_bind(curthread, gtask->bt_cpuid);
865         thread_unlock(curthread);
866
867         if (error)
868                 printf("%s: binding curthread failed: %d\n", __func__, error);
869         free(gtask, M_DEVBUF);
870 }
871
872 static void
873 taskqgroup_bind(struct taskqgroup *qgroup)
874 {
875         struct taskq_bind_task *gtask;
876         int i;
877
878         /*
879          * Bind taskqueue threads to specific CPUs, if they have been assigned
880          * one.
881          */
882         if (qgroup->tqg_cnt == 1)
883                 return;
884
885         for (i = 0; i < qgroup->tqg_cnt; i++) {
886                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
887                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
888                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
889                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
890                     &gtask->bt_task);
891         }
892 }
893
894 static void
895 taskqgroup_config_init(void *arg)
896 {
897         struct taskqgroup *qgroup = qgroup_config;
898         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
899
900         LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
901             grouptask, gt_list);
902         qgroup->tqg_queue[0].tgc_cnt = 0;
903         taskqgroup_cpu_create(qgroup, 0, 0);
904
905         qgroup->tqg_cnt = 1;
906         qgroup->tqg_stride = 1;
907 }
908
909 SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
910         taskqgroup_config_init, NULL);
911
912 static int
913 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
914 {
915         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
916         struct grouptask *gtask;
917         int i, k, old_cnt, old_cpu, cpu;
918
919         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
920
921         if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
922                 printf("%s: failed cnt: %d stride: %d "
923                     "mp_ncpus: %d tqg_smp_started: %d\n",
924                     __func__, cnt, stride, mp_ncpus, tqg_smp_started);
925                 return (EINVAL);
926         }
927         if (qgroup->tqg_adjusting) {
928                 printf("%s failed: adjusting\n", __func__);
929                 return (EBUSY);
930         }
931         qgroup->tqg_adjusting = 1;
932         old_cnt = qgroup->tqg_cnt;
933         old_cpu = 0;
934         if (old_cnt < cnt)
935                 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
936         mtx_unlock(&qgroup->tqg_lock);
937         /*
938          * Set up queue for tasks added before boot.
939          */
940         if (old_cnt == 0) {
941                 LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
942                     grouptask, gt_list);
943                 qgroup->tqg_queue[0].tgc_cnt = 0;
944         }
945
946         /*
947          * If new taskq threads have been added.
948          */
949         cpu = old_cpu;
950         for (i = old_cnt; i < cnt; i++) {
951                 taskqgroup_cpu_create(qgroup, i, cpu);
952
953                 for (k = 0; k < stride; k++)
954                         cpu = CPU_NEXT(cpu);
955         }
956         mtx_lock(&qgroup->tqg_lock);
957         qgroup->tqg_cnt = cnt;
958         qgroup->tqg_stride = stride;
959
960         /*
961          * Adjust drivers to use new taskqs.
962          */
963         for (i = 0; i < old_cnt; i++) {
964                 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
965                         LIST_REMOVE(gtask, gt_list);
966                         qgroup->tqg_queue[i].tgc_cnt--;
967                         LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
968                 }
969         }
970         mtx_unlock(&qgroup->tqg_lock);
971
972         while ((gtask = LIST_FIRST(&gtask_head))) {
973                 LIST_REMOVE(gtask, gt_list);
974                 if (gtask->gt_cpu == -1)
975                         taskqgroup_attach_deferred(qgroup, gtask);
976                 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
977                         taskqgroup_attach_deferred(qgroup, gtask);
978         }
979
980 #ifdef INVARIANTS
981         mtx_lock(&qgroup->tqg_lock);
982         for (i = 0; i < qgroup->tqg_cnt; i++) {
983                 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
984                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
985                         MPASS(gtask->gt_taskqueue != NULL);
986         }
987         mtx_unlock(&qgroup->tqg_lock);
988 #endif
989         /*
990          * If taskq thread count has been reduced.
991          */
992         for (i = cnt; i < old_cnt; i++)
993                 taskqgroup_cpu_remove(qgroup, i);
994
995         taskqgroup_bind(qgroup);
996
997         mtx_lock(&qgroup->tqg_lock);
998         qgroup->tqg_adjusting = 0;
999
1000         return (0);
1001 }
1002
1003 int
1004 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
1005 {
1006         int error;
1007
1008         mtx_lock(&qgroup->tqg_lock);
1009         error = _taskqgroup_adjust(qgroup, cnt, stride);
1010         mtx_unlock(&qgroup->tqg_lock);
1011
1012         return (error);
1013 }
1014
1015 struct taskqgroup *
1016 taskqgroup_create(const char *name)
1017 {
1018         struct taskqgroup *qgroup;
1019
1020         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
1021         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
1022         qgroup->tqg_name = name;
1023         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
1024
1025         return (qgroup);
1026 }
1027
/*
 * Destroy a taskqueue group.
 *
 * Currently a no-op: the body is empty, so neither the group structure
 * nor its mutex is released by this call.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{
	/* Intentionally empty. */
}
1033
1034 void
1035 taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
1036     const char *name)
1037 {
1038
1039         GROUPTASK_INIT(gtask, 0, fn, ctx);
1040         taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
1041 }
1042
1043 void
1044 taskqgroup_config_gtask_deinit(struct grouptask *gtask)
1045 {
1046
1047         taskqgroup_detach(qgroup_config, gtask);
1048 }