]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/subr_gtaskqueue.c
Fix r313495.
[FreeBSD/FreeBSD.git] / sys / kern / subr_gtaskqueue.c
1 /*-
2  * Copyright (c) 2000 Doug Rabson
3  * Copyright (c) 2014 Jeff Roberson
4  * Copyright (c) 2016 Matthew Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/cpuset.h>
36 #include <sys/interrupt.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/libkern.h>
40 #include <sys/limits.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/gtaskqueue.h>
48 #include <sys/unistd.h>
49 #include <machine/stdarg.h>
50
51 static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues");
52 static void     gtaskqueue_thread_enqueue(void *);
53 static void     gtaskqueue_thread_loop(void *arg);
54
55 struct gtaskqueue_busy {
56         struct gtask    *tb_running;
57         TAILQ_ENTRY(gtaskqueue_busy) tb_link;
58 };
59
/*
 * Sentinel stored in tb_running by drain waiters.  Within this file it is
 * only ever compared against, never dereferenced.
 */
static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
61
62 struct gtaskqueue {
63         STAILQ_HEAD(, gtask)    tq_queue;
64         gtaskqueue_enqueue_fn   tq_enqueue;
65         void                    *tq_context;
66         char                    *tq_name;
67         TAILQ_HEAD(, gtaskqueue_busy) tq_active;
68         struct mtx              tq_mutex;
69         struct thread           **tq_threads;
70         int                     tq_tcount;
71         int                     tq_spin;
72         int                     tq_flags;
73         int                     tq_callouts;
74         taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
75         void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
76 };
77
/* Bits stored in gtaskqueue.tq_flags. */
#define	TQ_FLAGS_ACTIVE			0x1	/* cleared by gtaskqueue_free() */
#define	TQ_FLAGS_BLOCKED		0x2	/* suppress the tq_enqueue hook */
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	0x4	/* hook is gtaskqueue_thread_enqueue */

/* Not referenced by the code visible in this file. */
#define	DT_CALLOUT_ARMED		0x1
83
/*
 * Acquire the queue mutex with the primitive that matches how it was
 * initialised: mtx_lock_spin() when tq_spin is set, mtx_lock() otherwise.
 */
#define	TQ_LOCK(tq)							\
	do {								\
		if (!(tq)->tq_spin)					\
			mtx_lock(&(tq)->tq_mutex);			\
		else							\
			mtx_lock_spin(&(tq)->tq_mutex);			\
	} while (0)

/* Assert (MA_OWNED) on the queue mutex. */
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)
92
/*
 * Release the queue mutex with the primitive that matches how it was
 * initialised: mtx_unlock_spin() when tq_spin is set, mtx_unlock() otherwise.
 */
#define	TQ_UNLOCK(tq)							\
	do {								\
		if (!(tq)->tq_spin)					\
			mtx_unlock(&(tq)->tq_mutex);			\
		else							\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
	} while (0)

/* Assert (MA_NOTOWNED) on the queue mutex. */
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
101
#ifdef INVARIANTS
/*
 * Debug aid, built only under INVARIANTS: print the address of a gtask
 * together with its flags, priority, handler and handler argument.
 */
static void
gtask_dump(struct gtask *gtask)
{

	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	    gtask,
	    gtask->ta_flags,
	    gtask->ta_priority,
	    gtask->ta_func,
	    gtask->ta_context);
}
#endif
110
111 static __inline int
112 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
113     int t)
114 {
115         if (tq->tq_spin)
116                 return (msleep_spin(p, m, wm, t));
117         return (msleep(p, m, pri, wm, t));
118 }
119
120 static struct gtaskqueue *
121 _gtaskqueue_create(const char *name, int mflags,
122                  taskqueue_enqueue_fn enqueue, void *context,
123                  int mtxflags, const char *mtxname __unused)
124 {
125         struct gtaskqueue *queue;
126         char *tq_name;
127
128         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
129         if (!tq_name)
130                 return (NULL);
131
132         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
133
134         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
135         if (!queue)
136                 return (NULL);
137
138         STAILQ_INIT(&queue->tq_queue);
139         TAILQ_INIT(&queue->tq_active);
140         queue->tq_enqueue = enqueue;
141         queue->tq_context = context;
142         queue->tq_name = tq_name;
143         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
144         queue->tq_flags |= TQ_FLAGS_ACTIVE;
145         if (enqueue == gtaskqueue_thread_enqueue)
146                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
147         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
148
149         return (queue);
150 }
151
152
153 /*
154  * Signal a taskqueue thread to terminate.
155  */
156 static void
157 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
158 {
159
160         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
161                 wakeup(tq);
162                 TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
163         }
164 }
165
166 static void
167 gtaskqueue_free(struct gtaskqueue *queue)
168 {
169
170         TQ_LOCK(queue);
171         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
172         gtaskqueue_terminate(queue->tq_threads, queue);
173         KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
174         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
175         mtx_destroy(&queue->tq_mutex);
176         free(queue->tq_threads, M_GTASKQUEUE);
177         free(queue->tq_name, M_GTASKQUEUE);
178         free(queue, M_GTASKQUEUE);
179 }
180
181 int
182 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
183 {
184 #ifdef INVARIANTS
185         if (queue == NULL) {
186                 gtask_dump(gtask);
187                 panic("queue == NULL");
188         }
189 #endif
190         TQ_LOCK(queue);
191         if (gtask->ta_flags & TASK_ENQUEUED) {
192                 TQ_UNLOCK(queue);
193                 return (0);
194         }
195         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
196         gtask->ta_flags |= TASK_ENQUEUED;
197         TQ_UNLOCK(queue);
198         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
199                 queue->tq_enqueue(queue->tq_context);
200         return (0);
201 }
202
/*
 * Task handler that does nothing; used as the handler of the barrier task
 * in gtaskqueue_drain_tq_queue().
 */
static void
gtaskqueue_task_nop_fn(void *context)
{

	/* Intentionally empty. */
}
207
208 /*
209  * Block until all currently queued tasks in this taskqueue
210  * have begun execution.  Tasks queued during execution of
211  * this function are ignored.
212  */
213 static void
214 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
215 {
216         struct gtask t_barrier;
217
218         if (STAILQ_EMPTY(&queue->tq_queue))
219                 return;
220
221         /*
222          * Enqueue our barrier after all current tasks, but with
223          * the highest priority so that newly queued tasks cannot
224          * pass it.  Because of the high priority, we can not use
225          * taskqueue_enqueue_locked directly (which drops the lock
226          * anyway) so just insert it at tail while we have the
227          * queue lock.
228          */
229         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
230         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
231         t_barrier.ta_flags |= TASK_ENQUEUED;
232
233         /*
234          * Once the barrier has executed, all previously queued tasks
235          * have completed or are currently executing.
236          */
237         while (t_barrier.ta_flags & TASK_ENQUEUED)
238                 TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
239 }
240
241 /*
242  * Block until all currently executing tasks for this taskqueue
243  * complete.  Tasks that begin execution during the execution
244  * of this function are ignored.
245  */
246 static void
247 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
248 {
249         struct gtaskqueue_busy tb_marker, *tb_first;
250
251         if (TAILQ_EMPTY(&queue->tq_active))
252                 return;
253
254         /* Block taskq_terminate().*/
255         queue->tq_callouts++;
256
257         /*
258          * Wait for all currently executing taskqueue threads
259          * to go idle.
260          */
261         tb_marker.tb_running = TB_DRAIN_WAITER;
262         TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
263         while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
264                 TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
265         TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
266
267         /*
268          * Wakeup any other drain waiter that happened to queue up
269          * without any intervening active thread.
270          */
271         tb_first = TAILQ_FIRST(&queue->tq_active);
272         if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
273                 wakeup(tb_first);
274
275         /* Release taskqueue_terminate(). */
276         queue->tq_callouts--;
277         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
278                 wakeup_one(queue->tq_threads);
279 }
280
281 void
282 gtaskqueue_block(struct gtaskqueue *queue)
283 {
284
285         TQ_LOCK(queue);
286         queue->tq_flags |= TQ_FLAGS_BLOCKED;
287         TQ_UNLOCK(queue);
288 }
289
290 void
291 gtaskqueue_unblock(struct gtaskqueue *queue)
292 {
293
294         TQ_LOCK(queue);
295         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
296         if (!STAILQ_EMPTY(&queue->tq_queue))
297                 queue->tq_enqueue(queue->tq_context);
298         TQ_UNLOCK(queue);
299 }
300
301 static void
302 gtaskqueue_run_locked(struct gtaskqueue *queue)
303 {
304         struct gtaskqueue_busy tb;
305         struct gtaskqueue_busy *tb_first;
306         struct gtask *gtask;
307
308         KASSERT(queue != NULL, ("tq is NULL"));
309         TQ_ASSERT_LOCKED(queue);
310         tb.tb_running = NULL;
311
312         while (STAILQ_FIRST(&queue->tq_queue)) {
313                 TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
314
315                 /*
316                  * Carefully remove the first task from the queue and
317                  * clear its TASK_ENQUEUED flag
318                  */
319                 gtask = STAILQ_FIRST(&queue->tq_queue);
320                 KASSERT(gtask != NULL, ("task is NULL"));
321                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
322                 gtask->ta_flags &= ~TASK_ENQUEUED;
323                 tb.tb_running = gtask;
324                 TQ_UNLOCK(queue);
325
326                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
327                 gtask->ta_func(gtask->ta_context);
328
329                 TQ_LOCK(queue);
330                 tb.tb_running = NULL;
331                 wakeup(gtask);
332
333                 TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
334                 tb_first = TAILQ_FIRST(&queue->tq_active);
335                 if (tb_first != NULL &&
336                     tb_first->tb_running == TB_DRAIN_WAITER)
337                         wakeup(tb_first);
338         }
339 }
340
341 static int
342 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
343 {
344         struct gtaskqueue_busy *tb;
345
346         TQ_ASSERT_LOCKED(queue);
347         TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
348                 if (tb->tb_running == gtask)
349                         return (1);
350         }
351         return (0);
352 }
353
354 static int
355 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
356 {
357
358         if (gtask->ta_flags & TASK_ENQUEUED)
359                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
360         gtask->ta_flags &= ~TASK_ENQUEUED;
361         return (task_is_running(queue, gtask) ? EBUSY : 0);
362 }
363
/*
 * Locked wrapper around gtaskqueue_cancel_locked(): takes the queue lock,
 * cancels, drops the lock and returns the helper's result (0 or EBUSY).
 */
int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int rv;

	TQ_LOCK(queue);
	rv = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);
	return (rv);
}
375
376 void
377 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
378 {
379
380         if (!queue->tq_spin)
381                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
382
383         TQ_LOCK(queue);
384         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
385                 TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
386         TQ_UNLOCK(queue);
387 }
388
389 void
390 gtaskqueue_drain_all(struct gtaskqueue *queue)
391 {
392
393         if (!queue->tq_spin)
394                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
395
396         TQ_LOCK(queue);
397         gtaskqueue_drain_tq_queue(queue);
398         gtaskqueue_drain_tq_active(queue);
399         TQ_UNLOCK(queue);
400 }
401
402 static int
403 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
404     cpuset_t *mask, const char *name, va_list ap)
405 {
406         char ktname[MAXCOMLEN + 1];
407         struct thread *td;
408         struct gtaskqueue *tq;
409         int i, error;
410
411         if (count <= 0)
412                 return (EINVAL);
413
414         vsnprintf(ktname, sizeof(ktname), name, ap);
415         tq = *tqp;
416
417         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
418             M_NOWAIT | M_ZERO);
419         if (tq->tq_threads == NULL) {
420                 printf("%s: no memory for %s threads\n", __func__, ktname);
421                 return (ENOMEM);
422         }
423
424         for (i = 0; i < count; i++) {
425                 if (count == 1)
426                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
427                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
428                 else
429                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
430                             &tq->tq_threads[i], RFSTOPPED, 0,
431                             "%s_%d", ktname, i);
432                 if (error) {
433                         /* should be ok to continue, taskqueue_free will dtrt */
434                         printf("%s: kthread_add(%s): error %d", __func__,
435                             ktname, error);
436                         tq->tq_threads[i] = NULL;               /* paranoid */
437                 } else
438                         tq->tq_tcount++;
439         }
440         for (i = 0; i < count; i++) {
441                 if (tq->tq_threads[i] == NULL)
442                         continue;
443                 td = tq->tq_threads[i];
444                 if (mask) {
445                         error = cpuset_setthread(td->td_tid, mask);
446                         /*
447                          * Failing to pin is rarely an actual fatal error;
448                          * it'll just affect performance.
449                          */
450                         if (error)
451                                 printf("%s: curthread=%llu: can't pin; "
452                                     "error=%d\n",
453                                     __func__,
454                                     (unsigned long long) td->td_tid,
455                                     error);
456                 }
457                 thread_lock(td);
458                 sched_prio(td, pri);
459                 sched_add(td, SRQ_BORING);
460                 thread_unlock(td);
461         }
462
463         return (0);
464 }
465
466 static int
467 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
468     const char *name, ...)
469 {
470         va_list ap;
471         int error;
472
473         va_start(ap, name);
474         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
475         va_end(ap);
476         return (error);
477 }
478
479 static inline void
480 gtaskqueue_run_callback(struct gtaskqueue *tq,
481     enum taskqueue_callback_type cb_type)
482 {
483         taskqueue_callback_fn tq_callback;
484
485         TQ_ASSERT_UNLOCKED(tq);
486         tq_callback = tq->tq_callbacks[cb_type];
487         if (tq_callback != NULL)
488                 tq_callback(tq->tq_cb_contexts[cb_type]);
489 }
490
491 static void
492 gtaskqueue_thread_loop(void *arg)
493 {
494         struct gtaskqueue **tqp, *tq;
495
496         tqp = arg;
497         tq = *tqp;
498         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
499         TQ_LOCK(tq);
500         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
501                 /* XXX ? */
502                 gtaskqueue_run_locked(tq);
503                 /*
504                  * Because taskqueue_run() can drop tq_mutex, we need to
505                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
506                  * meantime, which means we missed a wakeup.
507                  */
508                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
509                         break;
510                 TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
511         }
512         gtaskqueue_run_locked(tq);
513         /*
514          * This thread is on its way out, so just drop the lock temporarily
515          * in order to call the shutdown callback.  This allows the callback
516          * to look at the taskqueue, even just before it dies.
517          */
518         TQ_UNLOCK(tq);
519         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
520         TQ_LOCK(tq);
521
522         /* rendezvous with thread that asked us to terminate */
523         tq->tq_tcount--;
524         wakeup_one(tq->tq_threads);
525         TQ_UNLOCK(tq);
526         kthread_exit();
527 }
528
/*
 * Enqueue hook for thread-backed queues.  'context' is the address of the
 * queue pointer; one thread sleeping on the queue is woken.
 */
static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue **tqp = context;

	wakeup_one(*tqp);
}
538
539
540 static struct gtaskqueue *
541 gtaskqueue_create_fast(const char *name, int mflags,
542                  taskqueue_enqueue_fn enqueue, void *context)
543 {
544         return _gtaskqueue_create(name, mflags, enqueue, context,
545                         MTX_SPIN, "fast_taskqueue");
546 }
547
548
549 struct taskqgroup_cpu {
550         LIST_HEAD(, grouptask)  tgc_tasks;
551         struct gtaskqueue       *tgc_taskq;
552         int     tgc_cnt;
553         int     tgc_cpu;
554 };
555
556 struct taskqgroup {
557         struct taskqgroup_cpu tqg_queue[MAXCPU];
558         struct mtx      tqg_lock;
559         char *          tqg_name;
560         int             tqg_adjusting;
561         int             tqg_stride;
562         int             tqg_cnt;
563 };
564
565 struct taskq_bind_task {
566         struct gtask bt_task;
567         int     bt_cpuid;
568 };
569
570 static void
571 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
572 {
573         struct taskqgroup_cpu *qcpu;
574
575         qcpu = &qgroup->tqg_queue[idx];
576         LIST_INIT(&qcpu->tgc_tasks);
577         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
578             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
579         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
580             "%s_%d", qgroup->tqg_name, idx);
581         qcpu->tgc_cpu = cpu;
582 }
583
584 static void
585 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
586 {
587
588         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
589 }
590
591 /*
592  * Find the taskq with least # of tasks that doesn't currently have any
593  * other queues from the uniq identifier.
594  */
595 static int
596 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
597 {
598         struct grouptask *n;
599         int i, idx, mincnt;
600         int strict;
601
602         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
603         if (qgroup->tqg_cnt == 0)
604                 return (0);
605         idx = -1;
606         mincnt = INT_MAX;
607         /*
608          * Two passes;  First scan for a queue with the least tasks that
609          * does not already service this uniq id.  If that fails simply find
610          * the queue with the least total tasks;
611          */
612         for (strict = 1; mincnt == INT_MAX; strict = 0) {
613                 for (i = 0; i < qgroup->tqg_cnt; i++) {
614                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
615                                 continue;
616                         if (strict) {
617                                 LIST_FOREACH(n,
618                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
619                                         if (n->gt_uniq == uniq)
620                                                 break;
621                                 if (n != NULL)
622                                         continue;
623                         }
624                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
625                         idx = i;
626                 }
627         }
628         if (idx == -1)
629                 panic("taskqgroup_find: Failed to pick a qid.");
630
631         return (idx);
632 }
633
/*
 * smp_started is unusable since it is not set for UP kernels or even for
 * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 *
 * So maintain our own flag.  It must be set after all CPUs are started
 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 * simpler for adjustment to pass a flag indicating if it is delayed.
 */
647
/* Set to 1 by tqg_record_smp_started(); 0 until that SYSINIT has run. */
static int tqg_smp_started;

/* SYSINIT handler: record that the SI_SUB_SMP stage has been reached. */
static void
tqg_record_smp_started(void *arg)
{

	tqg_smp_started = 1;
}

/* Run the handler at SI_SUB_SMP, SI_ORDER_FOURTH. */
SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
    tqg_record_smp_started, NULL);
658
659 void
660 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
661     void *uniq, int irq, char *name)
662 {
663         cpuset_t mask;
664         int qid;
665
666         gtask->gt_uniq = uniq;
667         gtask->gt_name = name;
668         gtask->gt_irq = irq;
669         gtask->gt_cpu = -1;
670         mtx_lock(&qgroup->tqg_lock);
671         qid = taskqgroup_find(qgroup, uniq);
672         qgroup->tqg_queue[qid].tgc_cnt++;
673         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
674         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
675         if (irq != -1 && tqg_smp_started) {
676                 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
677                 CPU_ZERO(&mask);
678                 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
679                 mtx_unlock(&qgroup->tqg_lock);
680                 intr_setaffinity(irq, &mask);
681         } else
682                 mtx_unlock(&qgroup->tqg_lock);
683 }
684
685 static void
686 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
687 {
688         cpuset_t mask;
689         int qid, cpu;
690
691         mtx_lock(&qgroup->tqg_lock);
692         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
693         cpu = qgroup->tqg_queue[qid].tgc_cpu;
694         if (gtask->gt_irq != -1) {
695                 mtx_unlock(&qgroup->tqg_lock);
696
697                 CPU_ZERO(&mask);
698                 CPU_SET(cpu, &mask);
699                 intr_setaffinity(gtask->gt_irq, &mask);
700
701                 mtx_lock(&qgroup->tqg_lock);
702         }
703         qgroup->tqg_queue[qid].tgc_cnt++;
704
705         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
706                          gt_list);
707         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
708         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
709         mtx_unlock(&qgroup->tqg_lock);
710 }
711
712 int
713 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
714         void *uniq, int cpu, int irq, char *name)
715 {
716         cpuset_t mask;
717         int i, qid;
718
719         qid = -1;
720         gtask->gt_uniq = uniq;
721         gtask->gt_name = name;
722         gtask->gt_irq = irq;
723         gtask->gt_cpu = cpu;
724         mtx_lock(&qgroup->tqg_lock);
725         if (tqg_smp_started) {
726                 for (i = 0; i < qgroup->tqg_cnt; i++)
727                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
728                                 qid = i;
729                                 break;
730                         }
731                 if (qid == -1) {
732                         mtx_unlock(&qgroup->tqg_lock);
733                         return (EINVAL);
734                 }
735         } else
736                 qid = 0;
737         qgroup->tqg_queue[qid].tgc_cnt++;
738         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
739         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
740         cpu = qgroup->tqg_queue[qid].tgc_cpu;
741         mtx_unlock(&qgroup->tqg_lock);
742
743         CPU_ZERO(&mask);
744         CPU_SET(cpu, &mask);
745         if (irq != -1 && tqg_smp_started)
746                 intr_setaffinity(irq, &mask);
747         return (0);
748 }
749
750 static int
751 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
752 {
753         cpuset_t mask;
754         int i, qid, irq, cpu;
755
756         qid = -1;
757         irq = gtask->gt_irq;
758         cpu = gtask->gt_cpu;
759         MPASS(tqg_smp_started);
760         mtx_lock(&qgroup->tqg_lock);
761         for (i = 0; i < qgroup->tqg_cnt; i++)
762                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
763                         qid = i;
764                         break;
765                 }
766         if (qid == -1) {
767                 mtx_unlock(&qgroup->tqg_lock);
768                 return (EINVAL);
769         }
770         qgroup->tqg_queue[qid].tgc_cnt++;
771         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
772         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
773         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
774         mtx_unlock(&qgroup->tqg_lock);
775
776         CPU_ZERO(&mask);
777         CPU_SET(cpu, &mask);
778
779         if (irq != -1)
780                 intr_setaffinity(irq, &mask);
781         return (0);
782 }
783
784 void
785 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
786 {
787         int i;
788
789         mtx_lock(&qgroup->tqg_lock);
790         for (i = 0; i < qgroup->tqg_cnt; i++)
791                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
792                         break;
793         if (i == qgroup->tqg_cnt)
794                 panic("taskqgroup_detach: task not in group\n");
795         qgroup->tqg_queue[i].tgc_cnt--;
796         LIST_REMOVE(gtask, gt_list);
797         mtx_unlock(&qgroup->tqg_lock);
798         gtask->gt_taskqueue = NULL;
799 }
800
801 static void
802 taskqgroup_binder(void *ctx)
803 {
804         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
805         cpuset_t mask;
806         int error;
807
808         CPU_ZERO(&mask);
809         CPU_SET(gtask->bt_cpuid, &mask);
810         error = cpuset_setthread(curthread->td_tid, &mask);
811         thread_lock(curthread);
812         sched_bind(curthread, gtask->bt_cpuid);
813         thread_unlock(curthread);
814
815         if (error)
816                 printf("taskqgroup_binder: setaffinity failed: %d\n",
817                     error);
818         free(gtask, M_DEVBUF);
819 }
820
821 static void
822 taskqgroup_bind(struct taskqgroup *qgroup)
823 {
824         struct taskq_bind_task *gtask;
825         int i;
826
827         /*
828          * Bind taskqueue threads to specific CPUs, if they have been assigned
829          * one.
830          */
831         if (qgroup->tqg_cnt == 1)
832                 return;
833
834         for (i = 0; i < qgroup->tqg_cnt; i++) {
835                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
836                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
837                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
838                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
839                     &gtask->bt_task);
840         }
841 }
842
843 static int
844 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
845 {
846         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
847         struct grouptask *gtask;
848         int i, k, old_cnt, old_cpu, cpu;
849
850         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
851
852         if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
853                 printf("%s: failed cnt: %d stride: %d "
854                     "mp_ncpus: %d tqg_smp_started: %d\n",
855                     __func__, cnt, stride, mp_ncpus, tqg_smp_started);
856                 return (EINVAL);
857         }
858         if (qgroup->tqg_adjusting) {
859                 printf("taskqgroup_adjust failed: adjusting\n");
860                 return (EBUSY);
861         }
862         qgroup->tqg_adjusting = 1;
863         old_cnt = qgroup->tqg_cnt;
864         old_cpu = 0;
865         if (old_cnt < cnt)
866                 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
867         mtx_unlock(&qgroup->tqg_lock);
868         /*
869          * Set up queue for tasks added before boot.
870          */
871         if (old_cnt == 0) {
872                 LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
873                     grouptask, gt_list);
874                 qgroup->tqg_queue[0].tgc_cnt = 0;
875         }
876
877         /*
878          * If new taskq threads have been added.
879          */
880         cpu = old_cpu;
881         for (i = old_cnt; i < cnt; i++) {
882                 taskqgroup_cpu_create(qgroup, i, cpu);
883
884                 for (k = 0; k < stride; k++)
885                         cpu = CPU_NEXT(cpu);
886         }
887         mtx_lock(&qgroup->tqg_lock);
888         qgroup->tqg_cnt = cnt;
889         qgroup->tqg_stride = stride;
890
891         /*
892          * Adjust drivers to use new taskqs.
893          */
894         for (i = 0; i < old_cnt; i++) {
895                 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
896                         LIST_REMOVE(gtask, gt_list);
897                         qgroup->tqg_queue[i].tgc_cnt--;
898                         LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
899                 }
900         }
901         mtx_unlock(&qgroup->tqg_lock);
902
903         while ((gtask = LIST_FIRST(&gtask_head))) {
904                 LIST_REMOVE(gtask, gt_list);
905                 if (gtask->gt_cpu == -1)
906                         taskqgroup_attach_deferred(qgroup, gtask);
907                 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
908                         taskqgroup_attach_deferred(qgroup, gtask);
909         }
910
911 #ifdef INVARIANTS
912         mtx_lock(&qgroup->tqg_lock);
913         for (i = 0; i < qgroup->tqg_cnt; i++) {
914                 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
915                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
916                         MPASS(gtask->gt_taskqueue != NULL);
917         }
918         mtx_unlock(&qgroup->tqg_lock);
919 #endif
920         /*
921          * If taskq thread count has been reduced.
922          */
923         for (i = cnt; i < old_cnt; i++)
924                 taskqgroup_cpu_remove(qgroup, i);
925
926         taskqgroup_bind(qgroup);
927
928         mtx_lock(&qgroup->tqg_lock);
929         qgroup->tqg_adjusting = 0;
930
931         return (0);
932 }
933
934 int
935 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
936 {
937         int error;
938
939         mtx_lock(&qgroup->tqg_lock);
940         error = _taskqgroup_adjust(qgroup, cnt, stride);
941         mtx_unlock(&qgroup->tqg_lock);
942
943         return (error);
944 }
945
946 struct taskqgroup *
947 taskqgroup_create(char *name)
948 {
949         struct taskqgroup *qgroup;
950
951         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
952         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
953         qgroup->tqg_name = name;
954         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
955
956         return (qgroup);
957 }
958
/*
 * Currently a no-op: nothing allocated by taskqgroup_create() or by a
 * later adjust is released here.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

	/* Intentionally empty. */
}