]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/subr_gtaskqueue.c
Fix missing pfctl(8) tunable.
[FreeBSD/FreeBSD.git] / sys / kern / subr_gtaskqueue.c
1 /*-
2  * Copyright (c) 2000 Doug Rabson
3  * Copyright (c) 2014 Jeff Roberson
4  * Copyright (c) 2016 Matthew Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/cpuset.h>
36 #include <sys/interrupt.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/libkern.h>
40 #include <sys/limits.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/gtaskqueue.h>
48 #include <sys/unistd.h>
49 #include <machine/stdarg.h>
50
51 static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
52 static void     gtaskqueue_thread_enqueue(void *);
53 static void     gtaskqueue_thread_loop(void *arg);
54
55 TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
56
57 struct gtaskqueue_busy {
58         struct gtask    *tb_running;
59         TAILQ_ENTRY(gtaskqueue_busy) tb_link;
60 };
61
62 static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
63
64 struct gtaskqueue {
65         STAILQ_HEAD(, gtask)    tq_queue;
66         gtaskqueue_enqueue_fn   tq_enqueue;
67         void                    *tq_context;
68         char                    *tq_name;
69         TAILQ_HEAD(, gtaskqueue_busy) tq_active;
70         struct mtx              tq_mutex;
71         struct thread           **tq_threads;
72         int                     tq_tcount;
73         int                     tq_spin;
74         int                     tq_flags;
75         int                     tq_callouts;
76         taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
77         void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
78 };
79
80 #define TQ_FLAGS_ACTIVE         (1 << 0)
81 #define TQ_FLAGS_BLOCKED        (1 << 1)
82 #define TQ_FLAGS_UNLOCKED_ENQUEUE       (1 << 2)
83
84 #define DT_CALLOUT_ARMED        (1 << 0)
85
/*
 * Queue lock helpers.  The mutex flavour is fixed at creation time and
 * recorded in tq_spin, so each helper dispatches on that field.
 */
#define	TQ_LOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_lock_spin(&(tq)->tq_mutex);			\
		else							\
			mtx_lock(&(tq)->tq_mutex);			\
	} while (0)

#define	TQ_UNLOCK(tq)							\
	do {								\
		if ((tq)->tq_spin)					\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
		else							\
			mtx_unlock(&(tq)->tq_mutex);			\
	} while (0)

/* Ownership assertions on the queue mutex. */
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
103
#ifdef INVARIANTS
/*
 * Print the identifying fields of a gtask.  Only built with INVARIANTS;
 * grouptaskqueue_enqueue() calls it right before panicking on a NULL queue.
 */
static void
gtask_dump(struct gtask *gtask)
{

	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	    gtask,
	    gtask->ta_flags,
	    gtask->ta_priority,
	    gtask->ta_func,
	    gtask->ta_context);
}
#endif
112
113 static __inline int
114 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
115     int t)
116 {
117         if (tq->tq_spin)
118                 return (msleep_spin(p, m, wm, t));
119         return (msleep(p, m, pri, wm, t));
120 }
121
122 static struct gtaskqueue *
123 _gtaskqueue_create(const char *name, int mflags,
124                  taskqueue_enqueue_fn enqueue, void *context,
125                  int mtxflags, const char *mtxname __unused)
126 {
127         struct gtaskqueue *queue;
128         char *tq_name;
129
130         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
131         if (!tq_name)
132                 return (NULL);
133
134         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
135
136         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
137         if (!queue) {
138                 free(tq_name, M_GTASKQUEUE);
139                 return (NULL);
140         }
141
142         STAILQ_INIT(&queue->tq_queue);
143         TAILQ_INIT(&queue->tq_active);
144         queue->tq_enqueue = enqueue;
145         queue->tq_context = context;
146         queue->tq_name = tq_name;
147         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
148         queue->tq_flags |= TQ_FLAGS_ACTIVE;
149         if (enqueue == gtaskqueue_thread_enqueue)
150                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
151         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
152
153         return (queue);
154 }
155
156
157 /*
158  * Signal a taskqueue thread to terminate.
159  */
160 static void
161 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
162 {
163
164         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
165                 wakeup(tq);
166                 TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
167         }
168 }
169
170 static void
171 gtaskqueue_free(struct gtaskqueue *queue)
172 {
173
174         TQ_LOCK(queue);
175         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
176         gtaskqueue_terminate(queue->tq_threads, queue);
177         KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
178         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
179         mtx_destroy(&queue->tq_mutex);
180         free(queue->tq_threads, M_GTASKQUEUE);
181         free(queue->tq_name, M_GTASKQUEUE);
182         free(queue, M_GTASKQUEUE);
183 }
184
185 int
186 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
187 {
188 #ifdef INVARIANTS
189         if (queue == NULL) {
190                 gtask_dump(gtask);
191                 panic("queue == NULL");
192         }
193 #endif
194         TQ_LOCK(queue);
195         if (gtask->ta_flags & TASK_ENQUEUED) {
196                 TQ_UNLOCK(queue);
197                 return (0);
198         }
199         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
200         gtask->ta_flags |= TASK_ENQUEUED;
201         TQ_UNLOCK(queue);
202         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
203                 queue->tq_enqueue(queue->tq_context);
204         return (0);
205 }
206
/*
 * Task body that does nothing; used for the barrier task queued by
 * gtaskqueue_drain_tq_queue().
 */
static void
gtaskqueue_task_nop_fn(void *context)
{

}
211
212 /*
213  * Block until all currently queued tasks in this taskqueue
214  * have begun execution.  Tasks queued during execution of
215  * this function are ignored.
216  */
217 static void
218 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
219 {
220         struct gtask t_barrier;
221
222         if (STAILQ_EMPTY(&queue->tq_queue))
223                 return;
224
225         /*
226          * Enqueue our barrier after all current tasks, but with
227          * the highest priority so that newly queued tasks cannot
228          * pass it.  Because of the high priority, we can not use
229          * taskqueue_enqueue_locked directly (which drops the lock
230          * anyway) so just insert it at tail while we have the
231          * queue lock.
232          */
233         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
234         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
235         t_barrier.ta_flags |= TASK_ENQUEUED;
236
237         /*
238          * Once the barrier has executed, all previously queued tasks
239          * have completed or are currently executing.
240          */
241         while (t_barrier.ta_flags & TASK_ENQUEUED)
242                 TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
243 }
244
245 /*
246  * Block until all currently executing tasks for this taskqueue
247  * complete.  Tasks that begin execution during the execution
248  * of this function are ignored.
249  */
250 static void
251 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
252 {
253         struct gtaskqueue_busy tb_marker, *tb_first;
254
255         if (TAILQ_EMPTY(&queue->tq_active))
256                 return;
257
258         /* Block taskq_terminate().*/
259         queue->tq_callouts++;
260
261         /*
262          * Wait for all currently executing taskqueue threads
263          * to go idle.
264          */
265         tb_marker.tb_running = TB_DRAIN_WAITER;
266         TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
267         while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
268                 TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
269         TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
270
271         /*
272          * Wakeup any other drain waiter that happened to queue up
273          * without any intervening active thread.
274          */
275         tb_first = TAILQ_FIRST(&queue->tq_active);
276         if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
277                 wakeup(tb_first);
278
279         /* Release taskqueue_terminate(). */
280         queue->tq_callouts--;
281         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
282                 wakeup_one(queue->tq_threads);
283 }
284
285 void
286 gtaskqueue_block(struct gtaskqueue *queue)
287 {
288
289         TQ_LOCK(queue);
290         queue->tq_flags |= TQ_FLAGS_BLOCKED;
291         TQ_UNLOCK(queue);
292 }
293
294 void
295 gtaskqueue_unblock(struct gtaskqueue *queue)
296 {
297
298         TQ_LOCK(queue);
299         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
300         if (!STAILQ_EMPTY(&queue->tq_queue))
301                 queue->tq_enqueue(queue->tq_context);
302         TQ_UNLOCK(queue);
303 }
304
305 static void
306 gtaskqueue_run_locked(struct gtaskqueue *queue)
307 {
308         struct gtaskqueue_busy tb;
309         struct gtaskqueue_busy *tb_first;
310         struct gtask *gtask;
311
312         KASSERT(queue != NULL, ("tq is NULL"));
313         TQ_ASSERT_LOCKED(queue);
314         tb.tb_running = NULL;
315
316         while (STAILQ_FIRST(&queue->tq_queue)) {
317                 TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
318
319                 /*
320                  * Carefully remove the first task from the queue and
321                  * clear its TASK_ENQUEUED flag
322                  */
323                 gtask = STAILQ_FIRST(&queue->tq_queue);
324                 KASSERT(gtask != NULL, ("task is NULL"));
325                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
326                 gtask->ta_flags &= ~TASK_ENQUEUED;
327                 tb.tb_running = gtask;
328                 TQ_UNLOCK(queue);
329
330                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
331                 gtask->ta_func(gtask->ta_context);
332
333                 TQ_LOCK(queue);
334                 tb.tb_running = NULL;
335                 wakeup(gtask);
336
337                 TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
338                 tb_first = TAILQ_FIRST(&queue->tq_active);
339                 if (tb_first != NULL &&
340                     tb_first->tb_running == TB_DRAIN_WAITER)
341                         wakeup(tb_first);
342         }
343 }
344
345 static int
346 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
347 {
348         struct gtaskqueue_busy *tb;
349
350         TQ_ASSERT_LOCKED(queue);
351         TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
352                 if (tb->tb_running == gtask)
353                         return (1);
354         }
355         return (0);
356 }
357
358 static int
359 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
360 {
361
362         if (gtask->ta_flags & TASK_ENQUEUED)
363                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
364         gtask->ta_flags &= ~TASK_ENQUEUED;
365         return (task_is_running(queue, gtask) ? EBUSY : 0);
366 }
367
/*
 * Locked wrapper around gtaskqueue_cancel_locked(); returns its result
 * (0, or EBUSY if the task is currently running).
 */
int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int rv;

	TQ_LOCK(queue);
	rv = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);
	return (rv);
}
379
380 void
381 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
382 {
383
384         if (!queue->tq_spin)
385                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
386
387         TQ_LOCK(queue);
388         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
389                 TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
390         TQ_UNLOCK(queue);
391 }
392
393 void
394 gtaskqueue_drain_all(struct gtaskqueue *queue)
395 {
396
397         if (!queue->tq_spin)
398                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
399
400         TQ_LOCK(queue);
401         gtaskqueue_drain_tq_queue(queue);
402         gtaskqueue_drain_tq_active(queue);
403         TQ_UNLOCK(queue);
404 }
405
406 static int
407 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
408     cpuset_t *mask, const char *name, va_list ap)
409 {
410         char ktname[MAXCOMLEN + 1];
411         struct thread *td;
412         struct gtaskqueue *tq;
413         int i, error;
414
415         if (count <= 0)
416                 return (EINVAL);
417
418         vsnprintf(ktname, sizeof(ktname), name, ap);
419         tq = *tqp;
420
421         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
422             M_NOWAIT | M_ZERO);
423         if (tq->tq_threads == NULL) {
424                 printf("%s: no memory for %s threads\n", __func__, ktname);
425                 return (ENOMEM);
426         }
427
428         for (i = 0; i < count; i++) {
429                 if (count == 1)
430                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
431                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
432                 else
433                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
434                             &tq->tq_threads[i], RFSTOPPED, 0,
435                             "%s_%d", ktname, i);
436                 if (error) {
437                         /* should be ok to continue, taskqueue_free will dtrt */
438                         printf("%s: kthread_add(%s): error %d", __func__,
439                             ktname, error);
440                         tq->tq_threads[i] = NULL;               /* paranoid */
441                 } else
442                         tq->tq_tcount++;
443         }
444         for (i = 0; i < count; i++) {
445                 if (tq->tq_threads[i] == NULL)
446                         continue;
447                 td = tq->tq_threads[i];
448                 if (mask) {
449                         error = cpuset_setthread(td->td_tid, mask);
450                         /*
451                          * Failing to pin is rarely an actual fatal error;
452                          * it'll just affect performance.
453                          */
454                         if (error)
455                                 printf("%s: curthread=%llu: can't pin; "
456                                     "error=%d\n",
457                                     __func__,
458                                     (unsigned long long) td->td_tid,
459                                     error);
460                 }
461                 thread_lock(td);
462                 sched_prio(td, pri);
463                 sched_add(td, SRQ_BORING);
464                 thread_unlock(td);
465         }
466
467         return (0);
468 }
469
470 static int
471 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
472     const char *name, ...)
473 {
474         va_list ap;
475         int error;
476
477         va_start(ap, name);
478         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
479         va_end(ap);
480         return (error);
481 }
482
483 static inline void
484 gtaskqueue_run_callback(struct gtaskqueue *tq,
485     enum taskqueue_callback_type cb_type)
486 {
487         taskqueue_callback_fn tq_callback;
488
489         TQ_ASSERT_UNLOCKED(tq);
490         tq_callback = tq->tq_callbacks[cb_type];
491         if (tq_callback != NULL)
492                 tq_callback(tq->tq_cb_contexts[cb_type]);
493 }
494
495 static void
496 gtaskqueue_thread_loop(void *arg)
497 {
498         struct gtaskqueue **tqp, *tq;
499
500         tqp = arg;
501         tq = *tqp;
502         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
503         TQ_LOCK(tq);
504         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
505                 /* XXX ? */
506                 gtaskqueue_run_locked(tq);
507                 /*
508                  * Because taskqueue_run() can drop tq_mutex, we need to
509                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
510                  * meantime, which means we missed a wakeup.
511                  */
512                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
513                         break;
514                 TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
515         }
516         gtaskqueue_run_locked(tq);
517         /*
518          * This thread is on its way out, so just drop the lock temporarily
519          * in order to call the shutdown callback.  This allows the callback
520          * to look at the taskqueue, even just before it dies.
521          */
522         TQ_UNLOCK(tq);
523         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
524         TQ_LOCK(tq);
525
526         /* rendezvous with thread that asked us to terminate */
527         tq->tq_tcount--;
528         wakeup_one(tq->tq_threads);
529         TQ_UNLOCK(tq);
530         kthread_exit();
531 }
532
/*
 * Enqueue hook for thread-backed queues: 'context' is the address of the
 * queue pointer; wake one thread sleeping on the queue.
 */
static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue **tqp = context;

	wakeup_one(*tqp);
}
542
543
544 static struct gtaskqueue *
545 gtaskqueue_create_fast(const char *name, int mflags,
546                  taskqueue_enqueue_fn enqueue, void *context)
547 {
548         return _gtaskqueue_create(name, mflags, enqueue, context,
549                         MTX_SPIN, "fast_taskqueue");
550 }
551
552
553 struct taskqgroup_cpu {
554         LIST_HEAD(, grouptask)  tgc_tasks;
555         struct gtaskqueue       *tgc_taskq;
556         int     tgc_cnt;
557         int     tgc_cpu;
558 };
559
560 struct taskqgroup {
561         struct taskqgroup_cpu tqg_queue[MAXCPU];
562         struct mtx      tqg_lock;
563         char *          tqg_name;
564         int             tqg_adjusting;
565         int             tqg_stride;
566         int             tqg_cnt;
567 };
568
569 struct taskq_bind_task {
570         struct gtask bt_task;
571         int     bt_cpuid;
572 };
573
574 static void
575 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
576 {
577         struct taskqgroup_cpu *qcpu;
578
579         qcpu = &qgroup->tqg_queue[idx];
580         LIST_INIT(&qcpu->tgc_tasks);
581         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
582             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
583         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
584             "%s_%d", qgroup->tqg_name, idx);
585         qcpu->tgc_cpu = cpu;
586 }
587
588 static void
589 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
590 {
591
592         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
593 }
594
595 /*
596  * Find the taskq with least # of tasks that doesn't currently have any
597  * other queues from the uniq identifier.
598  */
599 static int
600 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
601 {
602         struct grouptask *n;
603         int i, idx, mincnt;
604         int strict;
605
606         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
607         if (qgroup->tqg_cnt == 0)
608                 return (0);
609         idx = -1;
610         mincnt = INT_MAX;
611         /*
612          * Two passes;  First scan for a queue with the least tasks that
613          * does not already service this uniq id.  If that fails simply find
614          * the queue with the least total tasks;
615          */
616         for (strict = 1; mincnt == INT_MAX; strict = 0) {
617                 for (i = 0; i < qgroup->tqg_cnt; i++) {
618                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
619                                 continue;
620                         if (strict) {
621                                 LIST_FOREACH(n,
622                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
623                                         if (n->gt_uniq == uniq)
624                                                 break;
625                                 if (n != NULL)
626                                         continue;
627                         }
628                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
629                         idx = i;
630                 }
631         }
632         if (idx == -1)
633                 panic("taskqgroup_find: Failed to pick a qid.");
634
635         return (idx);
636 }
637
638 /*
639  * smp_started is unusable since it is not set for UP kernels or even for
640  * SMP kernels when there is 1 CPU.  This is usually handled by adding a
641  * (mp_ncpus == 1) test, but that would be broken here since we need to
642  * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
643  * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
644  *
645  * So maintain our own flag.  It must be set after all CPUs are started
646  * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
647  * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
648  * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
649  * simpler for adjustment to pass a flag indicating if it is delayed.
650  */ 
651
/* Set to 1 by the SYSINIT below; 0 until then. */
static int tqg_smp_started;

/* SYSINIT hook: record that the SI_SUB_SMP stage has been reached. */
static void
tqg_record_smp_started(void *arg)
{

	tqg_smp_started = 1;
}

SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
    tqg_record_smp_started, NULL);
662
663 void
664 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
665     void *uniq, int irq, char *name)
666 {
667         cpuset_t mask;
668         int qid, error;
669
670         gtask->gt_uniq = uniq;
671         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
672         gtask->gt_irq = irq;
673         gtask->gt_cpu = -1;
674         mtx_lock(&qgroup->tqg_lock);
675         qid = taskqgroup_find(qgroup, uniq);
676         qgroup->tqg_queue[qid].tgc_cnt++;
677         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
678         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
679         if (irq != -1 && tqg_smp_started) {
680                 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
681                 CPU_ZERO(&mask);
682                 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
683                 mtx_unlock(&qgroup->tqg_lock);
684                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
685                 if (error)
686                         printf("%s: setaffinity failed for %s: %d\n", __func__, gtask->gt_name, error);
687         } else
688                 mtx_unlock(&qgroup->tqg_lock);
689 }
690
691 static void
692 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
693 {
694         cpuset_t mask;
695         int qid, cpu, error;
696
697         mtx_lock(&qgroup->tqg_lock);
698         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
699         cpu = qgroup->tqg_queue[qid].tgc_cpu;
700         if (gtask->gt_irq != -1) {
701                 mtx_unlock(&qgroup->tqg_lock);
702
703                 CPU_ZERO(&mask);
704                 CPU_SET(cpu, &mask);
705                 error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
706                 mtx_lock(&qgroup->tqg_lock);
707                 if (error)
708                         printf("%s: %s setaffinity failed: %d\n", __func__, gtask->gt_name, error);
709
710         }
711         qgroup->tqg_queue[qid].tgc_cnt++;
712
713         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
714                          gt_list);
715         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
716         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
717         mtx_unlock(&qgroup->tqg_lock);
718 }
719
720 int
721 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
722         void *uniq, int cpu, int irq, char *name)
723 {
724         cpuset_t mask;
725         int i, qid, error;
726
727         qid = -1;
728         gtask->gt_uniq = uniq;
729         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
730         gtask->gt_irq = irq;
731         gtask->gt_cpu = cpu;
732         mtx_lock(&qgroup->tqg_lock);
733         if (tqg_smp_started) {
734                 for (i = 0; i < qgroup->tqg_cnt; i++)
735                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
736                                 qid = i;
737                                 break;
738                         }
739                 if (qid == -1) {
740                         mtx_unlock(&qgroup->tqg_lock);
741                         printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
742                         return (EINVAL);
743                 }
744         } else
745                 qid = 0;
746         qgroup->tqg_queue[qid].tgc_cnt++;
747         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
748         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
749         cpu = qgroup->tqg_queue[qid].tgc_cpu;
750         mtx_unlock(&qgroup->tqg_lock);
751
752         CPU_ZERO(&mask);
753         CPU_SET(cpu, &mask);
754         if (irq != -1 && tqg_smp_started) {
755                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
756                 if (error)
757                         printf("%s: setaffinity failed: %d\n", __func__, error);
758         }
759         return (0);
760 }
761
762 static int
763 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
764 {
765         cpuset_t mask;
766         int i, qid, irq, cpu, error;
767
768         qid = -1;
769         irq = gtask->gt_irq;
770         cpu = gtask->gt_cpu;
771         MPASS(tqg_smp_started);
772         mtx_lock(&qgroup->tqg_lock);
773         for (i = 0; i < qgroup->tqg_cnt; i++)
774                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
775                         qid = i;
776                         break;
777                 }
778         if (qid == -1) {
779                 mtx_unlock(&qgroup->tqg_lock);
780                 printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
781                 return (EINVAL);
782         }
783         qgroup->tqg_queue[qid].tgc_cnt++;
784         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
785         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
786         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
787         mtx_unlock(&qgroup->tqg_lock);
788
789         CPU_ZERO(&mask);
790         CPU_SET(cpu, &mask);
791
792         if (irq != -1) {
793                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
794                 if (error)
795                         printf("%s: setaffinity failed: %d\n", __func__, error);
796         }
797         return (0);
798 }
799
800 void
801 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
802 {
803         int i;
804
805         mtx_lock(&qgroup->tqg_lock);
806         for (i = 0; i < qgroup->tqg_cnt; i++)
807                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
808                         break;
809         if (i == qgroup->tqg_cnt)
810                 panic("taskqgroup_detach: task %s not in group\n", gtask->gt_name);
811         qgroup->tqg_queue[i].tgc_cnt--;
812         LIST_REMOVE(gtask, gt_list);
813         mtx_unlock(&qgroup->tqg_lock);
814         gtask->gt_taskqueue = NULL;
815 }
816
817 static void
818 taskqgroup_binder(void *ctx)
819 {
820         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
821         cpuset_t mask;
822         int error;
823
824         CPU_ZERO(&mask);
825         CPU_SET(gtask->bt_cpuid, &mask);
826         error = cpuset_setthread(curthread->td_tid, &mask);
827         thread_lock(curthread);
828         sched_bind(curthread, gtask->bt_cpuid);
829         thread_unlock(curthread);
830
831         if (error)
832                 printf("%s: setaffinity failed: %d\n", __func__,
833                     error);
834         free(gtask, M_DEVBUF);
835 }
836
837 static void
838 taskqgroup_bind(struct taskqgroup *qgroup)
839 {
840         struct taskq_bind_task *gtask;
841         int i;
842
843         /*
844          * Bind taskqueue threads to specific CPUs, if they have been assigned
845          * one.
846          */
847         if (qgroup->tqg_cnt == 1)
848                 return;
849
850         for (i = 0; i < qgroup->tqg_cnt; i++) {
851                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
852                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
853                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
854                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
855                     &gtask->bt_task);
856         }
857 }
858
859 static int
860 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
861 {
862         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
863         struct grouptask *gtask;
864         int i, k, old_cnt, old_cpu, cpu;
865
866         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
867
868         if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
869                 printf("%s: failed cnt: %d stride: %d "
870                     "mp_ncpus: %d tqg_smp_started: %d\n",
871                     __func__, cnt, stride, mp_ncpus, tqg_smp_started);
872                 return (EINVAL);
873         }
874         if (qgroup->tqg_adjusting) {
875                 printf("%s failed: adjusting\n", __func__);
876                 return (EBUSY);
877         }
878         qgroup->tqg_adjusting = 1;
879         old_cnt = qgroup->tqg_cnt;
880         old_cpu = 0;
881         if (old_cnt < cnt)
882                 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
883         mtx_unlock(&qgroup->tqg_lock);
884         /*
885          * Set up queue for tasks added before boot.
886          */
887         if (old_cnt == 0) {
888                 LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
889                     grouptask, gt_list);
890                 qgroup->tqg_queue[0].tgc_cnt = 0;
891         }
892
893         /*
894          * If new taskq threads have been added.
895          */
896         cpu = old_cpu;
897         for (i = old_cnt; i < cnt; i++) {
898                 taskqgroup_cpu_create(qgroup, i, cpu);
899
900                 for (k = 0; k < stride; k++)
901                         cpu = CPU_NEXT(cpu);
902         }
903         mtx_lock(&qgroup->tqg_lock);
904         qgroup->tqg_cnt = cnt;
905         qgroup->tqg_stride = stride;
906
907         /*
908          * Adjust drivers to use new taskqs.
909          */
910         for (i = 0; i < old_cnt; i++) {
911                 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
912                         LIST_REMOVE(gtask, gt_list);
913                         qgroup->tqg_queue[i].tgc_cnt--;
914                         LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
915                 }
916         }
917         mtx_unlock(&qgroup->tqg_lock);
918
919         while ((gtask = LIST_FIRST(&gtask_head))) {
920                 LIST_REMOVE(gtask, gt_list);
921                 if (gtask->gt_cpu == -1)
922                         taskqgroup_attach_deferred(qgroup, gtask);
923                 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
924                         taskqgroup_attach_deferred(qgroup, gtask);
925         }
926
927 #ifdef INVARIANTS
928         mtx_lock(&qgroup->tqg_lock);
929         for (i = 0; i < qgroup->tqg_cnt; i++) {
930                 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
931                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
932                         MPASS(gtask->gt_taskqueue != NULL);
933         }
934         mtx_unlock(&qgroup->tqg_lock);
935 #endif
936         /*
937          * If taskq thread count has been reduced.
938          */
939         for (i = cnt; i < old_cnt; i++)
940                 taskqgroup_cpu_remove(qgroup, i);
941
942         taskqgroup_bind(qgroup);
943
944         mtx_lock(&qgroup->tqg_lock);
945         qgroup->tqg_adjusting = 0;
946
947         return (0);
948 }
949
950 int
951 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
952 {
953         int error;
954
955         mtx_lock(&qgroup->tqg_lock);
956         error = _taskqgroup_adjust(qgroup, cnt, stride);
957         mtx_unlock(&qgroup->tqg_lock);
958
959         return (error);
960 }
961
962 struct taskqgroup *
963 taskqgroup_create(char *name)
964 {
965         struct taskqgroup *qgroup;
966
967         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
968         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
969         qgroup->tqg_name = name;
970         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
971
972         return (qgroup);
973 }
974
/*
 * Placeholder: currently does nothing, so a group obtained from
 * taskqgroup_create() is never released by this call.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{
}