]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/subr_gtaskqueue.c
Upgrade to OpenSSH 7.3p1.
[FreeBSD/FreeBSD.git] / sys / kern / subr_gtaskqueue.c
1 /*-
2  * Copyright (c) 2000 Doug Rabson
3  * Copyright (c) 2014 Jeff Roberson
4  * Copyright (c) 2016 Matthew Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/cpuset.h>
36 #include <sys/interrupt.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/libkern.h>
40 #include <sys/limits.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/gtaskqueue.h>
48 #include <sys/unistd.h>
49 #include <machine/stdarg.h>
50
/*
 * Malloc type used for the queue structures, queue names and thread
 * pointer arrays allocated in this file.
 */
static MALLOC_DEFINE(M_GTASKQUEUE, "taskqueue", "Task Queues");
/* Forward declarations; both functions are defined later in this file. */
static void     gtaskqueue_thread_enqueue(void *);
static void     gtaskqueue_thread_loop(void *arg);

/*
 * Define the "softirq" task queue group.
 * NOTE(review): the two numeric arguments are presumably the queue count
 * and the CPU stride -- confirm against the TASKQGROUP_DEFINE definition.
 */
TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
56
/*
 * Per-runner bookkeeping record linked onto gtaskqueue.tq_active while a
 * thread is inside gtaskqueue_run_locked(), or while a drainer is waiting
 * in gtaskqueue_drain_tq_active().
 */
struct gtaskqueue_busy {
        struct gtask    *tb_running;    /* task being run, NULL, or TB_DRAIN_WAITER */
        TAILQ_ENTRY(gtaskqueue_busy) tb_link;   /* linkage on tq_active */
};

/*
 * Sentinel stored in tb_running to mark a drain waiter's marker entry; it
 * is only ever compared against, never dereferenced, in this file.
 */
static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
63
/*
 * A group task queue: a FIFO of pending gtasks plus the state needed by
 * the threads that service it.
 */
struct gtaskqueue {
        STAILQ_HEAD(, gtask)    tq_queue;       /* pending (enqueued) tasks */
        gtaskqueue_enqueue_fn   tq_enqueue;     /* called to kick the queue after an enqueue */
        void                    *tq_context;    /* argument passed to tq_enqueue */
        char                    *tq_name;       /* heap-allocated name; also used as the mutex name */
        TAILQ_HEAD(, gtaskqueue_busy) tq_active; /* busy records of running threads and drain markers */
        struct mtx              tq_mutex;       /* protects the queue; spin or default per tq_spin */
        struct thread           **tq_threads;   /* array of service threads; also a sleep channel */
        int                     tq_tcount;      /* number of service threads successfully created */
        int                     tq_spin;        /* nonzero when tq_mutex is a spin mutex */
        int                     tq_flags;       /* TQ_FLAGS_* bits */
        int                     tq_callouts;    /* raised by drain_tq_active to hold off terminate */
        taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];  /* per-type thread callbacks */
        void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS]; /* matching callback arguments */
};
79
/* tq_flags bits. */
#define TQ_FLAGS_ACTIVE         (1 << 0)        /* set at creation, cleared by gtaskqueue_free() */
#define TQ_FLAGS_BLOCKED        (1 << 1)        /* set by gtaskqueue_block(): do not kick the queue */
#define TQ_FLAGS_UNLOCKED_ENQUEUE       (1 << 2) /* set when tq_enqueue is gtaskqueue_thread_enqueue */

/* Not referenced anywhere else in the visible part of this file. */
#define DT_CALLOUT_ARMED        (1 << 0)
85
/*
 * Acquire the queue mutex, using the spin-mutex or the default-mutex
 * primitive depending on how the queue was created (tq_spin).
 */
#define TQ_LOCK(tq)                                                     \
        do {                                                            \
                if ((tq)->tq_spin)                                      \
                        mtx_lock_spin(&(tq)->tq_mutex);                 \
                else                                                    \
                        mtx_lock(&(tq)->tq_mutex);                      \
        } while (0)
/* Assert that the calling thread owns the queue mutex. */
#define TQ_ASSERT_LOCKED(tq)    mtx_assert(&(tq)->tq_mutex, MA_OWNED)

/* Release the queue mutex; counterpart of TQ_LOCK(). */
#define TQ_UNLOCK(tq)                                                   \
        do {                                                            \
                if ((tq)->tq_spin)                                      \
                        mtx_unlock_spin(&(tq)->tq_mutex);               \
                else                                                    \
                        mtx_unlock(&(tq)->tq_mutex);                    \
        } while (0)
/* Assert that the calling thread does not own the queue mutex. */
#define TQ_ASSERT_UNLOCKED(tq)  mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
103
#ifdef INVARIANTS
/*
 * Debugging aid, compiled only with INVARIANTS: print the address of a
 * gtask together with its flags, priority, handler and handler argument.
 */
static void
gtask_dump(struct gtask *gtask)
{

        printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
            gtask,
            gtask->ta_flags,
            gtask->ta_priority,
            gtask->ta_func,
            gtask->ta_context);
}
#endif
112
113 static __inline int
114 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
115     int t)
116 {
117         if (tq->tq_spin)
118                 return (msleep_spin(p, m, wm, t));
119         return (msleep(p, m, pri, wm, t));
120 }
121
122 static struct gtaskqueue *
123 _gtaskqueue_create(const char *name, int mflags,
124                  taskqueue_enqueue_fn enqueue, void *context,
125                  int mtxflags, const char *mtxname __unused)
126 {
127         struct gtaskqueue *queue;
128         char *tq_name;
129
130         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
131         if (!tq_name)
132                 return (NULL);
133
134         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
135
136         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
137         if (!queue)
138                 return (NULL);
139
140         STAILQ_INIT(&queue->tq_queue);
141         TAILQ_INIT(&queue->tq_active);
142         queue->tq_enqueue = enqueue;
143         queue->tq_context = context;
144         queue->tq_name = tq_name;
145         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
146         queue->tq_flags |= TQ_FLAGS_ACTIVE;
147         if (enqueue == gtaskqueue_thread_enqueue)
148                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
149         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
150
151         return (queue);
152 }
153
154
155 /*
156  * Signal a taskqueue thread to terminate.
157  */
158 static void
159 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
160 {
161
162         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
163                 wakeup(tq);
164                 TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
165         }
166 }
167
168 static void
169 gtaskqueue_free(struct gtaskqueue *queue)
170 {
171
172         TQ_LOCK(queue);
173         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
174         gtaskqueue_terminate(queue->tq_threads, queue);
175         KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
176         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
177         mtx_destroy(&queue->tq_mutex);
178         free(queue->tq_threads, M_GTASKQUEUE);
179         free(queue->tq_name, M_GTASKQUEUE);
180         free(queue, M_GTASKQUEUE);
181 }
182
183 int
184 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
185 {
186 #ifdef INVARIANTS
187         if (queue == NULL) {
188                 gtask_dump(gtask);
189                 panic("queue == NULL");
190         }
191 #endif
192         TQ_LOCK(queue);
193         if (gtask->ta_flags & TASK_ENQUEUED) {
194                 TQ_UNLOCK(queue);
195                 return (0);
196         }
197         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
198         gtask->ta_flags |= TASK_ENQUEUED;
199         TQ_UNLOCK(queue);
200         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
201                 queue->tq_enqueue(queue->tq_context);
202         return (0);
203 }
204
205 static void
206 gtaskqueue_task_nop_fn(void *context)
207 {
208 }
209
210 /*
211  * Block until all currently queued tasks in this taskqueue
212  * have begun execution.  Tasks queued during execution of
213  * this function are ignored.
214  */
215 static void
216 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
217 {
218         struct gtask t_barrier;
219
220         if (STAILQ_EMPTY(&queue->tq_queue))
221                 return;
222
223         /*
224          * Enqueue our barrier after all current tasks, but with
225          * the highest priority so that newly queued tasks cannot
226          * pass it.  Because of the high priority, we can not use
227          * taskqueue_enqueue_locked directly (which drops the lock
228          * anyway) so just insert it at tail while we have the
229          * queue lock.
230          */
231         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
232         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
233         t_barrier.ta_flags |= TASK_ENQUEUED;
234
235         /*
236          * Once the barrier has executed, all previously queued tasks
237          * have completed or are currently executing.
238          */
239         while (t_barrier.ta_flags & TASK_ENQUEUED)
240                 TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
241 }
242
243 /*
244  * Block until all currently executing tasks for this taskqueue
245  * complete.  Tasks that begin execution during the execution
246  * of this function are ignored.
247  */
248 static void
249 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
250 {
251         struct gtaskqueue_busy tb_marker, *tb_first;
252
253         if (TAILQ_EMPTY(&queue->tq_active))
254                 return;
255
256         /* Block taskq_terminate().*/
257         queue->tq_callouts++;
258
259         /*
260          * Wait for all currently executing taskqueue threads
261          * to go idle.
262          */
263         tb_marker.tb_running = TB_DRAIN_WAITER;
264         TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
265         while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
266                 TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
267         TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
268
269         /*
270          * Wakeup any other drain waiter that happened to queue up
271          * without any intervening active thread.
272          */
273         tb_first = TAILQ_FIRST(&queue->tq_active);
274         if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
275                 wakeup(tb_first);
276
277         /* Release taskqueue_terminate(). */
278         queue->tq_callouts--;
279         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
280                 wakeup_one(queue->tq_threads);
281 }
282
283 void
284 gtaskqueue_block(struct gtaskqueue *queue)
285 {
286
287         TQ_LOCK(queue);
288         queue->tq_flags |= TQ_FLAGS_BLOCKED;
289         TQ_UNLOCK(queue);
290 }
291
292 void
293 gtaskqueue_unblock(struct gtaskqueue *queue)
294 {
295
296         TQ_LOCK(queue);
297         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
298         if (!STAILQ_EMPTY(&queue->tq_queue))
299                 queue->tq_enqueue(queue->tq_context);
300         TQ_UNLOCK(queue);
301 }
302
303 static void
304 gtaskqueue_run_locked(struct gtaskqueue *queue)
305 {
306         struct gtaskqueue_busy tb;
307         struct gtaskqueue_busy *tb_first;
308         struct gtask *gtask;
309
310         KASSERT(queue != NULL, ("tq is NULL"));
311         TQ_ASSERT_LOCKED(queue);
312         tb.tb_running = NULL;
313
314         while (STAILQ_FIRST(&queue->tq_queue)) {
315                 TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
316
317                 /*
318                  * Carefully remove the first task from the queue and
319                  * clear its TASK_ENQUEUED flag
320                  */
321                 gtask = STAILQ_FIRST(&queue->tq_queue);
322                 KASSERT(gtask != NULL, ("task is NULL"));
323                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
324                 gtask->ta_flags &= ~TASK_ENQUEUED;
325                 tb.tb_running = gtask;
326                 TQ_UNLOCK(queue);
327
328                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
329                 gtask->ta_func(gtask->ta_context);
330
331                 TQ_LOCK(queue);
332                 tb.tb_running = NULL;
333                 wakeup(gtask);
334
335                 TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
336                 tb_first = TAILQ_FIRST(&queue->tq_active);
337                 if (tb_first != NULL &&
338                     tb_first->tb_running == TB_DRAIN_WAITER)
339                         wakeup(tb_first);
340         }
341 }
342
343 static int
344 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
345 {
346         struct gtaskqueue_busy *tb;
347
348         TQ_ASSERT_LOCKED(queue);
349         TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
350                 if (tb->tb_running == gtask)
351                         return (1);
352         }
353         return (0);
354 }
355
356 static int
357 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
358 {
359
360         if (gtask->ta_flags & TASK_ENQUEUED)
361                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
362         gtask->ta_flags &= ~TASK_ENQUEUED;
363         return (task_is_running(queue, gtask) ? EBUSY : 0);
364 }
365
/*
 * Locked wrapper around gtaskqueue_cancel_locked(): take the queue mutex,
 * cancel the task and return the result (0, or EBUSY if it is running).
 */
int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
        int rv;

        TQ_LOCK(queue);
        rv = gtaskqueue_cancel_locked(queue, gtask);
        TQ_UNLOCK(queue);
        return (rv);
}
377
378 void
379 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
380 {
381
382         if (!queue->tq_spin)
383                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
384
385         TQ_LOCK(queue);
386         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
387                 TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
388         TQ_UNLOCK(queue);
389 }
390
391 void
392 gtaskqueue_drain_all(struct gtaskqueue *queue)
393 {
394
395         if (!queue->tq_spin)
396                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
397
398         TQ_LOCK(queue);
399         gtaskqueue_drain_tq_queue(queue);
400         gtaskqueue_drain_tq_active(queue);
401         TQ_UNLOCK(queue);
402 }
403
404 static int
405 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
406     cpuset_t *mask, const char *name, va_list ap)
407 {
408         char ktname[MAXCOMLEN + 1];
409         struct thread *td;
410         struct gtaskqueue *tq;
411         int i, error;
412
413         if (count <= 0)
414                 return (EINVAL);
415
416         vsnprintf(ktname, sizeof(ktname), name, ap);
417         tq = *tqp;
418
419         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
420             M_NOWAIT | M_ZERO);
421         if (tq->tq_threads == NULL) {
422                 printf("%s: no memory for %s threads\n", __func__, ktname);
423                 return (ENOMEM);
424         }
425
426         for (i = 0; i < count; i++) {
427                 if (count == 1)
428                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
429                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
430                 else
431                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
432                             &tq->tq_threads[i], RFSTOPPED, 0,
433                             "%s_%d", ktname, i);
434                 if (error) {
435                         /* should be ok to continue, taskqueue_free will dtrt */
436                         printf("%s: kthread_add(%s): error %d", __func__,
437                             ktname, error);
438                         tq->tq_threads[i] = NULL;               /* paranoid */
439                 } else
440                         tq->tq_tcount++;
441         }
442         for (i = 0; i < count; i++) {
443                 if (tq->tq_threads[i] == NULL)
444                         continue;
445                 td = tq->tq_threads[i];
446                 if (mask) {
447                         error = cpuset_setthread(td->td_tid, mask);
448                         /*
449                          * Failing to pin is rarely an actual fatal error;
450                          * it'll just affect performance.
451                          */
452                         if (error)
453                                 printf("%s: curthread=%llu: can't pin; "
454                                     "error=%d\n",
455                                     __func__,
456                                     (unsigned long long) td->td_tid,
457                                     error);
458                 }
459                 thread_lock(td);
460                 sched_prio(td, pri);
461                 sched_add(td, SRQ_BORING);
462                 thread_unlock(td);
463         }
464
465         return (0);
466 }
467
468 static int
469 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
470     const char *name, ...)
471 {
472         va_list ap;
473         int error;
474
475         va_start(ap, name);
476         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
477         va_end(ap);
478         return (error);
479 }
480
481 static inline void
482 gtaskqueue_run_callback(struct gtaskqueue *tq,
483     enum taskqueue_callback_type cb_type)
484 {
485         taskqueue_callback_fn tq_callback;
486
487         TQ_ASSERT_UNLOCKED(tq);
488         tq_callback = tq->tq_callbacks[cb_type];
489         if (tq_callback != NULL)
490                 tq_callback(tq->tq_cb_contexts[cb_type]);
491 }
492
493 static void
494 gtaskqueue_thread_loop(void *arg)
495 {
496         struct gtaskqueue **tqp, *tq;
497
498         tqp = arg;
499         tq = *tqp;
500         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
501         TQ_LOCK(tq);
502         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
503                 /* XXX ? */
504                 gtaskqueue_run_locked(tq);
505                 /*
506                  * Because taskqueue_run() can drop tq_mutex, we need to
507                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
508                  * meantime, which means we missed a wakeup.
509                  */
510                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
511                         break;
512                 TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
513         }
514         gtaskqueue_run_locked(tq);
515         /*
516          * This thread is on its way out, so just drop the lock temporarily
517          * in order to call the shutdown callback.  This allows the callback
518          * to look at the taskqueue, even just before it dies.
519          */
520         TQ_UNLOCK(tq);
521         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
522         TQ_LOCK(tq);
523
524         /* rendezvous with thread that asked us to terminate */
525         tq->tq_tcount--;
526         wakeup_one(tq->tq_threads);
527         TQ_UNLOCK(tq);
528         kthread_exit();
529 }
530
/*
 * Enqueue hook for thread-serviced queues: 'context' is the address of
 * the queue pointer; wake one thread sleeping on the queue.
 */
static void
gtaskqueue_thread_enqueue(void *context)
{
        struct gtaskqueue **tqp = context;

        wakeup_one(*tqp);
}
540
541
542 static struct gtaskqueue *
543 gtaskqueue_create_fast(const char *name, int mflags,
544                  taskqueue_enqueue_fn enqueue, void *context)
545 {
546         return _gtaskqueue_create(name, mflags, enqueue, context,
547                         MTX_SPIN, "fast_taskqueue");
548 }
549
550
/* One slot of a task queue group: a queue and the tasks attached to it. */
struct taskqgroup_cpu {
        LIST_HEAD(, grouptask)  tgc_tasks;      /* grouptasks attached to this slot */
        struct gtaskqueue       *tgc_taskq;     /* queue servicing this slot */
        int     tgc_cnt;                        /* number of attached grouptasks */
        int     tgc_cpu;                        /* CPU assigned when the slot was created */
};
557
/* A group of task queues, of which the first tqg_cnt slots are in use. */
struct taskqgroup {
        struct taskqgroup_cpu tqg_queue[MAXCPU]; /* per-slot state */
        struct mtx      tqg_lock;       /* protects slot lists and counters */
        char *          tqg_name;       /* caller-supplied name pointer (not copied) */
        int             tqg_adjusting;  /* nonzero while _taskqgroup_adjust() is running */
        int             tqg_stride;     /* stride passed to the last adjust */
        int             tqg_cnt;        /* number of slots in use */
};
566
/*
 * Heap-allocated one-shot task used by taskqgroup_bind(); freed by
 * taskqgroup_binder() after it has run.
 */
struct taskq_bind_task {
        struct gtask bt_task;   /* the task that is enqueued */
        int     bt_cpuid;       /* CPU the executing thread is bound to */
};
571
572 static void
573 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
574 {
575         struct taskqgroup_cpu *qcpu;
576
577         qcpu = &qgroup->tqg_queue[idx];
578         LIST_INIT(&qcpu->tgc_tasks);
579         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
580             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
581         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
582             "%s_%d", qgroup->tqg_name, idx);
583         qcpu->tgc_cpu = cpu;
584 }
585
586 static void
587 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
588 {
589
590         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
591 }
592
593 /*
594  * Find the taskq with least # of tasks that doesn't currently have any
595  * other queues from the uniq identifier.
596  */
597 static int
598 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
599 {
600         struct grouptask *n;
601         int i, idx, mincnt;
602         int strict;
603
604         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
605         if (qgroup->tqg_cnt == 0)
606                 return (0);
607         idx = -1;
608         mincnt = INT_MAX;
609         /*
610          * Two passes;  First scan for a queue with the least tasks that
611          * does not already service this uniq id.  If that fails simply find
612          * the queue with the least total tasks;
613          */
614         for (strict = 1; mincnt == INT_MAX; strict = 0) {
615                 for (i = 0; i < qgroup->tqg_cnt; i++) {
616                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
617                                 continue;
618                         if (strict) {
619                                 LIST_FOREACH(n,
620                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
621                                         if (n->gt_uniq == uniq)
622                                                 break;
623                                 if (n != NULL)
624                                         continue;
625                         }
626                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
627                         idx = i;
628                 }
629         }
630         if (idx == -1)
631                 panic("taskqgroup_find: Failed to pick a qid.");
632
633         return (idx);
634 }
635
/*
 * smp_started is unusable since it is not set for UP kernels or even for
 * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 *
 * So maintain our own flag.  It must be set after all CPUs are started
 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 * SI_ORDER_ANY, but it is not clearly after the CPUs are started.  It
 * would be simpler for adjustment to pass a flag indicating if it is
 * delayed.
 */
649
650 static int tqg_smp_started;
651
652 static void
653 tqg_record_smp_started(void *arg)
654 {
655         tqg_smp_started = 1;
656 }
657
658 SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
659         tqg_record_smp_started, NULL);
660
661 void
662 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
663     void *uniq, int irq, char *name)
664 {
665         cpuset_t mask;
666         int qid;
667
668         gtask->gt_uniq = uniq;
669         gtask->gt_name = name;
670         gtask->gt_irq = irq;
671         gtask->gt_cpu = -1;
672         mtx_lock(&qgroup->tqg_lock);
673         qid = taskqgroup_find(qgroup, uniq);
674         qgroup->tqg_queue[qid].tgc_cnt++;
675         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
676         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
677         if (irq != -1 && tqg_smp_started) {
678                 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
679                 CPU_ZERO(&mask);
680                 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
681                 mtx_unlock(&qgroup->tqg_lock);
682                 intr_setaffinity(irq, &mask);
683         } else
684                 mtx_unlock(&qgroup->tqg_lock);
685 }
686
687 static void
688 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
689 {
690         cpuset_t mask;
691         int qid, cpu;
692
693         mtx_lock(&qgroup->tqg_lock);
694         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
695         cpu = qgroup->tqg_queue[qid].tgc_cpu;
696         if (gtask->gt_irq != -1) {
697                 mtx_unlock(&qgroup->tqg_lock);
698
699                 CPU_ZERO(&mask);
700                 CPU_SET(cpu, &mask);
701                 intr_setaffinity(gtask->gt_irq, &mask);
702
703                 mtx_lock(&qgroup->tqg_lock);
704         }
705         qgroup->tqg_queue[qid].tgc_cnt++;
706
707         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask,
708                          gt_list);
709         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
710         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
711         mtx_unlock(&qgroup->tqg_lock);
712 }
713
714 int
715 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
716         void *uniq, int cpu, int irq, char *name)
717 {
718         cpuset_t mask;
719         int i, qid;
720
721         qid = -1;
722         gtask->gt_uniq = uniq;
723         gtask->gt_name = name;
724         gtask->gt_irq = irq;
725         gtask->gt_cpu = cpu;
726         mtx_lock(&qgroup->tqg_lock);
727         if (tqg_smp_started) {
728                 for (i = 0; i < qgroup->tqg_cnt; i++)
729                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
730                                 qid = i;
731                                 break;
732                         }
733                 if (qid == -1) {
734                         mtx_unlock(&qgroup->tqg_lock);
735                         return (EINVAL);
736                 }
737         } else
738                 qid = 0;
739         qgroup->tqg_queue[qid].tgc_cnt++;
740         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
741         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
742         cpu = qgroup->tqg_queue[qid].tgc_cpu;
743         mtx_unlock(&qgroup->tqg_lock);
744
745         CPU_ZERO(&mask);
746         CPU_SET(cpu, &mask);
747         if (irq != -1 && tqg_smp_started)
748                 intr_setaffinity(irq, &mask);
749         return (0);
750 }
751
752 static int
753 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
754 {
755         cpuset_t mask;
756         int i, qid, irq, cpu;
757
758         qid = -1;
759         irq = gtask->gt_irq;
760         cpu = gtask->gt_cpu;
761         MPASS(tqg_smp_started);
762         mtx_lock(&qgroup->tqg_lock);
763         for (i = 0; i < qgroup->tqg_cnt; i++)
764                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
765                         qid = i;
766                         break;
767                 }
768         if (qid == -1) {
769                 mtx_unlock(&qgroup->tqg_lock);
770                 return (EINVAL);
771         }
772         qgroup->tqg_queue[qid].tgc_cnt++;
773         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
774         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
775         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
776         mtx_unlock(&qgroup->tqg_lock);
777
778         CPU_ZERO(&mask);
779         CPU_SET(cpu, &mask);
780
781         if (irq != -1)
782                 intr_setaffinity(irq, &mask);
783         return (0);
784 }
785
786 void
787 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
788 {
789         int i;
790
791         mtx_lock(&qgroup->tqg_lock);
792         for (i = 0; i < qgroup->tqg_cnt; i++)
793                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
794                         break;
795         if (i == qgroup->tqg_cnt)
796                 panic("taskqgroup_detach: task not in group\n");
797         qgroup->tqg_queue[i].tgc_cnt--;
798         LIST_REMOVE(gtask, gt_list);
799         mtx_unlock(&qgroup->tqg_lock);
800         gtask->gt_taskqueue = NULL;
801 }
802
803 static void
804 taskqgroup_binder(void *ctx)
805 {
806         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
807         cpuset_t mask;
808         int error;
809
810         CPU_ZERO(&mask);
811         CPU_SET(gtask->bt_cpuid, &mask);
812         error = cpuset_setthread(curthread->td_tid, &mask);
813         thread_lock(curthread);
814         sched_bind(curthread, gtask->bt_cpuid);
815         thread_unlock(curthread);
816
817         if (error)
818                 printf("taskqgroup_binder: setaffinity failed: %d\n",
819                     error);
820         free(gtask, M_DEVBUF);
821 }
822
823 static void
824 taskqgroup_bind(struct taskqgroup *qgroup)
825 {
826         struct taskq_bind_task *gtask;
827         int i;
828
829         /*
830          * Bind taskqueue threads to specific CPUs, if they have been assigned
831          * one.
832          */
833         if (qgroup->tqg_cnt == 1)
834                 return;
835
836         for (i = 0; i < qgroup->tqg_cnt; i++) {
837                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
838                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
839                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
840                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
841                     &gtask->bt_task);
842         }
843 }
844
/*
 * Resize the group to 'cnt' queues spaced 'stride' CPUs apart and
 * redistribute all attached tasks over the new set of queues.
 *
 * The group lock must be held on entry and is held again on every return
 * path, but it is dropped and retaken several times in between;
 * tqg_adjusting is set for the duration so that a concurrent call fails
 * with EBUSY.
 *
 * Returns EINVAL if cnt < 1, if cnt * stride exceeds mp_ncpus, or if the
 * SMP stage has not been recorded yet; EBUSY if another adjustment is in
 * progress; 0 on success.
 */
static int
_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
        LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
        struct grouptask *gtask;
        int i, k, old_cnt, old_cpu, cpu;

        mtx_assert(&qgroup->tqg_lock, MA_OWNED);

        if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
                printf("%s: failed cnt: %d stride: %d "
                    "mp_ncpus: %d tqg_smp_started: %d\n",
                    __func__, cnt, stride, mp_ncpus, tqg_smp_started);
                return (EINVAL);
        }
        if (qgroup->tqg_adjusting) {
                printf("taskqgroup_adjust failed: adjusting\n");
                return (EBUSY);
        }
        qgroup->tqg_adjusting = 1;
        old_cnt = qgroup->tqg_cnt;
        old_cpu = 0;
        /*
         * When growing, start CPU assignment from the tgc_cpu value of the
         * first slot that is not yet in use.
         */
        if (old_cnt < cnt)
                old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
        /* The lock is dropped here; the steps up to the relock run unlocked. */
        mtx_unlock(&qgroup->tqg_lock);
        /*
         * Set up queue for tasks added before boot.
         * With no queues yet, such tasks sit on slot 0's list; move them to
         * the local list so that they are re-attached below.
         */
        if (old_cnt == 0) {
                LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
                    grouptask, gt_list);
                qgroup->tqg_queue[0].tgc_cnt = 0;
        }

        /*
         * If new taskq threads have been added.
         * Each new slot gets the next CPU, 'stride' CPU_NEXT() steps after
         * the previous one.
         */
        cpu = old_cpu;
        for (i = old_cnt; i < cnt; i++) {
                taskqgroup_cpu_create(qgroup, i, cpu);

                for (k = 0; k < stride; k++)
                        cpu = CPU_NEXT(cpu);
        }
        mtx_lock(&qgroup->tqg_lock);
        qgroup->tqg_cnt = cnt;
        qgroup->tqg_stride = stride;

        /*
         * Adjust drivers to use new taskqs.
         * Pull every task off every previously existing slot onto the
         * local list.
         */
        for (i = 0; i < old_cnt; i++) {
                while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
                        LIST_REMOVE(gtask, gt_list);
                        qgroup->tqg_queue[i].tgc_cnt--;
                        LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
                }
        }
        mtx_unlock(&qgroup->tqg_lock);

        /*
         * Re-attach the collected tasks.  Tasks that asked for a specific
         * CPU go to that CPU's slot; if no such slot exists they fall back
         * to normal placement.
         */
        while ((gtask = LIST_FIRST(&gtask_head))) {
                LIST_REMOVE(gtask, gt_list);
                if (gtask->gt_cpu == -1)
                        taskqgroup_attach_deferred(qgroup, gtask);
                else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
                        taskqgroup_attach_deferred(qgroup, gtask);
        }

#ifdef INVARIANTS
        /* Check that every slot in use has a queue and every task points at one. */
        mtx_lock(&qgroup->tqg_lock);
        for (i = 0; i < qgroup->tqg_cnt; i++) {
                MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
                LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
                        MPASS(gtask->gt_taskqueue != NULL);
        }
        mtx_unlock(&qgroup->tqg_lock);
#endif
        /*
         * If taskq thread count has been reduced.
         */
        for (i = cnt; i < old_cnt; i++)
                taskqgroup_cpu_remove(qgroup, i);

        taskqgroup_bind(qgroup);

        /* Retake the lock: the caller expects it to be held on return. */
        mtx_lock(&qgroup->tqg_lock);
        qgroup->tqg_adjusting = 0;

        return (0);
}
935
936 int
937 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
938 {
939         int error;
940
941         mtx_lock(&qgroup->tqg_lock);
942         error = _taskqgroup_adjust(qgroup, cnt, stride);
943         mtx_unlock(&qgroup->tqg_lock);
944
945         return (error);
946 }
947
948 struct taskqgroup *
949 taskqgroup_create(char *name)
950 {
951         struct taskqgroup *qgroup;
952
953         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
954         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
955         qgroup->tqg_name = name;
956         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
957
958         return (qgroup);
959 }
960
/*
 * Currently a no-op: nothing allocated by taskqgroup_create() or by a
 * later adjustment is released here.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{

}