]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/kern/subr_gtaskqueue.c
MFC r353021: simplify path handling in sysctl_try_reclaim_vnode
[FreeBSD/FreeBSD.git] / sys / kern / subr_gtaskqueue.c
1 /*-
2  * Copyright (c) 2000 Doug Rabson
3  * Copyright (c) 2014 Jeff Roberson
4  * Copyright (c) 2016 Matthew Macy
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/bus.h>
35 #include <sys/cpuset.h>
36 #include <sys/interrupt.h>
37 #include <sys/kernel.h>
38 #include <sys/kthread.h>
39 #include <sys/libkern.h>
40 #include <sys/limits.h>
41 #include <sys/lock.h>
42 #include <sys/malloc.h>
43 #include <sys/mutex.h>
44 #include <sys/proc.h>
45 #include <sys/sched.h>
46 #include <sys/smp.h>
47 #include <sys/gtaskqueue.h>
48 #include <sys/unistd.h>
49 #include <machine/stdarg.h>
50
51 static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues");
52 static void     gtaskqueue_thread_enqueue(void *);
53 static void     gtaskqueue_thread_loop(void *arg);
54 static int      task_is_running(struct gtaskqueue *queue, struct gtask *gtask);
55 static void     gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask);
56
57 TASKQGROUP_DEFINE(softirq, mp_ncpus, 1);
58 TASKQGROUP_DEFINE(config, 1, 1);
59
60 struct gtaskqueue_busy {
61         struct gtask    *tb_running;
62         TAILQ_ENTRY(gtaskqueue_busy) tb_link;
63 };
64
65 static struct gtask * const TB_DRAIN_WAITER = (struct gtask *)0x1;
66
67 typedef void (*gtaskqueue_enqueue_fn)(void *context);
68
69 struct gtaskqueue {
70         STAILQ_HEAD(, gtask)    tq_queue;
71         gtaskqueue_enqueue_fn   tq_enqueue;
72         void                    *tq_context;
73         char                    *tq_name;
74         TAILQ_HEAD(, gtaskqueue_busy) tq_active;
75         struct mtx              tq_mutex;
76         struct thread           **tq_threads;
77         int                     tq_tcount;
78         int                     tq_spin;
79         int                     tq_flags;
80         int                     tq_callouts;
81         taskqueue_callback_fn   tq_callbacks[TASKQUEUE_NUM_CALLBACKS];
82         void                    *tq_cb_contexts[TASKQUEUE_NUM_CALLBACKS];
83 };
84
/* Bits kept in gtaskqueue.tq_flags. */
#define	TQ_FLAGS_ACTIVE			0x1	/* cleared when the queue is freed */
#define	TQ_FLAGS_BLOCKED		0x2	/* enqueue must not call tq_enqueue */
#define	TQ_FLAGS_UNLOCKED_ENQUEUE	0x4	/* hook is gtaskqueue_thread_enqueue */

/* NOTE(review): not referenced in the visible part of this file. */
#define	DT_CALLOUT_ARMED		0x1
90
/*
 * Acquire/release the queue mutex with the primitive matching the mutex
 * class selected at creation time (tq_spin).
 */
#define	TQ_LOCK(tq)							\
	do {								\
		if (!(tq)->tq_spin)					\
			mtx_lock(&(tq)->tq_mutex);			\
		else							\
			mtx_lock_spin(&(tq)->tq_mutex);			\
	} while (0)
/* Assert that the calling thread owns the queue mutex. */
#define	TQ_ASSERT_LOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_OWNED)

#define	TQ_UNLOCK(tq)							\
	do {								\
		if (!(tq)->tq_spin)					\
			mtx_unlock(&(tq)->tq_mutex);			\
		else							\
			mtx_unlock_spin(&(tq)->tq_mutex);		\
	} while (0)
/* Assert that the calling thread does not own the queue mutex. */
#define	TQ_ASSERT_UNLOCKED(tq)	mtx_assert(&(tq)->tq_mutex, MA_NOTOWNED)
108
#ifdef INVARIANTS
/*
 * Print the fields of a gtask.  Used right before the "queue == NULL"
 * panics so the offending task can be identified.
 */
static void
gtask_dump(struct gtask *gtask)
{
	printf("gtask: %p ta_flags=%x ta_priority=%d ta_func=%p ta_context=%p\n",
	    gtask,
	    gtask->ta_flags,
	    gtask->ta_priority,
	    gtask->ta_func,
	    gtask->ta_context);
}
#endif
117
118 static __inline int
119 TQ_SLEEP(struct gtaskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
120     int t)
121 {
122         if (tq->tq_spin)
123                 return (msleep_spin(p, m, wm, t));
124         return (msleep(p, m, pri, wm, t));
125 }
126
127 static struct gtaskqueue *
128 _gtaskqueue_create(const char *name, int mflags,
129                  taskqueue_enqueue_fn enqueue, void *context,
130                  int mtxflags, const char *mtxname __unused)
131 {
132         struct gtaskqueue *queue;
133         char *tq_name;
134
135         tq_name = malloc(TASKQUEUE_NAMELEN, M_GTASKQUEUE, mflags | M_ZERO);
136         if (!tq_name)
137                 return (NULL);
138
139         snprintf(tq_name, TASKQUEUE_NAMELEN, "%s", (name) ? name : "taskqueue");
140
141         queue = malloc(sizeof(struct gtaskqueue), M_GTASKQUEUE, mflags | M_ZERO);
142         if (!queue) {
143                 free(tq_name, M_GTASKQUEUE);
144                 return (NULL);
145         }
146
147         STAILQ_INIT(&queue->tq_queue);
148         TAILQ_INIT(&queue->tq_active);
149         queue->tq_enqueue = enqueue;
150         queue->tq_context = context;
151         queue->tq_name = tq_name;
152         queue->tq_spin = (mtxflags & MTX_SPIN) != 0;
153         queue->tq_flags |= TQ_FLAGS_ACTIVE;
154         if (enqueue == gtaskqueue_thread_enqueue)
155                 queue->tq_flags |= TQ_FLAGS_UNLOCKED_ENQUEUE;
156         mtx_init(&queue->tq_mutex, tq_name, NULL, mtxflags);
157
158         return (queue);
159 }
160
161
162 /*
163  * Signal a taskqueue thread to terminate.
164  */
165 static void
166 gtaskqueue_terminate(struct thread **pp, struct gtaskqueue *tq)
167 {
168
169         while (tq->tq_tcount > 0 || tq->tq_callouts > 0) {
170                 wakeup(tq);
171                 TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0);
172         }
173 }
174
175 static void
176 gtaskqueue_free(struct gtaskqueue *queue)
177 {
178
179         TQ_LOCK(queue);
180         queue->tq_flags &= ~TQ_FLAGS_ACTIVE;
181         gtaskqueue_terminate(queue->tq_threads, queue);
182         KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?"));
183         KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks"));
184         mtx_destroy(&queue->tq_mutex);
185         free(queue->tq_threads, M_GTASKQUEUE);
186         free(queue->tq_name, M_GTASKQUEUE);
187         free(queue, M_GTASKQUEUE);
188 }
189
/*
 * Mark the task so that it cannot be enqueued again, then wait for any
 * queued or running instance of it to complete.
 */
193 void
194 grouptask_block(struct grouptask *grouptask)
195 {
196         struct gtaskqueue *queue = grouptask->gt_taskqueue;
197         struct gtask *gtask = &grouptask->gt_task;
198
199 #ifdef INVARIANTS
200         if (queue == NULL) {
201                 gtask_dump(gtask);
202                 panic("queue == NULL");
203         }
204 #endif
205         TQ_LOCK(queue);
206         gtask->ta_flags |= TASK_NOENQUEUE;
207         gtaskqueue_drain_locked(queue, gtask);
208         TQ_UNLOCK(queue);
209 }
210
211 void
212 grouptask_unblock(struct grouptask *grouptask)
213 {
214         struct gtaskqueue *queue = grouptask->gt_taskqueue;
215         struct gtask *gtask = &grouptask->gt_task;
216
217 #ifdef INVARIANTS
218         if (queue == NULL) {
219                 gtask_dump(gtask);
220                 panic("queue == NULL");
221         }
222 #endif
223         TQ_LOCK(queue);
224         gtask->ta_flags &= ~TASK_NOENQUEUE;
225         TQ_UNLOCK(queue);
226 }
227
228 int
229 grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask)
230 {
231 #ifdef INVARIANTS
232         if (queue == NULL) {
233                 gtask_dump(gtask);
234                 panic("queue == NULL");
235         }
236 #endif
237         TQ_LOCK(queue);
238         if (gtask->ta_flags & TASK_ENQUEUED) {
239                 TQ_UNLOCK(queue);
240                 return (0);
241         }
242         if (gtask->ta_flags & TASK_NOENQUEUE) {
243                 TQ_UNLOCK(queue);
244                 return (EAGAIN);
245         }
246         STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link);
247         gtask->ta_flags |= TASK_ENQUEUED;
248         TQ_UNLOCK(queue);
249         if ((queue->tq_flags & TQ_FLAGS_BLOCKED) == 0)
250                 queue->tq_enqueue(queue->tq_context);
251         return (0);
252 }
253
/* Task function that does nothing; used for the drain barrier task. */
static void
gtaskqueue_task_nop_fn(void *context)
{
	(void)context;
}
258
259 /*
260  * Block until all currently queued tasks in this taskqueue
261  * have begun execution.  Tasks queued during execution of
262  * this function are ignored.
263  */
264 static void
265 gtaskqueue_drain_tq_queue(struct gtaskqueue *queue)
266 {
267         struct gtask t_barrier;
268
269         if (STAILQ_EMPTY(&queue->tq_queue))
270                 return;
271
272         /*
273          * Enqueue our barrier after all current tasks, but with
274          * the highest priority so that newly queued tasks cannot
275          * pass it.  Because of the high priority, we can not use
276          * taskqueue_enqueue_locked directly (which drops the lock
277          * anyway) so just insert it at tail while we have the
278          * queue lock.
279          */
280         GTASK_INIT(&t_barrier, 0, USHRT_MAX, gtaskqueue_task_nop_fn, &t_barrier);
281         STAILQ_INSERT_TAIL(&queue->tq_queue, &t_barrier, ta_link);
282         t_barrier.ta_flags |= TASK_ENQUEUED;
283
284         /*
285          * Once the barrier has executed, all previously queued tasks
286          * have completed or are currently executing.
287          */
288         while (t_barrier.ta_flags & TASK_ENQUEUED)
289                 TQ_SLEEP(queue, &t_barrier, &queue->tq_mutex, PWAIT, "-", 0);
290 }
291
292 /*
293  * Block until all currently executing tasks for this taskqueue
294  * complete.  Tasks that begin execution during the execution
295  * of this function are ignored.
296  */
297 static void
298 gtaskqueue_drain_tq_active(struct gtaskqueue *queue)
299 {
300         struct gtaskqueue_busy tb_marker, *tb_first;
301
302         if (TAILQ_EMPTY(&queue->tq_active))
303                 return;
304
305         /* Block taskq_terminate().*/
306         queue->tq_callouts++;
307
308         /*
309          * Wait for all currently executing taskqueue threads
310          * to go idle.
311          */
312         tb_marker.tb_running = TB_DRAIN_WAITER;
313         TAILQ_INSERT_TAIL(&queue->tq_active, &tb_marker, tb_link);
314         while (TAILQ_FIRST(&queue->tq_active) != &tb_marker)
315                 TQ_SLEEP(queue, &tb_marker, &queue->tq_mutex, PWAIT, "-", 0);
316         TAILQ_REMOVE(&queue->tq_active, &tb_marker, tb_link);
317
318         /*
319          * Wakeup any other drain waiter that happened to queue up
320          * without any intervening active thread.
321          */
322         tb_first = TAILQ_FIRST(&queue->tq_active);
323         if (tb_first != NULL && tb_first->tb_running == TB_DRAIN_WAITER)
324                 wakeup(tb_first);
325
326         /* Release taskqueue_terminate(). */
327         queue->tq_callouts--;
328         if ((queue->tq_flags & TQ_FLAGS_ACTIVE) == 0)
329                 wakeup_one(queue->tq_threads);
330 }
331
332 void
333 gtaskqueue_block(struct gtaskqueue *queue)
334 {
335
336         TQ_LOCK(queue);
337         queue->tq_flags |= TQ_FLAGS_BLOCKED;
338         TQ_UNLOCK(queue);
339 }
340
341 void
342 gtaskqueue_unblock(struct gtaskqueue *queue)
343 {
344
345         TQ_LOCK(queue);
346         queue->tq_flags &= ~TQ_FLAGS_BLOCKED;
347         if (!STAILQ_EMPTY(&queue->tq_queue))
348                 queue->tq_enqueue(queue->tq_context);
349         TQ_UNLOCK(queue);
350 }
351
352 static void
353 gtaskqueue_run_locked(struct gtaskqueue *queue)
354 {
355         struct gtaskqueue_busy tb;
356         struct gtaskqueue_busy *tb_first;
357         struct gtask *gtask;
358
359         KASSERT(queue != NULL, ("tq is NULL"));
360         TQ_ASSERT_LOCKED(queue);
361         tb.tb_running = NULL;
362
363         while (STAILQ_FIRST(&queue->tq_queue)) {
364                 TAILQ_INSERT_TAIL(&queue->tq_active, &tb, tb_link);
365
366                 /*
367                  * Carefully remove the first task from the queue and
368                  * clear its TASK_ENQUEUED flag
369                  */
370                 gtask = STAILQ_FIRST(&queue->tq_queue);
371                 KASSERT(gtask != NULL, ("task is NULL"));
372                 STAILQ_REMOVE_HEAD(&queue->tq_queue, ta_link);
373                 gtask->ta_flags &= ~TASK_ENQUEUED;
374                 tb.tb_running = gtask;
375                 TQ_UNLOCK(queue);
376
377                 KASSERT(gtask->ta_func != NULL, ("task->ta_func is NULL"));
378                 gtask->ta_func(gtask->ta_context);
379
380                 TQ_LOCK(queue);
381                 tb.tb_running = NULL;
382                 wakeup(gtask);
383
384                 TAILQ_REMOVE(&queue->tq_active, &tb, tb_link);
385                 tb_first = TAILQ_FIRST(&queue->tq_active);
386                 if (tb_first != NULL &&
387                     tb_first->tb_running == TB_DRAIN_WAITER)
388                         wakeup(tb_first);
389         }
390 }
391
392 static int
393 task_is_running(struct gtaskqueue *queue, struct gtask *gtask)
394 {
395         struct gtaskqueue_busy *tb;
396
397         TQ_ASSERT_LOCKED(queue);
398         TAILQ_FOREACH(tb, &queue->tq_active, tb_link) {
399                 if (tb->tb_running == gtask)
400                         return (1);
401         }
402         return (0);
403 }
404
405 static int
406 gtaskqueue_cancel_locked(struct gtaskqueue *queue, struct gtask *gtask)
407 {
408
409         if (gtask->ta_flags & TASK_ENQUEUED)
410                 STAILQ_REMOVE(&queue->tq_queue, gtask, gtask, ta_link);
411         gtask->ta_flags &= ~TASK_ENQUEUED;
412         return (task_is_running(queue, gtask) ? EBUSY : 0);
413 }
414
/*
 * Locked wrapper around gtaskqueue_cancel_locked(); returns its result
 * (0, or EBUSY if the task is currently executing).
 */
int
gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask)
{
	int rc;

	TQ_LOCK(queue);
	rc = gtaskqueue_cancel_locked(queue, gtask);
	TQ_UNLOCK(queue);
	return (rc);
}
426
427 static void
428 gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask)
429 {
430         while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask))
431                 TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0);
432 }
433
434 void
435 gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask)
436 {
437
438         if (!queue->tq_spin)
439                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
440
441         TQ_LOCK(queue);
442         gtaskqueue_drain_locked(queue, gtask);
443         TQ_UNLOCK(queue);
444 }
445
446 void
447 gtaskqueue_drain_all(struct gtaskqueue *queue)
448 {
449
450         if (!queue->tq_spin)
451                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__);
452
453         TQ_LOCK(queue);
454         gtaskqueue_drain_tq_queue(queue);
455         gtaskqueue_drain_tq_active(queue);
456         TQ_UNLOCK(queue);
457 }
458
459 static int
460 _gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
461     cpuset_t *mask, const char *name, va_list ap)
462 {
463         char ktname[MAXCOMLEN + 1];
464         struct thread *td;
465         struct gtaskqueue *tq;
466         int i, error;
467
468         if (count <= 0)
469                 return (EINVAL);
470
471         vsnprintf(ktname, sizeof(ktname), name, ap);
472         tq = *tqp;
473
474         tq->tq_threads = malloc(sizeof(struct thread *) * count, M_GTASKQUEUE,
475             M_NOWAIT | M_ZERO);
476         if (tq->tq_threads == NULL) {
477                 printf("%s: no memory for %s threads\n", __func__, ktname);
478                 return (ENOMEM);
479         }
480
481         for (i = 0; i < count; i++) {
482                 if (count == 1)
483                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
484                             &tq->tq_threads[i], RFSTOPPED, 0, "%s", ktname);
485                 else
486                         error = kthread_add(gtaskqueue_thread_loop, tqp, NULL,
487                             &tq->tq_threads[i], RFSTOPPED, 0,
488                             "%s_%d", ktname, i);
489                 if (error) {
490                         /* should be ok to continue, taskqueue_free will dtrt */
491                         printf("%s: kthread_add(%s): error %d", __func__,
492                             ktname, error);
493                         tq->tq_threads[i] = NULL;               /* paranoid */
494                 } else
495                         tq->tq_tcount++;
496         }
497         for (i = 0; i < count; i++) {
498                 if (tq->tq_threads[i] == NULL)
499                         continue;
500                 td = tq->tq_threads[i];
501                 if (mask) {
502                         error = cpuset_setthread(td->td_tid, mask);
503                         /*
504                          * Failing to pin is rarely an actual fatal error;
505                          * it'll just affect performance.
506                          */
507                         if (error)
508                                 printf("%s: curthread=%llu: can't pin; "
509                                     "error=%d\n",
510                                     __func__,
511                                     (unsigned long long) td->td_tid,
512                                     error);
513                 }
514                 thread_lock(td);
515                 sched_prio(td, pri);
516                 sched_add(td, SRQ_BORING);
517                 thread_unlock(td);
518         }
519
520         return (0);
521 }
522
523 static int
524 gtaskqueue_start_threads(struct gtaskqueue **tqp, int count, int pri,
525     const char *name, ...)
526 {
527         va_list ap;
528         int error;
529
530         va_start(ap, name);
531         error = _gtaskqueue_start_threads(tqp, count, pri, NULL, name, ap);
532         va_end(ap);
533         return (error);
534 }
535
536 static inline void
537 gtaskqueue_run_callback(struct gtaskqueue *tq,
538     enum taskqueue_callback_type cb_type)
539 {
540         taskqueue_callback_fn tq_callback;
541
542         TQ_ASSERT_UNLOCKED(tq);
543         tq_callback = tq->tq_callbacks[cb_type];
544         if (tq_callback != NULL)
545                 tq_callback(tq->tq_cb_contexts[cb_type]);
546 }
547
548 static void
549 gtaskqueue_thread_loop(void *arg)
550 {
551         struct gtaskqueue **tqp, *tq;
552
553         tqp = arg;
554         tq = *tqp;
555         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_INIT);
556         TQ_LOCK(tq);
557         while ((tq->tq_flags & TQ_FLAGS_ACTIVE) != 0) {
558                 /* XXX ? */
559                 gtaskqueue_run_locked(tq);
560                 /*
561                  * Because taskqueue_run() can drop tq_mutex, we need to
562                  * check if the TQ_FLAGS_ACTIVE flag wasn't removed in the
563                  * meantime, which means we missed a wakeup.
564                  */
565                 if ((tq->tq_flags & TQ_FLAGS_ACTIVE) == 0)
566                         break;
567                 TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0);
568         }
569         gtaskqueue_run_locked(tq);
570         /*
571          * This thread is on its way out, so just drop the lock temporarily
572          * in order to call the shutdown callback.  This allows the callback
573          * to look at the taskqueue, even just before it dies.
574          */
575         TQ_UNLOCK(tq);
576         gtaskqueue_run_callback(tq, TASKQUEUE_CALLBACK_TYPE_SHUTDOWN);
577         TQ_LOCK(tq);
578
579         /* rendezvous with thread that asked us to terminate */
580         tq->tq_tcount--;
581         wakeup_one(tq->tq_threads);
582         TQ_UNLOCK(tq);
583         kthread_exit();
584 }
585
/*
 * Enqueue hook for thread-backed queues: context is the address of the
 * queue pointer; wake one thread sleeping on the queue.
 */
static void
gtaskqueue_thread_enqueue(void *context)
{
	struct gtaskqueue *tq;

	tq = *(struct gtaskqueue **)context;
	wakeup_one(tq);
}
595
596
597 static struct gtaskqueue *
598 gtaskqueue_create_fast(const char *name, int mflags,
599                  taskqueue_enqueue_fn enqueue, void *context)
600 {
601         return _gtaskqueue_create(name, mflags, enqueue, context,
602                         MTX_SPIN, "fast_taskqueue");
603 }
604
605
606 struct taskqgroup_cpu {
607         LIST_HEAD(, grouptask)  tgc_tasks;
608         struct gtaskqueue       *tgc_taskq;
609         int     tgc_cnt;
610         int     tgc_cpu;
611 };
612
613 struct taskqgroup {
614         struct taskqgroup_cpu tqg_queue[MAXCPU];
615         struct mtx      tqg_lock;
616         const char *    tqg_name;
617         int             tqg_adjusting;
618         int             tqg_stride;
619         int             tqg_cnt;
620 };
621
622 struct taskq_bind_task {
623         struct gtask bt_task;
624         int     bt_cpuid;
625 };
626
627 static void
628 taskqgroup_cpu_create(struct taskqgroup *qgroup, int idx, int cpu)
629 {
630         struct taskqgroup_cpu *qcpu;
631
632         qcpu = &qgroup->tqg_queue[idx];
633         LIST_INIT(&qcpu->tgc_tasks);
634         qcpu->tgc_taskq = gtaskqueue_create_fast(NULL, M_WAITOK,
635             taskqueue_thread_enqueue, &qcpu->tgc_taskq);
636         gtaskqueue_start_threads(&qcpu->tgc_taskq, 1, PI_SOFT,
637             "%s_%d", qgroup->tqg_name, idx);
638         qcpu->tgc_cpu = cpu;
639 }
640
641 static void
642 taskqgroup_cpu_remove(struct taskqgroup *qgroup, int idx)
643 {
644
645         gtaskqueue_free(qgroup->tqg_queue[idx].tgc_taskq);
646 }
647
/*
 * Find the taskq with the fewest tasks that does not already service a
 * task with the same uniq identifier.
 */
652 static int
653 taskqgroup_find(struct taskqgroup *qgroup, void *uniq)
654 {
655         struct grouptask *n;
656         int i, idx, mincnt;
657         int strict;
658
659         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
660         if (qgroup->tqg_cnt == 0)
661                 return (0);
662         idx = -1;
663         mincnt = INT_MAX;
664         /*
665          * Two passes;  First scan for a queue with the least tasks that
666          * does not already service this uniq id.  If that fails simply find
667          * the queue with the least total tasks;
668          */
669         for (strict = 1; mincnt == INT_MAX; strict = 0) {
670                 for (i = 0; i < qgroup->tqg_cnt; i++) {
671                         if (qgroup->tqg_queue[i].tgc_cnt > mincnt)
672                                 continue;
673                         if (strict) {
674                                 LIST_FOREACH(n,
675                                     &qgroup->tqg_queue[i].tgc_tasks, gt_list)
676                                         if (n->gt_uniq == uniq)
677                                                 break;
678                                 if (n != NULL)
679                                         continue;
680                         }
681                         mincnt = qgroup->tqg_queue[i].tgc_cnt;
682                         idx = i;
683                 }
684         }
685         if (idx == -1)
686                 panic("%s: failed to pick a qid.", __func__);
687
688         return (idx);
689 }
690
/*
 * smp_started is unusable since it is not set for UP kernels or even for
 * SMP kernels when there is 1 CPU.  This is usually handled by adding a
 * (mp_ncpus == 1) test, but that would be broken here since we need to
 * synchronize with the SI_SUB_SMP ordering.  Even in the pure SMP case
 * smp_started only gives a fuzzy ordering relative to SI_SUB_SMP.
 *
 * So maintain our own flag.  It must be set after all CPUs are started
 * and before SI_SUB_SMP:SI_ORDER_ANY so that the SYSINIT for delayed
 * adjustment is properly delayed.  SI_ORDER_FOURTH is clearly before
 * SI_ORDER_ANY and unclearly after the CPUs are started.  It would be
 * simpler for adjustment to pass a flag indicating if it is delayed.
 */
704
/* Set to 1 by the SYSINIT below; consulted by the attach routines. */
static int tqg_smp_started;

/* SYSINIT hook: record that the SI_SUB_SMP stage has been reached. */
static void
tqg_record_smp_started(void *arg)
{
	(void)arg;
	tqg_smp_started = 1;
}

SYSINIT(tqg_record_smp_started, SI_SUB_SMP, SI_ORDER_FOURTH,
    tqg_record_smp_started, NULL);
715
716 void
717 taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *gtask,
718     void *uniq, int irq, const char *name)
719 {
720         cpuset_t mask;
721         int qid, error;
722
723         gtask->gt_uniq = uniq;
724         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
725         gtask->gt_irq = irq;
726         gtask->gt_cpu = -1;
727         mtx_lock(&qgroup->tqg_lock);
728         qid = taskqgroup_find(qgroup, uniq);
729         qgroup->tqg_queue[qid].tgc_cnt++;
730         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
731         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
732         if (irq != -1 && tqg_smp_started) {
733                 gtask->gt_cpu = qgroup->tqg_queue[qid].tgc_cpu;
734                 CPU_ZERO(&mask);
735                 CPU_SET(qgroup->tqg_queue[qid].tgc_cpu, &mask);
736                 mtx_unlock(&qgroup->tqg_lock);
737                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
738                 if (error)
739                         printf("%s: binding interrupt failed for %s: %d\n",
740                             __func__, gtask->gt_name, error);
741         } else
742                 mtx_unlock(&qgroup->tqg_lock);
743 }
744
745 static void
746 taskqgroup_attach_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
747 {
748         cpuset_t mask;
749         int qid, cpu, error;
750
751         mtx_lock(&qgroup->tqg_lock);
752         qid = taskqgroup_find(qgroup, gtask->gt_uniq);
753         cpu = qgroup->tqg_queue[qid].tgc_cpu;
754         if (gtask->gt_irq != -1) {
755                 mtx_unlock(&qgroup->tqg_lock);
756
757                 CPU_ZERO(&mask);
758                 CPU_SET(cpu, &mask);
759                 error = intr_setaffinity(gtask->gt_irq, CPU_WHICH_IRQ, &mask);
760                 mtx_lock(&qgroup->tqg_lock);
761                 if (error)
762                         printf("%s: binding interrupt failed for %s: %d\n",
763                             __func__, gtask->gt_name, error);
764
765         }
766         qgroup->tqg_queue[qid].tgc_cnt++;
767         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
768         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
769         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
770         mtx_unlock(&qgroup->tqg_lock);
771 }
772
773 int
774 taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *gtask,
775     void *uniq, int cpu, int irq, const char *name)
776 {
777         cpuset_t mask;
778         int i, qid, error;
779
780         qid = -1;
781         gtask->gt_uniq = uniq;
782         snprintf(gtask->gt_name, GROUPTASK_NAMELEN, "%s", name ? name : "grouptask");
783         gtask->gt_irq = irq;
784         gtask->gt_cpu = cpu;
785         mtx_lock(&qgroup->tqg_lock);
786         if (tqg_smp_started) {
787                 for (i = 0; i < qgroup->tqg_cnt; i++)
788                         if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
789                                 qid = i;
790                                 break;
791                         }
792                 if (qid == -1) {
793                         mtx_unlock(&qgroup->tqg_lock);
794                         printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
795                         return (EINVAL);
796                 }
797         } else
798                 qid = 0;
799         qgroup->tqg_queue[qid].tgc_cnt++;
800         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
801         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
802         cpu = qgroup->tqg_queue[qid].tgc_cpu;
803         mtx_unlock(&qgroup->tqg_lock);
804
805         CPU_ZERO(&mask);
806         CPU_SET(cpu, &mask);
807         if (irq != -1 && tqg_smp_started) {
808                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
809                 if (error)
810                         printf("%s: binding interrupt failed for %s: %d\n",
811                             __func__, gtask->gt_name, error);
812         }
813         return (0);
814 }
815
816 static int
817 taskqgroup_attach_cpu_deferred(struct taskqgroup *qgroup, struct grouptask *gtask)
818 {
819         cpuset_t mask;
820         int i, qid, irq, cpu, error;
821
822         qid = -1;
823         irq = gtask->gt_irq;
824         cpu = gtask->gt_cpu;
825         MPASS(tqg_smp_started);
826         mtx_lock(&qgroup->tqg_lock);
827         for (i = 0; i < qgroup->tqg_cnt; i++)
828                 if (qgroup->tqg_queue[i].tgc_cpu == cpu) {
829                         qid = i;
830                         break;
831                 }
832         if (qid == -1) {
833                 mtx_unlock(&qgroup->tqg_lock);
834                 printf("%s: qid not found for %s cpu=%d\n", __func__, gtask->gt_name, cpu);
835                 return (EINVAL);
836         }
837         qgroup->tqg_queue[qid].tgc_cnt++;
838         LIST_INSERT_HEAD(&qgroup->tqg_queue[qid].tgc_tasks, gtask, gt_list);
839         MPASS(qgroup->tqg_queue[qid].tgc_taskq != NULL);
840         gtask->gt_taskqueue = qgroup->tqg_queue[qid].tgc_taskq;
841         mtx_unlock(&qgroup->tqg_lock);
842
843         CPU_ZERO(&mask);
844         CPU_SET(cpu, &mask);
845
846         if (irq != -1) {
847                 error = intr_setaffinity(irq, CPU_WHICH_IRQ, &mask);
848                 if (error)
849                         printf("%s: binding interrupt failed for %s: %d\n",
850                             __func__, gtask->gt_name, error);
851         }
852         return (0);
853 }
854
855 void
856 taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask)
857 {
858         int i;
859
860         grouptask_block(gtask);
861         mtx_lock(&qgroup->tqg_lock);
862         for (i = 0; i < qgroup->tqg_cnt; i++)
863                 if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue)
864                         break;
865         if (i == qgroup->tqg_cnt)
866                 panic("%s: task %s not in group", __func__, gtask->gt_name);
867         qgroup->tqg_queue[i].tgc_cnt--;
868         LIST_REMOVE(gtask, gt_list);
869         mtx_unlock(&qgroup->tqg_lock);
870         gtask->gt_taskqueue = NULL;
871         gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE;
872 }
873
874 static void
875 taskqgroup_binder(void *ctx)
876 {
877         struct taskq_bind_task *gtask = (struct taskq_bind_task *)ctx;
878         cpuset_t mask;
879         int error;
880
881         CPU_ZERO(&mask);
882         CPU_SET(gtask->bt_cpuid, &mask);
883         error = cpuset_setthread(curthread->td_tid, &mask);
884         thread_lock(curthread);
885         sched_bind(curthread, gtask->bt_cpuid);
886         thread_unlock(curthread);
887
888         if (error)
889                 printf("%s: binding curthread failed: %d\n", __func__, error);
890         free(gtask, M_DEVBUF);
891 }
892
893 static void
894 taskqgroup_bind(struct taskqgroup *qgroup)
895 {
896         struct taskq_bind_task *gtask;
897         int i;
898
899         /*
900          * Bind taskqueue threads to specific CPUs, if they have been assigned
901          * one.
902          */
903         if (qgroup->tqg_cnt == 1)
904                 return;
905
906         for (i = 0; i < qgroup->tqg_cnt; i++) {
907                 gtask = malloc(sizeof (*gtask), M_DEVBUF, M_WAITOK);
908                 GTASK_INIT(&gtask->bt_task, 0, 0, taskqgroup_binder, gtask);
909                 gtask->bt_cpuid = qgroup->tqg_queue[i].tgc_cpu;
910                 grouptaskqueue_enqueue(qgroup->tqg_queue[i].tgc_taskq,
911                     &gtask->bt_task);
912         }
913 }
914
915 static void
916 taskqgroup_config_init(void *arg)
917 {
918         struct taskqgroup *qgroup = qgroup_config;
919         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
920
921         LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
922             grouptask, gt_list);
923         qgroup->tqg_queue[0].tgc_cnt = 0;
924         taskqgroup_cpu_create(qgroup, 0, 0);
925
926         qgroup->tqg_cnt = 1;
927         qgroup->tqg_stride = 1;
928 }
929
930 SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
931         taskqgroup_config_init, NULL);
932
933 static int
934 _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
935 {
936         LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
937         struct grouptask *gtask;
938         int i, k, old_cnt, old_cpu, cpu;
939
940         mtx_assert(&qgroup->tqg_lock, MA_OWNED);
941
942         if (cnt < 1 || cnt * stride > mp_ncpus || !tqg_smp_started) {
943                 printf("%s: failed cnt: %d stride: %d "
944                     "mp_ncpus: %d tqg_smp_started: %d\n",
945                     __func__, cnt, stride, mp_ncpus, tqg_smp_started);
946                 return (EINVAL);
947         }
948         if (qgroup->tqg_adjusting) {
949                 printf("%s failed: adjusting\n", __func__);
950                 return (EBUSY);
951         }
952         qgroup->tqg_adjusting = 1;
953         old_cnt = qgroup->tqg_cnt;
954         old_cpu = 0;
955         if (old_cnt < cnt)
956                 old_cpu = qgroup->tqg_queue[old_cnt].tgc_cpu;
957         mtx_unlock(&qgroup->tqg_lock);
958         /*
959          * Set up queue for tasks added before boot.
960          */
961         if (old_cnt == 0) {
962                 LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
963                     grouptask, gt_list);
964                 qgroup->tqg_queue[0].tgc_cnt = 0;
965         }
966
967         /*
968          * If new taskq threads have been added.
969          */
970         cpu = old_cpu;
971         for (i = old_cnt; i < cnt; i++) {
972                 taskqgroup_cpu_create(qgroup, i, cpu);
973
974                 for (k = 0; k < stride; k++)
975                         cpu = CPU_NEXT(cpu);
976         }
977         mtx_lock(&qgroup->tqg_lock);
978         qgroup->tqg_cnt = cnt;
979         qgroup->tqg_stride = stride;
980
981         /*
982          * Adjust drivers to use new taskqs.
983          */
984         for (i = 0; i < old_cnt; i++) {
985                 while ((gtask = LIST_FIRST(&qgroup->tqg_queue[i].tgc_tasks))) {
986                         LIST_REMOVE(gtask, gt_list);
987                         qgroup->tqg_queue[i].tgc_cnt--;
988                         LIST_INSERT_HEAD(&gtask_head, gtask, gt_list);
989                 }
990         }
991         mtx_unlock(&qgroup->tqg_lock);
992
993         while ((gtask = LIST_FIRST(&gtask_head))) {
994                 LIST_REMOVE(gtask, gt_list);
995                 if (gtask->gt_cpu == -1)
996                         taskqgroup_attach_deferred(qgroup, gtask);
997                 else if (taskqgroup_attach_cpu_deferred(qgroup, gtask))
998                         taskqgroup_attach_deferred(qgroup, gtask);
999         }
1000
1001 #ifdef INVARIANTS
1002         mtx_lock(&qgroup->tqg_lock);
1003         for (i = 0; i < qgroup->tqg_cnt; i++) {
1004                 MPASS(qgroup->tqg_queue[i].tgc_taskq != NULL);
1005                 LIST_FOREACH(gtask, &qgroup->tqg_queue[i].tgc_tasks, gt_list)
1006                         MPASS(gtask->gt_taskqueue != NULL);
1007         }
1008         mtx_unlock(&qgroup->tqg_lock);
1009 #endif
1010         /*
1011          * If taskq thread count has been reduced.
1012          */
1013         for (i = cnt; i < old_cnt; i++)
1014                 taskqgroup_cpu_remove(qgroup, i);
1015
1016         taskqgroup_bind(qgroup);
1017
1018         mtx_lock(&qgroup->tqg_lock);
1019         qgroup->tqg_adjusting = 0;
1020
1021         return (0);
1022 }
1023
1024 int
1025 taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
1026 {
1027         int error;
1028
1029         mtx_lock(&qgroup->tqg_lock);
1030         error = _taskqgroup_adjust(qgroup, cnt, stride);
1031         mtx_unlock(&qgroup->tqg_lock);
1032
1033         return (error);
1034 }
1035
1036 struct taskqgroup *
1037 taskqgroup_create(const char *name)
1038 {
1039         struct taskqgroup *qgroup;
1040
1041         qgroup = malloc(sizeof(*qgroup), M_GTASKQUEUE, M_WAITOK | M_ZERO);
1042         mtx_init(&qgroup->tqg_lock, "taskqgroup", NULL, MTX_DEF);
1043         qgroup->tqg_name = name;
1044         LIST_INIT(&qgroup->tqg_queue[0].tgc_tasks);
1045
1046         return (qgroup);
1047 }
1048
/*
 * Destroy a taskqgroup.
 *
 * Currently a no-op: nothing allocated by taskqgroup_create() is
 * released here.
 */
void
taskqgroup_destroy(struct taskqgroup *qgroup)
{
	/* Intentionally empty. */
}
1054
1055 void
1056 taskqgroup_config_gtask_init(void *ctx, struct grouptask *gtask, gtask_fn_t *fn,
1057     const char *name)
1058 {
1059
1060         GROUPTASK_INIT(gtask, 0, fn, ctx);
1061         taskqgroup_attach(qgroup_config, gtask, gtask, -1, name);
1062 }
1063
1064 void
1065 taskqgroup_config_gtask_deinit(struct grouptask *gtask)
1066 {
1067
1068         taskqgroup_detach(qgroup_config, gtask);
1069 }