2 * Copyright (c) 2003 John Baldwin <jhb@FreeBSD.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * Machine dependent interrupt code for x86. For x86, we have to
31 * deal with different PICs. Thus, we use the passed in vector to lookup
32 * an interrupt source associated with that vector. The interrupt source
33 * describes which PIC the source belongs to and includes methods to handle
37 #include "opt_atpic.h"
40 #include <sys/param.h>
42 #include <sys/interrupt.h>
44 #include <sys/kernel.h>
46 #include <sys/mutex.h>
48 #include <sys/queue.h>
52 #include <sys/sysctl.h>
53 #include <sys/syslog.h>
54 #include <sys/systm.h>
55 #include <sys/taskqueue.h>
56 #include <sys/vmmeter.h>
57 #include <machine/clock.h>
58 #include <machine/intr_machdep.h>
59 #include <machine/smp.h>
65 #include <machine/segments.h>
66 #include <machine/frame.h>
67 #include <dev/ic/i8259.h>
68 #include <x86/isa/icu.h>
69 #include <isa/isareg.h>
/*
 * Per-source cap on stray-interrupt log messages: intr_execute_handlers()
 * logs each stray while the count is below this value and prints a final
 * "not logging anymore" message when the count reaches it.
 */
72 #define MAX_STRAY_LOG 5
/*
 * One-argument callback shape; PIC methods are cast to this type when they
 * are handed to intr_event_create().
 */
74 typedef void (*mask_fn)(void *);
/*
 * Slot index into intrcnt[]/intrnames[]; used under intrcnt_lock by
 * intrcnt_register() and intrcnt_add().
 */
76 static int intrcnt_index;
/* Registered interrupt sources, indexed by the value pic_vector() reports. */
77 static struct intsrc *interrupt_sources[NUM_IO_INTS];
/* Scratch copy of interrupt_sources[] that intr_balance() sorts. */
79 static struct intsrc *interrupt_sorted[NUM_IO_INTS];
/*
 * intr_balance() copies interrupt_sources[] into interrupt_sorted[] with a
 * single memcpy() sized by the destination, so both arrays must be the same
 * size.
 */
80 CTASSERT(sizeof(interrupt_sources) == sizeof(interrupt_sorted));
81 static int intrbalance;
82 SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RW, &intrbalance, 0,
83 "Interrupt auto-balance interval (seconds). Zero disables.");
/* Timeout task that runs intr_balance(); it re-arms itself on every run. */
84 static struct timeout_task intrbalance_task;
/*
 * sx lock taken around changes to interrupt_sources[] and to per-source
 * handler and CPU-assignment state.
 */
86 static struct sx intrsrc_lock;
/* Mutex held while the list of registered PICs is walked or modified. */
87 static struct mtx intrpic_lock;
/* Spin mutex held while intrcnt[]/intrnames[] slots are handed out. */
88 static struct mtx intrcnt_lock;
/* List of registered interrupt controllers. */
89 static TAILQ_HEAD(pics_head, pic) pics;
91 #if defined(SMP) && !defined(EARLY_AP_STARTUP)
/* Gate tested by intr_assign_cpu() before it reprograms a source's CPU. */
92 static int assign_cpu;
/*
 * Interrupt counters and their names.  Each name occupies a fixed slot of
 * MAXCOMLEN + 1 bytes in intrnames[]; the two size variables export the
 * total size of each array.
 */
95 u_long intrcnt[INTRCNT_COUNT];
96 char intrnames[INTRCNT_COUNT * (MAXCOMLEN + 1)];
97 size_t sintrcnt = sizeof(intrcnt);
98 size_t sintrnames = sizeof(intrnames);
/* Forward declarations for the file-local helpers defined below. */
100 static int intr_assign_cpu(void *arg, int cpu);
101 static void intr_disable_src(void *arg);
102 static void intr_init(void *__dummy);
103 static int intr_pic_registered(struct pic *pic);
104 static void intrcnt_setname(const char *name, int index);
105 static void intrcnt_updatename(struct intsrc *is);
106 static void intrcnt_register(struct intsrc *is);
/*
 * Walk the list of registered PICs on behalf of callers that use the result
 * as an "is this PIC already registered?" test.
 * NOTE(review): the loop body and the return statements are not visible in
 * this listing.
 */
109 intr_pic_registered(struct pic *pic)
113 TAILQ_FOREACH(p, &pics, pics) {
121 * Register a new interrupt controller (PIC). This is to support suspend
122 * and resume where we suspend/resume controllers rather than individual
123 * sources. This also allows controllers with no active sources (such as
124 * 8259As in a system using the APICs) to participate in suspend and resume.
/*
 * The registration check and the tail insert are both performed with
 * intrpic_lock held, so the check cannot race with another registration.
 * NOTE(review): the statement executed when the PIC is already registered
 * is not visible in this listing.
 */
127 intr_register_pic(struct pic *pic)
131 mtx_lock(&intrpic_lock);
132 if (intr_pic_registered(pic))
135 TAILQ_INSERT_TAIL(&pics, pic, pics);
138 mtx_unlock(&intrpic_lock);
143 * Register a new interrupt source with the global interrupt system.
144 * The global interrupts need to be disabled when this function is
/*
 * Publish 'isrc' in interrupt_sources[] under the vector its PIC reports.
 * The owning PIC must already be registered (asserted below).
 */
148 intr_register_source(struct intsrc *isrc)
152 KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC"));
153 vector = isrc->is_pic->pic_vector(isrc);
/* Unlocked early check; the slot is re-checked under the lock below. */
154 if (interrupt_sources[vector] != NULL)
/*
 * Create the interrupt event before taking intrsrc_lock, passing
 * intr_disable_src, the PIC's enable/EOI methods and intr_assign_cpu as
 * its callbacks.
 */
156 error = intr_event_create(&isrc->is_event, isrc, 0, vector,
157 intr_disable_src, (mask_fn)isrc->is_pic->pic_enable_source,
158 (mask_fn)isrc->is_pic->pic_eoi_source, intr_assign_cpu, "irq%d:",
162 sx_xlock(&intrsrc_lock);
/*
 * Someone else claimed the vector while the event was being created:
 * drop the lock and destroy the event that was just made.
 */
163 if (interrupt_sources[vector] != NULL) {
164 sx_xunlock(&intrsrc_lock);
165 intr_event_destroy(isrc->is_event);
/* Allocate counter slots, publish the source and start with no handlers. */
168 intrcnt_register(isrc);
169 interrupt_sources[vector] = isrc;
170 isrc->is_handlers = 0;
171 sx_xunlock(&intrsrc_lock);
/*
 * Return the interrupt source stored for 'vector'; the entry is NULL-tested
 * elsewhere in this file, so callers must expect NULL for unused vectors.
 * NOTE(review): no range check on 'vector' is visible in this listing --
 * confirm that callers only pass indices inside interrupt_sources[].
 */
176 intr_lookup_source(int vector)
179 return (interrupt_sources[vector]);
/*
 * Attach a filter and/or threaded handler to the source registered for
 * 'vector'.  The handler is added to the source's interrupt event with a
 * priority derived from 'flags'; the handler cookie is returned through
 * 'cookiep'.
 * NOTE(review): the checks on 'isrc' and 'error', and the update of
 * is_handlers, are not visible in this listing.
 */
183 intr_add_handler(const char *name, int vector, driver_filter_t filter,
184 driver_intr_t handler, void *arg, enum intr_type flags, void **cookiep)
189 isrc = intr_lookup_source(vector);
192 error = intr_event_add_handler(isrc->is_event, name, filter, handler,
193 arg, intr_priority(flags), flags, cookiep);
195 sx_xlock(&intrsrc_lock);
/* Refresh the counter name from the event's current full name. */
196 intrcnt_updatename(isrc);
/* When the handler count is 1, enable the interrupt and unmask the source. */
198 if (isrc->is_handlers == 1) {
199 isrc->is_pic->pic_enable_intr(isrc);
200 isrc->is_pic->pic_enable_source(isrc);
202 sx_xunlock(&intrsrc_lock);
/*
 * Detach the handler identified by 'cookie' from its interrupt event.
 * The owning source is looked up before the handler is removed.
 * NOTE(review): the check on 'error' and the update of is_handlers are not
 * visible in this listing.
 */
208 intr_remove_handler(void *cookie)
213 isrc = intr_handler_source(cookie);
214 error = intr_event_remove_handler(cookie);
216 sx_xlock(&intrsrc_lock);
/*
 * No handlers remain: mask the source without issuing an EOI, then
 * disable the interrupt in the PIC.
 */
218 if (isrc->is_handlers == 0) {
219 isrc->is_pic->pic_disable_source(isrc, PIC_NO_EOI);
220 isrc->is_pic->pic_disable_intr(isrc);
/* Refresh the counter name from the event's current full name. */
222 intrcnt_updatename(isrc);
223 sx_xunlock(&intrsrc_lock);
/*
 * Forward a trigger-mode/polarity configuration request to the PIC that
 * owns the source registered for 'vector' and return the PIC's result.
 * NOTE(review): the handling of a missing source is not visible in this
 * listing.
 */
229 intr_config_intr(int vector, enum intr_trigger trig, enum intr_polarity pol)
233 isrc = intr_lookup_source(vector);
236 return (isrc->is_pic->pic_config_intr(isrc, trig, pol));
/*
 * Callback handed to intr_event_create(): asks the owning PIC to disable
 * the source, passing PIC_EOI (the handler-removal path passes PIC_NO_EOI
 * instead).
 * NOTE(review): the conversion of 'arg' to 'isrc' is not visible in this
 * listing.
 */
240 intr_disable_src(void *arg)
245 isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
/*
 * Dispatch an interrupt from 'isrc' to its interrupt event, handling the
 * stray case when the event reports failure.
 * NOTE(review): several statements (counter updates, the assignment of
 * 'ie', and comment delimiters) are not visible in this listing.
 */
249 intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
251 struct intr_event *ie;
255 * We count software interrupts when we process them. The
256 * code here follows previous practice, but there's an
257 * argument for counting hardware interrupts when they're
266 * XXX: We assume that IRQ 0 is only used for the ISA timer
269 vector = isrc->is_pic->pic_vector(isrc);
274 * For stray interrupts, mask and EOI the source, bump the
275 * stray count, and log the condition.
/* A non-zero return from intr_event_handle() is treated as a stray. */
277 if (intr_event_handle(ie, frame) != 0) {
278 isrc->is_pic->pic_disable_source(isrc, PIC_EOI);
279 (*isrc->is_straycount)++;
/*
 * Log each stray while the count is below MAX_STRAY_LOG, emit one final
 * notice when it reaches the limit, and stay silent afterwards.
 */
280 if (*isrc->is_straycount < MAX_STRAY_LOG)
281 log(LOG_ERR, "stray irq%d\n", vector);
282 else if (*isrc->is_straycount == MAX_STRAY_LOG)
284 "too many stray irq %d's: not logging anymore\n",
/*
 * Resume every registered PIC that provides a pic_resume method, walking
 * the list head-to-tail with intrpic_lock held.  'suspend_cancelled' is
 * passed through to each PIC unchanged.
 */
290 intr_resume(bool suspend_cancelled)
297 mtx_lock(&intrpic_lock);
298 TAILQ_FOREACH(pic, &pics, pics) {
299 if (pic->pic_resume != NULL)
300 pic->pic_resume(pic, suspend_cancelled);
302 mtx_unlock(&intrpic_lock);
/*
 * Suspend every registered PIC that provides a pic_suspend method.  The
 * list is walked tail-to-head, i.e. in the reverse of the order used by
 * the resume loop, with intrpic_lock held.
 * NOTE(review): the function header for this body is not visible in this
 * listing.
 */
310 mtx_lock(&intrpic_lock);
311 TAILQ_FOREACH_REVERSE(pic, &pics, pics_head, pics) {
312 if (pic->pic_suspend != NULL)
313 pic->pic_suspend(pic);
315 mtx_unlock(&intrpic_lock);
/*
 * CPU-assignment callback handed to intr_event_create().  When an
 * assignment is performed, 'cpu' is translated to a local APIC ID through
 * cpu_apic_ids[] and passed to the owning PIC with intrsrc_lock held.
 * A 'cpu' of NOCPU never triggers an assignment in either configuration.
 * NOTE(review): the #else/#endif lines, the conversion of 'arg' to 'isrc'
 * and the return statements are not visible in this listing.
 */
319 intr_assign_cpu(void *arg, int cpu)
325 #ifdef EARLY_AP_STARTUP
326 MPASS(mp_ncpus == 1 || smp_started);
328 /* Nothing to do if there is only a single CPU. */
329 if (mp_ncpus > 1 && cpu != NOCPU) {
332 * Don't do anything during early boot. We will pick up the
333 * assignment once the APs are started.
335 if (assign_cpu && cpu != NOCPU) {
338 sx_xlock(&intrsrc_lock);
339 error = isrc->is_pic->pic_assign_cpu(isrc, cpu_apic_ids[cpu]);
342 sx_xunlock(&intrsrc_lock);
/*
 * Write 'name' into the fixed-size intrnames[] slot for 'index'.  Slots
 * are MAXCOMLEN + 1 bytes wide, and snprintf() is bounded to that width so
 * a long name cannot spill into the next slot.  The "%-*s" conversion
 * left-justifies the name in a field whose width is supplied by an
 * argument on a continuation line that is not visible in this listing.
 */
352 intrcnt_setname(const char *name, int index)
355 snprintf(intrnames + (MAXCOMLEN + 1) * index, MAXCOMLEN + 1, "%-*s",
/*
 * Copy the interrupt event's current full name into the intrnames[] slot
 * that belongs to this source (is->is_index).
 */
360 intrcnt_updatename(struct intsrc *is)
363 intrcnt_setname(is->is_event->ie_fullname, is->is_index);
/*
 * Give 'is' two adjacent counter slots: slot is_index counts interrupts
 * and is named after the event; slot is_index + 1 counts strays and is
 * named "stray irq<vector>".  The source must already have an event.
 * NOTE(review): the statement that advances intrcnt_index past the two
 * slots is not visible in this listing.
 */
367 intrcnt_register(struct intsrc *is)
369 char straystr[MAXCOMLEN + 1];
371 KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__));
/* Slot allocation and naming are done under the intrcnt spin lock. */
372 mtx_lock_spin(&intrcnt_lock);
373 is->is_index = intrcnt_index;
375 snprintf(straystr, MAXCOMLEN + 1, "stray irq%d",
376 is->is_pic->pic_vector(is));
377 intrcnt_updatename(is);
378 is->is_count = &intrcnt[is->is_index];
379 intrcnt_setname(straystr, is->is_index + 1);
380 is->is_straycount = &intrcnt[is->is_index + 1];
381 mtx_unlock_spin(&intrcnt_lock);
/*
 * Hand out the counter slot at intrcnt_index: '*countp' is pointed at the
 * counter and the slot is labelled with 'name', all under intrcnt_lock.
 * NOTE(review): the statement that advances intrcnt_index is not visible
 * in this listing.
 */
385 intrcnt_add(const char *name, u_long **countp)
388 mtx_lock_spin(&intrcnt_lock);
389 *countp = &intrcnt[intrcnt_index];
390 intrcnt_setname(name, intrcnt_index);
392 mtx_unlock_spin(&intrcnt_lock);
/*
 * One-time setup for this file: label counter slot 0 as "???" and
 * initialise the three locks (intrpic_lock as a default mutex,
 * intrsrc_lock as an sx lock, intrcnt_lock as a spin mutex).  Registered
 * at SI_SUB_INTR with SI_ORDER_FIRST.
 * NOTE(review): lines between the visible statements are missing from
 * this listing.
 */
396 intr_init(void *dummy __unused)
399 intrcnt_setname("???", 0);
402 mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF);
403 sx_init(&intrsrc_lock, "intrsrc");
404 mtx_init(&intrcnt_lock, "intrcnt", NULL, MTX_SPIN);
406 SYSINIT(intr_init, SI_SUB_INTR, SI_ORDER_FIRST, intr_init, NULL);
/*
 * Final step of interrupt setup; registered in the same SI_SUB_INTR
 * subsystem as intr_init() but with SI_ORDER_ANY instead of
 * SI_ORDER_FIRST -- presumably so it runs after the controllers are set
 * up, as the comment below describes.
 * NOTE(review): the statement(s) making up the body are not visible in
 * this listing.
 */
409 intr_init_final(void *dummy __unused)
413 * Enable interrupts on the BSP after all of the interrupt
414 * controllers are initialized. Device interrupts are still
415 * disabled in the interrupt controllers until interrupt
416 * handlers are registered. Interrupts are enabled on each AP
417 * after their first context switch.
421 SYSINIT(intr_init_final, SI_SUB_INTR, SI_ORDER_ANY, intr_init_final, NULL);
424 /* Initialize the two 8259A's to a known-good shutdown state. */
/*
 * Program both 8259A controllers with the same six-write pattern: one
 * write of ICW1_RESET | ICW1_IC4 to the base port, four writes to the
 * base + ICU_IMR_OFFSET port (interrupt vector base, cascade wiring,
 * master/slave mode, then 0xff), and a final OCW3_SEL | OCW3_RR write to
 * the base port.
 * NOTE(review): reading the four IMR-port writes as ICW2, ICW3, ICW4 and
 * "mask every line" follows the constant names and the 8259A programming
 * model -- confirm against the datasheet.  The function header is not
 * visible in this listing.
 */
/* First controller: vectors start at IDT_IO_INTS. */
429 outb(IO_ICU1, ICW1_RESET | ICW1_IC4);
430 outb(IO_ICU1 + ICU_IMR_OFFSET, IDT_IO_INTS);
431 outb(IO_ICU1 + ICU_IMR_OFFSET, IRQ_MASK(ICU_SLAVEID));
432 outb(IO_ICU1 + ICU_IMR_OFFSET, MASTER_MODE);
433 outb(IO_ICU1 + ICU_IMR_OFFSET, 0xff);
434 outb(IO_ICU1, OCW3_SEL | OCW3_RR);
/* Second controller: vectors start eight above the first controller's. */
436 outb(IO_ICU2, ICW1_RESET | ICW1_IC4);
437 outb(IO_ICU2 + ICU_IMR_OFFSET, IDT_IO_INTS + 8);
438 outb(IO_ICU2 + ICU_IMR_OFFSET, ICU_SLAVEID);
439 outb(IO_ICU2 + ICU_IMR_OFFSET, SLAVE_MODE);
440 outb(IO_ICU2 + ICU_IMR_OFFSET, 0xff);
441 outb(IO_ICU2, OCW3_SEL | OCW3_RR);
445 /* Add a description to an active interrupt handler. */
/*
 * Attach the description 'descr' to handler 'ih' on the source registered
 * for 'vector', then refresh the source's counter name so it picks up the
 * event's updated full name.
 * NOTE(review): the checks on 'isrc' and 'error' and any locking around
 * the name update are not visible in this listing.
 */
447 intr_describe(u_int vector, void *ih, const char *descr)
452 isrc = intr_lookup_source(vector);
455 error = intr_event_describe_handler(isrc->is_event, ih, descr);
458 intrcnt_updatename(isrc);
/*
 * Walk every slot of interrupt_sources[] with intrsrc_lock held
 * exclusively and call pic_reprogram_pin for each source whose PIC
 * provides that method.
 * NOTE(review): the function header and the skip for empty (NULL) slots
 * are not visible in this listing.
 */
468 sx_xlock(&intrsrc_lock);
469 for (v = 0; v < NUM_IO_INTS; v++) {
470 is = interrupt_sources[v];
473 if (is->is_pic->pic_reprogram_pin != NULL)
474 is->is_pic->pic_reprogram_pin(is);
476 sx_xunlock(&intrsrc_lock);
481 * Dump data about interrupt handlers
/*
 * Debugger "show irqs" command: walks interrupt_sources[] and dumps each
 * source's interrupt event, stopping early once db_pager_quit is set.  The
 * "v" modifier is compared against 'modif' to choose the 'verbose' value
 * passed to db_dump_intr_event().
 * NOTE(review): the assignments to 'verbose' and the guard that skips
 * empty slots before '(*isrc)->is_event' are not visible in this listing.
 */
483 DB_SHOW_COMMAND(irqs, db_show_irqs)
485 struct intsrc **isrc;
488 if (strcmp(modif, "v") == 0)
492 isrc = interrupt_sources;
493 for (i = 0; i < NUM_IO_INTS && !db_pager_quit; i++, isrc++)
495 db_dump_intr_event((*isrc)->is_event, verbose);
501 * Support for balancing interrupt sources across CPUs. For now we just
502 * allocate CPUs round-robin.
/*
 * Set of CPUs that may be chosen as interrupt destinations; it starts out
 * with only bit 0 set, and intr_add_cpu() adds further CPUs.
 */
505 cpuset_t intr_cpus = CPUSET_T_INITIALIZER(0x1);
/* Round-robin cursor: index into cpu_apic_ids[] of the next CPU to hand out. */
506 static int current_cpu;
509 * Return the CPU that the next interrupt source should use. For now
510 * this just returns the next local APIC according to round-robin.
/*
 * Pick the local APIC ID for the next interrupt source.  Two visible
 * paths simply return the current CPU's APIC ID; otherwise, under the
 * icu_lock spin mutex, the APIC ID of 'current_cpu' is captured and the
 * cursor is moved on until it lands on a CPU that is a member of
 * intr_cpus.
 * NOTE(review): the function header, the conditions guarding the early
 * returns, the cursor increment/wrap statements and the final return are
 * not visible in this listing.
 */
517 #ifdef EARLY_AP_STARTUP
518 MPASS(mp_ncpus == 1 || smp_started);
520 return (PCPU_GET(apic_id));
522 /* Leave all interrupts on the BSP during boot. */
524 return (PCPU_GET(apic_id));
527 mtx_lock_spin(&icu_lock);
528 apic_id = cpu_apic_ids[current_cpu];
531 if (current_cpu > mp_maxid)
533 } while (!CPU_ISSET(current_cpu, &intr_cpus));
534 mtx_unlock_spin(&icu_lock);
538 /* Attempt to bind the specified IRQ to the specified CPU. */
/*
 * Bind the interrupt event of the source registered for 'vector' to 'cpu'
 * by delegating to intr_event_bind(), whose result is returned.
 * NOTE(review): the handling of a missing source is not visible in this
 * listing.
 */
540 intr_bind(u_int vector, u_char cpu)
544 isrc = intr_lookup_source(vector);
547 return (intr_event_bind(isrc->is_event, cpu));
551 * Add a CPU to our mask of valid CPUs that can be destinations of
/*
 * Mark 'cpu' as a valid interrupt destination by setting its bit in
 * intr_cpus.  An invalid CPU ID is fatal (panic).
 * NOTE(review): the validity condition, the condition guarding the
 * printf() and its argument are not visible in this listing.
 */
555 intr_add_cpu(u_int cpu)
559 panic("%s: Invalid CPU ID", __func__);
561 printf("INTR: Adding local APIC %d as a target\n",
564 CPU_SET(cpu, &intr_cpus);
567 #ifndef EARLY_AP_STARTUP
569 * Distribute all the interrupt sources among the available CPUs once the
570 * AP's have been launched.
/*
 * SYSINIT hook (SI_SUB_SMP, SI_ORDER_SECOND) that assigns a CPU to every
 * registered source that has at least one handler, holding intrsrc_lock
 * exclusively for the whole pass.
 * NOTE(review): the UP early-out, the selection of 'cpu' for unbound
 * events and the statements inside the success branch are not visible in
 * this listing.
 */
573 intr_shuffle_irqs(void *arg __unused)
579 /* Don't bother on UP. */
583 /* Round-robin assign a CPU to each enabled source. */
584 sx_xlock(&intrsrc_lock);
586 for (i = 0; i < NUM_IO_INTS; i++) {
587 isrc = interrupt_sources[i];
/* Only sources that exist and have handlers attached are considered. */
588 if (isrc != NULL && isrc->is_handlers > 0) {
590 * If this event is already bound to a CPU,
591 * then assign the source to that CPU instead
592 * of picking one via round-robin. Note that
593 * this is careful to only advance the
594 * round-robin if the CPU assignment succeeds.
596 cpu = isrc->is_event->ie_cpu;
/* The PIC is given the APIC ID of 'cpu'; 0 means the assignment worked. */
599 if (isrc->is_pic->pic_assign_cpu(isrc,
600 cpu_apic_ids[cpu]) == 0) {
602 if (isrc->is_event->ie_cpu == NOCPU)
607 sx_xunlock(&intrsrc_lock);
609 SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs,
614 * TODO: Export this information in a non-MD fashion, integrate with vmstat -i.
/*
 * Handler for the hw.intrs sysctl: emits one "%s:%d @%d: %ld" line per
 * registered source into an sbuf bound to the request.  The user buffer
 * is wired first, and interrupt_sources[] is read under a shared hold of
 * intrsrc_lock.
 * NOTE(review): the node is declared CTLFLAG_RW although the visible
 * handler only produces output -- confirm whether read-only was intended.
 * The skip for empty slots, most sbuf_printf() arguments and the error
 * checks are not visible in this listing.
 */
617 sysctl_hw_intrs(SYSCTL_HANDLER_ARGS)
624 error = sysctl_wire_old_buffer(req, 0);
628 sbuf_new_for_sysctl(&sbuf, NULL, 128, req);
629 sx_slock(&intrsrc_lock);
630 for (i = 0; i < NUM_IO_INTS; i++) {
631 isrc = interrupt_sources[i];
634 sbuf_printf(&sbuf, "%s:%d @%d: %ld\n",
635 isrc->is_event->ie_fullname,
641 sx_sunlock(&intrsrc_lock);
642 error = sbuf_finish(&sbuf);
646 SYSCTL_PROC(_hw, OID_AUTO, intrs, CTLTYPE_STRING | CTLFLAG_RW,
647 0, 0, sysctl_hw_intrs, "A", "interrupt:number @cpu: count");
650 * Compare two, possibly NULL, entries in the interrupt source array
/*
 * Comparison callback over elements of an array of 'struct intsrc *'
 * (each argument points at one array slot).  When both slots are
 * non-NULL the result is the difference of the two interrupt counts.
 * NOTE(review): the counts are u_long (is_count points into intrcnt[]),
 * so if this function returns int, as qsort() comparators do, the
 * difference is narrowed and may carry the wrong sign when the counts
 * differ by more than INT_MAX; explicit <, > comparisons would avoid
 * that.  The branches that order NULL entries are not visible in this
 * listing.
 */
654 intrcmp(const void *one, const void *two)
656 const struct intsrc *i1, *i2;
658 i1 = *(const struct intsrc * const *)one;
659 i2 = *(const struct intsrc * const *)two;
660 if (i1 != NULL && i2 != NULL)
661 return (*i1->is_count - *i2->is_count);
670 * Balance IRQs across available CPUs according to load.
/*
 * Periodic task that redistributes interrupt sources across CPUs.  A
 * snapshot of interrupt_sources[] is sorted, then walked from the last
 * sorted entry to the first; sources that exist and whose event is not
 * bound to a CPU are candidates for reassignment.
 * NOTE(review): the early exit taken when balancing is disabled, the
 * comparator passed to qsort(), the choice of 'cpu' and the statements
 * executed after a successful assignment are not visible in this listing.
 */
673 intr_balance(void *dummy __unused, int pending __unused)
/* Sample the tunable once so the whole run uses a single value. */
680 interval = intrbalance;
685 * Sort interrupts according to count.
687 sx_xlock(&intrsrc_lock);
688 memcpy(interrupt_sorted, interrupt_sources, sizeof(interrupt_sorted));
689 qsort(interrupt_sorted, NUM_IO_INTS, sizeof(interrupt_sorted[0]),
693 * Restart the scan from the same location to avoid moving in the
699 * Assign round-robin from most loaded to least.
701 for (i = NUM_IO_INTS - 1; i >= 0; i--) {
702 isrc = interrupt_sorted[i];
/* Empty slots and events already bound to a CPU are tested for here. */
703 if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU)
/* Only call into the PIC when the target differs from the current CPU. */
707 if (isrc->is_cpu != cpu &&
708 isrc->is_pic->pic_assign_cpu(isrc,
709 cpu_apic_ids[cpu]) == 0)
712 sx_xunlock(&intrsrc_lock);
/*
 * Re-arm the task: after 'interval' seconds, or after 60 seconds when
 * the interval is zero.
 */
714 taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task,
715 interval ? hz * interval : hz * 60);
/*
 * SYSINIT hook (SI_SUB_SMP, SI_ORDER_ANY) that initialises the balance
 * timeout task with intr_balance() as its function and schedules the
 * first run 'hz' ticks later on taskqueue_thread.
 */
720 intr_balance_init(void *dummy __unused)
723 TIMEOUT_TASK_INIT(taskqueue_thread, &intrbalance_task, 0, intr_balance,
725 taskqueue_enqueue_timeout(taskqueue_thread, &intrbalance_task, hz);
727 SYSINIT(intr_balance_init, SI_SUB_SMP, SI_ORDER_ANY, intr_balance_init, NULL);
731 * Always route interrupts to the current processor in the UP case.
/*
 * Uniprocessor fallback: the answer is always the APIC ID of the CPU
 * executing this code.
 * NOTE(review): the function header for this body is not visible in this
 * listing.
 */
737 return (PCPU_GET(apic_id));