2 * SPDX-License-Identifier: BSD-3-Clause
4 * Copyright (c) 2006 Yahoo!, Inc.
6 * Written by: John Baldwin <jhb@FreeBSD.org>
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the author nor the names of any co-contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * Support for PCI Message Signalled Interrupts (MSI). MSI interrupts on
35 * x86 are basically APIC messages that the northbridge delivers directly
36 * to the local APICs as if they had come from an I/O APIC.
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
43 #include "opt_iommu.h"
45 #include <sys/param.h>
47 #include <sys/kernel.h>
48 #include <sys/limits.h>
50 #include <sys/malloc.h>
51 #include <sys/mutex.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55 #include <x86/apicreg.h>
56 #include <machine/cputypes.h>
57 #include <machine/md_var.h>
58 #include <machine/frame.h>
59 #include <machine/intr_machdep.h>
60 #include <x86/apicvar.h>
61 #include <x86/iommu/iommu_intrmap.h>
62 #include <machine/specialreg.h>
63 #include <dev/pci/pcivar.h>
65 /* Fields in address for Intel MSI messages. */
/* Destination field mask (bits 12-19); INTEL_ADDR fills it with msi_cpu << 12. */
66 #define MSI_INTEL_ADDR_DEST 0x000ff000
/* Bit 3 of the address word, with its two possible values. */
67 #define MSI_INTEL_ADDR_RH 0x00000008
68 # define MSI_INTEL_ADDR_RH_ON 0x00000008
69 # define MSI_INTEL_ADDR_RH_OFF 0x00000000
/* Bit 2 of the address word: physical versus logical destination mode. */
70 #define MSI_INTEL_ADDR_DM 0x00000004
71 # define MSI_INTEL_ADDR_DM_PHYSICAL 0x00000000
72 # define MSI_INTEL_ADDR_DM_LOGICAL 0x00000004
74 /* Fields in data for Intel MSI messages. */
/*
 * Most data-word fields are aliases of the IOART_* constants, which are
 * defined outside this file; only the level/polarity bit (0x4000) is
 * spelled out numerically here.
 */
75 #define MSI_INTEL_DATA_TRGRMOD IOART_TRGRMOD /* Trigger mode. */
76 # define MSI_INTEL_DATA_TRGREDG IOART_TRGREDG
77 # define MSI_INTEL_DATA_TRGRLVL IOART_TRGRLVL
78 #define MSI_INTEL_DATA_LEVEL 0x00004000 /* Polarity. */
79 # define MSI_INTEL_DATA_DEASSERT 0x00000000
80 # define MSI_INTEL_DATA_ASSERT 0x00004000
81 #define MSI_INTEL_DATA_DELMOD IOART_DELMOD /* Delivery mode. */
82 # define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED
83 # define MSI_INTEL_DATA_DELLOPRI IOART_DELLOPRI
84 # define MSI_INTEL_DATA_DELSMI IOART_DELSMI
85 # define MSI_INTEL_DATA_DELNMI IOART_DELNMI
86 # define MSI_INTEL_DATA_DELINIT IOART_DELINIT
87 # define MSI_INTEL_DATA_DELEXINT IOART_DELEXINT
88 #define MSI_INTEL_DATA_INTVEC IOART_INTVEC /* Interrupt vector. */
91 * Build Intel MSI message and data values from a source. AMD64 systems
92 * seem to be compatible, so we use the same function for both.
/*
 * INTEL_ADDR(msi): address word for a source.  ORs MSI_INTEL_ADDR_BASE
 * with the source's msi_cpu shifted to bit 12, using the RH_OFF and
 * DM_PHYSICAL field values.  MSI_INTEL_ADDR_BASE is not defined in the
 * visible part of this file.
 * NOTE(review): msi_cpu is not masked with MSI_INTEL_ADDR_DEST, so a value
 * above 0xff would spill past the destination field -- confirm that callers
 * keep the APIC ID in range.
 */
94 #define INTEL_ADDR(msi) \
95 (MSI_INTEL_ADDR_BASE | (msi)->msi_cpu << 12 | \
96 MSI_INTEL_ADDR_RH_OFF | MSI_INTEL_ADDR_DM_PHYSICAL)
/*
 * INTEL_DATA(msi): data word for a source.  Edge trigger and fixed delivery
 * ORed with the source's msi_vector (unshifted).
 */
97 #define INTEL_DATA(msi) \
98 (MSI_INTEL_DATA_TRGREDG | MSI_INTEL_DATA_DELFIXED | (msi)->msi_vector)
/* malloc type used for the msi_intsrc structures and per-group IRQ arrays allocated in this file. */
100 static MALLOC_DEFINE(M_MSI, "msi", "PCI MSI");
103 * MSI sources are bunched into groups. This is because MSI forces
104 * all of the messages to share the address and data registers and
105 * thus certain properties (such as the local APIC ID target on x86).
106 * Each group has a 'first' source that contains information global to
107 * the group. These fields are marked with (g) below.
109 * Note that local APIC ID is kind of special. Each message will be
110 * assigned an ID by the system; however, a group uses the ID of its first message.
113 * For MSI-X, each message is isolated.
/*
 * Members of the per-message interrupt source structure (the line opening
 * the struct is not visible here).  Fields tagged (g) hold group-wide
 * information.
 */
/*
 * Generic interrupt source.  The PIC methods cast a struct intsrc * to
 * struct msi_intsrc *, so this appears to be required as the first member.
 */
116 struct intsrc msi_intsrc;
117 device_t msi_dev; /* Owning device. (g) */
118 struct msi_intsrc *msi_first; /* First source in group. */
119 u_int msi_irq; /* IRQ cookie. */
120 u_int msi_msix; /* MSI-X message. */
121 u_int msi_vector:8; /* IDT vector. */
122 u_int msi_cpu; /* Local APIC ID. (g) */
123 u_int msi_count:8; /* Messages in this group. (g) */
124 u_int msi_maxcount:8; /* Alignment for this group. (g) */
125 u_int *msi_irqs; /* Group's IRQ list. (g) */
/* Cookie filled from iommu_alloc_msi_intr() and passed to iommu_map_msi_intr() / iommu_unmap_msi_intr(). */
126 u_int msi_remap_cookie;
/*
 * Forward declarations of the file-local routines.  Every routine except
 * msi_create_source() is installed as a method in the msi_pic table.
 */
129 static void msi_create_source(void);
130 static void msi_enable_source(struct intsrc *isrc);
131 static void msi_disable_source(struct intsrc *isrc, int eoi);
132 static void msi_eoi_source(struct intsrc *isrc);
133 static void msi_enable_intr(struct intsrc *isrc);
134 static void msi_disable_intr(struct intsrc *isrc);
135 static int msi_vector(struct intsrc *isrc);
136 static int msi_source_pending(struct intsrc *isrc);
137 static int msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
138 enum intr_polarity pol);
139 static int msi_assign_cpu(struct intsrc *isrc, u_int apic_id);
/*
 * PIC method table for MSI and MSI-X sources.  It is handed to
 * intr_register_pic() during initialization and stored in the is_pic field
 * of every source built by msi_create_source().
 */
141 struct pic msi_pic = {
142 .pic_enable_source = msi_enable_source,
143 .pic_disable_source = msi_disable_source,
144 .pic_eoi_source = msi_eoi_source,
145 .pic_enable_intr = msi_enable_intr,
146 .pic_disable_intr = msi_disable_intr,
147 .pic_vector = msi_vector,
148 .pic_source_pending = msi_source_pending,
151 .pic_config_intr = msi_config_intr,
152 .pic_assign_cpu = msi_assign_cpu,
/* No pin-reprogramming method is provided for MSI sources. */
153 .pic_reprogram_pin = NULL,
/*
 * Read-only sysctl exporting first_msi_irq.  The variable's definition is
 * not visible here; the initialization code sets it to num_io_irqs.
 */
157 SYSCTL_UINT(_machdep, OID_AUTO, first_msi_irq, CTLFLAG_RD, &first_msi_irq, 0,
158 "Number of first IRQ reserved for MSI and MSI-X interrupts");
/*
 * Size of the IRQ range reserved for MSI/MSI-X; defaults to 2048 and is
 * exported with CTLFLAG_RDTUN.  The initialization code adds it to
 * num_io_irqs.
 */
160 u_int num_msi_irqs = 2048;
161 SYSCTL_UINT(_machdep, OID_AUTO, num_msi_irqs, CTLFLAG_RDTUN, &num_msi_irqs, 0,
162 "Number of IRQs reserved for MSI and MSI-X interrupts");
166 * Xen hypervisors prior to 4.6.0 do not properly handle updates to
167 * enabled MSI-X table entries. Allow migration of MSI-X interrupts
168 * to be disabled via a tunable. Values have the following meaning:
170 * -1: automatic detection by FreeBSD
171 * 0: enable migration
172 * 1: disable migration
/*
 * Starts at -1 (automatic); the initialization code replaces -1 with 0.
 * msi_assign_cpu() tests this flag together with msi_msix before moving a
 * source to another CPU.
 */
174 int msix_disable_migration = -1;
175 SYSCTL_INT(_machdep, OID_AUTO, disable_msix_migration, CTLFLAG_RDTUN,
176 &msix_disable_migration, 0,
177 "Disable migration of MSI-X interrupts between CPUs");
/* NOTE(review): no assignment to msi_enabled is visible in this excerpt -- presumably set during initialization; confirm. */
180 static int msi_enabled;
/* Offset (relative to first_msi_irq) used by msi_create_source() to pick the next IRQ number. */
181 static u_int msi_last_irq;
/* Mutex initialized by the initialization code and held around the source bookkeeping in this file. */
182 static struct mtx msi_lock;
/* pic_enable_source method of msi_pic; its body is not visible in this excerpt. */
185 msi_enable_source(struct intsrc *isrc)
/* pic_disable_source method of msi_pic; its body is not visible in this excerpt. */
190 msi_disable_source(struct intsrc *isrc, int eoi)
/* pic_eoi_source method of msi_pic; its body is not visible in this excerpt. */
198 msi_eoi_source(struct intsrc *isrc)
/*
 * pic_enable_intr method: enables the source's IDT vector (msi_vector) on
 * its local APIC (msi_cpu) through apic_enable_vector().
 */
205 msi_enable_intr(struct intsrc *isrc)
207 struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
209 apic_enable_vector(msi->msi_cpu, msi->msi_vector);
/*
 * pic_disable_intr method: disables the source's IDT vector (msi_vector) on
 * its local APIC (msi_cpu) through apic_disable_vector().
 */
213 msi_disable_intr(struct intsrc *isrc)
215 struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
217 apic_disable_vector(msi->msi_cpu, msi->msi_vector);
/*
 * pic_vector method.  Despite the name, this returns the source's IRQ
 * number (msi_irq), not its IDT vector (msi_vector).
 */
221 msi_vector(struct intsrc *isrc)
223 struct msi_intsrc *msi = (struct msi_intsrc *)isrc;
225 return (msi->msi_irq);
/* pic_source_pending method of msi_pic; its body is not visible in this excerpt. */
229 msi_source_pending(struct intsrc *isrc)
/* pic_config_intr method of msi_pic; its body is not visible in this excerpt. */
236 msi_config_intr(struct intsrc *isrc, enum intr_trigger trig,
237 enum intr_polarity pol)
/*
 * pic_assign_cpu method: retarget a message (or a whole MSI group) at the
 * local APIC 'apic_id'.
 *
 * Visible steps: check that 'isrc' is the first source of its group and
 * test the MSI-X migration flag; remember the old CPU and vector; allocate
 * new IDT vector(s) on 'apic_id'; update msi_cpu/msi_vector on the first
 * source and every sibling, enabling vectors for sources that have
 * handlers; call BUS_REMAP_INTR() on the owning device's parent; finally
 * disable and free the old vectors.  The return statements are not visible
 * in this excerpt.
 */
244 msi_assign_cpu(struct intsrc *isrc, u_int apic_id)
246 struct msi_intsrc *sib, *msi = (struct msi_intsrc *)isrc;
252 * Only allow CPUs to be assigned to the first message for an
255 if (msi->msi_first != msi)
259 if (msix_disable_migration && msi->msi_msix)
263 /* Store information to free existing irq. */
264 old_vector = msi->msi_vector;
265 old_id = msi->msi_cpu;
266 if (old_id == apic_id)
269 /* Allocate IDT vectors on this cpu. */
/* Groups of more than one message allocate a block of vectors; a single message allocates one. */
270 if (msi->msi_count > 1) {
271 KASSERT(msi->msi_msix == 0, ("MSI-X message group"));
272 vector = apic_alloc_vectors(apic_id, msi->msi_irqs,
273 msi->msi_count, msi->msi_maxcount);
275 vector = apic_alloc_vector(apic_id, msi->msi_irq);
/* Point the first source at the new CPU/vector and enable it only if handlers are attached. */
279 msi->msi_cpu = apic_id;
280 msi->msi_vector = vector;
281 if (msi->msi_intsrc.is_handlers > 0)
282 apic_enable_vector(msi->msi_cpu, msi->msi_vector);
284 printf("msi: Assigning %s IRQ %d to local APIC %u vector %u\n",
285 msi->msi_msix ? "MSI-X" : "MSI", msi->msi_irq,
286 msi->msi_cpu, msi->msi_vector);
/* Siblings take the same CPU and consecutive vectors (vector + i). */
287 for (i = 1; i < msi->msi_count; i++) {
288 sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
289 sib->msi_cpu = apic_id;
290 sib->msi_vector = vector + i;
291 if (sib->msi_intsrc.is_handlers > 0)
292 apic_enable_vector(sib->msi_cpu, sib->msi_vector);
295 "msi: Assigning MSI IRQ %d to local APIC %u vector %u\n",
296 sib->msi_irq, sib->msi_cpu, sib->msi_vector);
/* Ask the parent bus of the owning device to remap the interrupt (remaining arguments not visible). */
298 BUS_REMAP_INTR(device_get_parent(msi->msi_dev), msi->msi_dev,
302 * Free the old vector after the new one is established. This is done
303 * to prevent races where we could miss an interrupt.
305 if (msi->msi_intsrc.is_handlers > 0)
306 apic_disable_vector(old_id, old_vector);
307 apic_free_vector(old_id, old_vector, msi->msi_irq);
/* Release the siblings' old vectors, which were old_vector + i on the old CPU. */
308 for (i = 1; i < msi->msi_count; i++) {
309 sib = (struct msi_intsrc *)intr_lookup_source(msi->msi_irqs[i]);
310 if (sib->msi_intsrc.is_handlers > 0)
311 apic_disable_vector(old_id, old_vector + i);
312 apic_free_vector(old_id, old_vector + i, msi->msi_irqs[i]);
/*
 * Body of the MSI initialization routine (its header line is not visible
 * in this excerpt; presumably msi_init -- confirm).
 *
 * Visible steps: switch on cpu_vendor_id; resolve the automatic (-1)
 * MSI-X migration setting to 0; reserve num_msi_irqs IRQ numbers directly
 * after the current num_io_irqs; register msi_pic; initialize msi_lock.
 */
321 /* Check if we have a supported CPU. */
322 switch (cpu_vendor_id) {
323 case CPU_VENDOR_INTEL:
325 case CPU_VENDOR_HYGON:
/* Centaur CPUs are additionally checked for family 0x6 and model >= 0xf. */
327 case CPU_VENDOR_CENTAUR:
328 if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
329 CPUID_TO_MODEL(cpu_id) >= 0xf)
337 if (msix_disable_migration == -1) {
338 /* The default is to allow migration of MSI-X interrupts. */
339 msix_disable_migration = 0;
343 if (num_msi_irqs == 0)
/* The MSI range starts where the I/O IRQs end; panic rather than let the unsigned sum wrap. */
346 first_msi_irq = num_io_irqs;
347 if (num_msi_irqs > UINT_MAX - first_msi_irq)
348 panic("num_msi_irqs too high");
349 num_io_irqs = first_msi_irq + num_msi_irqs;
352 intr_register_pic(&msi_pic);
353 mtx_init(&msi_lock, "msi", NULL, MTX_DEF);
/*
 * Create and register one new MSI interrupt source.
 *
 * Under msi_lock, stop if msi_last_irq has reached num_msi_irqs; otherwise
 * compute the new IRQ number as msi_last_irq + first_msi_irq.  The lock is
 * released before the M_WAITOK allocation.  The zeroed structure gets
 * msi_pic as its PIC and is passed to intr_register_source().
 */
357 msi_create_source(void)
359 struct msi_intsrc *msi;
363 if (msi_last_irq >= num_msi_irqs) {
364 mtx_unlock(&msi_lock);
367 irq = msi_last_irq + first_msi_irq;
369 mtx_unlock(&msi_lock);
371 msi = malloc(sizeof(struct msi_intsrc), M_MSI, M_WAITOK | M_ZERO);
372 msi->msi_intsrc.is_pic = &msi_pic;
374 intr_register_source(&msi->msi_intsrc);
379 * Try to allocate 'count' interrupt sources with contiguous IDT values.
/*
 * Allocate a group of 'count' MSI messages for 'dev'.
 *
 * dev      - device that will own the messages.
 * count    - number of messages requested.
 * maxcount - passed to apic_alloc_vectors() and stored as the group's
 *            msi_maxcount.
 * irqs     - array that holds the chosen IRQ numbers.
 *
 * Visible steps: look up the device's domain; allocate the group's IRQ
 * list; scan the MSI IRQ range for free sources (msi_dev == NULL); create
 * more sources if needed; allocate 'count' IDT vectors on the CPU picked by
 * intr_next_cpu(); allocate IOMMU cookies (freeing the vectors on the
 * failure path); then fill in each source and record the group data on the
 * first source.  Return statements are not visible in this excerpt.
 */
382 msi_alloc(device_t dev, int count, int maxcount, int *irqs)
384 struct msi_intsrc *msi, *fsrc;
385 u_int cpu, domain, *mirqs;
/* NOTE(review): variable-length array sized by the caller-supplied 'count' -- confirm that callers bound 'count'. */
388 u_int cookies[count];
395 if (bus_get_domain(dev, &domain) != 0)
/* Group IRQ list; ownership passes to the first source (fsrc->msi_irqs) at the end. */
399 mirqs = malloc(count * sizeof(*mirqs), M_MSI, M_WAITOK);
405 /* Try to find 'count' free IRQs. */
407 for (i = first_msi_irq; i < first_msi_irq + num_msi_irqs; i++) {
408 msi = (struct msi_intsrc *)intr_lookup_source(i);
410 /* End of allocated sources, so break. */
414 /* If this is a free one, save its IRQ in the array. */
415 if (msi->msi_dev == NULL) {
423 /* Do we need to create some new sources? */
425 /* If we would exceed the max, give up. */
426 if (i + (count - cnt) > first_msi_irq + num_msi_irqs) {
427 mtx_unlock(&msi_lock);
431 mtx_unlock(&msi_lock);
433 /* We need count - cnt more sources. */
434 while (cnt < count) {
441 /* Ok, we now have the IRQs allocated. */
442 KASSERT(cnt == count, ("count mismatch"));
444 /* Allocate 'count' IDT vectors. */
445 cpu = intr_next_cpu(domain);
446 vector = apic_alloc_vectors(cpu, irqs, count, maxcount);
448 mtx_unlock(&msi_lock);
/* msi_lock is released before calling into the IOMMU code. */
454 mtx_unlock(&msi_lock);
455 error = iommu_alloc_msi_intr(dev, cookies, count);
457 if (error == EOPNOTSUPP)
/* Failure path: give back the vectors allocated above. */
460 for (i = 0; i < count; i++)
461 apic_free_vector(cpu, vector + i, irqs[i]);
/* Record each message's IOMMU cookie on its source. */
465 for (i = 0; i < count; i++) {
466 msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
467 msi->msi_remap_cookie = cookies[i];
471 /* Assign IDT vectors and make these messages owned by 'dev'. */
472 fsrc = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
473 for (i = 0; i < count; i++) {
474 msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
477 msi->msi_vector = vector + i;
480 "msi: routing MSI IRQ %d to local APIC %u vector %u\n",
481 msi->msi_irq, msi->msi_cpu, msi->msi_vector);
482 msi->msi_first = fsrc;
483 KASSERT(msi->msi_intsrc.is_handlers == 0,
484 ("dead MSI has handlers"));
/* Group-wide fields are stored on the first source only. */
486 fsrc->msi_count = count;
487 fsrc->msi_maxcount = maxcount;
489 bcopy(irqs, mirqs, count * sizeof(*mirqs));
490 fsrc->msi_irqs = mirqs;
491 mtx_unlock(&msi_lock);
/*
 * Release a whole MSI group.
 *
 * irqs  - IRQ numbers of the group; irqs[0] must be the group's first
 *         message.
 * count - number of messages; must equal the group's msi_count.
 *
 * The visible checks reject MSI-X sources, sources that are not in a
 * group, and requests that do not start at the first source or do not
 * cover the whole group.  Each extra message and then the first message
 * is unmapped from the IOMMU and has its vector freed; the first source's
 * group fields are reset and its IRQ list is freed.  Return statements are
 * not visible in this excerpt.
 */
496 msi_release(int *irqs, int count)
498 struct msi_intsrc *msi, *first;
502 first = (struct msi_intsrc *)intr_lookup_source(irqs[0]);
504 mtx_unlock(&msi_lock);
508 /* Make sure this isn't an MSI-X message. */
509 if (first->msi_msix) {
510 mtx_unlock(&msi_lock);
514 /* Make sure this message is allocated to a group. */
515 if (first->msi_first == NULL) {
516 mtx_unlock(&msi_lock);
521 * Make sure this is the start of a group and that we are releasing
524 if (first->msi_first != first || first->msi_count != count) {
525 mtx_unlock(&msi_lock);
528 KASSERT(first->msi_dev != NULL, ("unowned group"));
530 /* Clear all the extra messages in the group. */
531 for (i = 1; i < count; i++) {
532 msi = (struct msi_intsrc *)intr_lookup_source(irqs[i]);
533 KASSERT(msi->msi_first == first, ("message not in group"));
534 KASSERT(msi->msi_dev == first->msi_dev, ("owner mismatch"));
/* NOTE(review): no mtx_unlock() is visible before this call, unlike the call for the first message further down -- confirm the locking is intended. */
536 iommu_unmap_msi_intr(first->msi_dev, msi->msi_remap_cookie);
538 msi->msi_first = NULL;
540 apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
544 /* Clear out the first message. */
/* msi_lock is released before the IOMMU unmap of the first message. */
546 mtx_unlock(&msi_lock);
547 iommu_unmap_msi_intr(first->msi_dev, first->msi_remap_cookie);
550 first->msi_first = NULL;
551 first->msi_dev = NULL;
552 apic_free_vector(first->msi_cpu, first->msi_vector, first->msi_irq);
553 first->msi_vector = 0;
554 first->msi_count = 0;
555 first->msi_maxcount = 0;
/* The group's IRQ list (allocated with M_MSI) is owned by the first source. */
556 free(first->msi_irqs, M_MSI);
557 first->msi_irqs = NULL;
559 mtx_unlock(&msi_lock);
/*
 * Produce the MSI address/data pair for 'irq'.
 *
 * irq  - IRQ number of an allocated MSI or MSI-X message.
 * addr - receives the message address.
 * data - receives the message data.
 *
 * For a plain MSI message the lookup switches to the first source of the
 * group.  The other members of the group are first mapped through
 * iommu_map_msi_intr() with NULL addr/data, then the (first) message is
 * mapped with the caller's addr/data.  When iommu_map_msi_intr() reports
 * EOPNOTSUPP, the values are built with INTEL_ADDR()/INTEL_DATA() instead.
 * Return statements are not visible in this excerpt.
 */
564 msi_map(int irq, uint64_t *addr, uint32_t *data)
566 struct msi_intsrc *msi;
569 struct msi_intsrc *msi1;
574 msi = (struct msi_intsrc *)intr_lookup_source(irq);
576 mtx_unlock(&msi_lock);
580 /* Make sure this message is allocated to a device. */
581 if (msi->msi_dev == NULL) {
582 mtx_unlock(&msi_lock);
587 * If this message isn't an MSI-X message, make sure it's part
588 * of a group, and switch to the first message in the
591 if (!msi->msi_msix) {
592 if (msi->msi_first == NULL) {
593 mtx_unlock(&msi_lock);
596 msi = msi->msi_first;
/*
 * Walk the MSI IRQ range looking for the other members of this group
 * (sources whose msi_first is this source); k starts at msi_count - 1 and
 * the loop stops once it is no longer positive.
 */
600 if (!msi->msi_msix) {
601 for (k = msi->msi_count - 1, i = first_msi_irq; k > 0 &&
602 i < first_msi_irq + num_msi_irqs; i++) {
603 if (i == msi->msi_irq)
605 msi1 = (struct msi_intsrc *)intr_lookup_source(i);
606 if (!msi1->msi_msix && msi1->msi_first == msi) {
/* msi_lock is released before calling into the IOMMU code. */
607 mtx_unlock(&msi_lock);
608 iommu_map_msi_intr(msi1->msi_dev,
609 msi1->msi_cpu, msi1->msi_vector,
610 msi1->msi_remap_cookie, NULL, NULL);
616 mtx_unlock(&msi_lock);
617 error = iommu_map_msi_intr(msi->msi_dev, msi->msi_cpu,
618 msi->msi_vector, msi->msi_remap_cookie, addr, data);
620 mtx_unlock(&msi_lock);
/* Fallback when the IOMMU path reports EOPNOTSUPP: build the values directly. */
623 if (error == EOPNOTSUPP) {
624 *addr = INTEL_ADDR(msi);
625 *data = INTEL_DATA(msi);
/*
 * Allocate a single MSI-X message for 'dev'.
 *
 * dev - device that will own the message.
 * irq - output parameter (its assignment is not visible in this excerpt).
 *
 * Visible steps: look up the device's domain; scan the MSI IRQ range for
 * the first free source (msi_dev == NULL); create a new source when
 * needed; allocate one IDT vector on the CPU chosen by intr_next_cpu();
 * obtain one IOMMU cookie (freeing the vector on the failure path); then
 * set up the source as its own single-message group.
 */
632 msix_alloc(device_t dev, int *irq)
634 struct msi_intsrc *msi;
645 if (bus_get_domain(dev, &domain) != 0)
651 /* Find a free IRQ. */
652 for (i = first_msi_irq; i < first_msi_irq + num_msi_irqs; i++) {
653 msi = (struct msi_intsrc *)intr_lookup_source(i);
655 /* End of allocated sources, so break. */
659 /* Stop at the first free source. */
660 if (msi->msi_dev == NULL)
664 /* Are all IRQs in use? */
665 if (i == first_msi_irq + num_msi_irqs) {
666 mtx_unlock(&msi_lock);
670 /* Do we need to create a new source? */
672 mtx_unlock(&msi_lock);
674 /* Create a new source. */
679 /* Allocate an IDT vector. */
680 cpu = intr_next_cpu(domain);
681 vector = apic_alloc_vector(cpu, i);
683 mtx_unlock(&msi_lock);
/* msi_lock is released before calling into the IOMMU code. */
689 mtx_unlock(&msi_lock);
690 error = iommu_alloc_msi_intr(dev, &cookie, 1);
692 if (error == EOPNOTSUPP)
/* Failure path: give back the vector allocated above. */
696 apic_free_vector(cpu, vector, i);
699 msi->msi_remap_cookie = cookie;
703 printf("msi: routing MSI-X IRQ %d to local APIC %u vector %u\n",
704 msi->msi_irq, cpu, vector);
/* An MSI-X message is its own group: msi_first points at itself and there is no IRQ list. */
708 msi->msi_first = msi;
709 msi->msi_vector = vector;
712 msi->msi_maxcount = 1;
713 msi->msi_irqs = NULL;
715 KASSERT(msi->msi_intsrc.is_handlers == 0, ("dead MSI-X has handlers"));
716 mtx_unlock(&msi_lock);
/*
 * Release a single MSI-X message.
 *
 * irq - IRQ number of the message.
 *
 * The source must be an MSI-X message (msi_msix set).  Its IOMMU mapping
 * is removed with msi_lock released, its vector is freed, and the visible
 * group fields (msi_first, msi_maxcount) are reset.  Return statements are
 * not visible in this excerpt.
 */
723 msix_release(int irq)
725 struct msi_intsrc *msi;
728 msi = (struct msi_intsrc *)intr_lookup_source(irq);
730 mtx_unlock(&msi_lock);
734 /* Make sure this is an MSI-X message. */
735 if (!msi->msi_msix) {
736 mtx_unlock(&msi_lock);
740 KASSERT(msi->msi_dev != NULL, ("unowned message"));
742 /* Clear out the message. */
/* msi_lock is released before the IOMMU unmap. */
744 mtx_unlock(&msi_lock);
745 iommu_unmap_msi_intr(msi->msi_dev, msi->msi_remap_cookie);
748 msi->msi_first = NULL;
750 apic_free_vector(msi->msi_cpu, msi->msi_vector, msi->msi_irq);
754 msi->msi_maxcount = 0;
756 mtx_unlock(&msi_lock);