2 * Copyright (c) 2015-2016 Nathan Whitehorn
3 * Copyright (c) 2017-2018 Semihalf
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/module.h>
35 #include <sys/kernel.h>
36 #include <sys/pciio.h>
37 #include <sys/endian.h>
41 #include <dev/ofw/openfirm.h>
42 #include <dev/ofw/ofw_pci.h>
43 #include <dev/ofw/ofw_bus.h>
44 #include <dev/ofw/ofw_bus_subr.h>
45 #include <dev/ofw/ofwpci.h>
47 #include <dev/pci/pcivar.h>
48 #include <dev/pci/pcireg.h>
50 #include <machine/bus.h>
51 #include <machine/intr_machdep.h>
52 #include <machine/md_var.h>
/* Upper bound on TCE table entries; opalpci_attach() panics beyond it. */
62 #define OPAL_PCI_TCE_MAX_ENTRIES (1024*1024UL)
/* Value max_tce_size() returns when no supported-sizes property is usable. */
63 #define OPAL_PCI_TCE_DEFAULT_SEG_SIZE (16*1024*1024UL)
/* Flag bits ORed into every TCE entry built by opalpci_attach(). */
64 #define OPAL_PCI_TCE_R (1UL << 0)
65 #define OPAL_PCI_TCE_W (1UL << 1)
/* Value written at register offset 0x210 to invalidate all TCE entries. */
66 #define PHB3_TCE_KILL_INVAL_ALL (1UL << 63)
/* Device interface methods (see opalpci_methods). */
71 static int opalpci_probe(device_t);
72 static int opalpci_attach(device_t);
/* pcib interface: config-space access, MSI/MSI-X management, routing. */
77 static uint32_t opalpci_read_config(device_t, u_int, u_int, u_int,
79 static void opalpci_write_config(device_t, u_int, u_int, u_int,
80 u_int, u_int32_t, int);
81 static int opalpci_alloc_msi(device_t dev, device_t child,
82 int count, int maxcount, int *irqs);
83 static int opalpci_release_msi(device_t dev, device_t child,
84 int count, int *irqs);
85 static int opalpci_alloc_msix(device_t dev, device_t child,
87 static int opalpci_release_msix(device_t dev, device_t child,
89 static int opalpci_map_msi(device_t dev, device_t child,
90 int irq, uint64_t *addr, uint32_t *data);
91 static int opalpci_route_interrupt(device_t bus, device_t dev, int pin);
/* PIC interface used for MSIs. */
96 static void opalpic_pic_enable(device_t dev, u_int irq, u_int vector, void **);
97 static void opalpic_pic_eoi(device_t dev, u_int irq, void *);
/* Bus interface: DMA tag handed to child devices. */
100 static bus_dma_tag_t opalpci_get_dma_tag(device_t dev, device_t child);
/* Window-type selectors passed to the OPAL MMIO window calls. */
105 #define OPAL_M32_WINDOW_TYPE 1
106 #define OPAL_M64_WINDOW_TYPE 2
107 #define OPAL_IO_WINDOW_TYPE 3
/* Reset scopes passed to OPAL_PCI_RESET. */
109 #define OPAL_RESET_PHB_COMPLETE 1
110 #define OPAL_RESET_PCI_IODA_TABLE 6
/* M64 window modes; the NON_SPLIT one is passed to OPAL_PCI_PHB_MMIO_ENABLE. */
112 #define OPAL_DISABLE_M64 0
113 #define OPAL_ENABLE_M64_SPLIT 1
114 #define OPAL_ENABLE_M64_NON_SPLIT 2
/* Actions passed to OPAL_PCI_EEH_FREEZE_CLEAR. */
116 #define OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO 1
117 #define OPAL_EEH_ACTION_CLEAR_FREEZE_DMA 2
118 #define OPAL_EEH_ACTION_CLEAR_FREEZE_ALL 3
/* Partitionable endpoint number that every device on the bus is mapped to. */
123 #define OPAL_PCI_DEFAULT_PE 1
/* Passed as lowaddr to bus_dma_tag_create() for the parent DMA tag. */
125 #define OPAL_PCI_BUS_SPACE_LOWADDR_32BIT 0x7FFFFFFFUL
/*
 * Method table binding the device, pcib, PIC and bus interface entry points
 * to this driver's implementations.
 */
130 static device_method_t opalpci_methods[] = {
131 /* Device interface */
132 DEVMETHOD(device_probe, opalpci_probe),
133 DEVMETHOD(device_attach, opalpci_attach),
/* pcib interface: configuration space access */
136 DEVMETHOD(pcib_read_config, opalpci_read_config),
137 DEVMETHOD(pcib_write_config, opalpci_write_config),
/* pcib interface: MSI/MSI-X handling and interrupt routing */
139 DEVMETHOD(pcib_alloc_msi, opalpci_alloc_msi),
140 DEVMETHOD(pcib_release_msi, opalpci_release_msi),
141 DEVMETHOD(pcib_alloc_msix, opalpci_alloc_msix),
142 DEVMETHOD(pcib_release_msix, opalpci_release_msix),
143 DEVMETHOD(pcib_map_msi, opalpci_map_msi),
144 DEVMETHOD(pcib_route_interrupt, opalpci_route_interrupt),
146 /* PIC interface for MSIs */
147 DEVMETHOD(pic_enable, opalpic_pic_enable),
148 DEVMETHOD(pic_eoi, opalpic_pic_eoi),
/* Bus interface: DMA tag is driver specific, the rest comes from ofw_pcibus */
151 DEVMETHOD(bus_get_dma_tag, opalpci_get_dma_tag),
152 DEVMETHOD(bus_get_cpus, ofw_pcibus_get_cpus),
153 DEVMETHOD(bus_get_domain, ofw_pcibus_get_domain),
/* Per-bridge driver state; embeds the generic OFW PCI softc. */
158 struct opalpci_softc {
/* Generic OFW PCI state (ranges, resource managers, DMA tag). */
159 struct ofw_pci_softc ofw_sc;
162 int msi_base; /* Base XIVE number */
163 int base_msi_irq; /* Base IRQ assigned by FreeBSD to this PIC */
164 uint64_t *tce; /* TCE table for 1:1 mapping */
/* Memory resource allocated in attach; PHB registers are written through it. */
165 struct resource *r_reg;
/*
 * Driver glue: declare opalpci_driver (class name "pcib", built on
 * ofw_pci_driver, softc sized for struct opalpci_softc) and register it as
 * an early driver module under ofwbus.
 */
168 static devclass_t opalpci_devclass;
169 DEFINE_CLASS_1(pcib, opalpci_driver, opalpci_methods,
170 sizeof(struct opalpci_softc), ofw_pci_driver);
171 EARLY_DRIVER_MODULE(opalpci, ofwbus, opalpci_driver, opalpci_devclass, 0, 0,
/*
 * Device probe method.
 *
 * Three conditions are tested in order: opal_check() returns 0, the node's
 * device type is "pci" or "pciex", and the node has an "ibm,opal-phbid"
 * property.  When all hold, the device description is set and
 * BUS_PROBE_GENERIC is returned.
 * NOTE(review): the statements executed when a check fails are not visible
 * here; presumably each rejects the device -- confirm.
 */
175 opalpci_probe(device_t dev)
/* OPAL firmware must be usable at all. */
179 if (opal_check() != 0)
182 type = ofw_bus_get_type(dev);
/* Only PCI and PCI Express bridge nodes are of interest. */
184 if (type == NULL || (strcmp(type, "pci") != 0 &&
185 strcmp(type, "pciex") != 0))
/* The PHB id property is what marks the node as OPAL-managed. */
188 if (!OF_hasprop(ofw_bus_get_node(dev), "ibm,opal-phbid"))
191 device_set_desc(dev, "OPAL Host-PCI bridge");
192 return (BUS_PROBE_GENERIC);
/*
 * Invalidate every TCE entry on a PHB3 by writing PHB3_TCE_KILL_INVAL_ALL
 * as a 64-bit value at offset 0x210 of the register resource sc->r_reg.
 */
196 pci_phb3_tce_invalidate_entire(struct opalpci_softc *sc)
200 bus_write_8(sc->r_reg, 0x210, PHB3_TCE_KILL_INVAL_ALL);
/*
 * Round val up to the nearest power of two.
 *
 * A value that already is a power of two is returned unchanged.  All
 * arithmetic is carried out on the 64-bit operand itself, so results of
 * 2^31 and above are produced correctly instead of being limited by an
 * int-sized shift, and no intermediate sum can wrap.
 *
 * Returns 0 when val is 0 or when the rounded value would not fit in
 * 64 bits (val > 2^63).
 */
static uint64_t
round_pow2(uint64_t val)
{

	/*
	 * Copy the highest set bit of (val - 1) into every lower bit
	 * position; adding one then yields the next power of two.
	 */
	val--;
	val |= val >> 1;
	val |= val >> 2;
	val |= val >> 4;
	val |= val >> 8;
	val |= val >> 16;
	val |= val >> 32;

	return (val + 1);
}
213 * Starting with skiboot 5.10 PCIe nodes have a new property,
214 * "ibm,supported-tce-sizes", to denote the TCE sizes available. This allows us
215 * to avoid hard-coding the maximum TCE size allowed, and instead provide a sane
216 * default (however, the "sane" default, which works for all targets, is 64k,
217 * limiting us to 64GB if we have 1M entries.
220 max_tce_size(device_t dev)
223 cell_t sizes[64]; /* Property is a list of bit-widths, up to 64-bits */
226 node = ofw_bus_get_node(dev);
228 count = OF_getencprop(node, "ibm,supported-tce-sizes",
229 sizes, sizeof(sizes));
230 if (count < (int) sizeof(cell_t))
231 return OPAL_PCI_TCE_DEFAULT_SEG_SIZE;
233 count /= sizeof(cell_t);
235 return (1ULL << sizes[count - 1]);
/*
 * Device attach method for an OPAL-managed PCI host bridge.
 *
 * Visible steps, in order: read the PHB id from the device tree, allocate
 * the PHB register resource, reset the IODA table and the PHB through OPAL,
 * map all devices to OPAL_PCI_DEFAULT_PE, enable the M32 and M64 MMIO
 * windows, build a 1:1 TCE table covering physical memory and hand it to
 * OPAL, set up MSI allocation, create the parent DMA tag on ioda2 bridges,
 * run the generic OFW PCI initialisation, add the M64 window as an extra
 * range, and finish with ofw_pci_attach().
 */
239 opalpci_attach(device_t dev)
241 struct opalpci_softc *sc;
242 cell_t id[2], m64ranges[2], m64window[6], npe;
248 uint64_t tce_tbl_size;
252 sc = device_get_softc(dev);
253 node = ofw_bus_get_node(dev);
/* The PHB id is read either as 8 bytes (two cells combined) or as 4 bytes. */
255 switch (OF_getproplen(node, "ibm,opal-phbid")) {
257 OF_getencprop(node, "ibm,opal-phbid", id, 8);
258 sc->phb_id = ((uint64_t)id[0] << 32) | id[1];
261 OF_getencprop(node, "ibm,opal-phbid", id, 4);
265 device_printf(dev, "PHB ID property had wrong length (%zd)\n",
266 OF_getproplen(node, "ibm,opal-phbid"));
271 device_printf(dev, "OPAL ID %#lx\n", sc->phb_id);
/* Memory resource later used for direct PHB register writes. */
274 sc->r_reg = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
275 &rid, RF_ACTIVE | RF_SHAREABLE);
276 if (sc->r_reg == NULL) {
277 device_printf(dev, "Failed to allocate PHB[%jd] registers\n",
278 (uintmax_t)sc->phb_id);
284 * Reset PCI IODA table
286 err = opal_call(OPAL_PCI_RESET, sc->phb_id, OPAL_RESET_PCI_IODA_TABLE,
289 device_printf(dev, "IODA table reset failed: %d\n", err);
292 err = opal_call(OPAL_PCI_RESET, sc->phb_id, OPAL_RESET_PHB_COMPLETE,
295 device_printf(dev, "PHB reset failed: %d\n", err);
/* Keep polling while OPAL_PCI_POLL returns a positive value, delaying between calls. */
299 while ((err = opal_call(OPAL_PCI_POLL, sc->phb_id)) > 0) {
300 DELAY(1000*(err + 1)); /* Returns expected delay in ms */
304 device_printf(dev, "WARNING: PHB IODA reset poll failed: %d\n", err);
/* A second complete-PHB reset request followed by the same polling loop. */
306 err = opal_call(OPAL_PCI_RESET, sc->phb_id, OPAL_RESET_PHB_COMPLETE,
309 device_printf(dev, "PHB reset failed: %d\n", err);
313 while ((err = opal_call(OPAL_PCI_POLL, sc->phb_id)) > 0) {
314 DELAY(1000*(err + 1)); /* Returns expected delay in ms */
320 * Map all devices on the bus to partitionable endpoint one until
321 * such time as we start wanting to do things like bhyve.
323 err = opal_call(OPAL_PCI_SET_PE, sc->phb_id, OPAL_PCI_DEFAULT_PE,
324 0, OPAL_PCI_BUS_ANY, OPAL_IGNORE_RID_DEVICE_NUMBER,
325 OPAL_IGNORE_RID_FUNC_NUMBER, OPAL_MAP_PE);
327 device_printf(dev, "PE mapping failed: %d\n", err);
332 * Turn on MMIO, mapped to PE 1
/* npe comes from "ibm,opal-num-pes"; one M32 window mapping call is made per index. */
334 if (OF_getencprop(node, "ibm,opal-num-pes", &npe, 4) != 4)
336 for (i = 0; i < npe; i++) {
337 err = opal_call(OPAL_PCI_MAP_PE_MMIO_WINDOW, sc->phb_id,
338 OPAL_PCI_DEFAULT_PE, OPAL_M32_WINDOW_TYPE, 0, i);
340 device_printf(dev, "MMIO %d map failed: %d\n", i, err);
/* The first available M64 range, when the property has the expected size, selects the M64 BAR index. */
343 if (OF_getencprop(node, "ibm,opal-available-m64-ranges",
344 m64ranges, sizeof(m64ranges)) == sizeof(m64ranges))
345 m64bar = m64ranges[0];
349 /* XXX: multiple M64 windows? */
/* M64 setup: disable the BAR, program base and size, map it to the default PE, re-enable non-split. */
350 if (OF_getencprop(node, "ibm,opal-m64-window",
351 m64window, sizeof(m64window)) == sizeof(m64window)) {
352 opal_call(OPAL_PCI_PHB_MMIO_ENABLE, sc->phb_id,
353 OPAL_M64_WINDOW_TYPE, m64bar, 0);
354 opal_call(OPAL_PCI_SET_PHB_MEM_WINDOW, sc->phb_id,
355 OPAL_M64_WINDOW_TYPE, m64bar /* index */,
356 ((uint64_t)m64window[2] << 32) | m64window[3], 0,
357 ((uint64_t)m64window[4] << 32) | m64window[5]);
358 opal_call(OPAL_PCI_MAP_PE_MMIO_WINDOW, sc->phb_id,
359 OPAL_PCI_DEFAULT_PE, OPAL_M64_WINDOW_TYPE,
360 m64bar /* index */, 0);
361 opal_call(OPAL_PCI_PHB_MMIO_ENABLE, sc->phb_id,
362 OPAL_M64_WINDOW_TYPE, m64bar, OPAL_ENABLE_M64_NON_SPLIT);
366 * Enable IOMMU for PE1 - map everything 1:1 using
367 * segments of max_tce_size size
/* Table sizing: memory rounded up to a segment multiple, entry count rounded to a power of two, table at least 4096 bytes. */
369 tce_size = max_tce_size(dev);
370 maxmem = roundup2(powerpc_ptob(Maxmem), tce_size);
371 entries = round_pow2(maxmem / tce_size);
372 tce_tbl_size = MAX(entries * sizeof(uint64_t), 4096);
373 if (entries > OPAL_PCI_TCE_MAX_ENTRIES)
374 panic("POWERNV supports only %jdGB of memory space\n",
375 (uintmax_t)((OPAL_PCI_TCE_MAX_ENTRIES * tce_size) >> 30));
377 device_printf(dev, "Mapping 0-%#jx for DMA\n", (uintmax_t)maxmem);
/* Zeroed, physically contiguous table; tce_tbl_size is also passed as the alignment argument. */
378 sc->tce = contigmalloc(tce_tbl_size,
379 M_DEVBUF, M_NOWAIT | M_ZERO, 0,
380 BUS_SPACE_MAXADDR, tce_tbl_size, 0);
382 panic("Failed to allocate TCE memory for PHB %jd\n",
383 (uintmax_t)sc->phb_id);
/* Identity mapping: entry i holds address i * tce_size with the read and write flags set. */
385 for (i = 0; i < entries; i++)
386 sc->tce[i] = (i * tce_size) | OPAL_PCI_TCE_R | OPAL_PCI_TCE_W;
388 /* Map TCE for every PE. It seems necessary for Power8 */
389 for (i = 0; i < npe; i++) {
390 err = opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW, sc->phb_id,
392 1, pmap_kextract((uint64_t)&sc->tce[0]),
393 tce_tbl_size, tce_size);
395 device_printf(dev, "DMA IOMMU mapping failed: %d\n", err);
/* Additional "real" DMA window starting at 1UL << 59 and spanning maxmem (64-bit bypass, per the message below). */
399 err = opal_call(OPAL_PCI_MAP_PE_DMA_WINDOW_REAL, sc->phb_id,
401 (1UL << 59), maxmem);
403 device_printf(dev, "DMA 64b bypass mapping failed: %d\n", err);
409 * Invalidate all previous TCE entries.
411 * TODO: add support for other PHBs than PHB3
413 pci_phb3_tce_invalidate_entire(sc);
/*
 * MSI setup: msi_ranges[0] is the first MSI number and msi_ranges[1] the
 * count; they seed the vmem arena and the PIC registration.
 * NOTE(review): only the property length is tested for > 0 and the result
 * of OF_getencprop() is not checked here -- confirm a short property
 * cannot leave msi_ranges[1] unset.
 */
419 if (OF_getproplen(node, "ibm,opal-msi-ranges") > 0) {
420 cell_t msi_ranges[2];
421 OF_getencprop(node, "ibm,opal-msi-ranges",
422 msi_ranges, sizeof(msi_ranges));
423 sc->msi_base = msi_ranges[0];
425 sc->msi_vmem = vmem_create("OPAL MSI", msi_ranges[0],
426 msi_ranges[1], 1, 16, M_BESTFIT | M_WAITOK);
428 sc->base_msi_irq = powerpc_register_pic(dev,
429 OF_xref_from_node(node),
430 msi_ranges[0] + msi_ranges[1], 0, FALSE);
433 device_printf(dev, "Supports %d MSIs starting at %d\n",
434 msi_ranges[1], msi_ranges[0]);
437 /* Create the parent DMA tag */
439 * Constrain it to POWER8 PHB (ioda2) for now. It seems to mess up on
442 if (ofw_bus_is_compatible(dev, "ibm,ioda2-phb")) {
443 err = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
444 1, 0, /* alignment, bounds */
445 OPAL_PCI_BUS_SPACE_LOWADDR_32BIT, /* lowaddr */
446 BUS_SPACE_MAXADDR_32BIT, /* highaddr */
447 NULL, NULL, /* filter, filterarg */
448 BUS_SPACE_MAXSIZE, /* maxsize */
449 BUS_SPACE_UNRESTRICTED, /* nsegments */
450 BUS_SPACE_MAXSIZE, /* maxsegsize */
452 NULL, NULL, /* lockfunc, lockarg */
453 &sc->ofw_sc.sc_dmat);
455 device_printf(dev, "Failed to create DMA tag\n");
461 * General OFW PCI attach
463 err = ofw_pci_init(dev);
468 * Unfreeze non-config-space PCI operations. Let this fail silently
469 * if e.g. there is no current freeze.
471 opal_call(OPAL_PCI_EEH_FREEZE_CLEAR, sc->phb_id, OPAL_PCI_DEFAULT_PE,
472 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
475 * OPAL stores 64-bit BARs in a special property rather than "ranges"
/* Append one range describing the M64 window and add it to the memory resource manager. */
477 if (OF_getencprop(node, "ibm,opal-m64-window",
478 m64window, sizeof(m64window)) == sizeof(m64window)) {
479 struct ofw_pci_range *rp;
481 sc->ofw_sc.sc_nrange++;
482 sc->ofw_sc.sc_range = realloc(sc->ofw_sc.sc_range,
483 sc->ofw_sc.sc_nrange * sizeof(sc->ofw_sc.sc_range[0]),
485 rp = &sc->ofw_sc.sc_range[sc->ofw_sc.sc_nrange-1];
486 rp->pci_hi = OFW_PCI_PHYS_HI_SPACE_MEM64 |
487 OFW_PCI_PHYS_HI_PREFETCHABLE;
/* Cell pairs 0-1, 2-3 and 4-5 form the 64-bit PCI address, host address and size. */
488 rp->pci = ((uint64_t)m64window[0] << 32) | m64window[1];
489 rp->host = ((uint64_t)m64window[2] << 32) | m64window[3];
490 rp->size = ((uint64_t)m64window[4] << 32) | m64window[5];
491 rman_manage_region(&sc->ofw_sc.sc_mem_rman, rp->pci,
492 rp->pci + rp->size - 1);
495 return (ofw_pci_attach(dev));
/*
 * pcib_read_config method: read a PCI configuration register through OPAL.
 *
 * bus, slot and func are packed into a single config address and one of
 * the byte, half-word or word OPAL read calls is issued, with the result
 * buffer passed by physical address.  An EEH freeze clear is issued after
 * every read, and the OPAL status is compared with OPAL_SUCCESS afterwards.
 */
499 opalpci_read_config(device_t dev, u_int bus, u_int slot, u_int func, u_int reg,
502 struct opalpci_softc *sc;
503 uint64_t config_addr;
509 sc = device_get_softc(dev);
/* Bus number in bits 8 and up, 5-bit slot in bits 3-7, 3-bit function in bits 0-2. */
511 config_addr = (bus << 8) | ((slot & 0x1f) << 3) | (func & 0x7);
515 error = opal_call(OPAL_PCI_CONFIG_READ_BYTE, sc->phb_id,
516 config_addr, reg, vtophys(&byte));
520 error = opal_call(OPAL_PCI_CONFIG_READ_HALF_WORD, sc->phb_id,
521 config_addr, reg, vtophys(&half));
525 error = opal_call(OPAL_PCI_CONFIG_READ_WORD, sc->phb_id,
526 config_addr, reg, vtophys(&word));
529 error = OPAL_SUCCESS;
534 * Poking config state for non-existant devices can make
535 * the host bridge hang up. Clear any errors.
537 * XXX: Make this conditional on the existence of a freeze
539 opal_call(OPAL_PCI_EEH_FREEZE_CLEAR, sc->phb_id, OPAL_PCI_DEFAULT_PE,
540 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
542 if (error != OPAL_SUCCESS)
/*
 * pcib_write_config method: write a PCI configuration register through OPAL.
 *
 * bus, slot and func are packed into a single config address and val is
 * handed to one of the byte, half-word or word OPAL write calls.  When the
 * resulting status is not OPAL_SUCCESS an EEH freeze clear is issued.
 */
549 opalpci_write_config(device_t dev, u_int bus, u_int slot, u_int func,
550 u_int reg, uint32_t val, int width)
552 struct opalpci_softc *sc;
553 uint64_t config_addr;
/* Starts out as success so a path that issues no OPAL call is not treated as a failure. */
554 int error = OPAL_SUCCESS;
556 sc = device_get_softc(dev);
/* Bus number in bits 8 and up, 5-bit slot in bits 3-7, 3-bit function in bits 0-2. */
558 config_addr = (bus << 8) | ((slot & 0x1f) << 3) | (func & 0x7);
562 error = opal_call(OPAL_PCI_CONFIG_WRITE_BYTE, sc->phb_id,
563 config_addr, reg, val);
566 error = opal_call(OPAL_PCI_CONFIG_WRITE_HALF_WORD, sc->phb_id,
567 config_addr, reg, val);
570 error = opal_call(OPAL_PCI_CONFIG_WRITE_WORD, sc->phb_id,
571 config_addr, reg, val);
575 if (error != OPAL_SUCCESS) {
577 * Poking config state for non-existant devices can make
578 * the host bridge hang up. Clear any errors.
580 opal_call(OPAL_PCI_EEH_FREEZE_CLEAR, sc->phb_id,
581 OPAL_PCI_DEFAULT_PE, OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
/* pcib_route_interrupt method for this bridge (registered in opalpci_methods). */
586 opalpci_route_interrupt(device_t bus, device_t dev, int pin)
/*
 * pcib_alloc_msi method: reserve count consecutive MSI numbers from the
 * bridge's vmem arena and translate each into an interrupt number with
 * MAP_IRQ(), using the bridge node's xref; results are stored in irqs[].
 * The arena pointer is tested against NULL first (it is only created in
 * attach when the MSI ranges property is present).
 */
593 opalpci_alloc_msi(device_t dev, device_t child, int count, int maxcount,
596 struct opalpci_softc *sc;
601 sc = device_get_softc(dev);
602 if (sc->msi_vmem == NULL)
/*
 * NOTE(review): powerof2() is a predicate, so the third argument of
 * vmem_xalloc() (the alignment) is 0 or 1 here rather than a value derived
 * from count -- confirm this is what is intended.
 */
605 err = vmem_xalloc(sc->msi_vmem, count, powerof2(count), 0, 0,
606 VMEM_ADDR_MIN, VMEM_ADDR_MAX, M_BESTFIT | M_WAITOK, &start);
611 xref = OF_xref_from_node(ofw_bus_get_node(dev));
612 for (i = 0; i < count; i++)
613 irqs[i] = MAP_IRQ(xref, start + i);
/*
 * pcib_release_msi method: return count MSI numbers to the bridge's vmem
 * arena.  The start of the freed span is irqs[0] minus the PIC's base IRQ;
 * only the first element of irqs[] is read.
 */
619 opalpci_release_msi(device_t dev, device_t child, int count, int *irqs)
621 struct opalpci_softc *sc;
623 sc = device_get_softc(dev);
624 if (sc->msi_vmem == NULL)
627 vmem_xfree(sc->msi_vmem, irqs[0] - sc->base_msi_irq, count);
632 opalpci_alloc_msix(device_t dev, device_t child, int *irq)
634 return (opalpci_alloc_msi(dev, child, 1, 1, irq));
638 opalpci_release_msix(device_t dev, device_t child, int irq)
640 return (opalpci_release_msi(dev, child, 1, &irq));
/*
 * pcib_map_msi method: obtain the MSI address/data pair for an interrupt.
 *
 * The XIVE number is derived from irq, bound to OPAL_PCI_DEFAULT_PE, and
 * OPAL is then asked for either a 32-bit or a 64-bit MSI address depending
 * on the child's MSI capability.  Values are converted from big-endian
 * before being returned through addr and data.
 *
 * Returns 0 when the OPAL query succeeded and ENXIO otherwise.
 */
644 opalpci_map_msi(device_t dev, device_t child, int irq, uint64_t *addr,
647 struct opalpci_softc *sc;
648 struct pci_devinfo *dinfo;
651 sc = device_get_softc(dev);
652 if (sc->msi_vmem == NULL)
/* Remove both the PIC's base IRQ and the base XIVE number from irq. */
655 xive = irq - sc->base_msi_irq - sc->msi_base;
656 opal_call(OPAL_PCI_SET_XIVE_PE, sc->phb_id, OPAL_PCI_DEFAULT_PE, xive);
/* Child has plain MSI allocated but lacks the 64-bit address capability: use the 32-bit query. */
658 dinfo = device_get_ivars(child);
659 if (dinfo->cfg.msi.msi_alloc > 0 &&
660 (dinfo->cfg.msi.msi_ctrl & PCIM_MSICTRL_64BIT) == 0) {
662 err = opal_call(OPAL_GET_MSI_32, sc->phb_id,
663 OPAL_PCI_DEFAULT_PE, xive, 1, vtophys(&msi32),
665 *addr = be32toh(msi32);
/* 64-bit query writes directly into the caller's addr and data buffers. */
667 err = opal_call(OPAL_GET_MSI_64, sc->phb_id,
668 OPAL_PCI_DEFAULT_PE, xive, 1, vtophys(addr), vtophys(data));
669 *addr = be64toh(*addr);
671 *data = be32toh(*data);
/* Failures are only logged when booting verbosely. */
673 if (bootverbose && err != 0)
674 device_printf(child, "OPAL MSI mapping error: %d\n", err);
676 return ((err == 0) ? 0 : ENXIO);
680 opalpic_pic_enable(device_t dev, u_int irq, u_int vector, void **priv)
682 struct opalpci_softc *sc = device_get_softc(dev);
684 PIC_ENABLE(root_pic, irq, vector, priv);
685 opal_call(OPAL_PCI_MSI_EOI, sc->phb_id, irq, priv);
688 static void opalpic_pic_eoi(device_t dev, u_int irq, void *priv)
690 struct opalpci_softc *sc;
692 sc = device_get_softc(dev);
693 opal_call(OPAL_PCI_MSI_EOI, sc->phb_id, irq);
695 PIC_EOI(root_pic, irq, priv);
699 opalpci_get_dma_tag(device_t dev, device_t child)
701 struct opalpci_softc *sc;
703 sc = device_get_softc(dev);
704 return (sc->ofw_sc.sc_dmat);