2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2011 NetApp, Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
40 #include <sys/pciio.h>
43 #include <sys/sysctl.h>
45 #include <dev/pci/pcivar.h>
46 #include <dev/pci/pcireg.h>
48 #include <machine/resource.h>
50 #include <machine/vmm.h>
51 #include <machine/vmm_dev.h>
53 #include "vmm_lapic.h"
/*
 * Upper bound on the number of MSI messages per device: it sizes the
 * msi.res/cookie/arg arrays and is the limit 'numvec' is checked against
 * in ppt_setup_msi().
 */
61 #define MAX_MSIMSGS 32
64 * If the MSI-X table is located in the middle of a BAR then that MMIO
65 * region gets split into two segments - one segment above the MSI-X table
66 * and the other segment below the MSI-X table - with a hole in place of
67 * the MSI-X table so accesses to it can be trapped and emulated.
69 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
71 #define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1)
/*
 * malloc(9) type for the per-device MSI-X arrays (msix.res, msix.cookie,
 * msix.arg) that are allocated in ppt_setup_msix() and freed in
 * ppt_teardown_msix().
 */
73 MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
/*
 * Per-vector argument registered with bus_setup_intr() for pptintr().
 * Besides the back-pointer below, pptintr() also reads 'addr' and
 * 'msg_data' members whose declarations are not visible in this excerpt.
 */
75 struct pptintr_arg { /* pptintr(pptintr_arg) */
76 struct pptdev *pptdev;
/*
 * The remaining lines are members of the per-device softc (struct pptdev)
 * and of its 'msi' and 'msix' sub-structures; the enclosing declaration
 * lines are not visible in this excerpt.
 */
89 struct vm *vm; /* owner of this device */
90 TAILQ_ENTRY(pptdev) next;
91 struct pptseg mmio[MAX_MMIOSEGS];
/* MSI state: fixed-size arrays indexed by message number. */
93 int num_msgs; /* guest state */
95 int startrid; /* host state */
96 struct resource *res[MAX_MSIMSGS];
97 void *cookie[MAX_MSIMSGS];
98 struct pptintr_arg arg[MAX_MSIMSGS];
/*
 * MSI-X state: BAR resources backing the MSI-X table and PBA, plus
 * per-vector arrays that ppt_setup_msix() allocates with M_PPTMSIX.
 */
106 struct resource *msix_table_res;
107 struct resource *msix_pba_res;
108 struct resource **res;
110 struct pptintr_arg *arg;
/* sysctl node "ppt" under _hw_vmm, parent of the passthrough sysctls below. */
114 SYSCTL_DECL(_hw_vmm);
115 SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
116 "bhyve passthru devices");
/*
 * Count of passthrough devices, exported read-only as the "devices" sysctl
 * and returned by ppt_avail_devices().
 */
118 static int num_pptdevs;
119 SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
120 "number of pci passthru devices");
/*
 * List of all attached passthrough devices; entries are added in
 * ppt_attach() and removed in ppt_detach().
 */
122 static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);
/*
 * Device probe method for the "ppt" driver.
 *
 * Looks up the PCI ivars and the bus/slot/function of 'dev', then tests
 * the configuration header type and asks vmm_is_pptdev() about the
 * bus/slot/function triple.  The values returned by those two branches
 * are not visible in this excerpt; the final fall-through returns
 * BUS_PROBE_NOWILDCARD.
 */
125 ppt_probe(device_t dev)
128 struct pci_devinfo *dinfo;
130 dinfo = (struct pci_devinfo *)device_get_ivars(dev);
132 bus = pci_get_bus(dev);
133 slot = pci_get_slot(dev);
134 func = pci_get_function(dev);
137 * To qualify as a pci passthrough device a device must:
138 * - be allowed by administrator to be used in this role
139 * - be an endpoint device
141 if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
143 else if (vmm_is_pptdev(bus, slot, func))
147 * Returning BUS_PROBE_NOWILDCARD here matches devices that the
148 * SR-IOV infrastructure specified as "ppt" passthrough devices.
149 * All normal devices that did not have "ppt" specified as their
150 * driver will not be matched by this.
152 return (BUS_PROBE_NOWILDCARD);
/*
 * Device attach method.
 *
 * Removes the device from the host IOMMU domain, appends its softc to
 * pptdev_list and logs "attached".  Any other initialisation (for example
 * updating num_pptdevs) is not on a line visible in this excerpt.
 */
156 ppt_attach(device_t dev)
160 ppt = device_get_softc(dev);
162 iommu_remove_device(iommu_host_domain(), pci_get_rid(dev));
164 TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
168 device_printf(dev, "attached\n");
/*
 * Device detach method; undoes ppt_attach().
 *
 * Unlinks the softc from pptdev_list, disables bus mastering on the
 * device and adds it back to the host IOMMU domain.  Whether detach is
 * refused while a VM owns the device is not visible in this excerpt.
 */
174 ppt_detach(device_t dev)
178 ppt = device_get_softc(dev);
183 TAILQ_REMOVE(&pptdev_list, ppt, next);
184 pci_disable_busmaster(dev);
185 iommu_add_device(iommu_host_domain(), pci_get_rid(dev));
/*
 * Driver glue: the method table wires probe/attach/detach to the functions
 * above, and the driver is registered on the "pci" bus with a softc of
 * sizeof(struct pptdev).
 */
190 static device_method_t ppt_methods[] = {
191 /* Device interface */
192 DEVMETHOD(device_probe, ppt_probe),
193 DEVMETHOD(device_attach, ppt_attach),
194 DEVMETHOD(device_detach, ppt_detach),
198 static devclass_t ppt_devclass;
199 DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
200 DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
/*
 * Look up an attached passthrough device by PCI bus/slot/function.
 *
 * Walks pptdev_list and compares each entry's bus, slot and function
 * against the arguments.  The return statements are not visible in this
 * excerpt; presumably the matching entry is returned, or NULL when
 * nothing matches (callers' checks are not visible either) - confirm.
 */
202 static struct pptdev *
203 ppt_find(int bus, int slot, int func)
209 TAILQ_FOREACH(ppt, &pptdev_list, next) {
211 b = pci_get_bus(dev);
212 s = pci_get_slot(dev);
213 f = pci_get_function(dev);
214 if (bus == b && slot == s && func == f)
/*
 * Remove the guest MMIO mappings recorded for 'ppt'.
 *
 * Iterates over all MAX_MMIOSEGS segment slots, unmaps the slot's
 * [gpa, gpa + len) range from 'vm' (the result of vm_unmap_mmio() is
 * deliberately discarded) and zeroes the slot.  Any test that skips
 * unused slots is not visible in this excerpt.
 */
221 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
226 for (i = 0; i < MAX_MMIOSEGS; i++) {
230 (void)vm_unmap_mmio(vm, seg->gpa, seg->len);
231 bzero(seg, sizeof(struct pptseg));
/*
 * Release every interrupt resource recorded in ppt->msi.
 *
 * For each of the msi.num_msgs messages the handler is torn down and the
 * IRQ resource (rid = msi.startrid + i) is released, then the slot is
 * cleared.  pci_release_msi() is called only when startrid == 1, the value
 * ppt_setup_msi() uses for MSI as opposed to a legacy interrupt.
 * Finally num_msgs is reset to 0.
 *
 * NOTE(review): the guards around bus_teardown_intr() and
 * bus_release_resource() (for example NULL checks on cookie/res) and the
 * action taken when num_msgs == 0 are on lines not visible here - confirm.
 */
236 ppt_teardown_msi(struct pptdev *ppt)
240 struct resource *res;
242 if (ppt->msi.num_msgs == 0)
245 for (i = 0; i < ppt->msi.num_msgs; i++) {
246 rid = ppt->msi.startrid + i;
247 res = ppt->msi.res[i];
248 cookie = ppt->msi.cookie[i];
251 bus_teardown_intr(ppt->dev, res, cookie);
254 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
256 ppt->msi.res[i] = NULL;
257 ppt->msi.cookie[i] = NULL;
260 if (ppt->msi.startrid == 1)
261 pci_release_msi(ppt->dev);
263 ppt->msi.num_msgs = 0;
/*
 * Tear down a single MSI-X vector of 'ppt'.
 *
 * Uses rid = msix.startrid + idx, tears down the handler and releases the
 * IRQ resource stored at index 'idx', then clears msix.res[idx] and
 * msix.cookie[idx].
 *
 * NOTE(review): no visible line validates 'idx' against msix.num_msgs, and
 * the guards around the teardown/release calls are not visible in this
 * excerpt - confirm against the full file.
 */
267 ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
270 struct resource *res;
273 rid = ppt->msix.startrid + idx;
274 res = ppt->msix.res[idx];
275 cookie = ppt->msix.cookie[idx];
278 bus_teardown_intr(ppt->dev, res, cookie);
281 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
283 ppt->msix.res[idx] = NULL;
284 ppt->msix.cookie[idx] = NULL;
/*
 * Release all MSI-X state held by 'ppt'.
 *
 * Tears down each of the msix.num_msgs vectors, frees the res/cookie/arg
 * arrays (allocated with M_PPTMSIX), calls pci_release_msi(), releases the
 * memory resources for the MSI-X table and PBA when they are set, and
 * resets num_msgs to 0.  The action taken when num_msgs == 0 is on a line
 * not visible here (presumably an early return - confirm).
 *
 * NOTE(review): on the visible lines the freed array pointers are not
 * reset to NULL; later code appears to rely on num_msgs == 0 before
 * touching them again.
 */
288 ppt_teardown_msix(struct pptdev *ppt)
292 if (ppt->msix.num_msgs == 0)
295 for (i = 0; i < ppt->msix.num_msgs; i++)
296 ppt_teardown_msix_intr(ppt, i);
298 free(ppt->msix.res, M_PPTMSIX);
299 free(ppt->msix.cookie, M_PPTMSIX);
300 free(ppt->msix.arg, M_PPTMSIX);
302 pci_release_msi(ppt->dev);
/* Release the memory resource that was allocated for the MSI-X table. */
304 if (ppt->msix.msix_table_res) {
305 bus_release_resource(ppt->dev, SYS_RES_MEMORY,
306 ppt->msix.msix_table_rid,
307 ppt->msix.msix_table_res);
308 ppt->msix.msix_table_res = NULL;
309 ppt->msix.msix_table_rid = 0;
/* The PBA resource is set only when it was allocated separately. */
311 if (ppt->msix.msix_pba_res) {
312 bus_release_resource(ppt->dev, SYS_RES_MEMORY,
313 ppt->msix.msix_pba_rid,
314 ppt->msix.msix_pba_res);
315 ppt->msix.msix_pba_res = NULL;
316 ppt->msix.msix_pba_rid = 0;
319 ppt->msix.num_msgs = 0;
/* Return the current value of num_pptdevs, the passthrough device counter. */
323 ppt_avail_devices(void)
326 return (num_pptdevs);
/*
 * Walks pptdev_list on behalf of 'vm'.  The loop body and the return
 * value are not visible in this excerpt; presumably it counts the devices
 * whose owner is 'vm' - confirm.
 */
330 ppt_assigned_devices(struct vm *vm)
336 TAILQ_FOREACH(ppt, &pptdev_list, next) {
/*
 * Test whether guest physical address 'gpa' lies inside a passthrough
 * MMIO segment.
 *
 * For every device on pptdev_list, each of its MAX_MMIOSEGS segment slots
 * is checked for gpa in the half-open range [seg->gpa, seg->gpa + seg->len).
 * The filtering by owning 'vm' and the returned values are on lines not
 * visible in this excerpt.
 */
344 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
350 TAILQ_FOREACH(ppt, &pptdev_list, next) {
354 for (i = 0; i < MAX_MMIOSEGS; i++) {
358 if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
/*
 * Reset helper called by ppt_assign_device() and ppt_unassign_device().
 *
 * The visible lines show the tail of a call whose last two arguments are
 * a timeout of max(pcie_get_max_completion_timeout(dev) / 1000, 10) and
 * 'true', followed by pci_power_reset(dev).  The callee and the control
 * flow between the two are not visible; presumably a PCIe function-level
 * reset is attempted first and the power reset is the fallback - confirm.
 */
367 ppt_pci_reset(device_t dev)
371 max(pcie_get_max_completion_timeout(dev) / 1000, 10), true))
374 pci_power_reset(dev);
/*
 * Assign the passthrough device at bus/slot/func to 'vm'.
 *
 * After the ownership check below, the device's PCI state is saved, the
 * device is reset and the state restored; the device is then added to the
 * VM's IOMMU domain.  The error returns and the line that records the new
 * owner are not visible in this excerpt.
 */
378 ppt_assign_device(struct vm *vm, int bus, int slot, int func)
382 ppt = ppt_find(bus, slot, func);
385 * If this device is owned by a different VM then we
386 * cannot change its owner.
388 if (ppt->vm != NULL && ppt->vm != vm)
/* Reset the device, preserving its saved PCI state across the reset. */
391 pci_save_state(ppt->dev);
392 ppt_pci_reset(ppt->dev);
393 pci_restore_state(ppt->dev);
395 iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
/*
 * Detach the passthrough device at bus/slot/func from 'vm'.
 *
 * The device is reset (with its PCI state saved and restored around the
 * reset), its guest MMIO mappings are removed, MSI and MSI-X state is torn
 * down, and the device is removed from the VM's IOMMU domain.  The
 * ownership test, the error returns and the clearing of the owner are on
 * lines not visible in this excerpt.
 */
402 ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
406 ppt = ppt_find(bus, slot, func);
409 * If this device is not owned by this 'vm' then bail out.
414 pci_save_state(ppt->dev);
415 ppt_pci_reset(ppt->dev);
416 pci_restore_state(ppt->dev);
417 ppt_unmap_mmio(vm, ppt);
418 ppt_teardown_msi(ppt);
419 ppt_teardown_msix(ppt);
420 iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
/*
 * Unassign passthrough devices from 'vm'.
 *
 * Walks pptdev_list and calls vm_unassign_pptdev() with each device's
 * bus/slot/function.  Whether entries not owned by 'vm' are skipped is
 * decided on a line not visible in this excerpt.
 */
428 ppt_unassign_all(struct vm *vm)
434 TAILQ_FOREACH(ppt, &pptdev_list, next) {
437 bus = pci_get_bus(dev);
438 slot = pci_get_slot(dev);
439 func = pci_get_function(dev);
440 vm_unassign_pptdev(vm, bus, slot, func);
/*
 * Map host physical range [hpa, hpa + len) at guest physical address
 * 'gpa' for the passthrough device at bus/slot/func.
 *
 * The device is looked up with ppt_find(); the loop scans the
 * MAX_MMIOSEGS segment slots and vm_map_mmio() establishes the mapping.
 * Slot selection, recording of gpa/len in the slot and the error returns
 * are on lines not visible in this excerpt.
 */
448 ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
449 vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
455 ppt = ppt_find(bus, slot, func);
460 for (i = 0; i < MAX_MMIOSEGS; i++) {
463 error = vm_map_mmio(vm, gpa, len, hpa);
/*
 * Body of pptintr(), the handler registered through bus_setup_intr() in
 * ppt_setup_msi() and ppt_setup_msix(); its signature line is not visible
 * in this excerpt.
 *
 * The per-vector argument supplies the owning device plus the address and
 * data that are passed to lapic_intr_msi() for the owning VM.  Returns
 * FILTER_STRAY when msi.startrid == 0 (the legacy interrupt case) and
 * FILTER_HANDLED otherwise.
 */
480 struct pptintr_arg *pptarg;
483 ppt = pptarg->pptdev;
486 lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
490 * This is not expected to happen - panic?
495 * For legacy interrupts give other filters a chance in case
496 * the interrupt was not generated by the passthrough device.
498 if (ppt->msi.startrid == 0)
499 return (FILTER_STRAY);
501 return (FILTER_HANDLED);
/*
 * Configure 'numvec' interrupt messages for the passthrough device at
 * bus/slot/func on behalf of 'vm'.
 *
 * - numvec is validated against the range [0, MAX_MSIMSGS].
 * - Any previously configured MSI state is torn down first, so a call
 *   with numvec == 0 only disables interrupts.
 * - If pci_msi_count() reports 0 the legacy interrupt is used
 *   (startrid 0, RF_SHAREABLE added to the resource flags); otherwise MSI
 *   is used (startrid 1) and the allocation must yield exactly numvec
 *   vectors.
 * - Vector i gets message address 'addr' and message data 'msg + i'.
 *
 * The error values returned, and the conditions that lead to the final
 * ppt_teardown_msi() cleanup call, are on lines not visible in this
 * excerpt.  'vcpu' is not referenced on any visible line.
 */
505 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
506 uint64_t addr, uint64_t msg, int numvec)
509 int msi_count, startrid, error, tmp;
512 if (numvec < 0 || numvec > MAX_MSIMSGS)
515 ppt = ppt_find(bus, slot, func);
518 if (ppt->vm != vm) /* Make sure we own this device */
521 /* Free any allocated resources */
522 ppt_teardown_msi(ppt);
524 if (numvec == 0) /* nothing more to do */
528 msi_count = pci_msi_count(ppt->dev);
529 if (msi_count == 0) {
530 startrid = 0; /* legacy interrupt */
532 flags |= RF_SHAREABLE;
534 startrid = 1; /* MSI */
537 * The device must be capable of supporting the number of vectors
538 * the guest wants to allocate.
540 if (numvec > msi_count)
544 * Make sure that we can allocate all the MSI vectors that are needed
549 error = pci_alloc_msi(ppt->dev, &tmp);
552 else if (tmp != numvec) {
553 pci_release_msi(ppt->dev);
560 ppt->msi.startrid = startrid;
563 * Allocate the irq resource and attach it to the interrupt handler.
565 for (i = 0; i < numvec; i++) {
/*
 * num_msgs is advanced before the allocation so that a later
 * ppt_teardown_msi() also covers a partially set up vector.
 */
566 ppt->msi.num_msgs = i + 1;
567 ppt->msi.cookie[i] = NULL;
570 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
572 if (ppt->msi.res[i] == NULL)
575 ppt->msi.arg[i].pptdev = ppt;
576 ppt->msi.arg[i].addr = addr;
577 ppt->msi.arg[i].msg_data = msg + i;
579 error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
580 INTR_TYPE_NET | INTR_MPSAFE,
581 pptintr, NULL, &ppt->msi.arg[i],
582 &ppt->msi.cookie[i]);
588 ppt_teardown_msi(ppt);
/*
 * Configure MSI-X vector 'idx' of the passthrough device at bus/slot/func
 * on behalf of 'vm'.
 *
 * On the first call (msix.num_msgs == 0) the function sizes and allocates
 * the per-vector arrays from pci_msix_count(), allocates the memory
 * resource containing the MSI-X table (and a separate one for the PBA when
 * it is in a different BAR), and allocates the MSI-X vectors with
 * pci_alloc_msix(); a short allocation is reported as ENOSPC.  Failures in
 * this phase are undone through ppt_teardown_msix().
 *
 * Afterwards, if the mask bit in 'vector_control' is clear, any existing
 * handler for 'idx' is torn down and a new IRQ resource and handler are
 * set up with message address 'addr' and data 'msg'; if the bit is set the
 * vector is only torn down.
 *
 * NOTE(review): no visible line checks 'idx' against msix.num_msgs before
 * it indexes msix.res/cookie/arg - confirm whether such a check exists on
 * lines not shown; if it does not, an out-of-range idx would access memory
 * outside the allocated arrays.  'vcpu' is not referenced on any visible
 * line.  The end of the function may lie beyond this excerpt.
 */
596 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
597 int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
600 struct pci_devinfo *dinfo;
601 int numvec, alloced, rid, error;
602 size_t res_size, cookie_size, arg_size;
604 ppt = ppt_find(bus, slot, func);
607 if (ppt->vm != vm) /* Make sure we own this device */
610 dinfo = device_get_ivars(ppt->dev);
615 * First-time configuration:
616 * Allocate the MSI-X table
617 * Allocate the IRQ resources
618 * Set up some variables in ppt->msix
620 if (ppt->msix.num_msgs == 0) {
621 numvec = pci_msix_count(ppt->dev);
625 ppt->msix.startrid = 1;
626 ppt->msix.num_msgs = numvec;
628 res_size = numvec * sizeof(ppt->msix.res[0]);
629 cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
630 arg_size = numvec * sizeof(ppt->msix.arg[0]);
632 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
633 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
635 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
/* Allocate the memory resource for the BAR that holds the MSI-X table. */
637 rid = dinfo->cfg.msix.msix_table_bar;
638 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
639 SYS_RES_MEMORY, &rid, RF_ACTIVE);
641 if (ppt->msix.msix_table_res == NULL) {
642 ppt_teardown_msix(ppt);
645 ppt->msix.msix_table_rid = rid;
/* A second resource is allocated only when the PBA is in another BAR. */
647 if (dinfo->cfg.msix.msix_table_bar !=
648 dinfo->cfg.msix.msix_pba_bar) {
649 rid = dinfo->cfg.msix.msix_pba_bar;
650 ppt->msix.msix_pba_res = bus_alloc_resource_any(
651 ppt->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
653 if (ppt->msix.msix_pba_res == NULL) {
654 ppt_teardown_msix(ppt);
657 ppt->msix.msix_pba_rid = rid;
661 error = pci_alloc_msix(ppt->dev, &alloced);
662 if (error || alloced != numvec) {
663 ppt_teardown_msix(ppt);
664 return (error == 0 ? ENOSPC: error);
668 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
669 /* Tear down the IRQ if it's already set up */
670 ppt_teardown_msix_intr(ppt, idx);
672 /* Allocate the IRQ resource */
673 ppt->msix.cookie[idx] = NULL;
674 rid = ppt->msix.startrid + idx;
675 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
677 if (ppt->msix.res[idx] == NULL)
680 ppt->msix.arg[idx].pptdev = ppt;
681 ppt->msix.arg[idx].addr = addr;
682 ppt->msix.arg[idx].msg_data = msg;
684 /* Setup the MSI-X interrupt */
685 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
686 INTR_TYPE_NET | INTR_MPSAFE,
687 pptintr, NULL, &ppt->msix.arg[idx],
688 &ppt->msix.cookie[idx]);
/* Cleanup: release the IRQ resource and clear the slot for this vector. */
691 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
692 ppt->msix.cookie[idx] = NULL;
693 ppt->msix.res[idx] = NULL;
697 /* Masked, tear it down if it's already been set up */
698 ppt_teardown_msix_intr(ppt, idx);