2 * Copyright (c) 2011 NetApp, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
38 #include <sys/pciio.h>
41 #include <sys/sysctl.h>
43 #include <dev/pci/pcivar.h>
44 #include <dev/pci/pcireg.h>
46 #include <machine/resource.h>
48 #include <machine/vmm.h>
49 #include <machine/vmm_dev.h>
51 #include "vmm_lapic.h"
/* Upper bound on MSI vectors tracked per passthrough device. */
59 #define MAX_MSIMSGS 32
62 * If the MSI-X table is located in the middle of a BAR then that MMIO
63 * region gets split into two segments - one segment above the MSI-X table
64 * and the other segment below the MSI-X table - with a hole in place of
65 * the MSI-X table so accesses to it can be trapped and emulated.
67 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
69 #define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1)
/* malloc(9) tag for the dynamically sized MSI-X arrays (res/cookie/arg). */
71 MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
/*
 * Argument handed to the pptintr() filter for each allocated vector.
 * NOTE(review): lines appear missing between fragments here (the content
 * numbering jumps 74 -> 81); the addr/msg_data members of pptintr_arg and
 * the "struct pptdev {" opener are not visible in this extraction.
 */
73 struct pptintr_arg { /* pptintr(pptintr_arg) */
74 struct pptdev *pptdev;
/* --- members below presumably belong to struct pptdev --- TODO confirm */
81 struct vm *vm; /* owner of this device */
82 TAILQ_ENTRY(pptdev) next;
83 struct vm_memory_segment mmio[MAX_MMIOSEGS];
/* MSI state: fixed-size arrays, bounded by MAX_MSIMSGS. */
85 int num_msgs; /* guest state */
87 int startrid; /* host state */
88 struct resource *res[MAX_MSIMSGS];
89 void *cookie[MAX_MSIMSGS];
90 struct pptintr_arg arg[MAX_MSIMSGS];
/* MSI-X state: arrays are malloc'ed (M_PPTMSIX) at first setup. */
97 struct resource *msix_table_res;
98 struct resource **res;
100 struct pptintr_arg *arg;
/* Export the passthrough device count under hw.vmm.ppt.devices. */
104 SYSCTL_DECL(_hw_vmm);
105 SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");
107 static int num_pptdevs;
108 SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
109 "number of pci passthru devices");
/* Global list of all attached passthrough devices. */
111 static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);
/*
 * Probe routine: decide whether this PCI function should be claimed as a
 * bhyve passthrough ("ppt") device.
 * NOTE(review): interior lines are missing from this extraction (the return
 * type, the bus/slot/func declarations, and the early-return statements
 * following both branches of the if/else below).
 */
114 ppt_probe(device_t dev)
117 struct pci_devinfo *dinfo;
119 dinfo = (struct pci_devinfo *)device_get_ivars(dev);
/* Bus/slot/function of the candidate device. */
121 bus = pci_get_bus(dev);
122 slot = pci_get_slot(dev);
123 func = pci_get_function(dev);
126 * To qualify as a pci passthrough device a device must:
127 * - be allowed by administrator to be used in this role
128 * - be an endpoint device
/* Reject anything that is not a type-0 (endpoint) header. */
130 if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
132 else if (vmm_is_pptdev(bus, slot, func))
136 * Returning BUS_PROBE_NOWILDCARD here matches devices that the
137 * SR-IOV infrastructure specified as "ppt" passthrough devices.
138 * All normal devices that did not have "ppt" specified as their
139 * driver will not be matched by this.
141 return (BUS_PROBE_NOWILDCARD);
/*
 * Attach routine: register the softc on the global pptdev list.
 * NOTE(review): lines are missing here (return type, the num_pptdevs
 * increment, and the function's return) — confirm against the full file.
 */
145 ppt_attach(device_t dev)
149 ppt = device_get_softc(dev);
152 TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
156 device_printf(dev, "attached\n");
/*
 * Detach routine: remove the device from the global pptdev list.
 * NOTE(review): missing lines here likely include an ownership check and
 * the num_pptdevs decrement — confirm against the full file.
 */
162 ppt_detach(device_t dev)
166 ppt = device_get_softc(dev);
171 TAILQ_REMOVE(&pptdev_list, ppt, next);
/*
 * newbus glue: method table and driver/devclass registration so the "ppt"
 * driver attaches on the pci bus.  NOTE(review): the DEVMETHOD_END
 * terminator and closing brace of the method array are not visible here.
 */
176 static device_method_t ppt_methods[] = {
177 /* Device interface */
178 DEVMETHOD(device_probe, ppt_probe),
179 DEVMETHOD(device_attach, ppt_attach),
180 DEVMETHOD(device_detach, ppt_detach),
184 static devclass_t ppt_devclass;
/* softc for each instance is a struct pptdev. */
185 DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
186 DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
/*
 * Look up an attached passthrough device by bus/slot/function.
 * NOTE(review): missing lines include the local declarations, the
 * assignment of 'dev' from the list element, and the return statements
 * (matching entry vs. NULL) — confirm against the full file.
 */
188 static struct pptdev *
189 ppt_find(int bus, int slot, int func)
195 TAILQ_FOREACH(ppt, &pptdev_list, next) {
197 b = pci_get_bus(dev);
198 s = pci_get_slot(dev);
199 f = pci_get_function(dev);
200 if (bus == b && slot == s && func == f)
/*
 * Unmap and clear every MMIO segment this device has mapped into 'vm'.
 * The unmap result is deliberately ignored; the segment record is zeroed
 * so the slot can be reused.  NOTE(review): the 'seg' assignment and a
 * likely zero-length skip inside the loop are not visible here.
 */
207 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
210 struct vm_memory_segment *seg;
212 for (i = 0; i < MAX_MMIOSEGS; i++) {
216 (void)vm_unmap_mmio(vm, seg->gpa, seg->len);
217 bzero(seg, sizeof(struct vm_memory_segment));
/*
 * Tear down all MSI (or legacy INTx) interrupt state for the device:
 * detach each handler, release each IRQ resource, and release the MSI
 * allocation itself.  Safe to call when nothing is set up (num_msgs == 0).
 */
222 ppt_teardown_msi(struct pptdev *ppt)
226 struct resource *res;
228 if (ppt->msi.num_msgs == 0)
231 for (i = 0; i < ppt->msi.num_msgs; i++) {
232 rid = ppt->msi.startrid + i;
233 res = ppt->msi.res[i];
234 cookie = ppt->msi.cookie[i];
/* NOTE(review): NULL guards on res/cookie are presumably on the
 * missing lines before these calls — confirm against the full file. */
237 bus_teardown_intr(ppt->dev, res, cookie);
240 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
242 ppt->msi.res[i] = NULL;
243 ppt->msi.cookie[i] = NULL;
/* startrid == 1 means real MSI was allocated (startrid 0 is legacy),
 * so the MSI allocation must be returned to the device. */
246 if (ppt->msi.startrid == 1)
247 pci_release_msi(ppt->dev);
249 ppt->msi.num_msgs = 0;
/*
 * Tear down a single MSI-X vector: detach its handler and release its IRQ
 * resource, then clear the per-vector bookkeeping so it can be set up again.
 */
253 ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
256 struct resource *res;
259 rid = ppt->msix.startrid + idx;
260 res = ppt->msix.res[idx];
261 cookie = ppt->msix.cookie[idx];
/* NOTE(review): NULL guards are presumably on the missing lines
 * preceding these calls — confirm against the full file. */
264 bus_teardown_intr(ppt->dev, res, cookie);
267 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
269 ppt->msix.res[idx] = NULL;
270 ppt->msix.cookie[idx] = NULL;
/*
 * Tear down all MSI-X state: every vector, the mapped MSI-X table BAR
 * resource, the malloc'ed per-vector arrays, and the MSI-X allocation.
 * Safe to call when nothing is set up (num_msgs == 0).
 */
274 ppt_teardown_msix(struct pptdev *ppt)
278 if (ppt->msix.num_msgs == 0)
281 for (i = 0; i < ppt->msix.num_msgs; i++)
282 ppt_teardown_msix_intr(ppt, i)
284 if (ppt->msix.msix_table_res) {
285 bus_release_resource(ppt->dev, SYS_RES_MEMORY,
286 ppt->msix.msix_table_rid,
287 ppt->msix.msix_table_res);
288 ppt->msix.msix_table_res = NULL;
289 ppt->msix.msix_table_rid = 0;
/* free(9) tolerates NULL, so these are safe even if allocation never
 * happened.  NOTE(review): the pointers are not reset to NULL on the
 * visible lines — presumably done elsewhere or on missing lines. */
292 free(ppt->msix.res, M_PPTMSIX);
293 free(ppt->msix.cookie, M_PPTMSIX);
294 free(ppt->msix.arg, M_PPTMSIX);
296 pci_release_msi(ppt->dev);
298 ppt->msix.num_msgs = 0;
/* Return the number of passthrough devices known to the system. */
302 ppt_avail_devices(void)
305 return (num_pptdevs);
/*
 * Count the passthrough devices currently owned by 'vm'.
 * NOTE(review): the loop body (ownership test and counter) and the return
 * are on lines missing from this extraction — confirm against the full file.
 */
309 ppt_assigned_devices(struct vm *vm)
315 TAILQ_FOREACH(ppt, &pptdev_list, next) {
/*
 * Return whether guest physical address 'gpa' falls inside any MMIO
 * segment mapped by a passthrough device owned by 'vm'.
 * NOTE(review): the ownership check, 'seg' assignment, and return
 * statements are on missing lines — confirm against the full file.
 */
323 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
327 struct vm_memory_segment *seg;
329 TAILQ_FOREACH(ppt, &pptdev_list, next) {
333 for (i = 0; i < MAX_MMIOSEGS; i++) {
/* Half-open interval test: [gpa, gpa + len). */
337 if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
/*
 * Assign the passthrough device at bus/slot/func to 'vm' and attach it to
 * the VM's IOMMU domain.  Fails if the device is already owned by another
 * VM.  NOTE(review): the NULL check on ppt_find(), the error returns, and
 * the 'ppt->vm = vm' ownership assignment are on missing lines.
 */
346 ppt_assign_device(struct vm *vm, int bus, int slot, int func)
350 ppt = ppt_find(bus, slot, func);
353 * If this device is owned by a different VM then we
354 * cannot change its owner.
356 if (ppt->vm != NULL && ppt->vm != vm)
360 iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
/*
 * Release the passthrough device from 'vm': unmap its MMIO segments, tear
 * down MSI and MSI-X state, and detach it from the VM's IOMMU domain.
 * NOTE(review): the NULL/ownership checks, their error returns, and the
 * clearing of ppt->vm are on lines missing from this extraction.
 */
367 ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
371 ppt = ppt_find(bus, slot, func);
374 * If this device is not owned by this 'vm' then bail out.
378 ppt_unmap_mmio(vm, ppt);
379 ppt_teardown_msi(ppt);
380 ppt_teardown_msix(ppt);
381 iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
/*
 * Unassign every passthrough device owned by 'vm', routing each through
 * vm_unassign_pptdev() so VM-level bookkeeping stays consistent.
 * NOTE(review): the ownership filter inside the loop and the 'dev'
 * assignment are on missing lines — confirm against the full file.
 */
389 ppt_unassign_all(struct vm *vm)
395 TAILQ_FOREACH(ppt, &pptdev_list, next) {
398 bus = pci_get_bus(dev);
399 slot = pci_get_slot(dev);
400 func = pci_get_function(dev);
401 vm_unassign_pptdev(vm, bus, slot, func);
/*
 * Map a host MMIO range (hpa, len) into the guest at 'gpa' for the given
 * passthrough device, recording it in a free slot of ppt->mmio[].
 * NOTE(review): the free-slot test, the recording of gpa/len into 'seg',
 * and the success/failure returns are on lines missing from this
 * extraction — confirm against the full file.
 */
409 ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
410 vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
413 struct vm_memory_segment *seg;
416 ppt = ppt_find(bus, slot, func);
421 for (i = 0; i < MAX_MMIOSEGS; i++) {
424 error = vm_map_mmio(vm, gpa, len, hpa);
/*
 * Interrupt filter for passthrough MSI/MSI-X vectors: forward the vector
 * as an MSI message (addr/msg_data) to the owning guest's local APIC.
 * NOTE(review): the function signature and the 'pptarg = xarg' assignment
 * are on lines missing from this extraction.
 */
441 struct pptintr_arg *pptarg;
444 ppt = pptarg->pptdev;
/* Inject the MSI into the guest; a guarded path (device no longer
 * owned by a VM) apparently exists on the missing lines around 451. */
447 lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
451 * This is not expected to happen - panic?
456 * For legacy interrupts give other filters a chance in case
457 * the interrupt was not generated by the passthrough device.
/* startrid == 0 means a shared legacy INTx line, not MSI. */
459 if (ppt->msi.startrid == 0)
460 return (FILTER_STRAY);
462 return (FILTER_HANDLED);
/*
 * Configure 'numvec' host MSI vectors (or a single legacy INTx line when
 * the device has no MSI capability) for the passthrough device and point
 * each at pptintr(), which injects the guest MSI (addr, msg + i).
 * Tears down any previous MSI setup first; numvec == 0 just tears down.
 * NOTE(review): several interior lines are missing (error returns such as
 * EINVAL/ENXIO/E2BIG, the 'flags' initialization, and the success-path
 * return) — confirm against the full file.
 */
466 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
467 uint64_t addr, uint64_t msg, int numvec)
470 int msi_count, startrid, error, tmp;
/* Reject out-of-range vector counts up front. */
473 if (numvec < 0 || numvec > MAX_MSIMSGS)
476 ppt = ppt_find(bus, slot, func);
479 if (ppt->vm != vm) /* Make sure we own this device */
482 /* Free any allocated resources */
483 ppt_teardown_msi(ppt);
485 if (numvec == 0) /* nothing more to do */
489 msi_count = pci_msi_count(ppt->dev);
490 if (msi_count == 0) {
491 startrid = 0; /* legacy interrupt */
/* Legacy INTx may be shared with other devices. */
493 flags |= RF_SHAREABLE;
495 startrid = 1; /* MSI */
498 * The device must be capable of supporting the number of vectors
499 * the guest wants to allocate.
501 if (numvec > msi_count)
505 * Make sure that we can allocate all the MSI vectors that are needed
/* Allocate MSI; bail out (releasing) if fewer than requested came back. */
510 error = pci_alloc_msi(ppt->dev, &tmp);
513 else if (tmp != numvec) {
514 pci_release_msi(ppt->dev);
521 ppt->msi.startrid = startrid;
524 * Allocate the irq resource and attach it to the interrupt handler.
526 for (i = 0; i < numvec; i++) {
/* Bump num_msgs first so a failure path tears down partial state. */
527 ppt->msi.num_msgs = i + 1;
528 ppt->msi.cookie[i] = NULL;
531 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
533 if (ppt->msi.res[i] == NULL)
536 ppt->msi.arg[i].pptdev = ppt;
537 ppt->msi.arg[i].addr = addr;
/* Successive vectors use consecutive message data values. */
538 ppt->msi.arg[i].msg_data = msg + i;
/* Filter-only handler (no ithread): pptintr runs in interrupt context. */
540 error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
541 INTR_TYPE_NET | INTR_MPSAFE,
542 pptintr, NULL, &ppt->msi.arg[i],
543 &ppt->msi.cookie[i]);
/* On any failure, undo everything set up so far. */
549 ppt_teardown_msi(ppt);
557 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
558 int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
561 struct pci_devinfo *dinfo;
562 int numvec, alloced, rid, error;
563 size_t res_size, cookie_size, arg_size;
565 ppt = ppt_find(bus, slot, func);
568 if (ppt->vm != vm) /* Make sure we own this device */
571 dinfo = device_get_ivars(ppt->dev);
576 * First-time configuration:
577 * Allocate the MSI-X table
578 * Allocate the IRQ resources
579 * Set up some variables in ppt->msix
581 if (ppt->msix.num_msgs == 0) {
582 numvec = pci_msix_count(ppt->dev);
586 ppt->msix.startrid = 1;
587 ppt->msix.num_msgs = numvec;
589 res_size = numvec * sizeof(ppt->msix.res[0]);
590 cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
591 arg_size = numvec * sizeof(ppt->msix.arg[0]);
593 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
594 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
596 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
598 rid = dinfo->cfg.msix.msix_table_bar;
599 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
600 SYS_RES_MEMORY, &rid, RF_ACTIVE);
602 if (ppt->msix.msix_table_res == NULL) {
603 ppt_teardown_msix(ppt);
606 ppt->msix.msix_table_rid = rid;
609 error = pci_alloc_msix(ppt->dev, &alloced);
610 if (error || alloced != numvec) {
611 ppt_teardown_msix(ppt);
612 return (error == 0 ? ENOSPC: error);
616 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
617 /* Tear down the IRQ if it's already set up */
618 ppt_teardown_msix_intr(ppt, idx);
620 /* Allocate the IRQ resource */
621 ppt->msix.cookie[idx] = NULL;
622 rid = ppt->msix.startrid + idx;
623 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
625 if (ppt->msix.res[idx] == NULL)
628 ppt->msix.arg[idx].pptdev = ppt;
629 ppt->msix.arg[idx].addr = addr;
630 ppt->msix.arg[idx].msg_data = msg;
632 /* Setup the MSI-X interrupt */
633 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
634 INTR_TYPE_NET | INTR_MPSAFE,
635 pptintr, NULL, &ppt->msix.arg[idx],
636 &ppt->msix.cookie[idx]);
639 bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
640 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
641 ppt->msix.cookie[idx] = NULL;
642 ppt->msix.res[idx] = NULL;
646 /* Masked, tear it down if it's already been set up */
647 ppt_teardown_msix_intr(ppt, idx);