2 * Copyright (c) 2011 NetApp, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
38 #include <sys/pciio.h>
41 #include <sys/sysctl.h>
43 #include <dev/pci/pcivar.h>
44 #include <dev/pci/pcireg.h>
46 #include <machine/resource.h>
48 #include <machine/vmm.h>
49 #include <machine/vmm_dev.h>
51 #include "vmm_lapic.h"
/*
 * Size of the fixed MSI arrays (res, cookie, arg) kept per device; also the
 * upper bound ppt_setup_msi enforces on its numvec argument.
 */
59 #define MAX_MSIMSGS 32
62 * If the MSI-X table is located in the middle of a BAR then that MMIO
63 * region gets split into two segments - one segment above the MSI-X table
64 * and the other segment below the MSI-X table - with a hole in place of
65 * the MSI-X table so accesses to it can be trapped and emulated.
67 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
69 #define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1)
/*
 * malloc type for the run-time sized MSI-X arrays that ppt_setup_msix
 * allocates and ppt_teardown_msix frees.
 */
71 MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
/*
 * Argument handed to the pptintr filter for one interrupt vector.  Besides
 * the owning device, the users of this struct in this file also read and
 * write addr and msg_data members.
 */
73 struct pptintr_arg { /* pptintr(pptintr_arg) */
74 struct pptdev *pptdev;
/*
 * Owning VM.  ppt_assign_device compares this against NULL before refusing
 * to hand a device that is owned by a different VM to a new owner.
 */
87 struct vm *vm; /* owner of this device */
/* Linkage on the global pptdev_list. */
88 TAILQ_ENTRY(pptdev) next;
/* MMIO segments mapped into the guest for this device (gpa/len per slot). */
89 struct pptseg mmio[MAX_MMIOSEGS];
/* MSI state, accessed as ppt->msi.*: fixed arrays of MAX_MSIMSGS slots. */
91 int num_msgs; /* guest state */
93 int startrid; /* host state */
94 struct resource *res[MAX_MSIMSGS];
95 void *cookie[MAX_MSIMSGS];
96 struct pptintr_arg arg[MAX_MSIMSGS];
/*
 * MSI-X state, accessed as ppt->msix.*: res and arg are allocated in
 * ppt_setup_msix with one element per MSI-X vector reported by the device.
 */
103 struct resource *msix_table_res;
104 struct resource **res;
106 struct pptintr_arg *arg;
/* sysctl node for passthru devices, placed under the _hw_vmm parent. */
110 SYSCTL_DECL(_hw_vmm);
111 SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices");
/*
 * Passthru device count, exported read-only (CTLFLAG_RD) as the "devices"
 * integer under that node and returned by ppt_avail_devices.
 */
113 static int num_pptdevs;
114 SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0,
115 "number of pci passthru devices");
/*
 * Global list of passthru device softcs, linked through the next member.
 * Entries are appended in ppt_attach and removed in ppt_detach.
 */
117 static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list);
/*
 * Device probe method.  Fetches the PCI ivars and the bus/slot/function of
 * dev, then applies the qualification tests described in the comment below:
 * the header type must be PCIM_HDRTYPE_NORMAL, and vmm_is_pptdev is asked
 * about the bus/slot/function triple.
 * NOTE(review): the statements controlled by those two tests are not
 * visible here, so their results are not described.
 */
120 ppt_probe(device_t dev)
123 struct pci_devinfo *dinfo;
125 dinfo = (struct pci_devinfo *)device_get_ivars(dev);
127 bus = pci_get_bus(dev);
128 slot = pci_get_slot(dev);
129 func = pci_get_function(dev);
132 * To qualify as a pci passthrough device a device must:
133 * - be allowed by administrator to be used in this role
134 * - be an endpoint device
136 if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL)
138 else if (vmm_is_pptdev(bus, slot, func))
142 * Returning BUS_PROBE_NOWILDCARD here matches devices that the
143 * SR-IOV infrastructure specified as "ppt" passthrough devices.
144 * All normal devices that did not have "ppt" specified as their
145 * driver will not be matched by this.
147 return (BUS_PROBE_NOWILDCARD);
/*
 * Device attach method: appends the softc of dev to the tail of the global
 * pptdev_list and prints "attached" through device_printf.
 */
151 ppt_attach(device_t dev)
155 ppt = device_get_softc(dev);
158 TAILQ_INSERT_TAIL(&pptdev_list, ppt, next);
162 device_printf(dev, "attached\n");
/*
 * Device detach method: unlinks the softc of dev from the global
 * pptdev_list.
 */
168 ppt_detach(device_t dev)
172 ppt = device_get_softc(dev);
177 TAILQ_REMOVE(&pptdev_list, ppt, next);
/* Method table binding the probe/attach/detach routines of this driver. */
182 static device_method_t ppt_methods[] = {
183 /* Device interface */
184 DEVMETHOD(device_probe, ppt_probe),
185 DEVMETHOD(device_attach, ppt_attach),
186 DEVMETHOD(device_detach, ppt_detach),
/*
 * Driver class and module registration.  The softc size is that of
 * struct pptdev, which is what device_get_softc hands back in attach/detach.
 */
190 static devclass_t ppt_devclass;
191 DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev));
192 DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
/*
 * Walk pptdev_list looking for the device whose PCI bus, slot and function
 * all equal the requested triple.
 * NOTE(review): the return statements are not visible here; presumably the
 * matching entry is returned, or NULL when nothing matches - confirm.
 */
194 static struct pptdev *
195 ppt_find(int bus, int slot, int func)
201 TAILQ_FOREACH(ppt, &pptdev_list, next) {
203 b = pci_get_bus(dev);
204 s = pci_get_slot(dev);
205 f = pci_get_function(dev);
206 if (bus == b && slot == s && func == f)
/*
 * Remove the guest MMIO mappings recorded for ppt from vm, iterating over
 * all MAX_MMIOSEGS segment slots.
 */
213 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
218 for (i = 0; i < MAX_MMIOSEGS; i++) {
/* The result of vm_unmap_mmio is deliberately discarded. */
222 (void)vm_unmap_mmio(vm, seg->gpa, seg->len);
/* Clear the slot so it no longer describes a mapping. */
223 bzero(seg, sizeof(struct pptseg));
/*
 * Undo ppt_setup_msi: for each of the num_msgs recorded vectors tear down
 * the interrupt handler, release the IRQ resource and clear the slot, then
 * release the MSI allocation and reset the vector count.
 */
228 ppt_teardown_msi(struct pptdev *ppt)
232 struct resource *res;
/* Nothing is configured when the vector count is zero. */
234 if (ppt->msi.num_msgs == 0)
237 for (i = 0; i < ppt->msi.num_msgs; i++) {
/* Resource ids are consecutive, starting at startrid. */
238 rid = ppt->msi.startrid + i;
239 res = ppt->msi.res[i];
240 cookie = ppt->msi.cookie[i];
243 bus_teardown_intr(ppt->dev, res, cookie);
246 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
248 ppt->msi.res[i] = NULL;
249 ppt->msi.cookie[i] = NULL;
/*
 * ppt_setup_msi uses startrid 1 for MSI and 0 for a legacy interrupt, so
 * the MSI allocation is released only in the MSI case.
 */
252 if (ppt->msi.startrid == 1)
253 pci_release_msi(ppt->dev);
255 ppt->msi.num_msgs = 0;
/*
 * Tear down a single MSI-X vector of ppt, selected by idx: remove the
 * interrupt handler, release the IRQ resource and clear the res/cookie
 * slots for that index.
 */
259 ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
262 struct resource *res;
/* The resource id of a vector is its index offset by startrid. */
265 rid = ppt->msix.startrid + idx;
266 res = ppt->msix.res[idx];
267 cookie = ppt->msix.cookie[idx];
270 bus_teardown_intr(ppt->dev, res, cookie);
273 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
275 ppt->msix.res[idx] = NULL;
276 ppt->msix.cookie[idx] = NULL;
/*
 * Release all MSI-X state held for ppt: every per-vector interrupt, the
 * MSI-X table memory resource, the three arrays allocated with M_PPTMSIX
 * and the message allocation itself.  The vector count is reset to zero.
 */
280 ppt_teardown_msix(struct pptdev *ppt)
/* A zero vector count means MSI-X was never configured. */
284 if (ppt->msix.num_msgs == 0)
287 for (i = 0; i < ppt->msix.num_msgs; i++)
288 ppt_teardown_msix_intr(ppt, i);
/* The table resource is released only if it was actually allocated. */
290 if (ppt->msix.msix_table_res) {
291 bus_release_resource(ppt->dev, SYS_RES_MEMORY,
292 ppt->msix.msix_table_rid,
293 ppt->msix.msix_table_res);
294 ppt->msix.msix_table_res = NULL;
295 ppt->msix.msix_table_rid = 0;
/* Free the arrays that ppt_setup_msix allocated on first configuration. */
298 free(ppt->msix.res, M_PPTMSIX);
299 free(ppt->msix.cookie, M_PPTMSIX);
300 free(ppt->msix.arg, M_PPTMSIX);
302 pci_release_msi(ppt->dev);
304 ppt->msix.num_msgs = 0;
/* Return the current value of the num_pptdevs counter. */
308 ppt_avail_devices(void)
311 return (num_pptdevs);
/*
 * Iterate over every entry of pptdev_list on behalf of vm.
 * NOTE(review): the loop body is not visible here; presumably it counts the
 * devices whose owner is vm - confirm.
 */
315 ppt_assigned_devices(struct vm *vm)
321 TAILQ_FOREACH(ppt, &pptdev_list, next) {
/*
 * Check whether the guest physical address gpa lies inside an MMIO segment
 * recorded for a passthru device, scanning every segment slot of every
 * device on pptdev_list.
 * NOTE(review): any per-device or per-segment filtering and the return
 * values are not visible here.
 */
329 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
335 TAILQ_FOREACH(ppt, &pptdev_list, next) {
339 for (i = 0; i < MAX_MMIOSEGS; i++) {
/* Half-open range test: seg->gpa <= gpa < seg->gpa + seg->len. */
343 if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
/*
 * Assign the passthru device at bus/slot/func to vm.  A device that already
 * has a different owner is not reassigned; otherwise the device is added to
 * the iommu domain of vm, identified by its PCI RID.
 * NOTE(review): the handling of a failed ppt_find and the return values are
 * not visible here.
 */
352 ppt_assign_device(struct vm *vm, int bus, int slot, int func)
356 ppt = ppt_find(bus, slot, func);
359 * If this device is owned by a different VM then we
360 * cannot change its owner.
362 if (ppt->vm != NULL && ppt->vm != vm)
366 iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
/*
 * Take the passthru device at bus/slot/func away from vm: drop its guest
 * MMIO mappings, tear down MSI and MSI-X state, and remove the device from
 * the iommu domain of vm.
 * NOTE(review): the ownership test announced by the comment below and the
 * return values are not visible here.
 */
373 ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
377 ppt = ppt_find(bus, slot, func);
380 * If this device is not owned by this 'vm' then bail out.
384 ppt_unmap_mmio(vm, ppt);
385 ppt_teardown_msi(ppt);
386 ppt_teardown_msix(ppt);
387 iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev));
/*
 * Walk pptdev_list and call vm_unassign_pptdev for vm with the
 * bus/slot/function of each device visited.
 * NOTE(review): any filter that restricts the walk to devices owned by vm
 * is not visible here.
 */
395 ppt_unassign_all(struct vm *vm)
401 TAILQ_FOREACH(ppt, &pptdev_list, next) {
404 bus = pci_get_bus(dev);
405 slot = pci_get_slot(dev);
406 func = pci_get_function(dev);
407 vm_unassign_pptdev(vm, bus, slot, func);
/*
 * Map len bytes of host physical memory at hpa into vm at guest physical
 * address gpa for the passthru device at bus/slot/func.  The segment slots
 * of the device are scanned and the mapping is made with vm_map_mmio.
 * NOTE(review): the slot selection, the bookkeeping after a successful map
 * and the return values are not visible here.
 */
415 ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
416 vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
422 ppt = ppt_find(bus, slot, func);
427 for (i = 0; i < MAX_MMIOSEGS; i++) {
430 error = vm_map_mmio(vm, gpa, len, hpa);
/*
 * Body of the pptintr interrupt filter (installed through bus_setup_intr by
 * the MSI and MSI-X setup routines).  The per-vector argument supplies the
 * owning device plus the address/data pair, which are forwarded to
 * lapic_intr_msi for the VM that owns the device.
 */
447 struct pptintr_arg *pptarg;
450 ppt = pptarg->pptdev;
453 lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
457 * This is not expected to happen - panic?
462 * For legacy interrupts give other filters a chance in case
463 * the interrupt was not generated by the passthrough device.
/* startrid 0 marks the legacy-interrupt configuration in ppt_setup_msi. */
465 if (ppt->msi.startrid == 0)
466 return (FILTER_STRAY);
468 return (FILTER_HANDLED);
/*
 * Configure MSI (or a legacy interrupt) delivery for the passthru device at
 * bus/slot/func on behalf of vm.
 *
 * addr and msg are stored in the argument of every vector, with msg offset
 * by the vector index.  numvec is the number of vectors requested and is
 * checked against the range [0, MAX_MSIMSGS]; a numvec of 0 only releases
 * whatever was previously set up.
 * NOTE(review): vcpu is not referenced in the visible lines, and the values
 * returned on the failure paths are not visible here.
 */
472 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
473 uint64_t addr, uint64_t msg, int numvec)
476 int msi_count, startrid, error, tmp;
479 if (numvec < 0 || numvec > MAX_MSIMSGS)
482 ppt = ppt_find(bus, slot, func);
485 if (ppt->vm != vm) /* Make sure we own this device */
488 /* Free any allocated resources */
489 ppt_teardown_msi(ppt);
491 if (numvec == 0) /* nothing more to do */
/*
 * A device reporting no MSI messages falls back to the legacy interrupt,
 * which uses resource id 0 and a shareable IRQ resource.
 */
495 msi_count = pci_msi_count(ppt->dev);
496 if (msi_count == 0) {
497 startrid = 0; /* legacy interrupt */
499 flags |= RF_SHAREABLE;
501 startrid = 1; /* MSI */
504 * The device must be capable of supporting the number of vectors
505 * the guest wants to allocate.
507 if (numvec > msi_count)
511 * Make sure that we can allocate all the MSI vectors that are needed
516 error = pci_alloc_msi(ppt->dev, &tmp);
/* A count in tmp that differs from numvec leads to the allocation being released. */
519 else if (tmp != numvec) {
520 pci_release_msi(ppt->dev);
527 ppt->msi.startrid = startrid;
530 * Allocate the irq resource and attach it to the interrupt handler.
532 for (i = 0; i < numvec; i++) {
/*
 * num_msgs is advanced before the slot is populated, so the
 * ppt_teardown_msi call further down walks every slot touched so far.
 */
533 ppt->msi.num_msgs = i + 1;
534 ppt->msi.cookie[i] = NULL;
537 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
539 if (ppt->msi.res[i] == NULL)
/* Each vector carries the same address and a data value offset by its index. */
542 ppt->msi.arg[i].pptdev = ppt;
543 ppt->msi.arg[i].addr = addr;
544 ppt->msi.arg[i].msg_data = msg + i;
/* pptintr is installed in the filter position; the handler slot is NULL. */
546 error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
547 INTR_TYPE_NET | INTR_MPSAFE,
548 pptintr, NULL, &ppt->msi.arg[i],
549 &ppt->msi.cookie[i]);
555 ppt_teardown_msi(ppt);
563 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
564 int idx, uint64_t addr, uint64_t msg, uint32_t vector_control)
567 struct pci_devinfo *dinfo;
568 int numvec, alloced, rid, error;
569 size_t res_size, cookie_size, arg_size;
571 ppt = ppt_find(bus, slot, func);
574 if (ppt->vm != vm) /* Make sure we own this device */
577 dinfo = device_get_ivars(ppt->dev);
582 * First-time configuration:
583 * Allocate the MSI-X table
584 * Allocate the IRQ resources
585 * Set up some variables in ppt->msix
587 if (ppt->msix.num_msgs == 0) {
588 numvec = pci_msix_count(ppt->dev);
592 ppt->msix.startrid = 1;
593 ppt->msix.num_msgs = numvec;
595 res_size = numvec * sizeof(ppt->msix.res[0]);
596 cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
597 arg_size = numvec * sizeof(ppt->msix.arg[0]);
599 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
600 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
602 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
604 rid = dinfo->cfg.msix.msix_table_bar;
605 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
606 SYS_RES_MEMORY, &rid, RF_ACTIVE);
608 if (ppt->msix.msix_table_res == NULL) {
609 ppt_teardown_msix(ppt);
612 ppt->msix.msix_table_rid = rid;
615 error = pci_alloc_msix(ppt->dev, &alloced);
616 if (error || alloced != numvec) {
617 ppt_teardown_msix(ppt);
618 return (error == 0 ? ENOSPC: error);
622 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
623 /* Tear down the IRQ if it's already set up */
624 ppt_teardown_msix_intr(ppt, idx);
626 /* Allocate the IRQ resource */
627 ppt->msix.cookie[idx] = NULL;
628 rid = ppt->msix.startrid + idx;
629 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
631 if (ppt->msix.res[idx] == NULL)
634 ppt->msix.arg[idx].pptdev = ppt;
635 ppt->msix.arg[idx].addr = addr;
636 ppt->msix.arg[idx].msg_data = msg;
638 /* Setup the MSI-X interrupt */
639 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
640 INTR_TYPE_NET | INTR_MPSAFE,
641 pptintr, NULL, &ppt->msix.arg[idx],
642 &ppt->msix.cookie[idx]);
645 bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
646 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
647 ppt->msix.cookie[idx] = NULL;
648 ppt->msix.res[idx] = NULL;
652 /* Masked, tear it down if it's already been set up */
653 ppt_teardown_msix_intr(ppt, idx);