2 * Copyright (c) 2011 NetApp, Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/kernel.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
38 #include <sys/pciio.h>
42 #include <dev/pci/pcivar.h>
43 #include <dev/pci/pcireg.h>
45 #include <machine/resource.h>
47 #include <machine/vmm.h>
48 #include <machine/vmm_dev.h>
50 #include "vmm_lapic.h"
/* Sizing limits for the static passthrough bookkeeping below. */
58 #define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0]))
59 #define MAX_MSIMSGS 32
/* NOTE(review): the "/*" opener of the following block comment is elided
   in this listing; the text explains the extra MMIO segment below. */
62 * If the MSI-X table is located in the middle of a BAR then that MMIO
63 * region gets split into two segments - one segment above the MSI-X table
64 * and the other segment below the MSI-X table - with a hole in place of
65 * the MSI-X table so accesses to it can be trapped and emulated.
67 * So, allocate a MMIO segment for each BAR register + 1 additional segment.
69 #define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1)
/* malloc(9) type used for the per-vector MSI-X arrays in ppt_setup_msix(). */
71 MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources");
/*
 * Cookie passed to the pptintr() interrupt filter; identifies the
 * passthrough device.  NOTE(review): additional members (the vector and
 * target vcpu written in ppt_setup_msi/msix) are elided from this listing.
 */
73 struct pptintr_arg { /* pptintr(pptintr_arg) */
74 	struct pptdev *pptdev;
/*
 * Per-device passthrough state, kept in a static table (pptdevs[]).
 * NOTE(review): several members and the msi/msix sub-struct headers are
 * elided here; the visible fields cover VM ownership, guest MMIO
 * mappings, and MSI/MSI-X interrupt resources.
 */
79 static struct pptdev {
81 	struct vm *vm; /* owner of this device */
82 	struct vm_memory_segment mmio[MAX_MMIOSEGS];
84 	int num_msgs; /* guest state */
86 	int startrid; /* host state */
87 	struct resource *res[MAX_MSIMSGS];
88 	void *cookie[MAX_MSIMSGS];
89 	struct pptintr_arg arg[MAX_MSIMSGS];
/* MSI-X side: table BAR resource plus malloc'ed per-vector arrays
   (allocated in ppt_setup_msix, freed in ppt_teardown_msix). */
96 	struct resource *msix_table_res;
97 	struct resource **res;
99 	struct pptintr_arg *arg;
/* Number of attached passthrough devices; bounds scans of pptdevs[]. */
103 static int num_pptdevs;
/*
 * newbus probe: claim only devices that the administrator has marked as
 * passthrough candidates (vmm_is_pptdev()) and that are normal PCI
 * endpoints (PCIM_HDRTYPE_NORMAL).  Return paths are elided in this
 * listing.
 */
106 ppt_probe(device_t dev)
109 	struct pci_devinfo *dinfo;
111 	dinfo = (struct pci_devinfo *)device_get_ivars(dev);
113 	bus = pci_get_bus(dev);
114 	slot = pci_get_slot(dev);
115 	func = pci_get_function(dev);
118 	* To qualify as a pci passthrough device a device must:
119 	* - be allowed by administrator to be used in this role
120 	* - be an endpoint device
122 	if (vmm_is_pptdev(bus, slot, func) &&
123 	(dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL)
/*
 * newbus attach: record the device in the static pptdevs[] table,
 * refusing to exceed MAX_PPTDEVS.  The slot-index computation ('n') and
 * num_pptdevs increment are elided in this listing.
 */
130 ppt_attach(device_t dev)
134 	if (num_pptdevs >= MAX_PPTDEVS) {
135 		printf("ppt_attach: maximum number of pci passthrough devices "
141 	pptdevs[n].dev = dev;
144 	device_printf(dev, "attached\n");
/*
 * newbus detach.  The original XXX note below flags that detaching while
 * devices are still assigned to guests is not yet guarded against.
 */
150 ppt_detach(device_t dev)
153 	* XXX check whether there are any pci passthrough devices assigned
154 	* to guests before we allow this driver to detach.
/*
 * newbus glue: method table and driver registration on the pci bus.
 * (The DEVMETHOD_END terminator and closing brace are elided in this
 * listing.)
 */
160 static device_method_t ppt_methods[] = {
161 	/* Device interface */
162 	DEVMETHOD(device_probe, ppt_probe),
163 	DEVMETHOD(device_attach, ppt_attach),
164 	DEVMETHOD(device_detach, ppt_detach),
168 static devclass_t ppt_devclass;
169 DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0);
170 DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL);
/*
 * Linear search of pptdevs[] for the entry matching bus/slot/func;
 * returns a pointer to the entry on a match.  (The not-found NULL
 * return is elided in this listing.)
 */
172 static struct pptdev *
173 ppt_find(int bus, int slot, int func)
178 	for (i = 0; i < num_pptdevs; i++) {
179 		dev = pptdevs[i].dev;
180 		b = pci_get_bus(dev);
181 		s = pci_get_slot(dev);
182 		f = pci_get_function(dev);
183 		if (bus == b && slot == s && func == f)
184 			return (&pptdevs[i]);
/*
 * Remove every guest MMIO mapping recorded for 'ppt' and zero each
 * segment record so the slot can be reused.  Unmap errors are
 * deliberately ignored (best-effort teardown).
 */
190 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
193 	struct vm_memory_segment *seg;
195 	for (i = 0; i < MAX_MMIOSEGS; i++) {
199 		(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
200 		bzero(seg, sizeof(struct vm_memory_segment));
/*
 * Undo ppt_setup_msi(): tear down each interrupt handler, release each
 * IRQ resource, and — when MSI proper was allocated (startrid == 1, as
 * opposed to a legacy INTx at rid 0) — release the MSI vectors.
 * Safe to call when nothing is set up (num_msgs == 0).
 */
205 ppt_teardown_msi(struct pptdev *ppt)
209 	struct resource *res;
211 	if (ppt->msi.num_msgs == 0)
214 	for (i = 0; i < ppt->msi.num_msgs; i++) {
215 		rid = ppt->msi.startrid + i;
216 		res = ppt->msi.res[i];
217 		cookie = ppt->msi.cookie[i];
220 		bus_teardown_intr(ppt->dev, res, cookie);
223 		bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
/* Clear the slots so a repeated teardown is harmless. */
225 		ppt->msi.res[i] = NULL;
226 		ppt->msi.cookie[i] = NULL;
229 	if (ppt->msi.startrid == 1)
230 		pci_release_msi(ppt->dev);
232 	ppt->msi.num_msgs = 0;
/*
 * Tear down a single MSI-X vector 'idx': detach its handler, release
 * its IRQ resource, and clear the per-vector slots.  (The NULL guards
 * around the teardown calls are elided in this listing.)
 */
236 ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
239 	struct resource *res;
242 	rid = ppt->msix.startrid + idx;
243 	res = ppt->msix.res[idx];
244 	cookie = ppt->msix.cookie[idx];
247 	bus_teardown_intr(ppt->dev, res, cookie);
250 	bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res);
252 	ppt->msix.res[idx] = NULL;
253 	ppt->msix.cookie[idx] = NULL;
/*
 * Undo ppt_setup_msix(): tear down every vector, release the MSI-X
 * table BAR resource, free the malloc'ed per-vector arrays, and release
 * the MSI-X allocation.  Safe to call when nothing is set up
 * (num_msgs == 0).
 */
257 ppt_teardown_msix(struct pptdev *ppt)
261 	if (ppt->msix.num_msgs == 0)
264 	for (i = 0; i < ppt->msix.num_msgs; i++)
265 		ppt_teardown_msix_intr(ppt, i);
267 	if (ppt->msix.msix_table_res) {
268 		bus_release_resource(ppt->dev, SYS_RES_MEMORY,
269 		ppt->msix.msix_table_rid,
270 		ppt->msix.msix_table_res);
271 		ppt->msix.msix_table_res = NULL;
272 		ppt->msix.msix_table_rid = 0;
/* free(9) of NULL is a no-op, so these are safe even if allocation
   never happened.  NOTE(review): the pointers are not reset to NULL
   here — presumably the num_msgs = 0 reset below is what prevents
   reuse; verify against the elided lines. */
275 	free(ppt->msix.res, M_PPTMSIX);
276 	free(ppt->msix.cookie, M_PPTMSIX);
277 	free(ppt->msix.arg, M_PPTMSIX);
279 	pci_release_msi(ppt->dev);
281 	ppt->msix.num_msgs = 0;
/*
 * Count the passthrough devices currently owned by 'vm'.  (The counter
 * increment and return are elided in this listing.)
 */
285 ppt_num_devices(struct vm *vm)
290 	for (i = 0; i < num_pptdevs; i++) {
291 		if (pptdevs[i].vm == vm)
/*
 * Test whether guest physical address 'gpa' falls inside any MMIO
 * segment mapped for a passthrough device.  The interval check is
 * half-open: [seg->gpa, seg->gpa + seg->len).  (The per-device vm
 * ownership filter and the return statements are elided here.)
 */
298 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
302 	struct vm_memory_segment *seg;
304 	for (n = 0; n < num_pptdevs; n++) {
309 		for (i = 0; i < MAX_MMIOSEGS; i++) {
313 			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len)
/*
 * Assign the passthrough device at bus/slot/func to 'vm': refuse if it
 * is already owned by a different VM, then add it to the VM's IOMMU
 * domain.  (ppt_find NULL handling and the ppt->vm assignment are
 * elided in this listing.)
 */
322 ppt_assign_device(struct vm *vm, int bus, int slot, int func)
326 	ppt = ppt_find(bus, slot, func);
329 	* If this device is owned by a different VM then we
330 	* cannot change its owner.
332 	if (ppt->vm != NULL && ppt->vm != vm)
336 	iommu_add_device(vm_iommu_domain(vm), bus, slot, func);
343 ppt_unassign_device(struct vm *vm, int bus, int slot, int func)
347 ppt = ppt_find(bus, slot, func);
350 * If this device is not owned by this 'vm' then bail out.
354 ppt_unmap_mmio(vm, ppt);
355 ppt_teardown_msi(ppt);
356 ppt_teardown_msix(ppt);
357 iommu_remove_device(vm_iommu_domain(vm), bus, slot, func);
/*
 * Walk pptdevs[] and unassign every device owned by 'vm' — used when a
 * VM is being destroyed.  Delegates to vm_unassign_pptdev() per device.
 */
365 ppt_unassign_all(struct vm *vm)
367 	int i, bus, slot, func;
370 	for (i = 0; i < num_pptdevs; i++) {
371 		if (pptdevs[i].vm == vm) {
372 			dev = pptdevs[i].dev;
373 			bus = pci_get_bus(dev);
374 			slot = pci_get_slot(dev);
375 			func = pci_get_function(dev);
376 			vm_unassign_pptdev(vm, bus, slot, func);
/*
 * Map host MMIO range [hpa, hpa+len) into the guest at 'gpa' for the
 * given passthrough device, recording the mapping in a free mmio[]
 * segment slot.  (Free-slot selection, the segment record update, and
 * error returns are elided in this listing.)
 */
384 ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
385 vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
388 	struct vm_memory_segment *seg;
391 	ppt = ppt_find(bus, slot, func);
396 	for (i = 0; i < MAX_MMIOSEGS; i++) {
399 		error = vm_map_mmio(vm, gpa, len, hpa);
/*
 * Interior of pptintr(), the interrupt filter for passthrough devices:
 * forward the host interrupt to the owning guest's vCPU via
 * lapic_set_intr().  NOTE(review): the function signature and the
 * vec/ownership setup lines are elided from this listing.
 */
417 	struct pptintr_arg *pptarg;
420 	ppt = pptarg->pptdev;
424 	(void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec);
428 	* This is not expected to happen - panic?
433 	* For legacy interrupts give other filters a chance in case
434 	* the interrupt was not generated by the passthrough device.
/* startrid == 0 means a shared legacy INTx line; anything else is an
   exclusive MSI/MSI-X vector, so the interrupt must be ours. */
436 	if (ppt->msi.startrid == 0)
437 		return (FILTER_STRAY);
439 	return (FILTER_HANDLED);
/*
 * Configure guest-requested MSI (or legacy INTx) delivery for the
 * passthrough device at bus/slot/func:
 *  - validate destcpu/vector/numvec against VM_MAXCPU, 0..255, and
 *    MAX_MSIMSGS;
 *  - require that 'vm' owns the device;
 *  - tear down any previous setup, then allocate 'numvec' IRQ
 *    resources and attach pptintr() to each, targeting 'destcpu' with
 *    consecutive vectors starting at 'vector'.
 * numvec == 0 acts as "disable" (teardown only).  Error paths, returns,
 * and several declarations are elided in this listing.
 */
443 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func,
444 int destcpu, int vector, int numvec)
447 	int msi_count, startrid, error, tmp;
450 	if ((destcpu >= VM_MAXCPU || destcpu < 0) ||
451 	(vector < 0 || vector > 255) ||
452 	(numvec < 0 || numvec > MAX_MSIMSGS))
455 	ppt = ppt_find(bus, slot, func);
458 	if (ppt->vm != vm) /* Make sure we own this device */
461 	/* Free any allocated resources */
462 	ppt_teardown_msi(ppt);
464 	if (numvec == 0) /* nothing more to do */
/* Devices without MSI support fall back to the shared legacy INTx
   line (rid 0); MSI rids start at 1. */
468 	msi_count = pci_msi_count(ppt->dev);
469 	if (msi_count == 0) {
470 		startrid = 0; /* legacy interrupt */
472 		flags |= RF_SHAREABLE;
474 		startrid = 1; /* MSI */
477 	* The device must be capable of supporting the number of vectors
478 	* the guest wants to allocate.
480 	if (numvec > msi_count)
484 	* Make sure that we can allocate all the MSI vectors that are needed
/* pci_alloc_msi() may grant fewer vectors than requested; treat a
   partial grant as failure and release what was allocated. */
489 	error = pci_alloc_msi(ppt->dev, &tmp);
492 	else if (tmp != numvec) {
493 		pci_release_msi(ppt->dev);
500 	ppt->msi.startrid = startrid;
503 	* Allocate the irq resource and attach it to the interrupt handler.
505 	for (i = 0; i < numvec; i++) {
/* num_msgs is bumped before each allocation so a failure can hand
   ppt_teardown_msi() the exact set of vectors to release. */
506 		ppt->msi.num_msgs = i + 1;
507 		ppt->msi.cookie[i] = NULL;
510 		ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
512 		if (ppt->msi.res[i] == NULL)
515 		ppt->msi.arg[i].pptdev = ppt;
516 		ppt->msi.arg[i].vec = vector + i;
517 		ppt->msi.arg[i].vcpu = destcpu;
/* pptintr is registered as a filter (NULL ithread handler). */
519 		error = bus_setup_intr(ppt->dev, ppt->msi.res[i],
520 		INTR_TYPE_NET | INTR_MPSAFE,
521 		pptintr, NULL, &ppt->msi.arg[i],
522 		&ppt->msi.cookie[i]);
/* Failure path: unwind everything set up so far. */
528 		ppt_teardown_msi(ppt);
536 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func,
537 int idx, uint32_t msg, uint32_t vector_control, uint64_t addr)
540 struct pci_devinfo *dinfo;
541 int numvec, alloced, rid, error;
542 size_t res_size, cookie_size, arg_size;
544 ppt = ppt_find(bus, slot, func);
547 if (ppt->vm != vm) /* Make sure we own this device */
550 dinfo = device_get_ivars(ppt->dev);
555 * First-time configuration:
556 * Allocate the MSI-X table
557 * Allocate the IRQ resources
558 * Set up some variables in ppt->msix
560 if (ppt->msix.num_msgs == 0) {
561 numvec = pci_msix_count(ppt->dev);
565 ppt->msix.startrid = 1;
566 ppt->msix.num_msgs = numvec;
568 res_size = numvec * sizeof(ppt->msix.res[0]);
569 cookie_size = numvec * sizeof(ppt->msix.cookie[0]);
570 arg_size = numvec * sizeof(ppt->msix.arg[0]);
572 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO);
573 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX,
575 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO);
577 rid = dinfo->cfg.msix.msix_table_bar;
578 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev,
579 SYS_RES_MEMORY, &rid, RF_ACTIVE);
581 if (ppt->msix.msix_table_res == NULL) {
582 ppt_teardown_msix(ppt);
585 ppt->msix.msix_table_rid = rid;
588 error = pci_alloc_msix(ppt->dev, &alloced);
589 if (error || alloced != numvec) {
590 ppt_teardown_msix(ppt);
591 return (error == 0 ? ENOSPC: error);
595 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
596 /* Tear down the IRQ if it's already set up */
597 ppt_teardown_msix_intr(ppt, idx);
599 /* Allocate the IRQ resource */
600 ppt->msix.cookie[idx] = NULL;
601 rid = ppt->msix.startrid + idx;
602 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ,
604 if (ppt->msix.res[idx] == NULL)
607 ppt->msix.arg[idx].pptdev = ppt;
608 ppt->msix.arg[idx].vec = msg & 0xFF;
609 ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF;
611 /* Setup the MSI-X interrupt */
612 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx],
613 INTR_TYPE_NET | INTR_MPSAFE,
614 pptintr, NULL, &ppt->msix.arg[idx],
615 &ppt->msix.cookie[idx]);
618 bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]);
619 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]);
620 ppt->msix.cookie[idx] = NULL;
621 ppt->msix.res[idx] = NULL;
625 /* Masked, tear it down if it's already been set up */
626 ppt_teardown_msix_intr(ppt, idx);