2 * Copyright (c) 2013-2015 Sandvine Inc. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
34 #include <sys/kernel.h>
35 #include <sys/systm.h>
37 #include <sys/fcntl.h>
38 #include <sys/ioccom.h>
40 #include <sys/linker.h>
41 #include <sys/malloc.h>
42 #include <sys/module.h>
43 #include <sys/pciio.h>
44 #include <sys/queue.h>
46 #include <sys/sysctl.h>
48 #include <machine/bus.h>
50 #include <dev/pci/pcireg.h>
51 #include <dev/pci/pcivar.h>
52 #include <dev/pci/pci_private.h>
53 #include <dev/pci/pci_iov_private.h>
/*
 * File-scope plumbing for the SR-IOV control device: the malloc type used
 * for the per-PF state allocated in this file, the forward declaration of
 * the ioctl handler, and the character-device switch that routes ioctls on
 * the iov device node created in pci_iov_attach_method() to that handler.
 */
58 static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations");
60 static d_ioctl_t pci_iov_ioctl;
62 static struct cdevsw iov_cdevsw = {
63 .d_version = D_VERSION,
65 .d_ioctl = pci_iov_ioctl
/*
 * Config-space accessors relative to the SR-IOV extended capability of the
 * device described by the pci_devinfo pointer d.  r is a register offset
 * inside the capability, w is the access width handed to pci_read_config()
 * or pci_write_config(), and v is the value to write.  Every macro argument
 * is parenthesized so that compound expressions expand with the intended
 * precedence.
 */
68 #define IOV_READ(d, r, w) \
69 pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + (r), (w))
71 #define IOV_WRITE(d, r, v, w) \
72 pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + (r), (v), (w))
/*
 * Attach SR-IOV support for the PF dev, which is a child of bus.
 *
 * Looks up the SR-IOV extended capability of dev, accepts only capability
 * version 1, allocates zeroed per-PF state (struct pcicfg_iov) and creates
 * the control device node iov/<nameunit>, owned by root:wheel with mode
 * 0600.  The state is then published in dinfo->cfg.iov and the cdev points
 * back at dinfo through si_drv1, which is where pci_iov_config() picks the
 * device up again.
 */
75 pci_iov_attach_method(device_t bus, device_t dev)
78 struct pci_devinfo *dinfo;
79 struct pcicfg_iov *iov;
84 dinfo = device_get_ivars(dev);
85 pcib = device_get_parent(bus);
87 error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);
/* The first dword at the capability offset carries the capability version. */
92 version = pci_read_config(dev, iov_pos, 4);
93 if (PCI_EXTCAP_VER(version) != 1) {
96 "Unsupported version of SR-IOV (%d) detected\n",
97 PCI_EXTCAP_VER(version));
/* M_WAITOK | M_ZERO: the state starts out fully zeroed. */
102 iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO);
/* A non-NULL cfg.iov means SR-IOV state is already installed on this device. */
105 if (dinfo->cfg.iov != NULL) {
109 iov->iov_pos = iov_pos;
/* Control node through which userland issues the configuration ioctl. */
111 iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
112 UID_ROOT, GID_WHEEL, 0600, "iov/%s", device_get_nameunit(dev));
114 if (iov->iov_cdev == NULL) {
/* Publish the state and let the cdev find its devinfo. */
119 dinfo->cfg.iov = iov;
120 iov->iov_cdev->si_drv1 = dinfo;
/*
 * Detach SR-IOV support from the PF dev.
 *
 * Fetches the per-PF state from the devinfo, checks whether any VFs are
 * still configured (iov_num_vfs != 0), then unhooks the state from the
 * devinfo and destroys the control device node.
 */
132 pci_iov_detach_method(device_t bus, device_t dev)
134 struct pci_devinfo *dinfo;
135 struct pcicfg_iov *iov;
138 dinfo = device_get_ivars(dev);
139 iov = dinfo->cfg.iov;
/* A non-zero VF count means VFs are still configured on this PF. */
146 if (iov->iov_num_vfs != 0) {
/* Unpublish the state before the device node goes away. */
151 dinfo->cfg.iov = NULL;
154 destroy_dev(iov->iov_cdev);
155 iov->iov_cdev = NULL;
/*
 * Allocate the memory window that backs one VF BAR of a PF.
 *
 * dinfo      devinfo of the PF
 * bar        index of the VF BAR inside the SR-IOV capability
 * bar_shift  log2 of the size of a single VF instance of the BAR
 *
 * The resource id is the config-space offset of the VF BAR register.  The
 * allocation is requested through pci_alloc_multi_resource() with
 * iov_num_vfs passed as the instance count.  The resource, the per-VF size
 * and the shift are recorded in iov->iov_bar[bar], and the allocated range
 * is handed to the per-PF rman so that VF allocations can be carved out of
 * it.  The value returned on the visible path is that of
 * rman_manage_region().
 */
165 pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift)
167 struct resource *res;
168 struct pcicfg_iov *iov;
174 iov = dinfo->cfg.iov;
175 dev = dinfo->cfg.dev;
176 bus = device_get_parent(dev);
177 rid = iov->iov_pos + PCIR_SRIOV_BAR(bar);
/*
 * Shift in pci_addr_t rather than int: a plain int 1 overflows (undefined
 * behaviour) once bar_shift reaches 31, i.e. for BARs of 2GB and larger.
 */
178 bar_size = (pci_addr_t)1 << bar_shift;
180 res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0ul,
181 ~0ul, 1, iov->iov_num_vfs, RF_ACTIVE);
/* Remember the window so VFs can be placed in it and it can be released. */
186 iov->iov_bar[bar].res = res;
187 iov->iov_bar[bar].bar_size = bar_size;
188 iov->iov_bar[bar].bar_shift = bar_shift;
/* Make the allocated address range available to the per-PF rman. */
190 start = rman_get_start(res);
191 end = rman_get_end(res);
192 return (rman_manage_region(&iov->rman, start, end));
/*
 * Register the BARs of a single VF.
 *
 * iov    SR-IOV state of the parent PF
 * dinfo  devinfo of the VF (the caller passes the VF devinfo)
 *
 * For every VF BAR that has a backing resource, the address assigned to this
 * VF is the start of that resource plus the VF index times the per-VF BAR
 * size; the result is handed to pci_add_bar() for the matching PCIR_BAR(i).
 */
196 pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
198 struct pci_iov_bar *bar;
202 for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
203 bar = &iov->iov_bar[i];
/* BARs without a backing resource are skipped. */
204 if (bar->res != NULL) {
/* Each VF owns one bar_size slice of the window, ordered by VF index. */
205 bar_start = rman_get_start(bar->res) +
206 dinfo->cfg.vf.index * bar->bar_size;
208 pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start,
215 * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
216 * capability. This bit is only writeable on the lowest-numbered PF but
217 * affects all PFs on the device.
/*
 * Enable ARI in the SR-IOV control register of the lowest-numbered function
 * below bus that exposes the SR-IOV capability.
 *
 * Nothing is written when the parent bridge of bus reports ARI as disabled.
 * Otherwise the children of bus are scanned; for each child that has the
 * capability, its function number and capability offset replace the saved
 * ones when no candidate exists yet or its function number is lower.
 * PCIM_SRIOV_ARI_EN is then set with a read-modify-write of PCIR_SRIOV_CTL
 * on the chosen function.
 */
220 pci_iov_set_ari(device_t bus)
224 int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func;
227 /* If ARI is disabled on the downstream port there is nothing to do. */
228 if (!PCIB_ARI_ENABLED(device_get_parent(bus)))
231 error = device_get_children(bus, &devlist, &devcount);
237 for (i = 0; i < devcount; i++) {
238 if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) {
239 dev_func = pci_get_function(devlist[i]);
240 if (lowest == NULL || dev_func < lowest_func) {
242 lowest_func = dev_func;
243 lowest_pos = iov_pos;
249 * If we called this function some device must have the SR-IOV
252 KASSERT(lowest != NULL,
253 ("Could not find child of %s with SR-IOV capability",
254 device_get_nameunit(bus)));
/*
 * Use the capability offset that was saved for the chosen function.  After
 * the loop iov_pos only reflects whichever child was probed last, which need
 * not be the lowest-numbered one, so it must not be used here.
 */
256 iov_ctl = pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2);
257 iov_ctl |= PCIM_SRIOV_ARI_EN;
258 pci_write_config(lowest, lowest_pos + PCIR_SRIOV_CTL, iov_ctl, 2);
/* Release the child array obtained from device_get_children(). */
259 free(devlist, M_TEMP);
/*
 * Program the SR-IOV system page size register of the PF described by dinfo.
 *
 * The value written is a single-bit mask whose bit index is PAGE_SHIFT minus
 * PCI_SRIOV_BASE_PAGE_SHIFT, or bit 0 when the system page is smaller than
 * the base SR-IOV page.  The mask is tested against the supported page sizes
 * read from PCIR_SRIOV_PAGE_CAP before PCIR_SRIOV_PAGE_SIZE is written.
 */
264 pci_iov_config_page_size(struct pci_devinfo *dinfo)
266 uint32_t page_cap, page_size;
/* Bitmask of the page sizes the device supports. */
268 page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4);
271 * If the system page size is less than the smallest SR-IOV page size
272 * then round up to the smallest SR-IOV page size.
274 if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT)
275 page_size = (1 << 0);
277 page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT));
279 /* Check that the device supports the system page size. */
280 if (!(page_size & page_cap))
283 IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4);
/*
 * Set up the resource manager that VF memory allocations are served from.
 *
 * pf   the PF device, used only to build the descriptive name
 * iov  SR-IOV state of the PF that embeds the rman
 *
 * The rman is given the bounds 0 through ~0ul and the type RMAN_ARRAY; its
 * description is the PF nameunit followed by a fixed suffix.  Address ranges
 * are added to it by pci_iov_alloc_bar().  IOV_RMAN_INITED is set after
 * rman_init() so that pci_iov_config() knows rman_fini() has to be called.
 */
288 pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
292 iov->rman.rm_start = 0;
293 iov->rman.rm_end = ~0ul;
294 iov->rman.rm_type = RMAN_ARRAY;
/* The name buffer lives in the iov state, so the pointer stays valid. */
295 snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory",
296 device_get_nameunit(pf));
297 iov->rman.rm_descr = iov->rman_name;
299 error = rman_init(&iov->rman);
303 iov->iov_flags |= IOV_RMAN_INITED;
/*
 * Walk the VF BAR registers of the PF described by dinfo, indices 0 through
 * PCIR_MAX_BAR_0, and allocate backing space for them through
 * pci_iov_alloc_bar().  The size exponent passed along is what pci_mapsize()
 * derives from the test value obtained for the register at
 * iov_pos + PCIR_SRIOV_BAR(i).
 */
308 pci_iov_setup_bars(struct pci_devinfo *dinfo)
311 struct pcicfg_iov *iov;
312 pci_addr_t bar_value, testval;
313 int i, last_64, error;
315 iov = dinfo->cfg.iov;
316 dev = dinfo->cfg.dev;
319 for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
321 * If a PCI BAR is a 64-bit wide BAR, then it spans two
322 * consecutive registers. Therefore if the last BAR that
323 * we looked at was a 64-bit BAR, we need to skip this
324 * register as it's the second half of the last BAR.
328 iov->iov_pos + PCIR_SRIOV_BAR(i),
329 &bar_value, &testval, &last_64);
332 error = pci_iov_alloc_bar(dinfo, i,
333 pci_mapsize(testval));
/*
 * Create one child device per configured VF of a PF.
 *
 * dinfo       devinfo of the PF
 * driver      NOTE(review): presumably the name of a driver to pin on each
 *             VF; its use is not among the lines reviewed, confirm
 * first_rid   RID assigned to VF 0
 * rid_stride  distance between the RIDs of consecutive VFs
 *
 * The vendor ID of every VF is taken from the PF and the device ID from the
 * PCIR_SRIOV_VF_DID register.  Each VF devinfo is pointed at the iov state
 * of the PF and records its own index, its BARs are registered through
 * pci_iov_add_bars(), and the PF driver is notified with PCI_ADD_VF().  Once
 * all VFs are processed, bus_generic_attach() is run on the parent bus.
 */
345 pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver,
346 uint16_t first_rid, uint16_t rid_stride)
348 device_t bus, dev, vf;
349 struct pcicfg_iov *iov;
350 struct pci_devinfo *vfinfo;
353 uint16_t vid, did, next_rid;
355 iov = dinfo->cfg.iov;
356 dev = dinfo->cfg.dev;
357 bus = device_get_parent(dev);
358 size = dinfo->cfg.devinfo_size;
359 next_rid = first_rid;
360 vid = pci_get_vendor(dev);
361 did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);
/* next_rid advances by rid_stride for every VF. */
363 for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
366 vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
370 vfinfo = device_get_ivars(vf);
/* The VF shares the iov state of its PF and remembers its own index. */
372 vfinfo->cfg.iov = iov;
373 vfinfo->cfg.vf.index = i;
375 pci_iov_add_bars(iov, vfinfo);
/* Let the PF driver set up (or veto) this VF; a vetoed child is deleted. */
377 error = PCI_ADD_VF(dev, i);
379 device_printf(dev, "Failed to add VF %d\n", i);
380 pci_delete_child(bus, vf);
/* Probe and attach drivers for the newly created children. */
384 bus_generic_attach(bus);
/*
 * Configure SR-IOV on the PF behind the control device cdev.
 *
 * cdev  control device; si_drv1 holds the devinfo of the PF
 * arg   request from userland; num_vfs and passthrough are read here
 *
 * Visible sequence: check whether VFs are already configured, compare the
 * requested VF count with PCIR_SRIOV_TOTAL_VFS, program the page size,
 * enable ARI, call PCI_INIT_IOV() on the PF driver, write the VF count,
 * compute the first and last VF RID from the offset and stride registers,
 * clear VF_EN and VF_MSE, set up the rman and the VF BAR windows, set VF_EN
 * and VF_MSE, wait, and enumerate the VFs.  The tail of the function
 * releases the BAR windows, tears down the rman and resets the VF count.
 */
388 pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
392 struct pci_devinfo *dinfo;
393 struct pcicfg_iov *iov;
395 uint16_t rid_off, rid_stride;
396 uint16_t first_rid, last_rid;
402 dinfo = cdev->si_drv1;
403 iov = dinfo->cfg.iov;
404 dev = dinfo->cfg.dev;
405 bus = device_get_parent(dev);
/* A non-zero VF count means this PF has already been configured. */
408 if (iov->iov_num_vfs != 0) {
/* The device advertises how many VFs it can provide at most. */
413 total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
415 if (arg->num_vfs > total_vfs) {
421 * If we are creating passthrough devices then force the ppt driver to
422 * attach to prevent a VF driver from claming the VFs.
424 if (arg->passthrough)
429 error = pci_iov_config_page_size(dinfo);
433 error = pci_iov_set_ari(bus);
437 error = PCI_INIT_IOV(dev, arg->num_vfs);
443 IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, arg->num_vfs, 2);
/* Offset and stride are read after the VF count has been written. */
445 rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
446 rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);
448 first_rid = pci_get_rid(dev) + rid_off;
449 last_rid = first_rid + (arg->num_vfs - 1) * rid_stride;
451 /* We don't yet support allocating extra bus numbers for VFs. */
452 if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
/* Keep VFs and their memory space disabled while the BARs are set up. */
457 iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
458 iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
459 IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
461 error = pci_iov_init_rman(dev, iov);
/* pci_iov_alloc_bar() sizes its allocation from iov_num_vfs. */
465 iov->iov_num_vfs = arg->num_vfs;
467 error = pci_iov_setup_bars(dinfo);
/* Turn the VFs and their memory space on. */
471 iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
472 iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE;
473 IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
475 /* Per specification, we must wait 100ms before accessing VFs. */
/*
 * NOTE(review): roundup(hz, 10) rounds hz up to a multiple of 10, which is
 * roughly one second worth of ticks rather than 100ms; howmany(hz, 10)
 * would match the comment above.  The longer wait is harmless; confirm the
 * intent before changing it.
 */
476 pause("iov", roundup(hz, 10));
477 pci_iov_enumerate_vfs(dinfo, driver, first_rid, rid_stride);
/* Undo: give back every VF BAR window that was allocated. */
485 for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
486 if (iov->iov_bar[i].res != NULL) {
487 pci_release_resource(bus, dev, SYS_RES_MEMORY,
488 iov->iov_pos + PCIR_SRIOV_BAR(i),
489 iov->iov_bar[i].res);
490 pci_delete_resource(bus, dev, SYS_RES_MEMORY,
491 iov->iov_pos + PCIR_SRIOV_BAR(i));
492 iov->iov_bar[i].res = NULL;
/* Undo: the rman is finalised only if pci_iov_init_rman() set the flag. */
496 if (iov->iov_flags & IOV_RMAN_INITED) {
497 rman_fini(&iov->rman);
498 iov->iov_flags &= ~IOV_RMAN_INITED;
/* A zero VF count marks the PF as unconfigured again. */
500 iov->iov_num_vfs = 0;
/*
 * ioctl entry point of the iov control device.  On the visible path the
 * data buffer is interpreted as a struct pci_iov_arg and handed, together
 * with the cdev, to pci_iov_config(), whose result is returned.
 */
506 pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
512 return (pci_iov_config(dev, (struct pci_iov_arg *)data));
/*
 * Allocate a memory resource for a VF out of the rman of its parent PF.
 *
 * dev    bus device performing the allocation
 * child  the VF
 * rid    resource id of the BAR being allocated
 * start, end, count  constraints supplied by the caller; only count == 1 is
 *        accepted
 * flags  resource flags; RF_ACTIVE additionally activates the resource
 *
 * The BAR recorded for *rid supplies the base address (pm_value) and the
 * size exponent (pm_size).  The range is reserved from the per-PF rman,
 * added to the resource list of the VF and tagged with the rid.  When adding
 * the list entry or activating the resource fails, what was set up so far is
 * released again.
 */
519 pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid, u_long start,
520 u_long end, u_long count, u_int flags)
522 struct pci_devinfo *dinfo;
523 struct pcicfg_iov *iov;
525 struct resource *res;
526 struct resource_list_entry *rle;
527 u_long bar_start, bar_end;
528 pci_addr_t bar_length;
531 dinfo = device_get_ivars(child);
532 iov = dinfo->cfg.iov;
534 map = pci_find_bar(child, *rid);
/*
 * Shift in pci_addr_t rather than int: a plain int 1 overflows (undefined
 * behaviour) once pm_size reaches 31, i.e. for BARs of 2GB and larger.
 */
538 bar_length = (pci_addr_t)1 << map->pm_size;
539 bar_start = map->pm_value;
540 bar_end = bar_start + bar_length - 1;
/*
 * NOTE(review): bar_end <= bar_start compares the BAR against itself and can
 * only trigger for a one-byte BAR or on wrap-around; a comparison against
 * start may have been intended.  Left as is, confirm.
 */
542 /* Make sure that the resource fits the constraints. */
543 if (bar_start >= end || bar_end <= bar_start || count != 1)
546 /* Clamp the resource to the constraints if necessary. */
547 if (bar_start < start)
551 bar_length = bar_end - bar_start + 1;
553 res = rman_reserve_resource(&iov->rman, bar_start, bar_end,
554 bar_length, flags, child);
558 rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid,
559 bar_start, bar_end, 1);
561 rman_release_resource(res);
565 rman_set_rid(res, *rid);
567 if (flags & RF_ACTIVE) {
568 error = bus_activate_resource(child, SYS_RES_MEMORY, *rid, res);
/* Activation failure path: drop the list entry and the reservation. */
570 resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
572 rman_release_resource(res);
/*
 * Release a memory resource that was allocated for a VF.
 *
 * dev    bus device performing the release
 * child  the VF that owns the resource
 * rid    resource id of the BAR
 *
 * A resource that is still flagged RF_ACTIVE is deactivated first.  The
 * matching entry is looked up in the resource list of the VF and deleted,
 * and the function returns the result of rman_release_resource().
 */
582 pci_vf_release_mem_resource(device_t dev, device_t child, int rid,
585 struct pci_devinfo *dinfo;
586 struct resource_list_entry *rle;
589 dinfo = device_get_ivars(child);
/* Deactivate before release when the resource is still active. */
591 if (rman_get_flags(r) & RF_ACTIVE) {
592 error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r);
/* Drop the bookkeeping entry that the allocation path added. */
597 rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid);
600 resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
604 return (rman_release_resource(r));