From e9309eac19c3c8d0ba90e8e926b6107761093861 Mon Sep 17 00:00:00 2001 From: Ryan Stone Date: Sun, 1 Mar 2015 00:40:26 +0000 Subject: [PATCH] Allocate PCI I/O memory spaces for VFs When creating VFs, we must size each SR-IOV BAR on the PF and allocate a configuous I/O memory window large enough for every VF. However, the window only needs to be aligned to a boundary equal to the size of the window for a single VF. When a VF attempts to allocate an I/O memory resource, we must intercept the request in the pci driver and pass it off to the SR-IOV code, which will allocate the correct window from the pre-allocated memory space for the PF. Inform the pci driver about the size and address of the BARs on the VF when the VF is created. This is required by pciconf -b and bhyve. Differential Revision: https://reviews.freebsd.org/D78 Reviewed by: jhb MFC after: 1 month Sponsored by: Sandvine Inc. --- sys/dev/pci/pci.c | 35 ++++++ sys/dev/pci/pci_iov.c | 227 +++++++++++++++++++++++++++++++++- sys/dev/pci/pci_iov_private.h | 14 +++ sys/dev/pci/pci_private.h | 5 + sys/dev/pci/pcivar.h | 2 +- 5 files changed, 279 insertions(+), 4 deletions(-) diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index e103f04e50a..b106c67ccfe 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -4695,11 +4695,30 @@ struct resource * pci_alloc_resource(device_t dev, device_t child, int type, int *rid, u_long start, u_long end, u_long count, u_int flags) { +#ifdef PCI_IOV + struct pci_devinfo *dinfo; +#endif if (device_get_parent(child) != dev) return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid, start, end, count, flags)); +#ifdef PCI_IOV + dinfo = device_get_ivars(child); + if (dinfo->cfg.flags & PCICFG_VF) { + switch (type) { + /* VFs can't have I/O BARs. */ + case SYS_RES_IOPORT: + return (NULL); + case SYS_RES_MEMORY: + return (pci_vf_alloc_mem_resource(dev, child, rid, + start, end, count, flags)); + } + + /* Fall through for other types of resource allocations. */ + } +#endif + return (pci_alloc_multi_resource(dev, child, type, rid, start, end, count, 1, flags)); } @@ -4718,6 +4737,22 @@ pci_release_resource(device_t dev, device_t child, int type, int rid, dinfo = device_get_ivars(child); cfg = &dinfo->cfg; + +#ifdef PCI_IOV + if (dinfo->cfg.flags & PCICFG_VF) { + switch (type) { + /* VFs can't have I/O BARs. */ + case SYS_RES_IOPORT: + return (EDOOFUS); + case SYS_RES_MEMORY: + return (pci_vf_release_mem_resource(dev, child, rid, + r)); + } + + /* Fall through for other types of resource allocations. */ + } +#endif + #ifdef NEW_PCIB /* * PCI-PCI bridge I/O window resources are not BARs. For diff --git a/sys/dev/pci/pci_iov.c b/sys/dev/pci/pci_iov.c index f710c4b9f8d..598c4e93312 100644 --- a/sys/dev/pci/pci_iov.c +++ b/sys/dev/pci/pci_iov.c @@ -106,7 +106,6 @@ pci_iov_attach_method(device_t bus, device_t dev) error = EBUSY; goto cleanup; } - iov->iov_pos = iov_pos; iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev), @@ -162,6 +161,56 @@ pci_iov_detach_method(device_t bus, device_t dev) return (0); } +static int +pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift) +{ + struct resource *res; + struct pcicfg_iov *iov; + device_t dev, bus; + u_long start, end; + pci_addr_t bar_size; + int rid; + + iov = dinfo->cfg.iov; + dev = dinfo->cfg.dev; + bus = device_get_parent(dev); + rid = iov->iov_pos + PCIR_SRIOV_BAR(bar); + bar_size = 1 << bar_shift; + + res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0ul, + ~0ul, 1, iov->iov_num_vfs, RF_ACTIVE); + + if (res == NULL) + return (ENXIO); + + iov->iov_bar[bar].res = res; + iov->iov_bar[bar].bar_size = bar_size; + iov->iov_bar[bar].bar_shift = bar_shift; + + start = rman_get_start(res); + end = rman_get_end(res); + return (rman_manage_region(&iov->rman, start, end)); +} + +static void +pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo) +{ + struct pci_iov_bar *bar; + uint64_t bar_start; + int i; + + for (i = 0; i <= PCIR_MAX_BAR_0; i++) { + bar = &iov->iov_bar[i]; + if (bar->res != NULL) { + bar_start = rman_get_start(bar->res) + + dinfo->cfg.vf.index * bar->bar_size; + + pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start, + bar->bar_shift); + } + } +} + /* * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV * capability. This bit is only writeable on the lowest-numbered PF but @@ -235,6 +284,63 @@ pci_iov_config_page_size(struct pci_devinfo *dinfo) return (0); } +static int +pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov) +{ + int error; + + iov->rman.rm_start = 0; + iov->rman.rm_end = ~0ul; + iov->rman.rm_type = RMAN_ARRAY; + snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory", + device_get_nameunit(pf)); + iov->rman.rm_descr = iov->rman_name; + + error = rman_init(&iov->rman); + if (error != 0) + return (error); + + iov->iov_flags |= IOV_RMAN_INITED; + return (0); +} + +static int +pci_iov_setup_bars(struct pci_devinfo *dinfo) +{ + device_t dev; + struct pcicfg_iov *iov; + pci_addr_t bar_value, testval; + int i, last_64, error; + + iov = dinfo->cfg.iov; + dev = dinfo->cfg.dev; + last_64 = 0; + + for (i = 0; i <= PCIR_MAX_BAR_0; i++) { + /* + * If a PCI BAR is a 64-bit wide BAR, then it spans two + * consecutive registers. Therefore if the last BAR that + * we looked at was a 64-bit BAR, we need to skip this + * register as it's the second half of the last BAR. + */ + if (!last_64) { + pci_read_bar(dev, + iov->iov_pos + PCIR_SRIOV_BAR(i), + &bar_value, &testval, &last_64); + + if (testval != 0) { + error = pci_iov_alloc_bar(dinfo, i, + pci_mapsize(testval)); + if (error != 0) + return (error); + } + } else + last_64 = 0; + } + + return (0); +} + static void pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver, uint16_t first_rid, uint16_t rid_stride) @@ -266,6 +372,8 @@ pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const char *driver, vfinfo->cfg.iov = iov; vfinfo->cfg.vf.index = i; + pci_iov_add_bars(iov, vfinfo); + error = PCI_ADD_VF(dev, i); if (error != 0) { device_printf(dev, "Failed to add VF %d\n", i); @@ -283,7 +391,7 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) const char *driver; struct pci_devinfo *dinfo; struct pcicfg_iov *iov; - int error; + int i, error; uint16_t rid_off, rid_stride; uint16_t first_rid, last_rid; uint16_t iov_ctl; @@ -350,10 +458,18 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE); IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); + error = pci_iov_init_rman(dev, iov); + if (error != 0) + goto out; + iov->iov_num_vfs = arg->num_vfs; + error = pci_iov_setup_bars(dinfo); + if (error != 0) + goto out; + iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2); - iov_ctl |= PCIM_SRIOV_VF_EN; + iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE; IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2); /* Per specification, we must wait 100ms before accessing VFs. */ @@ -365,6 +481,22 @@ pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg) out: if (iov_inited) PCI_UNINIT_IOV(dev); + + for (i = 0; i <= PCIR_MAX_BAR_0; i++) { + if (iov->iov_bar[i].res != NULL) { + pci_release_resource(bus, dev, SYS_RES_MEMORY, + iov->iov_pos + PCIR_SRIOV_BAR(i), + iov->iov_bar[i].res); + pci_delete_resource(bus, dev, SYS_RES_MEMORY, + iov->iov_pos + PCIR_SRIOV_BAR(i)); + iov->iov_bar[i].res = NULL; + } + } + + if (iov->iov_flags & IOV_RMAN_INITED) { + rman_fini(&iov->rman); + iov->iov_flags &= ~IOV_RMAN_INITED; + } iov->iov_num_vfs = 0; mtx_unlock(&Giant); return (error); @@ -383,3 +515,92 @@ pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, } } +struct resource * +pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid, u_long start, + u_long end, u_long count, u_int flags) +{ + struct pci_devinfo *dinfo; + struct pcicfg_iov *iov; + struct pci_map *map; + struct resource *res; + struct resource_list_entry *rle; + u_long bar_start, bar_end; + pci_addr_t bar_length; + int error; + + dinfo = device_get_ivars(child); + iov = dinfo->cfg.iov; + + map = pci_find_bar(child, *rid); + if (map == NULL) + return (NULL); + + bar_length = 1 << map->pm_size; + bar_start = map->pm_value; + bar_end = bar_start + bar_length - 1; + + /* Make sure that the resource fits the constraints. */ + if (bar_start >= end || bar_end <= bar_start || count != 1) + return (NULL); + + /* Clamp the resource to the constraints if necessary. */ + if (bar_start < start) + bar_start = start; + if (bar_end > end) + bar_end = end; + bar_length = bar_end - bar_start + 1; + + res = rman_reserve_resource(&iov->rman, bar_start, bar_end, + bar_length, flags, child); + if (res == NULL) + return (NULL); + + rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid, + bar_start, bar_end, 1); + if (rle == NULL) { + rman_release_resource(res); + return (NULL); + } + + rman_set_rid(res, *rid); + + if (flags & RF_ACTIVE) { + error = bus_activate_resource(child, SYS_RES_MEMORY, *rid, res); + if (error != 0) { + resource_list_delete(&dinfo->resources, SYS_RES_MEMORY, + *rid); + rman_release_resource(res); + return (NULL); + } + } + rle->res = res; + + return (res); +} + +int +pci_vf_release_mem_resource(device_t dev, device_t child, int rid, + struct resource *r) +{ + struct pci_devinfo *dinfo; + struct resource_list_entry *rle; + int error; + + dinfo = device_get_ivars(child); + + if (rman_get_flags(r) & RF_ACTIVE) { + error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r); + if (error != 0) + return (error); + } + + rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid); + if (rle != NULL) { + rle->res = NULL; + resource_list_delete(&dinfo->resources, SYS_RES_MEMORY, + rid); + } + + return (rman_release_resource(r)); +} + diff --git a/sys/dev/pci/pci_iov_private.h b/sys/dev/pci/pci_iov_private.h index 74a4279b6a6..d8a42c03e8b 100644 --- a/sys/dev/pci/pci_iov_private.h +++ b/sys/dev/pci/pci_iov_private.h @@ -29,12 +29,26 @@ #ifndef _PCI_IOV_PRIVATE_H_ #define _PCI_IOV_PRIVATE_H_ +struct pci_iov_bar { + struct resource *res; + + pci_addr_t bar_size; + pci_addr_t bar_shift; +}; + struct pcicfg_iov { struct cdev *iov_cdev; + + struct pci_iov_bar iov_bar[PCIR_MAX_BAR_0 + 1]; + struct rman rman; + char rman_name[64]; int iov_pos; int iov_num_vfs; + uint32_t iov_flags; }; +#define IOV_RMAN_INITED 0x0001 + #endif diff --git a/sys/dev/pci/pci_private.h b/sys/dev/pci/pci_private.h index bf89ef36778..27e1dae6f87 100644 --- a/sys/dev/pci/pci_private.h +++ b/sys/dev/pci/pci_private.h @@ -158,4 +158,9 @@ int pci_iov_detach_method(device_t bus, device_t dev); device_t pci_create_iov_child_method(device_t bus, device_t pf, uint16_t rid, uint16_t vid, uint16_t did); +struct resource *pci_vf_alloc_mem_resource(device_t dev, device_t child, + int *rid, u_long start, u_long end, u_long count, + u_int flags); +int pci_vf_release_mem_resource(device_t dev, device_t child, + int rid, struct resource *r); #endif /* _PCI_PRIVATE_H_ */ diff --git a/sys/dev/pci/pcivar.h b/sys/dev/pci/pcivar.h index c433940ffb2..69604a78fa3 100644 --- a/sys/dev/pci/pcivar.h +++ b/sys/dev/pci/pcivar.h @@ -50,7 +50,7 @@ struct pcicfg_pp { struct pci_map { pci_addr_t pm_value; /* Raw BAR value */ pci_addr_t pm_size; - uint8_t pm_reg; + uint16_t pm_reg; STAILQ_ENTRY(pci_map) pm_link; }; -- 2.45.0