From eff8c9eb5683a4d4d696567337dc6ff6e7f91a12 Mon Sep 17 00:00:00 2001 From: luigi Date: Thu, 27 Oct 2016 09:46:22 +0000 Subject: [PATCH] Various fixes for ptnet/ptnetmap (passthrough of netmap ports). In detail: - use PCI_VENDOR and PCI_DEVICE ids from a publicly allocated range (thanks to RedHat) - export memory pool information through PCI registers - improve mechanism for configuring passthrough on different hypervisors Code is from Vincenzo Maffione as a follow up to his GSOC work. --- sys/dev/netmap/if_ptnet.c | 29 ++-- sys/dev/netmap/netmap.c | 21 ++- sys/dev/netmap/netmap_freebsd.c | 110 +++++-------- sys/dev/netmap/netmap_kern.h | 11 +- sys/dev/netmap/netmap_mem2.c | 269 +++++++++++++------------------- sys/dev/netmap/netmap_mem2.h | 4 +- sys/dev/netmap/netmap_pt.c | 53 ++++--- sys/dev/netmap/netmap_vale.c | 2 +- sys/net/netmap.h | 26 +-- sys/net/netmap_virt.h | 173 +++++++++++--------- 10 files changed, 314 insertions(+), 384 deletions(-) diff --git a/sys/dev/netmap/if_ptnet.c b/sys/dev/netmap/if_ptnet.c index 90a90e984a5..4c7072774df 100644 --- a/sys/dev/netmap/if_ptnet.c +++ b/sys/dev/netmap/if_ptnet.c @@ -291,7 +291,7 @@ static inline void ptnet_kick(struct ptnet_queue *pq) static int ptnet_attach(device_t dev) { - uint32_t ptfeatures = PTNETMAP_F_BASE; + uint32_t ptfeatures = 0; unsigned int num_rx_rings, num_tx_rings; struct netmap_adapter na_arg; unsigned int nifp_offset; @@ -315,19 +315,12 @@ ptnet_attach(device_t dev) return (ENXIO); } - /* Check if we are supported by the hypervisor. If not, - * bail out immediately. */ + /* Negotiate features with the hypervisor. */ if (ptnet_vnet_hdr) { ptfeatures |= PTNETMAP_F_VNET_HDR; } bus_write_4(sc->iomem, PTNET_IO_PTFEAT, ptfeatures); /* wanted */ ptfeatures = bus_read_4(sc->iomem, PTNET_IO_PTFEAT); /* acked */ - if (!(ptfeatures & PTNETMAP_F_BASE)) { - device_printf(dev, "Hypervisor does not support netmap " - "passthorugh\n"); - err = ENXIO; - goto err_path; - } sc->ptfeatures = ptfeatures; /* Allocate CSB and carry out CSB allocation protocol (CSBBAH first, @@ -474,7 +467,8 @@ ptnet_attach(device_t dev) na_arg.nm_txsync = ptnet_nm_txsync; na_arg.nm_rxsync = ptnet_nm_rxsync; - netmap_pt_guest_attach(&na_arg, sc->csb, nifp_offset, ptnet_nm_ptctl); + netmap_pt_guest_attach(&na_arg, sc->csb, nifp_offset, + bus_read_4(sc->iomem, PTNET_IO_HOSTMEMID)); /* Now a netmap adapter for this ifp has been allocated, and it * can be accessed through NA(ifp). We also have to initialize the CSB @@ -1082,13 +1076,12 @@ static uint32_t ptnet_nm_ptctl(if_t ifp, uint32_t cmd) { struct ptnet_softc *sc = if_getsoftc(ifp); - int ret; - + /* + * Write a command and read back error status, + * with zero meaning success. + */ bus_write_4(sc->iomem, PTNET_IO_PTCTL, cmd); - ret = bus_read_4(sc->iomem, PTNET_IO_PTSTS); - device_printf(sc->dev, "PTCTL %u, ret %u\n", cmd, ret); - - return ret; + return bus_read_4(sc->iomem, PTNET_IO_PTCTL); } static int @@ -1196,7 +1189,7 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff) /* Make sure the host adapter passed through is ready * for txsync/rxsync. 
*/ - ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_REGIF); + ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_CREATE); if (ret) { return ret; } @@ -1246,7 +1239,7 @@ ptnet_nm_register(struct netmap_adapter *na, int onoff) } if (sc->ptna->backend_regifs == 0) { - ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_UNREGIF); + ret = ptnet_nm_ptctl(ifp, PTNETMAP_PTCTL_DELETE); } } diff --git a/sys/dev/netmap/netmap.c b/sys/dev/netmap/netmap.c index 46aca2eab5e..15e44815acc 100644 --- a/sys/dev/netmap/netmap.c +++ b/sys/dev/netmap/netmap.c @@ -2186,7 +2186,11 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread break; case NIOCREGIF: - /* possibly attach/detach NIC and VALE switch */ + /* + * If nmr->nr_cmd is not zero, this NIOCREGIF is not really + * a regif operation, but a different one, specified by the + * value of nmr->nr_cmd. + */ i = nmr->nr_cmd; if (i == NETMAP_BDG_ATTACH || i == NETMAP_BDG_DETACH || i == NETMAP_BDG_VNET_HDR @@ -2194,12 +2198,15 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread || i == NETMAP_BDG_DELIF || i == NETMAP_BDG_POLLING_ON || i == NETMAP_BDG_POLLING_OFF) { + /* possibly attach/detach NIC and VALE switch */ error = netmap_bdg_ctl(nmr, NULL); break; } else if (i == NETMAP_PT_HOST_CREATE || i == NETMAP_PT_HOST_DELETE) { + /* forward the command to the ptnetmap subsystem */ error = ptnetmap_ctl(nmr, priv->np_na); break; } else if (i == NETMAP_VNET_HDR_GET) { + /* get vnet-header length for this netmap port */ struct ifnet *ifp; NMG_LOCK(); @@ -2210,6 +2217,10 @@ netmap_ioctl(struct netmap_priv_d *priv, u_long cmd, caddr_t data, struct thread netmap_unget_na(na, ifp); NMG_UNLOCK(); break; + } else if (i == NETMAP_POOLS_INFO_GET) { + /* get information from the memory allocator */ + error = netmap_mem_pools_info_get(nmr, priv->np_na); + break; } else if (i != 0) { D("nr_cmd must be 0 not %d", i); error = EINVAL; @@ -2873,17 +2884,15 @@ netmap_attach(struct netmap_adapter *arg) #ifdef WITH_PTNETMAP_GUEST int -netmap_pt_guest_attach(struct netmap_adapter *arg, - void *csb, - unsigned int nifp_offset, - nm_pt_guest_ptctl_t ptctl) +netmap_pt_guest_attach(struct netmap_adapter *arg, void *csb, + unsigned int nifp_offset, unsigned int memid) { struct netmap_pt_guest_adapter *ptna; struct ifnet *ifp = arg ? arg->ifp : NULL; int error; /* get allocator */ - arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, ptctl); + arg->nm_mem = netmap_mem_pt_guest_new(ifp, nifp_offset, memid); if (arg->nm_mem == NULL) return ENOMEM; arg->na_flags |= NAF_MEM_OWNER; diff --git a/sys/dev/netmap/netmap_freebsd.c b/sys/dev/netmap/netmap_freebsd.c index d83f21e255e..2aecb53b47e 100644 --- a/sys/dev/netmap/netmap_freebsd.c +++ b/sys/dev/netmap/netmap_freebsd.c @@ -625,19 +625,6 @@ static devclass_t ptnetmap_devclass; DRIVER_MODULE_ORDERED(ptn_memdev, pci, ptn_memdev_driver, ptnetmap_devclass, NULL, NULL, SI_ORDER_MIDDLE + 1); -/* - * I/O port read/write wrappers. 
- * Some are not used, so we keep them commented out until needed - */ -#define ptn_ioread16(ptn_dev, reg) bus_read_2((ptn_dev)->pci_io, (reg)) -#define ptn_ioread32(ptn_dev, reg) bus_read_4((ptn_dev)->pci_io, (reg)) -#if 0 -#define ptn_ioread8(ptn_dev, reg) bus_read_1((ptn_dev)->pci_io, (reg)) -#define ptn_iowrite8(ptn_dev, reg, val) bus_write_1((ptn_dev)->pci_io, (reg), (val)) -#define ptn_iowrite16(ptn_dev, reg, val) bus_write_2((ptn_dev)->pci_io, (reg), (val)) -#define ptn_iowrite32(ptn_dev, reg, val) bus_write_4((ptn_dev)->pci_io, (reg), (val)) -#endif /* unused */ - /* * Map host netmap memory through PCI-BAR in the guest OS, * returning physical (nm_paddr) and virtual (nm_addr) addresses @@ -645,19 +632,20 @@ DRIVER_MODULE_ORDERED(ptn_memdev, pci, ptn_memdev_driver, ptnetmap_devclass, */ int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *ptn_dev, vm_paddr_t *nm_paddr, - void **nm_addr) + void **nm_addr, uint64_t *mem_size) { - uint32_t mem_size; int rid; D("ptn_memdev_driver iomap"); rid = PCIR_BAR(PTNETMAP_MEM_PCI_BAR); - mem_size = ptn_ioread32(ptn_dev, PTNETMAP_IO_PCI_MEMSIZE); + *mem_size = bus_read_4(ptn_dev->pci_io, PTNET_MDEV_IO_MEMSIZE_HI); + *mem_size = bus_read_4(ptn_dev->pci_io, PTNET_MDEV_IO_MEMSIZE_LO) | + (*mem_size << 32); /* map memory allocator */ ptn_dev->pci_mem = bus_alloc_resource(ptn_dev->dev, SYS_RES_MEMORY, - &rid, 0, ~0, mem_size, RF_ACTIVE); + &rid, 0, ~0, *mem_size, RF_ACTIVE); if (ptn_dev->pci_mem == NULL) { *nm_paddr = 0; *nm_addr = 0; @@ -667,14 +655,20 @@ nm_os_pt_memdev_iomap(struct ptnetmap_memdev *ptn_dev, vm_paddr_t *nm_paddr, *nm_paddr = rman_get_start(ptn_dev->pci_mem); *nm_addr = rman_get_virtual(ptn_dev->pci_mem); - D("=== BAR %d start %lx len %lx mem_size %x ===", + D("=== BAR %d start %lx len %lx mem_size %lx ===", PTNETMAP_MEM_PCI_BAR, (unsigned long)(*nm_paddr), (unsigned long)rman_get_size(ptn_dev->pci_mem), - mem_size); + (unsigned long)*mem_size); return (0); } +uint32_t +nm_os_pt_memdev_ioread(struct ptnetmap_memdev *ptn_dev, unsigned int reg) +{ + return bus_read_4(ptn_dev->pci_io, reg); +} + /* Unmap host netmap memory. 
*/ void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *ptn_dev) @@ -730,7 +724,7 @@ ptn_memdev_attach(device_t dev) return (ENXIO); } - mem_id = ptn_ioread16(ptn_dev, PTNETMAP_IO_PCI_HOSTID); + mem_id = bus_read_4(ptn_dev->pci_io, PTNET_MDEV_IO_MEMID); /* create guest allocator */ ptn_dev->nm_mem = netmap_mem_pt_guest_attach(ptn_dev, mem_id); @@ -740,7 +734,7 @@ ptn_memdev_attach(device_t dev) } netmap_mem_get(ptn_dev->nm_mem); - D("ptn_memdev_driver probe OK - host_id: %d", mem_id); + D("ptn_memdev_driver probe OK - host_mem_id: %d", mem_id); return (0); } @@ -993,12 +987,7 @@ nm_os_ncpus(void) struct nm_kthread_ctx { struct thread *user_td; /* thread user-space (kthread creator) to send ioctl */ - /* notification to guest (interrupt) */ - int irq_fd; /* ioctl fd */ - struct nm_kth_ioctl irq_ioctl; /* ioctl arguments */ - - /* notification from guest */ - void *ioevent_file; /* tsleep() argument */ + struct ptnetmap_cfgentry_bhyve cfg; /* worker function and parameter */ nm_kthread_worker_fn_t worker_fn; @@ -1034,8 +1023,8 @@ nm_os_kthread_wakeup_worker(struct nm_kthread *nmk) */ mtx_lock(&nmk->worker_lock); nmk->scheduled++; - if (nmk->worker_ctx.ioevent_file) { - wakeup(nmk->worker_ctx.ioevent_file); + if (nmk->worker_ctx.cfg.wchan) { + wakeup((void *)nmk->worker_ctx.cfg.wchan); } mtx_unlock(&nmk->worker_lock); } @@ -1046,11 +1035,13 @@ nm_os_kthread_send_irq(struct nm_kthread *nmk) struct nm_kthread_ctx *ctx = &nmk->worker_ctx; int err; - if (ctx->user_td && ctx->irq_fd > 0) { - err = kern_ioctl(ctx->user_td, ctx->irq_fd, ctx->irq_ioctl.com, (caddr_t)&ctx->irq_ioctl.data.msix); + if (ctx->user_td && ctx->cfg.ioctl_fd > 0) { + err = kern_ioctl(ctx->user_td, ctx->cfg.ioctl_fd, ctx->cfg.ioctl_cmd, + (caddr_t)&ctx->cfg.ioctl_data); if (err) { - D("kern_ioctl error: %d ioctl parameters: fd %d com %ju data %p", - err, ctx->irq_fd, (uintmax_t)ctx->irq_ioctl.com, &ctx->irq_ioctl.data); + D("kern_ioctl error: %d ioctl parameters: fd %d com %lu data %p", + err, ctx->cfg.ioctl_fd, (unsigned long)ctx->cfg.ioctl_cmd, + &ctx->cfg.ioctl_data); } } } @@ -1082,10 +1073,10 @@ nm_kthread_worker(void *data) } /* - * if ioevent_file is not defined, we don't have notification + * if wchan is not defined, we don't have notification * mechanism and we continually execute worker_fn() */ - if (!ctx->ioevent_file) { + if (!ctx->cfg.wchan) { ctx->worker_fn(ctx->worker_private); /* worker body */ } else { /* checks if there is a pending notification */ @@ -1099,7 +1090,7 @@ nm_kthread_worker(void *data) continue; } else if (nmk->run) { /* wait on event with one second timeout */ - msleep_spin(ctx->ioevent_file, &nmk->worker_lock, + msleep_spin((void *)ctx->cfg.wchan, &nmk->worker_lock, "nmk_ev", hz); nmk->scheduled++; } @@ -1110,29 +1101,6 @@ nm_kthread_worker(void *data) kthread_exit(); } -static int -nm_kthread_open_files(struct nm_kthread *nmk, struct nm_kthread_cfg *cfg) -{ - /* send irq through ioctl to bhyve (vmm.ko) */ - if (cfg->event.irqfd) { - nmk->worker_ctx.irq_fd = cfg->event.irqfd; - nmk->worker_ctx.irq_ioctl = cfg->event.ioctl; - } - /* ring.ioeventfd contains the chan where do tsleep to wait events */ - if (cfg->event.ioeventfd) { - nmk->worker_ctx.ioevent_file = (void *)cfg->event.ioeventfd; - } - - return 0; -} - -static void -nm_kthread_close_files(struct nm_kthread *nmk) -{ - nmk->worker_ctx.irq_fd = 0; - nmk->worker_ctx.ioevent_file = NULL; -} - void nm_os_kthread_set_affinity(struct nm_kthread *nmk, int affinity) { @@ -1140,10 +1108,15 @@ nm_os_kthread_set_affinity(struct nm_kthread *nmk, 
int affinity) } struct nm_kthread * -nm_os_kthread_create(struct nm_kthread_cfg *cfg) +nm_os_kthread_create(struct nm_kthread_cfg *cfg, unsigned int cfgtype, + void *opaque) { struct nm_kthread *nmk = NULL; - int error; + + if (cfgtype != PTNETMAP_CFGTYPE_BHYVE) { + D("Unsupported cfgtype %u", cfgtype); + return NULL; + } nmk = malloc(sizeof(*nmk), M_DEVBUF, M_NOWAIT | M_ZERO); if (!nmk) @@ -1158,15 +1131,12 @@ nm_os_kthread_create(struct nm_kthread_cfg *cfg) /* attach kthread to user process (ptnetmap) */ nmk->attach_user = cfg->attach_user; - /* open event fd */ - error = nm_kthread_open_files(nmk, cfg); - if (error) - goto err; + /* store kick/interrupt configuration */ + if (opaque) { + nmk->worker_ctx.cfg = *((struct ptnetmap_cfgentry_bhyve *)opaque); + } return nmk; -err: - free(nmk, M_DEVBUF); - return NULL; } int @@ -1194,7 +1164,7 @@ nm_os_kthread_start(struct nm_kthread *nmk) goto err; } - D("nm_kthread started td 0x%p", nmk->worker); + D("nm_kthread started td %p", nmk->worker); return 0; err: @@ -1228,7 +1198,7 @@ nm_os_kthread_delete(struct nm_kthread *nmk) nm_os_kthread_stop(nmk); } - nm_kthread_close_files(nmk); + memset(&nmk->worker_ctx.cfg, 0, sizeof(nmk->worker_ctx.cfg)); free(nmk, M_DEVBUF); } diff --git a/sys/dev/netmap/netmap_kern.h b/sys/dev/netmap/netmap_kern.h index 28e69d7ab09..f904476721b 100644 --- a/sys/dev/netmap/netmap_kern.h +++ b/sys/dev/netmap/netmap_kern.h @@ -2009,13 +2009,14 @@ typedef void (*nm_kthread_worker_fn_t)(void *data); /* kthread configuration */ struct nm_kthread_cfg { long type; /* kthread type/identifier */ - struct ptnet_ring_cfg event; /* event/ioctl fd */ nm_kthread_worker_fn_t worker_fn; /* worker function */ void *worker_private;/* worker parameter */ int attach_user; /* attach kthread to user process */ }; /* kthread configuration */ -struct nm_kthread *nm_os_kthread_create(struct nm_kthread_cfg *cfg); +struct nm_kthread *nm_os_kthread_create(struct nm_kthread_cfg *cfg, + unsigned int cfgtype, + void *opaque); int nm_os_kthread_start(struct nm_kthread *); void nm_os_kthread_stop(struct nm_kthread *); void nm_os_kthread_delete(struct nm_kthread *); @@ -2053,8 +2054,6 @@ nm_ptnetmap_host_on(struct netmap_adapter *na) #ifdef WITH_PTNETMAP_GUEST /* ptnetmap GUEST routines */ -typedef uint32_t (*nm_pt_guest_ptctl_t)(struct ifnet *, uint32_t); - /* * netmap adapter for guest ptnetmap ports */ @@ -2076,8 +2075,8 @@ struct netmap_pt_guest_adapter { }; -int netmap_pt_guest_attach(struct netmap_adapter *, void *, - unsigned int, nm_pt_guest_ptctl_t); +int netmap_pt_guest_attach(struct netmap_adapter *na, void *csb, + unsigned int nifp_offset, unsigned int memid); struct ptnet_ring; bool netmap_pt_guest_txsync(struct ptnet_ring *ptring, struct netmap_kring *kring, int flags); diff --git a/sys/dev/netmap/netmap_mem2.c b/sys/dev/netmap/netmap_mem2.c index bb0f9c8b6f3..ab89d3af65a 100644 --- a/sys/dev/netmap/netmap_mem2.c +++ b/sys/dev/netmap/netmap_mem2.c @@ -147,39 +147,6 @@ struct netmap_mem_ops { typedef uint16_t nm_memid_t; -/* - * Shared info for netmap allocator - * - * Each allocator contains this structur as first netmap_if. - * In this way, we can share same details about allocator - * to the VM. - * Used in ptnetmap. 
- */ -struct netmap_mem_shared_info { -#ifndef _WIN32 - struct netmap_if up; /* ends with a 0-sized array, which VSC does not like */ -#else /* !_WIN32 */ - char up[sizeof(struct netmap_if)]; -#endif /* !_WIN32 */ - uint64_t features; -#define NMS_FEAT_BUF_POOL 0x0001 -#define NMS_FEAT_MEMSIZE 0x0002 - - uint32_t buf_pool_offset; - uint32_t buf_pool_objtotal; - uint32_t buf_pool_objsize; - uint32_t totalsize; -}; - -#define NMS_NAME "nms_info" -#define NMS_VERSION 1 -static const struct netmap_if nms_if_blueprint = { - .ni_name = NMS_NAME, - .ni_version = NMS_VERSION, - .ni_tx_rings = 0, - .ni_rx_rings = 0 -}; - struct netmap_mem_d { NMA_LOCK_T nm_mtx; /* protect the allocator */ u_int nm_totalsize; /* shorthand */ @@ -312,8 +279,6 @@ netmap_mem_finalize(struct netmap_mem_d *nmd, struct netmap_adapter *na) return nmd->lasterr; } -static int netmap_mem_init_shared_info(struct netmap_mem_d *nmd); - void netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na) { @@ -362,13 +327,9 @@ netmap_mem_deref(struct netmap_mem_d *nmd, struct netmap_adapter *na) if (nmd->pools[NETMAP_BUF_POOL].bitmap) { /* XXX This check is a workaround that prevents a * NULL pointer crash which currently happens only - * with ptnetmap guests. Also, - * netmap_mem_init_shared_info must not be called - * by ptnetmap guest. */ + * with ptnetmap guests. + * Removed shared-info --> is the bug still there? */ nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3; - - /* expose info to the ptnetmap guest */ - netmap_mem_init_shared_info(nmd); } } nmd->ops->nmd_deref(nmd); @@ -1390,30 +1351,6 @@ netmap_mem_map(struct netmap_obj_pool *p, struct netmap_adapter *na) return 0; } -static int -netmap_mem_init_shared_info(struct netmap_mem_d *nmd) -{ - struct netmap_mem_shared_info *nms_info; - ssize_t base; - - /* Use the first slot in IF_POOL */ - nms_info = netmap_if_malloc(nmd, sizeof(*nms_info)); - if (nms_info == NULL) { - return ENOMEM; - } - - base = netmap_if_offset(nmd, nms_info); - - memcpy(&nms_info->up, &nms_if_blueprint, sizeof(nms_if_blueprint)); - nms_info->buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal + nmd->pools[NETMAP_RING_POOL].memtotal; - nms_info->buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; - nms_info->buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; - nms_info->totalsize = nmd->nm_totalsize; - nms_info->features = NMS_FEAT_BUF_POOL | NMS_FEAT_MEMSIZE; - - return 0; -} - static int netmap_mem_finalize_all(struct netmap_mem_d *nmd) { @@ -1433,11 +1370,6 @@ netmap_mem_finalize_all(struct netmap_mem_d *nmd) nmd->pools[NETMAP_BUF_POOL].bitmap[0] = ~3; nmd->flags |= NETMAP_MEM_FINALIZED; - /* expose info to the ptnetmap guest */ - nmd->lasterr = netmap_mem_init_shared_info(nmd); - if (nmd->lasterr) - goto error; - if (netmap_verbose) D("interfaces %d KB, rings %d KB, buffers %d MB", nmd->pools[NETMAP_IF_POOL].memtotal >> 10, @@ -1929,12 +1861,54 @@ struct netmap_mem_ops netmap_mem_private_ops = { .nmd_rings_delete = netmap_mem2_rings_delete }; +int +netmap_mem_pools_info_get(struct nmreq *nmr, struct netmap_adapter *na) +{ + uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1; + struct netmap_pools_info *upi = (struct netmap_pools_info *)(*pp); + struct netmap_mem_d *nmd = na->nm_mem; + struct netmap_pools_info pi; + unsigned int memsize; + uint16_t memid; + int ret; + + if (!nmd) { + return -1; + } + + ret = netmap_mem_get_info(nmd, &memsize, NULL, &memid); + if (ret) { + return ret; + } + + pi.memsize = memsize; + pi.memid = memid; + pi.if_pool_offset = 0; + pi.if_pool_objtotal = 
nmd->pools[NETMAP_IF_POOL].objtotal; + pi.if_pool_objsize = nmd->pools[NETMAP_IF_POOL]._objsize; + + pi.ring_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal; + pi.ring_pool_objtotal = nmd->pools[NETMAP_RING_POOL].objtotal; + pi.ring_pool_objsize = nmd->pools[NETMAP_RING_POOL]._objsize; + + pi.buf_pool_offset = nmd->pools[NETMAP_IF_POOL].memtotal + + nmd->pools[NETMAP_RING_POOL].memtotal; + pi.buf_pool_objtotal = nmd->pools[NETMAP_BUF_POOL].objtotal; + pi.buf_pool_objsize = nmd->pools[NETMAP_BUF_POOL]._objsize; + + ret = copyout(&pi, upi, sizeof(pi)); + if (ret) { + return ret; + } + + return 0; +} + #ifdef WITH_PTNETMAP_GUEST struct mem_pt_if { struct mem_pt_if *next; struct ifnet *ifp; unsigned int nifp_offset; - nm_pt_guest_ptctl_t ptctl; }; /* Netmap allocator for ptnetmap guests. */ @@ -1944,16 +1918,15 @@ struct netmap_mem_ptg { vm_paddr_t nm_paddr; /* physical address in the guest */ void *nm_addr; /* virtual address in the guest */ struct netmap_lut buf_lut; /* lookup table for BUF pool in the guest */ - nm_memid_t nm_host_id; /* allocator identifier in the host */ - struct ptnetmap_memdev *ptn_dev; + nm_memid_t host_mem_id; /* allocator identifier in the host */ + struct ptnetmap_memdev *ptn_dev;/* ptnetmap memdev */ struct mem_pt_if *pt_ifs; /* list of interfaces in passthrough */ }; /* Link a passthrough interface to a passthrough netmap allocator. */ static int netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp, - unsigned int nifp_offset, - nm_pt_guest_ptctl_t ptctl) + unsigned int nifp_offset) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; struct mem_pt_if *ptif = malloc(sizeof(*ptif), M_NETMAP, @@ -1967,7 +1940,6 @@ netmap_mem_pt_guest_ifp_add(struct netmap_mem_d *nmd, struct ifnet *ifp, ptif->ifp = ifp; ptif->nifp_offset = nifp_offset; - ptif->ptctl = ptctl; if (ptnmd->pt_ifs) { ptif->next = ptnmd->pt_ifs; @@ -2029,62 +2001,6 @@ netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *nmd, struct ifnet *ifp) return ret; } -/* Read allocator info from the first netmap_if (only on finalize) */ -static int -netmap_mem_pt_guest_read_shared_info(struct netmap_mem_d *nmd) -{ - struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; - struct netmap_mem_shared_info *nms_info; - uint32_t bufsize; - uint32_t nbuffers; - char *vaddr; - vm_paddr_t paddr; - int i; - - nms_info = (struct netmap_mem_shared_info *)ptnmd->nm_addr; - if (strncmp(nms_info->up.ni_name, NMS_NAME, sizeof(NMS_NAME)) != 0) { - D("error, the first slot does not contain shared info"); - return EINVAL; - } - /* check features mem_shared info */ - if ((nms_info->features & (NMS_FEAT_BUF_POOL | NMS_FEAT_MEMSIZE)) != - (NMS_FEAT_BUF_POOL | NMS_FEAT_MEMSIZE)) { - D("error, the shared info does not contain BUF_POOL and MEMSIZE"); - return EINVAL; - } - - bufsize = nms_info->buf_pool_objsize; - nbuffers = nms_info->buf_pool_objtotal; - - /* allocate the lut */ - if (ptnmd->buf_lut.lut == NULL) { - D("allocating lut"); - ptnmd->buf_lut.lut = nm_alloc_lut(nbuffers); - if (ptnmd->buf_lut.lut == NULL) { - D("lut allocation failed"); - return ENOMEM; - } - } - - /* we have physically contiguous memory mapped through PCI BAR */ - vaddr = (char *)(ptnmd->nm_addr) + nms_info->buf_pool_offset; - paddr = ptnmd->nm_paddr + nms_info->buf_pool_offset; - - for (i = 0; i < nbuffers; i++) { - ptnmd->buf_lut.lut[i].vaddr = vaddr; - ptnmd->buf_lut.lut[i].paddr = paddr; - vaddr += bufsize; - paddr += bufsize; - } - - ptnmd->buf_lut.objtotal = nbuffers; - ptnmd->buf_lut.objsize = bufsize; - - 
nmd->nm_totalsize = nms_info->totalsize; - - return 0; -} - static int netmap_mem_pt_guest_get_lut(struct netmap_mem_d *nmd, struct netmap_lut *lut) { @@ -2147,6 +2063,13 @@ static int netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd) { struct netmap_mem_ptg *ptnmd = (struct netmap_mem_ptg *)nmd; + uint64_t mem_size; + uint32_t bufsize; + uint32_t nbuffers; + uint32_t poolofs; + vm_paddr_t paddr; + char *vaddr; + int i; int error = 0; nmd->active++; @@ -2159,16 +2082,45 @@ netmap_mem_pt_guest_finalize(struct netmap_mem_d *nmd) error = ENOMEM; goto err; } - /* map memory through ptnetmap-memdev BAR */ + /* Map memory through ptnetmap-memdev BAR. */ error = nm_os_pt_memdev_iomap(ptnmd->ptn_dev, &ptnmd->nm_paddr, - &ptnmd->nm_addr); + &ptnmd->nm_addr, &mem_size); if (error) goto err; - /* read allcator info and create lut */ - error = netmap_mem_pt_guest_read_shared_info(nmd); - if (error) - goto err; + /* Initialize the lut using the information contained in the + * ptnetmap memory device. */ + bufsize = nm_os_pt_memdev_ioread(ptnmd->ptn_dev, + PTNET_MDEV_IO_BUF_POOL_OBJSZ); + nbuffers = nm_os_pt_memdev_ioread(ptnmd->ptn_dev, + PTNET_MDEV_IO_BUF_POOL_OBJNUM); + + /* allocate the lut */ + if (ptnmd->buf_lut.lut == NULL) { + D("allocating lut"); + ptnmd->buf_lut.lut = nm_alloc_lut(nbuffers); + if (ptnmd->buf_lut.lut == NULL) { + D("lut allocation failed"); + return ENOMEM; + } + } + + /* we have physically contiguous memory mapped through PCI BAR */ + poolofs = nm_os_pt_memdev_ioread(ptnmd->ptn_dev, + PTNET_MDEV_IO_BUF_POOL_OFS); + vaddr = (char *)(ptnmd->nm_addr) + poolofs; + paddr = ptnmd->nm_paddr + poolofs; + + for (i = 0; i < nbuffers; i++) { + ptnmd->buf_lut.lut[i].vaddr = vaddr; + ptnmd->buf_lut.lut[i].paddr = paddr; + vaddr += bufsize; + paddr += bufsize; + } + + ptnmd->buf_lut.objtotal = nbuffers; + ptnmd->buf_lut.objsize = bufsize; + nmd->nm_totalsize = (unsigned int)mem_size; nmd->flags |= NETMAP_MEM_FINALIZED; out: @@ -2248,15 +2200,10 @@ netmap_mem_pt_guest_if_delete(struct netmap_adapter *na, struct netmap_if *nifp) struct mem_pt_if *ptif; NMA_LOCK(na->nm_mem); - ptif = netmap_mem_pt_guest_ifp_lookup(na->nm_mem, na->ifp); if (ptif == NULL) { D("Error: interface %p is not in passthrough", na->ifp); - goto out; } - - ptif->ptctl(na->ifp, PTNETMAP_PTCTL_IFDELETE); -out: NMA_UNLOCK(na->nm_mem); } @@ -2295,7 +2242,6 @@ netmap_mem_pt_guest_rings_create(struct netmap_adapter *na) nifp->ring_ofs[i + na->num_tx_rings + 1]); } - //error = ptif->ptctl->nm_ptctl(ifp, PTNETMAP_PTCTL_RINGSCREATE); error = 0; out: NMA_UNLOCK(na->nm_mem); @@ -2331,7 +2277,7 @@ static struct netmap_mem_ops netmap_mem_pt_guest_ops = { /* Called with NMA_LOCK(&nm_mem) held. */ static struct netmap_mem_d * -netmap_mem_pt_guest_find_hostid(nm_memid_t host_id) +netmap_mem_pt_guest_find_memid(nm_memid_t mem_id) { struct netmap_mem_d *mem = NULL; struct netmap_mem_d *scan = netmap_last_mem_d; @@ -2339,7 +2285,7 @@ netmap_mem_pt_guest_find_hostid(nm_memid_t host_id) do { /* find ptnetmap allocator through host ID */ if (scan->ops->nmd_deref == netmap_mem_pt_guest_deref && - ((struct netmap_mem_ptg *)(scan))->nm_host_id == host_id) { + ((struct netmap_mem_ptg *)(scan))->host_mem_id == mem_id) { mem = scan; break; } @@ -2351,7 +2297,7 @@ netmap_mem_pt_guest_find_hostid(nm_memid_t host_id) /* Called with NMA_LOCK(&nm_mem) held. 
*/ static struct netmap_mem_d * -netmap_mem_pt_guest_create(nm_memid_t host_id) +netmap_mem_pt_guest_create(nm_memid_t mem_id) { struct netmap_mem_ptg *ptnmd; int err = 0; @@ -2364,7 +2310,7 @@ netmap_mem_pt_guest_create(nm_memid_t host_id) } ptnmd->up.ops = &netmap_mem_pt_guest_ops; - ptnmd->nm_host_id = host_id; + ptnmd->host_mem_id = mem_id; ptnmd->pt_ifs = NULL; /* Assign new id in the guest (We have the lock) */ @@ -2388,14 +2334,14 @@ netmap_mem_pt_guest_create(nm_memid_t host_id) * if it is not there */ static struct netmap_mem_d * -netmap_mem_pt_guest_get(nm_memid_t host_id) +netmap_mem_pt_guest_get(nm_memid_t mem_id) { struct netmap_mem_d *nmd; NMA_LOCK(&nm_mem); - nmd = netmap_mem_pt_guest_find_hostid(host_id); + nmd = netmap_mem_pt_guest_find_memid(mem_id); if (nmd == NULL) { - nmd = netmap_mem_pt_guest_create(host_id); + nmd = netmap_mem_pt_guest_create(mem_id); } NMA_UNLOCK(&nm_mem); @@ -2404,7 +2350,7 @@ netmap_mem_pt_guest_get(nm_memid_t host_id) /* * The guest allocator can be created by ptnetmap_memdev (during the device - * attach) or by ptnetmap device (e1000/virtio), during the netmap_attach. + * attach) or by ptnetmap device (ptnet), during the netmap_attach. * * The order is not important (we have different order in LINUX and FreeBSD). * The first one, creates the device, and the second one simply attaches it. @@ -2413,12 +2359,12 @@ netmap_mem_pt_guest_get(nm_memid_t host_id) /* Called when ptnetmap_memdev is attaching, to attach a new allocator in * the guest */ struct netmap_mem_d * -netmap_mem_pt_guest_attach(struct ptnetmap_memdev *ptn_dev, nm_memid_t host_id) +netmap_mem_pt_guest_attach(struct ptnetmap_memdev *ptn_dev, nm_memid_t mem_id) { struct netmap_mem_d *nmd; struct netmap_mem_ptg *ptnmd; - nmd = netmap_mem_pt_guest_get(host_id); + nmd = netmap_mem_pt_guest_get(mem_id); /* assign this device to the guest allocator */ if (nmd) { @@ -2429,27 +2375,22 @@ netmap_mem_pt_guest_attach(struct ptnetmap_memdev *ptn_dev, nm_memid_t host_id) return nmd; } -/* Called when ptnetmap device (virtio/e1000) is attaching */ +/* Called when ptnet device is attaching */ struct netmap_mem_d * netmap_mem_pt_guest_new(struct ifnet *ifp, unsigned int nifp_offset, - nm_pt_guest_ptctl_t ptctl) + unsigned int memid) { struct netmap_mem_d *nmd; - nm_memid_t host_id; - if (ifp == NULL || ptctl == NULL) { + if (ifp == NULL) { return NULL; } - /* Get the host id allocator. 
*/ - host_id = ptctl(ifp, PTNETMAP_PTCTL_HOSTMEMID); - - nmd = netmap_mem_pt_guest_get(host_id); + nmd = netmap_mem_pt_guest_get((nm_memid_t)memid); if (nmd) { - netmap_mem_pt_guest_ifp_add(nmd, ifp, nifp_offset, - ptctl); + netmap_mem_pt_guest_ifp_add(nmd, ifp, nifp_offset); } return nmd; diff --git a/sys/dev/netmap/netmap_mem2.h b/sys/dev/netmap/netmap_mem2.h index 7f4c5e9e962..f170df9d549 100644 --- a/sys/dev/netmap/netmap_mem2.h +++ b/sys/dev/netmap/netmap_mem2.h @@ -167,12 +167,14 @@ void netmap_mem_put(struct netmap_mem_d *); #ifdef WITH_PTNETMAP_GUEST struct netmap_mem_d* netmap_mem_pt_guest_new(struct ifnet *, unsigned int nifp_offset, - nm_pt_guest_ptctl_t); + unsigned int memid); struct ptnetmap_memdev; struct netmap_mem_d* netmap_mem_pt_guest_attach(struct ptnetmap_memdev *, uint16_t); int netmap_mem_pt_guest_ifp_del(struct netmap_mem_d *, struct ifnet *); #endif /* WITH_PTNETMAP_GUEST */ +int netmap_mem_pools_info_get(struct nmreq *, struct netmap_adapter *); + #define NETMAP_MEM_PRIVATE 0x2 /* allocator uses private address space */ #define NETMAP_MEM_IO 0x4 /* the underlying memory is mmapped I/O */ diff --git a/sys/dev/netmap/netmap_pt.c b/sys/dev/netmap/netmap_pt.c index 56434a23614..3913f4b957f 100644 --- a/sys/dev/netmap/netmap_pt.c +++ b/sys/dev/netmap/netmap_pt.c @@ -560,13 +560,34 @@ ptnetmap_print_configuration(struct ptnetmap_cfg *cfg) { int k; - D("[PTN] configuration:"); - D(" CSB ptrings @%p, num_rings=%u, features %08x", cfg->ptrings, - cfg->num_rings, cfg->features); + D("ptnetmap configuration:"); + D(" CSB ptrings @%p, num_rings=%u, cfgtype %08x", cfg->ptrings, + cfg->num_rings, cfg->cfgtype); for (k = 0; k < cfg->num_rings; k++) { - D(" ring #%d: iofd=%llu, irqfd=%llu", k, - (unsigned long long)cfg->entries[k].ioeventfd, - (unsigned long long)cfg->entries[k].irqfd); + switch (cfg->cfgtype) { + case PTNETMAP_CFGTYPE_QEMU: { + struct ptnetmap_cfgentry_qemu *e = + (struct ptnetmap_cfgentry_qemu *)(cfg+1) + k; + D(" ring #%d: ioeventfd=%lu, irqfd=%lu", k, + (unsigned long)e->ioeventfd, + (unsigned long)e->irqfd); + break; + } + + case PTNETMAP_CFGTYPE_BHYVE: + { + struct ptnetmap_cfgentry_bhyve *e = + (struct ptnetmap_cfgentry_bhyve *)(cfg+1) + k; + D(" ring #%d: wchan=%lu, ioctl_fd=%lu, " + "ioctl_cmd=%lu, msix_msg_data=%lu, msix_addr=%lu", + k, (unsigned long)e->wchan, + (unsigned long)e->ioctl_fd, + (unsigned long)e->ioctl_cmd, + (unsigned long)e->ioctl_data.msg_data, + (unsigned long)e->ioctl_data.addr); + break; + } + } } } @@ -632,6 +653,7 @@ ptnetmap_create_kthreads(struct netmap_pt_host_adapter *pth_na, struct ptnetmap_state *ptns = pth_na->ptns; struct nm_kthread_cfg nmk_cfg; unsigned int num_rings; + uint8_t *cfg_entries = (uint8_t *)(cfg + 1); int k; num_rings = pth_na->up.num_tx_rings + @@ -640,7 +662,6 @@ ptnetmap_create_kthreads(struct netmap_pt_host_adapter *pth_na, for (k = 0; k < num_rings; k++) { nmk_cfg.attach_user = 1; /* attach kthread to user process */ nmk_cfg.worker_private = ptnetmap_kring(pth_na, k); - nmk_cfg.event = *(cfg->entries + k); nmk_cfg.type = k; if (k < pth_na->up.num_tx_rings) { nmk_cfg.worker_fn = ptnetmap_tx_handler; @@ -648,7 +669,8 @@ ptnetmap_create_kthreads(struct netmap_pt_host_adapter *pth_na, nmk_cfg.worker_fn = ptnetmap_rx_handler; } - ptns->kthreads[k] = nm_os_kthread_create(&nmk_cfg); + ptns->kthreads[k] = nm_os_kthread_create(&nmk_cfg, + cfg->cfgtype, cfg_entries + k * cfg->entry_size); if (ptns->kthreads[k] == NULL) { goto err; } @@ -727,7 +749,7 @@ ptnetmap_read_cfg(struct nmreq *nmr) return NULL; } - cfglen = 
sizeof(tmp) + tmp.num_rings * sizeof(struct ptnet_ring_cfg); + cfglen = sizeof(tmp) + tmp.num_rings * tmp.entry_size; cfg = malloc(cfglen, M_DEVBUF, M_NOWAIT | M_ZERO); if (!cfg) { return NULL; @@ -750,7 +772,6 @@ static int ptnetmap_create(struct netmap_pt_host_adapter *pth_na, struct ptnetmap_cfg *cfg) { - unsigned ft_mask = (PTNETMAP_CFG_FEAT_CSB | PTNETMAP_CFG_FEAT_EVENTFD); struct ptnetmap_state *ptns; unsigned int num_rings; int ret, i; @@ -761,12 +782,6 @@ ptnetmap_create(struct netmap_pt_host_adapter *pth_na, return EINVAL; } - if ((cfg->features & ft_mask) != ft_mask) { - D("ERROR ptnetmap_cfg(%x) does not contain CSB and EVENTFD", - cfg->features); - return EINVAL; - } - num_rings = pth_na->up.num_tx_rings + pth_na->up.num_rx_rings; if (num_rings != cfg->num_rings) { @@ -1240,9 +1255,9 @@ netmap_get_pt_host_na(struct nmreq *nmr, struct netmap_adapter **na, int create) #ifdef WITH_PTNETMAP_GUEST /* - * GUEST ptnetmap generic txsync()/rxsync() used in e1000/virtio-net device - * driver notify is set when we need to send notification to the host - * (driver-specific) + * Guest ptnetmap txsync()/rxsync() routines, used in ptnet device drivers. + * These routines are reused across the different operating systems supported + * by netmap. */ /* diff --git a/sys/dev/netmap/netmap_vale.c b/sys/dev/netmap/netmap_vale.c index 78c53409c0b..71b3aedddd4 100644 --- a/sys/dev/netmap/netmap_vale.c +++ b/sys/dev/netmap/netmap_vale.c @@ -913,7 +913,7 @@ nm_bdg_create_kthreads(struct nm_bdg_polling_state *bps) kcfg.type = i; kcfg.worker_private = t; - t->nmk = nm_os_kthread_create(&kcfg); + t->nmk = nm_os_kthread_create(&kcfg, 0, NULL); if (t->nmk == NULL) { goto cleanup; } diff --git a/sys/net/netmap.h b/sys/net/netmap.h index 14b5e2b3212..3e0cdab4248 100644 --- a/sys/net/netmap.h +++ b/sys/net/netmap.h @@ -525,6 +525,7 @@ struct nmreq { #define NETMAP_BDG_POLLING_ON 10 /* delete polling kthread */ #define NETMAP_BDG_POLLING_OFF 11 /* delete polling kthread */ #define NETMAP_VNET_HDR_GET 12 /* get the port virtio-net-hdr length */ +#define NETMAP_POOLS_INFO_GET 13 /* get memory allocator pools info */ uint16_t nr_arg1; /* reserve extra rings in NIOCREGIF */ #define NETMAP_BDG_HOST 1 /* attach the host stack on ATTACH */ @@ -644,29 +645,4 @@ struct nm_ifreq { char data[NM_IFRDATA_LEN]; }; -/* - * netmap kernel thread configuration - */ -/* bhyve/vmm.ko MSIX parameters for IOCTL */ -struct ptn_vmm_ioctl_msix { - uint64_t msg; - uint64_t addr; -}; - -/* IOCTL parameters */ -struct nm_kth_ioctl { - uint64_t com; - /* We use union to support more ioctl commands. 
*/ - union { - struct ptn_vmm_ioctl_msix msix; - } data; -}; - -/* Configuration of a ptnetmap ring */ -struct ptnet_ring_cfg { - uint64_t ioeventfd; /* eventfd in linux, tsleep() parameter in FreeBSD */ - uint64_t irqfd; /* eventfd in linux, ioctl fd in FreeBSD */ - struct nm_kth_ioctl ioctl; /* ioctl parameter to send irq (only used in bhyve/FreeBSD) */ - uint64_t reserved[4]; /* reserved to support of more hypervisors */ -}; #endif /* _NET_NETMAP_H_ */ diff --git a/sys/net/netmap_virt.h b/sys/net/netmap_virt.h index e17dcf854a9..e5c823ea2f2 100644 --- a/sys/net/netmap_virt.h +++ b/sys/net/netmap_virt.h @@ -32,13 +32,6 @@ #ifndef NETMAP_VIRT_H #define NETMAP_VIRT_H -#define NETMAP_VIRT_CSB_SIZE 4096 - -/* ptnetmap features */ -#define PTNETMAP_F_BASE 1 -#define PTNETMAP_F_FULL 2 /* not used */ -#define PTNETMAP_F_VNET_HDR 4 - /* * ptnetmap_memdev: device used to expose memory into the guest VM * @@ -49,83 +42,126 @@ /* PCI identifiers and PCI BARs for the ptnetmap memdev * and ptnetmap network interface. */ #define PTNETMAP_MEMDEV_NAME "ptnetmap-memdev" -#define PTNETMAP_PCI_VENDOR_ID 0x3333 /* TODO change vendor_id */ -#define PTNETMAP_PCI_DEVICE_ID 0x0001 /* memory device */ -#define PTNETMAP_PCI_NETIF_ID 0x0002 /* ptnet network interface */ +#define PTNETMAP_PCI_VENDOR_ID 0x1b36 /* QEMU virtual devices */ +#define PTNETMAP_PCI_DEVICE_ID 0x000c /* memory device */ +#define PTNETMAP_PCI_NETIF_ID 0x000d /* ptnet network interface */ #define PTNETMAP_IO_PCI_BAR 0 #define PTNETMAP_MEM_PCI_BAR 1 #define PTNETMAP_MSIX_PCI_BAR 2 /* Registers for the ptnetmap memdev */ -/* 32 bit r/o */ -#define PTNETMAP_IO_PCI_MEMSIZE 0 /* size of the netmap memory shared - * between guest and host */ -/* 16 bit r/o */ -#define PTNETMAP_IO_PCI_HOSTID 4 /* memory allocator ID in netmap host */ -#define PTNETMAP_IO_SIZE 6 +#define PTNET_MDEV_IO_MEMSIZE_LO 0 /* netmap memory size (low) */ +#define PTNET_MDEV_IO_MEMSIZE_HI 4 /* netmap_memory_size (high) */ +#define PTNET_MDEV_IO_MEMID 8 /* memory allocator ID in the host */ +#define PTNET_MDEV_IO_IF_POOL_OFS 64 +#define PTNET_MDEV_IO_IF_POOL_OBJNUM 68 +#define PTNET_MDEV_IO_IF_POOL_OBJSZ 72 +#define PTNET_MDEV_IO_RING_POOL_OFS 76 +#define PTNET_MDEV_IO_RING_POOL_OBJNUM 80 +#define PTNET_MDEV_IO_RING_POOL_OBJSZ 84 +#define PTNET_MDEV_IO_BUF_POOL_OFS 88 +#define PTNET_MDEV_IO_BUF_POOL_OBJNUM 92 +#define PTNET_MDEV_IO_BUF_POOL_OBJSZ 96 +#define PTNET_MDEV_IO_END 100 /* * ptnetmap configuration * - * The hypervisor (QEMU or bhyve) sends this struct to the host netmap - * module through an ioctl() command when it wants to start the ptnetmap - * kthreads. + * The ptnet kthreads (running in host kernel-space) need to be configured + * in order to know how to intercept guest kicks (I/O register writes) and + * how to inject MSI-X interrupts to the guest. The configuration may vary + * depending on the hypervisor. Currently, we support QEMU/KVM on Linux and + * and bhyve on FreeBSD. + * The configuration is passed by the hypervisor to the host netmap module + * by means of an ioctl() with nr_cmd=NETMAP_PT_HOST_CREATE, and it is + * specified by the ptnetmap_cfg struct. This struct contains an header + * with general informations and an array of entries whose size depends + * on the hypervisor. The NETMAP_PT_HOST_CREATE command is issued every + * time the kthreads are started. 
*/ struct ptnetmap_cfg { -#define PTNETMAP_CFG_FEAT_CSB 0x0001 -#define PTNETMAP_CFG_FEAT_EVENTFD 0x0002 -#define PTNETMAP_CFG_FEAT_IOCTL 0x0004 - uint32_t features; - void *ptrings; /* ptrings inside CSB */ - uint32_t num_rings; /* number of entries */ - struct ptnet_ring_cfg entries[0]; /* per-ptring configuration */ +#define PTNETMAP_CFGTYPE_QEMU 0x1 +#define PTNETMAP_CFGTYPE_BHYVE 0x2 + uint16_t cfgtype; /* how to interpret the cfg entries */ + uint16_t entry_size; /* size of a config entry */ + uint32_t num_rings; /* number of config entries */ + void *ptrings; /* ptrings inside CSB */ + /* Configuration entries are allocated right after the struct. */ +}; + +/* Configuration of a ptnetmap ring for QEMU. */ +struct ptnetmap_cfgentry_qemu { + uint32_t ioeventfd; /* to intercept guest register access */ + uint32_t irqfd; /* to inject guest interrupts */ +}; + +/* Configuration of a ptnetmap ring for bhyve. */ +struct ptnetmap_cfgentry_bhyve { + uint64_t wchan; /* tsleep() parameter, to wake up kthread */ + uint32_t ioctl_fd; /* ioctl fd */ + /* ioctl parameters to send irq */ + uint32_t ioctl_cmd; + /* vmm.ko MSIX parameters for IOCTL */ + struct { + uint64_t msg_data; + uint64_t addr; + } ioctl_data; +}; + +/* + * Structure filled-in by the kernel when asked for allocator info + * through NETMAP_POOLS_INFO_GET. Used by hypervisors supporting + * ptnetmap. + */ +struct netmap_pools_info { + uint64_t memsize; /* same as nmr->nr_memsize */ + uint32_t memid; /* same as nmr->nr_arg2 */ + uint32_t if_pool_offset; + uint32_t if_pool_objtotal; + uint32_t if_pool_objsize; + uint32_t ring_pool_offset; + uint32_t ring_pool_objtotal; + uint32_t ring_pool_objsize; + uint32_t buf_pool_offset; + uint32_t buf_pool_objtotal; + uint32_t buf_pool_objsize; }; /* - * Functions used to write ptnetmap_cfg from/to the nmreq. - * The user-space application writes the pointer of ptnetmap_cfg - * (user-space buffer) starting from nr_arg1 field, so that the kernel - * can read it with copyin (copy_from_user). + * Pass a pointer to a userspace buffer to be passed to kernelspace for write + * or read. Used by NETMAP_PT_HOST_CREATE and NETMAP_POOLS_INFO_GET. */ static inline void -ptnetmap_write_cfg(struct nmreq *nmr, struct ptnetmap_cfg *cfg) +nmreq_pointer_put(struct nmreq *nmr, void *userptr) { - uintptr_t *nmr_ptncfg = (uintptr_t *)&nmr->nr_arg1; - *nmr_ptncfg = (uintptr_t)cfg; + uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1; + *pp = (uintptr_t)userptr; } -/* ptnetmap control commands */ -#define PTNETMAP_PTCTL_CONFIG 1 -#define PTNETMAP_PTCTL_FINALIZE 2 -#define PTNETMAP_PTCTL_IFNEW 3 -#define PTNETMAP_PTCTL_IFDELETE 4 -#define PTNETMAP_PTCTL_RINGSCREATE 5 -#define PTNETMAP_PTCTL_RINGSDELETE 6 -#define PTNETMAP_PTCTL_DEREF 7 -#define PTNETMAP_PTCTL_TXSYNC 8 -#define PTNETMAP_PTCTL_RXSYNC 9 -#define PTNETMAP_PTCTL_REGIF 10 -#define PTNETMAP_PTCTL_UNREGIF 11 -#define PTNETMAP_PTCTL_HOSTMEMID 12 - +/* ptnetmap features */ +#define PTNETMAP_F_VNET_HDR 1 /* I/O registers for the ptnet device. 
*/ #define PTNET_IO_PTFEAT 0 #define PTNET_IO_PTCTL 4 -#define PTNET_IO_PTSTS 8 -#define PTNET_IO_MAC_LO 12 -#define PTNET_IO_MAC_HI 16 -#define PTNET_IO_CSBBAH 20 -#define PTNET_IO_CSBBAL 24 -#define PTNET_IO_NIFP_OFS 28 -#define PTNET_IO_NUM_TX_RINGS 32 -#define PTNET_IO_NUM_RX_RINGS 36 -#define PTNET_IO_NUM_TX_SLOTS 40 -#define PTNET_IO_NUM_RX_SLOTS 44 -#define PTNET_IO_VNET_HDR_LEN 48 +#define PTNET_IO_MAC_LO 8 +#define PTNET_IO_MAC_HI 12 +#define PTNET_IO_CSBBAH 16 +#define PTNET_IO_CSBBAL 20 +#define PTNET_IO_NIFP_OFS 24 +#define PTNET_IO_NUM_TX_RINGS 28 +#define PTNET_IO_NUM_RX_RINGS 32 +#define PTNET_IO_NUM_TX_SLOTS 36 +#define PTNET_IO_NUM_RX_SLOTS 40 +#define PTNET_IO_VNET_HDR_LEN 44 +#define PTNET_IO_HOSTMEMID 48 #define PTNET_IO_END 52 #define PTNET_IO_KICK_BASE 128 -#define PTNET_IO_MASK 0xff +#define PTNET_IO_MASK 0xff + +/* ptnetmap control commands (values for PTCTL register) */ +#define PTNETMAP_PTCTL_CREATE 1 +#define PTNETMAP_PTCTL_DELETE 2 /* If defined, CSB is allocated by the guest, not by the host. */ #define PTNET_CSB_ALLOC @@ -145,29 +181,18 @@ struct ptnet_ring { /* CSB for the ptnet device. */ struct ptnet_csb { +#define NETMAP_VIRT_CSB_SIZE 4096 struct ptnet_ring rings[NETMAP_VIRT_CSB_SIZE/sizeof(struct ptnet_ring)]; }; -#if defined (WITH_PTNETMAP_HOST) || defined (WITH_PTNETMAP_GUEST) - -/* return l_elem - r_elem with wraparound */ -static inline uint32_t -ptn_sub(uint32_t l_elem, uint32_t r_elem, uint32_t num_slots) -{ - int64_t res; - - res = (int64_t)(l_elem) - r_elem; - - return (res < 0) ? res + num_slots : res; -} -#endif /* WITH_PTNETMAP_HOST || WITH_PTNETMAP_GUEST */ - #ifdef WITH_PTNETMAP_GUEST /* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */ struct ptnetmap_memdev; -int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **); +int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **, + uint64_t *); void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *); +uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int); /* Guest driver: Write kring pointers (cur, head) to the CSB. * This routine is coupled with ptnetmap_host_read_kring_csb(). */ -- 2.45.0
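
The NETMAP_POOLS_INFO_GET command added above is driven from user space
through the usual NIOCREGIF ioctl, with the destination buffer pointer
stored starting at nr_arg1 by nmreq_pointer_put(). A minimal sketch of the
hypervisor-side call follows; it is not part of the patch, and it assumes
`fd` is a /dev/netmap descriptor already bound to the passed-through port
by a previous NIOCREGIF, since the kernel resolves the allocator from
priv->np_na. The helper name query_pools_info() is illustrative only.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/netmap.h>
#include <net/netmap_virt.h>
#include <string.h>

static int
query_pools_info(int fd, struct netmap_pools_info *pi)
{
	struct nmreq req;

	memset(&req, 0, sizeof(req));
	req.nr_version = NETMAP_API;
	req.nr_cmd = NETMAP_POOLS_INFO_GET;
	/* Pass the user buffer pointer through nr_arg1 and the following
	 * bytes; netmap_mem_pools_info_get() reads it back and copyout()s
	 * a struct netmap_pools_info into it. */
	nmreq_pointer_put(&req, pi);
	return ioctl(fd, NIOCREGIF, &req);
}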
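
The NETMAP_PT_HOST_CREATE path, in turn, now expects a variable-size
ptnetmap_cfg: the fixed header followed by num_rings entries of entry_size
bytes, interpreted according to cfgtype (see ptnetmap_read_cfg() above).
The sketch below, also not part of the patch, shows how a bhyve-style
device model might build and submit such a configuration; pt_host_create()
and the per-ring kick/interrupt values are placeholders, and `ptrings` is
assumed to point at the ptring array of the CSB as mapped in the calling
process.

#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/netmap.h>
#include <net/netmap_virt.h>
#include <stdlib.h>
#include <string.h>

static int
pt_host_create(int fd, const char *port, void *ptrings, unsigned int num_rings)
{
	size_t len = sizeof(struct ptnetmap_cfg) +
	             num_rings * sizeof(struct ptnetmap_cfgentry_bhyve);
	struct ptnetmap_cfg *cfg = calloc(1, len);
	struct ptnetmap_cfgentry_bhyve *e;
	struct nmreq req;
	unsigned int k;
	int ret;

	if (cfg == NULL)
		return -1;
	cfg->cfgtype = PTNETMAP_CFGTYPE_BHYVE;
	cfg->entry_size = sizeof(*e);
	cfg->num_rings = num_rings;	/* must equal tx + rx rings of the port */
	cfg->ptrings = ptrings;
	e = (struct ptnetmap_cfgentry_bhyve *)(cfg + 1);
	for (k = 0; k < num_rings; k++) {
		e[k].wchan = 0;		/* guest kick address, filled by the VMM */
		e[k].ioctl_fd = 0;	/* vmm.ko fd used to inject MSI-X */
		e[k].ioctl_cmd = 0;	/* MSI-X ioctl command and data likewise */
	}

	memset(&req, 0, sizeof(req));
	strncpy(req.nr_name, port, sizeof(req.nr_name) - 1);
	req.nr_version = NETMAP_API;
	req.nr_cmd = NETMAP_PT_HOST_CREATE;
	nmreq_pointer_put(&req, cfg);	/* ptnetmap_read_cfg() copyin()s header + entries */
	ret = ioctl(fd, NIOCREGIF, &req);
	free(cfg);
	return ret;
}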