From 2ee83330d6946f002f0ecccdcfa9a074207a2476 Mon Sep 17 00:00:00 2001
From: bryanv
Date: Wed, 7 May 2014 03:17:21 +0000
Subject: [PATCH] MFC r255111, r255112, r255131, r255167, r256066, r261150,
 r261151, r261164, r261166, r261167, r261168, r261394, r261395:

This updates the network driver to support multiple queues and includes
several bug fixes. Note that multiqueue support is not compiled in by
default since that would change ALTQ behavior.

- Sync VirtIO net device header file from recent Linux
- Import multiqueue VirtIO net driver
- Fix build with gcc
- Do not hold the vtnet Rx queue lock when calling up into the stack
- Read and write the MAC address in the config space byte by byte
- Also include the mbuf's csum_flags in an assert message
- Remove stray space
- Move duplicated transmit start code into a single function
- Avoid queue unlock followed by relock when the enable interrupt race is lost
- Check for a full virtqueue in the multiqueue transmit path
- Do not place the sglist used for Rx/Tx on the stack
- Use m_defrag() instead of m_collapse() to compact a long mbuf chain

git-svn-id: svn://svn.freebsd.org/base/stable/9@265480 ccf9f872-aa2e-dd11-9fc8-001c23d0bc1f
---
 sys/dev/virtio/network/if_vtnet.c    | 3800 +++++++++++++++++---------
 sys/dev/virtio/network/if_vtnetvar.h |  251 +-
 sys/dev/virtio/network/virtio_net.h  |   49 +-
 sys/modules/virtio/network/Makefile  |   15 +
 4 files changed, 2723 insertions(+), 1392 deletions(-)

diff --git a/sys/dev/virtio/network/if_vtnet.c b/sys/dev/virtio/network/if_vtnet.c
index 89604d1b2..be968e8f4 100644
--- a/sys/dev/virtio/network/if_vtnet.c
+++ b/sys/dev/virtio/network/if_vtnet.c
@@ -29,13 +29,12 @@
 #include
 __FBSDID("$FreeBSD$");
 
-#ifdef HAVE_KERNEL_OPTION_HEADERS
-#include "opt_device_polling.h"
-#endif
+#define VTNET_LEGACY_TX
 
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -46,6 +45,9 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
+#include
+#include
 
 #include
 
@@ -63,6 +65,7 @@ __FBSDID("$FreeBSD$");
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -79,6 +82,9 @@ __FBSDID("$FreeBSD$");
 
 #include "virtio_if.h"
 
+#include "opt_inet.h"
+#include "opt_inet6.h"
+
 static int	vtnet_modevent(module_t, int, void *);
 
 static int	vtnet_probe(device_t);
@@ -87,82 +93,140 @@ static int	vtnet_detach(device_t);
 static int	vtnet_suspend(device_t);
 static int	vtnet_resume(device_t);
 static int	vtnet_shutdown(device_t);
+static int	vtnet_attach_completed(device_t);
 static int	vtnet_config_change(device_t);
 
 static void	vtnet_negotiate_features(struct vtnet_softc *);
+static void	vtnet_setup_features(struct vtnet_softc *);
+static int	vtnet_init_rxq(struct vtnet_softc *, int);
+static int	vtnet_init_txq(struct vtnet_softc *, int);
+static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
+static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
+static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
+static void	vtnet_free_rx_filters(struct vtnet_softc *);
 static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
-static void	vtnet_get_hwaddr(struct vtnet_softc *);
-static void	vtnet_set_hwaddr(struct vtnet_softc *);
-static int	vtnet_is_link_up(struct vtnet_softc *);
-static void	vtnet_update_link_status(struct vtnet_softc *);
-static void	vtnet_watchdog(struct vtnet_softc *);
+static int	vtnet_setup_interface(struct vtnet_softc *);
 static int	vtnet_change_mtu(struct vtnet_softc *, int);
 static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
 
-static int	vtnet_init_rx_vq(struct vtnet_softc *);
-static void 
vtnet_free_rx_mbufs(struct vtnet_softc *); -static void vtnet_free_tx_mbufs(struct vtnet_softc *); -static void vtnet_free_ctrl_vq(struct vtnet_softc *); - -#ifdef DEVICE_POLLING -static poll_handler_t vtnet_poll; -#endif - -static struct mbuf * vtnet_alloc_rxbuf(struct vtnet_softc *, int, - struct mbuf **); -static int vtnet_replace_rxbuf(struct vtnet_softc *, +static int vtnet_rxq_populate(struct vtnet_rxq *); +static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); +static struct mbuf * + vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); +static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *, struct mbuf *, int); -static int vtnet_newbuf(struct vtnet_softc *); -static void vtnet_discard_merged_rxbuf(struct vtnet_softc *, int); -static void vtnet_discard_rxbuf(struct vtnet_softc *, struct mbuf *); -static int vtnet_enqueue_rxbuf(struct vtnet_softc *, struct mbuf *); -static void vtnet_vlan_tag_remove(struct mbuf *); -static int vtnet_rx_csum(struct vtnet_softc *, struct mbuf *, +static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); +static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); +static int vtnet_rxq_new_buf(struct vtnet_rxq *); +static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, + struct virtio_net_hdr *); +static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); +static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *); +static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int); +static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *, struct virtio_net_hdr *); -static int vtnet_rxeof_merged(struct vtnet_softc *, struct mbuf *, int); -static int vtnet_rxeof(struct vtnet_softc *, int, int *); +static int vtnet_rxq_eof(struct vtnet_rxq *); static void vtnet_rx_vq_intr(void *); +static void vtnet_rxq_tq_intr(void *, int); -static void vtnet_txeof(struct vtnet_softc *); -static struct mbuf * vtnet_tx_offload(struct vtnet_softc *, struct mbuf *, +static void vtnet_txq_free_mbufs(struct vtnet_txq *); +static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, + int *, int *, int *); +static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int, + int, struct virtio_net_hdr *); +static struct mbuf * + vtnet_txq_offload(struct vtnet_txq *, struct mbuf *, struct virtio_net_hdr *); -static int vtnet_enqueue_txbuf(struct vtnet_softc *, struct mbuf **, +static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, struct vtnet_tx_header *); -static int vtnet_encap(struct vtnet_softc *, struct mbuf **); -static void vtnet_start_locked(struct ifnet *); +static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **); +#ifdef VTNET_LEGACY_TX +static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *); static void vtnet_start(struct ifnet *); -static void vtnet_tick(void *); +#else +static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *); +static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *); +static void vtnet_txq_tq_deferred(void *, int); +#endif +static void vtnet_txq_start(struct vtnet_txq *); +static void vtnet_txq_tq_intr(void *, int); +static void vtnet_txq_eof(struct vtnet_txq *); static void vtnet_tx_vq_intr(void *); +static void vtnet_tx_start_all(struct vtnet_softc *); +#ifndef VTNET_LEGACY_TX +static void vtnet_qflush(struct ifnet *); +#endif + +static int vtnet_watchdog(struct vtnet_txq *); +static void vtnet_rxq_accum_stats(struct vtnet_rxq *, + struct vtnet_rxq_stats *); +static void vtnet_txq_accum_stats(struct 
vtnet_txq *, + struct vtnet_txq_stats *); +static void vtnet_accumulate_stats(struct vtnet_softc *); +static void vtnet_tick(void *); + +static void vtnet_start_taskqueues(struct vtnet_softc *); +static void vtnet_free_taskqueues(struct vtnet_softc *); +static void vtnet_drain_taskqueues(struct vtnet_softc *); + +static void vtnet_drain_rxtx_queues(struct vtnet_softc *); +static void vtnet_stop_rendezvous(struct vtnet_softc *); static void vtnet_stop(struct vtnet_softc *); +static int vtnet_virtio_reinit(struct vtnet_softc *); +static void vtnet_init_rx_filters(struct vtnet_softc *); +static int vtnet_init_rx_queues(struct vtnet_softc *); +static int vtnet_init_tx_queues(struct vtnet_softc *); +static int vtnet_init_rxtx_queues(struct vtnet_softc *); +static void vtnet_set_active_vq_pairs(struct vtnet_softc *); static int vtnet_reinit(struct vtnet_softc *); static void vtnet_init_locked(struct vtnet_softc *); static void vtnet_init(void *); +static void vtnet_free_ctrl_vq(struct vtnet_softc *); static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, struct sglist *, int, int); - -static void vtnet_rx_filter(struct vtnet_softc *sc); +static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); +static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int); static int vtnet_set_promisc(struct vtnet_softc *, int); static int vtnet_set_allmulti(struct vtnet_softc *, int); +static void vtnet_attach_disable_promisc(struct vtnet_softc *); +static void vtnet_rx_filter(struct vtnet_softc *); static void vtnet_rx_filter_mac(struct vtnet_softc *); - static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_rx_filter_vlan(struct vtnet_softc *); -static void vtnet_set_vlan_filter(struct vtnet_softc *, int, uint16_t); +static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t); static void vtnet_register_vlan(void *, struct ifnet *, uint16_t); static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t); +static int vtnet_is_link_up(struct vtnet_softc *); +static void vtnet_update_link_status(struct vtnet_softc *); static int vtnet_ifmedia_upd(struct ifnet *); static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *); +static void vtnet_get_hwaddr(struct vtnet_softc *); +static void vtnet_set_hwaddr(struct vtnet_softc *); +static void vtnet_vlan_tag_remove(struct mbuf *); -static void vtnet_add_statistics(struct vtnet_softc *); - -static int vtnet_enable_rx_intr(struct vtnet_softc *); -static int vtnet_enable_tx_intr(struct vtnet_softc *); -static void vtnet_disable_rx_intr(struct vtnet_softc *); -static void vtnet_disable_tx_intr(struct vtnet_softc *); +static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, + struct sysctl_oid_list *, struct vtnet_rxq *); +static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, + struct sysctl_oid_list *, struct vtnet_txq *); +static void vtnet_setup_queue_sysctl(struct vtnet_softc *); +static void vtnet_setup_sysctl(struct vtnet_softc *); + +static int vtnet_rxq_enable_intr(struct vtnet_rxq *); +static void vtnet_rxq_disable_intr(struct vtnet_rxq *); +static int vtnet_txq_enable_intr(struct vtnet_txq *); +static void vtnet_txq_disable_intr(struct vtnet_txq *); +static void vtnet_enable_rx_interrupts(struct vtnet_softc *); +static void vtnet_enable_tx_interrupts(struct vtnet_softc *); +static void vtnet_enable_interrupts(struct vtnet_softc *); +static void vtnet_disable_rx_interrupts(struct vtnet_softc *); +static void 
vtnet_disable_tx_interrupts(struct vtnet_softc *); +static void vtnet_disable_interrupts(struct vtnet_softc *); + +static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); /* Tunables. */ static int vtnet_csum_disable = 0; @@ -171,16 +235,25 @@ static int vtnet_tso_disable = 0; TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); static int vtnet_lro_disable = 0; TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); +static int vtnet_mq_disable = 0; +TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); +static int vtnet_mq_max_pairs = 0; +TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); +static int vtnet_rx_process_limit = 512; +TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); /* - * Reducing the number of transmit completed interrupts can - * improve performance. To do so, the define below keeps the - * Tx vq interrupt disabled and adds calls to vtnet_txeof() - * in the start and watchdog paths. The price to pay for this - * is the m_free'ing of transmitted mbufs may be delayed until - * the watchdog fires. + * Reducing the number of transmit completed interrupts can improve + * performance. To do so, the define below keeps the Tx vq interrupt + * disabled and adds calls to vtnet_txeof() in the start and watchdog + * paths. The price to pay for this is the m_free'ing of transmitted + * mbufs may be delayed until the watchdog fires. + * + * BMV: Reintroduce this later as a run-time option, if it makes + * sense after the EVENT_IDX feature is supported. + * + * #define VTNET_TX_INTR_MODERATION */ -#define VTNET_TX_INTR_MODERATION static uma_zone_t vtnet_tx_header_zone; @@ -203,21 +276,25 @@ static struct virtio_feature_desc vtnet_feature_desc[] = { { VIRTIO_NET_F_CTRL_RX, "RxMode" }, { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" }, { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" }, + { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, + { VIRTIO_NET_F_MQ, "Multiqueue" }, + { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" }, { 0, NULL } }; static device_method_t vtnet_methods[] = { /* Device methods. */ - DEVMETHOD(device_probe, vtnet_probe), - DEVMETHOD(device_attach, vtnet_attach), - DEVMETHOD(device_detach, vtnet_detach), - DEVMETHOD(device_suspend, vtnet_suspend), - DEVMETHOD(device_resume, vtnet_resume), - DEVMETHOD(device_shutdown, vtnet_shutdown), + DEVMETHOD(device_probe, vtnet_probe), + DEVMETHOD(device_attach, vtnet_attach), + DEVMETHOD(device_detach, vtnet_detach), + DEVMETHOD(device_suspend, vtnet_suspend), + DEVMETHOD(device_resume, vtnet_resume), + DEVMETHOD(device_shutdown, vtnet_shutdown), /* VirtIO methods. */ - DEVMETHOD(virtio_config_change, vtnet_config_change), + DEVMETHOD(virtio_attach_completed, vtnet_attach_completed), + DEVMETHOD(virtio_config_change, vtnet_config_change), DEVMETHOD_END }; @@ -282,56 +359,31 @@ static int vtnet_attach(device_t dev) { struct vtnet_softc *sc; - struct ifnet *ifp; - int tx_size, error; + int error; sc = device_get_softc(dev); sc->vtnet_dev = dev; - VTNET_LOCK_INIT(sc); - callout_init_mtx(&sc->vtnet_tick_ch, VTNET_MTX(sc), 0); - - ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, - vtnet_ifmedia_sts); - ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); - ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); - - vtnet_add_statistics(sc); - + /* Register our feature descriptions. 
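+	 * These names are used only when the negotiated features are
+	 * described during attach; they do not affect the negotiation
+	 * itself.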
*/ virtio_set_feature_desc(dev, vtnet_feature_desc); - vtnet_negotiate_features(sc); - - if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { - sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; - sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); - } else - sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); - sc->vtnet_rx_mbuf_size = MCLBYTES; - sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc); + VTNET_CORE_LOCK_INIT(sc); + callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); - if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { - sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; - - if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) { - sc->vtnet_mac_filter = malloc( - sizeof(struct vtnet_mac_filter), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (sc->vtnet_mac_filter == NULL) { - device_printf(dev, - "cannot allocate mac filter table\n"); - error = ENOMEM; - goto fail; - } + vtnet_setup_sysctl(sc); + vtnet_setup_features(sc); - sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; - } - - if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) - sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; + error = vtnet_alloc_rx_filters(sc); + if (error) { + device_printf(dev, "cannot allocate Rx filters\n"); + goto fail; } - vtnet_get_hwaddr(sc); + error = vtnet_alloc_rxtx_queues(sc); + if (error) { + device_printf(dev, "cannot allocate queues\n"); + goto fail; + } error = vtnet_alloc_virtqueues(sc); if (error) { @@ -339,111 +391,21 @@ vtnet_attach(device_t dev) goto fail; } - ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); - if (ifp == NULL) { - device_printf(dev, "cannot allocate ifnet structure\n"); - error = ENOSPC; + error = vtnet_setup_interface(sc); + if (error) { + device_printf(dev, "cannot setup interface\n"); goto fail; } - ifp->if_softc = sc; - if_initname(ifp, device_get_name(dev), device_get_unit(dev)); - ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; - ifp->if_init = vtnet_init; - ifp->if_start = vtnet_start; - ifp->if_ioctl = vtnet_ioctl; - - sc->vtnet_rx_size = virtqueue_size(sc->vtnet_rx_vq); - sc->vtnet_rx_process_limit = sc->vtnet_rx_size; - - tx_size = virtqueue_size(sc->vtnet_tx_vq); - sc->vtnet_tx_size = tx_size; - IFQ_SET_MAXLEN(&ifp->if_snd, tx_size - 1); - ifp->if_snd.ifq_drv_maxlen = tx_size - 1; - IFQ_SET_READY(&ifp->if_snd); - - ether_ifattach(ifp, sc->vtnet_hwaddr); - - if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) - ifp->if_capabilities |= IFCAP_LINKSTATE; - - /* Tell the upper layer(s) we support long frames. */ - ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); - ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; - - if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { - ifp->if_capabilities |= IFCAP_TXCSUM; - - if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) - ifp->if_capabilities |= IFCAP_TSO4; - if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) - ifp->if_capabilities |= IFCAP_TSO6; - if (ifp->if_capabilities & IFCAP_TSO) - ifp->if_capabilities |= IFCAP_VLAN_HWTSO; - - if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) - sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; - } - - if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) { - ifp->if_capabilities |= IFCAP_RXCSUM; - - if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || - virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) - ifp->if_capabilities |= IFCAP_LRO; - } - - if (ifp->if_capabilities & IFCAP_HWCSUM) { - /* - * VirtIO does not support VLAN tagging, but we can fake - * it by inserting and removing the 802.1Q header during - * transmit and receive. 
We are then able to do checksum - * offloading of VLAN frames. - */ - ifp->if_capabilities |= - IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; - } - - ifp->if_capenable = ifp->if_capabilities; - - /* - * Capabilities after here are not enabled by default. - */ - - if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { - ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; - - sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, - vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); - sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, - vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); - } - -#ifdef DEVICE_POLLING - ifp->if_capabilities |= IFCAP_POLLING; -#endif - error = virtio_setup_intr(dev, INTR_TYPE_NET); if (error) { device_printf(dev, "cannot setup virtqueue interrupts\n"); - ether_ifdetach(ifp); + /* BMV: This will crash if during boot! */ + ether_ifdetach(sc->vtnet_ifp); goto fail; } - /* - * Device defaults to promiscuous mode for backwards - * compatibility. Turn it off if possible. - */ - if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { - VTNET_LOCK(sc); - if (vtnet_set_promisc(sc, 0) != 0) { - ifp->if_flags |= IFF_PROMISC; - device_printf(dev, - "cannot disable promiscuous mode\n"); - } - VTNET_UNLOCK(sc); - } else - ifp->if_flags |= IFF_PROMISC; + vtnet_start_taskqueues(sc); fail: if (error) @@ -461,24 +423,19 @@ vtnet_detach(device_t dev) sc = device_get_softc(dev); ifp = sc->vtnet_ifp; - KASSERT(mtx_initialized(VTNET_MTX(sc)), - ("vtnet mutex not initialized")); - -#ifdef DEVICE_POLLING - if (ifp != NULL && ifp->if_capenable & IFCAP_POLLING) - ether_poll_deregister(ifp); -#endif - if (device_is_attached(dev)) { - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); vtnet_stop(sc); - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); callout_drain(&sc->vtnet_tick_ch); + vtnet_drain_taskqueues(sc); ether_ifdetach(ifp); } + vtnet_free_taskqueues(sc); + if (sc->vtnet_vlan_attach != NULL) { EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach); sc->vtnet_vlan_attach = NULL; @@ -488,25 +445,20 @@ vtnet_detach(device_t dev) sc->vtnet_vlan_detach = NULL; } - if (sc->vtnet_mac_filter != NULL) { - free(sc->vtnet_mac_filter, M_DEVBUF); - sc->vtnet_mac_filter = NULL; - } + ifmedia_removeall(&sc->vtnet_media); if (ifp != NULL) { if_free(ifp); sc->vtnet_ifp = NULL; } - if (sc->vtnet_rx_vq != NULL) - vtnet_free_rx_mbufs(sc); - if (sc->vtnet_tx_vq != NULL) - vtnet_free_tx_mbufs(sc); + vtnet_free_rxtx_queues(sc); + vtnet_free_rx_filters(sc); + if (sc->vtnet_ctrl_vq != NULL) vtnet_free_ctrl_vq(sc); - ifmedia_removeall(&sc->vtnet_media); - VTNET_LOCK_DESTROY(sc); + VTNET_CORE_LOCK_DESTROY(sc); return (0); } @@ -518,10 +470,10 @@ vtnet_suspend(device_t dev) sc = device_get_softc(dev); - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); vtnet_stop(sc); sc->vtnet_flags |= VTNET_FLAG_SUSPENDED; - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); return (0); } @@ -535,11 +487,11 @@ vtnet_resume(device_t dev) sc = device_get_softc(dev); ifp = sc->vtnet_ifp; - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); if (ifp->if_flags & IFF_UP) vtnet_init_locked(sc); sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED; - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); return (0); } @@ -555,6 +507,15 @@ vtnet_shutdown(device_t dev) return (vtnet_suspend(dev)); } +static int +vtnet_attach_completed(device_t dev) +{ + + vtnet_attach_disable_promisc(device_get_softc(dev)); + + return (0); +} + static int vtnet_config_change(device_t dev) { @@ -562,9 +523,11 @@ vtnet_config_change(device_t dev) sc = device_get_softc(dev); - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); 
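+	/*
+	 * A config change interrupt may signal a link status change;
+	 * re-check the link and restart transmit while it is active.
+	 */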
vtnet_update_link_status(sc); - VTNET_UNLOCK(sc); + if (sc->vtnet_link_active != 0) + vtnet_tx_start_all(sc); + VTNET_CORE_UNLOCK(sc); return (0); } @@ -578,188 +541,512 @@ vtnet_negotiate_features(struct vtnet_softc *sc) dev = sc->vtnet_dev; mask = 0; - if (vtnet_csum_disable) - mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; - /* - * TSO and LRO are only available when their corresponding - * checksum offload feature is also negotiated. + * TSO and LRO are only available when their corresponding checksum + * offload feature is also negotiated. */ - - if (vtnet_csum_disable || vtnet_tso_disable) - mask |= VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO6 | - VIRTIO_NET_F_HOST_ECN; - - if (vtnet_csum_disable || vtnet_lro_disable) + if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) { + mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; + mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES; + } + if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) + mask |= VTNET_TSO_FEATURES; + if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) mask |= VTNET_LRO_FEATURES; + if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) + mask |= VIRTIO_NET_F_MQ; +#ifdef VTNET_LEGACY_TX + mask |= VIRTIO_NET_F_MQ; +#endif features = VTNET_FEATURES & ~mask; -#ifdef VTNET_TX_INTR_MODERATION - features |= VIRTIO_F_NOTIFY_ON_EMPTY; -#endif sc->vtnet_features = virtio_negotiate_features(dev, features); - if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0 && - virtio_with_feature(dev, VTNET_LRO_FEATURES)) { - /* - * LRO without mergeable buffers requires special care. This - * is not ideal because every receive buffer must be large - * enough to hold the maximum TCP packet, the Ethernet header, - * and the vtnet_rx_header. This requires up to 34 descriptors - * when using MCLBYTES clusters. If we do not have indirect - * descriptors, LRO is disabled since the virtqueue will not - * be able to contain very many receive buffers. - */ - if (virtio_with_feature(dev, - VIRTIO_RING_F_INDIRECT_DESC) == 0) { - device_printf(dev, - "LRO disabled due to lack of both mergeable " - "buffers and indirect descriptors\n"); + if (virtio_with_feature(dev, VTNET_LRO_FEATURES) == 0) + return; + if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) + return; - sc->vtnet_features = virtio_negotiate_features(dev, - features & ~VTNET_LRO_FEATURES); - } else - sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; - } + /* + * LRO without mergeable buffers requires special care. This is not + * ideal because every receive buffer must be large enough to hold + * the maximum TCP packet, the Ethernet header, and the header. This + * requires up to 34 descriptors with MCLBYTES clusters. If we do + * not have indirect descriptors, LRO is disabled since the virtqueue + * will not contain very many receive buffers. + */ + if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) { + device_printf(dev, + "LRO disabled due to both mergeable buffers and indirect " + "descriptors not negotiated\n"); + + features &= ~VTNET_LRO_FEATURES; + sc->vtnet_features = virtio_negotiate_features(dev, features); + } else + sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; } -static int -vtnet_alloc_virtqueues(struct vtnet_softc *sc) +static void +vtnet_setup_features(struct vtnet_softc *sc) { device_t dev; - struct vq_alloc_info vq_info[3]; - int nvqs, rxsegs; + int max_pairs, max; dev = sc->vtnet_dev; - nvqs = 2; - /* - * Indirect descriptors are not needed for the Rx - * virtqueue when mergeable buffers are negotiated. 
- * The header is placed inline with the data, not - * in a separate descriptor, and mbuf clusters are - * always physically contiguous. - */ - if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { - rxsegs = sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ? - VTNET_MAX_RX_SEGS : VTNET_MIN_RX_SEGS; + vtnet_negotiate_features(sc); + + if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) + sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX; + + if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { + /* This feature should always be negotiated. */ + sc->vtnet_flags |= VTNET_FLAG_MAC; + } + + if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { + sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; + sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); } else - rxsegs = 0; + sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); - VQ_ALLOC_INFO_INIT(&vq_info[0], rxsegs, - vtnet_rx_vq_intr, sc, &sc->vtnet_rx_vq, - "%s receive", device_get_nameunit(dev)); + if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) + sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS; + else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) + sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS; + else + sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS; - VQ_ALLOC_INFO_INIT(&vq_info[1], VTNET_MAX_TX_SEGS, - vtnet_tx_vq_intr, sc, &sc->vtnet_tx_vq, - "%s transmit", device_get_nameunit(dev)); + if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || + virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || + virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) + sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS; + else + sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS; - if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { - nvqs++; + if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { + sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; + + if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) + sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; + if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) + sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; + if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) + sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; + } + + if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && + sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { + max_pairs = virtio_read_dev_config_2(dev, + offsetof(struct virtio_net_config, max_virtqueue_pairs)); + if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || + max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) + max_pairs = 1; + } else + max_pairs = 1; - VQ_ALLOC_INFO_INIT(&vq_info[2], 0, NULL, NULL, - &sc->vtnet_ctrl_vq, "%s control", - device_get_nameunit(dev)); + if (max_pairs > 1) { + /* + * Limit the maximum number of queue pairs to the number of + * CPUs or the configured maximum. The actual number of + * queues that get used may be less. + */ + max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); + if (max > 0 && max_pairs > max) + max_pairs = max; + if (max_pairs > mp_ncpus) + max_pairs = mp_ncpus; + if (max_pairs > VTNET_MAX_QUEUE_PAIRS) + max_pairs = VTNET_MAX_QUEUE_PAIRS; + if (max_pairs > 1) + sc->vtnet_flags |= VTNET_FLAG_MULTIQ; } - return (virtio_alloc_virtqueues(dev, 0, nvqs, vq_info)); + sc->vtnet_max_vq_pairs = max_pairs; } -static void -vtnet_get_hwaddr(struct vtnet_softc *sc) +static int +vtnet_init_rxq(struct vtnet_softc *sc, int id) { - device_t dev; + struct vtnet_rxq *rxq; - dev = sc->vtnet_dev; + rxq = &sc->vtnet_rxqs[id]; - if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { - virtio_read_device_config(dev, - offsetof(struct virtio_net_config, mac), - sc->vtnet_hwaddr, ETHER_ADDR_LEN); - } else { - /* Generate random locally administered unicast address. 
*/ - sc->vtnet_hwaddr[0] = 0xB2; - arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); + snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d", + device_get_nameunit(sc->vtnet_dev), id); + mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF); - vtnet_set_hwaddr(sc); - } -} + rxq->vtnrx_sc = sc; + rxq->vtnrx_id = id; -static void -vtnet_set_hwaddr(struct vtnet_softc *sc) -{ - device_t dev; + rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT); + if (rxq->vtnrx_sg == NULL) + return (ENOMEM); - dev = sc->vtnet_dev; + TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); + rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, + taskqueue_thread_enqueue, &rxq->vtnrx_tq); - virtio_write_device_config(dev, - offsetof(struct virtio_net_config, mac), - sc->vtnet_hwaddr, ETHER_ADDR_LEN); + return (rxq->vtnrx_tq == NULL ? ENOMEM : 0); } static int -vtnet_is_link_up(struct vtnet_softc *sc) +vtnet_init_txq(struct vtnet_softc *sc, int id) { - device_t dev; - struct ifnet *ifp; - uint16_t status; + struct vtnet_txq *txq; - dev = sc->vtnet_dev; - ifp = sc->vtnet_ifp; + txq = &sc->vtnet_txqs[id]; - VTNET_LOCK_ASSERT(sc); + snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d", + device_get_nameunit(sc->vtnet_dev), id); + mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF); - if ((ifp->if_capenable & IFCAP_LINKSTATE) == 0) - return (1); + txq->vtntx_sc = sc; + txq->vtntx_id = id; - status = virtio_read_dev_config_2(dev, - offsetof(struct virtio_net_config, status)); + txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT); + if (txq->vtntx_sg == NULL) + return (ENOMEM); - return ((status & VIRTIO_NET_S_LINK_UP) != 0); +#ifndef VTNET_LEGACY_TX + txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF, + M_NOWAIT, &txq->vtntx_mtx); + if (txq->vtntx_br == NULL) + return (ENOMEM); + + TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq); +#endif + TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq); + txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT, + taskqueue_thread_enqueue, &txq->vtntx_tq); + if (txq->vtntx_tq == NULL) + return (ENOMEM); + + return (0); } -static void -vtnet_update_link_status(struct vtnet_softc *sc) +static int +vtnet_alloc_rxtx_queues(struct vtnet_softc *sc) { - struct ifnet *ifp; - int link; + int i, npairs, error; - ifp = sc->vtnet_ifp; + npairs = sc->vtnet_max_vq_pairs; - link = vtnet_is_link_up(sc); + sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF, + M_NOWAIT | M_ZERO); + sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF, + M_NOWAIT | M_ZERO); + if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL) + return (ENOMEM); - if (link && ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) { - sc->vtnet_flags |= VTNET_FLAG_LINK; - if_link_state_change(ifp, LINK_STATE_UP); - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - vtnet_start_locked(ifp); - } else if (!link && (sc->vtnet_flags & VTNET_FLAG_LINK)) { - sc->vtnet_flags &= ~VTNET_FLAG_LINK; - if_link_state_change(ifp, LINK_STATE_DOWN); + for (i = 0; i < npairs; i++) { + error = vtnet_init_rxq(sc, i); + if (error) + return (error); + error = vtnet_init_txq(sc, i); + if (error) + return (error); } + + vtnet_setup_queue_sysctl(sc); + + return (0); } static void -vtnet_watchdog(struct vtnet_softc *sc) +vtnet_destroy_rxq(struct vtnet_rxq *rxq) { - struct ifnet *ifp; - ifp = sc->vtnet_ifp; + rxq->vtnrx_sc = NULL; + rxq->vtnrx_id = -1; -#ifdef VTNET_TX_INTR_MODERATION - vtnet_txeof(sc); -#endif + if (rxq->vtnrx_sg != NULL) { + 
sglist_free(rxq->vtnrx_sg); + rxq->vtnrx_sg = NULL; + } - if (sc->vtnet_watchdog_timer == 0 || --sc->vtnet_watchdog_timer) - return; + if (mtx_initialized(&rxq->vtnrx_mtx) != 0) + mtx_destroy(&rxq->vtnrx_mtx); +} + +static void +vtnet_destroy_txq(struct vtnet_txq *txq) +{ + + txq->vtntx_sc = NULL; + txq->vtntx_id = -1; + + if (txq->vtntx_sg != NULL) { + sglist_free(txq->vtntx_sg); + txq->vtntx_sg = NULL; + } - if_printf(ifp, "watchdog timeout -- resetting\n"); -#ifdef VTNET_DEBUG - virtqueue_dump(sc->vtnet_tx_vq); +#ifndef VTNET_LEGACY_TX + if (txq->vtntx_br != NULL) { + buf_ring_free(txq->vtntx_br, M_DEVBUF); + txq->vtntx_br = NULL; + } #endif - ifp->if_oerrors++; - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - vtnet_init_locked(sc); + + if (mtx_initialized(&txq->vtntx_mtx) != 0) + mtx_destroy(&txq->vtntx_mtx); +} + +static void +vtnet_free_rxtx_queues(struct vtnet_softc *sc) +{ + int i; + + if (sc->vtnet_rxqs != NULL) { + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) + vtnet_destroy_rxq(&sc->vtnet_rxqs[i]); + free(sc->vtnet_rxqs, M_DEVBUF); + sc->vtnet_rxqs = NULL; + } + + if (sc->vtnet_txqs != NULL) { + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) + vtnet_destroy_txq(&sc->vtnet_txqs[i]); + free(sc->vtnet_txqs, M_DEVBUF); + sc->vtnet_txqs = NULL; + } +} + +static int +vtnet_alloc_rx_filters(struct vtnet_softc *sc) +{ + + if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { + sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter), + M_DEVBUF, M_NOWAIT | M_ZERO); + if (sc->vtnet_mac_filter == NULL) + return (ENOMEM); + } + + if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { + sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) * + VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO); + if (sc->vtnet_vlan_filter == NULL) + return (ENOMEM); + } + + return (0); +} + +static void +vtnet_free_rx_filters(struct vtnet_softc *sc) +{ + + if (sc->vtnet_mac_filter != NULL) { + free(sc->vtnet_mac_filter, M_DEVBUF); + sc->vtnet_mac_filter = NULL; + } + + if (sc->vtnet_vlan_filter != NULL) { + free(sc->vtnet_vlan_filter, M_DEVBUF); + sc->vtnet_vlan_filter = NULL; + } +} + +static int +vtnet_alloc_virtqueues(struct vtnet_softc *sc) +{ + device_t dev; + struct vq_alloc_info *info; + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i, idx, flags, nvqs, error; + + dev = sc->vtnet_dev; + flags = 0; + + nvqs = sc->vtnet_max_vq_pairs * 2; + if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) + nvqs++; + + info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT); + if (info == NULL) + return (ENOMEM); + + for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) { + rxq = &sc->vtnet_rxqs[i]; + VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, + vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, + "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id); + + txq = &sc->vtnet_txqs[i]; + VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs, + vtnet_tx_vq_intr, txq, &txq->vtntx_vq, + "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id); + } + + if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { + VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, + &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); + } + + /* + * Enable interrupt binding if this is multiqueue. This only matters + * when per-vq MSIX is available. 
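+	 * No interrupt binding flag is defined at this point, so the
+	 * OR below is effectively a no-op placeholder.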
+ */ + if (sc->vtnet_flags & VTNET_FLAG_MULTIQ) + flags |= 0; + + error = virtio_alloc_virtqueues(dev, flags, nvqs, info); + free(info, M_TEMP); + + return (error); +} + +static int +vtnet_setup_interface(struct vtnet_softc *sc) +{ + device_t dev; + struct ifnet *ifp; + int limit; + + dev = sc->vtnet_dev; + + ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); + if (ifp == NULL) { + device_printf(dev, "cannot allocate ifnet structure\n"); + return (ENOSPC); + } + + if_initname(ifp, device_get_name(dev), device_get_unit(dev)); + ifp->if_softc = sc; + ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; + ifp->if_init = vtnet_init; + ifp->if_ioctl = vtnet_ioctl; + +#ifndef VTNET_LEGACY_TX + ifp->if_transmit = vtnet_txq_mq_start; + ifp->if_qflush = vtnet_qflush; +#else + struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq; + ifp->if_start = vtnet_start; + IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1); + ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1; + IFQ_SET_READY(&ifp->if_snd); +#endif + + ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, + vtnet_ifmedia_sts); + ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); + ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); + + /* Read (or generate) the MAC address for the adapter. */ + vtnet_get_hwaddr(sc); + + ether_ifattach(ifp, sc->vtnet_hwaddr); + + if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) + ifp->if_capabilities |= IFCAP_LINKSTATE; + + /* Tell the upper layer(s) we support long frames. */ + ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); + ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; + + if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { + ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6; + + if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) { + ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; + sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; + } else { + if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) + ifp->if_capabilities |= IFCAP_TSO4; + if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) + ifp->if_capabilities |= IFCAP_TSO6; + if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) + sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; + } + + if (ifp->if_capabilities & IFCAP_TSO) + ifp->if_capabilities |= IFCAP_VLAN_HWTSO; + } + + if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) + ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; + + if (ifp->if_capabilities & IFCAP_HWCSUM) { + /* + * VirtIO does not support VLAN tagging, but we can fake + * it by inserting and removing the 802.1Q header during + * transmit and receive. We are then able to do checksum + * offloading of VLAN frames. + */ + ifp->if_capabilities |= + IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; + } + + ifp->if_capenable = ifp->if_capabilities; + + /* + * Capabilities after here are not enabled by default. 
+ */ + + if (ifp->if_capabilities & IFCAP_RXCSUM) { + if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || + virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) + ifp->if_capabilities |= IFCAP_LRO; + } + + if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { + ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; + + sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, + vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); + sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, + vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); + } + + limit = vtnet_tunable_int(sc, "rx_process_limit", + vtnet_rx_process_limit); + if (limit < 0) + limit = INT_MAX; + sc->vtnet_rx_process_limit = limit; + + return (0); +} + +static int +vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) +{ + struct ifnet *ifp; + int frame_size, clsize; + + ifp = sc->vtnet_ifp; + + if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU) + return (EINVAL); + + frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) + + new_mtu; + + /* + * Based on the new MTU (and hence frame size) determine which + * cluster size is most appropriate for the receive queues. + */ + if (frame_size <= MCLBYTES) { + clsize = MCLBYTES; + } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { + /* Avoid going past 9K jumbos. */ + if (frame_size > MJUM9BYTES) + return (EINVAL); + clsize = MJUM9BYTES; + } else + clsize = MJUMPAGESIZE; + + ifp->if_mtu = new_mtu; + sc->vtnet_rx_new_clsize = clsize; + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) { + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + vtnet_init_locked(sc); + } + + return (0); } static int @@ -771,22 +1058,19 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sc = ifp->if_softc; ifr = (struct ifreq *) data; - reinit = 0; error = 0; switch (cmd) { case SIOCSIFMTU: - if (ifr->ifr_mtu < ETHERMIN || ifr->ifr_mtu > VTNET_MAX_MTU) - error = EINVAL; - else if (ifp->if_mtu != ifr->ifr_mtu) { - VTNET_LOCK(sc); + if (ifp->if_mtu != ifr->ifr_mtu) { + VTNET_CORE_LOCK(sc); error = vtnet_change_mtu(sc, ifr->ifr_mtu); - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); } break; case SIOCSIFFLAGS: - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); if ((ifp->if_flags & IFF_UP) == 0) { if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_stop(sc); @@ -803,16 +1087,17 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if (error == 0) sc->vtnet_if_flags = ifp->if_flags; - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); break; case SIOCADDMULTI: case SIOCDELMULTI: - VTNET_LOCK(sc); - if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) && - (ifp->if_drv_flags & IFF_DRV_RUNNING)) + if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) + break; + VTNET_CORE_LOCK(sc); + if (ifp->if_drv_flags & IFF_DRV_RUNNING) vtnet_rx_filter_mac(sc); - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); break; case SIOCSIFMEDIA: @@ -821,68 +1106,36 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; case SIOCSIFCAP: + VTNET_CORE_LOCK(sc); mask = ifr->ifr_reqcap ^ ifp->if_capenable; -#ifdef DEVICE_POLLING - if (mask & IFCAP_POLLING) { - if (ifr->ifr_reqcap & IFCAP_POLLING) { - error = ether_poll_register(vtnet_poll, ifp); - if (error) - break; - - VTNET_LOCK(sc); - vtnet_disable_rx_intr(sc); - vtnet_disable_tx_intr(sc); - ifp->if_capenable |= IFCAP_POLLING; - VTNET_UNLOCK(sc); - } else { - error = ether_poll_deregister(ifp); - - /* Enable interrupts even in error case. 
*/ - VTNET_LOCK(sc); - vtnet_enable_tx_intr(sc); - vtnet_enable_rx_intr(sc); - ifp->if_capenable &= ~IFCAP_POLLING; - VTNET_UNLOCK(sc); - } - } -#endif - VTNET_LOCK(sc); - - if (mask & IFCAP_TXCSUM) { + if (mask & IFCAP_TXCSUM) ifp->if_capenable ^= IFCAP_TXCSUM; - if (ifp->if_capenable & IFCAP_TXCSUM) - ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; - else - ifp->if_hwassist &= ~VTNET_CSUM_OFFLOAD; - } - - if (mask & IFCAP_TSO4) { + if (mask & IFCAP_TXCSUM_IPV6) + ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; + if (mask & IFCAP_TSO4) ifp->if_capenable ^= IFCAP_TSO4; - if (ifp->if_capenable & IFCAP_TSO4) - ifp->if_hwassist |= CSUM_TSO; - else - ifp->if_hwassist &= ~CSUM_TSO; - } - - if (mask & IFCAP_RXCSUM) { - ifp->if_capenable ^= IFCAP_RXCSUM; - reinit = 1; - } + if (mask & IFCAP_TSO6) + ifp->if_capenable ^= IFCAP_TSO6; - if (mask & IFCAP_LRO) { - ifp->if_capenable ^= IFCAP_LRO; + if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | + IFCAP_VLAN_HWFILTER)) { + /* These Rx features require us to renegotiate. */ reinit = 1; - } - if (mask & IFCAP_VLAN_HWFILTER) { - ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; - reinit = 1; - } + if (mask & IFCAP_RXCSUM) + ifp->if_capenable ^= IFCAP_RXCSUM; + if (mask & IFCAP_RXCSUM_IPV6) + ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; + if (mask & IFCAP_LRO) + ifp->if_capenable ^= IFCAP_LRO; + if (mask & IFCAP_VLAN_HWFILTER) + ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; + } else + reinit = 0; if (mask & IFCAP_VLAN_HWTSO) ifp->if_capenable ^= IFCAP_VLAN_HWTSO; - if (mask & IFCAP_VLAN_HWTAGGING) ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; @@ -890,9 +1143,10 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) ifp->if_drv_flags &= ~IFF_DRV_RUNNING; vtnet_init_locked(sc); } + + VTNET_CORE_UNLOCK(sc); VLAN_CAPABILITIES(ifp); - VTNET_UNLOCK(sc); break; default: @@ -900,80 +1154,32 @@ vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; } - VTNET_LOCK_ASSERT_NOTOWNED(sc); + VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc); return (error); } static int -vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) -{ - struct ifnet *ifp; - int new_frame_size, clsize; - - ifp = sc->vtnet_ifp; - - if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { - new_frame_size = sizeof(struct vtnet_rx_header) + - sizeof(struct ether_vlan_header) + new_mtu; - - if (new_frame_size > MJUM9BYTES) - return (EINVAL); - - if (new_frame_size <= MCLBYTES) - clsize = MCLBYTES; - else - clsize = MJUM9BYTES; - } else { - new_frame_size = sizeof(struct virtio_net_hdr_mrg_rxbuf) + - sizeof(struct ether_vlan_header) + new_mtu; - - if (new_frame_size <= MCLBYTES) - clsize = MCLBYTES; - else - clsize = MJUMPAGESIZE; - } - - sc->vtnet_rx_mbuf_size = clsize; - sc->vtnet_rx_mbuf_count = VTNET_NEEDED_RX_MBUFS(sc); - KASSERT(sc->vtnet_rx_mbuf_count < VTNET_MAX_RX_SEGS, - ("too many rx mbufs: %d", sc->vtnet_rx_mbuf_count)); - - ifp->if_mtu = new_mtu; - - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - ifp->if_drv_flags &= ~IFF_DRV_RUNNING; - vtnet_init_locked(sc); - } - - return (0); -} - -static int -vtnet_init_rx_vq(struct vtnet_softc *sc) +vtnet_rxq_populate(struct vtnet_rxq *rxq) { struct virtqueue *vq; int nbufs, error; - vq = sc->vtnet_rx_vq; - nbufs = 0; + vq = rxq->vtnrx_vq; error = ENOSPC; - while (!virtqueue_full(vq)) { - if ((error = vtnet_newbuf(sc)) != 0) + for (nbufs = 0; !virtqueue_full(vq); nbufs++) { + error = vtnet_rxq_new_buf(rxq); + if (error) break; - nbufs++; } if (nbufs > 0) { virtqueue_notify(vq); - /* * EMSGSIZE signifies the virtqueue did not have enough * entries available to hold the last mbuf. 
This is not - * an error. We should not get ENOSPC since we check if - * the virtqueue is full before attempting to add a - * buffer. + * an error. */ if (error == EMSGSIZE) error = 0; @@ -983,107 +1189,49 @@ vtnet_init_rx_vq(struct vtnet_softc *sc) } static void -vtnet_free_rx_mbufs(struct vtnet_softc *sc) +vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq) { struct virtqueue *vq; struct mbuf *m; int last; - vq = sc->vtnet_rx_vq; + vq = rxq->vtnrx_vq; last = 0; while ((m = virtqueue_drain(vq, &last)) != NULL) m_freem(m); - KASSERT(virtqueue_empty(vq), ("mbufs remaining in Rx Vq")); + KASSERT(virtqueue_empty(vq), + ("%s: mbufs remaining in rx queue %p", __func__, rxq)); } -static void -vtnet_free_tx_mbufs(struct vtnet_softc *sc) +static struct mbuf * +vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) { - struct virtqueue *vq; - struct vtnet_tx_header *txhdr; - int last; + struct mbuf *m_head, *m_tail, *m; + int i, clsize; - vq = sc->vtnet_tx_vq; - last = 0; + clsize = sc->vtnet_rx_clsize; - while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { - m_freem(txhdr->vth_mbuf); - uma_zfree(vtnet_tx_header_zone, txhdr); - } + KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, + ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs)); - KASSERT(virtqueue_empty(vq), ("mbufs remaining in Tx Vq")); -} + m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); + if (m_head == NULL) + goto fail; -static void -vtnet_free_ctrl_vq(struct vtnet_softc *sc) -{ + m_head->m_len = clsize; + m_tail = m_head; - /* - * The control virtqueue is only polled, therefore - * it should already be empty. - */ - KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq), - ("Ctrl Vq not empty")); -} + /* Allocate the rest of the chain. */ + for (i = 1; i < nbufs; i++) { + m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); + if (m == NULL) + goto fail; -#ifdef DEVICE_POLLING -static int -vtnet_poll(struct ifnet *ifp, enum poll_cmd cmd, int count) -{ - struct vtnet_softc *sc; - int rx_done; - - sc = ifp->if_softc; - rx_done = 0; - - VTNET_LOCK(sc); - if (ifp->if_drv_flags & IFF_DRV_RUNNING) { - if (cmd == POLL_AND_CHECK_STATUS) - vtnet_update_link_status(sc); - - if (virtqueue_nused(sc->vtnet_rx_vq) > 0) - vtnet_rxeof(sc, count, &rx_done); - - vtnet_txeof(sc); - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - vtnet_start_locked(ifp); - } - VTNET_UNLOCK(sc); - - return (rx_done); -} -#endif /* DEVICE_POLLING */ - -static struct mbuf * -vtnet_alloc_rxbuf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) -{ - struct mbuf *m_head, *m_tail, *m; - int i, clsize; - - clsize = sc->vtnet_rx_mbuf_size; - - m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); - if (m_head == NULL) - goto fail; - - m_head->m_len = clsize; - m_tail = m_head; - - if (nbufs > 1) { - KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, - ("chained Rx mbuf requested without LRO_NOMRG")); - - for (i = 1; i < nbufs; i++) { - m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); - if (m == NULL) - goto fail; - - m->m_len = clsize; - m_tail->m_next = m; - m_tail = m; - } + m->m_len = clsize; + m_tail->m_next = m; + m_tail = m; } if (m_tailp != NULL) @@ -1098,43 +1246,48 @@ fail: return (NULL); } +/* + * Slow path for when LRO without mergeable buffers is negotiated. 
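+ * Each receive buffer posted to the virtqueue is a chain of clusters
+ * sized to hold a maximal TCP packet, so only the portion of the chain
+ * actually consumed by the received frame is replaced.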
+ */ static int -vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0) +vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0, + int len0) { + struct vtnet_softc *sc; struct mbuf *m, *m_prev; struct mbuf *m_new, *m_tail; int len, clsize, nreplace, error; - m = m0; - m_prev = NULL; - len = len0; + sc = rxq->vtnrx_sc; + clsize = sc->vtnet_rx_clsize; + m_prev = NULL; m_tail = NULL; - clsize = sc->vtnet_rx_mbuf_size; nreplace = 0; - KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || - m->m_next == NULL, ("chained Rx mbuf without LRO_NOMRG")); + m = m0; + len = len0; /* - * Since LRO_NOMRG mbuf chains are so large, we want to avoid - * allocating an entire chain for each received frame. When - * the received frame's length is less than that of the chain, - * the unused mbufs are reassigned to the new chain. + * Since these mbuf chains are so large, we avoid allocating an + * entire replacement chain if possible. When the received frame + * did not consume the entire chain, the unused mbufs are moved + * to the replacement chain. */ while (len > 0) { /* - * Something is seriously wrong if we received - * a frame larger than the mbuf chain. Drop it. + * Something is seriously wrong if we received a frame + * larger than the chain. Drop it. */ if (m == NULL) { sc->vtnet_stats.rx_frame_too_large++; return (EMSGSIZE); } + /* We always allocate the same cluster size. */ KASSERT(m->m_len == clsize, - ("mbuf length not expected cluster size: %d", - m->m_len)); + ("%s: mbuf size %d is not the cluster size %d", + __func__, m->m_len, clsize)); m->m_len = MIN(m->m_len, len); len -= m->m_len; @@ -1144,27 +1297,26 @@ vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0) nreplace++; } - KASSERT(m_prev != NULL, ("m_prev == NULL")); - KASSERT(nreplace <= sc->vtnet_rx_mbuf_count, - ("too many replacement mbufs: %d/%d", nreplace, - sc->vtnet_rx_mbuf_count)); + KASSERT(nreplace <= sc->vtnet_rx_nmbufs, + ("%s: too many replacement mbufs %d max %d", __func__, nreplace, + sc->vtnet_rx_nmbufs)); - m_new = vtnet_alloc_rxbuf(sc, nreplace, &m_tail); + m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail); if (m_new == NULL) { m_prev->m_len = clsize; return (ENOBUFS); } /* - * Move unused mbufs, if any, from the original chain - * onto the end of the new chain. + * Move any unused mbufs from the received chain onto the end + * of the new chain. */ if (m_prev->m_next != NULL) { m_tail->m_next = m_prev->m_next; m_prev->m_next = NULL; } - error = vtnet_enqueue_rxbuf(sc, m_new); + error = vtnet_rxq_enqueue_buf(rxq, m_new); if (error) { /* * BAD! We could not enqueue the replacement mbuf chain. We @@ -1189,343 +1341,321 @@ vtnet_replace_rxbuf(struct vtnet_softc *sc, struct mbuf *m0, int len0) } static int -vtnet_newbuf(struct vtnet_softc *sc) +vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) { - struct mbuf *m; + struct vtnet_softc *sc; + struct mbuf *m_new; int error; - m = vtnet_alloc_rxbuf(sc, sc->vtnet_rx_mbuf_count, NULL); - if (m == NULL) - return (ENOBUFS); - - error = vtnet_enqueue_rxbuf(sc, m); - if (error) - m_freem(m); - - return (error); -} + sc = rxq->vtnrx_sc; -static void -vtnet_discard_merged_rxbuf(struct vtnet_softc *sc, int nbufs) -{ - struct virtqueue *vq; - struct mbuf *m; + KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, + ("%s: chained mbuf without LRO_NOMRG", __func__)); - vq = sc->vtnet_rx_vq; + if (m->m_next == NULL) { + /* Fast-path for the common case of just one mbuf. 
*/
+		if (m->m_len < len)
+			return (EINVAL);
 
-	while (--nbufs > 0) {
-		if ((m = virtqueue_dequeue(vq, NULL)) == NULL)
-			break;
-		vtnet_discard_rxbuf(sc, m);
-	}
-}
+		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
+		if (m_new == NULL)
+			return (ENOBUFS);
 
-static void
-vtnet_discard_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
-{
-	int error;
+		error = vtnet_rxq_enqueue_buf(rxq, m_new);
+		if (error) {
+			/*
+			 * The new mbuf is supposed to be an identical
+			 * copy of the one just dequeued so this is an
+			 * unexpected error.
+			 */
+			m_freem(m_new);
+			sc->vtnet_stats.rx_enq_replacement_failed++;
+		} else
+			m->m_len = len;
+	} else
+		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
 
-	/*
-	 * Requeue the discarded mbuf. This should always be
-	 * successful since it was just dequeued.
-	 */
-	error = vtnet_enqueue_rxbuf(sc, m);
-	KASSERT(error == 0, ("cannot requeue discarded mbuf"));
+	return (error);
 }
 
 static int
-vtnet_enqueue_rxbuf(struct vtnet_softc *sc, struct mbuf *m)
+vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
 {
-	struct sglist sg;
-	struct sglist_seg segs[VTNET_MAX_RX_SEGS];
+	struct vtnet_softc *sc;
+	struct sglist *sg;
 	struct vtnet_rx_header *rxhdr;
-	struct virtio_net_hdr *hdr;
 	uint8_t *mdata;
 	int offset, error;
 
-	VTNET_LOCK_ASSERT(sc);
-	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG ||
-	    m->m_next == NULL, ("chained Rx mbuf without LRO_NOMRG"));
-
-	sglist_init(&sg, VTNET_MAX_RX_SEGS, segs);
-
+	sc = rxq->vtnrx_sc;
+	sg = rxq->vtnrx_sg;
 	mdata = mtod(m, uint8_t *);
-	offset = 0;
 
+	VTNET_RXQ_LOCK_ASSERT(rxq);
+	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
+	    ("%s: chained mbuf without LRO_NOMRG", __func__));
+	KASSERT(m->m_len == sc->vtnet_rx_clsize,
+	    ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
+	    sc->vtnet_rx_clsize));
+
+	sglist_reset(sg);
 	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
+		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
 		rxhdr = (struct vtnet_rx_header *) mdata;
-		hdr = &rxhdr->vrh_hdr;
-		offset += sizeof(struct vtnet_rx_header);
-
-		error = sglist_append(&sg, hdr, sc->vtnet_hdr_size);
-		KASSERT(error == 0, ("cannot add header to sglist"));
-	}
-
-	error = sglist_append(&sg, mdata + offset, m->m_len - offset);
-	if (error)
-		return (error);
+		sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
+		offset = sizeof(struct vtnet_rx_header);
+	} else
+		offset = 0;
 
+	sglist_append(sg, mdata + offset, m->m_len - offset);
 	if (m->m_next != NULL) {
-		error = sglist_append_mbuf(&sg, m->m_next);
-		if (error)
-			return (error);
+		error = sglist_append_mbuf(sg, m->m_next);
+		MPASS(error == 0);
 	}
 
-	return (virtqueue_enqueue(sc->vtnet_rx_vq, m, &sg, 0, sg.sg_nseg));
+	error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg);
+
+	return (error);
 }
 
-static void
-vtnet_vlan_tag_remove(struct mbuf *m)
+static int
+vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
 {
-	struct ether_vlan_header *evl;
+	struct vtnet_softc *sc;
+	struct mbuf *m;
+	int error;
 
-	evl = mtod(m, struct ether_vlan_header *);
+	sc = rxq->vtnrx_sc;
 
-	m->m_pkthdr.ether_vtag = ntohs(evl->evl_tag);
-	m->m_flags |= M_VLANTAG;
+	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
+	if (m == NULL)
+		return (ENOBUFS);
 
-	/* Strip the 802.1Q header. 
*/ - bcopy((char *) evl, (char *) evl + ETHER_VLAN_ENCAP_LEN, - ETHER_HDR_LEN - ETHER_TYPE_LEN); - m_adj(m, ETHER_VLAN_ENCAP_LEN); + error = vtnet_rxq_enqueue_buf(rxq, m); + if (error) + m_freem(m); + + return (error); } -#ifdef notyet +/* + * Use the checksum offset in the VirtIO header to set the + * correct CSUM_* flags. + */ static int -vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m, - struct virtio_net_hdr *hdr) +vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m, + uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) { - struct ether_header *eh; - struct ether_vlan_header *evh; - struct ip *ip; - struct ip6_hdr *ip6; - struct udphdr *udp; - int ip_offset, csum_start, csum_offset, hlen; - uint16_t eth_type; - uint8_t ip_proto; - - /* - * Convert the VirtIO checksum interface to FreeBSD's interface. - * The host only provides us with the offset at which to start - * checksumming, and the offset from that to place the completed - * checksum. While this maps well with how Linux does checksums, - * for FreeBSD, we must parse the received packet in order to set - * the appropriate CSUM_* flags. - */ - - /* - * Every mbuf added to the receive virtqueue is always at least - * MCLBYTES big, so assume something is amiss if the first mbuf - * does not contain both the Ethernet and protocol headers. - */ - ip_offset = sizeof(struct ether_header); - if (m->m_len < ip_offset) - return (1); + struct vtnet_softc *sc; +#if defined(INET) || defined(INET6) + int offset = hdr->csum_start + hdr->csum_offset; +#endif - eh = mtod(m, struct ether_header *); - eth_type = ntohs(eh->ether_type); - if (eth_type == ETHERTYPE_VLAN) { - ip_offset = sizeof(struct ether_vlan_header); - if (m->m_len < ip_offset) - return (1); - evh = mtod(m, struct ether_vlan_header *); - eth_type = ntohs(evh->evl_proto); - } + sc = rxq->vtnrx_sc; + /* Only do a basic sanity check on the offset. */ switch (eth_type) { +#if defined(INET) case ETHERTYPE_IP: - if (m->m_len < ip_offset + sizeof(struct ip)) - return (1); - - ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset); - /* Sanity check the IP header. */ - if (ip->ip_v != IPVERSION) - return (1); - hlen = ip->ip_hl << 2; - if (hlen < sizeof(struct ip)) - return (1); - if (ntohs(ip->ip_len) < hlen) - return (1); - if (ntohs(ip->ip_len) != (m->m_pkthdr.len - ip_offset)) + if (__predict_false(offset < ip_start + sizeof(struct ip))) return (1); - - ip_proto = ip->ip_p; - csum_start = ip_offset + hlen; break; - +#endif +#if defined(INET6) case ETHERTYPE_IPV6: - if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) + if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) return (1); - - /* - * XXX FreeBSD does not handle any IPv6 checksum offloading - * at the moment. - */ - - ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset); - /* XXX Assume no extension headers are present. */ - ip_proto = ip6->ip6_nxt; - csum_start = ip_offset + sizeof(struct ip6_hdr); break; - +#endif default: sc->vtnet_stats.rx_csum_bad_ethtype++; return (1); } - /* Assume checksum begins right after the IP header. */ - if (hdr->csum_start != csum_start) { - sc->vtnet_stats.rx_csum_bad_start++; - return (1); - } - - switch (ip_proto) { - case IPPROTO_TCP: - csum_offset = offsetof(struct tcphdr, th_sum); - break; - - case IPPROTO_UDP: - csum_offset = offsetof(struct udphdr, uh_sum); + /* + * Use the offset to determine the appropriate CSUM_* flags. This is + * a bit dirty, but we can get by with it since the checksum offsets + * happen to be different. 
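+	 * (uh_sum sits at offset 6 within the UDP header, th_sum at
+	 * offset 16 within the TCP header, and checksum at offset 8
+	 * within the SCTP header.)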
We assume the host does not do IPv4 + * header checksum offloading. + */ + switch (hdr->csum_offset) { + case offsetof(struct udphdr, uh_sum): + case offsetof(struct tcphdr, th_sum): + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xFFFF; break; - - case IPPROTO_SCTP: - csum_offset = offsetof(struct sctphdr, checksum); + case offsetof(struct sctphdr, checksum): + m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; - default: - sc->vtnet_stats.rx_csum_bad_ipproto++; - return (1); - } - - if (hdr->csum_offset != csum_offset) { sc->vtnet_stats.rx_csum_bad_offset++; return (1); } - /* - * The IP header checksum is almost certainly valid but I'm - * uncertain if that is guaranteed. - * - * m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED | CSUM_IP_VALID; - */ + return (0); +} - switch (ip_proto) { - case IPPROTO_UDP: - if (m->m_len < csum_start + sizeof(struct udphdr)) - return (1); +static int +vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m, + uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) +{ + struct vtnet_softc *sc; + int offset, proto; - udp = (struct udphdr *)(mtod(m, uint8_t *) + csum_start); - if (udp->uh_sum == 0) - return (0); + sc = rxq->vtnrx_sc; - /* FALLTHROUGH */ + switch (eth_type) { +#if defined(INET) + case ETHERTYPE_IP: { + struct ip *ip; + if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) + return (1); + ip = (struct ip *)(m->m_data + ip_start); + proto = ip->ip_p; + offset = ip_start + (ip->ip_hl << 2); + break; + } +#endif +#if defined(INET6) + case ETHERTYPE_IPV6: + if (__predict_false(m->m_len < ip_start + + sizeof(struct ip6_hdr))) + return (1); + offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); + if (__predict_false(offset < 0)) + return (1); + break; +#endif + default: + sc->vtnet_stats.rx_csum_bad_ethtype++; + return (1); + } + switch (proto) { case IPPROTO_TCP: + if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) + return (1); + m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; + m->m_pkthdr.csum_data = 0xFFFF; + break; + case IPPROTO_UDP: + if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) + return (1); m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; m->m_pkthdr.csum_data = 0xFFFF; break; - case IPPROTO_SCTP: + if (__predict_false(m->m_len < offset + sizeof(struct sctphdr))) + return (1); m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; break; + default: + /* + * For the remaining protocols, FreeBSD does not support + * checksum offloading, so the checksum will be recomputed. + */ +#if 0 + if_printf(sc->vtnet_ifp, "%s: cksum offload of unsupported " + "protocol eth_type=%#x proto=%d csum_start=%d " + "csum_offset=%d\n", __func__, eth_type, proto, + hdr->csum_start, hdr->csum_offset); +#endif + break; } - sc->vtnet_stats.rx_csum_offloaded++; - return (0); } -#endif /* - * Alternative method of doing receive checksum offloading. Rather - * than parsing the received frame down to the IP header, use the - * csum_offset to determine which CSUM_* flags are appropriate. We - * can get by with doing this only because the checksum offsets are - * unique for the things we care about. + * Set the appropriate CSUM_* flags. Unfortunately, the information + * provided is not directly useful to us. The VirtIO header gives the + * offset of the checksum, which is all Linux needs, but this is not + * how FreeBSD does things. We are forced to peek inside the packet + * a bit. 
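+ * At minimum we must learn the L4 protocol to choose the right flags.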
+ * + * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD + * could accept the offsets and let the stack figure it out. */ static int -vtnet_rx_csum(struct vtnet_softc *sc, struct mbuf *m, +vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, struct virtio_net_hdr *hdr) { struct ether_header *eh; struct ether_vlan_header *evh; - struct udphdr *udp; - int csum_len; uint16_t eth_type; - - csum_len = hdr->csum_start + hdr->csum_offset; - - if (csum_len < sizeof(struct ether_header) + sizeof(struct ip)) - return (1); - if (m->m_len < csum_len) - return (1); + int offset, error; eh = mtod(m, struct ether_header *); eth_type = ntohs(eh->ether_type); if (eth_type == ETHERTYPE_VLAN) { + /* BMV: We should handle nested VLAN tags too. */ evh = mtod(m, struct ether_vlan_header *); eth_type = ntohs(evh->evl_proto); - } - - if (eth_type != ETHERTYPE_IP && eth_type != ETHERTYPE_IPV6) { - sc->vtnet_stats.rx_csum_bad_ethtype++; - return (1); - } - - /* Use the offset to determine the appropriate CSUM_* flags. */ - switch (hdr->csum_offset) { - case offsetof(struct udphdr, uh_sum): - if (m->m_len < hdr->csum_start + sizeof(struct udphdr)) - return (1); - udp = (struct udphdr *)(mtod(m, uint8_t *) + hdr->csum_start); - if (udp->uh_sum == 0) - return (0); + offset = sizeof(struct ether_vlan_header); + } else + offset = sizeof(struct ether_header); - /* FALLTHROUGH */ + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) + error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr); + else + error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr); - case offsetof(struct tcphdr, th_sum): - m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; - m->m_pkthdr.csum_data = 0xFFFF; - break; + return (error); +} - case offsetof(struct sctphdr, checksum): - m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; - break; +static void +vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) +{ + struct mbuf *m; - default: - sc->vtnet_stats.rx_csum_bad_offset++; - return (1); + while (--nbufs > 0) { + m = virtqueue_dequeue(rxq->vtnrx_vq, NULL); + if (m == NULL) + break; + vtnet_rxq_discard_buf(rxq, m); } +} - sc->vtnet_stats.rx_csum_offloaded++; +static void +vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m) +{ + int error; - return (0); + /* + * Requeue the discarded mbuf. This should always be successful + * since it was just dequeued. 
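+ * (The dequeue freed a descriptor, so an identical enqueue cannot fail for lack of space.)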
+ */ + error = vtnet_rxq_enqueue_buf(rxq, m); + KASSERT(error == 0, + ("%s: cannot requeue discarded mbuf %d", __func__, error)); } static int -vtnet_rxeof_merged(struct vtnet_softc *sc, struct mbuf *m_head, int nbufs) +vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) { + struct vtnet_softc *sc; struct ifnet *ifp; struct virtqueue *vq; struct mbuf *m, *m_tail; int len; + sc = rxq->vtnrx_sc; + vq = rxq->vtnrx_vq; ifp = sc->vtnet_ifp; - vq = sc->vtnet_rx_vq; m_tail = m_head; while (--nbufs > 0) { m = virtqueue_dequeue(vq, &len); if (m == NULL) { - ifp->if_ierrors++; + rxq->vtnrx_stats.vrxs_ierrors++; goto fail; } - if (vtnet_newbuf(sc) != 0) { - ifp->if_iqdrops++; - vtnet_discard_rxbuf(sc, m); + if (vtnet_rxq_new_buf(rxq) != 0) { + rxq->vtnrx_stats.vrxs_iqdrops++; + vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) - vtnet_discard_merged_rxbuf(sc, nbufs); + vtnet_rxq_discard_merged_bufs(rxq, nbufs); goto fail; } @@ -1549,35 +1679,83 @@ fail: return (1); } -static int -vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp) +static void +vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, + struct virtio_net_hdr *hdr) { - struct virtio_net_hdr lhdr; + struct vtnet_softc *sc; struct ifnet *ifp; - struct virtqueue *vq; - struct mbuf *m; struct ether_header *eh; - struct virtio_net_hdr *hdr; - struct virtio_net_hdr_mrg_rxbuf *mhdr; - int len, deq, nbufs, adjsz, rx_npkts; + sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; - vq = sc->vtnet_rx_vq; - hdr = &lhdr; - deq = 0; - rx_npkts = 0; - VTNET_LOCK_ASSERT(sc); + if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { + eh = mtod(m, struct ether_header *); + if (eh->ether_type == htons(ETHERTYPE_VLAN)) { + vtnet_vlan_tag_remove(m); + /* + * With the 802.1Q header removed, update the + * checksum starting location accordingly. + */ + if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) + hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; + } + } - while (--count >= 0) { - m = virtqueue_dequeue(vq, &len); - if (m == NULL) + m->m_pkthdr.flowid = rxq->vtnrx_id; + m->m_flags |= M_FLOWID; + + /* + * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum + * distinction that Linux does. Need to reevaluate if performing + * offloading for the NEEDS_CSUM case is really appropriate. 
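+ * NEEDS_CSUM means the frame carries only a partial checksum, e.g. when it came from another guest on the same host.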
+ */ + if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | + VIRTIO_NET_HDR_F_DATA_VALID)) { + if (vtnet_rxq_csum(rxq, m, hdr) == 0) + rxq->vtnrx_stats.vrxs_csum++; + else + rxq->vtnrx_stats.vrxs_csum_failed++; + } + + rxq->vtnrx_stats.vrxs_ipackets++; + rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; + + VTNET_RXQ_UNLOCK(rxq); + (*ifp->if_input)(ifp, m); + VTNET_RXQ_LOCK(rxq); +} + +static int +vtnet_rxq_eof(struct vtnet_rxq *rxq) +{ + struct virtio_net_hdr lhdr, *hdr; + struct vtnet_softc *sc; + struct ifnet *ifp; + struct virtqueue *vq; + struct mbuf *m; + struct virtio_net_hdr_mrg_rxbuf *mhdr; + int len, deq, nbufs, adjsz, count; + + sc = rxq->vtnrx_sc; + vq = rxq->vtnrx_vq; + ifp = sc->vtnet_ifp; + hdr = &lhdr; + deq = 0; + count = sc->vtnet_rx_process_limit; + + VTNET_RXQ_LOCK_ASSERT(rxq); + + while (count-- > 0) { + m = virtqueue_dequeue(vq, &len); + if (m == NULL) break; deq++; if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { - ifp->if_ierrors++; - vtnet_discard_rxbuf(sc, m); + rxq->vtnrx_stats.vrxs_ierrors++; + vtnet_rxq_discard_buf(rxq, m); continue; } @@ -1585,8 +1763,8 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp) nbufs = 1; adjsz = sizeof(struct vtnet_rx_header); /* - * Account for our pad between the header and - * the actual start of the frame. + * Account for our pad inserted between the header + * and the actual start of the frame. */ len += VTNET_RX_HEADER_PAD; } else { @@ -1595,11 +1773,11 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp) adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); } - if (vtnet_replace_rxbuf(sc, m, len) != 0) { - ifp->if_iqdrops++; - vtnet_discard_rxbuf(sc, m); + if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { + rxq->vtnrx_stats.vrxs_iqdrops++; + vtnet_rxq_discard_buf(rxq, m); if (nbufs > 1) - vtnet_discard_merged_rxbuf(sc, nbufs); + vtnet_rxq_discard_merged_bufs(rxq, nbufs); continue; } @@ -1608,51 +1786,26 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp) m->m_pkthdr.csum_flags = 0; if (nbufs > 1) { - if (vtnet_rxeof_merged(sc, m, nbufs) != 0) + /* Dequeue the rest of the chain. */ + if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0) continue; } - ifp->if_ipackets++; - /* * Save copy of header before we strip it. For both mergeable - * and non-mergeable, the VirtIO header is placed first in the - * mbuf's data. We no longer need num_buffers, so always use a - * virtio_net_hdr. + * and non-mergeable, the header is at the beginning of the + * mbuf data. We no longer need num_buffers, so always use a + * regular header. + * + * BMV: Is this memcpy() expensive? We know the mbuf data is + * still valid even after the m_adj(). */ memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); m_adj(m, adjsz); - if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { - eh = mtod(m, struct ether_header *); - if (eh->ether_type == htons(ETHERTYPE_VLAN)) { - vtnet_vlan_tag_remove(m); - - /* - * With the 802.1Q header removed, update the - * checksum starting location accordingly. - */ - if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) - hdr->csum_start -= - ETHER_VLAN_ENCAP_LEN; - } - } + vtnet_rxq_input(rxq, m, hdr); - if (ifp->if_capenable & IFCAP_RXCSUM && - hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { - if (vtnet_rx_csum(sc, m, hdr) != 0) - sc->vtnet_stats.rx_csum_failed++; - } - - VTNET_UNLOCK(sc); - rx_npkts++; - (*ifp->if_input)(ifp, m); - VTNET_LOCK(sc); - - /* - * The interface may have been stopped while we were - * passing the packet up the network stack. - */ + /* Must recheck after dropping the Rx lock. 
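+ * vtnet_rxq_input() enters the stack unlocked, so the interface may have been stopped in the meantime.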
*/ if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) break; } @@ -1660,232 +1813,297 @@ vtnet_rxeof(struct vtnet_softc *sc, int count, int *rx_npktsp) if (deq > 0) virtqueue_notify(vq); - if (rx_npktsp != NULL) - *rx_npktsp = rx_npkts; - return (count > 0 ? 0 : EAGAIN); } static void -vtnet_rx_vq_intr(void *xsc) +vtnet_rx_vq_intr(void *xrxq) { struct vtnet_softc *sc; + struct vtnet_rxq *rxq; struct ifnet *ifp; - int more; + int tries, more; - sc = xsc; + rxq = xrxq; + sc = rxq->vtnrx_sc; ifp = sc->vtnet_ifp; + tries = 0; -again: - VTNET_LOCK(sc); + if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) { + /* + * Ignore this interrupt. Either this is a spurious interrupt + * or multiqueue without per-VQ MSIX so every queue needs to + * be polled (a brain dead configuration we could try harder + * to avoid). + */ + vtnet_rxq_disable_intr(rxq); + return; + } + + VTNET_RXQ_LOCK(rxq); -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) { - VTNET_UNLOCK(sc); +again: + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + VTNET_RXQ_UNLOCK(rxq); return; } -#endif + + more = vtnet_rxq_eof(rxq); + if (more || vtnet_rxq_enable_intr(rxq) != 0) { + if (!more) + vtnet_rxq_disable_intr(rxq); + /* + * This is an occasional condition or race (when !more), + * so retry a few times before scheduling the taskqueue. + */ + if (tries++ < VTNET_INTR_DISABLE_RETRIES) + goto again; + + VTNET_RXQ_UNLOCK(rxq); + rxq->vtnrx_stats.vrxs_rescheduled++; + taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); + } else + VTNET_RXQ_UNLOCK(rxq); +} + +static void +vtnet_rxq_tq_intr(void *xrxq, int pending) +{ + struct vtnet_softc *sc; + struct vtnet_rxq *rxq; + struct ifnet *ifp; + int more; + + rxq = xrxq; + sc = rxq->vtnrx_sc; + ifp = sc->vtnet_ifp; + + VTNET_RXQ_LOCK(rxq); if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { - vtnet_enable_rx_intr(sc); - VTNET_UNLOCK(sc); + VTNET_RXQ_UNLOCK(rxq); return; } - more = vtnet_rxeof(sc, sc->vtnet_rx_process_limit, NULL); - if (more || vtnet_enable_rx_intr(sc) != 0) { + more = vtnet_rxq_eof(rxq); + if (more || vtnet_rxq_enable_intr(rxq) != 0) { if (!more) - vtnet_disable_rx_intr(sc); - sc->vtnet_stats.rx_task_rescheduled++; - VTNET_UNLOCK(sc); - goto again; + vtnet_rxq_disable_intr(rxq); + rxq->vtnrx_stats.vrxs_rescheduled++; + taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); } - VTNET_UNLOCK(sc); + VTNET_RXQ_UNLOCK(rxq); } static void -vtnet_txeof(struct vtnet_softc *sc) +vtnet_txq_free_mbufs(struct vtnet_txq *txq) { struct virtqueue *vq; - struct ifnet *ifp; struct vtnet_tx_header *txhdr; - int deq; - - vq = sc->vtnet_tx_vq; - ifp = sc->vtnet_ifp; - deq = 0; + int last; - VTNET_LOCK_ASSERT(sc); + vq = txq->vtntx_vq; + last = 0; - while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { - deq++; - ifp->if_opackets++; + while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { m_freem(txhdr->vth_mbuf); uma_zfree(vtnet_tx_header_zone, txhdr); } - if (deq > 0) { - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; - if (virtqueue_empty(vq)) - sc->vtnet_watchdog_timer = 0; - } + KASSERT(virtqueue_empty(vq), + ("%s: mbufs remaining in tx queue %p", __func__, txq)); } -static struct mbuf * -vtnet_tx_offload(struct vtnet_softc *sc, struct mbuf *m, - struct virtio_net_hdr *hdr) +/* + * BMV: Much of this can go away once we finally have offsets in + * the mbuf packet header. Bug andre@. 
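+ * Until then, parse the frame here to recover the ethertype, the L4 protocol, and the offset at which the L4 header begins.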
+ */ +static int +vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, + int *etype, int *proto, int *start) { - struct ifnet *ifp; - struct ether_header *eh; + struct vtnet_softc *sc; struct ether_vlan_header *evh; - struct ip *ip; - struct ip6_hdr *ip6; - struct tcphdr *tcp; - int ip_offset; - uint16_t eth_type, csum_start; - uint8_t ip_proto, gso_type; + int offset; - ifp = sc->vtnet_ifp; + sc = txq->vtntx_sc; - ip_offset = sizeof(struct ether_header); - if (m->m_len < ip_offset) { - if ((m = m_pullup(m, ip_offset)) == NULL) - return (NULL); + evh = mtod(m, struct ether_vlan_header *); + if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { + /* BMV: We should handle nested VLAN tags too. */ + *etype = ntohs(evh->evl_proto); + offset = sizeof(struct ether_vlan_header); + } else { + *etype = ntohs(evh->evl_encap_proto); + offset = sizeof(struct ether_header); + } + + switch (*etype) { +#if defined(INET) + case ETHERTYPE_IP: { + struct ip *ip, iphdr; + if (__predict_false(m->m_len < offset + sizeof(struct ip))) { + m_copydata(m, offset, sizeof(struct ip), + (caddr_t) &iphdr); + ip = &iphdr; + } else + ip = (struct ip *)(m->m_data + offset); + *proto = ip->ip_p; + *start = offset + (ip->ip_hl << 2); + break; } - - eh = mtod(m, struct ether_header *); - eth_type = ntohs(eh->ether_type); - if (eth_type == ETHERTYPE_VLAN) { - ip_offset = sizeof(struct ether_vlan_header); - if (m->m_len < ip_offset) { - if ((m = m_pullup(m, ip_offset)) == NULL) - return (NULL); - } - evh = mtod(m, struct ether_vlan_header *); - eth_type = ntohs(evh->evl_proto); +#endif +#if defined(INET6) + case ETHERTYPE_IPV6: + *proto = -1; + *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); + /* Assert the network stack sent us a valid packet. */ + KASSERT(*start > offset, + ("%s: mbuf %p start %d offset %d proto %d", __func__, m, + *start, offset, *proto)); + break; +#endif + default: + sc->vtnet_stats.tx_csum_bad_ethtype++; + return (EINVAL); } - switch (eth_type) { - case ETHERTYPE_IP: - if (m->m_len < ip_offset + sizeof(struct ip)) { - m = m_pullup(m, ip_offset + sizeof(struct ip)); - if (m == NULL) - return (NULL); - } + return (0); +} - ip = (struct ip *)(mtod(m, uint8_t *) + ip_offset); - ip_proto = ip->ip_p; - csum_start = ip_offset + (ip->ip_hl << 2); - gso_type = VIRTIO_NET_HDR_GSO_TCPV4; - break; +static int +vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, + int offset, struct virtio_net_hdr *hdr) +{ + static struct timeval lastecn; + static int curecn; + struct vtnet_softc *sc; + struct tcphdr *tcp, tcphdr; - case ETHERTYPE_IPV6: - if (m->m_len < ip_offset + sizeof(struct ip6_hdr)) { - m = m_pullup(m, ip_offset + sizeof(struct ip6_hdr)); - if (m == NULL) - return (NULL); - } + sc = txq->vtntx_sc; + + if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { + m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); + tcp = &tcphdr; + } else + tcp = (struct tcphdr *)(m->m_data + offset); - ip6 = (struct ip6_hdr *)(mtod(m, uint8_t *) + ip_offset); + hdr->hdr_len = offset + (tcp->th_off << 2); + hdr->gso_size = m->m_pkthdr.tso_segsz; + hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : + VIRTIO_NET_HDR_GSO_TCPV6; + + if (tcp->th_flags & TH_CWR) { /* - * XXX Assume no extension headers are present. Presently, - * this will always be true in the case of TSO, and FreeBSD - * does not perform checksum offloading of IPv6 yet. + * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. 
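+ * A CWR flag on a TSO packet means the connection is using ECN, which the host must support to segment correctly.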
In FreeBSD, + * ECN support is not on a per-interface basis, but globally via + * the net.inet.tcp.ecn.enable sysctl knob. The default is off. */ - ip_proto = ip6->ip6_nxt; - csum_start = ip_offset + sizeof(struct ip6_hdr); - gso_type = VIRTIO_NET_HDR_GSO_TCPV6; - break; - - default: - return (m); + if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { + if (ppsratecheck(&lastecn, &curecn, 1)) + if_printf(sc->vtnet_ifp, + "TSO with ECN not negotiated with host\n"); + return (ENOTSUP); + } + hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; } - if (m->m_pkthdr.csum_flags & VTNET_CSUM_OFFLOAD) { - hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; - hdr->csum_start = csum_start; - hdr->csum_offset = m->m_pkthdr.csum_data; + txq->vtntx_stats.vtxs_tso++; - sc->vtnet_stats.tx_csum_offloaded++; - } + return (0); +} - if (m->m_pkthdr.csum_flags & CSUM_TSO) { - if (ip_proto != IPPROTO_TCP) - return (m); +static struct mbuf * +vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, + struct virtio_net_hdr *hdr) +{ + struct vtnet_softc *sc; + int flags, etype, csum_start, proto, error; - if (m->m_len < csum_start + sizeof(struct tcphdr)) { - m = m_pullup(m, csum_start + sizeof(struct tcphdr)); - if (m == NULL) - return (NULL); - } + sc = txq->vtntx_sc; + flags = m->m_pkthdr.csum_flags; - tcp = (struct tcphdr *)(mtod(m, uint8_t *) + csum_start); - hdr->gso_type = gso_type; - hdr->hdr_len = csum_start + (tcp->th_off << 2); - hdr->gso_size = m->m_pkthdr.tso_segsz; + error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); + if (error) + goto drop; - if (tcp->th_flags & TH_CWR) { - /* - * Drop if we did not negotiate VIRTIO_NET_F_HOST_ECN. - * ECN support is only configurable globally with the - * net.inet.tcp.ecn.enable sysctl knob. - */ - if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { - if_printf(ifp, "TSO with ECN not supported " - "by host\n"); - m_freem(m); - return (NULL); - } + if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) || + (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) { + /* + * We could compare the IP protocol vs the CSUM_ flag too, + * but that really should not be necessary. + */ + hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; + hdr->csum_start = csum_start; + hdr->csum_offset = m->m_pkthdr.csum_data; + txq->vtntx_stats.vtxs_csum++; + } - hdr->flags |= VIRTIO_NET_HDR_GSO_ECN; + if (flags & CSUM_TSO) { + if (__predict_false(proto != IPPROTO_TCP)) { + /* Likely failed to correctly parse the mbuf. 
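+ * CSUM_TSO is only set for TCP segments, so any other protocol here means the parse above went astray.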
*/ + sc->vtnet_stats.tx_tso_not_tcp++; + goto drop; } - sc->vtnet_stats.tx_tso_offloaded++; + KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, + ("%s: mbuf %p TSO without checksum offload %#x", + __func__, m, flags)); + + error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); + if (error) + goto drop; } return (m); + +drop: + m_freem(m); + return (NULL); } static int -vtnet_enqueue_txbuf(struct vtnet_softc *sc, struct mbuf **m_head, +vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, struct vtnet_tx_header *txhdr) { - struct sglist sg; - struct sglist_seg segs[VTNET_MAX_TX_SEGS]; + struct vtnet_softc *sc; struct virtqueue *vq; + struct sglist *sg; struct mbuf *m; - int collapsed, error; + int error; - vq = sc->vtnet_tx_vq; + sc = txq->vtntx_sc; + vq = txq->vtntx_vq; + sg = txq->vtntx_sg; m = *m_head; - collapsed = 0; - sglist_init(&sg, VTNET_MAX_TX_SEGS, segs); - error = sglist_append(&sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); - KASSERT(error == 0 && sg.sg_nseg == 1, - ("%s: cannot add header to sglist error %d", __func__, error)); + sglist_reset(sg); + error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); + KASSERT(error == 0 && sg->sg_nseg == 1, + ("%s: error %d adding header to sglist", __func__, error)); -again: - error = sglist_append_mbuf(&sg, m); + error = sglist_append_mbuf(sg, m); if (error) { - if (collapsed) - goto fail; - - m = m_collapse(m, M_NOWAIT, VTNET_MAX_TX_SEGS - 1); + m = m_defrag(m, M_NOWAIT); if (m == NULL) goto fail; *m_head = m; - collapsed = 1; - goto again; + sc->vtnet_stats.tx_defragged++; + + error = sglist_append_mbuf(sg, m); + if (error) + goto fail; } txhdr->vth_mbuf = m; + error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); - return (virtqueue_enqueue(vq, txhdr, &sg, sg.sg_nseg, 0)); + return (error); fail: + sc->vtnet_stats.tx_defrag_failed++; m_freem(*m_head); *m_head = NULL; @@ -1893,28 +2111,29 @@ fail: } static int -vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head) +vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head) { + struct vtnet_softc *sc; struct vtnet_tx_header *txhdr; struct virtio_net_hdr *hdr; struct mbuf *m; int error; + sc = txq->vtntx_sc; m = *m_head; M_ASSERTPKTHDR(m); txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO); if (txhdr == NULL) { - *m_head = NULL; m_freem(m); + *m_head = NULL; return (ENOMEM); } /* - * Always use the non-mergeable header to simplify things. When - * the mergeable feature is negotiated, the num_buffers field - * must be set to zero. We use vtnet_hdr_size later to enqueue - * the correct header size to the host. + * Always use the non-mergeable header, regardless of whether the + * feature was negotiated. For transmit, num_buffers is always zero. + * The vtnet_hdr_size is used to enqueue the correct header size. 
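+ * The header was allocated with M_ZERO, so num_buffers reads as zero even when the larger mergeable layout is enqueued.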
*/ hdr = &txhdr->vth_uhdr.hdr; @@ -1927,140 +2146,597 @@ vtnet_encap(struct vtnet_softc *sc, struct mbuf **m_head) m->m_flags &= ~M_VLANTAG; } - if (m->m_pkthdr.csum_flags != 0) { - m = vtnet_tx_offload(sc, m, hdr); + if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { + m = vtnet_txq_offload(txq, m, hdr); if ((*m_head = m) == NULL) { error = ENOBUFS; goto fail; } } - error = vtnet_enqueue_txbuf(sc, m_head, txhdr); + error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); + if (error == 0) + return (0); + fail: - if (error) + uma_zfree(vtnet_tx_header_zone, txhdr); + + return (error); +} + +#ifdef VTNET_LEGACY_TX + +static void +vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) +{ + struct vtnet_softc *sc; + struct virtqueue *vq; + struct mbuf *m0; + int enq; + + sc = txq->vtntx_sc; + vq = txq->vtntx_vq; + enq = 0; + + VTNET_TXQ_LOCK_ASSERT(txq); + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + sc->vtnet_link_active == 0) + return; + + vtnet_txq_eof(txq); + + while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { + if (virtqueue_full(vq)) + break; + + IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); + if (m0 == NULL) + break; + + if (vtnet_txq_encap(txq, &m0) != 0) { + if (m0 != NULL) + IFQ_DRV_PREPEND(&ifp->if_snd, m0); + break; + } + + enq++; + ETHER_BPF_MTAP(ifp, m0); + } + + if (enq > 0) { + virtqueue_notify(vq); + txq->vtntx_watchdog = VTNET_TX_TIMEOUT; + } +} + +static void +vtnet_start(struct ifnet *ifp) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; + + sc = ifp->if_softc; + txq = &sc->vtnet_txqs[0]; + + VTNET_TXQ_LOCK(txq); + vtnet_start_locked(txq, ifp); + VTNET_TXQ_UNLOCK(txq); +} + +#else /* !VTNET_LEGACY_TX */ + +static int +vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) +{ + struct vtnet_softc *sc; + struct virtqueue *vq; + struct buf_ring *br; + struct ifnet *ifp; + int enq, error; + + sc = txq->vtntx_sc; + vq = txq->vtntx_vq; + br = txq->vtntx_br; + ifp = sc->vtnet_ifp; + enq = 0; + error = 0; + + VTNET_TXQ_LOCK_ASSERT(txq); + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || + sc->vtnet_link_active == 0) { + if (m != NULL) + error = drbr_enqueue(ifp, br, m); + return (error); + } + + if (m != NULL) { + error = drbr_enqueue(ifp, br, m); + if (error) + return (error); + } + + vtnet_txq_eof(txq); + + while ((m = drbr_peek(ifp, br)) != NULL) { + if (virtqueue_full(vq)) { + drbr_putback(ifp, br, m); + error = ENOBUFS; + break; + } + + error = vtnet_txq_encap(txq, &m); + if (error) { + if (m != NULL) + drbr_putback(ifp, br, m); + else + drbr_advance(ifp, br); + break; + } + drbr_advance(ifp, br); + + enq++; + ETHER_BPF_MTAP(ifp, m); + } + + if (enq > 0) { + virtqueue_notify(vq); + txq->vtntx_watchdog = VTNET_TX_TIMEOUT; + } + + return (error); +} + +static int +vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; + int i, npairs, error; + + sc = ifp->if_softc; + npairs = sc->vtnet_act_vq_pairs; + + if (m->m_flags & M_FLOWID) + i = m->m_pkthdr.flowid % npairs; + else + i = curcpu % npairs; + + txq = &sc->vtnet_txqs[i]; + + if (VTNET_TXQ_TRYLOCK(txq) != 0) { + error = vtnet_txq_mq_start_locked(txq, m); + VTNET_TXQ_UNLOCK(txq); + } else { + error = drbr_enqueue(ifp, txq->vtntx_br, m); + taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); + } + + return (error); +} + +static void +vtnet_txq_tq_deferred(void *xtxq, int pending) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; + + txq = xtxq; + sc = txq->vtntx_sc; + + VTNET_TXQ_LOCK(txq); + if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) + 
vtnet_txq_mq_start_locked(txq, NULL); + VTNET_TXQ_UNLOCK(txq); +} + +#endif /* VTNET_LEGACY_TX */ + +static void +vtnet_txq_start(struct vtnet_txq *txq) +{ + struct vtnet_softc *sc; + struct ifnet *ifp; + + sc = txq->vtntx_sc; + ifp = sc->vtnet_ifp; + +#ifdef VTNET_LEGACY_TX + if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) + vtnet_start_locked(txq, ifp); +#else + if (!drbr_empty(ifp, txq->vtntx_br)) + vtnet_txq_mq_start_locked(txq, NULL); +#endif +} + +static void +vtnet_txq_tq_intr(void *xtxq, int pending) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; + struct ifnet *ifp; + + txq = xtxq; + sc = txq->vtntx_sc; + ifp = sc->vtnet_ifp; + + VTNET_TXQ_LOCK(txq); + + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + VTNET_TXQ_UNLOCK(txq); + return; + } + + vtnet_txq_eof(txq); + + vtnet_txq_start(txq); + + if (vtnet_txq_enable_intr(txq) != 0) { + vtnet_txq_disable_intr(txq); + txq->vtntx_stats.vtxs_rescheduled++; + taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); + } + + VTNET_TXQ_UNLOCK(txq); +} + +static void +vtnet_txq_eof(struct vtnet_txq *txq) +{ + struct virtqueue *vq; + struct vtnet_tx_header *txhdr; + struct mbuf *m; + + vq = txq->vtntx_vq; + VTNET_TXQ_LOCK_ASSERT(txq); + + while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { + m = txhdr->vth_mbuf; + + txq->vtntx_stats.vtxs_opackets++; + txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; + if (m->m_flags & M_MCAST) + txq->vtntx_stats.vtxs_omcasts++; + + m_freem(m); uma_zfree(vtnet_tx_header_zone, txhdr); + } + + if (virtqueue_empty(vq)) + txq->vtntx_watchdog = 0; +} + +static void +vtnet_tx_vq_intr(void *xtxq) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; + struct ifnet *ifp; + int tries; + + txq = xtxq; + sc = txq->vtntx_sc; + ifp = sc->vtnet_ifp; + tries = 0; + + if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { + /* + * Ignore this interrupt. Either this is a spurious interrupt + * or multiqueue without per-VQ MSIX so every queue needs to + * be polled (a brain dead configuration we could try harder + * to avoid). + */ + vtnet_txq_disable_intr(txq); + return; + } + + VTNET_TXQ_LOCK(txq); + +again: + if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { + VTNET_TXQ_UNLOCK(txq); + return; + } + + vtnet_txq_eof(txq); + + vtnet_txq_start(txq); + + if (vtnet_txq_enable_intr(txq) != 0) { + vtnet_txq_disable_intr(txq); + /* + * This is an occasional race, so retry a few times + * before scheduling the taskqueue. 
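+ * (The race: new completions can post in the window between the final eof pass and re-enabling the interrupt.)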
+ */ + if (tries++ < VTNET_INTR_DISABLE_RETRIES) + goto again; + + VTNET_TXQ_UNLOCK(txq); + txq->vtntx_stats.vtxs_rescheduled++; + taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); + } else + VTNET_TXQ_UNLOCK(txq); +} + +static void +vtnet_tx_start_all(struct vtnet_softc *sc) +{ + struct vtnet_txq *txq; + int i; + + VTNET_CORE_LOCK_ASSERT(sc); + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + txq = &sc->vtnet_txqs[i]; + + VTNET_TXQ_LOCK(txq); + vtnet_txq_start(txq); + VTNET_TXQ_UNLOCK(txq); + } +} + +#ifndef VTNET_LEGACY_TX +static void +vtnet_qflush(struct ifnet *ifp) +{ + struct vtnet_softc *sc; + struct vtnet_txq *txq; + struct mbuf *m; + int i; + + sc = ifp->if_softc; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + txq = &sc->vtnet_txqs[i]; + + VTNET_TXQ_LOCK(txq); + while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) + m_freem(m); + VTNET_TXQ_UNLOCK(txq); + } - return (error); + if_qflush(ifp); } +#endif -static void -vtnet_start(struct ifnet *ifp) +static int +vtnet_watchdog(struct vtnet_txq *txq) { struct vtnet_softc *sc; - sc = ifp->if_softc; + sc = txq->vtntx_sc; + + VTNET_TXQ_LOCK(txq); + if (sc->vtnet_flags & VTNET_FLAG_EVENT_IDX) + vtnet_txq_eof(txq); + if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { + VTNET_TXQ_UNLOCK(txq); + return (0); + } + VTNET_TXQ_UNLOCK(txq); - VTNET_LOCK(sc); - vtnet_start_locked(ifp); - VTNET_UNLOCK(sc); + if_printf(sc->vtnet_ifp, "watchdog timeout on queue %d\n", + txq->vtntx_id); + return (1); } static void -vtnet_start_locked(struct ifnet *ifp) +vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum) { - struct vtnet_softc *sc; - struct virtqueue *vq; - struct mbuf *m0; - int enq; + struct vtnet_rxq_stats *st; - sc = ifp->if_softc; - vq = sc->vtnet_tx_vq; - enq = 0; + st = &rxq->vtnrx_stats; - VTNET_LOCK_ASSERT(sc); + accum->vrxs_ipackets += st->vrxs_ipackets; + accum->vrxs_ibytes += st->vrxs_ibytes; + accum->vrxs_iqdrops += st->vrxs_iqdrops; + accum->vrxs_ierrors += st->vrxs_ierrors; + accum->vrxs_csum += st->vrxs_csum; + accum->vrxs_csum_failed += st->vrxs_csum_failed; + accum->vrxs_rescheduled += st->vrxs_rescheduled; +} - if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != - IFF_DRV_RUNNING || ((sc->vtnet_flags & VTNET_FLAG_LINK) == 0)) - return; +static void +vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum) +{ + struct vtnet_txq_stats *st; -#ifdef VTNET_TX_INTR_MODERATION - if (virtqueue_nused(vq) >= sc->vtnet_tx_size / 2) - vtnet_txeof(sc); -#endif + st = &txq->vtntx_stats; - while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { - if (virtqueue_full(vq)) { - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - break; - } + accum->vtxs_opackets += st->vtxs_opackets; + accum->vtxs_obytes += st->vtxs_obytes; + accum->vtxs_omcasts += st->vtxs_omcasts; + accum->vtxs_csum += st->vtxs_csum; + accum->vtxs_tso += st->vtxs_tso; + accum->vtxs_rescheduled += st->vtxs_rescheduled; +} - IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); - if (m0 == NULL) - break; +static void +vtnet_accumulate_stats(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + struct vtnet_statistics *st; + struct vtnet_rxq_stats rxaccum; + struct vtnet_txq_stats txaccum; + int i; - if (vtnet_encap(sc, &m0) != 0) { - if (m0 == NULL) - break; - IFQ_DRV_PREPEND(&ifp->if_snd, m0); - ifp->if_drv_flags |= IFF_DRV_OACTIVE; - break; - } + ifp = sc->vtnet_ifp; + st = &sc->vtnet_stats; + bzero(&rxaccum, sizeof(struct vtnet_rxq_stats)); + bzero(&txaccum, sizeof(struct vtnet_txq_stats)); - enq++; - ETHER_BPF_MTAP(ifp, m0); + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum); + 
vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum); } - if (enq > 0) { - virtqueue_notify(vq); - sc->vtnet_watchdog_timer = VTNET_WATCHDOG_TIMEOUT; - } + st->rx_csum_offloaded = rxaccum.vrxs_csum; + st->rx_csum_failed = rxaccum.vrxs_csum_failed; + st->rx_task_rescheduled = rxaccum.vrxs_rescheduled; + st->tx_csum_offloaded = txaccum.vtxs_csum; + st->tx_tso_offloaded = txaccum.vtxs_tso; + st->tx_task_rescheduled = txaccum.vtxs_rescheduled; + + /* + * With the exception of if_ierrors, these ifnet statistics are + * only updated in the driver, so just set them to our accumulated + * values. if_ierrors is updated in ether_input() for malformed + * frames that we should have already discarded. + */ + ifp->if_ipackets = rxaccum.vrxs_ipackets; + ifp->if_iqdrops = rxaccum.vrxs_iqdrops; + ifp->if_ierrors = rxaccum.vrxs_ierrors; + ifp->if_opackets = txaccum.vtxs_opackets; +#ifndef VTNET_LEGACY_TX + ifp->if_obytes = txaccum.vtxs_obytes; + ifp->if_omcasts = txaccum.vtxs_omcasts; +#endif } static void vtnet_tick(void *xsc) { struct vtnet_softc *sc; + struct ifnet *ifp; + int i, timedout; sc = xsc; + ifp = sc->vtnet_ifp; + timedout = 0; - VTNET_LOCK_ASSERT(sc); -#ifdef VTNET_DEBUG - virtqueue_dump(sc->vtnet_rx_vq); - virtqueue_dump(sc->vtnet_tx_vq); -#endif + VTNET_CORE_LOCK_ASSERT(sc); + vtnet_accumulate_stats(sc); - vtnet_watchdog(sc); - callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); + + if (timedout != 0) { + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + vtnet_init_locked(sc); + } else + callout_schedule(&sc->vtnet_tick_ch, hz); } static void -vtnet_tx_vq_intr(void *xsc) +vtnet_start_taskqueues(struct vtnet_softc *sc) { - struct vtnet_softc *sc; - struct ifnet *ifp; + device_t dev; + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i, error; - sc = xsc; - ifp = sc->vtnet_ifp; + dev = sc->vtnet_dev; -again: - VTNET_LOCK(sc); + /* + * Errors here are very difficult to recover from - we cannot + * easily fail because, if this is during boot, we will hang + * when freeing any successfully started taskqueues because + * the scheduler isn't up yet. + * + * Most drivers just ignore the return value - it only fails + * with ENOMEM so an error is not likely. 
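+ * If it does fail, print a diagnostic and press on.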
+ */ + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, + "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); + if (error) { + device_printf(dev, "failed to start rx taskq %d\n", + rxq->vtnrx_id); + } -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) { - VTNET_UNLOCK(sc); - return; + txq = &sc->vtnet_txqs[i]; + error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, + "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); + if (error) { + device_printf(dev, "failed to start tx taskq %d\n", + txq->vtntx_id); + } } -#endif +} - if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { - vtnet_enable_tx_intr(sc); - VTNET_UNLOCK(sc); - return; +static void +vtnet_free_taskqueues(struct vtnet_softc *sc) +{ + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i; + + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + if (rxq->vtnrx_tq != NULL) { + taskqueue_free(rxq->vtnrx_tq); + rxq->vtnrx_tq = NULL; + } + + txq = &sc->vtnet_txqs[i]; + if (txq->vtntx_tq != NULL) { + taskqueue_free(txq->vtntx_tq); + txq->vtntx_tq = NULL; + } + } +} + +static void +vtnet_drain_taskqueues(struct vtnet_softc *sc) +{ + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i; + + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + if (rxq->vtnrx_tq != NULL) + taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); + + txq = &sc->vtnet_txqs[i]; + if (txq->vtntx_tq != NULL) { + taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); +#ifndef VTNET_LEGACY_TX + taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); +#endif + } } +} - vtnet_txeof(sc); +static void +vtnet_drain_rxtx_queues(struct vtnet_softc *sc) +{ + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i; - if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) - vtnet_start_locked(ifp); + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + vtnet_rxq_free_mbufs(rxq); - if (vtnet_enable_tx_intr(sc) != 0) { - vtnet_disable_tx_intr(sc); - sc->vtnet_stats.tx_task_rescheduled++; - VTNET_UNLOCK(sc); - goto again; + txq = &sc->vtnet_txqs[i]; + vtnet_txq_free_mbufs(txq); } +} + +static void +vtnet_stop_rendezvous(struct vtnet_softc *sc) +{ + struct vtnet_rxq *rxq; + struct vtnet_txq *txq; + int i; + + /* + * Lock and unlock the per-queue mutex so we know the stop + * state is visible. Doing only the active queues should be + * sufficient, but it does not cost much extra to do all the + * queues. Note we hold the core mutex here too. + */ + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + VTNET_RXQ_LOCK(rxq); + VTNET_RXQ_UNLOCK(rxq); - VTNET_UNLOCK(sc); + txq = &sc->vtnet_txqs[i]; + VTNET_TXQ_LOCK(txq); + VTNET_TXQ_UNLOCK(txq); + } } static void @@ -2072,46 +2748,60 @@ vtnet_stop(struct vtnet_softc *sc) dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; - VTNET_LOCK_ASSERT(sc); + VTNET_CORE_LOCK_ASSERT(sc); - sc->vtnet_watchdog_timer = 0; + ifp->if_drv_flags &= ~IFF_DRV_RUNNING; + sc->vtnet_link_active = 0; callout_stop(&sc->vtnet_tick_ch); - ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE); - vtnet_disable_rx_intr(sc); - vtnet_disable_tx_intr(sc); + /* Only advisory. */ + vtnet_disable_interrupts(sc); /* - * Stop the host VirtIO adapter. Note this will reset the host - * adapter's state back to the pre-initialized state, so in - * order to make the device usable again, we must drive it - * through virtio_reinit() and virtio_reinit_complete(). + * Stop the host adapter. 
This resets it to the pre-initialized + * state. It will not generate any interrupts until after it is + * reinitialized. */ virtio_stop(dev); + vtnet_stop_rendezvous(sc); - sc->vtnet_flags &= ~VTNET_FLAG_LINK; - - vtnet_free_rx_mbufs(sc); - vtnet_free_tx_mbufs(sc); + /* Free any mbufs left in the virtqueues. */ + vtnet_drain_rxtx_queues(sc); } static int -vtnet_reinit(struct vtnet_softc *sc) +vtnet_virtio_reinit(struct vtnet_softc *sc) { + device_t dev; struct ifnet *ifp; uint64_t features; + int mask, error; + dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; features = sc->vtnet_features; + mask = 0; +#if defined(INET) + mask |= IFCAP_RXCSUM; +#endif +#if defined (INET6) + mask |= IFCAP_RXCSUM_IPV6; +#endif + /* * Re-negotiate with the host, removing any disabled receive * features. Transmit features are disabled only on our side * via if_capenable and if_hwassist. */ - if (ifp->if_capabilities & IFCAP_RXCSUM) { - if ((ifp->if_capenable & IFCAP_RXCSUM) == 0) + if (ifp->if_capabilities & mask) { + /* + * We require both IPv4 and IPv6 offloading to be enabled + * in order to negotiate it: VirtIO does not distinguish + * between the two. + */ + if ((ifp->if_capenable & mask) != mask) features &= ~VIRTIO_NET_F_GUEST_CSUM; } @@ -2125,86 +2815,206 @@ vtnet_reinit(struct vtnet_softc *sc) features &= ~VIRTIO_NET_F_CTRL_VLAN; } - return (virtio_reinit(sc->vtnet_dev, features)); + error = virtio_reinit(dev, features); + if (error) + device_printf(dev, "virtio reinit error %d\n", error); + + return (error); } static void -vtnet_init_locked(struct vtnet_softc *sc) +vtnet_init_rx_filters(struct vtnet_softc *sc) { - device_t dev; struct ifnet *ifp; - int error; - dev = sc->vtnet_dev; ifp = sc->vtnet_ifp; - VTNET_LOCK_ASSERT(sc); + if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { + /* Restore promiscuous and all-multicast modes. */ + vtnet_rx_filter(sc); + /* Restore filtered MAC addresses. */ + vtnet_rx_filter_mac(sc); + } - if (ifp->if_drv_flags & IFF_DRV_RUNNING) + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) + vtnet_rx_filter_vlan(sc); +} + +static int +vtnet_init_rx_queues(struct vtnet_softc *sc) +{ + device_t dev; + struct vtnet_rxq *rxq; + int i, clsize, error; + + dev = sc->vtnet_dev; + + /* + * Use the new cluster size if one has been set (via an MTU + * change). Otherwise, use the standard 2K clusters. + * + * BMV: It might make sense to use page-sized clusters as + * the default (depending on the features negotiated). + */ + if (sc->vtnet_rx_new_clsize != 0) { + clsize = sc->vtnet_rx_new_clsize; + sc->vtnet_rx_new_clsize = 0; + } else + clsize = MCLBYTES; + + sc->vtnet_rx_clsize = clsize; + sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); + + KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS || + sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, + ("%s: too many rx mbufs %d for %d segments", __func__, + sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + rxq = &sc->vtnet_rxqs[i]; + + /* Hold the lock to satisfy asserts. 
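+ * vtnet_rxq_enqueue_buf() asserts the queue lock is held, although the queues are not yet running at this point.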
*/ + VTNET_RXQ_LOCK(rxq); + error = vtnet_rxq_populate(rxq); + VTNET_RXQ_UNLOCK(rxq); + + if (error) { + device_printf(dev, + "cannot allocate mbufs for Rx queue %d\n", i); + return (error); + } + } + + return (0); +} + +static int +vtnet_init_tx_queues(struct vtnet_softc *sc) +{ + struct vtnet_txq *txq; + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { + txq = &sc->vtnet_txqs[i]; + txq->vtntx_watchdog = 0; + } + + return (0); +} + +static int +vtnet_init_rxtx_queues(struct vtnet_softc *sc) +{ + int error; + + error = vtnet_init_rx_queues(sc); + if (error) + return (error); + + error = vtnet_init_tx_queues(sc); + if (error) + return (error); + + return (0); +} + +static void +vtnet_set_active_vq_pairs(struct vtnet_softc *sc) +{ + device_t dev; + int npairs; + + dev = sc->vtnet_dev; + + if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { + MPASS(sc->vtnet_max_vq_pairs == 1); + sc->vtnet_act_vq_pairs = 1; return; + } - /* Stop host's adapter, cancel any pending I/O. */ - vtnet_stop(sc); + /* BMV: Just use the maximum configured for now. */ + npairs = sc->vtnet_max_vq_pairs; - /* Reinitialize the host device. */ - error = vtnet_reinit(sc); - if (error) { + if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { device_printf(dev, - "reinitialization failed, stopping device...\n"); - vtnet_stop(sc); - return; + "cannot set active queue pairs to %d\n", npairs); + npairs = 1; } - /* Update host with assigned MAC address. */ + sc->vtnet_act_vq_pairs = npairs; +} + +static int +vtnet_reinit(struct vtnet_softc *sc) +{ + device_t dev; + struct ifnet *ifp; + int error; + + dev = sc->vtnet_dev; + ifp = sc->vtnet_ifp; + + /* Use the current MAC address. */ bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); vtnet_set_hwaddr(sc); + vtnet_set_active_vq_pairs(sc); + ifp->if_hwassist = 0; if (ifp->if_capenable & IFCAP_TXCSUM) ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; + if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) + ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; if (ifp->if_capenable & IFCAP_TSO4) ifp->if_hwassist |= CSUM_TSO; + if (ifp->if_capenable & IFCAP_TSO6) + ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */ - error = vtnet_init_rx_vq(sc); - if (error) { - device_printf(dev, - "cannot allocate mbufs for Rx virtqueue\n"); - vtnet_stop(sc); - return; - } + if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) + vtnet_init_rx_filters(sc); - if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { - if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { - /* Restore promiscuous and all-multicast modes. */ - vtnet_rx_filter(sc); + error = vtnet_init_rxtx_queues(sc); + if (error) + return (error); - /* Restore filtered MAC addresses. */ - vtnet_rx_filter_mac(sc); - } + vtnet_enable_interrupts(sc); + ifp->if_drv_flags |= IFF_DRV_RUNNING; - /* Restore VLAN filters. */ - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) - vtnet_rx_filter_vlan(sc); - } + return (0); +} -#ifdef DEVICE_POLLING - if (ifp->if_capenable & IFCAP_POLLING) { - vtnet_disable_rx_intr(sc); - vtnet_disable_tx_intr(sc); - } else -#endif - { - vtnet_enable_rx_intr(sc); - vtnet_enable_tx_intr(sc); - } +static void +vtnet_init_locked(struct vtnet_softc *sc) +{ + device_t dev; + struct ifnet *ifp; - ifp->if_drv_flags |= IFF_DRV_RUNNING; - ifp->if_drv_flags &= ~IFF_DRV_OACTIVE; + dev = sc->vtnet_dev; + ifp = sc->vtnet_ifp; + + VTNET_CORE_LOCK_ASSERT(sc); + + if (ifp->if_drv_flags & IFF_DRV_RUNNING) + return; + + vtnet_stop(sc); + + /* Reinitialize with the host. 
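+ * Feature negotiation must complete before the MAC, queues, and filters are programmed.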
*/ + if (vtnet_virtio_reinit(sc) != 0) + goto fail; + + if (vtnet_reinit(sc) != 0) + goto fail; virtio_reinit_complete(dev); vtnet_update_link_status(sc); callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); + + return; + +fail: + vtnet_stop(sc); } static void @@ -2214,9 +3024,24 @@ vtnet_init(void *xsc) sc = xsc; - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); vtnet_init_locked(sc); - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); +} + +static void +vtnet_free_ctrl_vq(struct vtnet_softc *sc) +{ + struct virtqueue *vq; + + vq = sc->vtnet_ctrl_vq; + + /* + * The control virtqueue is only polled and therefore it should + * already be empty. + */ + KASSERT(virtqueue_empty(vq), + ("%s: ctrl vq %p not empty", __func__, vq)); } static void @@ -2224,87 +3049,117 @@ vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, struct sglist *sg, int readable, int writable) { struct virtqueue *vq; - void *c; vq = sc->vtnet_ctrl_vq; - VTNET_LOCK_ASSERT(sc); + VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, - ("no control virtqueue")); - KASSERT(virtqueue_empty(vq), - ("control command already enqueued")); + ("%s: CTRL_VQ feature not negotiated", __func__)); + + if (!virtqueue_empty(vq)) + return; + if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) + return; + + /* + * Poll for the response, but the command is likely already + * done when we return from the notify. + */ + virtqueue_notify(vq); + virtqueue_poll(vq, NULL); +} + +static int +vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) +{ + struct virtio_net_ctrl_hdr hdr; + struct sglist_seg segs[3]; + struct sglist sg; + uint8_t ack; + int error; + + hdr.class = VIRTIO_NET_CTRL_MAC; + hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; + ack = VIRTIO_NET_ERR; - if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) - return; + sglist_init(&sg, 3, segs); + error = 0; + error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); + error |= sglist_append(&sg, &ack, sizeof(uint8_t)); + KASSERT(error == 0 && sg.sg_nseg == 3, + ("%s: error %d adding set MAC msg to sglist", __func__, error)); - virtqueue_notify(vq); + vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); - /* - * Poll until the command is complete. Previously, we would - * sleep until the control virtqueue interrupt handler woke - * us up, but dropping the VTNET_MTX leads to serialization - * difficulties. - * - * Furthermore, it appears QEMU/KVM only allocates three MSIX - * vectors. Two of those vectors are needed for the Rx and Tx - * virtqueues. We do not support sharing both a Vq and config - * changed notification on the same MSIX vector. - */ - c = virtqueue_poll(vq, NULL); - KASSERT(c == cookie, ("unexpected control command response")); + return (ack == VIRTIO_NET_OK ? 
0 : EIO); } -static void -vtnet_rx_filter(struct vtnet_softc *sc) +static int +vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) { - device_t dev; - struct ifnet *ifp; + struct sglist_seg segs[3]; + struct sglist sg; + struct { + struct virtio_net_ctrl_hdr hdr; + uint8_t pad1; + struct virtio_net_ctrl_mq mq; + uint8_t pad2; + uint8_t ack; + } s; + int error; - dev = sc->vtnet_dev; - ifp = sc->vtnet_ifp; + s.hdr.class = VIRTIO_NET_CTRL_MQ; + s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; + s.mq.virtqueue_pairs = npairs; + s.ack = VIRTIO_NET_ERR; - VTNET_LOCK_ASSERT(sc); - KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, - ("CTRL_RX feature not negotiated")); + sglist_init(&sg, 3, segs); + error = 0; + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); + KASSERT(error == 0 && sg.sg_nseg == 3, + ("%s: error %d adding MQ message to sglist", __func__, error)); - if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) - device_printf(dev, "cannot %s promiscuous mode\n", - ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); - if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) - device_printf(dev, "cannot %s all-multicast mode\n", - ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); + return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) { - struct virtio_net_ctrl_hdr hdr; struct sglist_seg segs[3]; struct sglist sg; - uint8_t onoff, ack; + struct { + struct virtio_net_ctrl_hdr hdr; + uint8_t pad1; + uint8_t onoff; + uint8_t pad2; + uint8_t ack; + } s; int error; - if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) - return (ENOTSUP); - - error = 0; + KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, + ("%s: CTRL_RX feature not negotiated", __func__)); - hdr.class = VIRTIO_NET_CTRL_RX; - hdr.cmd = cmd; - onoff = !!on; - ack = VIRTIO_NET_ERR; + s.hdr.class = VIRTIO_NET_CTRL_RX; + s.hdr.cmd = cmd; + s.onoff = !!on; + s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); - error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append(&sg, &onoff, sizeof(uint8_t)); - error |= sglist_append(&sg, &ack, sizeof(uint8_t)); + error = 0; + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, - ("error adding Rx filter message to sglist")); + ("%s: error %d adding Rx message to sglist", __func__, error)); - vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); - return (ack == VIRTIO_NET_OK ? 0 : EIO); + return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static int @@ -2321,6 +3176,48 @@ vtnet_set_allmulti(struct vtnet_softc *sc, int on) return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); } +/* + * The device defaults to promiscuous mode for backwards compatibility. + * Turn it off at attach time if possible. 
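+ * Without the CTRL_RX feature there is no way to tell the host otherwise, so reflect the true state in if_flags.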
+ */ +static void +vtnet_attach_disable_promisc(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + + ifp = sc->vtnet_ifp; + + VTNET_CORE_LOCK(sc); + if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { + ifp->if_flags |= IFF_PROMISC; + } else if (vtnet_set_promisc(sc, 0) != 0) { + ifp->if_flags |= IFF_PROMISC; + device_printf(sc->vtnet_dev, + "cannot disable default promiscuous mode\n"); + } + VTNET_CORE_UNLOCK(sc); +} + +static void +vtnet_rx_filter(struct vtnet_softc *sc) +{ + device_t dev; + struct ifnet *ifp; + + dev = sc->vtnet_dev; + ifp = sc->vtnet_ifp; + + VTNET_CORE_LOCK_ASSERT(sc); + + if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) + device_printf(dev, "cannot %s promiscuous mode\n", + ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); + + if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) + device_printf(dev, "cannot %s all-multicast mode\n", + ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); +} + static void vtnet_rx_filter_mac(struct vtnet_softc *sc) { @@ -2340,19 +3237,23 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) mcnt = 0; promisc = 0; allmulti = 0; - error = 0; - VTNET_LOCK_ASSERT(sc); + VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, - ("CTRL_RX feature not negotiated")); + ("%s: CTRL_RX feature not negotiated", __func__)); /* Unicast MAC addresses: */ if_addr_rlock(ifp); TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; - else if (ucnt == VTNET_MAX_MAC_ENTRIES) + else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), + sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) + continue; + else if (ucnt == VTNET_MAX_MAC_ENTRIES) { + promisc = 1; break; + } bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN); @@ -2360,10 +3261,8 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) } if_addr_runlock(ifp); - if (ucnt >= VTNET_MAX_MAC_ENTRIES) { - promisc = 1; + if (promisc != 0) { filter->vmf_unicast.nentries = 0; - if_printf(ifp, "more than %d MAC addresses assigned, " "falling back to promiscuous mode\n", VTNET_MAX_MAC_ENTRIES); @@ -2375,8 +3274,10 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; - else if (mcnt == VTNET_MAX_MAC_ENTRIES) + else if (mcnt == VTNET_MAX_MAC_ENTRIES) { + allmulti = 1; break; + } bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN); @@ -2384,17 +3285,15 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) } if_maddr_runlock(ifp); - if (mcnt >= VTNET_MAX_MAC_ENTRIES) { - allmulti = 1; + if (allmulti != 0) { filter->vmf_multicast.nentries = 0; - if_printf(ifp, "more than %d multicast MAC addresses " "assigned, falling back to all-multicast mode\n", VTNET_MAX_MAC_ENTRIES); } else filter->vmf_multicast.nentries = mcnt; - if (promisc && allmulti) + if (promisc != 0 && allmulti != 0) goto out; hdr.class = VIRTIO_NET_CTRL_MAC; @@ -2402,6 +3301,7 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) ack = VIRTIO_NET_ERR; sglist_init(&sg, 4, segs); + error = 0; error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); error |= sglist_append(&sg, &filter->vmf_unicast, sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); @@ -2409,7 +3309,7 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); error |= sglist_append(&sg, &ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 4, - 
("error adding MAC filtering message to sglist")); + ("%s: error %d adding MAC filter msg to sglist", __func__, error)); vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); @@ -2417,111 +3317,99 @@ vtnet_rx_filter_mac(struct vtnet_softc *sc) if_printf(ifp, "error setting host MAC filter table\n"); out: - if (promisc) - if (vtnet_set_promisc(sc, 1) != 0) - if_printf(ifp, "cannot enable promiscuous mode\n"); - if (allmulti) - if (vtnet_set_allmulti(sc, 1) != 0) - if_printf(ifp, "cannot enable all-multicast mode\n"); + if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0) + if_printf(ifp, "cannot enable promiscuous mode\n"); + if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0) + if_printf(ifp, "cannot enable all-multicast mode\n"); } static int vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { - struct virtio_net_ctrl_hdr hdr; struct sglist_seg segs[3]; struct sglist sg; - uint8_t ack; + struct { + struct virtio_net_ctrl_hdr hdr; + uint8_t pad1; + uint16_t tag; + uint8_t pad2; + uint8_t ack; + } s; int error; - hdr.class = VIRTIO_NET_CTRL_VLAN; - hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; - ack = VIRTIO_NET_ERR; - error = 0; + s.hdr.class = VIRTIO_NET_CTRL_VLAN; + s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; + s.tag = tag; + s.ack = VIRTIO_NET_ERR; sglist_init(&sg, 3, segs); - error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); - error |= sglist_append(&sg, &tag, sizeof(uint16_t)); - error |= sglist_append(&sg, &ack, sizeof(uint8_t)); + error = 0; + error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); + error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); + error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); KASSERT(error == 0 && sg.sg_nseg == 3, - ("error adding VLAN control message to sglist")); + ("%s: error %d adding VLAN message to sglist", __func__, error)); - vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); + vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); - return (ack == VIRTIO_NET_OK ? 0 : EIO); + return (s.ack == VIRTIO_NET_OK ? 0 : EIO); } static void vtnet_rx_filter_vlan(struct vtnet_softc *sc) { - device_t dev; - uint32_t w, mask; + uint32_t w; uint16_t tag; - int i, nvlans, error; + int i, bit; - VTNET_LOCK_ASSERT(sc); + VTNET_CORE_LOCK_ASSERT(sc); KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, - ("VLAN_FILTER feature not negotiated")); + ("%s: VLAN_FILTER feature not negotiated", __func__)); - dev = sc->vtnet_dev; - nvlans = sc->vtnet_nvlans; - error = 0; + /* Enable the filter for each configured VLAN. */ + for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { + w = sc->vtnet_vlan_filter[i]; + + while ((bit = ffs(w) - 1) != -1) { + w &= ~(1 << bit); + tag = sizeof(w) * CHAR_BIT * i + bit; - /* Enable filtering for each configured VLAN. 
*/ - for (i = 0; i < VTNET_VLAN_SHADOW_SIZE && nvlans > 0; i++) { - w = sc->vtnet_vlan_shadow[i]; - for (mask = 1, tag = i * 32; w != 0; mask <<= 1, tag++) { - if ((w & mask) != 0) { - w &= ~mask; - nvlans--; - if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) - error++; + if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { + device_printf(sc->vtnet_dev, + "cannot enable VLAN %d filter\n", tag); } } } - - KASSERT(nvlans == 0, ("VLAN count incorrect")); - if (error) - device_printf(dev, "cannot restore VLAN filter table\n"); } static void -vtnet_set_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) +vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) { struct ifnet *ifp; int idx, bit; - KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, - ("VLAN_FILTER feature not negotiated")); - - if ((tag == 0) || (tag > 4095)) - return; - ifp = sc->vtnet_ifp; idx = (tag >> 5) & 0x7F; bit = tag & 0x1F; - VTNET_LOCK(sc); + if (tag == 0 || tag > 4095) + return; - /* Update shadow VLAN table. */ - if (add) { - sc->vtnet_nvlans++; - sc->vtnet_vlan_shadow[idx] |= (1 << bit); - } else { - sc->vtnet_nvlans--; - sc->vtnet_vlan_shadow[idx] &= ~(1 << bit); - } + VTNET_CORE_LOCK(sc); - if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) { - if (vtnet_exec_vlan_filter(sc, add, tag) != 0) { - device_printf(sc->vtnet_dev, - "cannot %s VLAN %d %s the host filter table\n", - add ? "add" : "remove", tag, - add ? "to" : "from"); - } + if (add) + sc->vtnet_vlan_filter[idx] |= (1 << bit); + else + sc->vtnet_vlan_filter[idx] &= ~(1 << bit); + + if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && + vtnet_exec_vlan_filter(sc, add, tag) != 0) { + device_printf(sc->vtnet_dev, + "cannot %s VLAN %d %s the host filter table\n", + add ? "add" : "remove", tag, add ? "to" : "from"); } - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); } static void @@ -2531,7 +3419,7 @@ vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) if (ifp->if_softc != arg) return; - vtnet_set_vlan_filter(arg, 1, tag); + vtnet_update_vlan_filter(arg, 1, tag); } static void @@ -2541,7 +3429,47 @@ vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) if (ifp->if_softc != arg) return; - vtnet_set_vlan_filter(arg, 0, tag); + vtnet_update_vlan_filter(arg, 0, tag); +} + +static int +vtnet_is_link_up(struct vtnet_softc *sc) +{ + device_t dev; + struct ifnet *ifp; + uint16_t status; + + dev = sc->vtnet_dev; + ifp = sc->vtnet_ifp; + + if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) + status = VIRTIO_NET_S_LINK_UP; + else + status = virtio_read_dev_config_2(dev, + offsetof(struct virtio_net_config, status)); + + return ((status & VIRTIO_NET_S_LINK_UP) != 0); +} + +static void +vtnet_update_link_status(struct vtnet_softc *sc) +{ + struct ifnet *ifp; + int link; + + ifp = sc->vtnet_ifp; + + VTNET_CORE_LOCK_ASSERT(sc); + link = vtnet_is_link_up(sc); + + /* Notify if the link status has changed. 
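Both edges are reported to the stack with if_link_state_change(); calls that do not change the state are no-ops.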
*/ + if (link != 0 && sc->vtnet_link_active == 0) { + sc->vtnet_link_active = 1; + if_link_state_change(ifp, LINK_STATE_UP); + } else if (link == 0 && sc->vtnet_link_active != 0) { + sc->vtnet_link_active = 0; + if_link_state_change(ifp, LINK_STATE_DOWN); + } } static int @@ -2569,112 +3497,344 @@ vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) ifmr->ifm_status = IFM_AVALID; ifmr->ifm_active = IFM_ETHER; - VTNET_LOCK(sc); + VTNET_CORE_LOCK(sc); if (vtnet_is_link_up(sc) != 0) { ifmr->ifm_status |= IFM_ACTIVE; ifmr->ifm_active |= VTNET_MEDIATYPE; } else ifmr->ifm_active |= IFM_NONE; - VTNET_UNLOCK(sc); + VTNET_CORE_UNLOCK(sc); } static void -vtnet_add_statistics(struct vtnet_softc *sc) +vtnet_set_hwaddr(struct vtnet_softc *sc) { device_t dev; - struct vtnet_statistics *stats; - struct sysctl_ctx_list *ctx; + int i; + + dev = sc->vtnet_dev; + + if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { + if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) + device_printf(dev, "unable to set MAC address\n"); + } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { + for (i = 0; i < ETHER_ADDR_LEN; i++) { + virtio_write_dev_config_1(dev, + offsetof(struct virtio_net_config, mac) + i, + sc->vtnet_hwaddr[i]); + } + } +} + +static void +vtnet_get_hwaddr(struct vtnet_softc *sc) +{ + device_t dev; + int i; + + dev = sc->vtnet_dev; + + if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { + /* + * Generate a random locally administered unicast address. + * + * It would be nice to generate the same MAC address across + * reboots, but it seems all the hosts currently available + * support the MAC feature, so this isn't too important. + */ + sc->vtnet_hwaddr[0] = 0xB2; + arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); + vtnet_set_hwaddr(sc); + return; + } + + for (i = 0; i < ETHER_ADDR_LEN; i++) { + sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, + offsetof(struct virtio_net_config, mac) + i); + } +} + +static void +vtnet_vlan_tag_remove(struct mbuf *m) +{ + struct ether_vlan_header *evh; + + evh = mtod(m, struct ether_vlan_header *); + m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); + m->m_flags |= M_VLANTAG; + + /* Strip the 802.1Q header. 
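The two Ethernet addresses are slid ETHER_VLAN_ENCAP_LEN bytes forward over the tag (bcopy() handles the overlapping copy), then the four stale leading bytes are trimmed with m_adj().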
*/ + bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, + ETHER_HDR_LEN - ETHER_TYPE_LEN); + m_adj(m, ETHER_VLAN_ENCAP_LEN); +} + +static void +vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *child, struct vtnet_rxq *rxq) +{ + struct sysctl_oid *node; + struct sysctl_oid_list *list; + struct vtnet_rxq_stats *stats; + char namebuf[16]; + + snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); + node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, + CTLFLAG_RD, NULL, "Receive Queue"); + list = SYSCTL_CHILDREN(node); + + stats = &rxq->vtnrx_stats; + + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, + &stats->vrxs_ipackets, "Receive packets"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, + &stats->vrxs_ibytes, "Receive bytes"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, + &stats->vrxs_iqdrops, "Receive drops"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, + &stats->vrxs_ierrors, "Receive errors"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, + &stats->vrxs_csum, "Receive checksum offloaded"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, + &stats->vrxs_csum_failed, "Receive checksum offload failed"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, + &stats->vrxs_rescheduled, + "Receive interrupt handler rescheduled"); +} + +static void +vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *child, struct vtnet_txq *txq) +{ + struct sysctl_oid *node; + struct sysctl_oid_list *list; + struct vtnet_txq_stats *stats; + char namebuf[16]; + + snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); + node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, + CTLFLAG_RD, NULL, "Transmit Queue"); + list = SYSCTL_CHILDREN(node); + + stats = &txq->vtntx_stats; + + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, + &stats->vtxs_opackets, "Transmit packets"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, + &stats->vtxs_obytes, "Transmit bytes"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, + &stats->vtxs_omcasts, "Transmit multicasts"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, + &stats->vtxs_csum, "Transmit checksum offloaded"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, + &stats->vtxs_tso, "Transmit segmentation offloaded"); + SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, + &stats->vtxs_rescheduled, + "Transmit interrupt handler rescheduled"); +} + +static void +vtnet_setup_queue_sysctl(struct vtnet_softc *sc) +{ + device_t dev; + struct sysctl_ctx_list *ctx; struct sysctl_oid *tree; struct sysctl_oid_list *child; + int i; dev = sc->vtnet_dev; - stats = &sc->vtnet_stats; ctx = device_get_sysctl_ctx(dev); tree = device_get_sysctl_tree(dev); child = SYSCTL_CHILDREN(tree); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "mbuf_alloc_failed", + for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { + vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); + vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); + } +} + +static void +vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, + struct sysctl_oid_list *child, struct vtnet_softc *sc) +{ + struct vtnet_statistics *stats; + + stats = &sc->vtnet_stats; + + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", CTLFLAG_RD, &stats->mbuf_alloc_failed, "Mbuf cluster allocation failures"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_frame_too_large", + SYSCTL_ADD_UQUAD(ctx, child, 
OID_AUTO, "rx_frame_too_large", CTLFLAG_RD, &stats->rx_frame_too_large, "Received frame larger than the mbuf chain"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_enq_replacement_failed", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", CTLFLAG_RD, &stats->rx_enq_replacement_failed, "Enqueuing the replacement receive mbuf failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_mergeable_failed", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", CTLFLAG_RD, &stats->rx_mergeable_failed, "Mergeable buffers receive failures"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", CTLFLAG_RD, &stats->rx_csum_bad_ethtype, "Received checksum offloaded buffer with unsupported " "Ethernet type"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_start", - CTLFLAG_RD, &stats->rx_csum_bad_start, - "Received checksum offloaded buffer with incorrect start offset"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", CTLFLAG_RD, &stats->rx_csum_bad_ipproto, "Received checksum offloaded buffer with incorrect IP protocol"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_bad_offset", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", CTLFLAG_RD, &stats->rx_csum_bad_offset, "Received checksum offloaded buffer with incorrect offset"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_failed", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", + CTLFLAG_RD, &stats->rx_csum_bad_proto, + "Received checksum offloaded buffer with incorrect protocol"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", CTLFLAG_RD, &stats->rx_csum_failed, "Received buffer checksum offload failed"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_csum_offloaded", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", CTLFLAG_RD, &stats->rx_csum_offloaded, "Received buffer checksum offload succeeded"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "rx_task_rescheduled", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", CTLFLAG_RD, &stats->rx_task_rescheduled, "Times the receive interrupt task rescheduled itself"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_offloaded", - CTLFLAG_RD, &stats->tx_csum_offloaded, - "Offloaded checksum of transmitted buffer"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_offloaded", - CTLFLAG_RD, &stats->tx_tso_offloaded, - "Segmentation offload of transmitted buffer"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", CTLFLAG_RD, &stats->tx_csum_bad_ethtype, "Aborted transmit of checksum offloaded buffer with unknown " "Ethernet type"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", CTLFLAG_RD, &stats->tx_tso_bad_ethtype, "Aborted transmit of TSO buffer with unknown Ethernet type"); - SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "tx_task_rescheduled", + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", + CTLFLAG_RD, &stats->tx_tso_not_tcp, + "Aborted transmit of TSO buffer with non TCP protocol"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", + CTLFLAG_RD, &stats->tx_defragged, + "Transmit mbufs defragged"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", + CTLFLAG_RD, &stats->tx_defrag_failed, + "Aborted transmit of buffer because defrag failed"); + SYSCTL_ADD_UQUAD(ctx, child, 
OID_AUTO, "tx_csum_offloaded", + CTLFLAG_RD, &stats->tx_csum_offloaded, + "Offloaded checksum of transmitted buffer"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", + CTLFLAG_RD, &stats->tx_tso_offloaded, + "Segmentation offload of transmitted buffer"); + SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", CTLFLAG_RD, &stats->tx_task_rescheduled, "Times the transmit interrupt task rescheduled itself"); } +static void +vtnet_setup_sysctl(struct vtnet_softc *sc) +{ + device_t dev; + struct sysctl_ctx_list *ctx; + struct sysctl_oid *tree; + struct sysctl_oid_list *child; + + dev = sc->vtnet_dev; + ctx = device_get_sysctl_ctx(dev); + tree = device_get_sysctl_tree(dev); + child = SYSCTL_CHILDREN(tree); + + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", + CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, + "Maximum number of supported virtqueue pairs"); + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", + CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, + "Number of active virtqueue pairs"); + + vtnet_setup_stat_sysctl(ctx, child, sc); +} + static int -vtnet_enable_rx_intr(struct vtnet_softc *sc) +vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) { - return (virtqueue_enable_intr(sc->vtnet_rx_vq)); + return (virtqueue_enable_intr(rxq->vtnrx_vq)); } static void -vtnet_disable_rx_intr(struct vtnet_softc *sc) +vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) { - virtqueue_disable_intr(sc->vtnet_rx_vq); + virtqueue_disable_intr(rxq->vtnrx_vq); } static int -vtnet_enable_tx_intr(struct vtnet_softc *sc) +vtnet_txq_enable_intr(struct vtnet_txq *txq) { -#ifdef VTNET_TX_INTR_MODERATION - return (0); -#else - return (virtqueue_enable_intr(sc->vtnet_tx_vq)); -#endif + return (virtqueue_postpone_intr(txq->vtntx_vq, VQ_POSTPONE_LONG)); +} + +static void +vtnet_txq_disable_intr(struct vtnet_txq *txq) +{ + + virtqueue_disable_intr(txq->vtntx_vq); +} + +static void +vtnet_enable_rx_interrupts(struct vtnet_softc *sc) +{ + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); +} + +static void +vtnet_enable_tx_interrupts(struct vtnet_softc *sc) +{ + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); +} + +static void +vtnet_enable_interrupts(struct vtnet_softc *sc) +{ + + vtnet_enable_rx_interrupts(sc); + vtnet_enable_tx_interrupts(sc); } static void -vtnet_disable_tx_intr(struct vtnet_softc *sc) +vtnet_disable_rx_interrupts(struct vtnet_softc *sc) { + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); +} + +static void +vtnet_disable_tx_interrupts(struct vtnet_softc *sc) +{ + int i; + + for (i = 0; i < sc->vtnet_act_vq_pairs; i++) + vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); +} + +static void +vtnet_disable_interrupts(struct vtnet_softc *sc) +{ + + vtnet_disable_rx_interrupts(sc); + vtnet_disable_tx_interrupts(sc); +} + +static int +vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) +{ + char path[64]; + + snprintf(path, sizeof(path), + "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); + TUNABLE_INT_FETCH(path, &def); - virtqueue_disable_intr(sc->vtnet_tx_vq); + return (def); } diff --git a/sys/dev/virtio/network/if_vtnetvar.h b/sys/dev/virtio/network/if_vtnetvar.h index d870436e4..aa6d6341b 100644 --- a/sys/dev/virtio/network/if_vtnetvar.h +++ b/sys/dev/virtio/network/if_vtnetvar.h @@ -29,83 +29,171 @@ #ifndef _IF_VTNETVAR_H #define _IF_VTNETVAR_H +struct vtnet_softc; + struct vtnet_statistics { - unsigned long mbuf_alloc_failed; - 
- unsigned long rx_frame_too_large; - unsigned long rx_enq_replacement_failed; - unsigned long rx_mergeable_failed; - unsigned long rx_csum_bad_ethtype; - unsigned long rx_csum_bad_start; - unsigned long rx_csum_bad_ipproto; - unsigned long rx_csum_bad_offset; - unsigned long rx_csum_failed; - unsigned long rx_csum_offloaded; - unsigned long rx_task_rescheduled; - - unsigned long tx_csum_offloaded; - unsigned long tx_tso_offloaded; - unsigned long tx_csum_bad_ethtype; - unsigned long tx_tso_bad_ethtype; - unsigned long tx_task_rescheduled; + uint64_t mbuf_alloc_failed; + + uint64_t rx_frame_too_large; + uint64_t rx_enq_replacement_failed; + uint64_t rx_mergeable_failed; + uint64_t rx_csum_bad_ethtype; + uint64_t rx_csum_bad_ipproto; + uint64_t rx_csum_bad_offset; + uint64_t rx_csum_bad_proto; + uint64_t tx_csum_bad_ethtype; + uint64_t tx_tso_bad_ethtype; + uint64_t tx_tso_not_tcp; + uint64_t tx_defragged; + uint64_t tx_defrag_failed; + + /* + * These are accumulated from each Rx/Tx queue. + */ + uint64_t rx_csum_failed; + uint64_t rx_csum_offloaded; + uint64_t rx_task_rescheduled; + uint64_t tx_csum_offloaded; + uint64_t tx_tso_offloaded; + uint64_t tx_task_rescheduled; +}; + +struct vtnet_rxq_stats { + uint64_t vrxs_ipackets; /* if_ipackets */ + uint64_t vrxs_ibytes; /* if_ibytes */ + uint64_t vrxs_iqdrops; /* if_iqdrops */ + uint64_t vrxs_ierrors; /* if_ierrors */ + uint64_t vrxs_csum; + uint64_t vrxs_csum_failed; + uint64_t vrxs_rescheduled; }; +struct vtnet_rxq { + struct mtx vtnrx_mtx; + struct vtnet_softc *vtnrx_sc; + struct virtqueue *vtnrx_vq; + struct sglist *vtnrx_sg; + int vtnrx_id; + int vtnrx_process_limit; + struct vtnet_rxq_stats vtnrx_stats; + struct taskqueue *vtnrx_tq; + struct task vtnrx_intrtask; + char vtnrx_name[16]; +} __aligned(CACHE_LINE_SIZE); + +#define VTNET_RXQ_LOCK(_rxq) mtx_lock(&(_rxq)->vtnrx_mtx) +#define VTNET_RXQ_UNLOCK(_rxq) mtx_unlock(&(_rxq)->vtnrx_mtx) +#define VTNET_RXQ_LOCK_ASSERT(_rxq) \ + mtx_assert(&(_rxq)->vtnrx_mtx, MA_OWNED) +#define VTNET_RXQ_LOCK_ASSERT_NOTOWNED(_rxq) \ + mtx_assert(&(_rxq)->vtnrx_mtx, MA_NOTOWNED) + +struct vtnet_txq_stats { + uint64_t vtxs_opackets; /* if_opackets */ + uint64_t vtxs_obytes; /* if_obytes */ + uint64_t vtxs_omcasts; /* if_omcasts */ + uint64_t vtxs_csum; + uint64_t vtxs_tso; + uint64_t vtxs_rescheduled; +}; + +struct vtnet_txq { + struct mtx vtntx_mtx; + struct vtnet_softc *vtntx_sc; + struct virtqueue *vtntx_vq; + struct sglist *vtntx_sg; +#ifndef VTNET_LEGACY_TX + struct buf_ring *vtntx_br; +#endif + int vtntx_id; + int vtntx_watchdog; + struct vtnet_txq_stats vtntx_stats; + struct taskqueue *vtntx_tq; + struct task vtntx_intrtask; +#ifndef VTNET_LEGACY_TX + struct task vtntx_defrtask; +#endif + char vtntx_name[16]; +} __aligned(CACHE_LINE_SIZE); + +#define VTNET_TXQ_LOCK(_txq) mtx_lock(&(_txq)->vtntx_mtx) +#define VTNET_TXQ_TRYLOCK(_txq) mtx_trylock(&(_txq)->vtntx_mtx) +#define VTNET_TXQ_UNLOCK(_txq) mtx_unlock(&(_txq)->vtntx_mtx) +#define VTNET_TXQ_LOCK_ASSERT(_txq) \ + mtx_assert(&(_txq)->vtntx_mtx, MA_OWNED) +#define VTNET_TXQ_LOCK_ASSERT_NOTOWNED(_txq) \ + mtx_assert(&(_txq)->vtntx_mtx, MA_NOTOWNED) + struct vtnet_softc { device_t vtnet_dev; struct ifnet *vtnet_ifp; - struct mtx vtnet_mtx; + struct vtnet_rxq *vtnet_rxqs; + struct vtnet_txq *vtnet_txqs; uint32_t vtnet_flags; -#define VTNET_FLAG_LINK 0x0001 -#define VTNET_FLAG_SUSPENDED 0x0002 +#define VTNET_FLAG_SUSPENDED 0x0001 +#define VTNET_FLAG_MAC 0x0002 #define VTNET_FLAG_CTRL_VQ 0x0004 #define VTNET_FLAG_CTRL_RX 0x0008 -#define 
VTNET_FLAG_VLAN_FILTER 0x0010 -#define VTNET_FLAG_TSO_ECN 0x0020 -#define VTNET_FLAG_MRG_RXBUFS 0x0040 -#define VTNET_FLAG_LRO_NOMRG 0x0080 - - struct virtqueue *vtnet_rx_vq; - struct virtqueue *vtnet_tx_vq; - struct virtqueue *vtnet_ctrl_vq; +#define VTNET_FLAG_CTRL_MAC 0x0010 +#define VTNET_FLAG_VLAN_FILTER 0x0020 +#define VTNET_FLAG_TSO_ECN 0x0040 +#define VTNET_FLAG_MRG_RXBUFS 0x0080 +#define VTNET_FLAG_LRO_NOMRG 0x0100 +#define VTNET_FLAG_MULTIQ 0x0200 +#define VTNET_FLAG_EVENT_IDX 0x0400 + int vtnet_link_active; int vtnet_hdr_size; - int vtnet_tx_size; - int vtnet_rx_size; int vtnet_rx_process_limit; - int vtnet_rx_mbuf_size; - int vtnet_rx_mbuf_count; + int vtnet_rx_nsegs; + int vtnet_rx_nmbufs; + int vtnet_rx_clsize; + int vtnet_rx_new_clsize; + int vtnet_tx_nsegs; int vtnet_if_flags; - int vtnet_watchdog_timer; - uint64_t vtnet_features; + int vtnet_act_vq_pairs; + int vtnet_max_vq_pairs; - struct vtnet_statistics vtnet_stats; + struct virtqueue *vtnet_ctrl_vq; + struct vtnet_mac_filter *vtnet_mac_filter; + uint32_t *vtnet_vlan_filter; + uint64_t vtnet_features; + struct vtnet_statistics vtnet_stats; struct callout vtnet_tick_ch; - + struct ifmedia vtnet_media; eventhandler_tag vtnet_vlan_attach; eventhandler_tag vtnet_vlan_detach; - struct ifmedia vtnet_media; - /* - * Fake media type; the host does not provide us with - * any real media information. - */ -#define VTNET_MEDIATYPE (IFM_ETHER | IFM_1000_T | IFM_FDX) + struct mtx vtnet_mtx; + char vtnet_mtx_name[16]; char vtnet_hwaddr[ETHER_ADDR_LEN]; +}; - struct vtnet_mac_filter *vtnet_mac_filter; - /* - * During reset, the host's VLAN filtering table is lost. The - * array below is used to restore all the VLANs configured on - * this interface after a reset. - */ -#define VTNET_VLAN_SHADOW_SIZE (4096 / 32) - int vtnet_nvlans; - uint32_t vtnet_vlan_shadow[VTNET_VLAN_SHADOW_SIZE]; +/* + * Maximum number of queue pairs we will autoconfigure to. + */ +#define VTNET_MAX_QUEUE_PAIRS 8 - char vtnet_mtx_name[16]; -}; +/* + * Additional completed entries can appear in a virtqueue before we can + * reenable interrupts. Number of times to retry before scheduling the + * taskqueue to process the completed entries. + */ +#define VTNET_INTR_DISABLE_RETRIES 4 + +/* + * Fake the media type. The host does not provide us with any real media + * information. + */ +#define VTNET_MEDIATYPE (IFM_ETHER | IFM_10G_T | IFM_FDX) + +/* + * Number of words to allocate for the VLAN shadow table. There is one + * bit for each VLAN. + */ +#define VTNET_VLAN_FILTER_NWORDS (4096 / 32) /* * When mergeable buffers are not negotiated, the vtnet_rx_header structure @@ -161,8 +249,12 @@ struct vtnet_mac_filter { */ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE); -#define VTNET_WATCHDOG_TIMEOUT 5 +#define VTNET_TX_TIMEOUT 5 #define VTNET_CSUM_OFFLOAD (CSUM_TCP | CSUM_UDP | CSUM_SCTP) +#define VTNET_CSUM_OFFLOAD_IPV6 (CSUM_TCP_IPV6 | CSUM_UDP_IPV6 | CSUM_SCTP_IPV6) + +#define VTNET_CSUM_ALL_OFFLOAD \ + (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6 | CSUM_TSO) /* Features desired/implemented by this driver. 
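Bits the host does not offer are dropped during negotiation in vtnet_negotiate_features().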
*/ #define VTNET_FEATURES \ @@ -170,8 +262,10 @@ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE); VIRTIO_NET_F_STATUS | \ VIRTIO_NET_F_CTRL_VQ | \ VIRTIO_NET_F_CTRL_RX | \ + VIRTIO_NET_F_CTRL_MAC_ADDR | \ VIRTIO_NET_F_CTRL_VLAN | \ VIRTIO_NET_F_CSUM | \ + VIRTIO_NET_F_GSO | \ VIRTIO_NET_F_HOST_TSO4 | \ VIRTIO_NET_F_HOST_TSO6 | \ VIRTIO_NET_F_HOST_ECN | \ @@ -180,8 +274,17 @@ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE); VIRTIO_NET_F_GUEST_TSO6 | \ VIRTIO_NET_F_GUEST_ECN | \ VIRTIO_NET_F_MRG_RXBUF | \ + VIRTIO_NET_F_MQ | \ + VIRTIO_RING_F_EVENT_IDX | \ VIRTIO_RING_F_INDIRECT_DESC) +/* + * The VIRTIO_NET_F_HOST_TSO[46] features permit us to send the host + * frames larger than 1514 bytes. + */ +#define VTNET_TSO_FEATURES (VIRTIO_NET_F_GSO | VIRTIO_NET_F_HOST_TSO4 | \ + VIRTIO_NET_F_HOST_TSO6 | VIRTIO_NET_F_HOST_ECN) + /* * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us * frames larger than 1514 bytes. We do not yet support software LRO @@ -195,11 +298,14 @@ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE); /* * Used to preallocate the Vq indirect descriptors. The first segment - * is reserved for the header. + * is reserved for the header, except for mergeable buffers since the + * header is placed inline with the data. */ +#define VTNET_MRG_RX_SEGS 1 #define VTNET_MIN_RX_SEGS 2 #define VTNET_MAX_RX_SEGS 34 -#define VTNET_MAX_TX_SEGS 34 +#define VTNET_MIN_TX_SEGS 4 +#define VTNET_MAX_TX_SEGS 64 /* * Assert we can receive and transmit the maximum with regular @@ -208,28 +314,35 @@ CTASSERT(sizeof(struct vtnet_mac_filter) <= PAGE_SIZE); CTASSERT(((VTNET_MAX_RX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_RX_SIZE); CTASSERT(((VTNET_MAX_TX_SEGS - 1) * MCLBYTES) >= VTNET_MAX_MTU); +/* + * Number of slots in the Tx bufrings. This value matches most other + * multiqueue drivers. + */ +#define VTNET_DEFAULT_BUFRING_SIZE 4096 + /* * Determine how many mbufs are in each receive buffer. For LRO without - * mergeable descriptors, we must allocate an mbuf chain large enough to + * mergeable buffers, we must allocate an mbuf chain large enough to * hold both the vtnet_rx_header and the maximum receivable data. */ -#define VTNET_NEEDED_RX_MBUFS(_sc) \ +#define VTNET_NEEDED_RX_MBUFS(_sc, _clsize) \ ((_sc)->vtnet_flags & VTNET_FLAG_LRO_NOMRG) == 0 ? 
1 : \
 howmany(sizeof(struct vtnet_rx_header) + VTNET_MAX_RX_SIZE, \
- (_sc)->vtnet_rx_mbuf_size)
+ (_clsize))

-#define VTNET_MTX(_sc) &(_sc)->vtnet_mtx
-#define VTNET_LOCK(_sc) mtx_lock(VTNET_MTX((_sc)))
-#define VTNET_UNLOCK(_sc) mtx_unlock(VTNET_MTX((_sc)))
-#define VTNET_LOCK_DESTROY(_sc) mtx_destroy(VTNET_MTX((_sc)))
-#define VTNET_LOCK_ASSERT(_sc) mtx_assert(VTNET_MTX((_sc)), MA_OWNED)
-#define VTNET_LOCK_ASSERT_NOTOWNED(_sc) \
- mtx_assert(VTNET_MTX((_sc)), MA_NOTOWNED)
+#define VTNET_CORE_MTX(_sc) &(_sc)->vtnet_mtx
+#define VTNET_CORE_LOCK(_sc) mtx_lock(VTNET_CORE_MTX((_sc)))
+#define VTNET_CORE_UNLOCK(_sc) mtx_unlock(VTNET_CORE_MTX((_sc)))
+#define VTNET_CORE_LOCK_DESTROY(_sc) mtx_destroy(VTNET_CORE_MTX((_sc)))
+#define VTNET_CORE_LOCK_ASSERT(_sc) \
+ mtx_assert(VTNET_CORE_MTX((_sc)), MA_OWNED)
+#define VTNET_CORE_LOCK_ASSERT_NOTOWNED(_sc) \
+ mtx_assert(VTNET_CORE_MTX((_sc)), MA_NOTOWNED)

-#define VTNET_LOCK_INIT(_sc) do { \
+#define VTNET_CORE_LOCK_INIT(_sc) do { \
 snprintf((_sc)->vtnet_mtx_name, sizeof((_sc)->vtnet_mtx_name), \
 "%s", device_get_nameunit((_sc)->vtnet_dev)); \
- mtx_init(VTNET_MTX((_sc)), (_sc)->vtnet_mtx_name, \
+ mtx_init(VTNET_CORE_MTX((_sc)), (_sc)->vtnet_mtx_name, \
 "VTNET Core Lock", MTX_DEF); \
 } while (0)

diff --git a/sys/dev/virtio/network/virtio_net.h b/sys/dev/virtio/network/virtio_net.h
index 15a73ccbb..f4f9febf5 100644
--- a/sys/dev/virtio/network/virtio_net.h
+++ b/sys/dev/virtio/network/virtio_net.h
@@ -50,14 +50,22 @@
 #define VIRTIO_NET_F_CTRL_RX 0x40000 /* Control channel RX mode support */
 #define VIRTIO_NET_F_CTRL_VLAN 0x80000 /* Control channel VLAN filtering */
 #define VIRTIO_NET_F_CTRL_RX_EXTRA 0x100000 /* Extra RX mode control support */
+#define VIRTIO_NET_F_GUEST_ANNOUNCE 0x200000 /* Announce device on network */
+#define VIRTIO_NET_F_MQ 0x400000 /* Device supports RFS */
+#define VIRTIO_NET_F_CTRL_MAC_ADDR 0x800000 /* Set MAC address */

 #define VIRTIO_NET_S_LINK_UP 1 /* Link is up */

 struct virtio_net_config {
 /* The config defining mac address (if VIRTIO_NET_F_MAC) */
- uint8_t mac[ETHER_ADDR_LEN];
+ uint8_t mac[ETHER_ADDR_LEN];
 /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */
 uint16_t status;
+ /* Maximum number of each of transmit and receive queues;
+ * see VIRTIO_NET_F_MQ and VIRTIO_NET_CTRL_MQ.
+ * Legal values are between 1 and 0x8000.
+ */
+ uint16_t max_virtqueue_pairs;
 } __packed;

 /*
@@ -66,6 +74,7 @@ struct virtio_net_config {
 */
 struct virtio_net_hdr {
 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start,csum_offset*/
+#define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */
 uint8_t flags;
 #define VIRTIO_NET_HDR_GSO_NONE 0 /* Not a GSO frame */
 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* GSO frame, IPv4 TCP (TSO) */
@@ -100,8 +109,6 @@ struct virtio_net_ctrl_hdr {
 uint8_t cmd;
 } __packed;

-typedef uint8_t virtio_net_ctrl_ack;
-
 #define VIRTIO_NET_OK 0
 #define VIRTIO_NET_ERR 1

@@ -134,6 +141,10 @@ typedef uint8_t virtio_net_ctrl_ack;
 * first sg list contains unicast addresses, the second is for multicast.
 * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
 * is available.
+ *
+ * The ADDR_SET command requests one out scatterlist; it contains a
+ * 6-byte MAC address. This functionality is present if the
+ * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
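+ *
+ * In this driver, vtnet_set_hwaddr() issues this command (through
+ * vtnet_ctrl_mac_cmd()) when the CTRL_MAC feature has been negotiated.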
 */
 struct virtio_net_ctrl_mac {
 uint32_t entries;
@@ -142,6 +153,7 @@ struct virtio_net_ctrl_mac {

 #define VIRTIO_NET_CTRL_MAC 1
 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
+#define VIRTIO_NET_CTRL_MAC_ADDR_SET 1

 /*
 * Control VLAN filtering
@@ -156,4 +168,35 @@ struct virtio_net_ctrl_mac {
 #define VIRTIO_NET_CTRL_VLAN_ADD 0
 #define VIRTIO_NET_CTRL_VLAN_DEL 1

+/*
+ * Control link announce acknowledgement
+ *
+ * The command VIRTIO_NET_CTRL_ANNOUNCE_ACK is used to indicate that
+ * the driver has received the notification; the device will clear the
+ * VIRTIO_NET_S_ANNOUNCE bit in the status field after it receives
+ * this command.
+ */
+#define VIRTIO_NET_CTRL_ANNOUNCE 3
+#define VIRTIO_NET_CTRL_ANNOUNCE_ACK 0
+
+/*
+ * Control Receive Flow Steering
+ *
+ * The command VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET enables Receive Flow
+ * Steering, specifying the number of transmit and receive queues
+ * that will be used. After the command is consumed and acked by the
+ * device, the device will not steer new packets on receive virtqueues
+ * other than specified nor read from transmit virtqueues other than
+ * specified. Accordingly, the driver should not transmit new packets
+ * on virtqueues other than specified.
+ */
+struct virtio_net_ctrl_mq {
+ uint16_t virtqueue_pairs;
+} __packed;
+
+#define VIRTIO_NET_CTRL_MQ 4
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000
+
 #endif /* _VIRTIO_NET_H */
diff --git a/sys/modules/virtio/network/Makefile b/sys/modules/virtio/network/Makefile
index 8463309c9..f124d9963 100644
--- a/sys/modules/virtio/network/Makefile
+++ b/sys/modules/virtio/network/Makefile
@@ -23,14 +23,29 @@
 # SUCH DAMAGE.
 #

+.include <bsd.own.mk>
+
 .PATH: ${.CURDIR}/../../../dev/virtio/network

 KMOD= if_vtnet
 SRCS= if_vtnet.c
 SRCS+= virtio_bus_if.h virtio_if.h
 SRCS+= bus_if.h device_if.h
+SRCS+= opt_inet.h opt_inet6.h

 MFILES= kern/bus_if.m kern/device_if.m \
 dev/virtio/virtio_bus_if.m dev/virtio/virtio_if.m

+.if !defined(KERNBUILDDIR)
+.if ${MK_INET_SUPPORT} != "no"
+opt_inet.h:
+ @echo "#define INET 1" > ${.TARGET}
+.endif
+
+.if ${MK_INET6_SUPPORT} != "no"
+opt_inet6.h:
+ @echo "#define INET6 1" > ${.TARGET}
+.endif
+.endif
+
 .include <bsd.kmod.mk>
-- 
2.45.0
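
A note on the VLAN shadow table changed above: the driver keeps one bit per
VLAN ID across VTNET_VLAN_FILTER_NWORDS 32-bit words, so a tag maps to word
(tag >> 5) and bit (tag & 0x1F), and vtnet_rx_filter_vlan() recovers each
configured tag with ffs(). The following stand-alone user-space sketch mirrors
that arithmetic; the helper names and the printf() output are illustrative
only and are not part of the driver.

/*
 * Sketch of the vtnet VLAN shadow-table arithmetic. Only the index/bit
 * math and the ffs() walk correspond to vtnet_update_vlan_filter() and
 * vtnet_rx_filter_vlan() above; everything else is hypothetical.
 */
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <strings.h>

#define VLAN_FILTER_NWORDS	(4096 / 32)	/* one bit per VLAN ID */

static uint32_t vlan_filter[VLAN_FILTER_NWORDS];

static void
vlan_update(int add, uint16_t tag)
{
	int idx, bit;

	if (tag == 0 || tag > 4095)	/* same validity check as the driver */
		return;

	idx = (tag >> 5) & 0x7F;	/* word index: tag / 32 */
	bit = tag & 0x1F;		/* bit within that word: tag % 32 */

	if (add)
		vlan_filter[idx] |= (1U << bit);
	else
		vlan_filter[idx] &= ~(1U << bit);
}

static void
vlan_restore(void)
{
	uint32_t w;
	uint16_t tag;
	int i, bit;

	/* Visit every set bit, lowest first, recovering each tag. */
	for (i = 0; i < VLAN_FILTER_NWORDS; i++) {
		w = vlan_filter[i];
		while ((bit = ffs(w) - 1) != -1) {
			w &= ~(1U << bit);
			tag = sizeof(w) * CHAR_BIT * i + bit;
			printf("restore VLAN %d\n", tag);
		}
	}
}

int
main(void)
{
	vlan_update(1, 5);
	vlan_update(1, 100);
	vlan_update(1, 4095);
	vlan_update(0, 100);	/* removed again */
	vlan_restore();		/* prints VLAN 5 and VLAN 4095 */
	return (0);
}

Keeping the table in guest memory is what makes the restore walk possible:
the host's VLAN filter table is lost across a reset, so the driver replays
every configured tag from the shadow copy.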