2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 * Copyright (c) 2018 Patrick Kelsey
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
23 #include <sys/cdefs.h>
24 __FBSDID("$FreeBSD$");
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/endian.h>
32 #include <sys/sockio.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/socket.h>
37 #include <sys/sysctl.h>
42 #include <net/ethernet.h>
44 #include <net/if_var.h>
45 #include <net/if_arp.h>
46 #include <net/if_dl.h>
47 #include <net/if_types.h>
48 #include <net/if_media.h>
49 #include <net/if_vlan_var.h>
50 #include <net/iflib.h>
52 #include <net/rss_config.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/in.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip6.h>
59 #include <netinet6/ip6_var.h>
60 #include <netinet/udp.h>
61 #include <netinet/tcp.h>
63 #include <machine/bus.h>
64 #include <machine/resource.h>
68 #include <dev/pci/pcireg.h>
69 #include <dev/pci/pcivar.h>
73 #include "if_vmxreg.h"
74 #include "if_vmxvar.h"
77 #include "opt_inet6.h"
80 #define VMXNET3_VMWARE_VENDOR_ID 0x15AD
81 #define VMXNET3_VMWARE_DEVICE_ID 0x07B0
83 static pci_vendor_info_t vmxnet3_vendor_info_array[] =
85 PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
86 /* required last entry */
87 PVID_END
90 static void *vmxnet3_register(device_t);
91 static int vmxnet3_attach_pre(if_ctx_t);
92 static int vmxnet3_msix_intr_assign(if_ctx_t, int);
93 static void vmxnet3_free_irqs(struct vmxnet3_softc *);
94 static int vmxnet3_attach_post(if_ctx_t);
95 static int vmxnet3_detach(if_ctx_t);
96 static int vmxnet3_shutdown(if_ctx_t);
97 static int vmxnet3_suspend(if_ctx_t);
98 static int vmxnet3_resume(if_ctx_t);
100 static int vmxnet3_alloc_resources(struct vmxnet3_softc *);
101 static void vmxnet3_free_resources(struct vmxnet3_softc *);
102 static int vmxnet3_check_version(struct vmxnet3_softc *);
103 static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
105 static int vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
106 static void vmxnet3_init_txq(struct vmxnet3_softc *, int);
107 static int vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
108 static void vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
109 static int vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
110 static void vmxnet3_queues_free(if_ctx_t);
112 static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
113 static void vmxnet3_free_shared_data(struct vmxnet3_softc *);
114 static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
115 static void vmxnet3_free_mcast_table(struct vmxnet3_softc *);
116 static void vmxnet3_init_shared_data(struct vmxnet3_softc *);
117 static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
118 static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
119 static int vmxnet3_alloc_data(struct vmxnet3_softc *);
120 static void vmxnet3_free_data(struct vmxnet3_softc *);
122 static void vmxnet3_evintr(struct vmxnet3_softc *);
123 static int vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
124 static void vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
125 static int vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
126 static int vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
127 static int vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
128 static void vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
129 static void vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
130 static int vmxnet3_legacy_intr(void *);
131 static int vmxnet3_rxq_intr(void *);
132 static int vmxnet3_event_intr(void *);
134 static void vmxnet3_stop(if_ctx_t);
136 static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
137 static void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
138 static void vmxnet3_reinit_queues(struct vmxnet3_softc *);
139 static int vmxnet3_enable_device(struct vmxnet3_softc *);
140 static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
141 static void vmxnet3_init(if_ctx_t);
142 static void vmxnet3_multi_set(if_ctx_t);
143 static int vmxnet3_mtu_set(if_ctx_t, uint32_t);
144 static void vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
145 static int vmxnet3_media_change(if_ctx_t);
146 static int vmxnet3_promisc_set(if_ctx_t, int);
147 static uint64_t vmxnet3_get_counter(if_ctx_t, ift_counter);
148 static void vmxnet3_update_admin_status(if_ctx_t);
149 static void vmxnet3_txq_timer(if_ctx_t, uint16_t);
151 static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
152 uint16_t);
153 static void vmxnet3_vlan_register(if_ctx_t, uint16_t);
154 static void vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
155 static void vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
157 static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
158 static int vmxnet3_link_is_up(struct vmxnet3_softc *);
159 static void vmxnet3_link_status(struct vmxnet3_softc *);
160 static void vmxnet3_set_lladdr(struct vmxnet3_softc *);
161 static void vmxnet3_get_lladdr(struct vmxnet3_softc *);
163 static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
164 struct sysctl_ctx_list *, struct sysctl_oid_list *);
165 static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
166 struct sysctl_ctx_list *, struct sysctl_oid_list *);
167 static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
168 struct sysctl_ctx_list *, struct sysctl_oid_list *);
169 static void vmxnet3_setup_sysctl(struct vmxnet3_softc *);
171 static void vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
172 uint32_t);
173 static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
174 static void vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
175 uint32_t);
176 static void vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
177 static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
179 static int vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
180 static int vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
181 static void vmxnet3_link_intr_enable(if_ctx_t);
182 static void vmxnet3_enable_intr(struct vmxnet3_softc *, int);
183 static void vmxnet3_disable_intr(struct vmxnet3_softc *, int);
184 static void vmxnet3_intr_enable_all(if_ctx_t);
185 static void vmxnet3_intr_disable_all(if_ctx_t);
187 typedef enum vmxnet3_barrier {
188 VMXNET3_BARRIER_RD,
189 VMXNET3_BARRIER_WR,
190 VMXNET3_BARRIER_RDWR,
191 } vmxnet3_barrier_t;
193 static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
196 static device_method_t vmxnet3_methods[] = {
197 /* Device interface */
198 DEVMETHOD(device_register, vmxnet3_register),
199 DEVMETHOD(device_probe, iflib_device_probe),
200 DEVMETHOD(device_attach, iflib_device_attach),
201 DEVMETHOD(device_detach, iflib_device_detach),
202 DEVMETHOD(device_shutdown, iflib_device_shutdown),
203 DEVMETHOD(device_suspend, iflib_device_suspend),
204 DEVMETHOD(device_resume, iflib_device_resume),
205 DEVMETHOD_END
206 };
208 static driver_t vmxnet3_driver = {
209 "vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
212 static devclass_t vmxnet3_devclass;
213 DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
214 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
215 MODULE_VERSION(vmx, 2);
217 MODULE_DEPEND(vmx, pci, 1, 1, 1);
218 MODULE_DEPEND(vmx, ether, 1, 1, 1);
219 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
221 static device_method_t vmxnet3_iflib_methods[] = {
222 DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
223 DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
224 DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
226 DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
227 DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
228 DEVMETHOD(ifdi_detach, vmxnet3_detach),
230 DEVMETHOD(ifdi_init, vmxnet3_init),
231 DEVMETHOD(ifdi_stop, vmxnet3_stop),
232 DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
233 DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
234 DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
235 DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
236 DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
237 DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
238 DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
239 DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
241 DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
242 DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
243 DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
244 DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
245 DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
246 DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
248 DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
249 DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
251 DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
252 DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
253 DEVMETHOD(ifdi_resume, vmxnet3_resume),
254 DEVMETHOD_END
255 };
258 static driver_t vmxnet3_iflib_driver = {
259 "vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
262 struct if_txrx vmxnet3_txrx = {
263 .ift_txd_encap = vmxnet3_isc_txd_encap,
264 .ift_txd_flush = vmxnet3_isc_txd_flush,
265 .ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
266 .ift_rxd_available = vmxnet3_isc_rxd_available,
267 .ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
268 .ift_rxd_refill = vmxnet3_isc_rxd_refill,
269 .ift_rxd_flush = vmxnet3_isc_rxd_flush,
270 .ift_legacy_intr = vmxnet3_legacy_intr
273 static struct if_shared_ctx vmxnet3_sctx_init = {
274 .isc_magic = IFLIB_MAGIC,
275 .isc_q_align = 512,
277 .isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
278 .isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
279 .isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
280 .isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
283 * These values are used to configure the busdma tag used for
284 * receive descriptors. Each receive descriptor only points to one
285 * buffer.
287 .isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
288 .isc_rx_nsegments = 1, /* One mapping per descriptor */
289 .isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
291 .isc_admin_intrcnt = 1,
292 .isc_vendor_info = vmxnet3_vendor_info_array,
293 .isc_driver_version = "2",
294 .isc_driver = &vmxnet3_iflib_driver,
295 .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
298 * Number of receive queues per receive queue set, with associated
299 * descriptor settings for each.
301 .isc_nrxqs = 3,
302 .isc_nfl = 2, /* one free list for each receive command queue */
303 .isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
304 .isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
305 .isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
308 * Number of transmit queues per transmit queue set, with associated
309 * descriptor settings for each.
311 .isc_ntxqs = 2,
312 .isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
313 .isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
314 .isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
318 vmxnet3_register(device_t dev)
320 return (&vmxnet3_sctx_init);
324 trunc_powerof2(int val)
327 return (1U << (fls(val) - 1));
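/*
 * Illustrative sketch, not driver code: fls() returns the 1-based index of
 * the most significant set bit, so for any val > 0 the expression above
 * rounds down to a power of two.
 */
#if 0
static void
vmx_trunc_powerof2_examples(void)
{
	MPASS(trunc_powerof2(1) == 1);	/* fls(1) == 1 -> 1U << 0 */
	MPASS(trunc_powerof2(5) == 4);	/* fls(5) == 3 -> 1U << 2 */
	MPASS(trunc_powerof2(8) == 8);	/* fls(8) == 4 -> 1U << 3 */
}
#endif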
331 vmxnet3_attach_pre(if_ctx_t ctx)
334 if_softc_ctx_t scctx;
335 struct vmxnet3_softc *sc;
336 uint32_t intr_config;
339 dev = iflib_get_dev(ctx);
340 sc = iflib_get_softc(ctx);
343 sc->vmx_sctx = iflib_get_sctx(ctx);
344 sc->vmx_scctx = iflib_get_softc_ctx(ctx);
345 sc->vmx_ifp = iflib_get_ifp(ctx);
346 sc->vmx_media = iflib_get_media(ctx);
347 scctx = sc->vmx_scctx;
349 scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
350 scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
351 /* isc_tx_tso_size_max does not include a possible VLAN header */
352 scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
353 scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
354 scctx->isc_txrx = &vmxnet3_txrx;
356 /* If 0, the iflib tunable was not set, so set to the default */
357 if (scctx->isc_nrxqsets == 0)
358 scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
359 scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
360 scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
361 scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
363 /* If 0, the iflib tunable was not set, so set to the default */
364 if (scctx->isc_ntxqsets == 0)
365 scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
366 scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
367 scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
368 scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
371 * Enforce that the transmit completion queue descriptor count is
372 * the same as the transmit command queue descriptor count.
374 scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
375 scctx->isc_txqsizes[0] =
376 sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
377 scctx->isc_txqsizes[1] =
378 sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
381 * Enforce that the receive completion queue descriptor count is the
382 * sum of the receive command queue descriptor counts, and that the
383 * second receive command queue descriptor count is the same as the
384 * first receive command queue descriptor count.
386 scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
387 scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
388 scctx->isc_rxqsizes[0] =
389 sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
390 scctx->isc_rxqsizes[1] =
391 sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
392 scctx->isc_rxqsizes[2] =
393 sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
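/*
 * Illustrative sketch, not driver code: with the default ring sizes
 * (assuming, e.g., isc_ntxd[1] == 512 and isc_nrxd[1] == 256), the
 * assignments above yield a 512-entry tx completion ring, two 256-entry
 * rx command rings, and a 256 + 256 == 512-entry rx completion ring.
 */
#if 0
	MPASS(scctx->isc_ntxd[0] == scctx->isc_ntxd[1]);
	MPASS(scctx->isc_nrxd[0] == scctx->isc_nrxd[1] + scctx->isc_nrxd[2]);
#endif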
396 * Initialize the max frame size and descriptor queue buffer
397 * sizes.
399 vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
401 scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
404 error = vmxnet3_alloc_resources(sc);
408 /* Check device versions */
409 error = vmxnet3_check_version(sc);
414 * The interrupt mode can be set in the hypervisor configuration via
415 * the parameter ethernet<N>.intrMode.
417 intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
418 sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
421 * Configure the softc context so that iflib attempts the interrupt
422 * mode now indicated by intr_config. iflib will follow the usual
423 * fallback path MSI-X -> MSI -> LEGACY, starting at the configured
424 * initial mode.
426 switch (intr_config & 0x03) {
427 case VMXNET3_IT_AUTO:
428 case VMXNET3_IT_MSIX:
429 scctx->isc_msix_bar = pci_msix_table_bar(dev);
430 break;
431 case VMXNET3_IT_MSI:
432 scctx->isc_msix_bar = -1;
433 scctx->isc_disable_msix = 1;
434 break;
435 case VMXNET3_IT_LEGACY:
436 scctx->isc_msix_bar = 0;
437 break;
438 }
440 scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
441 scctx->isc_capabilities = scctx->isc_capenable =
442 IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
443 IFCAP_TSO4 | IFCAP_TSO6 |
444 IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
445 IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
446 IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
449 /* These capabilities are not enabled by default. */
450 scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
452 vmxnet3_get_lladdr(sc);
453 iflib_set_mac(ctx, sc->vmx_lladdr);
458 * We must completely clean up anything allocated above as iflib
459 * will not invoke any other driver entry points as a result of this
460 * failure.
462 vmxnet3_free_resources(sc);
468 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
470 struct vmxnet3_softc *sc;
471 if_softc_ctx_t scctx;
472 struct vmxnet3_rxqueue *rxq;
477 sc = iflib_get_softc(ctx);
478 scctx = sc->vmx_scctx;
480 for (i = 0; i < scctx->isc_nrxqsets; i++) {
481 snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
483 rxq = &sc->vmx_rxq[i];
484 error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
485 IFLIB_INTR_RX, vmxnet3_rxq_intr, rxq, i, irq_name);
487 device_printf(iflib_get_dev(ctx),
488 "Failed to register rxq %d interrupt handler\n", i);
493 for (i = 0; i < scctx->isc_ntxqsets; i++) {
494 snprintf(irq_name, sizeof(irq_name), "txq%d", i);
497 * Don't provide the corresponding rxq irq for reference -
498 * we want the transmit task to be attached to a task queue
499 * that is different from the one used by the corresponding
500 * rxq irq. That is because the TX doorbell writes are very
501 * expensive as virtualized MMIO operations, so we want to
502 * be able to defer them to another core when possible so
503 * that they don't steal receive processing cycles during
504 * stack turnarounds like TCP ACK generation. The other
505 * piece to this approach is enabling the iflib abdicate
506 * option (currently via an interface-specific
507 * sysctl/tunable).
509 iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
513 error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
514 scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
517 device_printf(iflib_get_dev(ctx),
518 "Failed to register event interrupt handler\n");
526 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
528 if_softc_ctx_t scctx;
529 struct vmxnet3_rxqueue *rxq;
532 scctx = sc->vmx_scctx;
534 for (i = 0; i < scctx->isc_nrxqsets; i++) {
535 rxq = &sc->vmx_rxq[i];
536 iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
539 iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
543 vmxnet3_attach_post(if_ctx_t ctx)
546 if_softc_ctx_t scctx;
547 struct vmxnet3_softc *sc;
550 dev = iflib_get_dev(ctx);
551 scctx = iflib_get_softc_ctx(ctx);
552 sc = iflib_get_softc(ctx);
554 if (scctx->isc_nrxqsets > 1)
555 sc->vmx_flags |= VMXNET3_FLAG_RSS;
557 error = vmxnet3_alloc_data(sc);
561 vmxnet3_set_interrupt_idx(sc);
562 vmxnet3_setup_sysctl(sc);
564 ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
565 ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
572 vmxnet3_detach(if_ctx_t ctx)
574 struct vmxnet3_softc *sc;
576 sc = iflib_get_softc(ctx);
578 vmxnet3_free_irqs(sc);
579 vmxnet3_free_data(sc);
580 vmxnet3_free_resources(sc);
586 vmxnet3_shutdown(if_ctx_t ctx)
593 vmxnet3_suspend(if_ctx_t ctx)
600 vmxnet3_resume(if_ctx_t ctx)
607 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
615 sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
617 if (sc->vmx_res0 == NULL) {
619 "could not map BAR0 memory\n");
623 sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
624 sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
627 sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
629 if (sc->vmx_res1 == NULL) {
631 "could not map BAR1 memory\n");
635 sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
636 sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
642 vmxnet3_free_resources(struct vmxnet3_softc *sc)
648 if (sc->vmx_res0 != NULL) {
649 bus_release_resource(dev, SYS_RES_MEMORY,
650 rman_get_rid(sc->vmx_res0), sc->vmx_res0);
654 if (sc->vmx_res1 != NULL) {
655 bus_release_resource(dev, SYS_RES_MEMORY,
656 rman_get_rid(sc->vmx_res1), sc->vmx_res1);
662 vmxnet3_check_version(struct vmxnet3_softc *sc)
669 version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
670 if ((version & 0x01) == 0) {
671 device_printf(dev, "unsupported hardware version %#x\n",
675 vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
677 version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
678 if ((version & 0x01) == 0) {
679 device_printf(dev, "unsupported UPT version %#x\n", version);
682 vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
688 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
690 if_softc_ctx_t scctx;
691 struct vmxnet3_txqueue *txq;
692 struct vmxnet3_txq_shared *txs;
693 struct vmxnet3_rxqueue *rxq;
694 struct vmxnet3_rxq_shared *rxs;
698 scctx = sc->vmx_scctx;
701 * There is always one interrupt per receive queue, assigned
702 * starting with the first interrupt. When there is only one
703 * interrupt available, the event interrupt shares the receive queue
704 * interrupt, otherwise it uses the interrupt following the last
705 * receive queue interrupt. Transmit queues are not assigned
706 * interrupts, so they are given indexes beyond the indexes that
707 * correspond to the real interrupts.
710 /* The event interrupt is always the last vector. */
711 sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
714 for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
715 rxq = &sc->vmx_rxq[i];
717 rxq->vxrxq_intr_idx = intr_idx;
718 rxs->intr_idx = rxq->vxrxq_intr_idx;
722 * Assign the tx queues' interrupt indexes above those we are actually
723 * using. These interrupts will never be enabled.
725 intr_idx = scctx->isc_vectors;
726 for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
727 txq = &sc->vmx_txq[i];
729 txq->vxtxq_intr_idx = intr_idx;
730 txs->intr_idx = txq->vxtxq_intr_idx;
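/*
 * Illustrative sketch, not driver code: with isc_nrxqsets == 4 and
 * isc_vectors == 5 (MSI-X), the assignments above give rxq 0..3 the
 * interrupt indexes 0..3, the event interrupt index 4, and the tx queues
 * indexes 5, 6, ... which have no backing vector and are never enabled.
 */
#if 0
	MPASS(sc->vmx_rxq[0].vxrxq_intr_idx == 0);
	MPASS(sc->vmx_event_intr_idx == 4);		/* isc_vectors - 1 */
	MPASS(sc->vmx_txq[0].vxtxq_intr_idx == 5);	/* isc_vectors */
#endif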
735 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
737 if_softc_ctx_t scctx;
741 scctx = sc->vmx_scctx;
744 * The txq and rxq shared data areas must be allocated contiguously
745 * as vmxnet3_driver_shared contains only a single address member
746 * for the shared queue data area.
748 size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
749 scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
750 error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
752 device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
760 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
762 struct vmxnet3_txqueue *txq;
763 struct vmxnet3_comp_ring *txc;
764 struct vmxnet3_txring *txr;
765 if_softc_ctx_t scctx;
767 txq = &sc->vmx_txq[q];
768 txc = &txq->vxtxq_comp_ring;
769 txr = &txq->vxtxq_cmd_ring;
770 scctx = sc->vmx_scctx;
772 snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
773 device_get_nameunit(sc->vmx_dev), q);
777 txc->vxcr_ndesc = scctx->isc_ntxd[0];
778 txr->vxtxr_ndesc = scctx->isc_ntxd[1];
782 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
783 int ntxqs, int ntxqsets)
785 struct vmxnet3_softc *sc;
790 sc = iflib_get_softc(ctx);
792 /* Allocate the array of transmit queues */
793 sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
794 ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
795 if (sc->vmx_txq == NULL)
796 return (ENOMEM);
798 /* Initialize driver state for each transmit queue */
799 for (q = 0; q < ntxqsets; q++)
800 vmxnet3_init_txq(sc, q);
803 * Allocate queue state that is shared with the device. This check
804 * and call is performed in both vmxnet3_tx_queues_alloc() and
805 * vmxnet3_rx_queues_alloc() so that we don't have to care which
806 * order iflib invokes those routines in.
808 if (sc->vmx_qs_dma.idi_size == 0) {
809 error = vmxnet3_queues_shared_alloc(sc);
814 kva = sc->vmx_qs_dma.idi_vaddr;
815 for (q = 0; q < ntxqsets; q++) {
816 sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
817 kva += sizeof(struct vmxnet3_txq_shared);
820 /* Record descriptor ring vaddrs and paddrs */
821 for (q = 0; q < ntxqsets; q++) {
822 struct vmxnet3_txqueue *txq;
823 struct vmxnet3_txring *txr;
824 struct vmxnet3_comp_ring *txc;
826 txq = &sc->vmx_txq[q];
827 txc = &txq->vxtxq_comp_ring;
828 txr = &txq->vxtxq_cmd_ring;
830 /* Completion ring */
832 (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
833 txc->vxcr_paddr = paddrs[q * ntxqs + 0];
837 (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
838 txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
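/*
 * Illustrative sketch, not driver code: iflib hands over the rings for
 * queue set q as consecutive entries, so with ntxqs == 2 the layout
 * consumed above is:
 *
 *	vaddrs[q * 2 + 0] / paddrs[q * 2 + 0] -> completion ring of txq q
 *	vaddrs[q * 2 + 1] / paddrs[q * 2 + 1] -> command ring of txq q
 */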
845 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
847 struct vmxnet3_rxqueue *rxq;
848 struct vmxnet3_comp_ring *rxc;
849 struct vmxnet3_rxring *rxr;
850 if_softc_ctx_t scctx;
853 rxq = &sc->vmx_rxq[q];
854 rxc = &rxq->vxrxq_comp_ring;
855 scctx = sc->vmx_scctx;
857 snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
858 device_get_nameunit(sc->vmx_dev), q);
864 * First rxq is the completion queue, so there are nrxqs - 1 command
865 * rings starting at iflib queue id 1.
867 rxc->vxcr_ndesc = scctx->isc_nrxd[0];
868 for (i = 0; i < nrxqs - 1; i++) {
869 rxr = &rxq->vxrxq_cmd_ring[i];
870 rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
875 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
876 int nrxqs, int nrxqsets)
878 struct vmxnet3_softc *sc;
879 if_softc_ctx_t scctx;
885 sc = iflib_get_softc(ctx);
886 scctx = sc->vmx_scctx;
888 /* Allocate the array of receive queues */
889 sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
890 nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
891 if (sc->vmx_rxq == NULL)
892 return (ENOMEM);
894 /* Initialize driver state for each receive queue */
895 for (q = 0; q < nrxqsets; q++)
896 vmxnet3_init_rxq(sc, q, nrxqs);
899 * Allocate queue state that is shared with the device. This check
900 * and call is performed in both vmxnet3_tx_queues_alloc() and
901 * vmxnet3_rx_queues_alloc() so that we don't have to care which
902 * order iflib invokes those routines in.
904 if (sc->vmx_qs_dma.idi_size == 0) {
905 error = vmxnet3_queues_shared_alloc(sc);
910 kva = sc->vmx_qs_dma.idi_vaddr +
911 scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
912 for (q = 0; q < nrxqsets; q++) {
913 sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
914 kva += sizeof(struct vmxnet3_rxq_shared);
917 /* Record descriptor ring vaddrs and paddrs */
918 for (q = 0; q < nrxqsets; q++) {
919 struct vmxnet3_rxqueue *rxq;
920 struct vmxnet3_rxring *rxr;
921 struct vmxnet3_comp_ring *rxc;
923 rxq = &sc->vmx_rxq[q];
924 rxc = &rxq->vxrxq_comp_ring;
926 /* Completion ring */
928 (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
929 rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
931 /* Command ring(s) */
932 for (i = 0; i < nrxqs - 1; i++) {
933 rxr = &rxq->vxrxq_cmd_ring[i];
936 (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
937 rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
945 vmxnet3_queues_free(if_ctx_t ctx)
947 struct vmxnet3_softc *sc;
949 sc = iflib_get_softc(ctx);
951 /* Free queue state area that is shared with the device */
952 if (sc->vmx_qs_dma.idi_size != 0) {
953 iflib_dma_free(&sc->vmx_qs_dma);
954 sc->vmx_qs_dma.idi_size = 0;
957 /* Free array of receive queues */
958 if (sc->vmx_rxq != NULL) {
959 free(sc->vmx_rxq, M_DEVBUF);
963 /* Free array of transmit queues */
964 if (sc->vmx_txq != NULL) {
965 free(sc->vmx_txq, M_DEVBUF);
971 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
979 /* Top level state structure shared with the device */
980 size = sizeof(struct vmxnet3_driver_shared);
981 error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
983 device_printf(dev, "cannot alloc shared memory\n");
986 sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
988 /* RSS table state shared with the device */
989 if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
990 size = sizeof(struct vmxnet3_rss_shared);
991 error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
992 &sc->vmx_rss_dma, 0);
994 device_printf(dev, "cannot alloc rss shared memory\n");
998 (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
1005 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1008 /* Free RSS table state shared with the device */
1009 if (sc->vmx_rss != NULL) {
1010 iflib_dma_free(&sc->vmx_rss_dma);
1014 /* Free top level state structure shared with the device */
1015 if (sc->vmx_ds != NULL) {
1016 iflib_dma_free(&sc->vmx_ds_dma);
1022 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1026 /* Multicast table state shared with the device */
1027 error = iflib_dma_alloc_align(sc->vmx_ctx,
1028 VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1030 device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1032 sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1038 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1041 /* Free multicast table state shared with the device */
1042 if (sc->vmx_mcast != NULL) {
1043 iflib_dma_free(&sc->vmx_mcast_dma);
1044 sc->vmx_mcast = NULL;
1049 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1051 struct vmxnet3_driver_shared *ds;
1052 if_shared_ctx_t sctx;
1053 if_softc_ctx_t scctx;
1054 struct vmxnet3_txqueue *txq;
1055 struct vmxnet3_txq_shared *txs;
1056 struct vmxnet3_rxqueue *rxq;
1057 struct vmxnet3_rxq_shared *rxs;
1061 sctx = sc->vmx_sctx;
1062 scctx = sc->vmx_scctx;
1065 * Initialize fields of the shared data that remain the same across
1066 * reinits. Note the shared data is zeroed when allocated.
1069 ds->magic = VMXNET3_REV1_MAGIC;
1072 ds->version = VMXNET3_DRIVER_VERSION;
1073 ds->guest = VMXNET3_GOS_FREEBSD |
1079 ds->vmxnet3_revision = 1;
1080 ds->upt_version = 1;
1083 ds->driver_data = vtophys(sc);
1084 ds->driver_data_len = sizeof(struct vmxnet3_softc);
1085 ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1086 ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1087 ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1090 if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1091 ds->rss.version = 1;
1092 ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1093 ds->rss.len = sc->vmx_rss_dma.idi_size;
1096 /* Interrupt control. */
1097 ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1099 * Total number of interrupt indexes we are using in the shared
1100 * config data, even though we don't actually allocate interrupt
1101 * resources for the tx queues. Some versions of the device will
1102 * fail to initialize successfully if interrupt indexes are used in
1103 * the shared config that exceed the number of interrupts configured
1104 * for the device.
1106 ds->nintr = (scctx->isc_vectors == 1) ?
1107 2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1108 ds->evintr = sc->vmx_event_intr_idx;
1109 ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1111 for (i = 0; i < ds->nintr; i++)
1112 ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1114 /* Receive filter. */
1115 ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1116 ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1119 for (i = 0; i < scctx->isc_ntxqsets; i++) {
1120 txq = &sc->vmx_txq[i];
1121 txs = txq->vxtxq_ts;
1123 txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1124 txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1125 txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1126 txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1127 txs->driver_data = vtophys(txq);
1128 txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1132 for (i = 0; i < scctx->isc_nrxqsets; i++) {
1133 rxq = &sc->vmx_rxq[i];
1134 rxs = rxq->vxrxq_rs;
1136 rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1137 rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1138 rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1139 rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1140 rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1141 rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1142 rxs->driver_data = vtophys(rxq);
1143 rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1148 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1151 * Use the same key as the Linux driver until FreeBSD can do
1152 * RSS (presumably Toeplitz) in software.
1154 static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1155 0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1156 0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1157 0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1158 0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1159 0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1162 struct vmxnet3_driver_shared *ds;
1163 if_softc_ctx_t scctx;
1164 struct vmxnet3_rss_shared *rss;
1171 scctx = sc->vmx_scctx;
1175 UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1176 UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1177 rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1178 rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1179 rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1182 * If the software RSS is configured to anything other than
1183 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
1184 * the packet distribution, but report the hash as opaque to
1185 * disengage from the software RSS.
1187 rss_algo = rss_gethashalgo();
1188 if (rss_algo == RSS_HASH_TOEPLITZ) {
1189 rss_getkey(rss->hash_key);
1190 for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1191 rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1192 scctx->isc_nrxqsets;
1194 sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1198 memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1199 for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1200 rss->ind_table[i] = i % scctx->isc_nrxqsets;
1201 sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
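/*
 * Illustrative sketch, not driver code: with isc_nrxqsets == 4 the
 * fallback indirection table built above is 0,1,2,3,0,1,2,3,... so the
 * device spreads flows round-robin across the receive queues.
 */
#if 0
	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
		MPASS(rss->ind_table[i] == i % 4);
#endif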
1206 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1209 struct vmxnet3_driver_shared *ds;
1210 if_softc_ctx_t scctx;
1214 scctx = sc->vmx_scctx;
1216 ds->mtu = ifp->if_mtu;
1217 ds->ntxqueue = scctx->isc_ntxqsets;
1218 ds->nrxqueue = scctx->isc_nrxqsets;
1220 ds->upt_features = 0;
1221 if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1222 ds->upt_features |= UPT1_F_CSUM;
1223 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1224 ds->upt_features |= UPT1_F_VLAN;
1225 if (ifp->if_capenable & IFCAP_LRO)
1226 ds->upt_features |= UPT1_F_LRO;
1228 if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1229 ds->upt_features |= UPT1_F_RSS;
1230 vmxnet3_reinit_rss_shared_data(sc);
1233 vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1234 vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1235 (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1239 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1243 error = vmxnet3_alloc_shared_data(sc);
1247 error = vmxnet3_alloc_mcast_table(sc);
1251 vmxnet3_init_shared_data(sc);
1257 vmxnet3_free_data(struct vmxnet3_softc *sc)
1260 vmxnet3_free_mcast_table(sc);
1261 vmxnet3_free_shared_data(sc);
1265 vmxnet3_evintr(struct vmxnet3_softc *sc)
1268 struct vmxnet3_txq_shared *ts;
1269 struct vmxnet3_rxq_shared *rs;
1275 event = sc->vmx_ds->event;
1276 vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1278 if (event & VMXNET3_EVENT_LINK)
1279 vmxnet3_link_status(sc);
1281 if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1282 vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1283 ts = sc->vmx_txq[0].vxtxq_ts;
1284 if (ts->stopped != 0)
1285 device_printf(dev, "Tx queue error %#x\n", ts->error);
1286 rs = sc->vmx_rxq[0].vxrxq_rs;
1287 if (rs->stopped != 0)
1288 device_printf(dev, "Rx queue error %#x\n", rs->error);
1290 /* XXX - rely on iflib watchdog to reset us? */
1291 device_printf(dev, "Rx/Tx queue error event ... "
1292 "waiting for iflib watchdog reset\n");
1295 if (event & VMXNET3_EVENT_DIC)
1296 device_printf(dev, "device implementation change event\n");
1297 if (event & VMXNET3_EVENT_DEBUG)
1298 device_printf(dev, "debug event\n");
1302 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1304 struct vmxnet3_softc *sc;
1305 struct vmxnet3_txqueue *txq;
1306 struct vmxnet3_txring *txr;
1307 struct vmxnet3_txdesc *txd, *sop;
1308 bus_dma_segment_t *segs;
1316 txq = &sc->vmx_txq[pi->ipi_qsidx];
1317 txr = &txq->vxtxq_cmd_ring;
1318 segs = pi->ipi_segs;
1319 nsegs = pi->ipi_nsegs;
1320 pidx = pi->ipi_pidx;
1322 KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1323 ("%s: packet with too many segments %d", __func__, nsegs));
1325 sop = &txr->vxtxr_txd[pidx];
1326 gen = txr->vxtxr_gen ^ 1; /* Not yet owned by the device */
1328 for (i = 0; i < nsegs; i++) {
1329 txd = &txr->vxtxr_txd[pidx];
1331 txd->addr = segs[i].ds_addr;
1332 txd->len = segs[i].ds_len;
1335 txd->offload_mode = VMXNET3_OM_NONE;
1336 txd->offload_pos = 0;
1343 if (++pidx == txr->vxtxr_ndesc) {
1344 pidx = 0;
1345 txr->vxtxr_gen ^= 1;
1346 }
1347 gen = txr->vxtxr_gen;
1350 txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1351 pi->ipi_new_pidx = pidx;
1356 if (pi->ipi_mflags & M_VLANTAG) {
1357 sop->vtag_mode = 1;
1358 sop->vtag = pi->ipi_vtag;
1359 }
1362 * TSO and checksum offloads
1364 hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1365 if (pi->ipi_csum_flags & CSUM_TSO) {
1366 sop->offload_mode = VMXNET3_OM_TSO;
1367 sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1368 sop->offload_pos = pi->ipi_tso_segsz;
1369 } else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1370 VMXNET3_CSUM_OFFLOAD_IPV6)) {
1371 sop->offload_mode = VMXNET3_OM_CSUM;
1372 sop->hlen = hdrlen;
1373 sop->offload_pos = hdrlen +
1374 ((pi->ipi_ipproto == IPPROTO_TCP) ?
1375 offsetof(struct tcphdr, th_sum) :
1376 offsetof(struct udphdr, uh_sum));
1379 /* Finally, change the ownership. */
1380 vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1381 sop->gen ^= 1;
1383 return (0);
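/*
 * Illustrative sketch, not driver code: every descriptor in the chain
 * except the SOP was written with the live generation value, while the
 * SOP was written with the inverted value. The device treats a descriptor
 * whose gen bit matches the ring's current generation as device-owned, so
 * the write barrier plus the single SOP flip above publishes the entire
 * chain atomically from the device's point of view.
 */
#if 0
static inline bool
vmx_device_owns(uint32_t desc_gen, uint32_t ring_gen)
{
	return (desc_gen == ring_gen);
}
#endif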
1387 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1389 struct vmxnet3_softc *sc;
1390 struct vmxnet3_txqueue *txq;
1393 txq = &sc->vmx_txq[txqid];
1396 * pidx is what we last set ipi_new_pidx to in
1397 * vmxnet3_isc_txd_encap()
1401 * Avoid expensive register updates if the flush request is
1402 * redundant.
1404 if (txq->vxtxq_last_flush == pidx)
1405 return;
1406 txq->vxtxq_last_flush = pidx;
1407 vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1411 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1413 struct vmxnet3_softc *sc;
1414 struct vmxnet3_txqueue *txq;
1415 struct vmxnet3_comp_ring *txc;
1416 struct vmxnet3_txcompdesc *txcd;
1417 struct vmxnet3_txring *txr;
1421 txq = &sc->vmx_txq[txqid];
1422 txc = &txq->vxtxq_comp_ring;
1423 txr = &txq->vxtxq_cmd_ring;
1426 * If clear is true, we need to report the number of TX command ring
1427 * descriptors that have been processed by the device. If clear is
1428 * false, we just need to report whether or not at least one TX
1429 * command ring descriptor has been processed by the device.
1433 txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1434 if (txcd->gen != txc->vxcr_gen)
1435 break;
1438 vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1440 if (++txc->vxcr_next == txc->vxcr_ndesc) {
1441 txc->vxcr_next = 0;
1442 txc->vxcr_gen ^= 1;
1443 }
1445 if (txcd->eop_idx < txr->vxtxr_next)
1446 processed += txr->vxtxr_ndesc -
1447 (txr->vxtxr_next - txcd->eop_idx) + 1;
1448 else
1449 processed += txcd->eop_idx - txr->vxtxr_next + 1;
1450 txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
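/*
 * Illustrative sketch, not driver code: the wrap case above. With
 * vxtxr_ndesc == 512, vxtxr_next == 510 and eop_idx == 1, descriptors
 * 510, 511, 0 and 1 have completed, i.e. 512 - (510 - 1) + 1 == 4.
 */
#if 0
	MPASS(512 - (510 - 1) + 1 == 4);
#endif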
1457 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1459 struct vmxnet3_softc *sc;
1460 struct vmxnet3_rxqueue *rxq;
1461 struct vmxnet3_comp_ring *rxc;
1462 struct vmxnet3_rxcompdesc *rxcd;
1469 rxq = &sc->vmx_rxq[rxqid];
1470 rxc = &rxq->vxrxq_comp_ring;
1473 completed_gen = rxc->vxcr_gen;
1474 for (;;) {
1475 rxcd = &rxc->vxcr_u.rxcd[idx];
1476 if (rxcd->gen != completed_gen)
1477 break;
1478 vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1481 if (expect_sop)
1482 KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1483 else
1484 KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1485 expect_sop = rxcd->eop;
1487 if (rxcd->eop && (rxcd->len != 0))
1488 avail++;
1489 if (avail > budget)
1490 break;
1491 if (++idx == rxc->vxcr_ndesc) {
1492 idx = 0;
1493 completed_gen ^= 1;
1494 }
1501 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1503 struct vmxnet3_softc *sc;
1504 if_softc_ctx_t scctx;
1505 struct vmxnet3_rxqueue *rxq;
1506 struct vmxnet3_comp_ring *rxc;
1507 struct vmxnet3_rxcompdesc *rxcd;
1508 struct vmxnet3_rxring *rxr;
1509 struct vmxnet3_rxdesc *rxd;
1518 scctx = sc->vmx_scctx;
1519 rxq = &sc->vmx_rxq[ri->iri_qsidx];
1520 rxc = &rxq->vxrxq_comp_ring;
1523 * Get a single packet starting at the given index in the completion
1524 * queue. The fact that we have been called indicates that
1525 * vmxnet3_isc_rxd_available() has already verified that either
1526 * there is a complete packet available starting at the given index,
1527 * or there are one or more zero-length packets starting at the
1528 * given index followed by a complete packet, so no verification of
1529 * ownership of the descriptors (and no associated read barrier) is
1532 cqidx = ri->iri_cidx;
1533 rxcd = &rxc->vxcr_u.rxcd[cqidx];
1534 while (rxcd->len == 0) {
1535 KASSERT(rxcd->sop && rxcd->eop,
1536 ("%s: zero-length packet without both sop and eop set",
1538 rxc->vxcr_zero_length++;
1539 if (++cqidx == rxc->vxcr_ndesc) {
1543 rxcd = &rxc->vxcr_u.rxcd[cqidx];
1545 KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1549 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1550 * be used only if the software RSS is enabled and it uses the same
1551 * algorithm and the hash key as the "hardware". If the software RSS
1552 * is not enabled, then it's simply pointless to use those types.
1553 * If it's enabled but with different parameters, then hash values will
1554 * not match.
1556 ri->iri_flowid = rxcd->rss_hash;
1558 if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1559 switch (rxcd->rss_type) {
1560 case VMXNET3_RCD_RSS_TYPE_NONE:
1561 ri->iri_flowid = ri->iri_qsidx;
1562 ri->iri_rsstype = M_HASHTYPE_NONE;
1563 break;
1564 case VMXNET3_RCD_RSS_TYPE_IPV4:
1565 ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1566 break;
1567 case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1568 ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1569 break;
1570 case VMXNET3_RCD_RSS_TYPE_IPV6:
1571 ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1572 break;
1573 case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1574 ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1575 break;
1576 default:
1577 ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1578 break;
1579 }
1583 switch (rxcd->rss_type) {
1584 case VMXNET3_RCD_RSS_TYPE_NONE:
1585 ri->iri_flowid = ri->iri_qsidx;
1586 ri->iri_rsstype = M_HASHTYPE_NONE;
1587 break;
1588 default:
1589 ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1590 break;
1591 }
1595 * The queue numbering scheme used for rxcd->qid is as follows:
1596 * - All of the command ring 0s are numbered [0, nrxqsets - 1]
1597 * - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1599 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1600 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1601 * indicates command ring (and flid) 1.
1606 rxcd = &rxc->vxcr_u.rxcd[cqidx];
1607 KASSERT(rxcd->gen == rxc->vxcr_gen,
1608 ("%s: generation mismatch", __func__));
1609 flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1610 rxr = &rxq->vxrxq_cmd_ring[flid];
1611 rxd = &rxr->vxrxr_rxd[rxcd->rxd_idx];
1613 frag = &ri->iri_frags[nfrags];
1614 frag->irf_flid = flid;
1615 frag->irf_idx = rxcd->rxd_idx;
1616 frag->irf_len = rxcd->len;
1617 total_len += rxcd->len;
1619 if (++cqidx == rxc->vxcr_ndesc) {
1623 } while (!rxcd->eop);
1625 ri->iri_cidx = cqidx;
1626 ri->iri_nfrags = nfrags;
1627 ri->iri_len = total_len;
1630 * If there's an error, the last descriptor in the packet will
1631 * have the error indicator set. In this case, set all
1632 * fragment lengths to zero. This will cause iflib to discard
1633 * the packet, but process all associated descriptors through
1634 * the refill mechanism.
1636 if (__predict_false(rxcd->error)) {
1637 rxc->vxcr_pkt_errors++;
1638 for (i = 0; i < nfrags; i++) {
1639 frag = &ri->iri_frags[i];
1640 frag->irf_len = 0;
1641 }
1642 }
1643 /* Checksum offload information is in the last descriptor. */
1644 if (!rxcd->no_csum) {
1645 uint32_t csum_flags = 0;
1647 if (rxcd->ipv4) {
1648 csum_flags |= CSUM_IP_CHECKED;
1649 if (rxcd->ipcsum_ok)
1650 csum_flags |= CSUM_IP_VALID;
1651 }
1652 if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1653 csum_flags |= CSUM_L4_CALC;
1654 if (rxcd->csum_ok) {
1655 csum_flags |= CSUM_L4_VALID;
1656 ri->iri_csum_data = 0xffff;
1659 ri->iri_csum_flags = csum_flags;
1662 /* VLAN information is in the last descriptor. */
1663 if (rxcd->vlan) {
1664 ri->iri_flags |= M_VLANTAG;
1665 ri->iri_vtag = rxcd->vtag;
1666 }
1673 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1675 struct vmxnet3_softc *sc;
1676 struct vmxnet3_rxqueue *rxq;
1677 struct vmxnet3_rxring *rxr;
1678 struct vmxnet3_rxdesc *rxd;
1687 count = iru->iru_count;
1688 len = iru->iru_buf_size;
1689 flid = iru->iru_flidx;
1690 paddrs = iru->iru_paddrs;
1693 rxq = &sc->vmx_rxq[iru->iru_qsidx];
1694 rxr = &rxq->vxrxq_cmd_ring[flid];
1695 rxd = rxr->vxrxr_rxd;
1698 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1699 * command ring 1 is filled with BTYPE_BODY descriptors.
1701 btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1703 * The refill entries from iflib will advance monotonically,
1704 * but the refilled descriptors may not be contiguous due to
1705 * earlier skipping of descriptors by the device. The refill
1706 * entries from iflib need an entire state update, while the
1707 * descriptors previously skipped by the device only need to
1708 * have their generation numbers updated.
1710 idx = rxr->vxrxr_refill_start;
1711 i = 0;
1712 do {
1713 if (idx == iru->iru_idxs[i]) {
1714 rxd[idx].addr = paddrs[i];
1715 rxd[idx].len = len;
1716 rxd[idx].btype = btype;
1717 i++;
1718 } else
1719 rxr->vxrxr_desc_skips++;
1720 rxd[idx].gen = rxr->vxrxr_gen;
1722 if (++idx == rxr->vxrxr_ndesc) {
1723 idx = 0;
1724 rxr->vxrxr_gen ^= 1;
1725 }
1726 } while (i != count);
1727 rxr->vxrxr_refill_start = idx;
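/*
 * Illustrative sketch, not driver code: if the device skipped slot 7,
 * iflib's iru_idxs might be 5, 6, 8, 9. The loop above then fully
 * rewrites slots 5 and 6, bumps vxrxr_desc_skips and refreshes only the
 * gen bit for slot 7, and continues with a full rewrite of slots 8 and 9.
 */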
1731 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1733 struct vmxnet3_softc *sc;
1734 struct vmxnet3_rxqueue *rxq;
1735 struct vmxnet3_rxring *rxr;
1739 rxq = &sc->vmx_rxq[rxqid];
1740 rxr = &rxq->vxrxq_cmd_ring[flid];
1742 if (flid == 0)
1743 r = VMXNET3_BAR0_RXH1(rxqid);
1744 else
1745 r = VMXNET3_BAR0_RXH2(rxqid);
1748 * pidx is the index of the last descriptor with a buffer the device
1749 * can use, and the device needs to be told which index is one past
1750 * that.
1752 if (++pidx == rxr->vxrxr_ndesc)
1753 pidx = 0;
1754 vmxnet3_write_bar0(sc, r, pidx);
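/*
 * Illustrative sketch, not driver code: with vxrxr_ndesc == 256 and
 * pidx == 255 (the last valid buffer), the head register is written as 0,
 * i.e. one past the last usable descriptor, modulo the ring size.
 */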
1758 vmxnet3_legacy_intr(void *xsc)
1760 struct vmxnet3_softc *sc;
1761 if_softc_ctx_t scctx;
1765 scctx = sc->vmx_scctx;
1769 * When there is only a single interrupt configured, this routine
1770 * runs in fast interrupt context, following which the rxq 0 task
1771 * will be enqueued.
1773 if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1774 if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1775 return (FILTER_HANDLED);
1777 if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1778 vmxnet3_intr_disable_all(ctx);
1780 if (sc->vmx_ds->event != 0)
1781 iflib_admin_intr_deferred(ctx);
1784 * XXX - When there is both rxq and event activity, do we care
1785 * whether the rxq 0 task or the admin task re-enables the interrupt
1786 * first?
1788 return (FILTER_SCHEDULE_THREAD);
1792 vmxnet3_rxq_intr(void *vrxq)
1794 struct vmxnet3_softc *sc;
1795 struct vmxnet3_rxqueue *rxq;
1800 if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1801 vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1803 return (FILTER_SCHEDULE_THREAD);
1807 vmxnet3_event_intr(void *vsc)
1809 struct vmxnet3_softc *sc;
1813 if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1814 vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1817 * The work will be done via vmxnet3_update_admin_status(), and the
1818 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1822 return (FILTER_SCHEDULE_THREAD);
1826 vmxnet3_stop(if_ctx_t ctx)
1828 struct vmxnet3_softc *sc;
1830 sc = iflib_get_softc(ctx);
1832 sc->vmx_link_active = 0;
1833 vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1834 vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1838 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1840 struct vmxnet3_txring *txr;
1841 struct vmxnet3_comp_ring *txc;
1843 txq->vxtxq_last_flush = -1;
1845 txr = &txq->vxtxq_cmd_ring;
1846 txr->vxtxr_next = 0;
1847 txr->vxtxr_gen = VMXNET3_INIT_GEN;
1849 * iflib has zeroed out the descriptor array during the prior attach
1850 * or stop
1853 txc = &txq->vxtxq_comp_ring;
1855 txc->vxcr_gen = VMXNET3_INIT_GEN;
1857 * iflib has zeroed out the descriptor array during the prior attach
1858 * or stop
1863 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1865 struct vmxnet3_rxring *rxr;
1866 struct vmxnet3_comp_ring *rxc;
1870 * The descriptors will be populated with buffers during a
1871 * subsequent invocation of vmxnet3_isc_rxd_refill()
1873 for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1874 rxr = &rxq->vxrxq_cmd_ring[i];
1875 rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1876 rxr->vxrxr_desc_skips = 0;
1877 rxr->vxrxr_refill_start = 0;
1879 * iflib has zeroed out the descriptor array during the
1880 * prior attach or stop
1884 for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1885 rxr = &rxq->vxrxq_cmd_ring[i];
1887 rxr->vxrxr_desc_skips = 0;
1888 rxr->vxrxr_refill_start = 0;
1889 bzero(rxr->vxrxr_rxd,
1890 rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1893 rxc = &rxq->vxrxq_comp_ring;
1895 rxc->vxcr_gen = VMXNET3_INIT_GEN;
1896 rxc->vxcr_zero_length = 0;
1897 rxc->vxcr_pkt_errors = 0;
1899 * iflib has zeroed out the descriptor array during the prior attach
1900 * or stop
1905 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1907 if_softc_ctx_t scctx;
1910 scctx = sc->vmx_scctx;
1912 for (q = 0; q < scctx->isc_ntxqsets; q++)
1913 vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1915 for (q = 0; q < scctx->isc_nrxqsets; q++)
1916 vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1920 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1922 if_softc_ctx_t scctx;
1925 scctx = sc->vmx_scctx;
1927 if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1928 device_printf(sc->vmx_dev, "device enable command failed!\n");
1932 /* Reset the Rx queue heads. */
1933 for (q = 0; q < scctx->isc_nrxqsets; q++) {
1934 vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1935 vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1942 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1948 vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1950 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1951 bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1952 sizeof(sc->vmx_ds->vlan_filter));
1953 else
1954 bzero(sc->vmx_ds->vlan_filter,
1955 sizeof(sc->vmx_ds->vlan_filter));
1956 vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1960 vmxnet3_init(if_ctx_t ctx)
1962 struct vmxnet3_softc *sc;
1964 sc = iflib_get_softc(ctx);
1966 /* Use the current MAC address. */
1967 bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1968 vmxnet3_set_lladdr(sc);
1970 vmxnet3_reinit_shared_data(sc);
1971 vmxnet3_reinit_queues(sc);
1973 vmxnet3_enable_device(sc);
1975 vmxnet3_reinit_rxfilters(sc);
1976 vmxnet3_link_status(sc);
1980 vmxnet3_multi_set(if_ctx_t ctx)
1983 vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1984 if_getflags(iflib_get_ifp(ctx)));
1988 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1990 struct vmxnet3_softc *sc;
1991 if_softc_ctx_t scctx;
1993 sc = iflib_get_softc(ctx);
1994 scctx = sc->vmx_scctx;
1996 if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
2001 * Update the max frame size so that the rx mbuf size is
2002 * chosen based on the new mtu during the interface init that
2003 * will occur after this routine returns.
2005 scctx->isc_max_frame_size = mtu +
2006 ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
2007 /* RX completion queue - n/a */
2008 scctx->isc_rxd_buf_size[0] = 0;
2010 * For header-type descriptors (used for first segment of
2011 * packet), let iflib determine the buffer size based on the
2012 * max frame size.
2014 scctx->isc_rxd_buf_size[1] = 0;
2016 * For body-type descriptors (used for jumbo frames and LRO),
2017 * always use page-sized buffers.
2019 scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
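/*
 * Illustrative sketch, not driver code: for mtu == 9000 the computation
 * above gives isc_max_frame_size == 9000 + 14 + 4 + 4 == 9022
 * (Ethernet header, VLAN tag, CRC).
 */
#if 0
	MPASS(9000 + ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN ==
	    9022);
#endif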
2025 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
2027 struct vmxnet3_softc *sc;
2029 sc = iflib_get_softc(ctx);
2031 ifmr->ifm_status = IFM_AVALID;
2032 ifmr->ifm_active = IFM_ETHER;
2034 if (vmxnet3_link_is_up(sc) != 0) {
2035 ifmr->ifm_status |= IFM_ACTIVE;
2036 ifmr->ifm_active |= IFM_AUTO;
2037 } else
2038 ifmr->ifm_active |= IFM_NONE;
2042 vmxnet3_media_change(if_ctx_t ctx)
2050 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2053 vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2059 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2061 if_t ifp = iflib_get_ifp(ctx);
2063 if (cnt < IFCOUNTERS)
2064 return (if_get_counter_default(ifp, cnt));
2070 vmxnet3_update_admin_status(if_ctx_t ctx)
2072 struct vmxnet3_softc *sc;
2074 sc = iflib_get_softc(ctx);
2075 if (sc->vmx_ds->event != 0)
2076 vmxnet3_evintr(sc);
2078 vmxnet3_refresh_host_stats(sc);
2082 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2084 /* Host stats refresh is global, so just trigger it on txq 0 */
2086 vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2090 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2094 if (tag == 0 || tag > 4095)
2095 return;
2097 idx = (tag >> 5) & 0x7F;
2098 bit = tag & 0x1F;
2100 /* Update our private VLAN bitvector. */
2101 if (add)
2102 sc->vmx_vlan_filter[idx] |= (1 << bit);
2103 else
2104 sc->vmx_vlan_filter[idx] &= ~(1 << bit);
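/*
 * Illustrative sketch, not driver code: the 4096 VLAN IDs map onto a
 * 128-word vector of 32-bit words. For tag 100, idx == 3 and bit == 4,
 * i.e. word 3, bit 4.
 */
#if 0
	MPASS(((100 >> 5) & 0x7F) == 3);
	MPASS((100 & 0x1F) == 4);
#endif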
2108 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2111 vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2115 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2118 vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2122 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2124 struct vmxnet3_softc *sc = arg;
2126 if (count < VMXNET3_MULTICAST_MAX)
2127 bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2134 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2137 struct vmxnet3_driver_shared *ds;
2143 mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2144 if (flags & IFF_PROMISC)
2145 mode |= VMXNET3_RXMODE_PROMISC;
2146 if (flags & IFF_ALLMULTI)
2147 mode |= VMXNET3_RXMODE_ALLMULTI;
2151 cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2152 if (cnt >= VMXNET3_MULTICAST_MAX) {
2153 cnt = 0;
2154 mode |= VMXNET3_RXMODE_ALLMULTI;
2155 } else if (cnt > 0)
2156 mode |= VMXNET3_RXMODE_MCAST;
2157 ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2162 vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2163 vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2167 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2170 vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2174 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2178 status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2179 return (!!(status & 0x1));
2183 vmxnet3_link_status(struct vmxnet3_softc *sc)
2190 link = vmxnet3_link_is_up(sc);
2191 speed = IF_Gbps(10);
2193 if (link != 0 && sc->vmx_link_active == 0) {
2194 sc->vmx_link_active = 1;
2195 iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2196 } else if (link == 0 && sc->vmx_link_active != 0) {
2197 sc->vmx_link_active = 0;
2198 iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2203 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2207 ml = sc->vmx_lladdr[0];
2208 ml |= sc->vmx_lladdr[1] << 8;
2209 ml |= sc->vmx_lladdr[2] << 16;
2210 ml |= sc->vmx_lladdr[3] << 24;
2211 vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2213 mh = sc->vmx_lladdr[4];
2214 mh |= sc->vmx_lladdr[5] << 8;
2215 vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2219 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2223 ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2224 mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2226 sc->vmx_lladdr[0] = ml;
2227 sc->vmx_lladdr[1] = ml >> 8;
2228 sc->vmx_lladdr[2] = ml >> 16;
2229 sc->vmx_lladdr[3] = ml >> 24;
2230 sc->vmx_lladdr[4] = mh;
2231 sc->vmx_lladdr[5] = mh >> 8;
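/*
 * Illustrative sketch, not driver code: for the address
 * 00:0c:29:aa:bb:cc, vmxnet3_set_lladdr() packs ml == 0xaa290c00 and
 * mh == 0x0000ccbb; the byte extraction above is the exact inverse.
 */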
2235 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2236 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2238 struct sysctl_oid *node, *txsnode;
2239 struct sysctl_oid_list *list, *txslist;
2240 struct UPT1_TxStats *txstats;
2243 txstats = &txq->vxtxq_ts->stats;
2245 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2246 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2247 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2248 txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2251 * Add statistics reported by the host. These are updated by the
2252 * iflib txq timer on txq 0.
2254 txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2255 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2256 txslist = SYSCTL_CHILDREN(txsnode);
2257 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2258 &txstats->TSO_packets, "TSO packets");
2259 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2260 &txstats->TSO_bytes, "TSO bytes");
2261 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2262 &txstats->ucast_packets, "Unicast packets");
2263 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2264 &txstats->ucast_bytes, "Unicast bytes");
2265 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2266 &txstats->mcast_packets, "Multicast packets");
2267 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2268 &txstats->mcast_bytes, "Multicast bytes");
2269 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2270 &txstats->error, "Errors");
2271 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2272 &txstats->discard, "Discards");
2276 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2277 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2279 struct sysctl_oid *node, *rxsnode;
2280 struct sysctl_oid_list *list, *rxslist;
2281 struct UPT1_RxStats *rxstats;
2284 rxstats = &rxq->vxrxq_rs->stats;
2286 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2287 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2288 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2289 rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2292 * Add statistics reported by the host. These are updated by the
2293 * iflib txq timer on txq 0.
2295 rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2296 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2297 rxslist = SYSCTL_CHILDREN(rxsnode);
2298 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2299 &rxstats->LRO_packets, "LRO packets");
2300 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2301 &rxstats->LRO_bytes, "LRO bytes");
2302 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2303 &rxstats->ucast_packets, "Unicast packets");
2304 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2305 &rxstats->ucast_bytes, "Unicast bytes");
2306 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2307 &rxstats->mcast_packets, "Multicast packets");
2308 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2309 &rxstats->mcast_bytes, "Multicast bytes");
2310 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2311 &rxstats->bcast_packets, "Broadcast packets");
2312 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2313 &rxstats->bcast_bytes, "Broadcast bytes");
2314 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2315 &rxstats->nobuffer, "No buffer");
2316 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2317 &rxstats->error, "Errors");
2321 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2322 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2324 if_softc_ctx_t scctx;
2325 struct sysctl_oid *node;
2326 struct sysctl_oid_list *list;
2329 scctx = sc->vmx_scctx;
2331 for (i = 0; i < scctx->isc_ntxqsets; i++) {
2332 struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2334 node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2335 "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2336 list = SYSCTL_CHILDREN(node);
2338 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2339 &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2340 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2341 &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2342 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2343 &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2344 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2345 &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2346 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2347 &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
2348 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2349 &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2352 for (i = 0; i < scctx->isc_nrxqsets; i++) {
2353 struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2355 node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2356 "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2357 list = SYSCTL_CHILDREN(node);
2359 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2360 &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2361 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2362 &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2363 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2364 &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2365 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2366 &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2367 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2368 &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2369 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2370 &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2371 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2372 &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
2373 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2374 &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2375 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2376 &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2377 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2378 &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2383 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2384 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2386 if_softc_ctx_t scctx;
2389 scctx = sc->vmx_scctx;
2391 for (i = 0; i < scctx->isc_ntxqsets; i++)
2392 vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2393 for (i = 0; i < scctx->isc_nrxqsets; i++)
2394 vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2396 vmxnet3_setup_debug_sysctl(sc, ctx, child);
2400 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2403 struct sysctl_ctx_list *ctx;
2404 struct sysctl_oid *tree;
2405 struct sysctl_oid_list *child;
2408 ctx = device_get_sysctl_ctx(dev);
2409 tree = device_get_sysctl_tree(dev);
2410 child = SYSCTL_CHILDREN(tree);
2412 vmxnet3_setup_queue_sysctl(sc, ctx, child);
2416 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2419 bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2423 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2426 return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2430 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2433 bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2437 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2440 vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2444 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2447 vmxnet3_write_cmd(sc, cmd);
2448 bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2449 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2450 return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2454 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
2457 vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
2461 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
2464 vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
2468 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2470 /* Not using interrupts for TX */
2475 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2477 struct vmxnet3_softc *sc;
2479 sc = iflib_get_softc(ctx);
2480 vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2485 vmxnet3_link_intr_enable(if_ctx_t ctx)
2487 struct vmxnet3_softc *sc;
2489 sc = iflib_get_softc(ctx);
2490 vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2494 vmxnet3_intr_enable_all(if_ctx_t ctx)
2496 struct vmxnet3_softc *sc;
2497 if_softc_ctx_t scctx;
2500 sc = iflib_get_softc(ctx);
2501 scctx = sc->vmx_scctx;
2502 sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2503 for (i = 0; i < scctx->isc_vectors; i++)
2504 vmxnet3_enable_intr(sc, i);
2508 vmxnet3_intr_disable_all(if_ctx_t ctx)
2510 struct vmxnet3_softc *sc;
2513 sc = iflib_get_softc(ctx);
2515 * iflib may invoke this routine before vmxnet3_attach_post() has
2516 * run, which is before the top level shared data area is
2517 * initialized and the device made aware of it.
2519 if (sc->vmx_ds != NULL)
2520 sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2521 for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2522 vmxnet3_disable_intr(sc, i);
2526 * Since this is a purely paravirtualized device, we do not have
2527 * to worry about DMA coherency. But at times, we must make sure
2528 * both the compiler and CPU do not reorder memory operations.
2531 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2534 switch (type) {
2535 case VMXNET3_BARRIER_RD:
2536 rmb();
2537 break;
2538 case VMXNET3_BARRIER_WR:
2539 wmb();
2540 break;
2541 case VMXNET3_BARRIER_RDWR:
2542 mb();
2543 break;
2544 default:
2545 panic("%s: bad barrier type %d", __func__, type);
2546 }