[FreeBSD/releng/10.2.git] sys/dev/vmware/vmxnet3/if_vmx.c
1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
18  */
19
20 /* Driver for VMware vmxnet3 virtual ethernet devices. */
21
22 #include <sys/cdefs.h>
23 __FBSDID("$FreeBSD$");
24
25 #include <sys/param.h>
26 #include <sys/systm.h>
27 #include <sys/kernel.h>
28 #include <sys/endian.h>
29 #include <sys/sockio.h>
30 #include <sys/mbuf.h>
31 #include <sys/malloc.h>
32 #include <sys/module.h>
33 #include <sys/socket.h>
34 #include <sys/sysctl.h>
35 #include <sys/smp.h>
36 #include <sys/taskqueue.h>
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_arp.h>
43 #include <net/if_dl.h>
44 #include <net/if_types.h>
45 #include <net/if_media.h>
46 #include <net/if_vlan_var.h>
47
48 #include <net/bpf.h>
49
50 #include <netinet/in_systm.h>
51 #include <netinet/in.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip6.h>
54 #include <netinet6/ip6_var.h>
55 #include <netinet/udp.h>
56 #include <netinet/tcp.h>
57
58 #include <machine/in_cksum.h>
59
60 #include <machine/bus.h>
61 #include <machine/resource.h>
62 #include <sys/bus.h>
63 #include <sys/rman.h>
64
65 #include <dev/pci/pcireg.h>
66 #include <dev/pci/pcivar.h>
67
68 #include "if_vmxreg.h"
69 #include "if_vmxvar.h"
70
71 #include "opt_inet.h"
72 #include "opt_inet6.h"
73
74 #ifdef VMXNET3_FAILPOINTS
75 #include <sys/fail.h>
76 static SYSCTL_NODE(DEBUG_FP, OID_AUTO, vmxnet3, CTLFLAG_RW, 0,
77     "vmxnet3 fail points");
78 #define VMXNET3_FP      _debug_fail_point_vmxnet3
79 #endif
80
81 static int      vmxnet3_probe(device_t);
82 static int      vmxnet3_attach(device_t);
83 static int      vmxnet3_detach(device_t);
84 static int      vmxnet3_shutdown(device_t);
85
86 static int      vmxnet3_alloc_resources(struct vmxnet3_softc *);
87 static void     vmxnet3_free_resources(struct vmxnet3_softc *);
88 static int      vmxnet3_check_version(struct vmxnet3_softc *);
89 static void     vmxnet3_initial_config(struct vmxnet3_softc *);
90 static void     vmxnet3_check_multiqueue(struct vmxnet3_softc *);
91
92 static int      vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *);
93 static int      vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *);
94 static int      vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *);
95 static int      vmxnet3_alloc_interrupt(struct vmxnet3_softc *, int, int,
96                     struct vmxnet3_interrupt *);
97 static int      vmxnet3_alloc_intr_resources(struct vmxnet3_softc *);
98 static int      vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *);
99 static int      vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *);
100 static int      vmxnet3_setup_interrupts(struct vmxnet3_softc *);
101 static int      vmxnet3_alloc_interrupts(struct vmxnet3_softc *);
102
103 static void     vmxnet3_free_interrupt(struct vmxnet3_softc *,
104                     struct vmxnet3_interrupt *);
105 static void     vmxnet3_free_interrupts(struct vmxnet3_softc *);
106
107 #ifndef VMXNET3_LEGACY_TX
108 static int      vmxnet3_alloc_taskqueue(struct vmxnet3_softc *);
109 static void     vmxnet3_start_taskqueue(struct vmxnet3_softc *);
110 static void     vmxnet3_drain_taskqueue(struct vmxnet3_softc *);
111 static void     vmxnet3_free_taskqueue(struct vmxnet3_softc *);
112 #endif
113
114 static int      vmxnet3_init_rxq(struct vmxnet3_softc *, int);
115 static int      vmxnet3_init_txq(struct vmxnet3_softc *, int);
116 static int      vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *);
117 static void     vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *);
118 static void     vmxnet3_destroy_txq(struct vmxnet3_txqueue *);
119 static void     vmxnet3_free_rxtx_queues(struct vmxnet3_softc *);
120
121 static int      vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
122 static void     vmxnet3_free_shared_data(struct vmxnet3_softc *);
123 static int      vmxnet3_alloc_txq_data(struct vmxnet3_softc *);
124 static void     vmxnet3_free_txq_data(struct vmxnet3_softc *);
125 static int      vmxnet3_alloc_rxq_data(struct vmxnet3_softc *);
126 static void     vmxnet3_free_rxq_data(struct vmxnet3_softc *);
127 static int      vmxnet3_alloc_queue_data(struct vmxnet3_softc *);
128 static void     vmxnet3_free_queue_data(struct vmxnet3_softc *);
129 static int      vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
130 static void     vmxnet3_init_shared_data(struct vmxnet3_softc *);
131 static void     vmxnet3_reinit_interface(struct vmxnet3_softc *);
132 static void     vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
133 static void     vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
134 static int      vmxnet3_alloc_data(struct vmxnet3_softc *);
135 static void     vmxnet3_free_data(struct vmxnet3_softc *);
136 static int      vmxnet3_setup_interface(struct vmxnet3_softc *);
137
138 static void     vmxnet3_evintr(struct vmxnet3_softc *);
139 static void     vmxnet3_txq_eof(struct vmxnet3_txqueue *);
140 static void     vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *, struct mbuf *);
141 static int      vmxnet3_newbuf(struct vmxnet3_softc *, struct vmxnet3_rxring *);
142 static void     vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *,
143                     struct vmxnet3_rxring *, int);
144 static void     vmxnet3_rxq_eof(struct vmxnet3_rxqueue *);
145 static void     vmxnet3_legacy_intr(void *);
146 static void     vmxnet3_txq_intr(void *);
147 static void     vmxnet3_rxq_intr(void *);
148 static void     vmxnet3_event_intr(void *);
149
150 static void     vmxnet3_txstop(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
151 static void     vmxnet3_rxstop(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
152 static void     vmxnet3_stop(struct vmxnet3_softc *);
153
154 static void     vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
155 static int      vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
156 static int      vmxnet3_reinit_queues(struct vmxnet3_softc *);
157 static int      vmxnet3_enable_device(struct vmxnet3_softc *);
158 static void     vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
159 static int      vmxnet3_reinit(struct vmxnet3_softc *);
160 static void     vmxnet3_init_locked(struct vmxnet3_softc *);
161 static void     vmxnet3_init(void *);
162
163 static int      vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *, struct mbuf *,
164                     int *, int *, int *);
165 static int      vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *, struct mbuf **,
166                     bus_dmamap_t, bus_dma_segment_t [], int *);
167 static void     vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *, bus_dmamap_t);
168 static int      vmxnet3_txq_encap(struct vmxnet3_txqueue *, struct mbuf **);
169 #ifdef VMXNET3_LEGACY_TX
170 static void     vmxnet3_start_locked(struct ifnet *);
171 static void     vmxnet3_start(struct ifnet *);
172 #else
173 static int      vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *,
174                     struct mbuf *);
175 static int      vmxnet3_txq_mq_start(struct ifnet *, struct mbuf *);
176 static void     vmxnet3_txq_tq_deferred(void *, int);
177 #endif
178 static void     vmxnet3_txq_start(struct vmxnet3_txqueue *);
179 static void     vmxnet3_tx_start_all(struct vmxnet3_softc *);
180
181 static void     vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
182                     uint16_t);
183 static void     vmxnet3_register_vlan(void *, struct ifnet *, uint16_t);
184 static void     vmxnet3_unregister_vlan(void *, struct ifnet *, uint16_t);
185 static void     vmxnet3_set_rxfilter(struct vmxnet3_softc *);
186 static int      vmxnet3_change_mtu(struct vmxnet3_softc *, int);
187 static int      vmxnet3_ioctl(struct ifnet *, u_long, caddr_t);
188
189 #ifndef VMXNET3_LEGACY_TX
190 static void     vmxnet3_qflush(struct ifnet *);
191 #endif
192
193 static int      vmxnet3_watchdog(struct vmxnet3_txqueue *);
194 static void     vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
195 static void     vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *,
196                     struct vmxnet3_txq_stats *);
197 static void     vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *,
198                     struct vmxnet3_rxq_stats *);
199 static void     vmxnet3_tick(void *);
200 static void     vmxnet3_link_status(struct vmxnet3_softc *);
201 static void     vmxnet3_media_status(struct ifnet *, struct ifmediareq *);
202 static int      vmxnet3_media_change(struct ifnet *);
203 static void     vmxnet3_set_lladdr(struct vmxnet3_softc *);
204 static void     vmxnet3_get_lladdr(struct vmxnet3_softc *);
205
206 static void     vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
207                     struct sysctl_ctx_list *, struct sysctl_oid_list *);
208 static void     vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
209                     struct sysctl_ctx_list *, struct sysctl_oid_list *);
210 static void     vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
211                     struct sysctl_ctx_list *, struct sysctl_oid_list *);
212 static void     vmxnet3_setup_sysctl(struct vmxnet3_softc *);
213
214 static void     vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
215                     uint32_t);
216 static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
217 static void     vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
218                     uint32_t);
219 static void     vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
220 static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
221
222 static void     vmxnet3_enable_intr(struct vmxnet3_softc *, int);
223 static void     vmxnet3_disable_intr(struct vmxnet3_softc *, int);
224 static void     vmxnet3_enable_all_intrs(struct vmxnet3_softc *);
225 static void     vmxnet3_disable_all_intrs(struct vmxnet3_softc *);
226
227 static int      vmxnet3_dma_malloc(struct vmxnet3_softc *, bus_size_t,
228                     bus_size_t, struct vmxnet3_dma_alloc *);
229 static void     vmxnet3_dma_free(struct vmxnet3_softc *,
230                     struct vmxnet3_dma_alloc *);
231 static int      vmxnet3_tunable_int(struct vmxnet3_softc *,
232                     const char *, int);
233
234 typedef enum {
235         VMXNET3_BARRIER_RD,
236         VMXNET3_BARRIER_WR,
237         VMXNET3_BARRIER_RDWR,
238 } vmxnet3_barrier_t;
239
240 static void     vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
241
242 /* Tunables. */
243 static int vmxnet3_mq_disable = 0;
244 TUNABLE_INT("hw.vmx.mq_disable", &vmxnet3_mq_disable);
245 static int vmxnet3_default_txnqueue = VMXNET3_DEF_TX_QUEUES;
246 TUNABLE_INT("hw.vmx.txnqueue", &vmxnet3_default_txnqueue);
247 static int vmxnet3_default_rxnqueue = VMXNET3_DEF_RX_QUEUES;
248 TUNABLE_INT("hw.vmx.rxnqueue", &vmxnet3_default_rxnqueue);
249 static int vmxnet3_default_txndesc = VMXNET3_DEF_TX_NDESC;
250 TUNABLE_INT("hw.vmx.txndesc", &vmxnet3_default_txndesc);
251 static int vmxnet3_default_rxndesc = VMXNET3_DEF_RX_NDESC;
252 TUNABLE_INT("hw.vmx.rxndesc", &vmxnet3_default_rxndesc);
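/*
 * Illustrative note (added; not in the original source): TUNABLE_INT() makes
 * the values above settable from the loader, so they would typically be
 * overridden in /boot/loader.conf, e.g.:
 *
 *   hw.vmx.mq_disable="1"
 *   hw.vmx.txnqueue="4"
 *   hw.vmx.rxnqueue="4"
 *   hw.vmx.txndesc="1024"
 *
 * vmxnet3_tunable_int() is later called with the short knob names
 * ("txnqueue", "rxnqueue", "mq_disable", "txd", "rxd") and these globals as
 * the defaults; see vmxnet3_initial_config() below.
 */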
253
254 static device_method_t vmxnet3_methods[] = {
255         /* Device interface. */
256         DEVMETHOD(device_probe,         vmxnet3_probe),
257         DEVMETHOD(device_attach,        vmxnet3_attach),
258         DEVMETHOD(device_detach,        vmxnet3_detach),
259         DEVMETHOD(device_shutdown,      vmxnet3_shutdown),
260
261         DEVMETHOD_END
262 };
263
264 static driver_t vmxnet3_driver = {
265         "vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
266 };
267
268 static devclass_t vmxnet3_devclass;
269 DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
270
271 MODULE_DEPEND(vmx, pci, 1, 1, 1);
272 MODULE_DEPEND(vmx, ether, 1, 1, 1);
273
274 #define VMXNET3_VMWARE_VENDOR_ID        0x15AD
275 #define VMXNET3_VMWARE_DEVICE_ID        0x07B0
276
277 static int
278 vmxnet3_probe(device_t dev)
279 {
280
281         if (pci_get_vendor(dev) == VMXNET3_VMWARE_VENDOR_ID &&
282             pci_get_device(dev) == VMXNET3_VMWARE_DEVICE_ID) {
283                 device_set_desc(dev, "VMware VMXNET3 Ethernet Adapter");
284                 return (BUS_PROBE_DEFAULT);
285         }
286
287         return (ENXIO);
288 }
289
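/*
 * Descriptive note (added; not in the original source): the attach order
 * below matters.  Queue structures are sized and allocated before the
 * interrupts so that vmxnet3_check_multiqueue() can settle the final queue
 * counts once it is known whether MSI-X vectors were obtained; only then is
 * the shared data area laid out and the ifnet created.  On any failure the
 * code falls through to vmxnet3_detach(), which copes with partially
 * initialized state.
 */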
290 static int
291 vmxnet3_attach(device_t dev)
292 {
293         struct vmxnet3_softc *sc;
294         int error;
295
296         sc = device_get_softc(dev);
297         sc->vmx_dev = dev;
298
299         pci_enable_busmaster(dev);
300
301         VMXNET3_CORE_LOCK_INIT(sc, device_get_nameunit(dev));
302         callout_init_mtx(&sc->vmx_tick, &sc->vmx_mtx, 0);
303
304         vmxnet3_initial_config(sc);
305
306         error = vmxnet3_alloc_resources(sc);
307         if (error)
308                 goto fail;
309
310         error = vmxnet3_check_version(sc);
311         if (error)
312                 goto fail;
313
314         error = vmxnet3_alloc_rxtx_queues(sc);
315         if (error)
316                 goto fail;
317
318 #ifndef VMXNET3_LEGACY_TX
319         error = vmxnet3_alloc_taskqueue(sc);
320         if (error)
321                 goto fail;
322 #endif
323
324         error = vmxnet3_alloc_interrupts(sc);
325         if (error)
326                 goto fail;
327
328         vmxnet3_check_multiqueue(sc);
329
330         error = vmxnet3_alloc_data(sc);
331         if (error)
332                 goto fail;
333
334         error = vmxnet3_setup_interface(sc);
335         if (error)
336                 goto fail;
337
338         error = vmxnet3_setup_interrupts(sc);
339         if (error) {
340                 ether_ifdetach(sc->vmx_ifp);
341                 device_printf(dev, "could not set up interrupt\n");
342                 goto fail;
343         }
344
345         vmxnet3_setup_sysctl(sc);
346 #ifndef VMXNET3_LEGACY_TX
347         vmxnet3_start_taskqueue(sc);
348 #endif
349
350 fail:
351         if (error)
352                 vmxnet3_detach(dev);
353
354         return (error);
355 }
356
357 static int
358 vmxnet3_detach(device_t dev)
359 {
360         struct vmxnet3_softc *sc;
361         struct ifnet *ifp;
362
363         sc = device_get_softc(dev);
364         ifp = sc->vmx_ifp;
365
366         if (device_is_attached(dev)) {
367                 VMXNET3_CORE_LOCK(sc);
368                 vmxnet3_stop(sc);
369                 VMXNET3_CORE_UNLOCK(sc);
370
371                 callout_drain(&sc->vmx_tick);
372 #ifndef VMXNET3_LEGACY_TX
373                 vmxnet3_drain_taskqueue(sc);
374 #endif
375
376                 ether_ifdetach(ifp);
377         }
378
379         if (sc->vmx_vlan_attach != NULL) {
380                 EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_attach);
381                 sc->vmx_vlan_attach = NULL;
382         }
383         if (sc->vmx_vlan_detach != NULL) {
384                 EVENTHANDLER_DEREGISTER(vlan_config, sc->vmx_vlan_detach);
385                 sc->vmx_vlan_detach = NULL;
386         }
387
388 #ifndef VMXNET3_LEGACY_TX
389         vmxnet3_free_taskqueue(sc);
390 #endif
391         vmxnet3_free_interrupts(sc);
392
393         if (ifp != NULL) {
394                 if_free(ifp);
395                 sc->vmx_ifp = NULL;
396         }
397
398         ifmedia_removeall(&sc->vmx_media);
399
400         vmxnet3_free_data(sc);
401         vmxnet3_free_resources(sc);
402         vmxnet3_free_rxtx_queues(sc);
403
404         VMXNET3_CORE_LOCK_DESTROY(sc);
405
406         return (0);
407 }
408
409 static int
410 vmxnet3_shutdown(device_t dev)
411 {
412
413         return (0);
414 }
415
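/*
 * Note (added): BAR0 and BAR1 are the two register windows.  In this driver
 * BAR0 is only ever written (see vmxnet3_write_bar0(); the queue doorbell
 * and interrupt-mask registers live there), while BAR1 carries the command
 * and configuration registers accessed through vmxnet3_read_bar1() and
 * vmxnet3_write_bar1().  BAR2 is mapped only when an MSI-X capability is
 * present.
 */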
416 static int
417 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
418 {
419         device_t dev;
420         int rid;
421
422         dev = sc->vmx_dev;
423
424         rid = PCIR_BAR(0);
425         sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
426             RF_ACTIVE);
427         if (sc->vmx_res0 == NULL) {
428                 device_printf(dev,
429                     "could not map BAR0 memory\n");
430                 return (ENXIO);
431         }
432
433         sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
434         sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
435
436         rid = PCIR_BAR(1);
437         sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
438             RF_ACTIVE);
439         if (sc->vmx_res1 == NULL) {
440                 device_printf(dev,
441                     "could not map BAR1 memory\n");
442                 return (ENXIO);
443         }
444
445         sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
446         sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
447
448         if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
449                 rid = PCIR_BAR(2);
450                 sc->vmx_msix_res = bus_alloc_resource_any(dev,
451                     SYS_RES_MEMORY, &rid, RF_ACTIVE);
452         }
453
454         if (sc->vmx_msix_res == NULL)
455                 sc->vmx_flags |= VMXNET3_FLAG_NO_MSIX;
456
457         return (0);
458 }
459
460 static void
461 vmxnet3_free_resources(struct vmxnet3_softc *sc)
462 {
463         device_t dev;
464         int rid;
465
466         dev = sc->vmx_dev;
467
468         if (sc->vmx_res0 != NULL) {
469                 rid = PCIR_BAR(0);
470                 bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
471                 sc->vmx_res0 = NULL;
472         }
473
474         if (sc->vmx_res1 != NULL) {
475                 rid = PCIR_BAR(1);
476                 bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
477                 sc->vmx_res1 = NULL;
478         }
479
480         if (sc->vmx_msix_res != NULL) {
481                 rid = PCIR_BAR(2);
482                 bus_release_resource(dev, SYS_RES_MEMORY, rid,
483                     sc->vmx_msix_res);
484                 sc->vmx_msix_res = NULL;
485         }
486 }
487
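/*
 * Note (added): VRRS and UVRS advertise the supported device and UPT API
 * revisions as bit masks; this driver understands only revision 1, so it
 * checks bit 0 and writes a 1 back to select that revision.
 */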
488 static int
489 vmxnet3_check_version(struct vmxnet3_softc *sc)
490 {
491         device_t dev;
492         uint32_t version;
493
494         dev = sc->vmx_dev;
495
496         version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
497         if ((version & 0x01) == 0) {
498                 device_printf(dev, "unsupported hardware version %#x\n",
499                     version);
500                 return (ENOTSUP);
501         }
502         vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
503
504         version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
505         if ((version & 0x01) == 0) {
506                 device_printf(dev, "unsupported UPT version %#x\n", version);
507                 return (ENOTSUP);
508         }
509         vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
510
511         return (0);
512 }
513
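/*
 * Note (added): apply the tunables, clamping the queue counts to the device
 * maxima and to the CPU count, and rounding the descriptor counts down to
 * the multiple implied by the VMXNET3_MASK_*_NDESC masks.
 */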
514 static void
515 vmxnet3_initial_config(struct vmxnet3_softc *sc)
516 {
517         int nqueue, ndesc;
518
519         nqueue = vmxnet3_tunable_int(sc, "txnqueue", vmxnet3_default_txnqueue);
520         if (nqueue > VMXNET3_MAX_TX_QUEUES || nqueue < 1)
521                 nqueue = VMXNET3_DEF_TX_QUEUES;
522         if (nqueue > mp_ncpus)
523                 nqueue = mp_ncpus;
524         sc->vmx_max_ntxqueues = nqueue;
525
526         nqueue = vmxnet3_tunable_int(sc, "rxnqueue", vmxnet3_default_rxnqueue);
527         if (nqueue > VMXNET3_MAX_RX_QUEUES || nqueue < 1)
528                 nqueue = VMXNET3_DEF_RX_QUEUES;
529         if (nqueue > mp_ncpus)
530                 nqueue = mp_ncpus;
531         sc->vmx_max_nrxqueues = nqueue;
532
533         if (vmxnet3_tunable_int(sc, "mq_disable", vmxnet3_mq_disable)) {
534                 sc->vmx_max_nrxqueues = 1;
535                 sc->vmx_max_ntxqueues = 1;
536         }
537
538         ndesc = vmxnet3_tunable_int(sc, "txd", vmxnet3_default_txndesc);
539         if (ndesc > VMXNET3_MAX_TX_NDESC || ndesc < VMXNET3_MIN_TX_NDESC)
540                 ndesc = VMXNET3_DEF_TX_NDESC;
541         if (ndesc & VMXNET3_MASK_TX_NDESC)
542                 ndesc &= ~VMXNET3_MASK_TX_NDESC;
543         sc->vmx_ntxdescs = ndesc;
544
545         ndesc = vmxnet3_tunable_int(sc, "rxd", vmxnet3_default_rxndesc);
546         if (ndesc > VMXNET3_MAX_RX_NDESC || ndesc < VMXNET3_MIN_RX_NDESC)
547                 ndesc = VMXNET3_DEF_RX_NDESC;
548         if (ndesc & VMXNET3_MASK_RX_NDESC)
549                 ndesc &= ~VMXNET3_MASK_RX_NDESC;
550         sc->vmx_nrxdescs = ndesc;
551         sc->vmx_max_rxsegs = VMXNET3_MAX_RX_SEGS;
552 }
553
554 static void
555 vmxnet3_check_multiqueue(struct vmxnet3_softc *sc)
556 {
557
558         if (sc->vmx_intr_type != VMXNET3_IT_MSIX)
559                 goto out;
560
561         /* BMV: Just use the maximum configured for now. */
562         sc->vmx_nrxqueues = sc->vmx_max_nrxqueues;
563         sc->vmx_ntxqueues = sc->vmx_max_ntxqueues;
564
565         if (sc->vmx_nrxqueues > 1)
566                 sc->vmx_flags |= VMXNET3_FLAG_RSS;
567
568         return;
569
570 out:
571         sc->vmx_ntxqueues = 1;
572         sc->vmx_nrxqueues = 1;
573 }
574
575 static int
576 vmxnet3_alloc_msix_interrupts(struct vmxnet3_softc *sc)
577 {
578         device_t dev;
579         int nmsix, cnt, required;
580
581         dev = sc->vmx_dev;
582
583         if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX)
584                 return (1);
585
586         /* Allocate an additional vector for the events interrupt. */
587         required = sc->vmx_max_nrxqueues + sc->vmx_max_ntxqueues + 1;
588
589         nmsix = pci_msix_count(dev);
590         if (nmsix < required)
591                 return (1);
592
593         cnt = required;
594         if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
595                 sc->vmx_nintrs = required;
596                 return (0);
597         } else
598                 pci_release_msi(dev);
599
600         /* BMV TODO Fall back to sharing MSIX vectors if possible. */
601
602         return (1);
603 }
604
605 static int
606 vmxnet3_alloc_msi_interrupts(struct vmxnet3_softc *sc)
607 {
608         device_t dev;
609         int nmsi, cnt, required;
610
611         dev = sc->vmx_dev;
612         required = 1;
613
614         nmsi = pci_msi_count(dev);
615         if (nmsi < required)
616                 return (1);
617
618         cnt = required;
619         if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required) {
620                 sc->vmx_nintrs = 1;
621                 return (0);
622         } else
623                 pci_release_msi(dev);
624
625         return (1);
626 }
627
628 static int
629 vmxnet3_alloc_legacy_interrupts(struct vmxnet3_softc *sc)
630 {
631
632         sc->vmx_nintrs = 1;
633         return (0);
634 }
635
636 static int
637 vmxnet3_alloc_interrupt(struct vmxnet3_softc *sc, int rid, int flags,
638     struct vmxnet3_interrupt *intr)
639 {
640         struct resource *irq;
641
642         irq = bus_alloc_resource_any(sc->vmx_dev, SYS_RES_IRQ, &rid, flags);
643         if (irq == NULL)
644                 return (ENXIO);
645
646         intr->vmxi_irq = irq;
647         intr->vmxi_rid = rid;
648
649         return (0);
650 }
651
652 static int
653 vmxnet3_alloc_intr_resources(struct vmxnet3_softc *sc)
654 {
655         int i, rid, flags, error;
656
657         rid = 0;
658         flags = RF_ACTIVE;
659
660         if (sc->vmx_intr_type == VMXNET3_IT_LEGACY)
661                 flags |= RF_SHAREABLE;
662         else
663                 rid = 1;
664
665         for (i = 0; i < sc->vmx_nintrs; i++, rid++) {
666                 error = vmxnet3_alloc_interrupt(sc, rid, flags,
667                     &sc->vmx_intrs[i]);
668                 if (error)
669                         return (error);
670         }
671
672         return (0);
673 }
674
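/*
 * Note (added): with MSI-X every Tx queue, every Rx queue and the event
 * channel gets its own vector, assigned in that order.  MSI-X resource IDs
 * start at 1 (rid is initialized to 1 in vmxnet3_alloc_intr_resources()
 * above), so "vmxi_rid - 1" yields the zero-based interrupt index that is
 * later written into the shared structures.
 */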
675 static int
676 vmxnet3_setup_msix_interrupts(struct vmxnet3_softc *sc)
677 {
678         device_t dev;
679         struct vmxnet3_txqueue *txq;
680         struct vmxnet3_rxqueue *rxq;
681         struct vmxnet3_interrupt *intr;
682         enum intr_type type;
683         int i, error;
684
685         dev = sc->vmx_dev;
686         intr = &sc->vmx_intrs[0];
687         type = INTR_TYPE_NET | INTR_MPSAFE;
688
689         for (i = 0; i < sc->vmx_ntxqueues; i++, intr++) {
690                 txq = &sc->vmx_txq[i];
691                 error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
692                      vmxnet3_txq_intr, txq, &intr->vmxi_handler);
693                 if (error)
694                         return (error);
695                 bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
696                     "tq%d", i);
697                 txq->vxtxq_intr_idx = intr->vmxi_rid - 1;
698         }
699
700         for (i = 0; i < sc->vmx_nrxqueues; i++, intr++) {
701                 rxq = &sc->vmx_rxq[i];
702                 error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
703                     vmxnet3_rxq_intr, rxq, &intr->vmxi_handler);
704                 if (error)
705                         return (error);
706                 bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler,
707                     "rq%d", i);
708                 rxq->vxrxq_intr_idx = intr->vmxi_rid - 1;
709         }
710
711         error = bus_setup_intr(dev, intr->vmxi_irq, type, NULL,
712             vmxnet3_event_intr, sc, &intr->vmxi_handler);
713         if (error)
714                 return (error);
715         bus_describe_intr(dev, intr->vmxi_irq, intr->vmxi_handler, "event");
716         sc->vmx_event_intr_idx = intr->vmxi_rid - 1;
717
718         return (0);
719 }
720
721 static int
722 vmxnet3_setup_legacy_interrupt(struct vmxnet3_softc *sc)
723 {
724         struct vmxnet3_interrupt *intr;
725         int i, error;
726
727         intr = &sc->vmx_intrs[0];
728         error = bus_setup_intr(sc->vmx_dev, intr->vmxi_irq,
729             INTR_TYPE_NET | INTR_MPSAFE, NULL, vmxnet3_legacy_intr, sc,
730             &intr->vmxi_handler);
731
732         for (i = 0; i < sc->vmx_ntxqueues; i++)
733                 sc->vmx_txq[i].vxtxq_intr_idx = 0;
734         for (i = 0; i < sc->vmx_nrxqueues; i++)
735                 sc->vmx_rxq[i].vxrxq_intr_idx = 0;
736         sc->vmx_event_intr_idx = 0;
737
738         return (error);
739 }
740
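/*
 * Note (added): copy the interrupt indices chosen above into the shared
 * structures so the device knows which vector to raise for events and for
 * each individual queue.
 */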
741 static void
742 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
743 {
744         struct vmxnet3_txqueue *txq;
745         struct vmxnet3_txq_shared *txs;
746         struct vmxnet3_rxqueue *rxq;
747         struct vmxnet3_rxq_shared *rxs;
748         int i;
749
750         sc->vmx_ds->evintr = sc->vmx_event_intr_idx;
751
752         for (i = 0; i < sc->vmx_ntxqueues; i++) {
753                 txq = &sc->vmx_txq[i];
754                 txs = txq->vxtxq_ts;
755                 txs->intr_idx = txq->vxtxq_intr_idx;
756         }
757
758         for (i = 0; i < sc->vmx_nrxqueues; i++) {
759                 rxq = &sc->vmx_rxq[i];
760                 rxs = rxq->vxrxq_rs;
761                 rxs->intr_idx = rxq->vxrxq_intr_idx;
762         }
763 }
764
765 static int
766 vmxnet3_setup_interrupts(struct vmxnet3_softc *sc)
767 {
768         int error;
769
770         error = vmxnet3_alloc_intr_resources(sc);
771         if (error)
772                 return (error);
773
774         switch (sc->vmx_intr_type) {
775         case VMXNET3_IT_MSIX:
776                 error = vmxnet3_setup_msix_interrupts(sc);
777                 break;
778         case VMXNET3_IT_MSI:
779         case VMXNET3_IT_LEGACY:
780                 error = vmxnet3_setup_legacy_interrupt(sc);
781                 break;
782         default:
783                 panic("%s: invalid interrupt type %d", __func__,
784                     sc->vmx_intr_type);
785         }
786
787         if (error == 0)
788                 vmxnet3_set_interrupt_idx(sc);
789
790         return (error);
791 }
792
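/*
 * Note (added): query the interrupt configuration recommended by the
 * device, then walk the switch below, deliberately falling through from
 * MSI-X to MSI to legacy INTx until an allocation succeeds.
 * vmx_intr_mask_mode is kept for later use when the shared data is filled
 * in (ds->automask).
 */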
793 static int
794 vmxnet3_alloc_interrupts(struct vmxnet3_softc *sc)
795 {
796         device_t dev;
797         uint32_t config;
798         int error;
799
800         dev = sc->vmx_dev;
801         config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
802
803         sc->vmx_intr_type = config & 0x03;
804         sc->vmx_intr_mask_mode = (config >> 2) & 0x03;
805
806         switch (sc->vmx_intr_type) {
807         case VMXNET3_IT_AUTO:
808                 sc->vmx_intr_type = VMXNET3_IT_MSIX;
809                 /* FALLTHROUGH */
810         case VMXNET3_IT_MSIX:
811                 error = vmxnet3_alloc_msix_interrupts(sc);
812                 if (error == 0)
813                         break;
814                 sc->vmx_intr_type = VMXNET3_IT_MSI;
815                 /* FALLTHROUGH */
816         case VMXNET3_IT_MSI:
817                 error = vmxnet3_alloc_msi_interrupts(sc);
818                 if (error == 0)
819                         break;
820                 sc->vmx_intr_type = VMXNET3_IT_LEGACY;
821                 /* FALLTHROUGH */
822         case VMXNET3_IT_LEGACY:
823                 error = vmxnet3_alloc_legacy_interrupts(sc);
824                 if (error == 0)
825                         break;
826                 /* FALLTHROUGH */
827         default:
828                 sc->vmx_intr_type = -1;
829                 device_printf(dev, "cannot allocate any interrupt resources\n");
830                 return (ENXIO);
831         }
832
833         return (error);
834 }
835
836 static void
837 vmxnet3_free_interrupt(struct vmxnet3_softc *sc,
838     struct vmxnet3_interrupt *intr)
839 {
840         device_t dev;
841
842         dev = sc->vmx_dev;
843
844         if (intr->vmxi_handler != NULL) {
845                 bus_teardown_intr(dev, intr->vmxi_irq, intr->vmxi_handler);
846                 intr->vmxi_handler = NULL;
847         }
848
849         if (intr->vmxi_irq != NULL) {
850                 bus_release_resource(dev, SYS_RES_IRQ, intr->vmxi_rid,
851                     intr->vmxi_irq);
852                 intr->vmxi_irq = NULL;
853                 intr->vmxi_rid = -1;
854         }
855 }
856
857 static void
858 vmxnet3_free_interrupts(struct vmxnet3_softc *sc)
859 {
860         int i;
861
862         for (i = 0; i < sc->vmx_nintrs; i++)
863                 vmxnet3_free_interrupt(sc, &sc->vmx_intrs[i]);
864
865         if (sc->vmx_intr_type == VMXNET3_IT_MSI ||
866             sc->vmx_intr_type == VMXNET3_IT_MSIX)
867                 pci_release_msi(sc->vmx_dev);
868 }
869
870 #ifndef VMXNET3_LEGACY_TX
871 static int
872 vmxnet3_alloc_taskqueue(struct vmxnet3_softc *sc)
873 {
874         device_t dev;
875
876         dev = sc->vmx_dev;
877
878         sc->vmx_tq = taskqueue_create(device_get_nameunit(dev), M_NOWAIT,
879             taskqueue_thread_enqueue, &sc->vmx_tq);
880         if (sc->vmx_tq == NULL)
881                 return (ENOMEM);
882
883         return (0);
884 }
885
886 static void
887 vmxnet3_start_taskqueue(struct vmxnet3_softc *sc)
888 {
889         device_t dev;
890         int nthreads, error;
891
892         dev = sc->vmx_dev;
893
894         /*
895          * The taskqueue is typically not frequently used, so a dedicated
896          * thread for each queue is unnecessary.
897          */
898         nthreads = MAX(1, sc->vmx_ntxqueues / 2);
899
900         /*
901          * Most drivers just ignore the return value - it only fails
902          * with ENOMEM so an error is not likely. It is hard for us
903          * to recover from an error here.
904          */
905         error = taskqueue_start_threads(&sc->vmx_tq, nthreads, PI_NET,
906             "%s taskq", device_get_nameunit(dev));
907         if (error)
908                 device_printf(dev, "failed to start taskqueue: %d\n", error);
909 }
910
911 static void
912 vmxnet3_drain_taskqueue(struct vmxnet3_softc *sc)
913 {
914         struct vmxnet3_txqueue *txq;
915         int i;
916
917         if (sc->vmx_tq != NULL) {
918                 for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
919                         txq = &sc->vmx_txq[i];
920                         taskqueue_drain(sc->vmx_tq, &txq->vxtxq_defrtask);
921                 }
922         }
923 }
924
925 static void
926 vmxnet3_free_taskqueue(struct vmxnet3_softc *sc)
927 {
928         if (sc->vmx_tq != NULL) {
929                 taskqueue_free(sc->vmx_tq);
930                 sc->vmx_tq = NULL;
931         }
932 }
933 #endif
934
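/*
 * Note (added): each Rx queue owns VMXNET3_RXRINGS_PERQ command (fill)
 * rings but only a single completion ring, so the completion ring is sized
 * to the sum of the command ring descriptor counts.
 */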
935 static int
936 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q)
937 {
938         struct vmxnet3_rxqueue *rxq;
939         struct vmxnet3_rxring *rxr;
940         int i;
941
942         rxq = &sc->vmx_rxq[q];
943
944         snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
945             device_get_nameunit(sc->vmx_dev), q);
946         mtx_init(&rxq->vxrxq_mtx, rxq->vxrxq_name, NULL, MTX_DEF);
947
948         rxq->vxrxq_sc = sc;
949         rxq->vxrxq_id = q;
950
951         for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
952                 rxr = &rxq->vxrxq_cmd_ring[i];
953                 rxr->vxrxr_rid = i;
954                 rxr->vxrxr_ndesc = sc->vmx_nrxdescs;
955                 rxr->vxrxr_rxbuf = malloc(rxr->vxrxr_ndesc *
956                     sizeof(struct vmxnet3_rxbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
957                 if (rxr->vxrxr_rxbuf == NULL)
958                         return (ENOMEM);
959
960                 rxq->vxrxq_comp_ring.vxcr_ndesc += sc->vmx_nrxdescs;
961         }
962
963         return (0);
964 }
965
966 static int
967 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
968 {
969         struct vmxnet3_txqueue *txq;
970         struct vmxnet3_txring *txr;
971
972         txq = &sc->vmx_txq[q];
973         txr = &txq->vxtxq_cmd_ring;
974
975         snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
976             device_get_nameunit(sc->vmx_dev), q);
977         mtx_init(&txq->vxtxq_mtx, txq->vxtxq_name, NULL, MTX_DEF);
978
979         txq->vxtxq_sc = sc;
980         txq->vxtxq_id = q;
981
982         txr->vxtxr_ndesc = sc->vmx_ntxdescs;
983         txr->vxtxr_txbuf = malloc(txr->vxtxr_ndesc *
984             sizeof(struct vmxnet3_txbuf), M_DEVBUF, M_NOWAIT | M_ZERO);
985         if (txr->vxtxr_txbuf == NULL)
986                 return (ENOMEM);
987
988         txq->vxtxq_comp_ring.vxcr_ndesc = sc->vmx_ntxdescs;
989
990 #ifndef VMXNET3_LEGACY_TX
991         TASK_INIT(&txq->vxtxq_defrtask, 0, vmxnet3_txq_tq_deferred, txq);
992
993         txq->vxtxq_br = buf_ring_alloc(VMXNET3_DEF_BUFRING_SIZE, M_DEVBUF,
994             M_NOWAIT, &txq->vxtxq_mtx);
995         if (txq->vxtxq_br == NULL)
996                 return (ENOMEM);
997 #endif
998
999         return (0);
1000 }
1001
1002 static int
1003 vmxnet3_alloc_rxtx_queues(struct vmxnet3_softc *sc)
1004 {
1005         int i, error;
1006
1007         /*
1008          * Only attempt to create multiple queues if MSIX is available. MSIX is
1009          * disabled by default because it is apparently broken for devices
1010          * passed through by at least ESXi 5.1; the hw.pci.honor_msi_blacklist
1011          * tunable must be set to zero to enable MSIX. This check prevents us
1012          * from allocating queue structures that we will not use.
1013          */
1014         if (sc->vmx_flags & VMXNET3_FLAG_NO_MSIX) {
1015                 sc->vmx_max_nrxqueues = 1;
1016                 sc->vmx_max_ntxqueues = 1;
1017         }
1018
1019         sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
1020             sc->vmx_max_nrxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1021         sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
1022             sc->vmx_max_ntxqueues, M_DEVBUF, M_NOWAIT | M_ZERO);
1023         if (sc->vmx_rxq == NULL || sc->vmx_txq == NULL)
1024                 return (ENOMEM);
1025
1026         for (i = 0; i < sc->vmx_max_nrxqueues; i++) {
1027                 error = vmxnet3_init_rxq(sc, i);
1028                 if (error)
1029                         return (error);
1030         }
1031
1032         for (i = 0; i < sc->vmx_max_ntxqueues; i++) {
1033                 error = vmxnet3_init_txq(sc, i);
1034                 if (error)
1035                         return (error);
1036         }
1037
1038         return (0);
1039 }
1040
1041 static void
1042 vmxnet3_destroy_rxq(struct vmxnet3_rxqueue *rxq)
1043 {
1044         struct vmxnet3_rxring *rxr;
1045         int i;
1046
1047         rxq->vxrxq_sc = NULL;
1048         rxq->vxrxq_id = -1;
1049
1050         for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1051                 rxr = &rxq->vxrxq_cmd_ring[i];
1052
1053                 if (rxr->vxrxr_rxbuf != NULL) {
1054                         free(rxr->vxrxr_rxbuf, M_DEVBUF);
1055                         rxr->vxrxr_rxbuf = NULL;
1056                 }
1057         }
1058
1059         if (mtx_initialized(&rxq->vxrxq_mtx) != 0)
1060                 mtx_destroy(&rxq->vxrxq_mtx);
1061 }
1062
1063 static void
1064 vmxnet3_destroy_txq(struct vmxnet3_txqueue *txq)
1065 {
1066         struct vmxnet3_txring *txr;
1067
1068         txr = &txq->vxtxq_cmd_ring;
1069
1070         txq->vxtxq_sc = NULL;
1071         txq->vxtxq_id = -1;
1072
1073 #ifndef VMXNET3_LEGACY_TX
1074         if (txq->vxtxq_br != NULL) {
1075                 buf_ring_free(txq->vxtxq_br, M_DEVBUF);
1076                 txq->vxtxq_br = NULL;
1077         }
1078 #endif
1079
1080         if (txr->vxtxr_txbuf != NULL) {
1081                 free(txr->vxtxr_txbuf, M_DEVBUF);
1082                 txr->vxtxr_txbuf = NULL;
1083         }
1084
1085         if (mtx_initialized(&txq->vxtxq_mtx) != 0)
1086                 mtx_destroy(&txq->vxtxq_mtx);
1087 }
1088
1089 static void
1090 vmxnet3_free_rxtx_queues(struct vmxnet3_softc *sc)
1091 {
1092         int i;
1093
1094         if (sc->vmx_rxq != NULL) {
1095                 for (i = 0; i < sc->vmx_max_nrxqueues; i++)
1096                         vmxnet3_destroy_rxq(&sc->vmx_rxq[i]);
1097                 free(sc->vmx_rxq, M_DEVBUF);
1098                 sc->vmx_rxq = NULL;
1099         }
1100
1101         if (sc->vmx_txq != NULL) {
1102                 for (i = 0; i < sc->vmx_max_ntxqueues; i++)
1103                         vmxnet3_destroy_txq(&sc->vmx_txq[i]);
1104                 free(sc->vmx_txq, M_DEVBUF);
1105                 sc->vmx_txq = NULL;
1106         }
1107 }
1108
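/*
 * Note (added): allocate the DMA regions the device reads directly: the
 * driver_shared structure, one contiguous block holding every per-queue
 * shared structure (all Tx queue structures followed by all Rx queue
 * structures, carved up via the kva cursor below), and, when RSS is
 * enabled, the RSS configuration block.
 */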
1109 static int
1110 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
1111 {
1112         device_t dev;
1113         uint8_t *kva;
1114         size_t size;
1115         int i, error;
1116
1117         dev = sc->vmx_dev;
1118
1119         size = sizeof(struct vmxnet3_driver_shared);
1120         error = vmxnet3_dma_malloc(sc, size, 1, &sc->vmx_ds_dma);
1121         if (error) {
1122                 device_printf(dev, "cannot alloc shared memory\n");
1123                 return (error);
1124         }
1125         sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.dma_vaddr;
1126
1127         size = sc->vmx_ntxqueues * sizeof(struct vmxnet3_txq_shared) +
1128             sc->vmx_nrxqueues * sizeof(struct vmxnet3_rxq_shared);
1129         error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_qs_dma);
1130         if (error) {
1131                 device_printf(dev, "cannot alloc queue shared memory\n");
1132                 return (error);
1133         }
1134         sc->vmx_qs = (void *) sc->vmx_qs_dma.dma_vaddr;
1135         kva = sc->vmx_qs;
1136
1137         for (i = 0; i < sc->vmx_ntxqueues; i++) {
1138                 sc->vmx_txq[i].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
1139                 kva += sizeof(struct vmxnet3_txq_shared);
1140         }
1141         for (i = 0; i < sc->vmx_nrxqueues; i++) {
1142                 sc->vmx_rxq[i].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
1143                 kva += sizeof(struct vmxnet3_rxq_shared);
1144         }
1145
1146         if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1147                 size = sizeof(struct vmxnet3_rss_shared);
1148                 error = vmxnet3_dma_malloc(sc, size, 128, &sc->vmx_rss_dma);
1149                 if (error) {
1150                         device_printf(dev, "cannot alloc rss shared memory\n");
1151                         return (error);
1152                 }
1153                 sc->vmx_rss =
1154                     (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.dma_vaddr;
1155         }
1156
1157         return (0);
1158 }
1159
1160 static void
1161 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1162 {
1163
1164         if (sc->vmx_rss != NULL) {
1165                 vmxnet3_dma_free(sc, &sc->vmx_rss_dma);
1166                 sc->vmx_rss = NULL;
1167         }
1168
1169         if (sc->vmx_qs != NULL) {
1170                 vmxnet3_dma_free(sc, &sc->vmx_qs_dma);
1171                 sc->vmx_qs = NULL;
1172         }
1173
1174         if (sc->vmx_ds != NULL) {
1175                 vmxnet3_dma_free(sc, &sc->vmx_ds_dma);
1176                 sc->vmx_ds = NULL;
1177         }
1178 }
1179
1180 static int
1181 vmxnet3_alloc_txq_data(struct vmxnet3_softc *sc)
1182 {
1183         device_t dev;
1184         struct vmxnet3_txqueue *txq;
1185         struct vmxnet3_txring *txr;
1186         struct vmxnet3_comp_ring *txc;
1187         size_t descsz, compsz;
1188         int i, q, error;
1189
1190         dev = sc->vmx_dev;
1191
1192         for (q = 0; q < sc->vmx_ntxqueues; q++) {
1193                 txq = &sc->vmx_txq[q];
1194                 txr = &txq->vxtxq_cmd_ring;
1195                 txc = &txq->vxtxq_comp_ring;
1196
1197                 descsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc);
1198                 compsz = txr->vxtxr_ndesc * sizeof(struct vmxnet3_txcompdesc);
1199
1200                 error = bus_dma_tag_create(bus_get_dma_tag(dev),
1201                     1, 0,                       /* alignment, boundary */
1202                     BUS_SPACE_MAXADDR,          /* lowaddr */
1203                     BUS_SPACE_MAXADDR,          /* highaddr */
1204                     NULL, NULL,                 /* filter, filterarg */
1205                     VMXNET3_TX_MAXSIZE,         /* maxsize */
1206                     VMXNET3_TX_MAXSEGS,         /* nsegments */
1207                     VMXNET3_TX_MAXSEGSIZE,      /* maxsegsize */
1208                     0,                          /* flags */
1209                     NULL, NULL,                 /* lockfunc, lockarg */
1210                     &txr->vxtxr_txtag);
1211                 if (error) {
1212                         device_printf(dev,
1213                             "unable to create Tx buffer tag for queue %d\n", q);
1214                         return (error);
1215                 }
1216
1217                 error = vmxnet3_dma_malloc(sc, descsz, 512, &txr->vxtxr_dma);
1218                 if (error) {
1219                         device_printf(dev, "cannot alloc Tx descriptors for "
1220                             "queue %d error %d\n", q, error);
1221                         return (error);
1222                 }
1223                 txr->vxtxr_txd =
1224                     (struct vmxnet3_txdesc *) txr->vxtxr_dma.dma_vaddr;
1225
1226                 error = vmxnet3_dma_malloc(sc, compsz, 512, &txc->vxcr_dma);
1227                 if (error) {
1228                         device_printf(dev, "cannot alloc Tx comp descriptors "
1229                            "for queue %d error %d\n", q, error);
1230                         return (error);
1231                 }
1232                 txc->vxcr_u.txcd =
1233                     (struct vmxnet3_txcompdesc *) txc->vxcr_dma.dma_vaddr;
1234
1235                 for (i = 0; i < txr->vxtxr_ndesc; i++) {
1236                         error = bus_dmamap_create(txr->vxtxr_txtag, 0,
1237                             &txr->vxtxr_txbuf[i].vtxb_dmamap);
1238                         if (error) {
1239                                 device_printf(dev, "unable to create Tx buf "
1240                                     "dmamap for queue %d idx %d\n", q, i);
1241                                 return (error);
1242                         }
1243                 }
1244         }
1245
1246         return (0);
1247 }
1248
1249 static void
1250 vmxnet3_free_txq_data(struct vmxnet3_softc *sc)
1251 {
1252         device_t dev;
1253         struct vmxnet3_txqueue *txq;
1254         struct vmxnet3_txring *txr;
1255         struct vmxnet3_comp_ring *txc;
1256         struct vmxnet3_txbuf *txb;
1257         int i, q;
1258
1259         dev = sc->vmx_dev;
1260
1261         for (q = 0; q < sc->vmx_ntxqueues; q++) {
1262                 txq = &sc->vmx_txq[q];
1263                 txr = &txq->vxtxq_cmd_ring;
1264                 txc = &txq->vxtxq_comp_ring;
1265
1266                 for (i = 0; i < txr->vxtxr_ndesc; i++) {
1267                         txb = &txr->vxtxr_txbuf[i];
1268                         if (txb->vtxb_dmamap != NULL) {
1269                                 bus_dmamap_destroy(txr->vxtxr_txtag,
1270                                     txb->vtxb_dmamap);
1271                                 txb->vtxb_dmamap = NULL;
1272                         }
1273                 }
1274
1275                 if (txc->vxcr_u.txcd != NULL) {
1276                         vmxnet3_dma_free(sc, &txc->vxcr_dma);
1277                         txc->vxcr_u.txcd = NULL;
1278                 }
1279
1280                 if (txr->vxtxr_txd != NULL) {
1281                         vmxnet3_dma_free(sc, &txr->vxtxr_dma);
1282                         txr->vxtxr_txd = NULL;
1283                 }
1284
1285                 if (txr->vxtxr_txtag != NULL) {
1286                         bus_dma_tag_destroy(txr->vxtxr_txtag);
1287                         txr->vxtxr_txtag = NULL;
1288                 }
1289         }
1290 }
1291
1292 static int
1293 vmxnet3_alloc_rxq_data(struct vmxnet3_softc *sc)
1294 {
1295         device_t dev;
1296         struct vmxnet3_rxqueue *rxq;
1297         struct vmxnet3_rxring *rxr;
1298         struct vmxnet3_comp_ring *rxc;
1299         int descsz, compsz;
1300         int i, j, q, error;
1301
1302         dev = sc->vmx_dev;
1303
1304         for (q = 0; q < sc->vmx_nrxqueues; q++) {
1305                 rxq = &sc->vmx_rxq[q];
1306                 rxc = &rxq->vxrxq_comp_ring;
1307                 compsz = 0;
1308
1309                 for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1310                         rxr = &rxq->vxrxq_cmd_ring[i];
1311
1312                         descsz = rxr->vxrxr_ndesc *
1313                             sizeof(struct vmxnet3_rxdesc);
1314                         compsz += rxr->vxrxr_ndesc *
1315                             sizeof(struct vmxnet3_rxcompdesc);
1316
1317                         error = bus_dma_tag_create(bus_get_dma_tag(dev),
1318                             1, 0,               /* alignment, boundary */
1319                             BUS_SPACE_MAXADDR,  /* lowaddr */
1320                             BUS_SPACE_MAXADDR,  /* highaddr */
1321                             NULL, NULL,         /* filter, filterarg */
1322                             MJUMPAGESIZE,       /* maxsize */
1323                             1,                  /* nsegments */
1324                             MJUMPAGESIZE,       /* maxsegsize */
1325                             0,                  /* flags */
1326                             NULL, NULL,         /* lockfunc, lockarg */
1327                             &rxr->vxrxr_rxtag);
1328                         if (error) {
1329                                 device_printf(dev,
1330                                     "unable to create Rx buffer tag for "
1331                                     "queue %d\n", q);
1332                                 return (error);
1333                         }
1334
1335                         error = vmxnet3_dma_malloc(sc, descsz, 512,
1336                             &rxr->vxrxr_dma);
1337                         if (error) {
1338                                 device_printf(dev, "cannot allocate Rx "
1339                                     "descriptors for queue %d/%d error %d\n",
1340                                     q, i, error);
1341                                 return (error);
1342                         }
1343                         rxr->vxrxr_rxd =
1344                             (struct vmxnet3_rxdesc *) rxr->vxrxr_dma.dma_vaddr;
1345                 }
1346
1347                 error = vmxnet3_dma_malloc(sc, compsz, 512, &rxc->vxcr_dma);
1348                 if (error) {
1349                         device_printf(dev, "cannot alloc Rx comp descriptors "
1350                             "for queue %d error %d\n", q, error);
1351                         return (error);
1352                 }
1353                 rxc->vxcr_u.rxcd =
1354                     (struct vmxnet3_rxcompdesc *) rxc->vxcr_dma.dma_vaddr;
1355
1356                 for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1357                         rxr = &rxq->vxrxq_cmd_ring[i];
1358
1359                         error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1360                             &rxr->vxrxr_spare_dmap);
1361                         if (error) {
1362                                 device_printf(dev, "unable to create spare "
1363                                     "dmamap for queue %d/%d error %d\n",
1364                                     q, i, error);
1365                                 return (error);
1366                         }
1367
1368                         for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1369                                 error = bus_dmamap_create(rxr->vxrxr_rxtag, 0,
1370                                     &rxr->vxrxr_rxbuf[j].vrxb_dmamap);
1371                                 if (error) {
1372                                         device_printf(dev, "unable to create "
1373                                             "dmamap for queue %d/%d slot %d "
1374                                             "error %d\n",
1375                                             q, i, j, error);
1376                                         return (error);
1377                                 }
1378                         }
1379                 }
1380         }
1381
1382         return (0);
1383 }
1384
1385 static void
1386 vmxnet3_free_rxq_data(struct vmxnet3_softc *sc)
1387 {
1388         device_t dev;
1389         struct vmxnet3_rxqueue *rxq;
1390         struct vmxnet3_rxring *rxr;
1391         struct vmxnet3_comp_ring *rxc;
1392         struct vmxnet3_rxbuf *rxb;
1393         int i, j, q;
1394
1395         dev = sc->vmx_dev;
1396
1397         for (q = 0; q < sc->vmx_nrxqueues; q++) {
1398                 rxq = &sc->vmx_rxq[q];
1399                 rxc = &rxq->vxrxq_comp_ring;
1400
1401                 for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1402                         rxr = &rxq->vxrxq_cmd_ring[i];
1403
1404                         if (rxr->vxrxr_spare_dmap != NULL) {
1405                                 bus_dmamap_destroy(rxr->vxrxr_rxtag,
1406                                     rxr->vxrxr_spare_dmap);
1407                                 rxr->vxrxr_spare_dmap = NULL;
1408                         }
1409
1410                         for (j = 0; j < rxr->vxrxr_ndesc; j++) {
1411                                 rxb = &rxr->vxrxr_rxbuf[j];
1412                                 if (rxb->vrxb_dmamap != NULL) {
1413                                         bus_dmamap_destroy(rxr->vxrxr_rxtag,
1414                                             rxb->vrxb_dmamap);
1415                                         rxb->vrxb_dmamap = NULL;
1416                                 }
1417                         }
1418                 }
1419
1420                 if (rxc->vxcr_u.rxcd != NULL) {
1421                         vmxnet3_dma_free(sc, &rxc->vxcr_dma);
1422                         rxc->vxcr_u.rxcd = NULL;
1423                 }
1424
1425                 for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
1426                         rxr = &rxq->vxrxq_cmd_ring[i];
1427
1428                         if (rxr->vxrxr_rxd != NULL) {
1429                                 vmxnet3_dma_free(sc, &rxr->vxrxr_dma);
1430                                 rxr->vxrxr_rxd = NULL;
1431                         }
1432
1433                         if (rxr->vxrxr_rxtag != NULL) {
1434                                 bus_dma_tag_destroy(rxr->vxrxr_rxtag);
1435                                 rxr->vxrxr_rxtag = NULL;
1436                         }
1437                 }
1438         }
1439 }
1440
1441 static int
1442 vmxnet3_alloc_queue_data(struct vmxnet3_softc *sc)
1443 {
1444         int error;
1445
1446         error = vmxnet3_alloc_txq_data(sc);
1447         if (error)
1448                 return (error);
1449
1450         error = vmxnet3_alloc_rxq_data(sc);
1451         if (error)
1452                 return (error);
1453
1454         return (0);
1455 }
1456
1457 static void
1458 vmxnet3_free_queue_data(struct vmxnet3_softc *sc)
1459 {
1460
1461         if (sc->vmx_rxq != NULL)
1462                 vmxnet3_free_rxq_data(sc);
1463
1464         if (sc->vmx_txq != NULL)
1465                 vmxnet3_free_txq_data(sc);
1466 }
1467
1468 static int
1469 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1470 {
1471         int error;
1472
1473         error = vmxnet3_dma_malloc(sc, VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN,
1474             32, &sc->vmx_mcast_dma);
1475         if (error)
1476                 device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1477         else
1478                 sc->vmx_mcast = sc->vmx_mcast_dma.dma_vaddr;
1479
1480         return (error);
1481 }
1482
1483 static void
1484 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1485 {
1486
1487         if (sc->vmx_mcast != NULL) {
1488                 vmxnet3_dma_free(sc, &sc->vmx_mcast_dma);
1489                 sc->vmx_mcast = NULL;
1490         }
1491 }
1492
1493 static void
1494 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1495 {
1496         struct vmxnet3_driver_shared *ds;
1497         struct vmxnet3_txqueue *txq;
1498         struct vmxnet3_txq_shared *txs;
1499         struct vmxnet3_rxqueue *rxq;
1500         struct vmxnet3_rxq_shared *rxs;
1501         int i;
1502
1503         ds = sc->vmx_ds;
1504
1505         /*
1506          * Initialize fields of the shared data that remain the same across
1507          * reinits. Note the shared data is zeroed when allocated.
1508          */
1509
1510         ds->magic = VMXNET3_REV1_MAGIC;
1511
1512         /* DriverInfo */
1513         ds->version = VMXNET3_DRIVER_VERSION;
1514         ds->guest = VMXNET3_GOS_FREEBSD |
1515 #ifdef __LP64__
1516             VMXNET3_GOS_64BIT;
1517 #else
1518             VMXNET3_GOS_32BIT;
1519 #endif
1520         ds->vmxnet3_revision = 1;
1521         ds->upt_version = 1;
1522
1523         /* Misc. conf */
1524         ds->driver_data = vtophys(sc);
1525         ds->driver_data_len = sizeof(struct vmxnet3_softc);
1526         ds->queue_shared = sc->vmx_qs_dma.dma_paddr;
1527         ds->queue_shared_len = sc->vmx_qs_dma.dma_size;
1528         ds->nrxsg_max = sc->vmx_max_rxsegs;
1529
1530         /* RSS conf */
1531         if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1532                 ds->rss.version = 1;
1533                 ds->rss.paddr = sc->vmx_rss_dma.dma_paddr;
1534                 ds->rss.len = sc->vmx_rss_dma.dma_size;
1535         }
1536
1537         /* Interrupt control. */
1538         ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1539         ds->nintr = sc->vmx_nintrs;
1540         ds->evintr = sc->vmx_event_intr_idx;
1541         ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1542
1543         for (i = 0; i < sc->vmx_nintrs; i++)
1544                 ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1545
1546         /* Receive filter. */
1547         ds->mcast_table = sc->vmx_mcast_dma.dma_paddr;
1548         ds->mcast_tablelen = sc->vmx_mcast_dma.dma_size;
1549
1550         /* Tx queues */
1551         for (i = 0; i < sc->vmx_ntxqueues; i++) {
1552                 txq = &sc->vmx_txq[i];
1553                 txs = txq->vxtxq_ts;
1554
1555                 txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_dma.dma_paddr;
1556                 txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1557                 txs->comp_ring = txq->vxtxq_comp_ring.vxcr_dma.dma_paddr;
1558                 txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1559                 txs->driver_data = vtophys(txq);
1560                 txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1561         }
1562
1563         /* Rx queues */
1564         for (i = 0; i < sc->vmx_nrxqueues; i++) {
1565                 rxq = &sc->vmx_rxq[i];
1566                 rxs = rxq->vxrxq_rs;
1567
1568                 rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_dma.dma_paddr;
1569                 rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1570                 rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_dma.dma_paddr;
1571                 rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1572                 rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_dma.dma_paddr;
1573                 rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1574                 rxs->driver_data = vtophys(rxq);
1575                 rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1576         }
1577 }
1578
1579 static void
1580 vmxnet3_reinit_interface(struct vmxnet3_softc *sc)
1581 {
1582         struct ifnet *ifp;
1583
1584         ifp = sc->vmx_ifp;
1585
1586         /* Use the current MAC address. */
1587         bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1588         vmxnet3_set_lladdr(sc);
1589
1590         ifp->if_hwassist = 0;
1591         if (ifp->if_capenable & IFCAP_TXCSUM)
1592                 ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD;
1593         if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1594                 ifp->if_hwassist |= VMXNET3_CSUM_OFFLOAD_IPV6;
1595         if (ifp->if_capenable & IFCAP_TSO4)
1596                 ifp->if_hwassist |= CSUM_IP_TSO;
1597         if (ifp->if_capenable & IFCAP_TSO6)
1598                 ifp->if_hwassist |= CSUM_IP6_TSO;
1599 }
1600
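/*
 * Program the RSS shared area: a fixed Toeplitz key (below) and an
 * indirection table that spreads hash buckets round-robin across the Rx
 * queues.  For example, with two Rx queues the table reads 0,1,0,1,...
 */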
1601 static void
1602 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1603 {
1604         /*
1605          * Use the same key as the Linux driver until FreeBSD can do
1606          * RSS (presumably Toeplitz) in software.
1607          */
1608         static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1609             0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1610             0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1611             0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1612             0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1613             0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1614         };
1615
1616         struct vmxnet3_driver_shared *ds;
1617         struct vmxnet3_rss_shared *rss;
1618         int i;
1619
1620         ds = sc->vmx_ds;
1621         rss = sc->vmx_rss;
1622
1623         rss->hash_type =
1624             UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1625             UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1626         rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1627         rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1628         rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1629         memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1630
1631         for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1632                 rss->ind_table[i] = i % sc->vmx_nrxqueues;
1633 }
1634
1635 static void
1636 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1637 {
1638         struct ifnet *ifp;
1639         struct vmxnet3_driver_shared *ds;
1640
1641         ifp = sc->vmx_ifp;
1642         ds = sc->vmx_ds;
1643
1644         ds->mtu = ifp->if_mtu;
1645         ds->ntxqueue = sc->vmx_ntxqueues;
1646         ds->nrxqueue = sc->vmx_nrxqueues;
1647
1648         ds->upt_features = 0;
1649         if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1650                 ds->upt_features |= UPT1_F_CSUM;
1651         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1652                 ds->upt_features |= UPT1_F_VLAN;
1653         if (ifp->if_capenable & IFCAP_LRO)
1654                 ds->upt_features |= UPT1_F_LRO;
1655
1656         if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1657                 ds->upt_features |= UPT1_F_RSS;
1658                 vmxnet3_reinit_rss_shared_data(sc);
1659         }
1660
1661         vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.dma_paddr);
1662         vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1663             (uint64_t) sc->vmx_ds_dma.dma_paddr >> 32);
1664 }
1665
1666 static int
1667 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1668 {
1669         int error;
1670
1671         error = vmxnet3_alloc_shared_data(sc);
1672         if (error)
1673                 return (error);
1674
1675         error = vmxnet3_alloc_queue_data(sc);
1676         if (error)
1677                 return (error);
1678
1679         error = vmxnet3_alloc_mcast_table(sc);
1680         if (error)
1681                 return (error);
1682
1683         vmxnet3_init_shared_data(sc);
1684
1685         return (0);
1686 }
1687
1688 static void
1689 vmxnet3_free_data(struct vmxnet3_softc *sc)
1690 {
1691
1692         vmxnet3_free_mcast_table(sc);
1693         vmxnet3_free_queue_data(sc);
1694         vmxnet3_free_shared_data(sc);
1695 }
1696
1697 static int
1698 vmxnet3_setup_interface(struct vmxnet3_softc *sc)
1699 {
1700         device_t dev;
1701         struct ifnet *ifp;
1702
1703         dev = sc->vmx_dev;
1704
1705         ifp = sc->vmx_ifp = if_alloc(IFT_ETHER);
1706         if (ifp == NULL) {
1707                 device_printf(dev, "cannot allocate ifnet structure\n");
1708                 return (ENOSPC);
1709         }
1710
1711         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1712 #if __FreeBSD_version < 1000025
1713         ifp->if_baudrate = 1000000000;
1714 #elif __FreeBSD_version < 1100011
1715         if_initbaudrate(ifp, IF_Gbps(10));
1716 #else
1717         ifp->if_baudrate = IF_Gbps(10);
1718 #endif
1719         ifp->if_softc = sc;
1720         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1721         ifp->if_init = vmxnet3_init;
1722         ifp->if_ioctl = vmxnet3_ioctl;
1723         ifp->if_hw_tsomax = 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
1724         ifp->if_hw_tsomaxsegcount = VMXNET3_TX_MAXSEGS;
1725         ifp->if_hw_tsomaxsegsize = VMXNET3_TX_MAXSEGSIZE;
1726
1727 #ifdef VMXNET3_LEGACY_TX
1728         ifp->if_start = vmxnet3_start;
1729         ifp->if_snd.ifq_drv_maxlen = sc->vmx_ntxdescs - 1;
1730         IFQ_SET_MAXLEN(&ifp->if_snd, sc->vmx_ntxdescs - 1);
1731         IFQ_SET_READY(&ifp->if_snd);
1732 #else
1733         ifp->if_transmit = vmxnet3_txq_mq_start;
1734         ifp->if_qflush = vmxnet3_qflush;
1735 #endif
1736
1737         vmxnet3_get_lladdr(sc);
1738         ether_ifattach(ifp, sc->vmx_lladdr);
1739
1740         ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_TXCSUM;
1741         ifp->if_capabilities |= IFCAP_RXCSUM_IPV6 | IFCAP_TXCSUM_IPV6;
1742         ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
1743         ifp->if_capabilities |= IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
1744             IFCAP_VLAN_HWCSUM;
1745         ifp->if_capenable = ifp->if_capabilities;
1746
1747         /* These capabilities are not enabled by default. */
1748         ifp->if_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
1749
1750         sc->vmx_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1751             vmxnet3_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1752         sc->vmx_vlan_detach = EVENTHANDLER_REGISTER(vlan_config,
1753             vmxnet3_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1754
1755         ifmedia_init(&sc->vmx_media, 0, vmxnet3_media_change,
1756             vmxnet3_media_status);
1757         ifmedia_add(&sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1758         ifmedia_set(&sc->vmx_media, IFM_ETHER | IFM_AUTO);
1759
1760         return (0);
1761 }
1762
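/*
 * Event interrupt handler.  The event bits are read from the shared data
 * and acknowledged through BAR1 before being processed: link changes
 * restart transmit, and Tx/Rx queue errors force a full reinit of the
 * device while holding the core lock.
 */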
1763 static void
1764 vmxnet3_evintr(struct vmxnet3_softc *sc)
1765 {
1766         device_t dev;
1767         struct ifnet *ifp;
1768         struct vmxnet3_txq_shared *ts;
1769         struct vmxnet3_rxq_shared *rs;
1770         uint32_t event;
1771         int reset;
1772
1773         dev = sc->vmx_dev;
1774         ifp = sc->vmx_ifp;
1775         reset = 0;
1776
1777         VMXNET3_CORE_LOCK(sc);
1778
1779         /* Clear events. */
1780         event = sc->vmx_ds->event;
1781         vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1782
1783         if (event & VMXNET3_EVENT_LINK) {
1784                 vmxnet3_link_status(sc);
1785                 if (sc->vmx_link_active != 0)
1786                         vmxnet3_tx_start_all(sc);
1787         }
1788
1789         if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1790                 reset = 1;
1791                 vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1792                 ts = sc->vmx_txq[0].vxtxq_ts;
1793                 if (ts->stopped != 0)
1794                         device_printf(dev, "Tx queue error %#x\n", ts->error);
1795                 rs = sc->vmx_rxq[0].vxrxq_rs;
1796                 if (rs->stopped != 0)
1797                         device_printf(dev, "Rx queue error %#x\n", rs->error);
1798                 device_printf(dev, "Rx/Tx queue error event ... resetting\n");
1799         }
1800
1801         if (event & VMXNET3_EVENT_DIC)
1802                 device_printf(dev, "device implementation change event\n");
1803         if (event & VMXNET3_EVENT_DEBUG)
1804                 device_printf(dev, "debug event\n");
1805
1806         if (reset != 0) {
1807                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1808                 vmxnet3_init_locked(sc);
1809         }
1810
1811         VMXNET3_CORE_UNLOCK(sc);
1812 }
1813
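/*
 * Reclaim completed transmit descriptors.  Ownership of a completion
 * descriptor is indicated by its generation bit; a sketch of the consumer
 * side implemented by the loop below:
 *
 *	if (txcd->gen != txc->vxcr_gen)		// still owned by the device
 *		stop;
 *	if (++txc->vxcr_next == txc->vxcr_ndesc)
 *		txc->vxcr_next = 0, txc->vxcr_gen ^= 1;	// wrap and flip
 *
 * Each completion covers a whole frame, so the command ring is advanced
 * past the frame's last descriptor (eop_idx) once the mbuf is unmapped
 * and freed.
 */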
1814 static void
1815 vmxnet3_txq_eof(struct vmxnet3_txqueue *txq)
1816 {
1817         struct vmxnet3_softc *sc;
1818         struct ifnet *ifp;
1819         struct vmxnet3_txring *txr;
1820         struct vmxnet3_comp_ring *txc;
1821         struct vmxnet3_txcompdesc *txcd;
1822         struct vmxnet3_txbuf *txb;
1823         struct mbuf *m;
1824         u_int sop;
1825
1826         sc = txq->vxtxq_sc;
1827         ifp = sc->vmx_ifp;
1828         txr = &txq->vxtxq_cmd_ring;
1829         txc = &txq->vxtxq_comp_ring;
1830
1831         VMXNET3_TXQ_LOCK_ASSERT(txq);
1832
1833         for (;;) {
1834                 txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1835                 if (txcd->gen != txc->vxcr_gen)
1836                         break;
1837                 vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1838
1839                 if (++txc->vxcr_next == txc->vxcr_ndesc) {
1840                         txc->vxcr_next = 0;
1841                         txc->vxcr_gen ^= 1;
1842                 }
1843
1844                 sop = txr->vxtxr_next;
1845                 txb = &txr->vxtxr_txbuf[sop];
1846
1847                 if ((m = txb->vtxb_m) != NULL) {
1848                         bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
1849                             BUS_DMASYNC_POSTWRITE);
1850                         bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
1851
1852                         txq->vxtxq_stats.vmtxs_opackets++;
1853                         txq->vxtxq_stats.vmtxs_obytes += m->m_pkthdr.len;
1854                         if (m->m_flags & M_MCAST)
1855                                 txq->vxtxq_stats.vmtxs_omcasts++;
1856
1857                         m_freem(m);
1858                         txb->vtxb_m = NULL;
1859                 }
1860
1861                 txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1862         }
1863
1864         if (txr->vxtxr_head == txr->vxtxr_next)
1865                 txq->vxtxq_watchdog = 0;
1866 }
1867
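/*
 * Post a fresh receive buffer at the current fill index of the ring.
 * Ring 0 holds MCLBYTES "head" buffers at chain boundaries and
 * MJUMPAGESIZE "body" buffers elsewhere; ring 1, if used, holds only
 * body buffers.  The mbuf is loaded with the spare DMA map, the maps are
 * then swapped, and the Rx descriptor is rewritten with the new address,
 * length, and buffer type.
 */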
1868 static int
1869 vmxnet3_newbuf(struct vmxnet3_softc *sc, struct vmxnet3_rxring *rxr)
1870 {
1871         struct ifnet *ifp;
1872         struct mbuf *m;
1873         struct vmxnet3_rxdesc *rxd;
1874         struct vmxnet3_rxbuf *rxb;
1875         bus_dma_tag_t tag;
1876         bus_dmamap_t dmap;
1877         bus_dma_segment_t segs[1];
1878         int idx, clsize, btype, flags, nsegs, error;
1879
1880         ifp = sc->vmx_ifp;
1881         tag = rxr->vxrxr_rxtag;
1882         dmap = rxr->vxrxr_spare_dmap;
1883         idx = rxr->vxrxr_fill;
1884         rxd = &rxr->vxrxr_rxd[idx];
1885         rxb = &rxr->vxrxr_rxbuf[idx];
1886
1887 #ifdef VMXNET3_FAILPOINTS
1888         KFAIL_POINT_CODE(VMXNET3_FP, newbuf, return ENOBUFS);
1889         if (rxr->vxrxr_rid != 0)
1890                 KFAIL_POINT_CODE(VMXNET3_FP, newbuf_body_only, return ENOBUFS);
1891 #endif
1892
1893         if (rxr->vxrxr_rid == 0 && (idx % sc->vmx_rx_max_chain) == 0) {
1894                 flags = M_PKTHDR;
1895                 clsize = MCLBYTES;
1896                 btype = VMXNET3_BTYPE_HEAD;
1897         } else {
1898 #if __FreeBSD_version < 902001
1899                 /*
1900                  * These mbufs will never be used for the start of a frame.
1901                  * Roughly prior to branching releng/9.2, bus_dmamap_load_mbuf_sg()
1902                  * required the mbuf to always be a packet header. Avoid
1903                  * unnecessary mbuf initialization in newer versions where
1904                  * that is not the case.
1905                  */
1906                 flags = M_PKTHDR;
1907 #else
1908                 flags = 0;
1909 #endif
1910                 clsize = MJUMPAGESIZE;
1911                 btype = VMXNET3_BTYPE_BODY;
1912         }
1913
1914         m = m_getjcl(M_NOWAIT, MT_DATA, flags, clsize);
1915         if (m == NULL) {
1916                 sc->vmx_stats.vmst_mgetcl_failed++;
1917                 return (ENOBUFS);
1918         }
1919
1920         if (btype == VMXNET3_BTYPE_HEAD) {
1921                 m->m_len = m->m_pkthdr.len = clsize;
1922                 m_adj(m, ETHER_ALIGN);
1923         } else
1924                 m->m_len = clsize;
1925
1926         error = bus_dmamap_load_mbuf_sg(tag, dmap, m, &segs[0], &nsegs,
1927             BUS_DMA_NOWAIT);
1928         if (error) {
1929                 m_freem(m);
1930                 sc->vmx_stats.vmst_mbuf_load_failed++;
1931                 return (error);
1932         }
1933         KASSERT(nsegs == 1,
1934             ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
1935 #if __FreeBSD_version < 902001
1936         if (btype == VMXNET3_BTYPE_BODY)
1937                 m->m_flags &= ~M_PKTHDR;
1938 #endif
1939
1940         if (rxb->vrxb_m != NULL) {
1941                 bus_dmamap_sync(tag, rxb->vrxb_dmamap, BUS_DMASYNC_POSTREAD);
1942                 bus_dmamap_unload(tag, rxb->vrxb_dmamap);
1943         }
1944
1945         rxr->vxrxr_spare_dmap = rxb->vrxb_dmamap;
1946         rxb->vrxb_dmamap = dmap;
1947         rxb->vrxb_m = m;
1948
1949         rxd->addr = segs[0].ds_addr;
1950         rxd->len = segs[0].ds_len;
1951         rxd->btype = btype;
1952         rxd->gen = rxr->vxrxr_gen;
1953
1954         vmxnet3_rxr_increment_fill(rxr);
1955         return (0);
1956 }
1957
1958 static void
1959 vmxnet3_rxq_eof_discard(struct vmxnet3_rxqueue *rxq,
1960     struct vmxnet3_rxring *rxr, int idx)
1961 {
1962         struct vmxnet3_rxdesc *rxd;
1963
1964         rxd = &rxr->vxrxr_rxd[idx];
1965         rxd->gen = rxr->vxrxr_gen;
1966         vmxnet3_rxr_increment_fill(rxr);
1967 }
1968
1969 static void
1970 vmxnet3_rxq_discard_chain(struct vmxnet3_rxqueue *rxq)
1971 {
1972         struct vmxnet3_softc *sc;
1973         struct vmxnet3_rxring *rxr;
1974         struct vmxnet3_comp_ring *rxc;
1975         struct vmxnet3_rxcompdesc *rxcd;
1976         int idx, eof;
1977
1978         sc = rxq->vxrxq_sc;
1979         rxc = &rxq->vxrxq_comp_ring;
1980
1981         do {
1982                 rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
1983                 if (rxcd->gen != rxc->vxcr_gen)
1984                         break;          /* Not expected. */
1985                 vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1986
1987                 if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
1988                         rxc->vxcr_next = 0;
1989                         rxc->vxcr_gen ^= 1;
1990                 }
1991
1992                 idx = rxcd->rxd_idx;
1993                 eof = rxcd->eop;
1994                 if (rxcd->qid < sc->vmx_nrxqueues)
1995                         rxr = &rxq->vxrxq_cmd_ring[0];
1996                 else
1997                         rxr = &rxq->vxrxq_cmd_ring[1];
1998                 vmxnet3_rxq_eof_discard(rxq, rxr, idx);
1999         } while (!eof);
2000 }
2001
2002 static void
2003 vmxnet3_rx_csum(struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2004 {
2005
2006         if (rxcd->ipv4) {
2007                 m->m_pkthdr.csum_flags |= CSUM_IP_CHECKED;
2008                 if (rxcd->ipcsum_ok)
2009                         m->m_pkthdr.csum_flags |= CSUM_IP_VALID;
2010         }
2011
2012         if (!rxcd->fragment) {
2013                 if (rxcd->csum_ok && (rxcd->tcp || rxcd->udp)) {
2014                         m->m_pkthdr.csum_flags |= CSUM_DATA_VALID |
2015                             CSUM_PSEUDO_HDR;
2016                         m->m_pkthdr.csum_data = 0xFFFF;
2017                 }
2018         }
2019 }
2020
2021 static void
2022 vmxnet3_rxq_input(struct vmxnet3_rxqueue *rxq,
2023     struct vmxnet3_rxcompdesc *rxcd, struct mbuf *m)
2024 {
2025         struct vmxnet3_softc *sc;
2026         struct ifnet *ifp;
2027
2028         sc = rxq->vxrxq_sc;
2029         ifp = sc->vmx_ifp;
2030
2031         if (rxcd->error) {
2032                 rxq->vxrxq_stats.vmrxs_ierrors++;
2033                 m_freem(m);
2034                 return;
2035         }
2036
2037 #ifdef notyet
2038         switch (rxcd->rss_type) {
2039         case VMXNET3_RCD_RSS_TYPE_IPV4:
2040                 m->m_pkthdr.flowid = rxcd->rss_hash;
2041                 M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV4);
2042                 break;
2043         case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
2044                 m->m_pkthdr.flowid = rxcd->rss_hash;
2045                 M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV4);
2046                 break;
2047         case VMXNET3_RCD_RSS_TYPE_IPV6:
2048                 m->m_pkthdr.flowid = rxcd->rss_hash;
2049                 M_HASHTYPE_SET(m, M_HASHTYPE_RSS_IPV6);
2050                 break;
2051         case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
2052                 m->m_pkthdr.flowid = rxcd->rss_hash;
2053                 M_HASHTYPE_SET(m, M_HASHTYPE_RSS_TCP_IPV6);
2054                 break;
2055         default: /* VMXNET3_RCD_RSS_TYPE_NONE */
2056                 m->m_pkthdr.flowid = rxq->vxrxq_id;
2057                 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2058                 break;
2059         }
2060 #else
2061         m->m_pkthdr.flowid = rxq->vxrxq_id;
2062         M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2063 #endif
2064
2065         if (!rxcd->no_csum)
2066                 vmxnet3_rx_csum(rxcd, m);
2067         if (rxcd->vlan) {
2068                 m->m_flags |= M_VLANTAG;
2069                 m->m_pkthdr.ether_vtag = rxcd->vtag;
2070         }
2071
2072         rxq->vxrxq_stats.vmrxs_ipackets++;
2073         rxq->vxrxq_stats.vmrxs_ibytes += m->m_pkthdr.len;
2074
2075         VMXNET3_RXQ_UNLOCK(rxq);
2076         (*ifp->if_input)(ifp, m);
2077         VMXNET3_RXQ_LOCK(rxq);
2078 }
2079
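/*
 * Process received frames.  A frame may span several command-ring
 * descriptors (SOP followed by body descriptors, ending at EOP); the
 * partially assembled chain is stashed in vxrxq_mhead/vxrxq_mtail so it
 * survives across calls when the completion ring runs dry mid-frame.
 * When the host skips command descriptors, the fill index is resynced
 * before the completed frame is handed up the stack via if_input().
 */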
2080 static void
2081 vmxnet3_rxq_eof(struct vmxnet3_rxqueue *rxq)
2082 {
2083         struct vmxnet3_softc *sc;
2084         struct ifnet *ifp;
2085         struct vmxnet3_rxring *rxr;
2086         struct vmxnet3_comp_ring *rxc;
2087         struct vmxnet3_rxdesc *rxd;
2088         struct vmxnet3_rxcompdesc *rxcd;
2089         struct mbuf *m, *m_head, *m_tail;
2090         int idx, length;
2091
2092         sc = rxq->vxrxq_sc;
2093         ifp = sc->vmx_ifp;
2094         rxc = &rxq->vxrxq_comp_ring;
2095
2096         VMXNET3_RXQ_LOCK_ASSERT(rxq);
2097
2098         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2099                 return;
2100
2101         m_head = rxq->vxrxq_mhead;
2102         rxq->vxrxq_mhead = NULL;
2103         m_tail = rxq->vxrxq_mtail;
2104         rxq->vxrxq_mtail = NULL;
2105         MPASS(m_head == NULL || m_tail != NULL);
2106
2107         for (;;) {
2108                 rxcd = &rxc->vxcr_u.rxcd[rxc->vxcr_next];
2109                 if (rxcd->gen != rxc->vxcr_gen) {
2110                         rxq->vxrxq_mhead = m_head;
2111                         rxq->vxrxq_mtail = m_tail;
2112                         break;
2113                 }
2114                 vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
2115
2116                 if (++rxc->vxcr_next == rxc->vxcr_ndesc) {
2117                         rxc->vxcr_next = 0;
2118                         rxc->vxcr_gen ^= 1;
2119                 }
2120
2121                 idx = rxcd->rxd_idx;
2122                 length = rxcd->len;
2123                 if (rxcd->qid < sc->vmx_nrxqueues)
2124                         rxr = &rxq->vxrxq_cmd_ring[0];
2125                 else
2126                         rxr = &rxq->vxrxq_cmd_ring[1];
2127                 rxd = &rxr->vxrxr_rxd[idx];
2128
2129                 m = rxr->vxrxr_rxbuf[idx].vrxb_m;
2130                 KASSERT(m != NULL, ("%s: queue %d idx %d without mbuf",
2131                     __func__, rxcd->qid, idx));
2132
2133                 /*
2134                  * The host may skip descriptors. We detect this when the
2135                  * completed descriptor's index does not match our fill index.
2136                  * Catch up with the host now.
2137                  */
2138                 if (__predict_false(rxr->vxrxr_fill != idx)) {
2139                         while (rxr->vxrxr_fill != idx) {
2140                                 rxr->vxrxr_rxd[rxr->vxrxr_fill].gen =
2141                                     rxr->vxrxr_gen;
2142                                 vmxnet3_rxr_increment_fill(rxr);
2143                         }
2144                 }
2145
2146                 if (rxcd->sop) {
2147                         KASSERT(rxd->btype == VMXNET3_BTYPE_HEAD,
2148                             ("%s: start of frame w/o head buffer", __func__));
2149                         KASSERT(rxr == &rxq->vxrxq_cmd_ring[0],
2150                             ("%s: start of frame not in ring 0", __func__));
2151                         KASSERT((idx % sc->vmx_rx_max_chain) == 0,
2152                             ("%s: start of frame at unexpected index %d (%d)",
2153                              __func__, idx, sc->vmx_rx_max_chain));
2154                         KASSERT(m_head == NULL,
2155                             ("%s: duplicate start of frame?", __func__));
2156
2157                         if (length == 0) {
2158                                 /* Just ignore this descriptor. */
2159                                 vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2160                                 goto nextp;
2161                         }
2162
2163                         if (vmxnet3_newbuf(sc, rxr) != 0) {
2164                                 rxq->vxrxq_stats.vmrxs_iqdrops++;
2165                                 vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2166                                 if (!rxcd->eop)
2167                                         vmxnet3_rxq_discard_chain(rxq);
2168                                 goto nextp;
2169                         }
2170
2171                         m->m_pkthdr.rcvif = ifp;
2172                         m->m_pkthdr.len = m->m_len = length;
2173                         m->m_pkthdr.csum_flags = 0;
2174                         m_head = m_tail = m;
2175
2176                 } else {
2177                         KASSERT(rxd->btype == VMXNET3_BTYPE_BODY,
2178                             ("%s: non-start of frame w/o body buffer", __func__));
2179                         KASSERT(m_head != NULL,
2180                             ("%s: frame not started?", __func__));
2181
2182                         if (vmxnet3_newbuf(sc, rxr) != 0) {
2183                                 rxq->vxrxq_stats.vmrxs_iqdrops++;
2184                                 vmxnet3_rxq_eof_discard(rxq, rxr, idx);
2185                                 if (!rxcd->eop)
2186                                         vmxnet3_rxq_discard_chain(rxq);
2187                                 m_freem(m_head);
2188                                 m_head = m_tail = NULL;
2189                                 goto nextp;
2190                         }
2191
2192                         m->m_len = length;
2193                         m_head->m_pkthdr.len += length;
2194                         m_tail->m_next = m;
2195                         m_tail = m;
2196                 }
2197
2198                 if (rxcd->eop) {
2199                         vmxnet3_rxq_input(rxq, rxcd, m_head);
2200                         m_head = m_tail = NULL;
2201
2202                         /* Must recheck after dropping the Rx lock. */
2203                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
2204                                 break;
2205                 }
2206
2207 nextp:
2208                 if (__predict_false(rxq->vxrxq_rs->update_rxhead)) {
2209                         int qid = rxcd->qid;
2210                         bus_size_t r;
2211
2212                         idx = (idx + 1) % rxr->vxrxr_ndesc;
2213                         if (qid >= sc->vmx_nrxqueues) {
2214                                 qid -= sc->vmx_nrxqueues;
2215                                 r = VMXNET3_BAR0_RXH2(qid);
2216                         } else
2217                                 r = VMXNET3_BAR0_RXH1(qid);
2218                         vmxnet3_write_bar0(sc, r, idx);
2219                 }
2220         }
2221 }
2222
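/*
 * Shared handler used for legacy INTx and single-vector MSI.  For INTx
 * the interrupt status register is read first to check whether this
 * device raised the interrupt.  Pending events, the first Rx queue, and
 * the first Tx queue are all serviced from this one vector.
 */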
2223 static void
2224 vmxnet3_legacy_intr(void *xsc)
2225 {
2226         struct vmxnet3_softc *sc;
2227         struct vmxnet3_rxqueue *rxq;
2228         struct vmxnet3_txqueue *txq;
2229
2230         sc = xsc;
2231         rxq = &sc->vmx_rxq[0];
2232         txq = &sc->vmx_txq[0];
2233
2234         if (sc->vmx_intr_type == VMXNET3_IT_LEGACY) {
2235                 if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
2236                         return;
2237         }
2238         if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2239                 vmxnet3_disable_all_intrs(sc);
2240
2241         if (sc->vmx_ds->event != 0)
2242                 vmxnet3_evintr(sc);
2243
2244         VMXNET3_RXQ_LOCK(rxq);
2245         vmxnet3_rxq_eof(rxq);
2246         VMXNET3_RXQ_UNLOCK(rxq);
2247
2248         VMXNET3_TXQ_LOCK(txq);
2249         vmxnet3_txq_eof(txq);
2250         vmxnet3_txq_start(txq);
2251         VMXNET3_TXQ_UNLOCK(txq);
2252
2253         vmxnet3_enable_all_intrs(sc);
2254 }
2255
2256 static void
2257 vmxnet3_txq_intr(void *xtxq)
2258 {
2259         struct vmxnet3_softc *sc;
2260         struct vmxnet3_txqueue *txq;
2261
2262         txq = xtxq;
2263         sc = txq->vxtxq_sc;
2264
2265         if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2266                 vmxnet3_disable_intr(sc, txq->vxtxq_intr_idx);
2267
2268         VMXNET3_TXQ_LOCK(txq);
2269         vmxnet3_txq_eof(txq);
2270         vmxnet3_txq_start(txq);
2271         VMXNET3_TXQ_UNLOCK(txq);
2272
2273         vmxnet3_enable_intr(sc, txq->vxtxq_intr_idx);
2274 }
2275
2276 static void
2277 vmxnet3_rxq_intr(void *xrxq)
2278 {
2279         struct vmxnet3_softc *sc;
2280         struct vmxnet3_rxqueue *rxq;
2281
2282         rxq = xrxq;
2283         sc = rxq->vxrxq_sc;
2284
2285         if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2286                 vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
2287
2288         VMXNET3_RXQ_LOCK(rxq);
2289         vmxnet3_rxq_eof(rxq);
2290         VMXNET3_RXQ_UNLOCK(rxq);
2291
2292         vmxnet3_enable_intr(sc, rxq->vxrxq_intr_idx);
2293 }
2294
2295 static void
2296 vmxnet3_event_intr(void *xsc)
2297 {
2298         struct vmxnet3_softc *sc;
2299
2300         sc = xsc;
2301
2302         if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
2303                 vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
2304
2305         if (sc->vmx_ds->event != 0)
2306                 vmxnet3_evintr(sc);
2307
2308         vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2309 }
2310
2311 static void
2312 vmxnet3_txstop(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2313 {
2314         struct vmxnet3_txring *txr;
2315         struct vmxnet3_txbuf *txb;
2316         int i;
2317
2318         txr = &txq->vxtxq_cmd_ring;
2319
2320         for (i = 0; i < txr->vxtxr_ndesc; i++) {
2321                 txb = &txr->vxtxr_txbuf[i];
2322
2323                 if (txb->vtxb_m == NULL)
2324                         continue;
2325
2326                 bus_dmamap_sync(txr->vxtxr_txtag, txb->vtxb_dmamap,
2327                     BUS_DMASYNC_POSTWRITE);
2328                 bus_dmamap_unload(txr->vxtxr_txtag, txb->vtxb_dmamap);
2329                 m_freem(txb->vtxb_m);
2330                 txb->vtxb_m = NULL;
2331         }
2332 }
2333
2334 static void
2335 vmxnet3_rxstop(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2336 {
2337         struct vmxnet3_rxring *rxr;
2338         struct vmxnet3_rxbuf *rxb;
2339         int i, j;
2340
2341         if (rxq->vxrxq_mhead != NULL) {
2342                 m_freem(rxq->vxrxq_mhead);
2343                 rxq->vxrxq_mhead = NULL;
2344                 rxq->vxrxq_mtail = NULL;
2345         }
2346
2347         for (i = 0; i < VMXNET3_RXRINGS_PERQ; i++) {
2348                 rxr = &rxq->vxrxq_cmd_ring[i];
2349
2350                 for (j = 0; j < rxr->vxrxr_ndesc; j++) {
2351                         rxb = &rxr->vxrxr_rxbuf[j];
2352
2353                         if (rxb->vrxb_m == NULL)
2354                                 continue;
2355
2356                         bus_dmamap_sync(rxr->vxrxr_rxtag, rxb->vrxb_dmamap,
2357                             BUS_DMASYNC_POSTREAD);
2358                         bus_dmamap_unload(rxr->vxrxr_rxtag, rxb->vrxb_dmamap);
2359                         m_freem(rxb->vrxb_m);
2360                         rxb->vrxb_m = NULL;
2361                 }
2362         }
2363 }
2364
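/*
 * Rendezvous with the queue interrupt handlers: acquiring and releasing
 * each queue lock guarantees that any handler which saw IFF_DRV_RUNNING
 * set has finished before the queues are torn down.
 */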
2365 static void
2366 vmxnet3_stop_rendezvous(struct vmxnet3_softc *sc)
2367 {
2368         struct vmxnet3_rxqueue *rxq;
2369         struct vmxnet3_txqueue *txq;
2370         int i;
2371
2372         for (i = 0; i < sc->vmx_nrxqueues; i++) {
2373                 rxq = &sc->vmx_rxq[i];
2374                 VMXNET3_RXQ_LOCK(rxq);
2375                 VMXNET3_RXQ_UNLOCK(rxq);
2376         }
2377
2378         for (i = 0; i < sc->vmx_ntxqueues; i++) {
2379                 txq = &sc->vmx_txq[i];
2380                 VMXNET3_TXQ_LOCK(txq);
2381                 VMXNET3_TXQ_UNLOCK(txq);
2382         }
2383 }
2384
2385 static void
2386 vmxnet3_stop(struct vmxnet3_softc *sc)
2387 {
2388         struct ifnet *ifp;
2389         int q;
2390
2391         ifp = sc->vmx_ifp;
2392         VMXNET3_CORE_LOCK_ASSERT(sc);
2393
2394         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2395         sc->vmx_link_active = 0;
2396         callout_stop(&sc->vmx_tick);
2397
2398         /* Disable interrupts. */
2399         vmxnet3_disable_all_intrs(sc);
2400         vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
2401
2402         vmxnet3_stop_rendezvous(sc);
2403
2404         for (q = 0; q < sc->vmx_ntxqueues; q++)
2405                 vmxnet3_txstop(sc, &sc->vmx_txq[q]);
2406         for (q = 0; q < sc->vmx_nrxqueues; q++)
2407                 vmxnet3_rxstop(sc, &sc->vmx_rxq[q]);
2408
2409         vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
2410 }
2411
2412 static void
2413 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
2414 {
2415         struct vmxnet3_txring *txr;
2416         struct vmxnet3_comp_ring *txc;
2417
2418         txr = &txq->vxtxq_cmd_ring;
2419         txr->vxtxr_head = 0;
2420         txr->vxtxr_next = 0;
2421         txr->vxtxr_gen = VMXNET3_INIT_GEN;
2422         bzero(txr->vxtxr_txd,
2423             txr->vxtxr_ndesc * sizeof(struct vmxnet3_txdesc));
2424
2425         txc = &txq->vxtxq_comp_ring;
2426         txc->vxcr_next = 0;
2427         txc->vxcr_gen = VMXNET3_INIT_GEN;
2428         bzero(txc->vxcr_u.txcd,
2429             txc->vxcr_ndesc * sizeof(struct vmxnet3_txcompdesc));
2430 }
2431
2432 static int
2433 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
2434 {
2435         struct ifnet *ifp;
2436         struct vmxnet3_rxring *rxr;
2437         struct vmxnet3_comp_ring *rxc;
2438         int i, populate, idx, frame_size, error;
2439
2440         ifp = sc->vmx_ifp;
2441         frame_size = ETHER_ALIGN + sizeof(struct ether_vlan_header) +
2442             ifp->if_mtu;
2443
2444         /*
2445          * If the MTU causes us to exceed what a regular-sized cluster can
2446          * handle, we allocate a second MJUMPAGESIZE cluster after it in
2447          * ring 0. If in use, ring 1 always contains MJUMPAGESIZE clusters.
2448          *
2449          * Keep rx_max_chain a divisor of the maximum Rx ring size to make
2450          * our life easier. We do not support changing the ring size after
2451          * the attach.
2452          */
2453         if (frame_size <= MCLBYTES)
2454                 sc->vmx_rx_max_chain = 1;
2455         else
2456                 sc->vmx_rx_max_chain = 2;
2457
2458         /*
2459          * Only populate ring 1 if the configuration will take advantage
2460          * of it. That is either when LRO is enabled or the frame size
2461          * exceeds what ring 0 can contain.
2462          */
2463         if ((ifp->if_capenable & IFCAP_LRO) == 0 &&
2464             frame_size <= MCLBYTES + MJUMPAGESIZE)
2465                 populate = 1;
2466         else
2467                 populate = VMXNET3_RXRINGS_PERQ;
2468
2469         for (i = 0; i < populate; i++) {
2470                 rxr = &rxq->vxrxq_cmd_ring[i];
2471                 rxr->vxrxr_fill = 0;
2472                 rxr->vxrxr_gen = VMXNET3_INIT_GEN;
2473                 bzero(rxr->vxrxr_rxd,
2474                     rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2475
2476                 for (idx = 0; idx < rxr->vxrxr_ndesc; idx++) {
2477                         error = vmxnet3_newbuf(sc, rxr);
2478                         if (error)
2479                                 return (error);
2480                 }
2481         }
2482
2483         for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
2484                 rxr = &rxq->vxrxq_cmd_ring[i];
2485                 rxr->vxrxr_fill = 0;
2486                 rxr->vxrxr_gen = 0;
2487                 bzero(rxr->vxrxr_rxd,
2488                     rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
2489         }
2490
2491         rxc = &rxq->vxrxq_comp_ring;
2492         rxc->vxcr_next = 0;
2493         rxc->vxcr_gen = VMXNET3_INIT_GEN;
2494         bzero(rxc->vxcr_u.rxcd,
2495             rxc->vxcr_ndesc * sizeof(struct vmxnet3_rxcompdesc));
2496
2497         return (0);
2498 }
2499
2500 static int
2501 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
2502 {
2503         device_t dev;
2504         int q, error;
2505
2506         dev = sc->vmx_dev;
2507
2508         for (q = 0; q < sc->vmx_ntxqueues; q++)
2509                 vmxnet3_txinit(sc, &sc->vmx_txq[q]);
2510
2511         for (q = 0; q < sc->vmx_nrxqueues; q++) {
2512                 error = vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
2513                 if (error) {
2514                         device_printf(dev, "cannot populate Rx queue %d\n", q);
2515                         return (error);
2516                 }
2517         }
2518
2519         return (0);
2520 }
2521
2522 static int
2523 vmxnet3_enable_device(struct vmxnet3_softc *sc)
2524 {
2525         int q;
2526
2527         if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
2528                 device_printf(sc->vmx_dev, "device enable command failed!\n");
2529                 return (1);
2530         }
2531
2532         /* Reset the Rx queue heads. */
2533         for (q = 0; q < sc->vmx_nrxqueues; q++) {
2534                 vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
2535                 vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
2536         }
2537
2538         return (0);
2539 }
2540
2541 static void
2542 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
2543 {
2544         struct ifnet *ifp;
2545
2546         ifp = sc->vmx_ifp;
2547
2548         vmxnet3_set_rxfilter(sc);
2549
2550         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2551                 bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
2552                     sizeof(sc->vmx_ds->vlan_filter));
2553         else
2554                 bzero(sc->vmx_ds->vlan_filter,
2555                     sizeof(sc->vmx_ds->vlan_filter));
2556         vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
2557 }
2558
2559 static int
2560 vmxnet3_reinit(struct vmxnet3_softc *sc)
2561 {
2562
2563         vmxnet3_reinit_interface(sc);
2564         vmxnet3_reinit_shared_data(sc);
2565
2566         if (vmxnet3_reinit_queues(sc) != 0)
2567                 return (ENXIO);
2568
2569         if (vmxnet3_enable_device(sc) != 0)
2570                 return (ENXIO);
2571
2572         vmxnet3_reinit_rxfilters(sc);
2573
2574         return (0);
2575 }
2576
2577 static void
2578 vmxnet3_init_locked(struct vmxnet3_softc *sc)
2579 {
2580         struct ifnet *ifp;
2581
2582         ifp = sc->vmx_ifp;
2583
2584         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2585                 return;
2586
2587         vmxnet3_stop(sc);
2588
2589         if (vmxnet3_reinit(sc) != 0) {
2590                 vmxnet3_stop(sc);
2591                 return;
2592         }
2593
2594         ifp->if_drv_flags |= IFF_DRV_RUNNING;
2595         vmxnet3_link_status(sc);
2596
2597         vmxnet3_enable_all_intrs(sc);
2598         callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
2599 }
2600
2601 static void
2602 vmxnet3_init(void *xsc)
2603 {
2604         struct vmxnet3_softc *sc;
2605
2606         sc = xsc;
2607
2608         VMXNET3_CORE_LOCK(sc);
2609         vmxnet3_init_locked(sc);
2610         VMXNET3_CORE_UNLOCK(sc);
2611 }
2612
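/*
 * Parse the Ethernet and IP headers of an outbound frame to find the
 * ethertype, the L4 protocol, and the offset the device needs for
 * checksum or TSO offload.  For TSO, the TCP checksum is pre-seeded with
 * the pseudo-header sum and the TCP header length is folded into the
 * returned start offset.
 */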
2613 /*
2614  * BMV: Much of this can go away once we finally have offsets in
2615  * the mbuf packet header. Bug andre@.
2616  */
2617 static int
2618 vmxnet3_txq_offload_ctx(struct vmxnet3_txqueue *txq, struct mbuf *m,
2619     int *etype, int *proto, int *start)
2620 {
2621         struct ether_vlan_header *evh;
2622         int offset;
2623 #if defined(INET)
2624         struct ip *ip = NULL;
2625         struct ip iphdr;
2626 #endif
2627 #if defined(INET6)
2628         struct ip6_hdr *ip6 = NULL;
2629         struct ip6_hdr ip6hdr;
2630 #endif
2631
2632         evh = mtod(m, struct ether_vlan_header *);
2633         if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2634                 /* BMV: We should handle nested VLAN tags too. */
2635                 *etype = ntohs(evh->evl_proto);
2636                 offset = sizeof(struct ether_vlan_header);
2637         } else {
2638                 *etype = ntohs(evh->evl_encap_proto);
2639                 offset = sizeof(struct ether_header);
2640         }
2641
2642         switch (*etype) {
2643 #if defined(INET)
2644         case ETHERTYPE_IP:
2645                 if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2646                         m_copydata(m, offset, sizeof(struct ip),
2647                             (caddr_t) &iphdr);
2648                         ip = &iphdr;
2649                 } else
2650                         ip = mtodo(m, offset);
2651                 *proto = ip->ip_p;
2652                 *start = offset + (ip->ip_hl << 2);
2653                 break;
2654 #endif
2655 #if defined(INET6)
2656         case ETHERTYPE_IPV6:
2657                 if (__predict_false(m->m_len <
2658                     offset + sizeof(struct ip6_hdr))) {
2659                         m_copydata(m, offset, sizeof(struct ip6_hdr),
2660                             (caddr_t) &ip6hdr);
2661                         ip6 = &ip6hdr;
2662                 } else
2663                         ip6 = mtodo(m, offset);
2664                 *proto = -1;
2665                 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2666                 /* Assert the network stack sent us a valid packet. */
2667                 KASSERT(*start > offset,
2668                     ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2669                     *start, offset, *proto));
2670                 break;
2671 #endif
2672         default:
2673                 return (EINVAL);
2674         }
2675
2676         if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2677                 struct tcphdr *tcp, tcphdr;
2678                 uint16_t sum;
2679
2680                 if (__predict_false(*proto != IPPROTO_TCP)) {
2681                         /* Likely failed to correctly parse the mbuf. */
2682                         return (EINVAL);
2683                 }
2684
2685                 txq->vxtxq_stats.vmtxs_tso++;
2686
2687                 switch (*etype) {
2688 #if defined(INET)
2689                 case ETHERTYPE_IP:
2690                         sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr,
2691                             htons(IPPROTO_TCP));
2692                         break;
2693 #endif
2694 #if defined(INET6)
2695                 case ETHERTYPE_IPV6:
2696                         sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
2697                         break;
2698 #endif
2699                 default:
2700                         sum = 0;
2701                         break;
2702                 }
2703
2704                 if (m->m_len < *start + sizeof(struct tcphdr)) {
2705                         m_copyback(m, *start + offsetof(struct tcphdr, th_sum),
2706                             sizeof(uint16_t), (caddr_t) &sum);
2707                         m_copydata(m, *start, sizeof(struct tcphdr),
2708                             (caddr_t) &tcphdr);
2709                         tcp = &tcphdr;
2710                 } else {
2711                         tcp = mtodo(m, *start);
2712                         tcp->th_sum = sum;
2713                 }
2714
2715                 /*
2716                  * For TSO, the size of the protocol header is also
2717                  * included in the descriptor header size.
2718                  */
2719                 *start += (tcp->th_off << 2);
2720         } else
2721                 txq->vxtxq_stats.vmtxs_csum++;
2722
2723         return (0);
2724 }
2725
2726 static int
2727 vmxnet3_txq_load_mbuf(struct vmxnet3_txqueue *txq, struct mbuf **m0,
2728     bus_dmamap_t dmap, bus_dma_segment_t segs[], int *nsegs)
2729 {
2730         struct vmxnet3_txring *txr;
2731         struct mbuf *m;
2732         bus_dma_tag_t tag;
2733         int error;
2734
2735         txr = &txq->vxtxq_cmd_ring;
2736         m = *m0;
2737         tag = txr->vxtxr_txtag;
2738
2739         error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2740         if (error == 0 || error != EFBIG)
2741                 return (error);
2742
2743         m = m_defrag(m, M_NOWAIT);
2744         if (m != NULL) {
2745                 *m0 = m;
2746                 error = bus_dmamap_load_mbuf_sg(tag, dmap, m, segs, nsegs, 0);
2747         } else
2748                 error = ENOBUFS;
2749
2750         if (error) {
2751                 m_freem(*m0);
2752                 *m0 = NULL;
2753                 txq->vxtxq_sc->vmx_stats.vmst_defrag_failed++;
2754         } else
2755                 txq->vxtxq_sc->vmx_stats.vmst_defragged++;
2756
2757         return (error);
2758 }
2759
2760 static void
2761 vmxnet3_txq_unload_mbuf(struct vmxnet3_txqueue *txq, bus_dmamap_t dmap)
2762 {
2763         struct vmxnet3_txring *txr;
2764
2765         txr = &txq->vxtxq_cmd_ring;
2766         bus_dmamap_unload(txr->vxtxr_txtag, dmap);
2767 }
2768
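/*
 * Enqueue one frame on the transmit ring.  The mbuf chain is DMA-loaded
 * (defragmenting once on EFBIG), one Tx descriptor is written per
 * segment, and VLAN/offload state is recorded in the SOP descriptor.
 * The SOP generation bit is flipped last, behind a write barrier, so the
 * device never sees a partially built chain; the doorbell is only rung
 * once enough descriptors are pending to cross the interrupt threshold.
 */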
2769 static int
2770 vmxnet3_txq_encap(struct vmxnet3_txqueue *txq, struct mbuf **m0)
2771 {
2772         struct vmxnet3_softc *sc;
2773         struct vmxnet3_txring *txr;
2774         struct vmxnet3_txdesc *txd, *sop;
2775         struct mbuf *m;
2776         bus_dmamap_t dmap;
2777         bus_dma_segment_t segs[VMXNET3_TX_MAXSEGS];
2778         int i, gen, nsegs, etype, proto, start, error;
2779
2780         sc = txq->vxtxq_sc;
2781         start = 0;
2782         txd = NULL;
2783         txr = &txq->vxtxq_cmd_ring;
2784         dmap = txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_dmamap;
2785
2786         error = vmxnet3_txq_load_mbuf(txq, m0, dmap, segs, &nsegs);
2787         if (error)
2788                 return (error);
2789
2790         m = *m0;
2791         M_ASSERTPKTHDR(m);
2792         KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
2793             ("%s: mbuf %p with too many segments %d", __func__, m, nsegs));
2794
2795         if (VMXNET3_TXRING_AVAIL(txr) < nsegs) {
2796                 txq->vxtxq_stats.vmtxs_full++;
2797                 vmxnet3_txq_unload_mbuf(txq, dmap);
2798                 return (ENOSPC);
2799         } else if (m->m_pkthdr.csum_flags & VMXNET3_CSUM_ALL_OFFLOAD) {
2800                 error = vmxnet3_txq_offload_ctx(txq, m, &etype, &proto, &start);
2801                 if (error) {
2802                         txq->vxtxq_stats.vmtxs_offload_failed++;
2803                         vmxnet3_txq_unload_mbuf(txq, dmap);
2804                         m_freem(m);
2805                         *m0 = NULL;
2806                         return (error);
2807                 }
2808         }
2809
2810         txr->vxtxr_txbuf[txr->vxtxr_head].vtxb_m = m;
2811         sop = &txr->vxtxr_txd[txr->vxtxr_head];
2812         gen = txr->vxtxr_gen ^ 1;       /* SOP stays owned by the CPU for now. */
2813
2814         for (i = 0; i < nsegs; i++) {
2815                 txd = &txr->vxtxr_txd[txr->vxtxr_head];
2816
2817                 txd->addr = segs[i].ds_addr;
2818                 txd->len = segs[i].ds_len;
2819                 txd->gen = gen;
2820                 txd->dtype = 0;
2821                 txd->offload_mode = VMXNET3_OM_NONE;
2822                 txd->offload_pos = 0;
2823                 txd->hlen = 0;
2824                 txd->eop = 0;
2825                 txd->compreq = 0;
2826                 txd->vtag_mode = 0;
2827                 txd->vtag = 0;
2828
2829                 if (++txr->vxtxr_head == txr->vxtxr_ndesc) {
2830                         txr->vxtxr_head = 0;
2831                         txr->vxtxr_gen ^= 1;
2832                 }
2833                 gen = txr->vxtxr_gen;
2834         }
2835         txd->eop = 1;
2836         txd->compreq = 1;
2837
2838         if (m->m_flags & M_VLANTAG) {
2839                 sop->vtag_mode = 1;
2840                 sop->vtag = m->m_pkthdr.ether_vtag;
2841         }
2842
2843         if (m->m_pkthdr.csum_flags & CSUM_TSO) {
2844                 sop->offload_mode = VMXNET3_OM_TSO;
2845                 sop->hlen = start;
2846                 sop->offload_pos = m->m_pkthdr.tso_segsz;
2847         } else if (m->m_pkthdr.csum_flags & (VMXNET3_CSUM_OFFLOAD |
2848             VMXNET3_CSUM_OFFLOAD_IPV6)) {
2849                 sop->offload_mode = VMXNET3_OM_CSUM;
2850                 sop->hlen = start;
2851                 sop->offload_pos = start + m->m_pkthdr.csum_data;
2852         }
2853
2854         /* Finally, change the ownership. */
2855         vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
2856         sop->gen ^= 1;
2857
2858         txq->vxtxq_ts->npending += nsegs;
2859         if (txq->vxtxq_ts->npending >= txq->vxtxq_ts->intr_threshold) {
2860                 txq->vxtxq_ts->npending = 0;
2861                 vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id),
2862                     txr->vxtxr_head);
2863         }
2864
2865         return (0);
2866 }
2867
2868 #ifdef VMXNET3_LEGACY_TX
2869
2870 static void
2871 vmxnet3_start_locked(struct ifnet *ifp)
2872 {
2873         struct vmxnet3_softc *sc;
2874         struct vmxnet3_txqueue *txq;
2875         struct vmxnet3_txring *txr;
2876         struct mbuf *m_head;
2877         int tx, avail;
2878
2879         sc = ifp->if_softc;
2880         txq = &sc->vmx_txq[0];
2881         txr = &txq->vxtxq_cmd_ring;
2882         tx = 0;
2883
2884         VMXNET3_TXQ_LOCK_ASSERT(txq);
2885
2886         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2887             sc->vmx_link_active == 0)
2888                 return;
2889
2890         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2891                 if ((avail = VMXNET3_TXRING_AVAIL(txr)) < 2)
2892                         break;
2893
2894                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
2895                 if (m_head == NULL)
2896                         break;
2897
2898                 /* Assume the worst case if this mbuf is the head of a chain. */
2899                 if (m_head->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2900                         IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2901                         break;
2902                 }
2903
2904                 if (vmxnet3_txq_encap(txq, &m_head) != 0) {
2905                         if (m_head != NULL)
2906                                 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
2907                         break;
2908                 }
2909
2910                 tx++;
2911                 ETHER_BPF_MTAP(ifp, m_head);
2912         }
2913
2914         if (tx > 0)
2915                 txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2916 }
2917
2918 static void
2919 vmxnet3_start(struct ifnet *ifp)
2920 {
2921         struct vmxnet3_softc *sc;
2922         struct vmxnet3_txqueue *txq;
2923
2924         sc = ifp->if_softc;
2925         txq = &sc->vmx_txq[0];
2926
2927         VMXNET3_TXQ_LOCK(txq);
2928         vmxnet3_start_locked(ifp);
2929         VMXNET3_TXQ_UNLOCK(txq);
2930 }
2931
2932 #else /* !VMXNET3_LEGACY_TX */
2933
2934 static int
2935 vmxnet3_txq_mq_start_locked(struct vmxnet3_txqueue *txq, struct mbuf *m)
2936 {
2937         struct vmxnet3_softc *sc;
2938         struct vmxnet3_txring *txr;
2939         struct buf_ring *br;
2940         struct ifnet *ifp;
2941         int tx, avail, error;
2942
2943         sc = txq->vxtxq_sc;
2944         br = txq->vxtxq_br;
2945         ifp = sc->vmx_ifp;
2946         txr = &txq->vxtxq_cmd_ring;
2947         tx = 0;
2948         error = 0;
2949
2950         VMXNET3_TXQ_LOCK_ASSERT(txq);
2951
2952         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2953             sc->vmx_link_active == 0) {
2954                 if (m != NULL)
2955                         error = drbr_enqueue(ifp, br, m);
2956                 return (error);
2957         }
2958
2959         if (m != NULL) {
2960                 error = drbr_enqueue(ifp, br, m);
2961                 if (error)
2962                         return (error);
2963         }
2964
2965         while ((avail = VMXNET3_TXRING_AVAIL(txr)) >= 2) {
2966                 m = drbr_peek(ifp, br);
2967                 if (m == NULL)
2968                         break;
2969
2970                 /* Assume the worst case if this mbuf is the head of a chain. */
2971                 if (m->m_next != NULL && avail < VMXNET3_TX_MAXSEGS) {
2972                         drbr_putback(ifp, br, m);
2973                         break;
2974                 }
2975
2976                 if (vmxnet3_txq_encap(txq, &m) != 0) {
2977                         if (m != NULL)
2978                                 drbr_putback(ifp, br, m);
2979                         else
2980                                 drbr_advance(ifp, br);
2981                         break;
2982                 }
2983                 drbr_advance(ifp, br);
2984
2985                 tx++;
2986                 ETHER_BPF_MTAP(ifp, m);
2987         }
2988
2989         if (tx > 0)
2990                 txq->vxtxq_watchdog = VMXNET3_WATCHDOG_TIMEOUT;
2991
2992         return (0);
2993 }
2994
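/*
 * Multiqueue transmit entry point.  The queue is selected from the mbuf
 * flowid when the stack provides one, otherwise from the current CPU.
 * If the queue lock is uncontended the frame is transmitted directly;
 * otherwise it is enqueued on the queue's buf_ring and drained later by
 * the deferred-start task.
 */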
2995 static int
2996 vmxnet3_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2997 {
2998         struct vmxnet3_softc *sc;
2999         struct vmxnet3_txqueue *txq;
3000         int i, ntxq, error;
3001
3002         sc = ifp->if_softc;
3003         ntxq = sc->vmx_ntxqueues;
3004
3005         /* Check if the flowid is set. */
3006         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
3007                 i = m->m_pkthdr.flowid % ntxq;
3008         else
3009                 i = curcpu % ntxq;
3010
3011         txq = &sc->vmx_txq[i];
3012
3013         if (VMXNET3_TXQ_TRYLOCK(txq) != 0) {
3014                 error = vmxnet3_txq_mq_start_locked(txq, m);
3015                 VMXNET3_TXQ_UNLOCK(txq);
3016         } else {
3017                 error = drbr_enqueue(ifp, txq->vxtxq_br, m);
3018                 taskqueue_enqueue(sc->vmx_tq, &txq->vxtxq_defrtask);
3019         }
3020
3021         return (error);
3022 }
3023
3024 static void
3025 vmxnet3_txq_tq_deferred(void *xtxq, int pending)
3026 {
3027         struct vmxnet3_softc *sc;
3028         struct vmxnet3_txqueue *txq;
3029
3030         txq = xtxq;
3031         sc = txq->vxtxq_sc;
3032
3033         VMXNET3_TXQ_LOCK(txq);
3034         if (!drbr_empty(sc->vmx_ifp, txq->vxtxq_br))
3035                 vmxnet3_txq_mq_start_locked(txq, NULL);
3036         VMXNET3_TXQ_UNLOCK(txq);
3037 }
3038
3039 #endif /* VMXNET3_LEGACY_TX */
3040
3041 static void
3042 vmxnet3_txq_start(struct vmxnet3_txqueue *txq)
3043 {
3044         struct vmxnet3_softc *sc;
3045         struct ifnet *ifp;
3046
3047         sc = txq->vxtxq_sc;
3048         ifp = sc->vmx_ifp;
3049
3050 #ifdef VMXNET3_LEGACY_TX
3051         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
3052                 vmxnet3_start_locked(ifp);
3053 #else
3054         if (!drbr_empty(ifp, txq->vxtxq_br))
3055                 vmxnet3_txq_mq_start_locked(txq, NULL);
3056 #endif
3057 }
3058
3059 static void
3060 vmxnet3_tx_start_all(struct vmxnet3_softc *sc)
3061 {
3062         struct vmxnet3_txqueue *txq;
3063         int i;
3064
3065         VMXNET3_CORE_LOCK_ASSERT(sc);
3066
3067         for (i = 0; i < sc->vmx_ntxqueues; i++) {
3068                 txq = &sc->vmx_txq[i];
3069
3070                 VMXNET3_TXQ_LOCK(txq);
3071                 vmxnet3_txq_start(txq);
3072                 VMXNET3_TXQ_UNLOCK(txq);
3073         }
3074 }
3075
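/*
 * Maintain the 4096-bit VLAN filter as an array of 32-bit words:
 * idx = tag >> 5 selects the word and bit = tag & 0x1f the bit within
 * it.  The private copy is always updated; the copy in the shared data
 * is only touched (and the device notified) when hardware VLAN
 * filtering is enabled.
 */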
3076 static void
3077 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
3078 {
3079         struct ifnet *ifp;
3080         int idx, bit;
3081
3082         ifp = sc->vmx_ifp;
3083         idx = (tag >> 5) & 0x7F;
3084         bit = tag & 0x1F;
3085
3086         if (tag == 0 || tag > 4095)
3087                 return;
3088
3089         VMXNET3_CORE_LOCK(sc);
3090
3091         /* Update our private VLAN bitvector. */
3092         if (add)
3093                 sc->vmx_vlan_filter[idx] |= (1 << bit);
3094         else
3095                 sc->vmx_vlan_filter[idx] &= ~(1 << bit);
3096
3097         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) {
3098                 if (add)
3099                         sc->vmx_ds->vlan_filter[idx] |= (1 << bit);
3100                 else
3101                         sc->vmx_ds->vlan_filter[idx] &= ~(1 << bit);
3102                 vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
3103         }
3104
3105         VMXNET3_CORE_UNLOCK(sc);
3106 }
3107
3108 static void
3109 vmxnet3_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3110 {
3111
3112         if (ifp->if_softc == arg)
3113                 vmxnet3_update_vlan_filter(arg, 1, tag);
3114 }
3115
3116 static void
3117 vmxnet3_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3118 {
3119
3120         if (ifp->if_softc == arg)
3121                 vmxnet3_update_vlan_filter(arg, 0, tag);
3122 }
3123
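/*
 * Program the receive filter.  The rxmode word always accepts unicast
 * and broadcast; promiscuous and allmulti mirror the interface flags.
 * Otherwise up to VMXNET3_MULTICAST_MAX link-layer addresses are copied
 * into the multicast table, falling back to allmulti on overflow, and
 * the device is told to reload both the filter and the Rx mode.
 */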
3124 static void
3125 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc)
3126 {
3127         struct ifnet *ifp;
3128         struct vmxnet3_driver_shared *ds;
3129         struct ifmultiaddr *ifma;
3130         u_int mode;
3131
3132         ifp = sc->vmx_ifp;
3133         ds = sc->vmx_ds;
3134
3135         mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
3136         if (ifp->if_flags & IFF_PROMISC)
3137                 mode |= VMXNET3_RXMODE_PROMISC;
3138         if (ifp->if_flags & IFF_ALLMULTI)
3139                 mode |= VMXNET3_RXMODE_ALLMULTI;
3140         else {
3141                 int cnt = 0, overflow = 0;
3142
3143                 if_maddr_rlock(ifp);
3144                 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3145                         if (ifma->ifma_addr->sa_family != AF_LINK)
3146                                 continue;
3147                         else if (cnt == VMXNET3_MULTICAST_MAX) {
3148                                 overflow = 1;
3149                                 break;
3150                         }
3151
3152                         bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
3153                            &sc->vmx_mcast[cnt*ETHER_ADDR_LEN], ETHER_ADDR_LEN);
3154                         cnt++;
3155                 }
3156                 if_maddr_runlock(ifp);
3157
3158                 if (overflow != 0) {
3159                         cnt = 0;
3160                         mode |= VMXNET3_RXMODE_ALLMULTI;
3161                 } else if (cnt > 0)
3162                         mode |= VMXNET3_RXMODE_MCAST;
3163                 ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
3164         }
3165
3166         ds->rxmode = mode;
3167
3168         vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
3169         vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
3170 }
3171
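/*
 * Validate and set a new MTU.  If the interface is running, it is
 * reinitialized so the receive path picks up the new frame size.
 */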
3172 static int
3173 vmxnet3_change_mtu(struct vmxnet3_softc *sc, int mtu)
3174 {
3175         struct ifnet *ifp;
3176
3177         ifp = sc->vmx_ifp;
3178
3179         if (mtu < VMXNET3_MIN_MTU || mtu > VMXNET3_MAX_MTU)
3180                 return (EINVAL);
3181
3182         ifp->if_mtu = mtu;
3183
3184         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
3185                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3186                 vmxnet3_init_locked(sc);
3187         }
3188
3189         return (0);
3190 }
3191
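/*
 * Interface ioctl handler.  Requests not handled here fall through to
 * ether_ioctl().
 */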
3192 static int
3193 vmxnet3_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
3194 {
3195         struct vmxnet3_softc *sc;
3196         struct ifreq *ifr;
3197         int reinit, mask, error;
3198
3199         sc = ifp->if_softc;
3200         ifr = (struct ifreq *) data;
3201         error = 0;
3202
3203         switch (cmd) {
3204         case SIOCSIFMTU:
3205                 if (ifp->if_mtu != ifr->ifr_mtu) {
3206                         VMXNET3_CORE_LOCK(sc);
3207                         error = vmxnet3_change_mtu(sc, ifr->ifr_mtu);
3208                         VMXNET3_CORE_UNLOCK(sc);
3209                 }
3210                 break;
3211
3212         case SIOCSIFFLAGS:
3213                 VMXNET3_CORE_LOCK(sc);
3214                 if (ifp->if_flags & IFF_UP) {
3215                         if ((ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3216                                 if ((ifp->if_flags ^ sc->vmx_if_flags) &
3217                                     (IFF_PROMISC | IFF_ALLMULTI)) {
3218                                         vmxnet3_set_rxfilter(sc);
3219                                 }
3220                         } else
3221                                 vmxnet3_init_locked(sc);
3222                 } else {
3223                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3224                                 vmxnet3_stop(sc);
3225                 }
3226                 sc->vmx_if_flags = ifp->if_flags;
3227                 VMXNET3_CORE_UNLOCK(sc);
3228                 break;
3229
3230         case SIOCADDMULTI:
3231         case SIOCDELMULTI:
3232                 VMXNET3_CORE_LOCK(sc);
3233                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3234                         vmxnet3_set_rxfilter(sc);
3235                 VMXNET3_CORE_UNLOCK(sc);
3236                 break;
3237
3238         case SIOCSIFMEDIA:
3239         case SIOCGIFMEDIA:
3240                 error = ifmedia_ioctl(ifp, ifr, &sc->vmx_media, cmd);
3241                 break;
3242
3243         case SIOCSIFCAP:
3244                 VMXNET3_CORE_LOCK(sc);
3245                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
3246
3247                 if (mask & IFCAP_TXCSUM)
3248                         ifp->if_capenable ^= IFCAP_TXCSUM;
3249                 if (mask & IFCAP_TXCSUM_IPV6)
3250                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
3251                 if (mask & IFCAP_TSO4)
3252                         ifp->if_capenable ^= IFCAP_TSO4;
3253                 if (mask & IFCAP_TSO6)
3254                         ifp->if_capenable ^= IFCAP_TSO6;
3255
3256                 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
3257                     IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWFILTER)) {
3258                         /* Changing these features requires us to reinit. */
3259                         reinit = 1;
3260
3261                         if (mask & IFCAP_RXCSUM)
3262                                 ifp->if_capenable ^= IFCAP_RXCSUM;
3263                         if (mask & IFCAP_RXCSUM_IPV6)
3264                                 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
3265                         if (mask & IFCAP_LRO)
3266                                 ifp->if_capenable ^= IFCAP_LRO;
3267                         if (mask & IFCAP_VLAN_HWTAGGING)
3268                                 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
3269                         if (mask & IFCAP_VLAN_HWFILTER)
3270                                 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
3271                 } else
3272                         reinit = 0;
3273
3274                 if (mask & IFCAP_VLAN_HWTSO)
3275                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
3276
3277                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
3278                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3279                         vmxnet3_init_locked(sc);
3280                 }
3281
3282                 VMXNET3_CORE_UNLOCK(sc);
3283                 VLAN_CAPABILITIES(ifp);
3284                 break;
3285
3286         default:
3287                 error = ether_ioctl(ifp, cmd, data);
3288                 break;
3289         }
3290
3291         VMXNET3_CORE_LOCK_ASSERT_NOTOWNED(sc);
3292
3293         return (error);
3294 }
3295
3296 #ifndef VMXNET3_LEGACY_TX
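/*
 * Free any frames still sitting in the per-queue buf_rings before
 * handing the remainder of the flush to if_qflush().
 */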
3297 static void
3298 vmxnet3_qflush(struct ifnet *ifp)
3299 {
3300         struct vmxnet3_softc *sc;
3301         struct vmxnet3_txqueue *txq;
3302         struct mbuf *m;
3303         int i;
3304
3305         sc = ifp->if_softc;
3306
3307         for (i = 0; i < sc->vmx_ntxqueues; i++) {
3308                 txq = &sc->vmx_txq[i];
3309
3310                 VMXNET3_TXQ_LOCK(txq);
3311                 while ((m = buf_ring_dequeue_sc(txq->vxtxq_br)) != NULL)
3312                         m_freem(m);
3313                 VMXNET3_TXQ_UNLOCK(txq);
3314         }
3315
3316         if_qflush(ifp);
3317 }
3318 #endif
3319
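/*
 * Per-queue transmit watchdog, run once a second from vmxnet3_tick().
 * Returns 1 once the queue's watchdog counter has expired, i.e. a
 * queued transmit was not completed in time.
 */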
3320 static int
3321 vmxnet3_watchdog(struct vmxnet3_txqueue *txq)
3322 {
3323         struct vmxnet3_softc *sc;
3324
3325         sc = txq->vxtxq_sc;
3326
3327         VMXNET3_TXQ_LOCK(txq);
3328         if (txq->vxtxq_watchdog == 0 || --txq->vxtxq_watchdog) {
3329                 VMXNET3_TXQ_UNLOCK(txq);
3330                 return (0);
3331         }
3332         VMXNET3_TXQ_UNLOCK(txq);
3333
3334         if_printf(sc->vmx_ifp, "watchdog timeout on queue %d\n",
3335             txq->vxtxq_id);
3336         return (1);
3337 }
3338
3339 static void
3340 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
3341 {
3342
3343         vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
3344 }
3345
3346 static void
3347 vmxnet3_txq_accum_stats(struct vmxnet3_txqueue *txq,
3348     struct vmxnet3_txq_stats *accum)
3349 {
3350         struct vmxnet3_txq_stats *st;
3351
3352         st = &txq->vxtxq_stats;
3353
3354         accum->vmtxs_opackets += st->vmtxs_opackets;
3355         accum->vmtxs_obytes += st->vmtxs_obytes;
3356         accum->vmtxs_omcasts += st->vmtxs_omcasts;
3357         accum->vmtxs_csum += st->vmtxs_csum;
3358         accum->vmtxs_tso += st->vmtxs_tso;
3359         accum->vmtxs_full += st->vmtxs_full;
3360         accum->vmtxs_offload_failed += st->vmtxs_offload_failed;
3361 }
3362
3363 static void
3364 vmxnet3_rxq_accum_stats(struct vmxnet3_rxqueue *rxq,
3365     struct vmxnet3_rxq_stats *accum)
3366 {
3367         struct vmxnet3_rxq_stats *st;
3368
3369         st = &rxq->vxrxq_stats;
3370
3371         accum->vmrxs_ipackets += st->vmrxs_ipackets;
3372         accum->vmrxs_ibytes += st->vmrxs_ibytes;
3373         accum->vmrxs_iqdrops += st->vmrxs_iqdrops;
3374         accum->vmrxs_ierrors += st->vmrxs_ierrors;
3375 }
3376
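/*
 * Fold the per-queue software counters into the ifnet statistics.
 */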
3377 static void
3378 vmxnet3_accumulate_stats(struct vmxnet3_softc *sc)
3379 {
3380         struct ifnet *ifp;
3381         struct vmxnet3_statistics *st;
3382         struct vmxnet3_txq_stats txaccum;
3383         struct vmxnet3_rxq_stats rxaccum;
3384         int i;
3385
3386         ifp = sc->vmx_ifp;
3387         st = &sc->vmx_stats;
3388
3389         bzero(&txaccum, sizeof(struct vmxnet3_txq_stats));
3390         bzero(&rxaccum, sizeof(struct vmxnet3_rxq_stats));
3391
3392         for (i = 0; i < sc->vmx_ntxqueues; i++)
3393                 vmxnet3_txq_accum_stats(&sc->vmx_txq[i], &txaccum);
3394         for (i = 0; i < sc->vmx_nrxqueues; i++)
3395                 vmxnet3_rxq_accum_stats(&sc->vmx_rxq[i], &rxaccum);
3396
3397         /*
3398          * With the exception of if_ierrors, these ifnet statistics are
3399          * only updated in the driver, so just set them to our accumulated
3400          * values. if_ierrors is updated in ether_input() for malformed
3401          * frames that we should have already discarded.
3402          */
3403         ifp->if_ipackets = rxaccum.vmrxs_ipackets;
3404         ifp->if_iqdrops = rxaccum.vmrxs_iqdrops;
3405         ifp->if_ierrors = rxaccum.vmrxs_ierrors;
3406         ifp->if_opackets = txaccum.vmtxs_opackets;
3407 #ifndef VMXNET3_LEGACY_TX
3408         ifp->if_obytes = txaccum.vmtxs_obytes;
3409         ifp->if_omcasts = txaccum.vmtxs_omcasts;
3410 #endif
3411 }
3412
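/*
 * Once-per-second housekeeping: refresh the statistics, run the
 * transmit watchdogs, and reinitialize the interface if any queue has
 * timed out.
 */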
3413 static void
3414 vmxnet3_tick(void *xsc)
3415 {
3416         struct vmxnet3_softc *sc;
3417         struct ifnet *ifp;
3418         int i, timedout;
3419
3420         sc = xsc;
3421         ifp = sc->vmx_ifp;
3422         timedout = 0;
3423
3424         VMXNET3_CORE_LOCK_ASSERT(sc);
3425
3426         vmxnet3_accumulate_stats(sc);
3427         vmxnet3_refresh_host_stats(sc);
3428
3429         for (i = 0; i < sc->vmx_ntxqueues; i++)
3430                 timedout |= vmxnet3_watchdog(&sc->vmx_txq[i]);
3431
3432         if (timedout != 0) {
3433                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
3434                 vmxnet3_init_locked(sc);
3435         } else
3436                 callout_reset(&sc->vmx_tick, hz, vmxnet3_tick, sc);
3437 }
3438
3439 static int
3440 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
3441 {
3442         uint32_t status;
3443
3444         /* Also update the link speed while here. */
3445         status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
3446         sc->vmx_link_speed = status >> 16;
3447         return (!!(status & 0x1));
3448 }
3449
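/*
 * Query the current link state and report any change to the stack.
 */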
3450 static void
3451 vmxnet3_link_status(struct vmxnet3_softc *sc)
3452 {
3453         struct ifnet *ifp;
3454         int link;
3455
3456         ifp = sc->vmx_ifp;
3457         link = vmxnet3_link_is_up(sc);
3458
3459         if (link != 0 && sc->vmx_link_active == 0) {
3460                 sc->vmx_link_active = 1;
3461                 if_link_state_change(ifp, LINK_STATE_UP);
3462         } else if (link == 0 && sc->vmx_link_active != 0) {
3463                 sc->vmx_link_active = 0;
3464                 if_link_state_change(ifp, LINK_STATE_DOWN);
3465         }
3466 }
3467
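/*
 * ifmedia callbacks.  The device has no selectable media, so the
 * change callback is a no-op and only link status is reported.
 */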
3468 static void
3469 vmxnet3_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
3470 {
3471         struct vmxnet3_softc *sc;
3472
3473         sc = ifp->if_softc;
3474
3475         ifmr->ifm_active = IFM_ETHER | IFM_AUTO;
3476         ifmr->ifm_status = IFM_AVALID;
3477
3478         VMXNET3_CORE_LOCK(sc);
3479         if (vmxnet3_link_is_up(sc) != 0)
3480                 ifmr->ifm_status |= IFM_ACTIVE;
3481         else
3482                 ifmr->ifm_active |= IFM_NONE;
3483         VMXNET3_CORE_UNLOCK(sc);
3484 }
3485
3486 static int
3487 vmxnet3_media_change(struct ifnet *ifp)
3488 {
3489
3490         /* Ignore. */
3491         return (0);
3492 }
3493
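/*
 * The MAC address is exchanged with the device through two BAR1
 * registers: MACL holds the first four bytes, MACH the last two.
 */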
3494 static void
3495 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
3496 {
3497         uint32_t ml, mh;
3498
3499         ml  = sc->vmx_lladdr[0];
3500         ml |= sc->vmx_lladdr[1] << 8;
3501         ml |= sc->vmx_lladdr[2] << 16;
3502         ml |= sc->vmx_lladdr[3] << 24;
3503         vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
3504
3505         mh  = sc->vmx_lladdr[4];
3506         mh |= sc->vmx_lladdr[5] << 8;
3507         vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
3508 }
3509
3510 static void
3511 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
3512 {
3513         uint32_t ml, mh;
3514
3515         ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
3516         mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
3517
3518         sc->vmx_lladdr[0] = ml;
3519         sc->vmx_lladdr[1] = ml >> 8;
3520         sc->vmx_lladdr[2] = ml >> 16;
3521         sc->vmx_lladdr[3] = ml >> 24;
3522         sc->vmx_lladdr[4] = mh;
3523         sc->vmx_lladdr[5] = mh >> 8;
3524 }
3525
3526 static void
3527 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
3528     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3529 {
3530         struct sysctl_oid *node, *txsnode;
3531         struct sysctl_oid_list *list, *txslist;
3532         struct vmxnet3_txq_stats *stats;
3533         struct UPT1_TxStats *txstats;
3534         char namebuf[16];
3535
3536         stats = &txq->vxtxq_stats;
3537         txstats = &txq->vxtxq_ts->stats;
3538
3539         snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
3540         node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3541             NULL, "Transmit Queue");
3542         txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
3543
3544         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3545             &stats->vmtxs_opackets, "Transmit packets");
3546         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3547             &stats->vmtxs_obytes, "Transmit bytes");
3548         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3549             &stats->vmtxs_omcasts, "Transmit multicasts");
3550         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3551             &stats->vmtxs_csum, "Transmit checksum offloaded");
3552         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3553             &stats->vmtxs_tso, "Transmit TCP segmentation offloaded");
3554         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ringfull", CTLFLAG_RD,
3555             &stats->vmtxs_full, "Transmit ring full");
3556         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "offload_failed", CTLFLAG_RD,
3557             &stats->vmtxs_offload_failed, "Transmit checksum offload failed");
3558
3559         /*
3560          * Add statistics reported by the host. These are updated once
3561          * per second.
3562          */
3563         txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3564             NULL, "Host Statistics");
3565         txslist = SYSCTL_CHILDREN(txsnode);
3566         SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
3567             &txstats->TSO_packets, "TSO packets");
3568         SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
3569             &txstats->TSO_bytes, "TSO bytes");
3570         SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3571             &txstats->ucast_packets, "Unicast packets");
3572         SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3573             &txstats->ucast_bytes, "Unicast bytes");
3574         SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3575             &txstats->mcast_packets, "Multicast packets");
3576         SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3577             &txstats->mcast_bytes, "Multicast bytes");
3578         SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
3579             &txstats->error, "Errors");
3580         SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
3581             &txstats->discard, "Discards");
3582 }
3583
3584 static void
3585 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
3586     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3587 {
3588         struct sysctl_oid *node, *rxsnode;
3589         struct sysctl_oid_list *list, *rxslist;
3590         struct vmxnet3_rxq_stats *stats;
3591         struct UPT1_RxStats *rxstats;
3592         char namebuf[16];
3593
3594         stats = &rxq->vxrxq_stats;
3595         rxstats = &rxq->vxrxq_rs->stats;
3596
3597         snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
3598         node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
3599             NULL, "Receive Queue");
3600         rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
3601
3602         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3603             &stats->vmrxs_ipackets, "Receive packets");
3604         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3605             &stats->vmrxs_ibytes, "Receive bytes");
3606         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3607             &stats->vmrxs_iqdrops, "Receive drops");
3608         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3609             &stats->vmrxs_ierrors, "Receive errors");
3610
3611         /*
3612          * Add statistics reported by the host. These are updated once
3613          * per second.
3614          */
3615         rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
3616             NULL, "Host Statistics");
3617         rxslist = SYSCTL_CHILDREN(rxsnode);
3618         SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
3619             &rxstats->LRO_packets, "LRO packets");
3620         SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
3621             &rxstats->LRO_bytes, "LRO bytes");
3622         SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
3623             &rxstats->ucast_packets, "Unicast packets");
3624         SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
3625             &rxstats->ucast_bytes, "Unicast bytes");
3626         SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
3627             &rxstats->mcast_packets, "Multicast packets");
3628         SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
3629             &rxstats->mcast_bytes, "Multicast bytes");
3630         SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
3631             &rxstats->bcast_packets, "Broadcast packets");
3632         SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
3633             &rxstats->bcast_bytes, "Broadcast bytes");
3634         SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
3635             &rxstats->nobuffer, "No buffer");
3636         SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
3637             &rxstats->error, "Errors");
3638 }
3639
3640 static void
3641 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
3642     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3643 {
3644         struct sysctl_oid *node;
3645         struct sysctl_oid_list *list;
3646         int i;
3647
3648         for (i = 0; i < sc->vmx_ntxqueues; i++) {
3649                 struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
3650
3651                 node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
3652                     "debug", CTLFLAG_RD, NULL, "");
3653                 list = SYSCTL_CHILDREN(node);
3654
3655                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_head", CTLFLAG_RD,
3656                     &txq->vxtxq_cmd_ring.vxtxr_head, 0, "");
3657                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
3658                     &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
3659                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
3660                     &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
3661                 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
3662                     &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
3663                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3664                     &txq->vxtxq_comp_ring.vxcr_next, 0, "");
3665                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3666                     &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
3667                 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3668                     &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
3669         }
3670
3671         for (i = 0; i < sc->vmx_nrxqueues; i++) {
3672                 struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
3673
3674                 node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
3675                     "debug", CTLFLAG_RD, NULL, "");
3676                 list = SYSCTL_CHILDREN(node);
3677
3678                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_fill", CTLFLAG_RD,
3679                     &rxq->vxrxq_cmd_ring[0].vxrxr_fill, 0, "");
3680                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
3681                     &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
3682                 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
3683                     &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
3684                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_fill", CTLFLAG_RD,
3685                     &rxq->vxrxq_cmd_ring[1].vxrxr_fill, 0, "");
3686                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
3687                     &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
3688                 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
3689                     &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
3690                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
3691                     &rxq->vxrxq_comp_ring.vxcr_next, 0, "");
3692                 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
3693                     &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
3694                 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
3695                     &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
3696         }
3697 }
3698
3699 static void
3700 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
3701     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
3702 {
3703         int i;
3704
3705         for (i = 0; i < sc->vmx_ntxqueues; i++)
3706                 vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
3707         for (i = 0; i < sc->vmx_nrxqueues; i++)
3708                 vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
3709
3710         vmxnet3_setup_debug_sysctl(sc, ctx, child);
3711 }
3712
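/*
 * Attach the driver-wide sysctl nodes under the device's sysctl tree
 * (dev.vmx.<unit>), including the per-queue statistics above.
 */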
3713 static void
3714 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
3715 {
3716         device_t dev;
3717         struct vmxnet3_statistics *stats;
3718         struct sysctl_ctx_list *ctx;
3719         struct sysctl_oid *tree;
3720         struct sysctl_oid_list *child;
3721
3722         dev = sc->vmx_dev;
3723         ctx = device_get_sysctl_ctx(dev);
3724         tree = device_get_sysctl_tree(dev);
3725         child = SYSCTL_CHILDREN(tree);
3726
3727         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_ntxqueues", CTLFLAG_RD,
3728             &sc->vmx_max_ntxqueues, 0, "Maximum number of Tx queues");
3729         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_nrxqueues", CTLFLAG_RD,
3730             &sc->vmx_max_nrxqueues, 0, "Maximum number of Rx queues");
3731         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "ntxqueues", CTLFLAG_RD,
3732             &sc->vmx_ntxqueues, 0, "Number of Tx queues");
3733         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "nrxqueues", CTLFLAG_RD,
3734             &sc->vmx_nrxqueues, 0, "Number of Rx queues");
3735
3736         stats = &sc->vmx_stats;
3737         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defragged", CTLFLAG_RD,
3738             &stats->vmst_defragged, 0, "Tx mbuf chains defragged");
3739         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "defrag_failed", CTLFLAG_RD,
3740             &stats->vmst_defrag_failed, 0,
3741             "Tx mbuf dropped because defrag failed");
3742         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mgetcl_failed", CTLFLAG_RD,
3743             &stats->vmst_mgetcl_failed, 0, "mbuf cluster allocation failed");
3744         SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "mbuf_load_failed", CTLFLAG_RD,
3745             &stats->vmst_mbuf_load_failed, 0, "mbuf load segments failed");
3746
3747         vmxnet3_setup_queue_sysctl(sc, ctx, child);
3748 }
3749
3750 static void
3751 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3752 {
3753
3754         bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
3755 }
3756
3757 static uint32_t
3758 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
3759 {
3760
3761         return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
3762 }
3763
3764 static void
3765 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
3766 {
3767
3768         bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
3769 }
3770
3771 static void
3772 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3773 {
3774
3775         vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
3776 }
3777
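/*
 * Issue a command and read its result back from the same BAR1
 * register, with a bus barrier between the write and the read.
 */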
3778 static uint32_t
3779 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
3780 {
3781
3782         vmxnet3_write_cmd(sc, cmd);
3783         bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
3784             BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
3785         return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
3786 }
3787
3788 static void
3789 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
3790 {
3791
3792         vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
3793 }
3794
3795 static void
3796 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
3797 {
3798
3799         vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
3800 }
3801
3802 static void
3803 vmxnet3_enable_all_intrs(struct vmxnet3_softc *sc)
3804 {
3805         int i;
3806
3807         sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
3808         for (i = 0; i < sc->vmx_nintrs; i++)
3809                 vmxnet3_enable_intr(sc, i);
3810 }
3811
3812 static void
3813 vmxnet3_disable_all_intrs(struct vmxnet3_softc *sc)
3814 {
3815         int i;
3816
3817         sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
3818         for (i = 0; i < sc->vmx_nintrs; i++)
3819                 vmxnet3_disable_intr(sc, i);
3820 }
3821
3822 static void
3823 vmxnet3_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
3824 {
3825         bus_addr_t *baddr = arg;
3826
3827         if (error == 0)
3828                 *baddr = segs->ds_addr;
3829 }
3830
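/*
 * Allocate a single physically contiguous, DMA-able buffer of the
 * given size and alignment, returning the tag, map, kernel virtual
 * address and bus address in *dma.
 */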
3831 static int
3832 vmxnet3_dma_malloc(struct vmxnet3_softc *sc, bus_size_t size, bus_size_t align,
3833     struct vmxnet3_dma_alloc *dma)
3834 {
3835         device_t dev;
3836         int error;
3837
3838         dev = sc->vmx_dev;
3839         bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3840
3841         error = bus_dma_tag_create(bus_get_dma_tag(dev),
3842             align, 0,           /* alignment, bounds */
3843             BUS_SPACE_MAXADDR,  /* lowaddr */
3844             BUS_SPACE_MAXADDR,  /* highaddr */
3845             NULL, NULL,         /* filter, filterarg */
3846             size,               /* maxsize */
3847             1,                  /* nsegments */
3848             size,               /* maxsegsize */
3849             BUS_DMA_ALLOCNOW,   /* flags */
3850             NULL,               /* lockfunc */
3851             NULL,               /* lockfuncarg */
3852             &dma->dma_tag);
3853         if (error) {
3854                 device_printf(dev, "bus_dma_tag_create failed: %d\n", error);
3855                 goto fail;
3856         }
3857
3858         error = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
3859             BUS_DMA_ZERO | BUS_DMA_NOWAIT, &dma->dma_map);
3860         if (error) {
3861                 device_printf(dev, "bus_dmamem_alloc failed: %d\n", error);
3862                 goto fail;
3863         }
3864
3865         error = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
3866             size, vmxnet3_dmamap_cb, &dma->dma_paddr, BUS_DMA_NOWAIT);
3867         if (error) {
3868                 device_printf(dev, "bus_dmamap_load failed: %d\n", error);
3869                 goto fail;
3870         }
3871
3872         dma->dma_size = size;
3873
3874 fail:
3875         if (error)
3876                 vmxnet3_dma_free(sc, dma);
3877
3878         return (error);
3879 }
3880
3881 static void
3882 vmxnet3_dma_free(struct vmxnet3_softc *sc, struct vmxnet3_dma_alloc *dma)
3883 {
3884
3885         if (dma->dma_tag != NULL) {
3886                 if (dma->dma_map != NULL) {
3887                         bus_dmamap_sync(dma->dma_tag, dma->dma_map,
3888                             BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3889                         bus_dmamap_unload(dma->dma_tag, dma->dma_map);
3890                 }
3891
3892                 if (dma->dma_vaddr != NULL) {
3893                         bus_dmamem_free(dma->dma_tag, dma->dma_vaddr,
3894                             dma->dma_map);
3895                 }
3896
3897                 bus_dma_tag_destroy(dma->dma_tag);
3898         }
3899         bzero(dma, sizeof(struct vmxnet3_dma_alloc));
3900 }
3901
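/*
 * Fetch a per-device loader tunable of the form hw.vmx.<unit>.<knob>,
 * returning the supplied default when the tunable is not set.
 */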
3902 static int
3903 vmxnet3_tunable_int(struct vmxnet3_softc *sc, const char *knob, int def)
3904 {
3905         char path[64];
3906
3907         snprintf(path, sizeof(path),
3908             "hw.vmx.%d.%s", device_get_unit(sc->vmx_dev), knob);
3909         TUNABLE_INT_FETCH(path, &def);
3910
3911         return (def);
3912 }
3913
3914 /*
3915  * Since this is a purely paravirtualized device, we do not have
3916  * to worry about DMA coherency. But at times, we must make sure
3917  * both the compiler and CPU do not reorder memory operations.
3918  */
3919 static inline void
3920 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
3921 {
3922
3923         switch (type) {
3924         case VMXNET3_BARRIER_RD:
3925                 rmb();
3926                 break;
3927         case VMXNET3_BARRIER_WR:
3928                 wmb();
3929                 break;
3930         case VMXNET3_BARRIER_RDWR:
3931                 mb();
3932                 break;
3933         default:
3934                 panic("%s: bad barrier type %d", __func__, type);
3935         }
3936 }