]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/virtio/network/if_vtnet.c
Merge once more from ^/vendor/llvm-project/release-10.x, to get the
[FreeBSD/FreeBSD.git] / sys / dev / virtio / network / if_vtnet.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 /* Driver for VirtIO network devices. */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include <sys/param.h>
35 #include <sys/eventhandler.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/sockio.h>
39 #include <sys/mbuf.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/socket.h>
43 #include <sys/sysctl.h>
44 #include <sys/random.h>
45 #include <sys/sglist.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/taskqueue.h>
49 #include <sys/smp.h>
50 #include <machine/smp.h>
51
52 #include <vm/uma.h>
53
54 #include <net/debugnet.h>
55 #include <net/ethernet.h>
56 #include <net/pfil.h>
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/if_arp.h>
60 #include <net/if_dl.h>
61 #include <net/if_types.h>
62 #include <net/if_media.h>
63 #include <net/if_vlan_var.h>
64
65 #include <net/bpf.h>
66
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
71 #include <netinet6/ip6_var.h>
72 #include <netinet/udp.h>
73 #include <netinet/tcp.h>
74
75 #include <machine/bus.h>
76 #include <machine/resource.h>
77 #include <sys/bus.h>
78 #include <sys/rman.h>
79
80 #include <dev/virtio/virtio.h>
81 #include <dev/virtio/virtqueue.h>
82 #include <dev/virtio/network/virtio_net.h>
83 #include <dev/virtio/network/if_vtnetvar.h>
84 #include "virtio_if.h"
85
86 #include "opt_inet.h"
87 #include "opt_inet6.h"
88
89 static int      vtnet_modevent(module_t, int, void *);
90
91 static int      vtnet_probe(device_t);
92 static int      vtnet_attach(device_t);
93 static int      vtnet_detach(device_t);
94 static int      vtnet_suspend(device_t);
95 static int      vtnet_resume(device_t);
96 static int      vtnet_shutdown(device_t);
97 static int      vtnet_attach_completed(device_t);
98 static int      vtnet_config_change(device_t);
99
100 static void     vtnet_negotiate_features(struct vtnet_softc *);
101 static void     vtnet_setup_features(struct vtnet_softc *);
102 static int      vtnet_init_rxq(struct vtnet_softc *, int);
103 static int      vtnet_init_txq(struct vtnet_softc *, int);
104 static int      vtnet_alloc_rxtx_queues(struct vtnet_softc *);
105 static void     vtnet_free_rxtx_queues(struct vtnet_softc *);
106 static int      vtnet_alloc_rx_filters(struct vtnet_softc *);
107 static void     vtnet_free_rx_filters(struct vtnet_softc *);
108 static int      vtnet_alloc_virtqueues(struct vtnet_softc *);
109 static int      vtnet_setup_interface(struct vtnet_softc *);
110 static int      vtnet_change_mtu(struct vtnet_softc *, int);
111 static int      vtnet_ioctl(struct ifnet *, u_long, caddr_t);
112 static uint64_t vtnet_get_counter(struct ifnet *, ift_counter);
113
114 static int      vtnet_rxq_populate(struct vtnet_rxq *);
115 static void     vtnet_rxq_free_mbufs(struct vtnet_rxq *);
116 static struct mbuf *
117                 vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **);
118 static int      vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
119                     struct mbuf *, int);
120 static int      vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
121 static int      vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
122 static int      vtnet_rxq_new_buf(struct vtnet_rxq *);
123 static int      vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
124                      struct virtio_net_hdr *);
125 static void     vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
126 static void     vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
127 static int      vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
128 static void     vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
129                     struct virtio_net_hdr *);
130 static int      vtnet_rxq_eof(struct vtnet_rxq *);
131 static void     vtnet_rx_vq_intr(void *);
132 static void     vtnet_rxq_tq_intr(void *, int);
133
134 static int      vtnet_txq_below_threshold(struct vtnet_txq *);
135 static int      vtnet_txq_notify(struct vtnet_txq *);
136 static void     vtnet_txq_free_mbufs(struct vtnet_txq *);
137 static int      vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
138                     int *, int *, int *);
139 static int      vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
140                     int, struct virtio_net_hdr *);
141 static struct mbuf *
142                 vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
143                     struct virtio_net_hdr *);
144 static int      vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
145                     struct vtnet_tx_header *);
146 static int      vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
147 #ifdef VTNET_LEGACY_TX
148 static void     vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
149 static void     vtnet_start(struct ifnet *);
150 #else
151 static int      vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
152 static int      vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
153 static void     vtnet_txq_tq_deferred(void *, int);
154 #endif
155 static void     vtnet_txq_start(struct vtnet_txq *);
156 static void     vtnet_txq_tq_intr(void *, int);
157 static int      vtnet_txq_eof(struct vtnet_txq *);
158 static void     vtnet_tx_vq_intr(void *);
159 static void     vtnet_tx_start_all(struct vtnet_softc *);
160
161 #ifndef VTNET_LEGACY_TX
162 static void     vtnet_qflush(struct ifnet *);
163 #endif
164
165 static int      vtnet_watchdog(struct vtnet_txq *);
166 static void     vtnet_accum_stats(struct vtnet_softc *,
167                     struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
168 static void     vtnet_tick(void *);
169
170 static void     vtnet_start_taskqueues(struct vtnet_softc *);
171 static void     vtnet_free_taskqueues(struct vtnet_softc *);
172 static void     vtnet_drain_taskqueues(struct vtnet_softc *);
173
174 static void     vtnet_drain_rxtx_queues(struct vtnet_softc *);
175 static void     vtnet_stop_rendezvous(struct vtnet_softc *);
176 static void     vtnet_stop(struct vtnet_softc *);
177 static int      vtnet_virtio_reinit(struct vtnet_softc *);
178 static void     vtnet_init_rx_filters(struct vtnet_softc *);
179 static int      vtnet_init_rx_queues(struct vtnet_softc *);
180 static int      vtnet_init_tx_queues(struct vtnet_softc *);
181 static int      vtnet_init_rxtx_queues(struct vtnet_softc *);
182 static void     vtnet_set_active_vq_pairs(struct vtnet_softc *);
183 static int      vtnet_reinit(struct vtnet_softc *);
184 static void     vtnet_init_locked(struct vtnet_softc *);
185 static void     vtnet_init(void *);
186
187 static void     vtnet_free_ctrl_vq(struct vtnet_softc *);
188 static void     vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
189                     struct sglist *, int, int);
190 static int      vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
191 static int      vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
192 static int      vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
193 static int      vtnet_set_promisc(struct vtnet_softc *, int);
194 static int      vtnet_set_allmulti(struct vtnet_softc *, int);
195 static void     vtnet_attach_disable_promisc(struct vtnet_softc *);
196 static void     vtnet_rx_filter(struct vtnet_softc *);
197 static void     vtnet_rx_filter_mac(struct vtnet_softc *);
198 static int      vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
199 static void     vtnet_rx_filter_vlan(struct vtnet_softc *);
200 static void     vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
201 static void     vtnet_register_vlan(void *, struct ifnet *, uint16_t);
202 static void     vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
203
204 static int      vtnet_is_link_up(struct vtnet_softc *);
205 static void     vtnet_update_link_status(struct vtnet_softc *);
206 static int      vtnet_ifmedia_upd(struct ifnet *);
207 static void     vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
208 static void     vtnet_get_hwaddr(struct vtnet_softc *);
209 static void     vtnet_set_hwaddr(struct vtnet_softc *);
210 static void     vtnet_vlan_tag_remove(struct mbuf *);
211 static void     vtnet_set_rx_process_limit(struct vtnet_softc *);
212 static void     vtnet_set_tx_intr_threshold(struct vtnet_softc *);
213
214 static void     vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
215                     struct sysctl_oid_list *, struct vtnet_rxq *);
216 static void     vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
217                     struct sysctl_oid_list *, struct vtnet_txq *);
218 static void     vtnet_setup_queue_sysctl(struct vtnet_softc *);
219 static void     vtnet_setup_sysctl(struct vtnet_softc *);
220
221 static int      vtnet_rxq_enable_intr(struct vtnet_rxq *);
222 static void     vtnet_rxq_disable_intr(struct vtnet_rxq *);
223 static int      vtnet_txq_enable_intr(struct vtnet_txq *);
224 static void     vtnet_txq_disable_intr(struct vtnet_txq *);
225 static void     vtnet_enable_rx_interrupts(struct vtnet_softc *);
226 static void     vtnet_enable_tx_interrupts(struct vtnet_softc *);
227 static void     vtnet_enable_interrupts(struct vtnet_softc *);
228 static void     vtnet_disable_rx_interrupts(struct vtnet_softc *);
229 static void     vtnet_disable_tx_interrupts(struct vtnet_softc *);
230 static void     vtnet_disable_interrupts(struct vtnet_softc *);
231
232 static int      vtnet_tunable_int(struct vtnet_softc *, const char *, int);
233
234 DEBUGNET_DEFINE(vtnet);
235
236 /* Tunables. */
237 static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
238     "VNET driver parameters");
239 static int vtnet_csum_disable = 0;
240 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
241 SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
242     &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
243 static int vtnet_tso_disable = 0;
244 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
245 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable,
246     0, "Disables TCP Segmentation Offload");
247 static int vtnet_lro_disable = 0;
248 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
249 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable,
250     0, "Disables TCP Large Receive Offload");
251 static int vtnet_mq_disable = 0;
252 TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
253 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable,
254     0, "Disables Multi Queue support");
255 static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
256 TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
257 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
258     &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs");
259 static int vtnet_rx_process_limit = 512;
260 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
261 SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
262     &vtnet_rx_process_limit, 0,
263     "Limits the number RX segments processed in a single pass");
264
265 static uma_zone_t vtnet_tx_header_zone;
266
267 static struct virtio_feature_desc vtnet_feature_desc[] = {
268         { VIRTIO_NET_F_CSUM,            "TxChecksum"    },
269         { VIRTIO_NET_F_GUEST_CSUM,      "RxChecksum"    },
270         { VIRTIO_NET_F_MAC,             "MacAddress"    },
271         { VIRTIO_NET_F_GSO,             "TxAllGSO"      },
272         { VIRTIO_NET_F_GUEST_TSO4,      "RxTSOv4"       },
273         { VIRTIO_NET_F_GUEST_TSO6,      "RxTSOv6"       },
274         { VIRTIO_NET_F_GUEST_ECN,       "RxECN"         },
275         { VIRTIO_NET_F_GUEST_UFO,       "RxUFO"         },
276         { VIRTIO_NET_F_HOST_TSO4,       "TxTSOv4"       },
277         { VIRTIO_NET_F_HOST_TSO6,       "TxTSOv6"       },
278         { VIRTIO_NET_F_HOST_ECN,        "TxTSOECN"      },
279         { VIRTIO_NET_F_HOST_UFO,        "TxUFO"         },
280         { VIRTIO_NET_F_MRG_RXBUF,       "MrgRxBuf"      },
281         { VIRTIO_NET_F_STATUS,          "Status"        },
282         { VIRTIO_NET_F_CTRL_VQ,         "ControlVq"     },
283         { VIRTIO_NET_F_CTRL_RX,         "RxMode"        },
284         { VIRTIO_NET_F_CTRL_VLAN,       "VLanFilter"    },
285         { VIRTIO_NET_F_CTRL_RX_EXTRA,   "RxModeExtra"   },
286         { VIRTIO_NET_F_GUEST_ANNOUNCE,  "GuestAnnounce" },
287         { VIRTIO_NET_F_MQ,              "Multiqueue"    },
288         { VIRTIO_NET_F_CTRL_MAC_ADDR,   "SetMacAddress" },
289
290         { 0, NULL }
291 };
292
293 static device_method_t vtnet_methods[] = {
294         /* Device methods. */
295         DEVMETHOD(device_probe,                 vtnet_probe),
296         DEVMETHOD(device_attach,                vtnet_attach),
297         DEVMETHOD(device_detach,                vtnet_detach),
298         DEVMETHOD(device_suspend,               vtnet_suspend),
299         DEVMETHOD(device_resume,                vtnet_resume),
300         DEVMETHOD(device_shutdown,              vtnet_shutdown),
301
302         /* VirtIO methods. */
303         DEVMETHOD(virtio_attach_completed,      vtnet_attach_completed),
304         DEVMETHOD(virtio_config_change,         vtnet_config_change),
305
306         DEVMETHOD_END
307 };
308
309 #ifdef DEV_NETMAP
310 #include <dev/netmap/if_vtnet_netmap.h>
311 #endif /* DEV_NETMAP */
312
313 static driver_t vtnet_driver = {
314         "vtnet",
315         vtnet_methods,
316         sizeof(struct vtnet_softc)
317 };
318 static devclass_t vtnet_devclass;
319
320 DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass,
321     vtnet_modevent, 0);
322 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
323     vtnet_modevent, 0);
324 MODULE_VERSION(vtnet, 1);
325 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
326 #ifdef DEV_NETMAP
327 MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
328 #endif /* DEV_NETMAP */
329
330 VIRTIO_SIMPLE_PNPTABLE(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
331 VIRTIO_SIMPLE_PNPINFO(virtio_mmio, vtnet);
332 VIRTIO_SIMPLE_PNPINFO(virtio_pci, vtnet);
333
334 static int
335 vtnet_modevent(module_t mod, int type, void *unused)
336 {
337         int error = 0;
338         static int loaded = 0;
339
340         switch (type) {
341         case MOD_LOAD:
342                 if (loaded++ == 0) {
343                         vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
344                                 sizeof(struct vtnet_tx_header),
345                                 NULL, NULL, NULL, NULL, 0, 0);
346 #ifdef DEBUGNET
347                         /*
348                          * We need to allocate from this zone in the transmit path, so ensure
349                          * that we have at least one item per header available.
350                          * XXX add a separate zone like we do for mbufs? otherwise we may alloc
351                          * buckets
352                          */
353                         uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
354                         uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
355 #endif
356                 }
357                 break;
358         case MOD_QUIESCE:
359                 if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
360                         error = EBUSY;
361                 break;
362         case MOD_UNLOAD:
363                 if (--loaded == 0) {
364                         uma_zdestroy(vtnet_tx_header_zone);
365                         vtnet_tx_header_zone = NULL;
366                 }
367                 break;
368         case MOD_SHUTDOWN:
369                 break;
370         default:
371                 error = EOPNOTSUPP;
372                 break;
373         }
374
375         return (error);
376 }
377
378 static int
379 vtnet_probe(device_t dev)
380 {
381         return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
382 }
383
384 static int
385 vtnet_attach(device_t dev)
386 {
387         struct vtnet_softc *sc;
388         int error;
389
390         sc = device_get_softc(dev);
391         sc->vtnet_dev = dev;
392
393         /* Register our feature descriptions. */
394         virtio_set_feature_desc(dev, vtnet_feature_desc);
395
396         VTNET_CORE_LOCK_INIT(sc);
397         callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
398
399         vtnet_setup_sysctl(sc);
400         vtnet_setup_features(sc);
401
402         error = vtnet_alloc_rx_filters(sc);
403         if (error) {
404                 device_printf(dev, "cannot allocate Rx filters\n");
405                 goto fail;
406         }
407
408         error = vtnet_alloc_rxtx_queues(sc);
409         if (error) {
410                 device_printf(dev, "cannot allocate queues\n");
411                 goto fail;
412         }
413
414         error = vtnet_alloc_virtqueues(sc);
415         if (error) {
416                 device_printf(dev, "cannot allocate virtqueues\n");
417                 goto fail;
418         }
419
420         error = vtnet_setup_interface(sc);
421         if (error) {
422                 device_printf(dev, "cannot setup interface\n");
423                 goto fail;
424         }
425
426         error = virtio_setup_intr(dev, INTR_TYPE_NET);
427         if (error) {
428                 device_printf(dev, "cannot setup virtqueue interrupts\n");
429                 /* BMV: This will crash if during boot! */
430                 ether_ifdetach(sc->vtnet_ifp);
431                 goto fail;
432         }
433
434 #ifdef DEV_NETMAP
435         vtnet_netmap_attach(sc);
436 #endif /* DEV_NETMAP */
437
438         vtnet_start_taskqueues(sc);
439
440 fail:
441         if (error)
442                 vtnet_detach(dev);
443
444         return (error);
445 }
446
447 static int
448 vtnet_detach(device_t dev)
449 {
450         struct vtnet_softc *sc;
451         struct ifnet *ifp;
452
453         sc = device_get_softc(dev);
454         ifp = sc->vtnet_ifp;
455
456         if (device_is_attached(dev)) {
457                 VTNET_CORE_LOCK(sc);
458                 vtnet_stop(sc);
459                 VTNET_CORE_UNLOCK(sc);
460
461                 callout_drain(&sc->vtnet_tick_ch);
462                 vtnet_drain_taskqueues(sc);
463
464                 ether_ifdetach(ifp);
465         }
466
467 #ifdef DEV_NETMAP
468         netmap_detach(ifp);
469 #endif /* DEV_NETMAP */
470
471         vtnet_free_taskqueues(sc);
472
473         if (sc->vtnet_vlan_attach != NULL) {
474                 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
475                 sc->vtnet_vlan_attach = NULL;
476         }
477         if (sc->vtnet_vlan_detach != NULL) {
478                 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
479                 sc->vtnet_vlan_detach = NULL;
480         }
481
482         ifmedia_removeall(&sc->vtnet_media);
483
484         if (ifp != NULL) {
485                 if_free(ifp);
486                 sc->vtnet_ifp = NULL;
487         }
488
489         vtnet_free_rxtx_queues(sc);
490         vtnet_free_rx_filters(sc);
491
492         if (sc->vtnet_ctrl_vq != NULL)
493                 vtnet_free_ctrl_vq(sc);
494
495         VTNET_CORE_LOCK_DESTROY(sc);
496
497         return (0);
498 }
499
500 static int
501 vtnet_suspend(device_t dev)
502 {
503         struct vtnet_softc *sc;
504
505         sc = device_get_softc(dev);
506
507         VTNET_CORE_LOCK(sc);
508         vtnet_stop(sc);
509         sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
510         VTNET_CORE_UNLOCK(sc);
511
512         return (0);
513 }
514
515 static int
516 vtnet_resume(device_t dev)
517 {
518         struct vtnet_softc *sc;
519         struct ifnet *ifp;
520
521         sc = device_get_softc(dev);
522         ifp = sc->vtnet_ifp;
523
524         VTNET_CORE_LOCK(sc);
525         if (ifp->if_flags & IFF_UP)
526                 vtnet_init_locked(sc);
527         sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
528         VTNET_CORE_UNLOCK(sc);
529
530         return (0);
531 }
532
533 static int
534 vtnet_shutdown(device_t dev)
535 {
536
537         /*
538          * Suspend already does all of what we need to
539          * do here; we just never expect to be resumed.
540          */
541         return (vtnet_suspend(dev));
542 }
543
544 static int
545 vtnet_attach_completed(device_t dev)
546 {
547
548         vtnet_attach_disable_promisc(device_get_softc(dev));
549
550         return (0);
551 }
552
553 static int
554 vtnet_config_change(device_t dev)
555 {
556         struct vtnet_softc *sc;
557
558         sc = device_get_softc(dev);
559
560         VTNET_CORE_LOCK(sc);
561         vtnet_update_link_status(sc);
562         if (sc->vtnet_link_active != 0)
563                 vtnet_tx_start_all(sc);
564         VTNET_CORE_UNLOCK(sc);
565
566         return (0);
567 }
568
569 static void
570 vtnet_negotiate_features(struct vtnet_softc *sc)
571 {
572         device_t dev;
573         uint64_t mask, features;
574
575         dev = sc->vtnet_dev;
576         mask = 0;
577
578         /*
579          * TSO and LRO are only available when their corresponding checksum
580          * offload feature is also negotiated.
581          */
582         if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
583                 mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
584                 mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
585         }
586         if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
587                 mask |= VTNET_TSO_FEATURES;
588         if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
589                 mask |= VTNET_LRO_FEATURES;
590 #ifndef VTNET_LEGACY_TX
591         if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
592                 mask |= VIRTIO_NET_F_MQ;
593 #else
594         mask |= VIRTIO_NET_F_MQ;
595 #endif
596
597         features = VTNET_FEATURES & ~mask;
598         sc->vtnet_features = virtio_negotiate_features(dev, features);
599
600         if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
601             virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
602                 /*
603                  * LRO without mergeable buffers requires special care. This
604                  * is not ideal because every receive buffer must be large
605                  * enough to hold the maximum TCP packet, the Ethernet header,
606                  * and the header. This requires up to 34 descriptors with
607                  * MCLBYTES clusters. If we do not have indirect descriptors,
608                  * LRO is disabled since the virtqueue will not contain very
609                  * many receive buffers.
610                  */
611                 if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
612                         device_printf(dev,
613                             "LRO disabled due to both mergeable buffers and "
614                             "indirect descriptors not negotiated\n");
615
616                         features &= ~VTNET_LRO_FEATURES;
617                         sc->vtnet_features =
618                             virtio_negotiate_features(dev, features);
619                 } else
620                         sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
621         }
622 }
623
624 static void
625 vtnet_setup_features(struct vtnet_softc *sc)
626 {
627         device_t dev;
628
629         dev = sc->vtnet_dev;
630
631         vtnet_negotiate_features(sc);
632
633         if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
634                 sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
635         if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
636                 sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
637
638         if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
639                 /* This feature should always be negotiated. */
640                 sc->vtnet_flags |= VTNET_FLAG_MAC;
641         }
642
643         if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
644                 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
645                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
646         } else
647                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
648
649         if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
650                 sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
651         else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
652                 sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
653         else
654                 sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;
655
656         if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
657             virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
658             virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
659                 sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
660         else
661                 sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
662
663         if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
664                 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
665
666                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
667                         sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
668                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
669                         sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
670                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
671                         sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
672         }
673
674         if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
675             sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
676                 sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
677                     offsetof(struct virtio_net_config, max_virtqueue_pairs));
678         } else
679                 sc->vtnet_max_vq_pairs = 1;
680
681         if (sc->vtnet_max_vq_pairs > 1) {
682                 /*
683                  * Limit the maximum number of queue pairs to the lower of
684                  * the number of CPUs and the configured maximum.
685                  * The actual number of queues that get used may be less.
686                  */
687                 int max;
688
689                 max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
690                 if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) {
691                         if (max > mp_ncpus)
692                                 max = mp_ncpus;
693                         if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
694                                 max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX;
695                         if (max > 1) {
696                                 sc->vtnet_requested_vq_pairs = max;
697                                 sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
698                         }
699                 }
700         }
701 }
702
703 static int
704 vtnet_init_rxq(struct vtnet_softc *sc, int id)
705 {
706         struct vtnet_rxq *rxq;
707
708         rxq = &sc->vtnet_rxqs[id];
709
710         snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
711             device_get_nameunit(sc->vtnet_dev), id);
712         mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
713
714         rxq->vtnrx_sc = sc;
715         rxq->vtnrx_id = id;
716
717         rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
718         if (rxq->vtnrx_sg == NULL)
719                 return (ENOMEM);
720
721         NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
722         rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
723             taskqueue_thread_enqueue, &rxq->vtnrx_tq);
724
725         return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
726 }
727
728 static int
729 vtnet_init_txq(struct vtnet_softc *sc, int id)
730 {
731         struct vtnet_txq *txq;
732
733         txq = &sc->vtnet_txqs[id];
734
735         snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
736             device_get_nameunit(sc->vtnet_dev), id);
737         mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
738
739         txq->vtntx_sc = sc;
740         txq->vtntx_id = id;
741
742         txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
743         if (txq->vtntx_sg == NULL)
744                 return (ENOMEM);
745
746 #ifndef VTNET_LEGACY_TX
747         txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
748             M_NOWAIT, &txq->vtntx_mtx);
749         if (txq->vtntx_br == NULL)
750                 return (ENOMEM);
751
752         TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
753 #endif
754         TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
755         txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
756             taskqueue_thread_enqueue, &txq->vtntx_tq);
757         if (txq->vtntx_tq == NULL)
758                 return (ENOMEM);
759
760         return (0);
761 }
762
763 static int
764 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
765 {
766         int i, npairs, error;
767
768         npairs = sc->vtnet_max_vq_pairs;
769
770         sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
771             M_NOWAIT | M_ZERO);
772         sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
773             M_NOWAIT | M_ZERO);
774         if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
775                 return (ENOMEM);
776
777         for (i = 0; i < npairs; i++) {
778                 error = vtnet_init_rxq(sc, i);
779                 if (error)
780                         return (error);
781                 error = vtnet_init_txq(sc, i);
782                 if (error)
783                         return (error);
784         }
785
786         vtnet_setup_queue_sysctl(sc);
787
788         return (0);
789 }
790
791 static void
792 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
793 {
794
795         rxq->vtnrx_sc = NULL;
796         rxq->vtnrx_id = -1;
797
798         if (rxq->vtnrx_sg != NULL) {
799                 sglist_free(rxq->vtnrx_sg);
800                 rxq->vtnrx_sg = NULL;
801         }
802
803         if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
804                 mtx_destroy(&rxq->vtnrx_mtx);
805 }
806
807 static void
808 vtnet_destroy_txq(struct vtnet_txq *txq)
809 {
810
811         txq->vtntx_sc = NULL;
812         txq->vtntx_id = -1;
813
814         if (txq->vtntx_sg != NULL) {
815                 sglist_free(txq->vtntx_sg);
816                 txq->vtntx_sg = NULL;
817         }
818
819 #ifndef VTNET_LEGACY_TX
820         if (txq->vtntx_br != NULL) {
821                 buf_ring_free(txq->vtntx_br, M_DEVBUF);
822                 txq->vtntx_br = NULL;
823         }
824 #endif
825
826         if (mtx_initialized(&txq->vtntx_mtx) != 0)
827                 mtx_destroy(&txq->vtntx_mtx);
828 }
829
830 static void
831 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
832 {
833         int i;
834
835         if (sc->vtnet_rxqs != NULL) {
836                 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
837                         vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
838                 free(sc->vtnet_rxqs, M_DEVBUF);
839                 sc->vtnet_rxqs = NULL;
840         }
841
842         if (sc->vtnet_txqs != NULL) {
843                 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
844                         vtnet_destroy_txq(&sc->vtnet_txqs[i]);
845                 free(sc->vtnet_txqs, M_DEVBUF);
846                 sc->vtnet_txqs = NULL;
847         }
848 }
849
850 static int
851 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
852 {
853
854         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
855                 sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
856                     M_DEVBUF, M_NOWAIT | M_ZERO);
857                 if (sc->vtnet_mac_filter == NULL)
858                         return (ENOMEM);
859         }
860
861         if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
862                 sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
863                     VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
864                 if (sc->vtnet_vlan_filter == NULL)
865                         return (ENOMEM);
866         }
867
868         return (0);
869 }
870
871 static void
872 vtnet_free_rx_filters(struct vtnet_softc *sc)
873 {
874
875         if (sc->vtnet_mac_filter != NULL) {
876                 free(sc->vtnet_mac_filter, M_DEVBUF);
877                 sc->vtnet_mac_filter = NULL;
878         }
879
880         if (sc->vtnet_vlan_filter != NULL) {
881                 free(sc->vtnet_vlan_filter, M_DEVBUF);
882                 sc->vtnet_vlan_filter = NULL;
883         }
884 }
885
886 static int
887 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
888 {
889         device_t dev;
890         struct vq_alloc_info *info;
891         struct vtnet_rxq *rxq;
892         struct vtnet_txq *txq;
893         int i, idx, flags, nvqs, error;
894
895         dev = sc->vtnet_dev;
896         flags = 0;
897
898         nvqs = sc->vtnet_max_vq_pairs * 2;
899         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
900                 nvqs++;
901
902         info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
903         if (info == NULL)
904                 return (ENOMEM);
905
906         for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) {
907                 rxq = &sc->vtnet_rxqs[i];
908                 VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
909                     vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
910                     "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
911
912                 txq = &sc->vtnet_txqs[i];
913                 VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
914                     vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
915                     "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
916         }
917
918         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
919                 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
920                     &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
921         }
922
923         /*
924          * Enable interrupt binding if this is multiqueue. This only matters
925          * when per-vq MSIX is available.
926          */
927         if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
928                 flags |= 0;
929
930         error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
931         free(info, M_TEMP);
932
933         return (error);
934 }
935
936 static int
937 vtnet_setup_interface(struct vtnet_softc *sc)
938 {
939         device_t dev;
940         struct pfil_head_args pa;
941         struct ifnet *ifp;
942
943         dev = sc->vtnet_dev;
944
945         ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
946         if (ifp == NULL) {
947                 device_printf(dev, "cannot allocate ifnet structure\n");
948                 return (ENOSPC);
949         }
950
951         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
952         ifp->if_baudrate = IF_Gbps(10); /* Approx. */
953         ifp->if_softc = sc;
954         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
955             IFF_KNOWSEPOCH;
956         ifp->if_init = vtnet_init;
957         ifp->if_ioctl = vtnet_ioctl;
958         ifp->if_get_counter = vtnet_get_counter;
959 #ifndef VTNET_LEGACY_TX
960         ifp->if_transmit = vtnet_txq_mq_start;
961         ifp->if_qflush = vtnet_qflush;
962 #else
963         struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
964         ifp->if_start = vtnet_start;
965         IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
966         ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
967         IFQ_SET_READY(&ifp->if_snd);
968 #endif
969
970         ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
971             vtnet_ifmedia_sts);
972         ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
973         ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
974
975         /* Read (or generate) the MAC address for the adapter. */
976         vtnet_get_hwaddr(sc);
977
978         ether_ifattach(ifp, sc->vtnet_hwaddr);
979
980         if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
981                 ifp->if_capabilities |= IFCAP_LINKSTATE;
982
983         /* Tell the upper layer(s) we support long frames. */
984         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
985         ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
986
987         if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
988                 ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
989
990                 if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
991                         ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
992                         sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
993                 } else {
994                         if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
995                                 ifp->if_capabilities |= IFCAP_TSO4;
996                         if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
997                                 ifp->if_capabilities |= IFCAP_TSO6;
998                         if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
999                                 sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
1000                 }
1001
1002                 if (ifp->if_capabilities & IFCAP_TSO)
1003                         ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
1004         }
1005
1006         if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
1007                 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
1008
1009                 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
1010                     virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
1011                         ifp->if_capabilities |= IFCAP_LRO;
1012         }
1013
1014         if (ifp->if_capabilities & IFCAP_HWCSUM) {
1015                 /*
1016                  * VirtIO does not support VLAN tagging, but we can fake
1017                  * it by inserting and removing the 802.1Q header during
1018                  * transmit and receive. We are then able to do checksum
1019                  * offloading of VLAN frames.
1020                  */
1021                 ifp->if_capabilities |=
1022                     IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
1023         }
1024
1025         ifp->if_capenable = ifp->if_capabilities;
1026
1027         /*
1028          * Capabilities after here are not enabled by default.
1029          */
1030
1031         if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1032                 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
1033
1034                 sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1035                     vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1036                 sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1037                     vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1038         }
1039
1040         vtnet_set_rx_process_limit(sc);
1041         vtnet_set_tx_intr_threshold(sc);
1042
1043         DEBUGNET_SET(ifp, vtnet);
1044
1045         pa.pa_version = PFIL_VERSION;
1046         pa.pa_flags = PFIL_IN;
1047         pa.pa_type = PFIL_TYPE_ETHERNET;
1048         pa.pa_headname = ifp->if_xname;
1049         sc->vtnet_pfil = pfil_head_register(&pa);
1050
1051         return (0);
1052 }
1053
1054 static int
1055 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
1056 {
1057         struct ifnet *ifp;
1058         int frame_size, clsize;
1059
1060         ifp = sc->vtnet_ifp;
1061
1062         if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
1063                 return (EINVAL);
1064
1065         frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
1066             new_mtu;
1067
1068         /*
1069          * Based on the new MTU (and hence frame size) determine which
1070          * cluster size is most appropriate for the receive queues.
1071          */
1072         if (frame_size <= MCLBYTES) {
1073                 clsize = MCLBYTES;
1074         } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1075                 /* Avoid going past 9K jumbos. */
1076                 if (frame_size > MJUM9BYTES)
1077                         return (EINVAL);
1078                 clsize = MJUM9BYTES;
1079         } else
1080                 clsize = MJUMPAGESIZE;
1081
1082         ifp->if_mtu = new_mtu;
1083         sc->vtnet_rx_new_clsize = clsize;
1084
1085         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1086                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1087                 vtnet_init_locked(sc);
1088         }
1089
1090         return (0);
1091 }
1092
1093 static int
1094 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1095 {
1096         struct vtnet_softc *sc;
1097         struct ifreq *ifr;
1098         int reinit, mask, error;
1099
1100         sc = ifp->if_softc;
1101         ifr = (struct ifreq *) data;
1102         error = 0;
1103
1104         switch (cmd) {
1105         case SIOCSIFMTU:
1106                 if (ifp->if_mtu != ifr->ifr_mtu) {
1107                         VTNET_CORE_LOCK(sc);
1108                         error = vtnet_change_mtu(sc, ifr->ifr_mtu);
1109                         VTNET_CORE_UNLOCK(sc);
1110                 }
1111                 break;
1112
1113         case SIOCSIFFLAGS:
1114                 VTNET_CORE_LOCK(sc);
1115                 if ((ifp->if_flags & IFF_UP) == 0) {
1116                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1117                                 vtnet_stop(sc);
1118                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1119                         if ((ifp->if_flags ^ sc->vtnet_if_flags) &
1120                             (IFF_PROMISC | IFF_ALLMULTI)) {
1121                                 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1122                                         vtnet_rx_filter(sc);
1123                                 else {
1124                                         ifp->if_flags |= IFF_PROMISC;
1125                                         if ((ifp->if_flags ^ sc->vtnet_if_flags)
1126                                             & IFF_ALLMULTI)
1127                                                 error = ENOTSUP;
1128                                 }
1129                         }
1130                 } else
1131                         vtnet_init_locked(sc);
1132
1133                 if (error == 0)
1134                         sc->vtnet_if_flags = ifp->if_flags;
1135                 VTNET_CORE_UNLOCK(sc);
1136                 break;
1137
1138         case SIOCADDMULTI:
1139         case SIOCDELMULTI:
1140                 if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
1141                         break;
1142                 VTNET_CORE_LOCK(sc);
1143                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1144                         vtnet_rx_filter_mac(sc);
1145                 VTNET_CORE_UNLOCK(sc);
1146                 break;
1147
1148         case SIOCSIFMEDIA:
1149         case SIOCGIFMEDIA:
1150                 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1151                 break;
1152
1153         case SIOCSIFCAP:
1154                 VTNET_CORE_LOCK(sc);
1155                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1156
1157                 if (mask & IFCAP_TXCSUM)
1158                         ifp->if_capenable ^= IFCAP_TXCSUM;
1159                 if (mask & IFCAP_TXCSUM_IPV6)
1160                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1161                 if (mask & IFCAP_TSO4)
1162                         ifp->if_capenable ^= IFCAP_TSO4;
1163                 if (mask & IFCAP_TSO6)
1164                         ifp->if_capenable ^= IFCAP_TSO6;
1165
1166                 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
1167                     IFCAP_VLAN_HWFILTER)) {
1168                         /* These Rx features require us to renegotiate. */
1169                         reinit = 1;
1170
1171                         if (mask & IFCAP_RXCSUM)
1172                                 ifp->if_capenable ^= IFCAP_RXCSUM;
1173                         if (mask & IFCAP_RXCSUM_IPV6)
1174                                 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1175                         if (mask & IFCAP_LRO)
1176                                 ifp->if_capenable ^= IFCAP_LRO;
1177                         if (mask & IFCAP_VLAN_HWFILTER)
1178                                 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1179                 } else
1180                         reinit = 0;
1181
1182                 if (mask & IFCAP_VLAN_HWTSO)
1183                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1184                 if (mask & IFCAP_VLAN_HWTAGGING)
1185                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1186
1187                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1188                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1189                         vtnet_init_locked(sc);
1190                 }
1191
1192                 VTNET_CORE_UNLOCK(sc);
1193                 VLAN_CAPABILITIES(ifp);
1194
1195                 break;
1196
1197         default:
1198                 error = ether_ioctl(ifp, cmd, data);
1199                 break;
1200         }
1201
1202         VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1203
1204         return (error);
1205 }
1206
1207 static int
1208 vtnet_rxq_populate(struct vtnet_rxq *rxq)
1209 {
1210         struct virtqueue *vq;
1211         int nbufs, error;
1212
1213 #ifdef DEV_NETMAP
1214         error = vtnet_netmap_rxq_populate(rxq);
1215         if (error >= 0)
1216                 return (error);
1217 #endif  /* DEV_NETMAP */
1218
1219         vq = rxq->vtnrx_vq;
1220         error = ENOSPC;
1221
1222         for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1223                 error = vtnet_rxq_new_buf(rxq);
1224                 if (error)
1225                         break;
1226         }
1227
1228         if (nbufs > 0) {
1229                 virtqueue_notify(vq);
1230                 /*
1231                  * EMSGSIZE signifies the virtqueue did not have enough
1232                  * entries available to hold the last mbuf. This is not
1233                  * an error.
1234                  */
1235                 if (error == EMSGSIZE)
1236                         error = 0;
1237         }
1238
1239         return (error);
1240 }
1241
1242 static void
1243 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1244 {
1245         struct virtqueue *vq;
1246         struct mbuf *m;
1247         int last;
1248 #ifdef DEV_NETMAP
1249         int netmap_bufs = vtnet_netmap_queue_on(rxq->vtnrx_sc, NR_RX,
1250                                                 rxq->vtnrx_id);
1251 #else  /* !DEV_NETMAP */
1252         int netmap_bufs = 0;
1253 #endif /* !DEV_NETMAP */
1254
1255         vq = rxq->vtnrx_vq;
1256         last = 0;
1257
1258         while ((m = virtqueue_drain(vq, &last)) != NULL) {
1259                 if (!netmap_bufs)
1260                         m_freem(m);
1261         }
1262
1263         KASSERT(virtqueue_empty(vq),
1264             ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1265 }
1266
1267 static struct mbuf *
1268 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1269 {
1270         struct mbuf *m_head, *m_tail, *m;
1271         int i, clsize;
1272
1273         clsize = sc->vtnet_rx_clsize;
1274
1275         KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1276             ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
1277
1278         m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
1279         if (m_head == NULL)
1280                 goto fail;
1281
1282         m_head->m_len = clsize;
1283         m_tail = m_head;
1284
1285         /* Allocate the rest of the chain. */
1286         for (i = 1; i < nbufs; i++) {
1287                 m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
1288                 if (m == NULL)
1289                         goto fail;
1290
1291                 m->m_len = clsize;
1292                 m_tail->m_next = m;
1293                 m_tail = m;
1294         }
1295
1296         if (m_tailp != NULL)
1297                 *m_tailp = m_tail;
1298
1299         return (m_head);
1300
1301 fail:
1302         sc->vtnet_stats.mbuf_alloc_failed++;
1303         m_freem(m_head);
1304
1305         return (NULL);
1306 }
1307
1308 /*
1309  * Slow path for when LRO without mergeable buffers is negotiated.
1310  */
1311 static int
1312 vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1313     int len0)
1314 {
1315         struct vtnet_softc *sc;
1316         struct mbuf *m, *m_prev;
1317         struct mbuf *m_new, *m_tail;
1318         int len, clsize, nreplace, error;
1319
1320         sc = rxq->vtnrx_sc;
1321         clsize = sc->vtnet_rx_clsize;
1322
1323         m_prev = NULL;
1324         m_tail = NULL;
1325         nreplace = 0;
1326
1327         m = m0;
1328         len = len0;
1329
1330         /*
1331          * Since these mbuf chains are so large, we avoid allocating an
1332          * entire replacement chain if possible. When the received frame
1333          * did not consume the entire chain, the unused mbufs are moved
1334          * to the replacement chain.
1335          */
1336         while (len > 0) {
1337                 /*
1338                  * Something is seriously wrong if we received a frame
1339                  * larger than the chain. Drop it.
1340                  */
1341                 if (m == NULL) {
1342                         sc->vtnet_stats.rx_frame_too_large++;
1343                         return (EMSGSIZE);
1344                 }
1345
1346                 /* We always allocate the same cluster size. */
1347                 KASSERT(m->m_len == clsize,
1348                     ("%s: mbuf size %d is not the cluster size %d",
1349                     __func__, m->m_len, clsize));
1350
1351                 m->m_len = MIN(m->m_len, len);
1352                 len -= m->m_len;
1353
1354                 m_prev = m;
1355                 m = m->m_next;
1356                 nreplace++;
1357         }
1358
1359         KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
1360             ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
1361             sc->vtnet_rx_nmbufs));
1362
1363         m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1364         if (m_new == NULL) {
1365                 m_prev->m_len = clsize;
1366                 return (ENOBUFS);
1367         }
1368
1369         /*
1370          * Move any unused mbufs from the received chain onto the end
1371          * of the new chain.
1372          */
1373         if (m_prev->m_next != NULL) {
1374                 m_tail->m_next = m_prev->m_next;
1375                 m_prev->m_next = NULL;
1376         }
1377
1378         error = vtnet_rxq_enqueue_buf(rxq, m_new);
1379         if (error) {
1380                 /*
1381                  * BAD! We could not enqueue the replacement mbuf chain. We
1382                  * must restore the m0 chain to the original state if it was
1383                  * modified so we can subsequently discard it.
1384                  *
1385                  * NOTE: The replacement is suppose to be an identical copy
1386                  * to the one just dequeued so this is an unexpected error.
1387                  */
1388                 sc->vtnet_stats.rx_enq_replacement_failed++;
1389
1390                 if (m_tail->m_next != NULL) {
1391                         m_prev->m_next = m_tail->m_next;
1392                         m_tail->m_next = NULL;
1393                 }
1394
1395                 m_prev->m_len = clsize;
1396                 m_freem(m_new);
1397         }
1398
1399         return (error);
1400 }
1401
1402 static int
1403 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1404 {
1405         struct vtnet_softc *sc;
1406         struct mbuf *m_new;
1407         int error;
1408
1409         sc = rxq->vtnrx_sc;
1410
1411         KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1412             ("%s: chained mbuf without LRO_NOMRG", __func__));
1413
1414         if (m->m_next == NULL) {
1415                 /* Fast-path for the common case of just one mbuf. */
1416                 if (m->m_len < len)
1417                         return (EINVAL);
1418
1419                 m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1420                 if (m_new == NULL)
1421                         return (ENOBUFS);
1422
1423                 error = vtnet_rxq_enqueue_buf(rxq, m_new);
1424                 if (error) {
1425                         /*
1426                          * The new mbuf is suppose to be an identical
1427                          * copy of the one just dequeued so this is an
1428                          * unexpected error.
1429                          */
1430                         m_freem(m_new);
1431                         sc->vtnet_stats.rx_enq_replacement_failed++;
1432                 } else
1433                         m->m_len = len;
1434         } else
1435                 error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
1436
1437         return (error);
1438 }
1439
1440 static int
1441 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1442 {
1443         struct vtnet_softc *sc;
1444         struct sglist *sg;
1445         struct vtnet_rx_header *rxhdr;
1446         uint8_t *mdata;
1447         int offset, error;
1448
1449         sc = rxq->vtnrx_sc;
1450         sg = rxq->vtnrx_sg;
1451         mdata = mtod(m, uint8_t *);
1452
1453         VTNET_RXQ_LOCK_ASSERT(rxq);
1454         KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1455             ("%s: chained mbuf without LRO_NOMRG", __func__));
1456         KASSERT(m->m_len == sc->vtnet_rx_clsize,
1457             ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
1458              sc->vtnet_rx_clsize));
1459
1460         sglist_reset(sg);
1461         if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1462                 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1463                 rxhdr = (struct vtnet_rx_header *) mdata;
1464                 sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1465                 offset = sizeof(struct vtnet_rx_header);
1466         } else
1467                 offset = 0;
1468
1469         sglist_append(sg, mdata + offset, m->m_len - offset);
1470         if (m->m_next != NULL) {
1471                 error = sglist_append_mbuf(sg, m->m_next);
1472                 MPASS(error == 0);
1473         }
1474
1475         error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg);
1476
1477         return (error);
1478 }
1479
1480 static int
1481 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1482 {
1483         struct vtnet_softc *sc;
1484         struct mbuf *m;
1485         int error;
1486
1487         sc = rxq->vtnrx_sc;
1488
1489         m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1490         if (m == NULL)
1491                 return (ENOBUFS);
1492
1493         error = vtnet_rxq_enqueue_buf(rxq, m);
1494         if (error)
1495                 m_freem(m);
1496
1497         return (error);
1498 }
1499
1500 /*
1501  * Use the checksum offset in the VirtIO header to set the
1502  * correct CSUM_* flags.
1503  */
1504 static int
1505 vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
1506     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1507 {
1508         struct vtnet_softc *sc;
1509 #if defined(INET) || defined(INET6)
1510         int offset = hdr->csum_start + hdr->csum_offset;
1511 #endif
1512
1513         sc = rxq->vtnrx_sc;
1514
1515         /* Only do a basic sanity check on the offset. */
1516         switch (eth_type) {
1517 #if defined(INET)
1518         case ETHERTYPE_IP:
1519                 if (__predict_false(offset < ip_start + sizeof(struct ip)))
1520                         return (1);
1521                 break;
1522 #endif
1523 #if defined(INET6)
1524         case ETHERTYPE_IPV6:
1525                 if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
1526                         return (1);
1527                 break;
1528 #endif
1529         default:
1530                 sc->vtnet_stats.rx_csum_bad_ethtype++;
1531                 return (1);
1532         }
1533
1534         /*
1535          * Use the offset to determine the appropriate CSUM_* flags. This is
1536          * a bit dirty, but we can get by with it since the checksum offsets
1537          * happen to be different. We assume the host host does not do IPv4
1538          * header checksum offloading.
1539          */
1540         switch (hdr->csum_offset) {
1541         case offsetof(struct udphdr, uh_sum):
1542         case offsetof(struct tcphdr, th_sum):
1543                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1544                 m->m_pkthdr.csum_data = 0xFFFF;
1545                 break;
1546         default:
1547                 sc->vtnet_stats.rx_csum_bad_offset++;
1548                 return (1);
1549         }
1550
1551         return (0);
1552 }
1553
1554 static int
1555 vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
1556     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1557 {
1558         struct vtnet_softc *sc;
1559         int offset, proto;
1560
1561         sc = rxq->vtnrx_sc;
1562
1563         switch (eth_type) {
1564 #if defined(INET)
1565         case ETHERTYPE_IP: {
1566                 struct ip *ip;
1567                 if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
1568                         return (1);
1569                 ip = (struct ip *)(m->m_data + ip_start);
1570                 proto = ip->ip_p;
1571                 offset = ip_start + (ip->ip_hl << 2);
1572                 break;
1573         }
1574 #endif
1575 #if defined(INET6)
1576         case ETHERTYPE_IPV6:
1577                 if (__predict_false(m->m_len < ip_start +
1578                     sizeof(struct ip6_hdr)))
1579                         return (1);
1580                 offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
1581                 if (__predict_false(offset < 0))
1582                         return (1);
1583                 break;
1584 #endif
1585         default:
1586                 sc->vtnet_stats.rx_csum_bad_ethtype++;
1587                 return (1);
1588         }
1589
1590         switch (proto) {
1591         case IPPROTO_TCP:
1592                 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
1593                         return (1);
1594                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1595                 m->m_pkthdr.csum_data = 0xFFFF;
1596                 break;
1597         case IPPROTO_UDP:
1598                 if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
1599                         return (1);
1600                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1601                 m->m_pkthdr.csum_data = 0xFFFF;
1602                 break;
1603         default:
1604                 /*
1605                  * For the remaining protocols, FreeBSD does not support
1606                  * checksum offloading, so the checksum will be recomputed.
1607                  */
1608 #if 0
1609                 if_printf(sc->vtnet_ifp, "cksum offload of unsupported "
1610                     "protocol eth_type=%#x proto=%d csum_start=%d "
1611                     "csum_offset=%d\n", __func__, eth_type, proto,
1612                     hdr->csum_start, hdr->csum_offset);
1613 #endif
1614                 break;
1615         }
1616
1617         return (0);
1618 }
1619
1620 /*
1621  * Set the appropriate CSUM_* flags. Unfortunately, the information
1622  * provided is not directly useful to us. The VirtIO header gives the
1623  * offset of the checksum, which is all Linux needs, but this is not
1624  * how FreeBSD does things. We are forced to peek inside the packet
1625  * a bit.
1626  *
1627  * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
1628  * could accept the offsets and let the stack figure it out.
1629  */
1630 static int
1631 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1632     struct virtio_net_hdr *hdr)
1633 {
1634         struct ether_header *eh;
1635         struct ether_vlan_header *evh;
1636         uint16_t eth_type;
1637         int offset, error;
1638
1639         eh = mtod(m, struct ether_header *);
1640         eth_type = ntohs(eh->ether_type);
1641         if (eth_type == ETHERTYPE_VLAN) {
1642                 /* BMV: We should handle nested VLAN tags too. */
1643                 evh = mtod(m, struct ether_vlan_header *);
1644                 eth_type = ntohs(evh->evl_proto);
1645                 offset = sizeof(struct ether_vlan_header);
1646         } else
1647                 offset = sizeof(struct ether_header);
1648
1649         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1650                 error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
1651         else
1652                 error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
1653
1654         return (error);
1655 }
1656
1657 static void
1658 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1659 {
1660         struct mbuf *m;
1661
1662         while (--nbufs > 0) {
1663                 m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1664                 if (m == NULL)
1665                         break;
1666                 vtnet_rxq_discard_buf(rxq, m);
1667         }
1668 }
1669
/*
 * Drop a received buffer by putting it straight back on the receive
 * virtqueue.
 */
static void
vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	int error;

	/*
	 * The mbuf was just dequeued, so requeueing it is expected to
	 * always succeed; a failure is only caught by the assertion.
	 */
	error = vtnet_rxq_enqueue_buf(rxq, m);
	KASSERT(error == 0,
	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
}
1683
1684 static int
1685 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1686 {
1687         struct vtnet_softc *sc;
1688         struct virtqueue *vq;
1689         struct mbuf *m, *m_tail;
1690         int len;
1691
1692         sc = rxq->vtnrx_sc;
1693         vq = rxq->vtnrx_vq;
1694         m_tail = m_head;
1695
1696         while (--nbufs > 0) {
1697                 m = virtqueue_dequeue(vq, &len);
1698                 if (m == NULL) {
1699                         rxq->vtnrx_stats.vrxs_ierrors++;
1700                         goto fail;
1701                 }
1702
1703                 if (vtnet_rxq_new_buf(rxq) != 0) {
1704                         rxq->vtnrx_stats.vrxs_iqdrops++;
1705                         vtnet_rxq_discard_buf(rxq, m);
1706                         if (nbufs > 1)
1707                                 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1708                         goto fail;
1709                 }
1710
1711                 if (m->m_len < len)
1712                         len = m->m_len;
1713
1714                 m->m_len = len;
1715                 m->m_flags &= ~M_PKTHDR;
1716
1717                 m_head->m_pkthdr.len += len;
1718                 m_tail->m_next = m;
1719                 m_tail = m;
1720         }
1721
1722         return (0);
1723
1724 fail:
1725         sc->vtnet_stats.rx_mergeable_failed++;
1726         m_freem(m_head);
1727
1728         return (1);
1729 }
1730
1731 static void
1732 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
1733     struct virtio_net_hdr *hdr)
1734 {
1735         struct vtnet_softc *sc;
1736         struct ifnet *ifp;
1737         struct ether_header *eh;
1738
1739         sc = rxq->vtnrx_sc;
1740         ifp = sc->vtnet_ifp;
1741
1742         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1743                 eh = mtod(m, struct ether_header *);
1744                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1745                         vtnet_vlan_tag_remove(m);
1746                         /*
1747                          * With the 802.1Q header removed, update the
1748                          * checksum starting location accordingly.
1749                          */
1750                         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1751                                 hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
1752                 }
1753         }
1754
1755         m->m_pkthdr.flowid = rxq->vtnrx_id;
1756         M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
1757
1758         /*
1759          * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
1760          * distinction that Linux does. Need to reevaluate if performing
1761          * offloading for the NEEDS_CSUM case is really appropriate.
1762          */
1763         if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
1764             VIRTIO_NET_HDR_F_DATA_VALID)) {
1765                 if (vtnet_rxq_csum(rxq, m, hdr) == 0)
1766                         rxq->vtnrx_stats.vrxs_csum++;
1767                 else
1768                         rxq->vtnrx_stats.vrxs_csum_failed++;
1769         }
1770
1771         rxq->vtnrx_stats.vrxs_ipackets++;
1772         rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
1773
1774         VTNET_RXQ_UNLOCK(rxq);
1775         (*ifp->if_input)(ifp, m);
1776         VTNET_RXQ_LOCK(rxq);
1777 }
1778
1779 static int
1780 vtnet_rxq_eof(struct vtnet_rxq *rxq)
1781 {
1782         struct virtio_net_hdr lhdr, *hdr;
1783         struct vtnet_softc *sc;
1784         struct ifnet *ifp;
1785         struct virtqueue *vq;
1786         struct mbuf *m, *mr;
1787         struct virtio_net_hdr_mrg_rxbuf *mhdr;
1788         int len, deq, nbufs, adjsz, count;
1789         pfil_return_t pfil;
1790         bool pfil_done;
1791
1792         sc = rxq->vtnrx_sc;
1793         vq = rxq->vtnrx_vq;
1794         ifp = sc->vtnet_ifp;
1795         hdr = &lhdr;
1796         deq = 0;
1797         count = sc->vtnet_rx_process_limit;
1798
1799         VTNET_RXQ_LOCK_ASSERT(rxq);
1800
1801         while (count-- > 0) {
1802                 m = virtqueue_dequeue(vq, &len);
1803                 if (m == NULL)
1804                         break;
1805                 deq++;
1806
1807                 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1808                         rxq->vtnrx_stats.vrxs_ierrors++;
1809                         vtnet_rxq_discard_buf(rxq, m);
1810                         continue;
1811                 }
1812
1813                 if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1814                         nbufs = 1;
1815                         adjsz = sizeof(struct vtnet_rx_header);
1816                         /*
1817                          * Account for our pad inserted between the header
1818                          * and the actual start of the frame.
1819                          */
1820                         len += VTNET_RX_HEADER_PAD;
1821                 } else {
1822                         mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1823                         nbufs = mhdr->num_buffers;
1824                         adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1825                 }
1826
1827                 /*
1828                  * If we have enough data in first mbuf, run it through
1829                  * pfil as a memory buffer before dequeueing the rest.
1830                  */
1831                 if (PFIL_HOOKED_IN(sc->vtnet_pfil) &&
1832                     len - adjsz >= ETHER_HDR_LEN + max_protohdr) {
1833                         pfil = pfil_run_hooks(sc->vtnet_pfil,
1834                             m->m_data + adjsz, ifp,
1835                             (len - adjsz) | PFIL_MEMPTR | PFIL_IN, NULL);
1836                         switch (pfil) {
1837                         case PFIL_REALLOCED:
1838                                 mr = pfil_mem2mbuf(m->m_data + adjsz);
1839                                 vtnet_rxq_input(rxq, mr, hdr);
1840                                 /* FALLTHROUGH */
1841                         case PFIL_DROPPED:
1842                         case PFIL_CONSUMED:
1843                                 vtnet_rxq_discard_buf(rxq, m);
1844                                 if (nbufs > 1)
1845                                         vtnet_rxq_discard_merged_bufs(rxq,
1846                                             nbufs);
1847                                 continue;
1848                         default:
1849                                 KASSERT(pfil == PFIL_PASS,
1850                                     ("Filter returned %d!\n", pfil));
1851                         };
1852                         pfil_done = true;
1853                 } else
1854                         pfil_done = false;
1855
1856                 if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
1857                         rxq->vtnrx_stats.vrxs_iqdrops++;
1858                         vtnet_rxq_discard_buf(rxq, m);
1859                         if (nbufs > 1)
1860                                 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1861                         continue;
1862                 }
1863
1864                 m->m_pkthdr.len = len;
1865                 m->m_pkthdr.rcvif = ifp;
1866                 m->m_pkthdr.csum_flags = 0;
1867
1868                 if (nbufs > 1) {
1869                         /* Dequeue the rest of chain. */
1870                         if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
1871                                 continue;
1872                 }
1873
1874                 /*
1875                  * Save copy of header before we strip it. For both mergeable
1876                  * and non-mergeable, the header is at the beginning of the
1877                  * mbuf data. We no longer need num_buffers, so always use a
1878                  * regular header.
1879                  *
1880                  * BMV: Is this memcpy() expensive? We know the mbuf data is
1881                  * still valid even after the m_adj().
1882                  */
1883                 memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1884                 m_adj(m, adjsz);
1885
1886                 if (PFIL_HOOKED_IN(sc->vtnet_pfil) && pfil_done == false) {
1887                         pfil = pfil_run_hooks(sc->vtnet_pfil, &m, ifp, PFIL_IN,
1888                             NULL);
1889                         switch (pfil) {
1890                         case PFIL_DROPPED:
1891                         case PFIL_CONSUMED:
1892                                 continue;
1893                         default:
1894                                 KASSERT(pfil == PFIL_PASS,
1895                                     ("Filter returned %d!\n", pfil));
1896                         }
1897                 }
1898
1899                 vtnet_rxq_input(rxq, m, hdr);
1900
1901                 /* Must recheck after dropping the Rx lock. */
1902                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1903                         break;
1904         }
1905
1906         if (deq > 0)
1907                 virtqueue_notify(vq);
1908
1909         return (count > 0 ? 0 : EAGAIN);
1910 }
1911
1912 static void
1913 vtnet_rx_vq_intr(void *xrxq)
1914 {
1915         struct vtnet_softc *sc;
1916         struct vtnet_rxq *rxq;
1917         struct ifnet *ifp;
1918         int tries, more;
1919
1920         rxq = xrxq;
1921         sc = rxq->vtnrx_sc;
1922         ifp = sc->vtnet_ifp;
1923         tries = 0;
1924
1925         if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
1926                 /*
1927                  * Ignore this interrupt. Either this is a spurious interrupt
1928                  * or multiqueue without per-VQ MSIX so every queue needs to
1929                  * be polled (a brain dead configuration we could try harder
1930                  * to avoid).
1931                  */
1932                 vtnet_rxq_disable_intr(rxq);
1933                 return;
1934         }
1935
1936 #ifdef DEV_NETMAP
1937         if (netmap_rx_irq(ifp, rxq->vtnrx_id, &more) != NM_IRQ_PASS)
1938                 return;
1939 #endif /* DEV_NETMAP */
1940
1941         VTNET_RXQ_LOCK(rxq);
1942
1943 again:
1944         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1945                 VTNET_RXQ_UNLOCK(rxq);
1946                 return;
1947         }
1948
1949         more = vtnet_rxq_eof(rxq);
1950         if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1951                 if (!more)
1952                         vtnet_rxq_disable_intr(rxq);
1953                 /*
1954                  * This is an occasional condition or race (when !more),
1955                  * so retry a few times before scheduling the taskqueue.
1956                  */
1957                 if (tries++ < VTNET_INTR_DISABLE_RETRIES)
1958                         goto again;
1959
1960                 VTNET_RXQ_UNLOCK(rxq);
1961                 rxq->vtnrx_stats.vrxs_rescheduled++;
1962                 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1963         } else
1964                 VTNET_RXQ_UNLOCK(rxq);
1965 }
1966
1967 static void
1968 vtnet_rxq_tq_intr(void *xrxq, int pending)
1969 {
1970         struct vtnet_softc *sc;
1971         struct vtnet_rxq *rxq;
1972         struct ifnet *ifp;
1973         int more;
1974
1975         rxq = xrxq;
1976         sc = rxq->vtnrx_sc;
1977         ifp = sc->vtnet_ifp;
1978
1979         VTNET_RXQ_LOCK(rxq);
1980
1981         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1982                 VTNET_RXQ_UNLOCK(rxq);
1983                 return;
1984         }
1985
1986         more = vtnet_rxq_eof(rxq);
1987         if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1988                 if (!more)
1989                         vtnet_rxq_disable_intr(rxq);
1990                 rxq->vtnrx_stats.vrxs_rescheduled++;
1991                 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1992         }
1993
1994         VTNET_RXQ_UNLOCK(rxq);
1995 }
1996
1997 static int
1998 vtnet_txq_below_threshold(struct vtnet_txq *txq)
1999 {
2000         struct vtnet_softc *sc;
2001         struct virtqueue *vq;
2002
2003         sc = txq->vtntx_sc;
2004         vq = txq->vtntx_vq;
2005
2006         return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
2007 }
2008
2009 static int
2010 vtnet_txq_notify(struct vtnet_txq *txq)
2011 {
2012         struct virtqueue *vq;
2013
2014         vq = txq->vtntx_vq;
2015
2016         txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2017         virtqueue_notify(vq);
2018
2019         if (vtnet_txq_enable_intr(txq) == 0)
2020                 return (0);
2021
2022         /*
2023          * Drain frames that were completed since last checked. If this
2024          * causes the queue to go above the threshold, the caller should
2025          * continue transmitting.
2026          */
2027         if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
2028                 virtqueue_disable_intr(vq);
2029                 return (1);
2030         }
2031
2032         return (0);
2033 }
2034
2035 static void
2036 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
2037 {
2038         struct virtqueue *vq;
2039         struct vtnet_tx_header *txhdr;
2040         int last;
2041 #ifdef DEV_NETMAP
2042         int netmap_bufs = vtnet_netmap_queue_on(txq->vtntx_sc, NR_TX,
2043                                                 txq->vtntx_id);
2044 #else  /* !DEV_NETMAP */
2045         int netmap_bufs = 0;
2046 #endif /* !DEV_NETMAP */
2047
2048         vq = txq->vtntx_vq;
2049         last = 0;
2050
2051         while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
2052                 if (!netmap_bufs) {
2053                         m_freem(txhdr->vth_mbuf);
2054                         uma_zfree(vtnet_tx_header_zone, txhdr);
2055                 }
2056         }
2057
2058         KASSERT(virtqueue_empty(vq),
2059             ("%s: mbufs remaining in tx queue %p", __func__, txq));
2060 }
2061
2062 /*
2063  * BMV: Much of this can go away once we finally have offsets in
2064  * the mbuf packet header. Bug andre@.
2065  */
2066 static int
2067 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
2068     int *etype, int *proto, int *start)
2069 {
2070         struct vtnet_softc *sc;
2071         struct ether_vlan_header *evh;
2072         int offset;
2073
2074         sc = txq->vtntx_sc;
2075
2076         evh = mtod(m, struct ether_vlan_header *);
2077         if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2078                 /* BMV: We should handle nested VLAN tags too. */
2079                 *etype = ntohs(evh->evl_proto);
2080                 offset = sizeof(struct ether_vlan_header);
2081         } else {
2082                 *etype = ntohs(evh->evl_encap_proto);
2083                 offset = sizeof(struct ether_header);
2084         }
2085
2086         switch (*etype) {
2087 #if defined(INET)
2088         case ETHERTYPE_IP: {
2089                 struct ip *ip, iphdr;
2090                 if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2091                         m_copydata(m, offset, sizeof(struct ip),
2092                             (caddr_t) &iphdr);
2093                         ip = &iphdr;
2094                 } else
2095                         ip = (struct ip *)(m->m_data + offset);
2096                 *proto = ip->ip_p;
2097                 *start = offset + (ip->ip_hl << 2);
2098                 break;
2099         }
2100 #endif
2101 #if defined(INET6)
2102         case ETHERTYPE_IPV6:
2103                 *proto = -1;
2104                 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2105                 /* Assert the network stack sent us a valid packet. */
2106                 KASSERT(*start > offset,
2107                     ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2108                     *start, offset, *proto));
2109                 break;
2110 #endif
2111         default:
2112                 sc->vtnet_stats.tx_csum_bad_ethtype++;
2113                 return (EINVAL);
2114         }
2115
2116         return (0);
2117 }
2118
2119 static int
2120 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2121     int offset, struct virtio_net_hdr *hdr)
2122 {
2123         static struct timeval lastecn;
2124         static int curecn;
2125         struct vtnet_softc *sc;
2126         struct tcphdr *tcp, tcphdr;
2127
2128         sc = txq->vtntx_sc;
2129
2130         if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2131                 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2132                 tcp = &tcphdr;
2133         } else
2134                 tcp = (struct tcphdr *)(m->m_data + offset);
2135
2136         hdr->hdr_len = offset + (tcp->th_off << 2);
2137         hdr->gso_size = m->m_pkthdr.tso_segsz;
2138         hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2139             VIRTIO_NET_HDR_GSO_TCPV6;
2140
2141         if (tcp->th_flags & TH_CWR) {
2142                 /*
2143                  * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
2144                  * ECN support is not on a per-interface basis, but globally via
2145                  * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
2146                  */
2147                 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2148                         if (ppsratecheck(&lastecn, &curecn, 1))
2149                                 if_printf(sc->vtnet_ifp,
2150                                     "TSO with ECN not negotiated with host\n");
2151                         return (ENOTSUP);
2152                 }
2153                 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2154         }
2155
2156         txq->vtntx_stats.vtxs_tso++;
2157
2158         return (0);
2159 }
2160
2161 static struct mbuf *
2162 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2163     struct virtio_net_hdr *hdr)
2164 {
2165         struct vtnet_softc *sc;
2166         int flags, etype, csum_start, proto, error;
2167
2168         sc = txq->vtntx_sc;
2169         flags = m->m_pkthdr.csum_flags;
2170
2171         error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2172         if (error)
2173                 goto drop;
2174
2175         if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
2176             (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
2177                 /*
2178                  * We could compare the IP protocol vs the CSUM_ flag too,
2179                  * but that really should not be necessary.
2180                  */
2181                 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2182                 hdr->csum_start = csum_start;
2183                 hdr->csum_offset = m->m_pkthdr.csum_data;
2184                 txq->vtntx_stats.vtxs_csum++;
2185         }
2186
2187         if (flags & CSUM_TSO) {
2188                 if (__predict_false(proto != IPPROTO_TCP)) {
2189                         /* Likely failed to correctly parse the mbuf. */
2190                         sc->vtnet_stats.tx_tso_not_tcp++;
2191                         goto drop;
2192                 }
2193
2194                 KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
2195                     ("%s: mbuf %p TSO without checksum offload %#x",
2196                     __func__, m, flags));
2197
2198                 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2199                 if (error)
2200                         goto drop;
2201         }
2202
2203         return (m);
2204
2205 drop:
2206         m_freem(m);
2207         return (NULL);
2208 }
2209
2210 static int
2211 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2212     struct vtnet_tx_header *txhdr)
2213 {
2214         struct vtnet_softc *sc;
2215         struct virtqueue *vq;
2216         struct sglist *sg;
2217         struct mbuf *m;
2218         int error;
2219
2220         sc = txq->vtntx_sc;
2221         vq = txq->vtntx_vq;
2222         sg = txq->vtntx_sg;
2223         m = *m_head;
2224
2225         sglist_reset(sg);
2226         error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2227         KASSERT(error == 0 && sg->sg_nseg == 1,
2228             ("%s: error %d adding header to sglist", __func__, error));
2229
2230         error = sglist_append_mbuf(sg, m);
2231         if (error) {
2232                 m = m_defrag(m, M_NOWAIT);
2233                 if (m == NULL)
2234                         goto fail;
2235
2236                 *m_head = m;
2237                 sc->vtnet_stats.tx_defragged++;
2238
2239                 error = sglist_append_mbuf(sg, m);
2240                 if (error)
2241                         goto fail;
2242         }
2243
2244         txhdr->vth_mbuf = m;
2245         error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2246
2247         return (error);
2248
2249 fail:
2250         sc->vtnet_stats.tx_defrag_failed++;
2251         m_freem(*m_head);
2252         *m_head = NULL;
2253
2254         return (ENOBUFS);
2255 }
2256
2257 static int
2258 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
2259 {
2260         struct vtnet_tx_header *txhdr;
2261         struct virtio_net_hdr *hdr;
2262         struct mbuf *m;
2263         int error;
2264
2265         m = *m_head;
2266         M_ASSERTPKTHDR(m);
2267
2268         txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
2269         if (txhdr == NULL) {
2270                 m_freem(m);
2271                 *m_head = NULL;
2272                 return (ENOMEM);
2273         }
2274
2275         /*
2276          * Always use the non-mergeable header, regardless if the feature
2277          * was negotiated. For transmit, num_buffers is always zero. The
2278          * vtnet_hdr_size is used to enqueue the correct header size.
2279          */
2280         hdr = &txhdr->vth_uhdr.hdr;
2281
2282         if (m->m_flags & M_VLANTAG) {
2283                 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2284                 if ((*m_head = m) == NULL) {
2285                         error = ENOBUFS;
2286                         goto fail;
2287                 }
2288                 m->m_flags &= ~M_VLANTAG;
2289         }
2290
2291         if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2292                 m = vtnet_txq_offload(txq, m, hdr);
2293                 if ((*m_head = m) == NULL) {
2294                         error = ENOBUFS;
2295                         goto fail;
2296                 }
2297         }
2298
2299         error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2300         if (error == 0)
2301                 return (0);
2302
2303 fail:
2304         uma_zfree(vtnet_tx_header_zone, txhdr);
2305
2306         return (error);
2307 }
2308
2309 #ifdef VTNET_LEGACY_TX
2310
2311 static void
2312 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
2313 {
2314         struct vtnet_softc *sc;
2315         struct virtqueue *vq;
2316         struct mbuf *m0;
2317         int tries, enq;
2318
2319         sc = txq->vtntx_sc;
2320         vq = txq->vtntx_vq;
2321         tries = 0;
2322
2323         VTNET_TXQ_LOCK_ASSERT(txq);
2324
2325         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2326             sc->vtnet_link_active == 0)
2327                 return;
2328
2329         vtnet_txq_eof(txq);
2330
2331 again:
2332         enq = 0;
2333
2334         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2335                 if (virtqueue_full(vq))
2336                         break;
2337
2338                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
2339                 if (m0 == NULL)
2340                         break;
2341
2342                 if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
2343                         if (m0 != NULL)
2344                                 IFQ_DRV_PREPEND(&ifp->if_snd, m0);
2345                         break;
2346                 }
2347
2348                 enq++;
2349                 ETHER_BPF_MTAP(ifp, m0);
2350         }
2351
2352         if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2353                 if (tries++ < VTNET_NOTIFY_RETRIES)
2354                         goto again;
2355
2356                 txq->vtntx_stats.vtxs_rescheduled++;
2357                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2358         }
2359 }
2360
2361 static void
2362 vtnet_start(struct ifnet *ifp)
2363 {
2364         struct vtnet_softc *sc;
2365         struct vtnet_txq *txq;
2366
2367         sc = ifp->if_softc;
2368         txq = &sc->vtnet_txqs[0];
2369
2370         VTNET_TXQ_LOCK(txq);
2371         vtnet_start_locked(txq, ifp);
2372         VTNET_TXQ_UNLOCK(txq);
2373 }
2374
2375 #else /* !VTNET_LEGACY_TX */
2376
2377 static int
2378 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2379 {
2380         struct vtnet_softc *sc;
2381         struct virtqueue *vq;
2382         struct buf_ring *br;
2383         struct ifnet *ifp;
2384         int enq, tries, error;
2385
2386         sc = txq->vtntx_sc;
2387         vq = txq->vtntx_vq;
2388         br = txq->vtntx_br;
2389         ifp = sc->vtnet_ifp;
2390         tries = 0;
2391         error = 0;
2392
2393         VTNET_TXQ_LOCK_ASSERT(txq);
2394
2395         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2396             sc->vtnet_link_active == 0) {
2397                 if (m != NULL)
2398                         error = drbr_enqueue(ifp, br, m);
2399                 return (error);
2400         }
2401
2402         if (m != NULL) {
2403                 error = drbr_enqueue(ifp, br, m);
2404                 if (error)
2405                         return (error);
2406         }
2407
2408         vtnet_txq_eof(txq);
2409
2410 again:
2411         enq = 0;
2412
2413         while ((m = drbr_peek(ifp, br)) != NULL) {
2414                 if (virtqueue_full(vq)) {
2415                         drbr_putback(ifp, br, m);
2416                         break;
2417                 }
2418
2419                 if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
2420                         if (m != NULL)
2421                                 drbr_putback(ifp, br, m);
2422                         else
2423                                 drbr_advance(ifp, br);
2424                         break;
2425                 }
2426                 drbr_advance(ifp, br);
2427
2428                 enq++;
2429                 ETHER_BPF_MTAP(ifp, m);
2430         }
2431
2432         if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2433                 if (tries++ < VTNET_NOTIFY_RETRIES)
2434                         goto again;
2435
2436                 txq->vtntx_stats.vtxs_rescheduled++;
2437                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2438         }
2439
2440         return (0);
2441 }
2442
2443 static int
2444 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2445 {
2446         struct vtnet_softc *sc;
2447         struct vtnet_txq *txq;
2448         int i, npairs, error;
2449
2450         sc = ifp->if_softc;
2451         npairs = sc->vtnet_act_vq_pairs;
2452
2453         /* check if flowid is set */
2454         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2455                 i = m->m_pkthdr.flowid % npairs;
2456         else
2457                 i = curcpu % npairs;
2458
2459         txq = &sc->vtnet_txqs[i];
2460
2461         if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2462                 error = vtnet_txq_mq_start_locked(txq, m);
2463                 VTNET_TXQ_UNLOCK(txq);
2464         } else {
2465                 error = drbr_enqueue(ifp, txq->vtntx_br, m);
2466                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2467         }
2468
2469         return (error);
2470 }
2471
2472 static void
2473 vtnet_txq_tq_deferred(void *xtxq, int pending)
2474 {
2475         struct vtnet_softc *sc;
2476         struct vtnet_txq *txq;
2477
2478         txq = xtxq;
2479         sc = txq->vtntx_sc;
2480
2481         VTNET_TXQ_LOCK(txq);
2482         if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2483                 vtnet_txq_mq_start_locked(txq, NULL);
2484         VTNET_TXQ_UNLOCK(txq);
2485 }
2486
2487 #endif /* VTNET_LEGACY_TX */
2488
2489 static void
2490 vtnet_txq_start(struct vtnet_txq *txq)
2491 {
2492         struct vtnet_softc *sc;
2493         struct ifnet *ifp;
2494
2495         sc = txq->vtntx_sc;
2496         ifp = sc->vtnet_ifp;
2497
2498 #ifdef VTNET_LEGACY_TX
2499         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2500                 vtnet_start_locked(txq, ifp);
2501 #else
2502         if (!drbr_empty(ifp, txq->vtntx_br))
2503                 vtnet_txq_mq_start_locked(txq, NULL);
2504 #endif
2505 }
2506
2507 static void
2508 vtnet_txq_tq_intr(void *xtxq, int pending)
2509 {
2510         struct vtnet_softc *sc;
2511         struct vtnet_txq *txq;
2512         struct ifnet *ifp;
2513
2514         txq = xtxq;
2515         sc = txq->vtntx_sc;
2516         ifp = sc->vtnet_ifp;
2517
2518         VTNET_TXQ_LOCK(txq);
2519
2520         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2521                 VTNET_TXQ_UNLOCK(txq);
2522                 return;
2523         }
2524
2525         vtnet_txq_eof(txq);
2526         vtnet_txq_start(txq);
2527
2528         VTNET_TXQ_UNLOCK(txq);
2529 }
2530
2531 static int
2532 vtnet_txq_eof(struct vtnet_txq *txq)
2533 {
2534         struct virtqueue *vq;
2535         struct vtnet_tx_header *txhdr;
2536         struct mbuf *m;
2537         int deq;
2538
2539         vq = txq->vtntx_vq;
2540         deq = 0;
2541         VTNET_TXQ_LOCK_ASSERT(txq);
2542
2543         while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2544                 m = txhdr->vth_mbuf;
2545                 deq++;
2546
2547                 txq->vtntx_stats.vtxs_opackets++;
2548                 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2549                 if (m->m_flags & M_MCAST)
2550                         txq->vtntx_stats.vtxs_omcasts++;
2551
2552                 m_freem(m);
2553                 uma_zfree(vtnet_tx_header_zone, txhdr);
2554         }
2555
2556         if (virtqueue_empty(vq))
2557                 txq->vtntx_watchdog = 0;
2558
2559         return (deq);
2560 }
2561
2562 static void
2563 vtnet_tx_vq_intr(void *xtxq)
2564 {
2565         struct vtnet_softc *sc;
2566         struct vtnet_txq *txq;
2567         struct ifnet *ifp;
2568
2569         txq = xtxq;
2570         sc = txq->vtntx_sc;
2571         ifp = sc->vtnet_ifp;
2572
2573         if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2574                 /*
2575                  * Ignore this interrupt. Either this is a spurious interrupt
2576                  * or multiqueue without per-VQ MSIX so every queue needs to
2577                  * be polled (a brain dead configuration we could try harder
2578                  * to avoid).
2579                  */
2580                 vtnet_txq_disable_intr(txq);
2581                 return;
2582         }
2583
2584 #ifdef DEV_NETMAP
2585         if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
2586                 return;
2587 #endif /* DEV_NETMAP */
2588
2589         VTNET_TXQ_LOCK(txq);
2590
2591         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2592                 VTNET_TXQ_UNLOCK(txq);
2593                 return;
2594         }
2595
2596         vtnet_txq_eof(txq);
2597         vtnet_txq_start(txq);
2598
2599         VTNET_TXQ_UNLOCK(txq);
2600 }
2601
2602 static void
2603 vtnet_tx_start_all(struct vtnet_softc *sc)
2604 {
2605         struct vtnet_txq *txq;
2606         int i;
2607
2608         VTNET_CORE_LOCK_ASSERT(sc);
2609
2610         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2611                 txq = &sc->vtnet_txqs[i];
2612
2613                 VTNET_TXQ_LOCK(txq);
2614                 vtnet_txq_start(txq);
2615                 VTNET_TXQ_UNLOCK(txq);
2616         }
2617 }
2618
2619 #ifndef VTNET_LEGACY_TX
2620 static void
2621 vtnet_qflush(struct ifnet *ifp)
2622 {
2623         struct vtnet_softc *sc;
2624         struct vtnet_txq *txq;
2625         struct mbuf *m;
2626         int i;
2627
2628         sc = ifp->if_softc;
2629
2630         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2631                 txq = &sc->vtnet_txqs[i];
2632
2633                 VTNET_TXQ_LOCK(txq);
2634                 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2635                         m_freem(m);
2636                 VTNET_TXQ_UNLOCK(txq);
2637         }
2638
2639         if_qflush(ifp);
2640 }
2641 #endif
2642
2643 static int
2644 vtnet_watchdog(struct vtnet_txq *txq)
2645 {
2646         struct ifnet *ifp;
2647
2648         ifp = txq->vtntx_sc->vtnet_ifp;
2649
2650         VTNET_TXQ_LOCK(txq);
2651         if (txq->vtntx_watchdog == 1) {
2652                 /*
2653                  * Only drain completed frames if the watchdog is about to
2654                  * expire. If any frames were drained, there may be enough
2655                  * free descriptors now available to transmit queued frames.
2656                  * In that case, the timer will immediately be decremented
2657                  * below, but the timeout is generous enough that should not
2658                  * be a problem.
2659                  */
2660                 if (vtnet_txq_eof(txq) != 0)
2661                         vtnet_txq_start(txq);
2662         }
2663
2664         if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2665                 VTNET_TXQ_UNLOCK(txq);
2666                 return (0);
2667         }
2668         VTNET_TXQ_UNLOCK(txq);
2669
2670         if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
2671         return (1);
2672 }
2673
2674 static void
2675 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
2676     struct vtnet_txq_stats *txacc)
2677 {
2678
2679         bzero(rxacc, sizeof(struct vtnet_rxq_stats));
2680         bzero(txacc, sizeof(struct vtnet_txq_stats));
2681
2682         for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2683                 struct vtnet_rxq_stats *rxst;
2684                 struct vtnet_txq_stats *txst;
2685
2686                 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
2687                 rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
2688                 rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
2689                 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
2690                 rxacc->vrxs_csum += rxst->vrxs_csum;
2691                 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
2692                 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
2693
2694                 txst = &sc->vtnet_txqs[i].vtntx_stats;
2695                 txacc->vtxs_opackets += txst->vtxs_opackets;
2696                 txacc->vtxs_obytes += txst->vtxs_obytes;
2697                 txacc->vtxs_csum += txst->vtxs_csum;
2698                 txacc->vtxs_tso += txst->vtxs_tso;
2699                 txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
2700         }
2701 }
2702
2703 static uint64_t
2704 vtnet_get_counter(if_t ifp, ift_counter cnt)
2705 {
2706         struct vtnet_softc *sc;
2707         struct vtnet_rxq_stats rxaccum;
2708         struct vtnet_txq_stats txaccum;
2709
2710         sc = if_getsoftc(ifp);
2711         vtnet_accum_stats(sc, &rxaccum, &txaccum);
2712
2713         switch (cnt) {
2714         case IFCOUNTER_IPACKETS:
2715                 return (rxaccum.vrxs_ipackets);
2716         case IFCOUNTER_IQDROPS:
2717                 return (rxaccum.vrxs_iqdrops);
2718         case IFCOUNTER_IERRORS:
2719                 return (rxaccum.vrxs_ierrors);
2720         case IFCOUNTER_OPACKETS:
2721                 return (txaccum.vtxs_opackets);
2722 #ifndef VTNET_LEGACY_TX
2723         case IFCOUNTER_OBYTES:
2724                 return (txaccum.vtxs_obytes);
2725         case IFCOUNTER_OMCASTS:
2726                 return (txaccum.vtxs_omcasts);
2727 #endif
2728         default:
2729                 return (if_get_counter_default(ifp, cnt));
2730         }
2731 }
2732
2733 static void
2734 vtnet_tick(void *xsc)
2735 {
2736         struct vtnet_softc *sc;
2737         struct ifnet *ifp;
2738         int i, timedout;
2739
2740         sc = xsc;
2741         ifp = sc->vtnet_ifp;
2742         timedout = 0;
2743
2744         VTNET_CORE_LOCK_ASSERT(sc);
2745
2746         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
2747                 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
2748
2749         if (timedout != 0) {
2750                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2751                 vtnet_init_locked(sc);
2752         } else
2753                 callout_schedule(&sc->vtnet_tick_ch, hz);
2754 }
2755
2756 static void
2757 vtnet_start_taskqueues(struct vtnet_softc *sc)
2758 {
2759         device_t dev;
2760         struct vtnet_rxq *rxq;
2761         struct vtnet_txq *txq;
2762         int i, error;
2763
2764         dev = sc->vtnet_dev;
2765
2766         /*
2767          * Errors here are very difficult to recover from - we cannot
2768          * easily fail because, if this is during boot, we will hang
2769          * when freeing any successfully started taskqueues because
2770          * the scheduler isn't up yet.
2771          *
2772          * Most drivers just ignore the return value - it only fails
2773          * with ENOMEM so an error is not likely.
2774          */
2775         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2776                 rxq = &sc->vtnet_rxqs[i];
2777                 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
2778                     "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
2779                 if (error) {
2780                         device_printf(dev, "failed to start rx taskq %d\n",
2781                             rxq->vtnrx_id);
2782                 }
2783
2784                 txq = &sc->vtnet_txqs[i];
2785                 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
2786                     "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
2787                 if (error) {
2788                         device_printf(dev, "failed to start tx taskq %d\n",
2789                             txq->vtntx_id);
2790                 }
2791         }
2792 }
2793
2794 static void
2795 vtnet_free_taskqueues(struct vtnet_softc *sc)
2796 {
2797         struct vtnet_rxq *rxq;
2798         struct vtnet_txq *txq;
2799         int i;
2800
2801         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2802                 rxq = &sc->vtnet_rxqs[i];
2803                 if (rxq->vtnrx_tq != NULL) {
2804                         taskqueue_free(rxq->vtnrx_tq);
2805                         rxq->vtnrx_tq = NULL;
2806                 }
2807
2808                 txq = &sc->vtnet_txqs[i];
2809                 if (txq->vtntx_tq != NULL) {
2810                         taskqueue_free(txq->vtntx_tq);
2811                         txq->vtntx_tq = NULL;
2812                 }
2813         }
2814 }
2815
2816 static void
2817 vtnet_drain_taskqueues(struct vtnet_softc *sc)
2818 {
2819         struct vtnet_rxq *rxq;
2820         struct vtnet_txq *txq;
2821         int i;
2822
2823         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2824                 rxq = &sc->vtnet_rxqs[i];
2825                 if (rxq->vtnrx_tq != NULL)
2826                         taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2827
2828                 txq = &sc->vtnet_txqs[i];
2829                 if (txq->vtntx_tq != NULL) {
2830                         taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
2831 #ifndef VTNET_LEGACY_TX
2832                         taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
2833 #endif
2834                 }
2835         }
2836 }
2837
2838 static void
2839 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
2840 {
2841         struct vtnet_rxq *rxq;
2842         struct vtnet_txq *txq;
2843         int i;
2844
2845         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2846                 rxq = &sc->vtnet_rxqs[i];
2847                 vtnet_rxq_free_mbufs(rxq);
2848
2849                 txq = &sc->vtnet_txqs[i];
2850                 vtnet_txq_free_mbufs(txq);
2851         }
2852 }
2853
2854 static void
2855 vtnet_stop_rendezvous(struct vtnet_softc *sc)
2856 {
2857         struct vtnet_rxq *rxq;
2858         struct vtnet_txq *txq;
2859         int i;
2860
2861         /*
2862          * Lock and unlock the per-queue mutex so we known the stop
2863          * state is visible. Doing only the active queues should be
2864          * sufficient, but it does not cost much extra to do all the
2865          * queues. Note we hold the core mutex here too.
2866          */
2867         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2868                 rxq = &sc->vtnet_rxqs[i];
2869                 VTNET_RXQ_LOCK(rxq);
2870                 VTNET_RXQ_UNLOCK(rxq);
2871
2872                 txq = &sc->vtnet_txqs[i];
2873                 VTNET_TXQ_LOCK(txq);
2874                 VTNET_TXQ_UNLOCK(txq);
2875         }
2876 }
2877
2878 static void
2879 vtnet_stop(struct vtnet_softc *sc)
2880 {
2881         device_t dev;
2882         struct ifnet *ifp;
2883
2884         dev = sc->vtnet_dev;
2885         ifp = sc->vtnet_ifp;
2886
2887         VTNET_CORE_LOCK_ASSERT(sc);
2888
2889         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2890         sc->vtnet_link_active = 0;
2891         callout_stop(&sc->vtnet_tick_ch);
2892
2893         /* Only advisory. */
2894         vtnet_disable_interrupts(sc);
2895
2896         /*
2897          * Stop the host adapter. This resets it to the pre-initialized
2898          * state. It will not generate any interrupts until after it is
2899          * reinitialized.
2900          */
2901         virtio_stop(dev);
2902         vtnet_stop_rendezvous(sc);
2903
2904         /* Free any mbufs left in the virtqueues. */
2905         vtnet_drain_rxtx_queues(sc);
2906 }
2907
2908 static int
2909 vtnet_virtio_reinit(struct vtnet_softc *sc)
2910 {
2911         device_t dev;
2912         struct ifnet *ifp;
2913         uint64_t features;
2914         int mask, error;
2915
2916         dev = sc->vtnet_dev;
2917         ifp = sc->vtnet_ifp;
2918         features = sc->vtnet_features;
2919
2920         mask = 0;
2921 #if defined(INET)
2922         mask |= IFCAP_RXCSUM;
2923 #endif
2924 #if defined (INET6)
2925         mask |= IFCAP_RXCSUM_IPV6;
2926 #endif
2927
2928         /*
2929          * Re-negotiate with the host, removing any disabled receive
2930          * features. Transmit features are disabled only on our side
2931          * via if_capenable and if_hwassist.
2932          */
2933
2934         if (ifp->if_capabilities & mask) {
2935                 /*
2936                  * We require both IPv4 and IPv6 offloading to be enabled
2937                  * in order to negotiated it: VirtIO does not distinguish
2938                  * between the two.
2939                  */
2940                 if ((ifp->if_capenable & mask) != mask)
2941                         features &= ~VIRTIO_NET_F_GUEST_CSUM;
2942         }
2943
2944         if (ifp->if_capabilities & IFCAP_LRO) {
2945                 if ((ifp->if_capenable & IFCAP_LRO) == 0)
2946                         features &= ~VTNET_LRO_FEATURES;
2947         }
2948
2949         if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2950                 if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2951                         features &= ~VIRTIO_NET_F_CTRL_VLAN;
2952         }
2953
2954         error = virtio_reinit(dev, features);
2955         if (error)
2956                 device_printf(dev, "virtio reinit error %d\n", error);
2957
2958         return (error);
2959 }
2960
2961 static void
2962 vtnet_init_rx_filters(struct vtnet_softc *sc)
2963 {
2964         struct ifnet *ifp;
2965
2966         ifp = sc->vtnet_ifp;
2967
2968         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
2969                 /* Restore promiscuous and all-multicast modes. */
2970                 vtnet_rx_filter(sc);
2971                 /* Restore filtered MAC addresses. */
2972                 vtnet_rx_filter_mac(sc);
2973         }
2974
2975         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
2976                 vtnet_rx_filter_vlan(sc);
2977 }
2978
2979 static int
2980 vtnet_init_rx_queues(struct vtnet_softc *sc)
2981 {
2982         device_t dev;
2983         struct vtnet_rxq *rxq;
2984         int i, clsize, error;
2985
2986         dev = sc->vtnet_dev;
2987
2988         /*
2989          * Use the new cluster size if one has been set (via a MTU
2990          * change). Otherwise, use the standard 2K clusters.
2991          *
2992          * BMV: It might make sense to use page sized clusters as
2993          * the default (depending on the features negotiated).
2994          */
2995         if (sc->vtnet_rx_new_clsize != 0) {
2996                 clsize = sc->vtnet_rx_new_clsize;
2997                 sc->vtnet_rx_new_clsize = 0;
2998         } else
2999                 clsize = MCLBYTES;
3000
3001         sc->vtnet_rx_clsize = clsize;
3002         sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
3003
3004         KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS ||
3005             sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
3006             ("%s: too many rx mbufs %d for %d segments", __func__,
3007             sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
3008
3009         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3010                 rxq = &sc->vtnet_rxqs[i];
3011
3012                 /* Hold the lock to satisfy asserts. */
3013                 VTNET_RXQ_LOCK(rxq);
3014                 error = vtnet_rxq_populate(rxq);
3015                 VTNET_RXQ_UNLOCK(rxq);
3016
3017                 if (error) {
3018                         device_printf(dev,
3019                             "cannot allocate mbufs for Rx queue %d\n", i);
3020                         return (error);
3021                 }
3022         }
3023
3024         return (0);
3025 }
3026
3027 static int
3028 vtnet_init_tx_queues(struct vtnet_softc *sc)
3029 {
3030         struct vtnet_txq *txq;
3031         int i;
3032
3033         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3034                 txq = &sc->vtnet_txqs[i];
3035                 txq->vtntx_watchdog = 0;
3036         }
3037
3038         return (0);
3039 }
3040
/*
 * Initialize the Rx queues and then the Tx queues. The Tx step is skipped
 * when the Rx step fails; the first non-zero error is returned.
 */
static int
vtnet_init_rxtx_queues(struct vtnet_softc *sc)
{
        int error;

        error = vtnet_init_rx_queues(sc);
        if (error == 0)
                error = vtnet_init_tx_queues(sc);

        return (error);
}
3056
3057 static void
3058 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
3059 {
3060         device_t dev;
3061         int npairs;
3062
3063         dev = sc->vtnet_dev;
3064
3065         if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
3066                 sc->vtnet_act_vq_pairs = 1;
3067                 return;
3068         }
3069
3070         npairs = sc->vtnet_requested_vq_pairs;
3071
3072         if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3073                 device_printf(dev,
3074                     "cannot set active queue pairs to %d\n", npairs);
3075                 npairs = 1;
3076         }
3077
3078         sc->vtnet_act_vq_pairs = npairs;
3079 }
3080
3081 static int
3082 vtnet_reinit(struct vtnet_softc *sc)
3083 {
3084         struct ifnet *ifp;
3085         int error;
3086
3087         ifp = sc->vtnet_ifp;
3088
3089         /* Use the current MAC address. */
3090         bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3091         vtnet_set_hwaddr(sc);
3092
3093         vtnet_set_active_vq_pairs(sc);
3094
3095         ifp->if_hwassist = 0;
3096         if (ifp->if_capenable & IFCAP_TXCSUM)
3097                 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
3098         if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3099                 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
3100         if (ifp->if_capenable & IFCAP_TSO4)
3101                 ifp->if_hwassist |= CSUM_IP_TSO;
3102         if (ifp->if_capenable & IFCAP_TSO6)
3103                 ifp->if_hwassist |= CSUM_IP6_TSO;
3104
3105         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3106                 vtnet_init_rx_filters(sc);
3107
3108         error = vtnet_init_rxtx_queues(sc);
3109         if (error)
3110                 return (error);
3111
3112         vtnet_enable_interrupts(sc);
3113         ifp->if_drv_flags |= IFF_DRV_RUNNING;
3114
3115         return (0);
3116 }
3117
3118 static void
3119 vtnet_init_locked(struct vtnet_softc *sc)
3120 {
3121         device_t dev;
3122         struct ifnet *ifp;
3123
3124         dev = sc->vtnet_dev;
3125         ifp = sc->vtnet_ifp;
3126
3127         VTNET_CORE_LOCK_ASSERT(sc);
3128
3129         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3130                 return;
3131
3132         vtnet_stop(sc);
3133
3134         /* Reinitialize with the host. */
3135         if (vtnet_virtio_reinit(sc) != 0)
3136                 goto fail;
3137
3138         if (vtnet_reinit(sc) != 0)
3139                 goto fail;
3140
3141         virtio_reinit_complete(dev);
3142
3143         vtnet_update_link_status(sc);
3144         callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3145
3146         return;
3147
3148 fail:
3149         vtnet_stop(sc);
3150 }
3151
/*
 * Unlocked wrapper around vtnet_init_locked(): takes the core lock for
 * the duration of the call. xsc is the struct vtnet_softc.
 */
static void
vtnet_init(void *xsc)
{
        struct vtnet_softc *sc = xsc;

        VTNET_CORE_LOCK(sc);
        vtnet_init_locked(sc);
        VTNET_CORE_UNLOCK(sc);
}
3163
/*
 * Sanity-check that the control virtqueue holds no outstanding buffers.
 * The only action taken is the KASSERT below; nothing is released here.
 */
static void
vtnet_free_ctrl_vq(struct vtnet_softc *sc)
{
        struct virtqueue *vq;

        vq = sc->vtnet_ctrl_vq;

        /*
         * The control virtqueue is only polled and therefore it should
         * already be empty.
         */
        KASSERT(virtqueue_empty(vq),
            ("%s: ctrl vq %p not empty", __func__, vq));
}
3178
3179 static void
3180 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3181     struct sglist *sg, int readable, int writable)
3182 {
3183         struct virtqueue *vq;
3184
3185         vq = sc->vtnet_ctrl_vq;
3186
3187         VTNET_CORE_LOCK_ASSERT(sc);
3188         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
3189             ("%s: CTRL_VQ feature not negotiated", __func__));
3190
3191         if (!virtqueue_empty(vq))
3192                 return;
3193         if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
3194                 return;
3195
3196         /*
3197          * Poll for the response, but the command is likely already
3198          * done when we return from the notify.
3199          */
3200         virtqueue_notify(vq);
3201         virtqueue_poll(vq, NULL);
3202 }
3203
3204 static int
3205 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3206 {
3207         struct virtio_net_ctrl_hdr hdr __aligned(2);
3208         struct sglist_seg segs[3];
3209         struct sglist sg;
3210         uint8_t ack;
3211         int error;
3212
3213         hdr.class = VIRTIO_NET_CTRL_MAC;
3214         hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3215         ack = VIRTIO_NET_ERR;
3216
3217         sglist_init(&sg, 3, segs);
3218         error = 0;
3219         error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3220         error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
3221         error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3222         KASSERT(error == 0 && sg.sg_nseg == 3,
3223             ("%s: error %d adding set MAC msg to sglist", __func__, error));
3224
3225         vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3226
3227         return (ack == VIRTIO_NET_OK ? 0 : EIO);
3228 }
3229
3230 static int
3231 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3232 {
3233         struct sglist_seg segs[3];
3234         struct sglist sg;
3235         struct {
3236                 struct virtio_net_ctrl_hdr hdr;
3237                 uint8_t pad1;
3238                 struct virtio_net_ctrl_mq mq;
3239                 uint8_t pad2;
3240                 uint8_t ack;
3241         } s __aligned(2);
3242         int error;
3243
3244         s.hdr.class = VIRTIO_NET_CTRL_MQ;
3245         s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3246         s.mq.virtqueue_pairs = npairs;
3247         s.ack = VIRTIO_NET_ERR;
3248
3249         sglist_init(&sg, 3, segs);
3250         error = 0;
3251         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3252         error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3253         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3254         KASSERT(error == 0 && sg.sg_nseg == 3,
3255             ("%s: error %d adding MQ message to sglist", __func__, error));
3256
3257         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3258
3259         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3260 }
3261
3262 static int
3263 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
3264 {
3265         struct sglist_seg segs[3];
3266         struct sglist sg;
3267         struct {
3268                 struct virtio_net_ctrl_hdr hdr;
3269                 uint8_t pad1;
3270                 uint8_t onoff;
3271                 uint8_t pad2;
3272                 uint8_t ack;
3273         } s __aligned(2);
3274         int error;
3275
3276         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3277             ("%s: CTRL_RX feature not negotiated", __func__));
3278
3279         s.hdr.class = VIRTIO_NET_CTRL_RX;
3280         s.hdr.cmd = cmd;
3281         s.onoff = !!on;
3282         s.ack = VIRTIO_NET_ERR;
3283
3284         sglist_init(&sg, 3, segs);
3285         error = 0;
3286         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3287         error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3288         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3289         KASSERT(error == 0 && sg.sg_nseg == 3,
3290             ("%s: error %d adding Rx message to sglist", __func__, error));
3291
3292         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3293
3294         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3295 }
3296
3297 static int
3298 vtnet_set_promisc(struct vtnet_softc *sc, int on)
3299 {
3300
3301         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3302 }
3303
3304 static int
3305 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
3306 {
3307
3308         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3309 }
3310
3311 /*
3312  * The device defaults to promiscuous mode for backwards compatibility.
3313  * Turn it off at attach time if possible.
3314  */
3315 static void
3316 vtnet_attach_disable_promisc(struct vtnet_softc *sc)
3317 {
3318         struct ifnet *ifp;
3319
3320         ifp = sc->vtnet_ifp;
3321
3322         VTNET_CORE_LOCK(sc);
3323         if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
3324                 ifp->if_flags |= IFF_PROMISC;
3325         } else if (vtnet_set_promisc(sc, 0) != 0) {
3326                 ifp->if_flags |= IFF_PROMISC;
3327                 device_printf(sc->vtnet_dev,
3328                     "cannot disable default promiscuous mode\n");
3329         }
3330         VTNET_CORE_UNLOCK(sc);
3331 }
3332
3333 static void
3334 vtnet_rx_filter(struct vtnet_softc *sc)
3335 {
3336         device_t dev;
3337         struct ifnet *ifp;
3338
3339         dev = sc->vtnet_dev;
3340         ifp = sc->vtnet_ifp;
3341
3342         VTNET_CORE_LOCK_ASSERT(sc);
3343
3344         if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
3345                 device_printf(dev, "cannot %s promiscuous mode\n",
3346                     ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
3347
3348         if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
3349                 device_printf(dev, "cannot %s all-multicast mode\n",
3350                     ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
3351 }
3352
3353 static u_int
3354 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
3355 {
3356         struct vtnet_softc *sc = arg;
3357
3358         if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3359                 return (0);
3360
3361         if (ucnt < VTNET_MAX_MAC_ENTRIES)
3362                 bcopy(LLADDR(sdl),
3363                     &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
3364                     ETHER_ADDR_LEN);
3365
3366         return (1);
3367 }
3368
3369 static u_int
3370 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
3371 {
3372         struct vtnet_mac_filter *filter = arg;
3373
3374         if (mcnt < VTNET_MAX_MAC_ENTRIES)
3375                 bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
3376                     ETHER_ADDR_LEN);
3377
3378         return (1);
3379 }
3380
3381 static void
3382 vtnet_rx_filter_mac(struct vtnet_softc *sc)
3383 {
3384         struct virtio_net_ctrl_hdr hdr __aligned(2);
3385         struct vtnet_mac_filter *filter;
3386         struct sglist_seg segs[4];
3387         struct sglist sg;
3388         struct ifnet *ifp;
3389         bool promisc, allmulti;
3390         u_int ucnt, mcnt;
3391         int error;
3392         uint8_t ack;
3393
3394         ifp = sc->vtnet_ifp;
3395         filter = sc->vtnet_mac_filter;
3396
3397         VTNET_CORE_LOCK_ASSERT(sc);
3398         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3399             ("%s: CTRL_RX feature not negotiated", __func__));
3400
3401         /* Unicast MAC addresses: */
3402         ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
3403         promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
3404
3405         if (promisc) {
3406                 filter->vmf_unicast.nentries = 0;
3407                 if_printf(ifp, "more than %d MAC addresses assigned, "
3408                     "falling back to promiscuous mode\n",
3409                     VTNET_MAX_MAC_ENTRIES);
3410         } else
3411                 filter->vmf_unicast.nentries = ucnt;
3412
3413         /* Multicast MAC addresses: */
3414         mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
3415         allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
3416
3417         if (allmulti) {
3418                 filter->vmf_multicast.nentries = 0;
3419                 if_printf(ifp, "more than %d multicast MAC addresses "
3420                     "assigned, falling back to all-multicast mode\n",
3421                     VTNET_MAX_MAC_ENTRIES);
3422         } else
3423                 filter->vmf_multicast.nentries = mcnt;
3424
3425         if (promisc && allmulti)
3426                 goto out;
3427
3428         hdr.class = VIRTIO_NET_CTRL_MAC;
3429         hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3430         ack = VIRTIO_NET_ERR;
3431
3432         sglist_init(&sg, 4, segs);
3433         error = 0;
3434         error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3435         error |= sglist_append(&sg, &filter->vmf_unicast,
3436             sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
3437         error |= sglist_append(&sg, &filter->vmf_multicast,
3438             sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
3439         error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3440         KASSERT(error == 0 && sg.sg_nseg == 4,
3441             ("%s: error %d adding MAC filter msg to sglist", __func__, error));
3442
3443         vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3444
3445         if (ack != VIRTIO_NET_OK)
3446                 if_printf(ifp, "error setting host MAC filter table\n");
3447
3448 out:
3449         if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
3450                 if_printf(ifp, "cannot enable promiscuous mode\n");
3451         if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
3452                 if_printf(ifp, "cannot enable all-multicast mode\n");
3453 }
3454
3455 static int
3456 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3457 {
3458         struct sglist_seg segs[3];
3459         struct sglist sg;
3460         struct {
3461                 struct virtio_net_ctrl_hdr hdr;
3462                 uint8_t pad1;
3463                 uint16_t tag;
3464                 uint8_t pad2;
3465                 uint8_t ack;
3466         } s __aligned(2);
3467         int error;
3468
3469         s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3470         s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3471         s.tag = tag;
3472         s.ack = VIRTIO_NET_ERR;
3473
3474         sglist_init(&sg, 3, segs);
3475         error = 0;
3476         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3477         error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3478         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3479         KASSERT(error == 0 && sg.sg_nseg == 3,
3480             ("%s: error %d adding VLAN message to sglist", __func__, error));
3481
3482         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3483
3484         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3485 }
3486
3487 static void
3488 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3489 {
3490         uint32_t w;
3491         uint16_t tag;
3492         int i, bit;
3493
3494         VTNET_CORE_LOCK_ASSERT(sc);
3495         KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
3496             ("%s: VLAN_FILTER feature not negotiated", __func__));
3497
3498         /* Enable the filter for each configured VLAN. */
3499         for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3500                 w = sc->vtnet_vlan_filter[i];
3501
3502                 while ((bit = ffs(w) - 1) != -1) {
3503                         w &= ~(1 << bit);
3504                         tag = sizeof(w) * CHAR_BIT * i + bit;
3505
3506                         if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3507                                 device_printf(sc->vtnet_dev,
3508                                     "cannot enable VLAN %d filter\n", tag);
3509                         }
3510                 }
3511         }
3512 }
3513
3514 static void
3515 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3516 {
3517         struct ifnet *ifp;
3518         int idx, bit;
3519
3520         ifp = sc->vtnet_ifp;
3521         idx = (tag >> 5) & 0x7F;
3522         bit = tag & 0x1F;
3523
3524         if (tag == 0 || tag > 4095)
3525                 return;
3526
3527         VTNET_CORE_LOCK(sc);
3528
3529         if (add)
3530                 sc->vtnet_vlan_filter[idx] |= (1 << bit);
3531         else
3532                 sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3533
3534         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
3535             ifp->if_drv_flags & IFF_DRV_RUNNING &&
3536             vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3537                 device_printf(sc->vtnet_dev,
3538                     "cannot %s VLAN %d %s the host filter table\n",
3539                     add ? "add" : "remove", tag, add ? "to" : "from");
3540         }
3541
3542         VTNET_CORE_UNLOCK(sc);
3543 }
3544
3545 static void
3546 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3547 {
3548
3549         if (ifp->if_softc != arg)
3550                 return;
3551
3552         vtnet_update_vlan_filter(arg, 1, tag);
3553 }
3554
3555 static void
3556 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3557 {
3558
3559         if (ifp->if_softc != arg)
3560                 return;
3561
3562         vtnet_update_vlan_filter(arg, 0, tag);
3563 }
3564
3565 static int
3566 vtnet_is_link_up(struct vtnet_softc *sc)
3567 {
3568         device_t dev;
3569         struct ifnet *ifp;
3570         uint16_t status;
3571
3572         dev = sc->vtnet_dev;
3573         ifp = sc->vtnet_ifp;
3574
3575         if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
3576                 status = VIRTIO_NET_S_LINK_UP;
3577         else
3578                 status = virtio_read_dev_config_2(dev,
3579                     offsetof(struct virtio_net_config, status));
3580
3581         return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3582 }
3583
3584 static void
3585 vtnet_update_link_status(struct vtnet_softc *sc)
3586 {
3587         struct ifnet *ifp;
3588         int link;
3589
3590         ifp = sc->vtnet_ifp;
3591
3592         VTNET_CORE_LOCK_ASSERT(sc);
3593         link = vtnet_is_link_up(sc);
3594
3595         /* Notify if the link status has changed. */
3596         if (link != 0 && sc->vtnet_link_active == 0) {
3597                 sc->vtnet_link_active = 1;
3598                 if_link_state_change(ifp, LINK_STATE_UP);
3599         } else if (link == 0 && sc->vtnet_link_active != 0) {
3600                 sc->vtnet_link_active = 0;
3601                 if_link_state_change(ifp, LINK_STATE_DOWN);
3602         }
3603 }
3604
3605 static int
3606 vtnet_ifmedia_upd(struct ifnet *ifp)
3607 {
3608         struct vtnet_softc *sc;
3609         struct ifmedia *ifm;
3610
3611         sc = ifp->if_softc;
3612         ifm = &sc->vtnet_media;
3613
3614         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3615                 return (EINVAL);
3616
3617         return (0);
3618 }
3619
3620 static void
3621 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3622 {
3623         struct vtnet_softc *sc;
3624
3625         sc = ifp->if_softc;
3626
3627         ifmr->ifm_status = IFM_AVALID;
3628         ifmr->ifm_active = IFM_ETHER;
3629
3630         VTNET_CORE_LOCK(sc);
3631         if (vtnet_is_link_up(sc) != 0) {
3632                 ifmr->ifm_status |= IFM_ACTIVE;
3633                 ifmr->ifm_active |= VTNET_MEDIATYPE;
3634         } else
3635                 ifmr->ifm_active |= IFM_NONE;
3636         VTNET_CORE_UNLOCK(sc);
3637 }
3638
3639 static void
3640 vtnet_set_hwaddr(struct vtnet_softc *sc)
3641 {
3642         device_t dev;
3643         int i;
3644
3645         dev = sc->vtnet_dev;
3646
3647         if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
3648                 if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
3649                         device_printf(dev, "unable to set MAC address\n");
3650         } else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
3651                 for (i = 0; i < ETHER_ADDR_LEN; i++) {
3652                         virtio_write_dev_config_1(dev,
3653                             offsetof(struct virtio_net_config, mac) + i,
3654                             sc->vtnet_hwaddr[i]);
3655                 }
3656         }
3657 }
3658
3659 static void
3660 vtnet_get_hwaddr(struct vtnet_softc *sc)
3661 {
3662         device_t dev;
3663         int i;
3664
3665         dev = sc->vtnet_dev;
3666
3667         if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
3668                 /*
3669                  * Generate a random locally administered unicast address.
3670                  *
3671                  * It would be nice to generate the same MAC address across
3672                  * reboots, but it seems all the hosts currently available
3673                  * support the MAC feature, so this isn't too important.
3674                  */
3675                 sc->vtnet_hwaddr[0] = 0xB2;
3676                 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
3677                 vtnet_set_hwaddr(sc);
3678                 return;
3679         }
3680
3681         for (i = 0; i < ETHER_ADDR_LEN; i++) {
3682                 sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev,
3683                     offsetof(struct virtio_net_config, mac) + i);
3684         }
3685 }
3686
3687 static void
3688 vtnet_vlan_tag_remove(struct mbuf *m)
3689 {
3690         struct ether_vlan_header *evh;
3691
3692         evh = mtod(m, struct ether_vlan_header *);
3693         m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
3694         m->m_flags |= M_VLANTAG;
3695
3696         /* Strip the 802.1Q header. */
3697         bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
3698             ETHER_HDR_LEN - ETHER_TYPE_LEN);
3699         m_adj(m, ETHER_VLAN_ENCAP_LEN);
3700 }
3701
3702 static void
3703 vtnet_set_rx_process_limit(struct vtnet_softc *sc)
3704 {
3705         int limit;
3706
3707         limit = vtnet_tunable_int(sc, "rx_process_limit",
3708             vtnet_rx_process_limit);
3709         if (limit < 0)
3710                 limit = INT_MAX;
3711         sc->vtnet_rx_process_limit = limit;
3712 }
3713
3714 static void
3715 vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
3716 {
3717         int size, thresh;
3718
3719         size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
3720
3721         /*
3722          * The Tx interrupt is disabled until the queue free count falls
3723          * below our threshold. Completed frames are drained from the Tx
3724          * virtqueue before transmitting new frames and in the watchdog
3725          * callout, so the frequency of Tx interrupts is greatly reduced,
3726          * at the cost of not freeing mbufs as quickly as they otherwise
3727          * would be.
3728          *
3729          * N.B. We assume all the Tx queues are the same size.
3730          */
3731         thresh = size / 4;
3732
3733         /*
3734          * Without indirect descriptors, leave enough room for the most
3735          * segments we handle.
3736          */
3737         if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
3738             thresh < sc->vtnet_tx_nsegs)
3739                 thresh = sc->vtnet_tx_nsegs;
3740
3741         sc->vtnet_tx_intr_thresh = thresh;
3742 }
3743
3744 static void
3745 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
3746     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
3747 {
3748         struct sysctl_oid *node;
3749         struct sysctl_oid_list *list;
3750         struct vtnet_rxq_stats *stats;
3751         char namebuf[16];
3752
3753         snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
3754         node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3755             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
3756         list = SYSCTL_CHILDREN(node);
3757
3758         stats = &rxq->vtnrx_stats;
3759
3760         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3761             &stats->vrxs_ipackets, "Receive packets");
3762         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3763             &stats->vrxs_ibytes, "Receive bytes");
3764         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3765             &stats->vrxs_iqdrops, "Receive drops");
3766         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3767             &stats->vrxs_ierrors, "Receive errors");
3768         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3769             &stats->vrxs_csum, "Receive checksum offloaded");
3770         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
3771             &stats->vrxs_csum_failed, "Receive checksum offload failed");
3772         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3773             &stats->vrxs_rescheduled,
3774             "Receive interrupt handler rescheduled");
3775 }
3776
3777 static void
3778 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
3779     struct sysctl_oid_list *child, struct vtnet_txq *txq)
3780 {
3781         struct sysctl_oid *node;
3782         struct sysctl_oid_list *list;
3783         struct vtnet_txq_stats *stats;
3784         char namebuf[16];
3785
3786         snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
3787         node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3788             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
3789         list = SYSCTL_CHILDREN(node);
3790
3791         stats = &txq->vtntx_stats;
3792
3793         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3794             &stats->vtxs_opackets, "Transmit packets");
3795         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3796             &stats->vtxs_obytes, "Transmit bytes");
3797         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3798             &stats->vtxs_omcasts, "Transmit multicasts");
3799         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3800             &stats->vtxs_csum, "Transmit checksum offloaded");
3801         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3802             &stats->vtxs_tso, "Transmit segmentation offloaded");
3803         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3804             &stats->vtxs_rescheduled,
3805             "Transmit interrupt handler rescheduled");
3806 }
3807
3808 static void
3809 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
3810 {
3811         device_t dev;
3812         struct sysctl_ctx_list *ctx;
3813         struct sysctl_oid *tree;
3814         struct sysctl_oid_list *child;
3815         int i;
3816
3817         dev = sc->vtnet_dev;
3818         ctx = device_get_sysctl_ctx(dev);
3819         tree = device_get_sysctl_tree(dev);
3820         child = SYSCTL_CHILDREN(tree);
3821
3822         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3823                 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
3824                 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
3825         }
3826 }
3827
3828 static void
3829 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
3830     struct sysctl_oid_list *child, struct vtnet_softc *sc)
3831 {
3832         struct vtnet_statistics *stats;
3833         struct vtnet_rxq_stats rxaccum;
3834         struct vtnet_txq_stats txaccum;
3835
3836         vtnet_accum_stats(sc, &rxaccum, &txaccum);
3837
3838         stats = &sc->vtnet_stats;
3839         stats->rx_csum_offloaded = rxaccum.vrxs_csum;
3840         stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
3841         stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
3842         stats->tx_csum_offloaded = txaccum.vtxs_csum;
3843         stats->tx_tso_offloaded = txaccum.vtxs_tso;
3844         stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
3845
3846         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
3847             CTLFLAG_RD, &stats->mbuf_alloc_failed,
3848             "Mbuf cluster allocation failures");
3849
3850         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
3851             CTLFLAG_RD, &stats->rx_frame_too_large,
3852             "Received frame larger than the mbuf chain");
3853         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
3854             CTLFLAG_RD, &stats->rx_enq_replacement_failed,
3855             "Enqueuing the replacement receive mbuf failed");
3856         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
3857             CTLFLAG_RD, &stats->rx_mergeable_failed,
3858             "Mergeable buffers receive failures");
3859         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
3860             CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
3861             "Received checksum offloaded buffer with unsupported "
3862             "Ethernet type");
3863         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
3864             CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
3865             "Received checksum offloaded buffer with incorrect IP protocol");
3866         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
3867             CTLFLAG_RD, &stats->rx_csum_bad_offset,
3868             "Received checksum offloaded buffer with incorrect offset");
3869         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
3870             CTLFLAG_RD, &stats->rx_csum_bad_proto,
3871             "Received checksum offloaded buffer with incorrect protocol");
3872         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
3873             CTLFLAG_RD, &stats->rx_csum_failed,
3874             "Received buffer checksum offload failed");
3875         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
3876             CTLFLAG_RD, &stats->rx_csum_offloaded,
3877             "Received buffer checksum offload succeeded");
3878         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
3879             CTLFLAG_RD, &stats->rx_task_rescheduled,
3880             "Times the receive interrupt task rescheduled itself");
3881
3882         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
3883             CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
3884             "Aborted transmit of checksum offloaded buffer with unknown "
3885             "Ethernet type");
3886         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
3887             CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
3888             "Aborted transmit of TSO buffer with unknown Ethernet type");
3889         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
3890             CTLFLAG_RD, &stats->tx_tso_not_tcp,
3891             "Aborted transmit of TSO buffer with non TCP protocol");
3892         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
3893             CTLFLAG_RD, &stats->tx_defragged,
3894             "Transmit mbufs defragged");
3895         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
3896             CTLFLAG_RD, &stats->tx_defrag_failed,
3897             "Aborted transmit of buffer because defrag failed");
3898         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
3899             CTLFLAG_RD, &stats->tx_csum_offloaded,
3900             "Offloaded checksum of transmitted buffer");
3901         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
3902             CTLFLAG_RD, &stats->tx_tso_offloaded,
3903             "Segmentation offload of transmitted buffer");
3904         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
3905             CTLFLAG_RD, &stats->tx_task_rescheduled,
3906             "Times the transmit interrupt task rescheduled itself");
3907 }
3908
3909 static void
3910 vtnet_setup_sysctl(struct vtnet_softc *sc)
3911 {
3912         device_t dev;
3913         struct sysctl_ctx_list *ctx;
3914         struct sysctl_oid *tree;
3915         struct sysctl_oid_list *child;
3916
3917         dev = sc->vtnet_dev;
3918         ctx = device_get_sysctl_ctx(dev);
3919         tree = device_get_sysctl_tree(dev);
3920         child = SYSCTL_CHILDREN(tree);
3921
3922         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
3923             CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
3924             "Maximum number of supported virtqueue pairs");
3925         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs",
3926             CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0,
3927             "Requested number of virtqueue pairs");
3928         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
3929             CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
3930             "Number of active virtqueue pairs");
3931
3932         vtnet_setup_stat_sysctl(ctx, child, sc);
3933 }
3934
3935 static int
3936 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
3937 {
3938
3939         return (virtqueue_enable_intr(rxq->vtnrx_vq));
3940 }
3941
3942 static void
3943 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
3944 {
3945
3946         virtqueue_disable_intr(rxq->vtnrx_vq);
3947 }
3948
3949 static int
3950 vtnet_txq_enable_intr(struct vtnet_txq *txq)
3951 {
3952         struct virtqueue *vq;
3953
3954         vq = txq->vtntx_vq;
3955
3956         if (vtnet_txq_below_threshold(txq) != 0)
3957                 return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
3958
3959         /*
3960          * The free count is above our threshold. Keep the Tx interrupt
3961          * disabled until the queue is fuller.
3962          */
3963         return (0);
3964 }
3965
3966 static void
3967 vtnet_txq_disable_intr(struct vtnet_txq *txq)
3968 {
3969
3970         virtqueue_disable_intr(txq->vtntx_vq);
3971 }
3972
3973 static void
3974 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
3975 {
3976         int i;
3977
3978         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3979                 vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
3980 }
3981
3982 static void
3983 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
3984 {
3985         int i;
3986
3987         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3988                 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
3989 }
3990
/* Enable interrupts for all active queues: receive first, then transmit. */
static void
vtnet_enable_interrupts(struct vtnet_softc *sc)
{

        vtnet_enable_rx_interrupts(sc);         /* Rx side */
        vtnet_enable_tx_interrupts(sc);         /* Tx side */
}
3998
3999 static void
4000 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
4001 {
4002         int i;
4003
4004         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4005                 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
4006 }
4007
4008 static void
4009 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
4010 {
4011         int i;
4012
4013         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4014                 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
4015 }
4016
/* Disable interrupts for all active queues: receive first, then transmit. */
static void
vtnet_disable_interrupts(struct vtnet_softc *sc)
{

        vtnet_disable_rx_interrupts(sc);        /* Rx side */
        vtnet_disable_tx_interrupts(sc);        /* Tx side */
}
4024
4025 static int
4026 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
4027 {
4028         char path[64];
4029
4030         snprintf(path, sizeof(path),
4031             "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
4032         TUNABLE_INT_FETCH(path, &def);
4033
4034         return (def);
4035 }
4036
4037 #ifdef DEBUGNET
4038 static void
4039 vtnet_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
4040 {
4041         struct vtnet_softc *sc;
4042
4043         sc = if_getsoftc(ifp);
4044
4045         VTNET_CORE_LOCK(sc);
4046         *nrxr = sc->vtnet_max_vq_pairs;
4047         *ncl = DEBUGNET_MAX_IN_FLIGHT;
4048         *clsize = sc->vtnet_rx_clsize;
4049         VTNET_CORE_UNLOCK(sc);
4050 }
4051
4052 static void
4053 vtnet_debugnet_event(struct ifnet *ifp __unused, enum debugnet_ev event __unused)
4054 {
4055 }
4056
4057 static int
4058 vtnet_debugnet_transmit(struct ifnet *ifp, struct mbuf *m)
4059 {
4060         struct vtnet_softc *sc;
4061         struct vtnet_txq *txq;
4062         int error;
4063
4064         sc = if_getsoftc(ifp);
4065         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4066             IFF_DRV_RUNNING)
4067                 return (EBUSY);
4068
4069         txq = &sc->vtnet_txqs[0];
4070         error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
4071         if (error == 0)
4072                 (void)vtnet_txq_notify(txq);
4073         return (error);
4074 }
4075
4076 static int
4077 vtnet_debugnet_poll(struct ifnet *ifp, int count)
4078 {
4079         struct vtnet_softc *sc;
4080         int i;
4081
4082         sc = if_getsoftc(ifp);
4083         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4084             IFF_DRV_RUNNING)
4085                 return (EBUSY);
4086
4087         (void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
4088         for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4089                 (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
4090         return (0);
4091 }
4092 #endif /* DEBUGNET */