1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 /* Driver for VirtIO network devices. */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include <sys/param.h>
35 #include <sys/eventhandler.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/sockio.h>
39 #include <sys/mbuf.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/socket.h>
43 #include <sys/sysctl.h>
44 #include <sys/random.h>
45 #include <sys/sglist.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/taskqueue.h>
49 #include <sys/smp.h>
50 #include <machine/smp.h>
51
52 #include <vm/uma.h>
53
54 #include <net/debugnet.h>
55 #include <net/ethernet.h>
56 #include <net/pfil.h>
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/if_arp.h>
60 #include <net/if_dl.h>
61 #include <net/if_types.h>
62 #include <net/if_media.h>
63 #include <net/if_vlan_var.h>
64
65 #include <net/bpf.h>
66
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
71 #include <netinet6/ip6_var.h>
72 #include <netinet/udp.h>
73 #include <netinet/tcp.h>
74
75 #include <machine/bus.h>
76 #include <machine/resource.h>
77 #include <sys/bus.h>
78 #include <sys/rman.h>
79
80 #include <dev/virtio/virtio.h>
81 #include <dev/virtio/virtqueue.h>
82 #include <dev/virtio/network/virtio_net.h>
83 #include <dev/virtio/network/if_vtnetvar.h>
84 #include "virtio_if.h"
85
86 #include "opt_inet.h"
87 #include "opt_inet6.h"
88
89 #if defined(INET) || defined(INET6)
90 #include <machine/in_cksum.h>
91 #endif
92
93 static int      vtnet_modevent(module_t, int, void *);
94
95 static int      vtnet_probe(device_t);
96 static int      vtnet_attach(device_t);
97 static int      vtnet_detach(device_t);
98 static int      vtnet_suspend(device_t);
99 static int      vtnet_resume(device_t);
100 static int      vtnet_shutdown(device_t);
101 static int      vtnet_attach_completed(device_t);
102 static int      vtnet_config_change(device_t);
103
104 static void     vtnet_negotiate_features(struct vtnet_softc *);
105 static void     vtnet_setup_features(struct vtnet_softc *);
106 static int      vtnet_init_rxq(struct vtnet_softc *, int);
107 static int      vtnet_init_txq(struct vtnet_softc *, int);
108 static int      vtnet_alloc_rxtx_queues(struct vtnet_softc *);
109 static void     vtnet_free_rxtx_queues(struct vtnet_softc *);
110 static int      vtnet_alloc_rx_filters(struct vtnet_softc *);
111 static void     vtnet_free_rx_filters(struct vtnet_softc *);
112 static int      vtnet_alloc_virtqueues(struct vtnet_softc *);
113 static int      vtnet_setup_interface(struct vtnet_softc *);
114 static int      vtnet_ioctl_mtu(struct vtnet_softc *, int);
115 static int      vtnet_ioctl(struct ifnet *, u_long, caddr_t);
116 static uint64_t vtnet_get_counter(struct ifnet *, ift_counter);
117
118 static int      vtnet_rxq_populate(struct vtnet_rxq *);
119 static void     vtnet_rxq_free_mbufs(struct vtnet_rxq *);
120 static struct mbuf *
121                 vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
122 static int      vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
123                     struct mbuf *, int);
124 static int      vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
125 static int      vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
126 static int      vtnet_rxq_new_buf(struct vtnet_rxq *);
127 static int      vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
128                      uint16_t, int, struct virtio_net_hdr *);
129 static int      vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
130                      uint16_t, int, struct virtio_net_hdr *);
131 static int      vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
132                      struct virtio_net_hdr *);
133 static void     vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
134 static void     vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
135 static int      vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
136 static void     vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
137                     struct virtio_net_hdr *);
138 static int      vtnet_rxq_eof(struct vtnet_rxq *);
139 static void     vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
140 static void     vtnet_rx_vq_intr(void *);
141 static void     vtnet_rxq_tq_intr(void *, int);
142
143 static int      vtnet_txq_intr_threshold(struct vtnet_txq *);
144 static int      vtnet_txq_below_threshold(struct vtnet_txq *);
145 static int      vtnet_txq_notify(struct vtnet_txq *);
146 static void     vtnet_txq_free_mbufs(struct vtnet_txq *);
147 static int      vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
148                     int *, int *, int *);
149 static int      vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
150                     int, struct virtio_net_hdr *);
151 static struct mbuf *
152                 vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
153                     struct virtio_net_hdr *);
154 static int      vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
155                     struct vtnet_tx_header *);
156 static int      vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
157 #ifdef VTNET_LEGACY_TX
158 static void     vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
159 static void     vtnet_start(struct ifnet *);
160 #else
161 static int      vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
162 static int      vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
163 static void     vtnet_txq_tq_deferred(void *, int);
164 #endif
165 static void     vtnet_txq_start(struct vtnet_txq *);
166 static void     vtnet_txq_tq_intr(void *, int);
167 static int      vtnet_txq_eof(struct vtnet_txq *);
168 static void     vtnet_tx_vq_intr(void *);
169 static void     vtnet_tx_start_all(struct vtnet_softc *);
170
171 #ifndef VTNET_LEGACY_TX
172 static void     vtnet_qflush(struct ifnet *);
173 #endif
174
175 static int      vtnet_watchdog(struct vtnet_txq *);
176 static void     vtnet_accum_stats(struct vtnet_softc *,
177                     struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
178 static void     vtnet_tick(void *);
179
180 static void     vtnet_start_taskqueues(struct vtnet_softc *);
181 static void     vtnet_free_taskqueues(struct vtnet_softc *);
182 static void     vtnet_drain_taskqueues(struct vtnet_softc *);
183
184 static void     vtnet_drain_rxtx_queues(struct vtnet_softc *);
185 static void     vtnet_stop_rendezvous(struct vtnet_softc *);
186 static void     vtnet_stop(struct vtnet_softc *);
187 static int      vtnet_virtio_reinit(struct vtnet_softc *);
188 static void     vtnet_init_rx_filters(struct vtnet_softc *);
189 static int      vtnet_init_rx_queues(struct vtnet_softc *);
190 static int      vtnet_init_tx_queues(struct vtnet_softc *);
191 static int      vtnet_init_rxtx_queues(struct vtnet_softc *);
192 static void     vtnet_set_active_vq_pairs(struct vtnet_softc *);
193 static int      vtnet_reinit(struct vtnet_softc *);
194 static void     vtnet_init_locked(struct vtnet_softc *, int);
195 static void     vtnet_init(void *);
196
197 static void     vtnet_free_ctrl_vq(struct vtnet_softc *);
198 static void     vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
199                     struct sglist *, int, int);
200 static int      vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
201 static int      vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
202 static int      vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, int);
203 static int      vtnet_set_promisc(struct vtnet_softc *, int);
204 static int      vtnet_set_allmulti(struct vtnet_softc *, int);
205 static void     vtnet_rx_filter(struct vtnet_softc *);
206 static void     vtnet_rx_filter_mac(struct vtnet_softc *);
207 static int      vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
208 static void     vtnet_rx_filter_vlan(struct vtnet_softc *);
209 static void     vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
210 static void     vtnet_register_vlan(void *, struct ifnet *, uint16_t);
211 static void     vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
212
213 static void     vtnet_update_speed_duplex(struct vtnet_softc *);
214 static int      vtnet_is_link_up(struct vtnet_softc *);
215 static void     vtnet_update_link_status(struct vtnet_softc *);
216 static int      vtnet_ifmedia_upd(struct ifnet *);
217 static void     vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
218 static void     vtnet_get_macaddr(struct vtnet_softc *);
219 static void     vtnet_set_macaddr(struct vtnet_softc *);
220 static void     vtnet_attached_set_macaddr(struct vtnet_softc *);
221 static void     vtnet_vlan_tag_remove(struct mbuf *);
222 static void     vtnet_set_rx_process_limit(struct vtnet_softc *);
223
224 static void     vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
225                     struct sysctl_oid_list *, struct vtnet_rxq *);
226 static void     vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
227                     struct sysctl_oid_list *, struct vtnet_txq *);
228 static void     vtnet_setup_queue_sysctl(struct vtnet_softc *);
229 static void     vtnet_setup_sysctl(struct vtnet_softc *);
230
231 static int      vtnet_rxq_enable_intr(struct vtnet_rxq *);
232 static void     vtnet_rxq_disable_intr(struct vtnet_rxq *);
233 static int      vtnet_txq_enable_intr(struct vtnet_txq *);
234 static void     vtnet_txq_disable_intr(struct vtnet_txq *);
235 static void     vtnet_enable_rx_interrupts(struct vtnet_softc *);
236 static void     vtnet_enable_tx_interrupts(struct vtnet_softc *);
237 static void     vtnet_enable_interrupts(struct vtnet_softc *);
238 static void     vtnet_disable_rx_interrupts(struct vtnet_softc *);
239 static void     vtnet_disable_tx_interrupts(struct vtnet_softc *);
240 static void     vtnet_disable_interrupts(struct vtnet_softc *);
241
242 static int      vtnet_tunable_int(struct vtnet_softc *, const char *, int);
243
244 DEBUGNET_DEFINE(vtnet);
245
246 #define vtnet_htog16(_sc, _val) virtio_htog16(vtnet_modern(_sc), _val)
247 #define vtnet_htog32(_sc, _val) virtio_htog32(vtnet_modern(_sc), _val)
248 #define vtnet_htog64(_sc, _val) virtio_htog64(vtnet_modern(_sc), _val)
249 #define vtnet_gtoh16(_sc, _val) virtio_gtoh16(vtnet_modern(_sc), _val)
250 #define vtnet_gtoh32(_sc, _val) virtio_gtoh32(vtnet_modern(_sc), _val)
251 #define vtnet_gtoh64(_sc, _val) virtio_gtoh64(vtnet_modern(_sc), _val)
252
253 /* Tunables. */
254 static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
255     "VirtIO Net driver parameters");
256
257 static int vtnet_csum_disable = 0;
258 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
259 SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
260     &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
261
262 static int vtnet_fixup_needs_csum = 0;
263 SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
264     &vtnet_fixup_needs_csum, 0,
265     "Calculate valid checksum for NEEDS_CSUM packets");
266
267 static int vtnet_tso_disable = 0;
268 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
269 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable,
270     0, "Disables TCP Segmentation Offload");
271
272 static int vtnet_lro_disable = 0;
273 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
274 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable,
275     0, "Disables TCP Large Receive Offload");
276
277 static int vtnet_mq_disable = 0;
278 TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
279 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable,
280     0, "Disables multiqueue support");
281
282 static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
283 TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
284 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
285     &vtnet_mq_max_pairs, 0, "Sets the maximum number of multiqueue pairs");
286
287 static int vtnet_rx_process_limit = 1024;
288 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
289 SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
290     &vtnet_rx_process_limit, 0, "Limits RX segments processed in a single pass");
291
292 static uma_zone_t vtnet_tx_header_zone;
293
294 static struct virtio_feature_desc vtnet_feature_desc[] = {
295         { VIRTIO_NET_F_CSUM,                    "TxChecksum"            },
296         { VIRTIO_NET_F_GUEST_CSUM,              "RxChecksum"            },
297         { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,     "CtrlRxOffloads"        },
298         { VIRTIO_NET_F_MAC,                     "MAC"                   },
299         { VIRTIO_NET_F_GSO,                     "TxGSO"                 },
300         { VIRTIO_NET_F_GUEST_TSO4,              "RxLROv4"               },
301         { VIRTIO_NET_F_GUEST_TSO6,              "RxLROv6"               },
302         { VIRTIO_NET_F_GUEST_ECN,               "RxLROECN"              },
303         { VIRTIO_NET_F_GUEST_UFO,               "RxUFO"                 },
304         { VIRTIO_NET_F_HOST_TSO4,               "TxTSOv4"               },
305         { VIRTIO_NET_F_HOST_TSO6,               "TxTSOv6"               },
306         { VIRTIO_NET_F_HOST_ECN,                "TxTSOECN"              },
307         { VIRTIO_NET_F_HOST_UFO,                "TxUFO"                 },
308         { VIRTIO_NET_F_MRG_RXBUF,               "MrgRxBuf"              },
309         { VIRTIO_NET_F_STATUS,                  "Status"                },
310         { VIRTIO_NET_F_CTRL_VQ,                 "CtrlVq"                },
311         { VIRTIO_NET_F_CTRL_RX,                 "CtrlRxMode"            },
312         { VIRTIO_NET_F_CTRL_VLAN,               "CtrlVLANFilter"        },
313         { VIRTIO_NET_F_CTRL_RX_EXTRA,           "CtrlRxModeExtra"       },
314         { VIRTIO_NET_F_GUEST_ANNOUNCE,          "GuestAnnounce"         },
315         { VIRTIO_NET_F_MQ,                      "Multiqueue"            },
316         { VIRTIO_NET_F_CTRL_MAC_ADDR,           "CtrlMacAddr"           },
317         { VIRTIO_NET_F_SPEED_DUPLEX,            "SpeedDuplex"           },
318
319         { 0, NULL }
320 };
321
322 static device_method_t vtnet_methods[] = {
323         /* Device methods. */
324         DEVMETHOD(device_probe,                 vtnet_probe),
325         DEVMETHOD(device_attach,                vtnet_attach),
326         DEVMETHOD(device_detach,                vtnet_detach),
327         DEVMETHOD(device_suspend,               vtnet_suspend),
328         DEVMETHOD(device_resume,                vtnet_resume),
329         DEVMETHOD(device_shutdown,              vtnet_shutdown),
330
331         /* VirtIO methods. */
332         DEVMETHOD(virtio_attach_completed,      vtnet_attach_completed),
333         DEVMETHOD(virtio_config_change,         vtnet_config_change),
334
335         DEVMETHOD_END
336 };
337
338 #ifdef DEV_NETMAP
339 #include <dev/netmap/if_vtnet_netmap.h>
340 #endif
341
342 static driver_t vtnet_driver = {
343     .name = "vtnet",
344     .methods = vtnet_methods,
345     .size = sizeof(struct vtnet_softc)
346 };
347 static devclass_t vtnet_devclass;
348
349 DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass,
350     vtnet_modevent, 0);
351 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
352     vtnet_modevent, 0);
353 MODULE_VERSION(vtnet, 1);
354 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
355 #ifdef DEV_NETMAP
356 MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
357 #endif
358
359 VIRTIO_SIMPLE_PNPTABLE(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
360 VIRTIO_SIMPLE_PNPINFO(virtio_mmio, vtnet);
361 VIRTIO_SIMPLE_PNPINFO(virtio_pci, vtnet);
362
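/*
 * Module event handler: create the UMA zone used for transmit headers on
 * the first load, refuse to quiesce while any headers are still allocated,
 * and destroy the zone on the final unload.
 */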
363 static int
364 vtnet_modevent(module_t mod, int type, void *unused)
365 {
366         int error = 0;
367         static int loaded = 0;
368
369         switch (type) {
370         case MOD_LOAD:
371                 if (loaded++ == 0) {
372                         vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
373                                 sizeof(struct vtnet_tx_header),
374                                 NULL, NULL, NULL, NULL, 0, 0);
375 #ifdef DEBUGNET
376                         /*
377                          * We need to allocate from this zone in the transmit path, so ensure
378                          * that we have at least one item per header available.
379                          * XXX add a separate zone like we do for mbufs? otherwise we may alloc
380                          * buckets
381                          */
382                         uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
383                         uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
384 #endif
385                 }
386                 break;
387         case MOD_QUIESCE:
388                 if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
389                         error = EBUSY;
390                 break;
391         case MOD_UNLOAD:
392                 if (--loaded == 0) {
393                         uma_zdestroy(vtnet_tx_header_zone);
394                         vtnet_tx_header_zone = NULL;
395                 }
396                 break;
397         case MOD_SHUTDOWN:
398                 break;
399         default:
400                 error = EOPNOTSUPP;
401                 break;
402         }
403
404         return (error);
405 }
406
407 static int
408 vtnet_probe(device_t dev)
409 {
410         return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
411 }
412
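/*
 * Attach the device: negotiate features, allocate the Rx filters, the
 * queue structures and the virtqueues, create the ifnet, and set up the
 * interrupts. On failure, vtnet_detach() unwinds whatever was completed.
 */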
413 static int
414 vtnet_attach(device_t dev)
415 {
416         struct vtnet_softc *sc;
417         int error;
418
419         sc = device_get_softc(dev);
420         sc->vtnet_dev = dev;
421
422         virtio_set_feature_desc(dev, vtnet_feature_desc);
423
424         VTNET_CORE_LOCK_INIT(sc);
425         callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
426
427         vtnet_setup_sysctl(sc);
428         vtnet_setup_features(sc);
429
430         error = vtnet_alloc_rx_filters(sc);
431         if (error) {
432                 device_printf(dev, "cannot allocate Rx filters\n");
433                 goto fail;
434         }
435
436         error = vtnet_alloc_rxtx_queues(sc);
437         if (error) {
438                 device_printf(dev, "cannot allocate queues\n");
439                 goto fail;
440         }
441
442         error = vtnet_alloc_virtqueues(sc);
443         if (error) {
444                 device_printf(dev, "cannot allocate virtqueues\n");
445                 goto fail;
446         }
447
448         error = vtnet_setup_interface(sc);
449         if (error) {
450                 device_printf(dev, "cannot setup interface\n");
451                 goto fail;
452         }
453
454         error = virtio_setup_intr(dev, INTR_TYPE_NET);
455         if (error) {
456                 device_printf(dev, "cannot setup interrupts\n");
457                 ether_ifdetach(sc->vtnet_ifp);
458                 goto fail;
459         }
460
461 #ifdef DEV_NETMAP
462         vtnet_netmap_attach(sc);
463 #endif
464         vtnet_start_taskqueues(sc);
465
466 fail:
467         if (error)
468                 vtnet_detach(dev);
469
470         return (error);
471 }
472
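/*
 * Detach the device: stop the interface and drain the callout and
 * taskqueues before releasing all resources. Also used to unwind a
 * partially completed attach.
 */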
473 static int
474 vtnet_detach(device_t dev)
475 {
476         struct vtnet_softc *sc;
477         struct ifnet *ifp;
478
479         sc = device_get_softc(dev);
480         ifp = sc->vtnet_ifp;
481
482         if (device_is_attached(dev)) {
483                 VTNET_CORE_LOCK(sc);
484                 vtnet_stop(sc);
485                 VTNET_CORE_UNLOCK(sc);
486
487                 callout_drain(&sc->vtnet_tick_ch);
488                 vtnet_drain_taskqueues(sc);
489
490                 ether_ifdetach(ifp);
491         }
492
493 #ifdef DEV_NETMAP
494         netmap_detach(ifp);
495 #endif
496
497         vtnet_free_taskqueues(sc);
498
499         if (sc->vtnet_vlan_attach != NULL) {
500                 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
501                 sc->vtnet_vlan_attach = NULL;
502         }
503         if (sc->vtnet_vlan_detach != NULL) {
504                 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
505                 sc->vtnet_vlan_detach = NULL;
506         }
507
508         ifmedia_removeall(&sc->vtnet_media);
509
510         if (ifp != NULL) {
511                 if_free(ifp);
512                 sc->vtnet_ifp = NULL;
513         }
514
515         vtnet_free_rxtx_queues(sc);
516         vtnet_free_rx_filters(sc);
517
518         if (sc->vtnet_ctrl_vq != NULL)
519                 vtnet_free_ctrl_vq(sc);
520
521         VTNET_CORE_LOCK_DESTROY(sc);
522
523         return (0);
524 }
525
526 static int
527 vtnet_suspend(device_t dev)
528 {
529         struct vtnet_softc *sc;
530
531         sc = device_get_softc(dev);
532
533         VTNET_CORE_LOCK(sc);
534         vtnet_stop(sc);
535         sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
536         VTNET_CORE_UNLOCK(sc);
537
538         return (0);
539 }
540
541 static int
542 vtnet_resume(device_t dev)
543 {
544         struct vtnet_softc *sc;
545         struct ifnet *ifp;
546
547         sc = device_get_softc(dev);
548         ifp = sc->vtnet_ifp;
549
550         VTNET_CORE_LOCK(sc);
551         if (ifp->if_flags & IFF_UP)
552                 vtnet_init_locked(sc, 0);
553         sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
554         VTNET_CORE_UNLOCK(sc);
555
556         return (0);
557 }
558
559 static int
560 vtnet_shutdown(device_t dev)
561 {
562         /*
563          * Suspend already does all of what we need to
564          * do here; we just never expect to be resumed.
565          */
566         return (vtnet_suspend(dev));
567 }
568
569 static int
570 vtnet_attach_completed(device_t dev)
571 {
572         struct vtnet_softc *sc;
573
574         sc = device_get_softc(dev);
575
576         VTNET_CORE_LOCK(sc);
577         vtnet_attached_set_macaddr(sc);
578         VTNET_CORE_UNLOCK(sc);
579
580         return (0);
581 }
582
583 static int
584 vtnet_config_change(device_t dev)
585 {
586         struct vtnet_softc *sc;
587
588         sc = device_get_softc(dev);
589
590         VTNET_CORE_LOCK(sc);
591         vtnet_update_link_status(sc);
592         if (sc->vtnet_link_active != 0)
593                 vtnet_tx_start_all(sc);
594         VTNET_CORE_UNLOCK(sc);
595
596         return (0);
597 }
598
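/*
 * Negotiate the feature set with the host, honoring the csum, TSO, LRO,
 * and multiqueue tunables. LRO without mergeable buffers is kept only
 * when indirect descriptors are available, since each receive buffer
 * then requires a long descriptor chain.
 */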
599 static void
600 vtnet_negotiate_features(struct vtnet_softc *sc)
601 {
602         device_t dev;
603         uint64_t features;
604         int no_csum;
605
606         dev = sc->vtnet_dev;
607         features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
608             VTNET_LEGACY_FEATURES;
609
610         /*
611          * TSO and LRO are only available when their corresponding checksum
612          * offload feature is also negotiated.
613          */
614         no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
615         if (no_csum)
616                 features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
617         if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
618                 features &= ~VTNET_TSO_FEATURES;
619         if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
620                 features &= ~VTNET_LRO_FEATURES;
621
622 #ifndef VTNET_LEGACY_TX
623         if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
624                 features &= ~VIRTIO_NET_F_MQ;
625 #else
626         features &= ~VIRTIO_NET_F_MQ;
627 #endif
628
629         sc->vtnet_features = virtio_negotiate_features(dev, features);
630
631         if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
632             virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
633                 /*
634                  * LRO without mergeable buffers requires special care. This
635                  * is not ideal because every receive buffer must be large
636                  * enough to hold the maximum TCP packet, the Ethernet header,
637          * and the virtio-net header. This requires up to 34 descriptors with
638                  * MCLBYTES clusters. If we do not have indirect descriptors,
639                  * LRO is disabled since the virtqueue will not contain very
640                  * many receive buffers.
641                  */
642                 if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
643                         device_printf(dev,
644                             "LRO disabled since both mergeable buffers and "
645                             "indirect descriptors were not negotiated\n");
646                         features &= ~VTNET_LRO_FEATURES;
647                         sc->vtnet_features =
648                             virtio_negotiate_features(dev, features);
649                 } else
650                         sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
651         }
652
653         virtio_finalize_features(dev);
654 }
655
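/*
 * Derive the driver configuration from the negotiated features: header
 * size, Rx/Tx segment counts, maximum MTU, and the number of supported
 * virtqueue pairs.
 */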
656 static void
657 vtnet_setup_features(struct vtnet_softc *sc)
658 {
659         device_t dev;
660
661         dev = sc->vtnet_dev;
662
663         vtnet_negotiate_features(sc);
664
665         if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
666                 sc->vtnet_flags |= VTNET_FLAG_MODERN;
667         if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
668                 sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
669         if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
670                 sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
671
672         if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
673                 /* This feature should always be negotiated. */
674                 sc->vtnet_flags |= VTNET_FLAG_MAC;
675         }
676
677         if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
678                 sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
679                     offsetof(struct virtio_net_config, mtu));
680         } else
681                 sc->vtnet_max_mtu = VTNET_MAX_MTU;
682
683         if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
684                 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
685                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
686         } else if (vtnet_modern(sc)) {
687                 /* This is identical to the mergeable header. */
688                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
689         } else
690                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
691
692         if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
693                 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
694         else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
695                 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
696         else
697                 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
698
699         if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
700             virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
701             virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
702                 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
703         else
704                 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
705
706         sc->vtnet_max_vq_pairs = 1;
707
708         if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
709                 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
710
711                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
712                         sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
713                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
714                         sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
715                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
716                         sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
717
718                 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
719                         sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
720                             offsetof(struct virtio_net_config,
721                             max_virtqueue_pairs));
722                 }
723         }
724
725         if (sc->vtnet_max_vq_pairs > 1) {
726                 int max;
727
728                 /*
729                  * Limit the maximum number of queue pairs to the lower of
730                  * the number of CPUs and the configured maximum. The actual
731                  * number of queues that get used may be less.
732                  */
733                 max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
734                 if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) {
735                         if (max > mp_ncpus)
736                                 max = mp_ncpus;
737                         if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
738                                 max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX;
739                         if (max > 1) {
740                                 sc->vtnet_requested_vq_pairs = max;
741                                 sc->vtnet_flags |= VTNET_FLAG_MQ;
742                         }
743                 }
744         }
745 }
746
747 static int
748 vtnet_init_rxq(struct vtnet_softc *sc, int id)
749 {
750         struct vtnet_rxq *rxq;
751
752         rxq = &sc->vtnet_rxqs[id];
753
754         snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
755             device_get_nameunit(sc->vtnet_dev), id);
756         mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
757
758         rxq->vtnrx_sc = sc;
759         rxq->vtnrx_id = id;
760
761         rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
762         if (rxq->vtnrx_sg == NULL)
763                 return (ENOMEM);
764
765         NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
766         rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
767             taskqueue_thread_enqueue, &rxq->vtnrx_tq);
768
769         return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
770 }
771
772 static int
773 vtnet_init_txq(struct vtnet_softc *sc, int id)
774 {
775         struct vtnet_txq *txq;
776
777         txq = &sc->vtnet_txqs[id];
778
779         snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
780             device_get_nameunit(sc->vtnet_dev), id);
781         mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
782
783         txq->vtntx_sc = sc;
784         txq->vtntx_id = id;
785
786         txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
787         if (txq->vtntx_sg == NULL)
788                 return (ENOMEM);
789
790 #ifndef VTNET_LEGACY_TX
791         txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
792             M_NOWAIT, &txq->vtntx_mtx);
793         if (txq->vtntx_br == NULL)
794                 return (ENOMEM);
795
796         TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
797 #endif
798         TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
799         txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
800             taskqueue_thread_enqueue, &txq->vtntx_tq);
801         if (txq->vtntx_tq == NULL)
802                 return (ENOMEM);
803
804         return (0);
805 }
806
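/*
 * Allocate and initialize one Rx and one Tx queue structure for each
 * supported virtqueue pair.
 */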
807 static int
808 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
809 {
810         int i, npairs, error;
811
812         npairs = sc->vtnet_max_vq_pairs;
813
814         sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
815             M_NOWAIT | M_ZERO);
816         sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
817             M_NOWAIT | M_ZERO);
818         if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
819                 return (ENOMEM);
820
821         for (i = 0; i < npairs; i++) {
822                 error = vtnet_init_rxq(sc, i);
823                 if (error)
824                         return (error);
825                 error = vtnet_init_txq(sc, i);
826                 if (error)
827                         return (error);
828         }
829
830         vtnet_setup_queue_sysctl(sc);
831
832         return (0);
833 }
834
835 static void
836 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
837 {
838
839         rxq->vtnrx_sc = NULL;
840         rxq->vtnrx_id = -1;
841
842         if (rxq->vtnrx_sg != NULL) {
843                 sglist_free(rxq->vtnrx_sg);
844                 rxq->vtnrx_sg = NULL;
845         }
846
847         if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
848                 mtx_destroy(&rxq->vtnrx_mtx);
849 }
850
851 static void
852 vtnet_destroy_txq(struct vtnet_txq *txq)
853 {
854
855         txq->vtntx_sc = NULL;
856         txq->vtntx_id = -1;
857
858         if (txq->vtntx_sg != NULL) {
859                 sglist_free(txq->vtntx_sg);
860                 txq->vtntx_sg = NULL;
861         }
862
863 #ifndef VTNET_LEGACY_TX
864         if (txq->vtntx_br != NULL) {
865                 buf_ring_free(txq->vtntx_br, M_DEVBUF);
866                 txq->vtntx_br = NULL;
867         }
868 #endif
869
870         if (mtx_initialized(&txq->vtntx_mtx) != 0)
871                 mtx_destroy(&txq->vtntx_mtx);
872 }
873
874 static void
875 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
876 {
877         int i;
878
879         if (sc->vtnet_rxqs != NULL) {
880                 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
881                         vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
882                 free(sc->vtnet_rxqs, M_DEVBUF);
883                 sc->vtnet_rxqs = NULL;
884         }
885
886         if (sc->vtnet_txqs != NULL) {
887                 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
888                         vtnet_destroy_txq(&sc->vtnet_txqs[i]);
889                 free(sc->vtnet_txqs, M_DEVBUF);
890                 sc->vtnet_txqs = NULL;
891         }
892 }
893
894 static int
895 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
896 {
897
898         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
899                 sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
900                     M_DEVBUF, M_NOWAIT | M_ZERO);
901                 if (sc->vtnet_mac_filter == NULL)
902                         return (ENOMEM);
903         }
904
905         if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
906                 sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
907                     VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
908                 if (sc->vtnet_vlan_filter == NULL)
909                         return (ENOMEM);
910         }
911
912         return (0);
913 }
914
915 static void
916 vtnet_free_rx_filters(struct vtnet_softc *sc)
917 {
918
919         if (sc->vtnet_mac_filter != NULL) {
920                 free(sc->vtnet_mac_filter, M_DEVBUF);
921                 sc->vtnet_mac_filter = NULL;
922         }
923
924         if (sc->vtnet_vlan_filter != NULL) {
925                 free(sc->vtnet_vlan_filter, M_DEVBUF);
926                 sc->vtnet_vlan_filter = NULL;
927         }
928 }
929
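/*
 * Allocate the virtqueues in the order required by the VirtIO
 * specification: the receive and transmit queues of each pair, followed
 * by the control virtqueue when it was negotiated.
 */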
930 static int
931 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
932 {
933         device_t dev;
934         struct vq_alloc_info *info;
935         struct vtnet_rxq *rxq;
936         struct vtnet_txq *txq;
937         int i, idx, flags, nvqs, error;
938
939         dev = sc->vtnet_dev;
940         flags = 0;
941
942         nvqs = sc->vtnet_max_vq_pairs * 2;
943         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
944                 nvqs++;
945
946         info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
947         if (info == NULL)
948                 return (ENOMEM);
949
950         for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
951                 rxq = &sc->vtnet_rxqs[i];
952                 VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
953                     vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
954                     "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
955
956                 txq = &sc->vtnet_txqs[i];
957                 VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
958                     vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
959                     "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
960         }
961
962         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
963                 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
964                     &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
965         }
966
967         /*
968          * TODO: Enable interrupt binding if this is multiqueue. This will
969          * only matter when per-virtqueue MSIX is available.
970          */
971         if (sc->vtnet_flags & VTNET_FLAG_MQ)
972                 flags |= 0;
973
974         error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
975         free(info, M_TEMP);
976
977         return (error);
978 }
979
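/*
 * Create and configure the ifnet: transmit method, capabilities derived
 * from the negotiated features, MAC address, media, VLAN event handlers,
 * and the pfil(9) input hook.
 */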
980 static int
981 vtnet_setup_interface(struct vtnet_softc *sc)
982 {
983         device_t dev;
984         struct pfil_head_args pa;
985         struct ifnet *ifp;
986
987         dev = sc->vtnet_dev;
988
989         ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
990         if (ifp == NULL) {
991                 device_printf(dev, "cannot allocate ifnet structure\n");
992                 return (ENOSPC);
993         }
994
995         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
996         ifp->if_softc = sc;
997         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
998             IFF_KNOWSEPOCH;
999         ifp->if_init = vtnet_init;
1000         ifp->if_ioctl = vtnet_ioctl;
1001         ifp->if_get_counter = vtnet_get_counter;
1002 #ifndef VTNET_LEGACY_TX
1003         ifp->if_transmit = vtnet_txq_mq_start;
1004         ifp->if_qflush = vtnet_qflush;
1005 #else
1006         struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
1007         ifp->if_start = vtnet_start;
1008         IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
1009         ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
1010         IFQ_SET_READY(&ifp->if_snd);
1011 #endif
1012
1013         if (virtio_with_feature(dev, VIRTIO_NET_F_SPEED_DUPLEX)) {
1014                 uint32_t speed = virtio_read_dev_config_4(dev,
1015                     offsetof(struct virtio_net_config, speed));
1016                 if (speed != -1)
1017                         ifp->if_baudrate = IF_Mbps(speed);
1018                 else
1019                         ifp->if_baudrate = IF_Gbps(10); /* Approx. */
1020         } else
1021                 ifp->if_baudrate = IF_Gbps(10); /* Approx. */
1022
1023         ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
1024         ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1025         ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);
1026
1027         vtnet_get_macaddr(sc);
1028         ether_ifattach(ifp, sc->vtnet_hwaddr);
1029
1030         if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
1031                 ifp->if_capabilities |= IFCAP_LINKSTATE;
1032
1033         /* Tell the upper layer(s) we support long frames. */
1034         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
1035         ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
1036
1037         if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
1038                 int gso;
1039
1040                 ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
1041
1042                 gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
1043                 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
1044                         ifp->if_capabilities |= IFCAP_TSO4;
1045                 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
1046                         ifp->if_capabilities |= IFCAP_TSO6;
1047                 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
1048                         sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
1049
1050                 if (ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6))
1051                         ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
1052         }
1053
1054         if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
1055                 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
1056
1057                 if (vtnet_tunable_int(sc, "fixup_needs_csum",
1058                     vtnet_fixup_needs_csum) != 0)
1059                         sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;
1060
1061                 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
1062                     virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) ||
1063                     virtio_with_feature(dev, VIRTIO_NET_F_GUEST_ECN))
1064                         ifp->if_capabilities |= IFCAP_LRO;
1065         }
1066
1067         if (ifp->if_capabilities & IFCAP_HWCSUM) {
1068                 /*
1069                  * VirtIO does not support VLAN tagging, but we can fake
1070                  * it by inserting and removing the 802.1Q header during
1071                  * transmit and receive. We are then able to do checksum
1072                  * offloading of VLAN frames.
1073                  */
1074                 ifp->if_capabilities |=
1075                     IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
1076         }
1077
1078         ifp->if_capenable = ifp->if_capabilities;
1079
1080         /*
1081          * Capabilities after here are not enabled by default.
1082          */
1083         if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1084                 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
1085
1086                 sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1087                     vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1088                 sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1089                     vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1090         }
1091
1092         vtnet_set_rx_process_limit(sc);
1093
1094         DEBUGNET_SET(ifp, vtnet);
1095
1096         pa.pa_version = PFIL_VERSION;
1097         pa.pa_flags = PFIL_IN;
1098         pa.pa_type = PFIL_TYPE_ETHERNET;
1099         pa.pa_headname = ifp->if_xname;
1100         sc->vtnet_pfil = pfil_head_register(&pa);
1101
1102         return (0);
1103 }
1104
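/*
 * Select the receive mbuf cluster size for the given MTU, based on the
 * negotiated features.
 */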
1105 static int
1106 vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
1107 {
1108         int framesz;
1109
1110         if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
1111                 return (MJUMPAGESIZE);
1112         else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1113                 return (MCLBYTES);
1114
1115         /*
1116          * Try to scale the receive mbuf cluster size from the MTU. We
1117          * could also use the VQ size to influence the selected size,
1118          * but that would only matter for very small queues.
1119          */
1120         if (vtnet_modern(sc)) {
1121                 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
1122                 framesz = sizeof(struct virtio_net_hdr_v1);
1123         } else
1124                 framesz = sizeof(struct vtnet_rx_header);
1125         framesz += sizeof(struct ether_vlan_header) + mtu;
1126
1127         if (framesz <= MCLBYTES)
1128                 return (MCLBYTES);
1129         else if (framesz <= MJUMPAGESIZE)
1130                 return (MJUMPAGESIZE);
1131         else if (framesz <= MJUM9BYTES)
1132                 return (MJUM9BYTES);
1133
1134         /* Sane default; avoid 16KB clusters. */
1135         return (MCLBYTES);
1136 }
1137
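/*
 * Handle an MTU change. If the new MTU requires a different receive
 * cluster size and the interface is running, reinitialize it.
 */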
1138 static int
1139 vtnet_ioctl_mtu(struct vtnet_softc *sc, int mtu)
1140 {
1141         struct ifnet *ifp;
1142         int clustersz;
1143
1144         ifp = sc->vtnet_ifp;
1145         VTNET_CORE_LOCK_ASSERT(sc);
1146
1147         if (ifp->if_mtu == mtu)
1148                 return (0);
1149         else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
1150                 return (EINVAL);
1151
1152         ifp->if_mtu = mtu;
1153         clustersz = vtnet_rx_cluster_size(sc, mtu);
1154
1155         if (clustersz != sc->vtnet_rx_clustersz &&
1156             ifp->if_drv_flags & IFF_DRV_RUNNING) {
1157                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1158                 vtnet_init_locked(sc, 0);
1159         }
1160
1161         return (0);
1162 }
1163
1164 static int
1165 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1166 {
1167         struct vtnet_softc *sc;
1168         struct ifreq *ifr;
1169         int reinit, mask, error;
1170
1171         sc = ifp->if_softc;
1172         ifr = (struct ifreq *) data;
1173         error = 0;
1174
1175         switch (cmd) {
1176         case SIOCSIFMTU:
1177                 VTNET_CORE_LOCK(sc);
1178                 error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
1179                 VTNET_CORE_UNLOCK(sc);
1180                 break;
1181
1182         case SIOCSIFFLAGS:
1183                 VTNET_CORE_LOCK(sc);
1184                 if ((ifp->if_flags & IFF_UP) == 0) {
1185                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1186                                 vtnet_stop(sc);
1187                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1188                         if ((ifp->if_flags ^ sc->vtnet_if_flags) &
1189                             (IFF_PROMISC | IFF_ALLMULTI)) {
1190                                 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1191                                         vtnet_rx_filter(sc);
1192                                 else {
1193                                         ifp->if_flags |= IFF_PROMISC;
1194                                         if ((ifp->if_flags ^ sc->vtnet_if_flags)
1195                                             & IFF_ALLMULTI)
1196                                                 error = ENOTSUP;
1197                                 }
1198                         }
1199                 } else
1200                         vtnet_init_locked(sc, 0);
1201
1202                 if (error == 0)
1203                         sc->vtnet_if_flags = ifp->if_flags;
1204                 VTNET_CORE_UNLOCK(sc);
1205                 break;
1206
1207         case SIOCADDMULTI:
1208         case SIOCDELMULTI:
1209                 VTNET_CORE_LOCK(sc);
1210                 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
1211                     ifp->if_drv_flags & IFF_DRV_RUNNING)
1212                         vtnet_rx_filter_mac(sc);
1213                 VTNET_CORE_UNLOCK(sc);
1214                 break;
1215
1216         case SIOCSIFMEDIA:
1217         case SIOCGIFMEDIA:
1218                 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1219                 break;
1220
1221         case SIOCSIFCAP:
1222                 VTNET_CORE_LOCK(sc);
1223                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1224
1225                 if (mask & IFCAP_TXCSUM)
1226                         ifp->if_capenable ^= IFCAP_TXCSUM;
1227                 if (mask & IFCAP_TXCSUM_IPV6)
1228                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1229                 if (mask & IFCAP_TSO4)
1230                         ifp->if_capenable ^= IFCAP_TSO4;
1231                 if (mask & IFCAP_TSO6)
1232                         ifp->if_capenable ^= IFCAP_TSO6;
1233
1234                 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
1235                     IFCAP_VLAN_HWFILTER)) {
1236                         /* These Rx features require us to renegotiate. */
1237                         reinit = 1;
1238
1239                         if (mask & IFCAP_RXCSUM)
1240                                 ifp->if_capenable ^= IFCAP_RXCSUM;
1241                         if (mask & IFCAP_RXCSUM_IPV6)
1242                                 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1243                         if (mask & IFCAP_LRO)
1244                                 ifp->if_capenable ^= IFCAP_LRO;
1245                         if (mask & IFCAP_VLAN_HWFILTER)
1246                                 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1247                 } else
1248                         reinit = 0;
1249
1250                 if (mask & IFCAP_VLAN_HWTSO)
1251                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1252                 if (mask & IFCAP_VLAN_HWTAGGING)
1253                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1254
1255                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1256                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1257                         vtnet_init_locked(sc, 0);
1258                 }
1259
1260                 VTNET_CORE_UNLOCK(sc);
1261                 VLAN_CAPABILITIES(ifp);
1262
1263                 break;
1264
1265         default:
1266                 error = ether_ioctl(ifp, cmd, data);
1267                 break;
1268         }
1269
1270         VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1271
1272         return (error);
1273 }
1274
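/*
 * Fill the receive virtqueue with buffers until it is full, then notify
 * the host. When netmap is active, the netmap code populates the ring
 * instead.
 */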
1275 static int
1276 vtnet_rxq_populate(struct vtnet_rxq *rxq)
1277 {
1278         struct virtqueue *vq;
1279         int nbufs, error;
1280
1281 #ifdef DEV_NETMAP
1282         error = vtnet_netmap_rxq_populate(rxq);
1283         if (error >= 0)
1284                 return (error);
1285 #endif  /* DEV_NETMAP */
1286
1287         vq = rxq->vtnrx_vq;
1288         error = ENOSPC;
1289
1290         for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1291                 error = vtnet_rxq_new_buf(rxq);
1292                 if (error)
1293                         break;
1294         }
1295
1296         if (nbufs > 0) {
1297                 virtqueue_notify(vq);
1298                 /*
1299                  * EMSGSIZE signifies the virtqueue did not have enough
1300                  * entries available to hold the last mbuf. This is not
1301                  * an error.
1302                  */
1303                 if (error == EMSGSIZE)
1304                         error = 0;
1305         }
1306
1307         return (error);
1308 }
1309
1310 static void
1311 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1312 {
1313         struct virtqueue *vq;
1314         struct mbuf *m;
1315         int last;
1316 #ifdef DEV_NETMAP
1317         struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
1318                                                         rxq->vtnrx_id, NR_RX);
1319 #else  /* !DEV_NETMAP */
1320         void *kring = NULL;
1321 #endif /* !DEV_NETMAP */
1322
1323         vq = rxq->vtnrx_vq;
1324         last = 0;
1325
1326         while ((m = virtqueue_drain(vq, &last)) != NULL) {
1327                 if (kring == NULL)
1328                         m_freem(m);
1329         }
1330
1331         KASSERT(virtqueue_empty(vq),
1332             ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1333 }
1334
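/*
 * Allocate a receive mbuf of the configured cluster size. For LRO without
 * mergeable buffers, allocate a chain of nbufs clusters and optionally
 * return the tail mbuf through m_tailp.
 */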
1335 static struct mbuf *
1336 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1337 {
1338         struct mbuf *m_head, *m_tail, *m;
1339         int i, size;
1340
1341         m_head = NULL;
1342         size = sc->vtnet_rx_clustersz;
1343
1344         KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1345             ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
1346
1347         for (i = 0; i < nbufs; i++) {
1348                 m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
1349                 if (m == NULL) {
1350                         sc->vtnet_stats.mbuf_alloc_failed++;
1351                         m_freem(m_head);
1352                         return (NULL);
1353                 }
1354
1355                 m->m_len = size;
1356                 if (m_head != NULL) {
1357                         m_tail->m_next = m;
1358                         m_tail = m;
1359                 } else
1360                         m_head = m_tail = m;
1361         }
1362
1363         if (m_tailp != NULL)
1364                 *m_tailp = m_tail;
1365
1366         return (m_head);
1367 }
1368
1369 /*
1370  * Slow path for when LRO without mergeable buffers is negotiated.
1371  */
1372 static int
1373 vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1374     int len0)
1375 {
1376         struct vtnet_softc *sc;
1377         struct mbuf *m, *m_prev, *m_new, *m_tail;
1378         int len, clustersz, nreplace, error;
1379
1380         sc = rxq->vtnrx_sc;
1381         clustersz = sc->vtnet_rx_clustersz;
1382
1383         m_prev = NULL;
1384         m_tail = NULL;
1385         nreplace = 0;
1386
1387         m = m0;
1388         len = len0;
1389
1390         /*
1391          * Since these mbuf chains are so large, avoid allocating a complete
1392          * replacement when the received frame did not consume the entire
1393          * chain. Unused mbufs are moved to the tail of the replacement mbuf.
1394          */
1395         while (len > 0) {
1396                 if (m == NULL) {
1397                         sc->vtnet_stats.rx_frame_too_large++;
1398                         return (EMSGSIZE);
1399                 }
1400
1401                 /*
1402                  * Every mbuf should have the expected cluster size since that
1403                  * is also used to allocate the replacements.
1404                  */
1405                 KASSERT(m->m_len == clustersz,
1406                     ("%s: mbuf size %d not expected cluster size %d", __func__,
1407                     m->m_len, clustersz));
1408
1409                 m->m_len = MIN(m->m_len, len);
1410                 len -= m->m_len;
1411
1412                 m_prev = m;
1413                 m = m->m_next;
1414                 nreplace++;
1415         }
1416
1417         KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
1418             ("%s: invalid replacement mbuf count %d max %d", __func__,
1419             nreplace, sc->vtnet_rx_nmbufs));
1420
1421         m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1422         if (m_new == NULL) {
1423                 m_prev->m_len = clustersz;
1424                 return (ENOBUFS);
1425         }
1426
1427         /*
1428          * Move any unused mbufs from the received mbuf chain onto the
1429          * end of the replacement chain.
1430          */
1431         if (m_prev->m_next != NULL) {
1432                 m_tail->m_next = m_prev->m_next;
1433                 m_prev->m_next = NULL;
1434         }
1435
1436         error = vtnet_rxq_enqueue_buf(rxq, m_new);
1437         if (error) {
1438                 /*
1439                  * The replacement is supposed to be a copy of the one
1440                  * dequeued, so this is a very unexpected error.
1441                  *
1442                  * Restore the m0 chain to the original state if it was
1443                  * modified so we can then discard it.
1444                  */
1445                 if (m_tail->m_next != NULL) {
1446                         m_prev->m_next = m_tail->m_next;
1447                         m_tail->m_next = NULL;
1448                 }
1449                 m_prev->m_len = clustersz;
1450                 sc->vtnet_stats.rx_enq_replacement_failed++;
1451                 m_freem(m_new);
1452         }
1453
1454         return (error);
1455 }
1456
1457 static int
1458 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1459 {
1460         struct vtnet_softc *sc;
1461         struct mbuf *m_new;
1462         int error;
1463
1464         sc = rxq->vtnrx_sc;
1465
1466         if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1467                 return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
1468
1469         MPASS(m->m_next == NULL);
1470         if (m->m_len < len)
1471                 return (EMSGSIZE);
1472
1473         m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1474         if (m_new == NULL)
1475                 return (ENOBUFS);
1476
1477         error = vtnet_rxq_enqueue_buf(rxq, m_new);
1478         if (error) {
1479                 sc->vtnet_stats.rx_enq_replacement_failed++;
1480                 m_freem(m_new);
1481         } else
1482                 m->m_len = len;
1483
1484         return (error);
1485 }
1486
1487 static int
1488 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1489 {
1490         struct vtnet_softc *sc;
1491         struct sglist *sg;
1492         int header_inlined, error;
1493
1494         sc = rxq->vtnrx_sc;
1495         sg = rxq->vtnrx_sg;
1496
1497         KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1498             ("%s: mbuf chain without LRO_NOMRG", __func__));
1499         VTNET_RXQ_LOCK_ASSERT(rxq);
1500
1501         sglist_reset(sg);
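        /*
         * With a modern (V1) device or mergeable Rx buffers, the virtio-net
         * header is laid out inline at the front of the buffer, so the whole
         * mbuf chain can be appended in one step. Otherwise the legacy
         * header lives in a separate vtnet_rx_header and the header and
         * payload must be appended as distinct sglist segments.
         */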
1502         header_inlined = vtnet_modern(sc) ||
1503             (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
1504
1505         if (header_inlined)
1506                 error = sglist_append_mbuf(sg, m);
1507         else {
1508                 struct vtnet_rx_header *rxhdr =
1509                     mtod(m, struct vtnet_rx_header *);
1510                 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1511
1512                 /* Append the header and remaining mbuf data. */
1513                 error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1514                 if (error)
1515                         return (error);
1516                 error = sglist_append(sg, &rxhdr[1],
1517                     m->m_len - sizeof(struct vtnet_rx_header));
1518                 if (error)
1519                         return (error);
1520
1521                 if (m->m_next != NULL)
1522                         error = sglist_append_mbuf(sg, m->m_next);
1523         }
1524
1525         if (error)
1526                 return (error);
1527
1528         return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
1529 }
1530
1531 static int
1532 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1533 {
1534         struct vtnet_softc *sc;
1535         struct mbuf *m;
1536         int error;
1537
1538         sc = rxq->vtnrx_sc;
1539
1540         m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1541         if (m == NULL)
1542                 return (ENOBUFS);
1543
1544         error = vtnet_rxq_enqueue_buf(rxq, m);
1545         if (error)
1546                 m_freem(m);
1547
1548         return (error);
1549 }
1550
1551 static int
1552 vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
1553     int hoff, struct virtio_net_hdr *hdr)
1554 {
1555         struct vtnet_softc *sc;
1556         int error;
1557
1558         sc = rxq->vtnrx_sc;
1559
1560         /*
1561          * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
1562          * not have an analogous CSUM flag. The checksum has been validated,
1563          * but is incomplete (TCP/UDP pseudo header).
1564          *
1565          * The packet is likely from another VM on the same host that itself
1566          * performed checksum offloading so Tx/Rx is basically a memcpy and
1567          * the checksum has little value.
1568          *
1569          * Default to receiving the packet as-is for performance reasons, but
1570          * this can cause issues if the packet is to be forwarded because it
1571          * does not contain a valid checksum. This patch may be helpful:
1572          * https://reviews.freebsd.org/D6611. In the meantime, have the driver
1573          * compute the checksum if requested.
1574          *
1575          * BMV: Need to add a CSUM_PARTIAL flag?
1576          */
1577         if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
1578                 error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
1579                 return (error);
1580         }
1581
1582         /*
1583          * Compute the checksum in the driver so the packet will contain a
1584          * valid checksum. The checksum is at csum_offset from csum_start.
1585          */
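        /*
         * Typical example (for illustration only): a TCP over IPv4 frame
         * usually arrives with csum_start = 34 (14 byte Ethernet header plus
         * a 20 byte IPv4 header) and csum_offset = 16, placing the 16-bit
         * checksum field at byte 50 of the frame.
         */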
1586         switch (etype) {
1587 #if defined(INET) || defined(INET6)
1588         case ETHERTYPE_IP:
1589         case ETHERTYPE_IPV6: {
1590                 int csum_off, csum_end;
1591                 uint16_t csum;
1592
1593                 csum_off = hdr->csum_start + hdr->csum_offset;
1594                 csum_end = csum_off + sizeof(uint16_t);
1595
1596                 /* Assume checksum will be in the first mbuf. */
1597                 if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
1598                         return (1);
1599
1600                 /*
1601                  * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
1602                  * checksum and write it at the specified offset. We could
1603                  * try to verify the packet: csum_start should probably
1604                  * correspond to the start of the TCP/UDP header.
1605                  *
1606                  * BMV: Need to properly handle UDP with zero checksum. Is
1607                  * the IPv4 header checksum implicitly validated?
1608                  */
1609                 csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
1610                 *(uint16_t *)(mtodo(m, csum_off)) = csum;
1611                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1612                 m->m_pkthdr.csum_data = 0xFFFF;
1613                 break;
1614         }
1615 #endif
1616         default:
1617                 sc->vtnet_stats.rx_csum_bad_ethtype++;
1618                 return (1);
1619         }
1620
1621         return (0);
1622 }
1623
1624 static int
1625 vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
1626     uint16_t etype, int hoff, struct virtio_net_hdr *hdr)
1627 {
1628         struct vtnet_softc *sc;
1629         int protocol;
1630
1631         sc = rxq->vtnrx_sc;
1632
1633         switch (etype) {
1634 #if defined(INET)
1635         case ETHERTYPE_IP:
1636                 if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
1637                         protocol = IPPROTO_DONE;
1638                 else {
1639                         struct ip *ip = (struct ip *)(m->m_data + hoff);
1640                         protocol = ip->ip_p;
1641                 }
1642                 break;
1643 #endif
1644 #if defined(INET6)
1645         case ETHERTYPE_IPV6:
1646                 if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
1647                     || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
1648                         protocol = IPPROTO_DONE;
1649                 break;
1650 #endif
1651         default:
1652                 protocol = IPPROTO_DONE;
1653                 break;
1654         }
1655
1656         switch (protocol) {
1657         case IPPROTO_TCP:
1658         case IPPROTO_UDP:
1659                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1660                 m->m_pkthdr.csum_data = 0xFFFF;
1661                 break;
1662         default:
1663                 /*
1664                  * FreeBSD does not support checksum offloading of this
1665                  * protocol. Let the stack re-verify the checksum later
1666                  * if the protocol is supported.
1667                  */
1668 #if 0
1669                 if_printf(sc->vtnet_ifp,
1670                     "%s: checksum offload of unsupported protocol "
1671                     "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
1672                     __func__, etype, protocol, hdr->csum_start,
1673                     hdr->csum_offset);
1674 #endif
1675                 break;
1676         }
1677
1678         return (0);
1679 }
1680
1681 static int
1682 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1683     struct virtio_net_hdr *hdr)
1684 {
1685         const struct ether_header *eh;
1686         int hoff;
1687         uint16_t etype;
1688
1689         eh = mtod(m, const struct ether_header *);
1690         etype = ntohs(eh->ether_type);
1691         if (etype == ETHERTYPE_VLAN) {
1692                 /* TODO BMV: Handle QinQ. */
1693                 const struct ether_vlan_header *evh =
1694                     mtod(m, const struct ether_vlan_header *);
1695                 etype = ntohs(evh->evl_proto);
1696                 hoff = sizeof(struct ether_vlan_header);
1697         } else
1698                 hoff = sizeof(struct ether_header);
1699
1700         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1701                 return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
1702         else /* VIRTIO_NET_HDR_F_DATA_VALID */
1703                 return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
1704 }
1705
1706 static void
1707 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1708 {
1709         struct mbuf *m;
1710
1711         while (--nbufs > 0) {
1712                 m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1713                 if (m == NULL)
1714                         break;
1715                 vtnet_rxq_discard_buf(rxq, m);
1716         }
1717 }
1718
1719 static void
1720 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1721 {
1722         int error;
1723
1724         /*
1725          * Requeue the discarded mbuf. This should always be successful
1726          * since it was just dequeued.
1727          */
1728         error = vtnet_rxq_enqueue_buf(rxq, m);
1729         KASSERT(error == 0,
1730             ("%s: cannot requeue discarded mbuf %d", __func__, error));
1731 }
1732
1733 static int
1734 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1735 {
1736         struct vtnet_softc *sc;
1737         struct virtqueue *vq;
1738         struct mbuf *m_tail;
1739
1740         sc = rxq->vtnrx_sc;
1741         vq = rxq->vtnrx_vq;
1742         m_tail = m_head;
1743
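        /*
         * With mergeable Rx buffers, a frame larger than one buffer spans
         * num_buffers descriptors. Dequeue the remaining nbufs - 1 buffers
         * and append them to m_head to reassemble the frame.
         */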
1744         while (--nbufs > 0) {
1745                 struct mbuf *m;
1746                 int len;
1747
1748                 m = virtqueue_dequeue(vq, &len);
1749                 if (m == NULL) {
1750                         rxq->vtnrx_stats.vrxs_ierrors++;
1751                         goto fail;
1752                 }
1753
1754                 if (vtnet_rxq_new_buf(rxq) != 0) {
1755                         rxq->vtnrx_stats.vrxs_iqdrops++;
1756                         vtnet_rxq_discard_buf(rxq, m);
1757                         if (nbufs > 1)
1758                                 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1759                         goto fail;
1760                 }
1761
1762                 if (m->m_len < len)
1763                         len = m->m_len;
1764
1765                 m->m_len = len;
1766                 m->m_flags &= ~M_PKTHDR;
1767
1768                 m_head->m_pkthdr.len += len;
1769                 m_tail->m_next = m;
1770                 m_tail = m;
1771         }
1772
1773         return (0);
1774
1775 fail:
1776         sc->vtnet_stats.rx_mergeable_failed++;
1777         m_freem(m_head);
1778
1779         return (1);
1780 }
1781
1782 static void
1783 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
1784     struct virtio_net_hdr *hdr)
1785 {
1786         struct vtnet_softc *sc;
1787         struct ifnet *ifp;
1788
1789         sc = rxq->vtnrx_sc;
1790         ifp = sc->vtnet_ifp;
1791
1792         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1793                 struct ether_header *eh = mtod(m, struct ether_header *);
1794                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1795                         vtnet_vlan_tag_remove(m);
1796                         /*
1797                          * With the 802.1Q header removed, update the
1798                          * checksum starting location accordingly.
1799                          */
1800                         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1801                                 hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
1802                 }
1803         }
1804
1805         m->m_pkthdr.flowid = rxq->vtnrx_id;
1806         M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
1807
1808         if (hdr->flags &
1809             (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
1810                 if (vtnet_rxq_csum(rxq, m, hdr) == 0)
1811                         rxq->vtnrx_stats.vrxs_csum++;
1812                 else
1813                         rxq->vtnrx_stats.vrxs_csum_failed++;
1814         }
1815
1816         rxq->vtnrx_stats.vrxs_ipackets++;
1817         rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
1818
1819         VTNET_RXQ_UNLOCK(rxq);
1820         (*ifp->if_input)(ifp, m);
1821         VTNET_RXQ_LOCK(rxq);
1822 }
1823
1824 static int
1825 vtnet_rxq_eof(struct vtnet_rxq *rxq)
1826 {
1827         struct virtio_net_hdr lhdr, *hdr;
1828         struct vtnet_softc *sc;
1829         struct ifnet *ifp;
1830         struct virtqueue *vq;
1831         int deq, count;
1832
1833         sc = rxq->vtnrx_sc;
1834         vq = rxq->vtnrx_vq;
1835         ifp = sc->vtnet_ifp;
1836         deq = 0;
1837         count = sc->vtnet_rx_process_limit;
1838
1839         VTNET_RXQ_LOCK_ASSERT(rxq);
1840
1841         while (count-- > 0) {
1842                 struct mbuf *m;
1843                 int len, nbufs, adjsz;
1844
1845                 m = virtqueue_dequeue(vq, &len);
1846                 if (m == NULL)
1847                         break;
1848                 deq++;
1849
1850                 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1851                         rxq->vtnrx_stats.vrxs_ierrors++;
1852                         vtnet_rxq_discard_buf(rxq, m);
1853                         continue;
1854                 }
1855
1856                 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
1857                         struct virtio_net_hdr_mrg_rxbuf *mhdr =
1858                             mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1859                         nbufs = vtnet_htog16(sc, mhdr->num_buffers);
1860                         adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1861                 } else if (vtnet_modern(sc)) {
1862                         nbufs = 1; /* num_buffers is always 1 */
1863                         adjsz = sizeof(struct virtio_net_hdr_v1);
1864                 } else {
1865                         nbufs = 1;
1866                         adjsz = sizeof(struct vtnet_rx_header);
1867                         /*
1868                          * Account for our gap between the header and start of
1869                          * data to keep the segments separated.
1870                          */
1871                         len += VTNET_RX_HEADER_PAD;
1872                 }
1873
1874                 if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
1875                         rxq->vtnrx_stats.vrxs_iqdrops++;
1876                         vtnet_rxq_discard_buf(rxq, m);
1877                         if (nbufs > 1)
1878                                 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1879                         continue;
1880                 }
1881
1882                 m->m_pkthdr.len = len;
1883                 m->m_pkthdr.rcvif = ifp;
1884                 m->m_pkthdr.csum_flags = 0;
1885
1886                 if (nbufs > 1) {
1887                         /* Dequeue the rest of chain. */
1888                         if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
1889                                 continue;
1890                 }
1891
1892                 /*
1893                  * Save an endian swapped version of the header prior to it
1894                  * being stripped. The header is always at the start of the
1895                  * mbuf data. num_buffers was already saved (and not needed)
1896                  * so use the standard header.
1897                  */
1898                 hdr = mtod(m, struct virtio_net_hdr *);
1899                 lhdr.flags = hdr->flags;
1900                 lhdr.gso_type = hdr->gso_type;
1901                 lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
1902                 lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
1903                 lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
1904                 lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
1905                 m_adj(m, adjsz);
1906
1907                 if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
1908                         pfil_return_t pfil;
1909
1910                         pfil = pfil_run_hooks(sc->vtnet_pfil, &m, ifp, PFIL_IN,
1911                             NULL);
1912                         switch (pfil) {
1913                         case PFIL_REALLOCED:
1914                                 m = pfil_mem2mbuf(m->m_data);
1915                                 break;
1916                         case PFIL_DROPPED:
1917                         case PFIL_CONSUMED:
1918                                 continue;
1919                         default:
1920                                 KASSERT(pfil == PFIL_PASS,
1921                                     ("Filter returned %d!", pfil));
1922                         }
1923                 }
1924
1925                 vtnet_rxq_input(rxq, m, &lhdr);
1926
1927                 /* Must recheck after dropping the Rx lock. */
1928                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1929                         break;
1930         }
1931
1932         if (deq > 0)
1933                 virtqueue_notify(vq);
1934
1935         return (count > 0 ? 0 : EAGAIN);
1936 }
1937
1938 static void
1939 vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries)
1940 {
1941         struct vtnet_softc *sc;
1942         struct ifnet *ifp;
1943         int more;
1944 #ifdef DEV_NETMAP
1945         int nmirq;
1946 #endif /* DEV_NETMAP */
1947
1948         sc = rxq->vtnrx_sc;
1949         ifp = sc->vtnet_ifp;
1950
1951         if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
1952                 /*
1953                  * Ignore this interrupt. Either this is a spurious interrupt
1954                  * or multiqueue without per-VQ MSIX so every queue needs to
1955                  * be polled (a brain dead configuration we could try harder
1956                  * to avoid).
1957                  */
1958                 vtnet_rxq_disable_intr(rxq);
1959                 return;
1960         }
1961
1962         VTNET_RXQ_LOCK(rxq);
1963
1964 #ifdef DEV_NETMAP
1965         /*
1966          * We call netmap_rx_irq() under lock to prevent concurrent calls.
1967          * This is not necessary to serialize the access to the RX vq, but
1968          * rather to avoid races that may happen if this interface is
1969          * attached to a VALE switch, which would cause received packets
1970          * to stall in the RX queue (nm_kr_tryget() could find the kring
1971          * busy when called from netmap_bwrap_intr_notify()).
1972          */
1973         nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
1974         if (nmirq != NM_IRQ_PASS) {
1975                 VTNET_RXQ_UNLOCK(rxq);
1976                 if (nmirq == NM_IRQ_RESCHED) {
1977                         taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1978                 }
1979                 return;
1980         }
1981 #endif /* DEV_NETMAP */
1982
1983 again:
1984         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1985                 VTNET_RXQ_UNLOCK(rxq);
1986                 return;
1987         }
1988
1989         more = vtnet_rxq_eof(rxq);
1990         if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1991                 if (!more)
1992                         vtnet_rxq_disable_intr(rxq);
1993                 /*
1994                  * This is an occasional condition or race (when !more),
1995                  * so retry a few times before scheduling the taskqueue.
1996                  */
1997                 if (tries-- > 0)
1998                         goto again;
1999
2000                 rxq->vtnrx_stats.vrxs_rescheduled++;
2001                 VTNET_RXQ_UNLOCK(rxq);
2002                 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2003         } else
2004                 VTNET_RXQ_UNLOCK(rxq);
2005 }
2006
2007 static void
2008 vtnet_rx_vq_intr(void *xrxq)
2009 {
2010         struct vtnet_rxq *rxq;
2011
2012         rxq = xrxq;
2013         vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES);
2014 }
2015
2016 static void
2017 vtnet_rxq_tq_intr(void *xrxq, int pending)
2018 {
2019         struct vtnet_rxq *rxq;
2020
2021         rxq = xrxq;
2022         vtnet_rx_vq_process(rxq, 0);
2023 }
2024
2025 static int
2026 vtnet_txq_intr_threshold(struct vtnet_txq *txq)
2027 {
2028         struct vtnet_softc *sc;
2029         int threshold;
2030
2031         sc = txq->vtntx_sc;
2032
2033         /*
2034          * The Tx interrupt is disabled until the queue free count falls
2035          * below our threshold. Completed frames are drained from the Tx
2036          * virtqueue before transmitting new frames and in the watchdog
2037          * callout, so the frequency of Tx interrupts is greatly reduced,
2038          * at the cost of not freeing mbufs as quickly as they otherwise
2039          * would be.
2040          */
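        /*
         * Purely as an illustration: a 256-entry Tx virtqueue gives a
         * threshold of 64, so Tx interrupts are only of interest once 64 or
         * fewer descriptors remain free.
         */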
2041         threshold = virtqueue_size(txq->vtntx_vq) / 4;
2042
2043         /*
2044          * Without indirect descriptors, leave enough room for the largest
2045          * number of segments we handle.
2046          */
2047         if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
2048             threshold < sc->vtnet_tx_nsegs)
2049                 threshold = sc->vtnet_tx_nsegs;
2050
2051         return (threshold);
2052 }
2053
2054 static int
2055 vtnet_txq_below_threshold(struct vtnet_txq *txq)
2056 {
2057         struct virtqueue *vq;
2058
2059         vq = txq->vtntx_vq;
2060
2061         return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold);
2062 }
2063
2064 static int
2065 vtnet_txq_notify(struct vtnet_txq *txq)
2066 {
2067         struct virtqueue *vq;
2068
2069         vq = txq->vtntx_vq;
2070
2071         txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2072         virtqueue_notify(vq);
2073
2074         if (vtnet_txq_enable_intr(txq) == 0)
2075                 return (0);
2076
2077         /*
2078          * Drain frames that were completed since last checked. If this
2079          * causes the queue to go above the threshold, the caller should
2080          * continue transmitting.
2081          */
2082         if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
2083                 virtqueue_disable_intr(vq);
2084                 return (1);
2085         }
2086
2087         return (0);
2088 }
2089
2090 static void
2091 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
2092 {
2093         struct virtqueue *vq;
2094         struct vtnet_tx_header *txhdr;
2095         int last;
2096 #ifdef DEV_NETMAP
2097         struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp),
2098                                                         txq->vtntx_id, NR_TX);
2099 #else  /* !DEV_NETMAP */
2100         void *kring = NULL;
2101 #endif /* !DEV_NETMAP */
2102
2103         vq = txq->vtntx_vq;
2104         last = 0;
2105
2106         while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
2107                 if (kring == NULL) {
2108                         m_freem(txhdr->vth_mbuf);
2109                         uma_zfree(vtnet_tx_header_zone, txhdr);
2110                 }
2111         }
2112
2113         KASSERT(virtqueue_empty(vq),
2114             ("%s: mbufs remaining in tx queue %p", __func__, txq));
2115 }
2116
2117 /*
2118  * BMV: This can go away once we finally have offsets in the mbuf header.
2119  */
2120 static int
2121 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
2122     int *proto, int *start)
2123 {
2124         struct vtnet_softc *sc;
2125         struct ether_vlan_header *evh;
2126         int offset;
2127
2128         sc = txq->vtntx_sc;
2129
2130         evh = mtod(m, struct ether_vlan_header *);
2131         if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2132                 /* BMV: We should handle nested VLAN tags too. */
2133                 *etype = ntohs(evh->evl_proto);
2134                 offset = sizeof(struct ether_vlan_header);
2135         } else {
2136                 *etype = ntohs(evh->evl_encap_proto);
2137                 offset = sizeof(struct ether_header);
2138         }
2139
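        /*
         * At this point *etype holds the (possibly VLAN encapsulated)
         * EtherType and offset points at the L3 header; e.g. for an untagged
         * IPv4 packet offset is 14, and *start below becomes 14 plus the IP
         * header length.
         */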
2140         switch (*etype) {
2141 #if defined(INET)
2142         case ETHERTYPE_IP: {
2143                 struct ip *ip, iphdr;
2144                 if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2145                         m_copydata(m, offset, sizeof(struct ip),
2146                             (caddr_t) &iphdr);
2147                         ip = &iphdr;
2148                 } else
2149                         ip = (struct ip *)(m->m_data + offset);
2150                 *proto = ip->ip_p;
2151                 *start = offset + (ip->ip_hl << 2);
2152                 break;
2153         }
2154 #endif
2155 #if defined(INET6)
2156         case ETHERTYPE_IPV6:
2157                 *proto = -1;
2158                 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2159                 /* Assert the network stack sent us a valid packet. */
2160                 KASSERT(*start > offset,
2161                     ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2162                     *start, offset, *proto));
2163                 break;
2164 #endif
2165         default:
2166                 sc->vtnet_stats.tx_csum_bad_ethtype++;
2167                 return (EINVAL);
2168         }
2169
2170         return (0);
2171 }
2172
2173 static int
2174 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2175     int offset, struct virtio_net_hdr *hdr)
2176 {
2177         static struct timeval lastecn;
2178         static int curecn;
2179         struct vtnet_softc *sc;
2180         struct tcphdr *tcp, tcphdr;
2181
2182         sc = txq->vtntx_sc;
2183
2184         if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2185                 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2186                 tcp = &tcphdr;
2187         } else
2188                 tcp = (struct tcphdr *)(m->m_data + offset);
2189
2190         hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
2191         hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
2192         hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2193             VIRTIO_NET_HDR_GSO_TCPV6;
2194
2195         if (tcp->th_flags & TH_CWR) {
2196                 /*
2197                  * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
2198                  * ECN support is not on a per-interface basis, but globally via
2199                  * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
2200                  */
2201                 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2202                         if (ppsratecheck(&lastecn, &curecn, 1))
2203                                 if_printf(sc->vtnet_ifp,
2204                                     "TSO with ECN not negotiated with host\n");
2205                         return (ENOTSUP);
2206                 }
2207                 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2208         }
2209
2210         txq->vtntx_stats.vtxs_tso++;
2211
2212         return (0);
2213 }
2214
2215 static struct mbuf *
2216 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2217     struct virtio_net_hdr *hdr)
2218 {
2219         struct vtnet_softc *sc;
2220         int flags, etype, csum_start, proto, error;
2221
2222         sc = txq->vtntx_sc;
2223         flags = m->m_pkthdr.csum_flags;
2224
2225         error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2226         if (error)
2227                 goto drop;
2228
2229         if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
2230             (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
2231                 /*
2232                  * We could compare the IP protocol vs the CSUM_ flag too,
2233                  * but that really should not be necessary.
2234                  */
2235                 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2236                 hdr->csum_start = vtnet_gtoh16(sc, csum_start);
2237                 hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
2238                 txq->vtntx_stats.vtxs_csum++;
2239         }
2240
2241         if (flags & CSUM_TSO) {
2242                 if (__predict_false(proto != IPPROTO_TCP)) {
2243                         /* Likely failed to correctly parse the mbuf. */
2244                         sc->vtnet_stats.tx_tso_not_tcp++;
2245                         goto drop;
2246                 }
2247
2248                 KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
2249                     ("%s: mbuf %p TSO without checksum offload %#x",
2250                     __func__, m, flags));
2251
2252                 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2253                 if (error)
2254                         goto drop;
2255         }
2256
2257         return (m);
2258
2259 drop:
2260         m_freem(m);
2261         return (NULL);
2262 }
2263
2264 static int
2265 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2266     struct vtnet_tx_header *txhdr)
2267 {
2268         struct vtnet_softc *sc;
2269         struct virtqueue *vq;
2270         struct sglist *sg;
2271         struct mbuf *m;
2272         int error;
2273
2274         sc = txq->vtntx_sc;
2275         vq = txq->vtntx_vq;
2276         sg = txq->vtntx_sg;
2277         m = *m_head;
2278
2279         sglist_reset(sg);
2280         error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2281         if (error != 0 || sg->sg_nseg != 1) {
2282                 KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
2283                     __func__, error, sg->sg_nseg));
2284                 goto fail;
2285         }
2286
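        /*
         * If the chain needs more segments than the Tx sglist provides,
         * collapse it with m_defrag() and retry once before dropping the
         * packet.
         */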
2287         error = sglist_append_mbuf(sg, m);
2288         if (error) {
2289                 m = m_defrag(m, M_NOWAIT);
2290                 if (m == NULL)
2291                         goto fail;
2292
2293                 *m_head = m;
2294                 sc->vtnet_stats.tx_defragged++;
2295
2296                 error = sglist_append_mbuf(sg, m);
2297                 if (error)
2298                         goto fail;
2299         }
2300
2301         txhdr->vth_mbuf = m;
2302         error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2303
2304         return (error);
2305
2306 fail:
2307         sc->vtnet_stats.tx_defrag_failed++;
2308         m_freem(*m_head);
2309         *m_head = NULL;
2310
2311         return (ENOBUFS);
2312 }
2313
2314 static int
2315 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
2316 {
2317         struct vtnet_tx_header *txhdr;
2318         struct virtio_net_hdr *hdr;
2319         struct mbuf *m;
2320         int error;
2321
2322         m = *m_head;
2323         M_ASSERTPKTHDR(m);
2324
2325         txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
2326         if (txhdr == NULL) {
2327                 m_freem(m);
2328                 *m_head = NULL;
2329                 return (ENOMEM);
2330         }
2331
2332         /*
2333          * Always use the non-mergeable header, regardless of whether mergeable
2334          * headers were negotiated, because num_buffers is always zero for transmit.
2335          * The vtnet_hdr_size is used to enqueue the right header size segment.
2336          */
2337         hdr = &txhdr->vth_uhdr.hdr;
2338
2339         if (m->m_flags & M_VLANTAG) {
2340                 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2341                 if ((*m_head = m) == NULL) {
2342                         error = ENOBUFS;
2343                         goto fail;
2344                 }
2345                 m->m_flags &= ~M_VLANTAG;
2346         }
2347
2348         if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2349                 m = vtnet_txq_offload(txq, m, hdr);
2350                 if ((*m_head = m) == NULL) {
2351                         error = ENOBUFS;
2352                         goto fail;
2353                 }
2354         }
2355
2356         error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2357 fail:
2358         if (error)
2359                 uma_zfree(vtnet_tx_header_zone, txhdr);
2360
2361         return (error);
2362 }
2363
2364 #ifdef VTNET_LEGACY_TX
2365
2366 static void
2367 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
2368 {
2369         struct vtnet_softc *sc;
2370         struct virtqueue *vq;
2371         struct mbuf *m0;
2372         int tries, enq;
2373
2374         sc = txq->vtntx_sc;
2375         vq = txq->vtntx_vq;
2376         tries = 0;
2377
2378         VTNET_TXQ_LOCK_ASSERT(txq);
2379
2380         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2381             sc->vtnet_link_active == 0)
2382                 return;
2383
2384         vtnet_txq_eof(txq);
2385
2386 again:
2387         enq = 0;
2388
2389         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2390                 if (virtqueue_full(vq))
2391                         break;
2392
2393                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
2394                 if (m0 == NULL)
2395                         break;
2396
2397                 if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
2398                         if (m0 != NULL)
2399                                 IFQ_DRV_PREPEND(&ifp->if_snd, m0);
2400                         break;
2401                 }
2402
2403                 enq++;
2404                 ETHER_BPF_MTAP(ifp, m0);
2405         }
2406
2407         if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2408                 if (tries++ < VTNET_NOTIFY_RETRIES)
2409                         goto again;
2410
2411                 txq->vtntx_stats.vtxs_rescheduled++;
2412                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2413         }
2414 }
2415
2416 static void
2417 vtnet_start(struct ifnet *ifp)
2418 {
2419         struct vtnet_softc *sc;
2420         struct vtnet_txq *txq;
2421
2422         sc = ifp->if_softc;
2423         txq = &sc->vtnet_txqs[0];
2424
2425         VTNET_TXQ_LOCK(txq);
2426         vtnet_start_locked(txq, ifp);
2427         VTNET_TXQ_UNLOCK(txq);
2428 }
2429
2430 #else /* !VTNET_LEGACY_TX */
2431
2432 static int
2433 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2434 {
2435         struct vtnet_softc *sc;
2436         struct virtqueue *vq;
2437         struct buf_ring *br;
2438         struct ifnet *ifp;
2439         int enq, tries, error;
2440
2441         sc = txq->vtntx_sc;
2442         vq = txq->vtntx_vq;
2443         br = txq->vtntx_br;
2444         ifp = sc->vtnet_ifp;
2445         tries = 0;
2446         error = 0;
2447
2448         VTNET_TXQ_LOCK_ASSERT(txq);
2449
2450         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2451             sc->vtnet_link_active == 0) {
2452                 if (m != NULL)
2453                         error = drbr_enqueue(ifp, br, m);
2454                 return (error);
2455         }
2456
2457         if (m != NULL) {
2458                 error = drbr_enqueue(ifp, br, m);
2459                 if (error)
2460                         return (error);
2461         }
2462
2463         vtnet_txq_eof(txq);
2464
2465 again:
2466         enq = 0;
2467
2468         while ((m = drbr_peek(ifp, br)) != NULL) {
2469                 if (virtqueue_full(vq)) {
2470                         drbr_putback(ifp, br, m);
2471                         break;
2472                 }
2473
2474                 if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
2475                         if (m != NULL)
2476                                 drbr_putback(ifp, br, m);
2477                         else
2478                                 drbr_advance(ifp, br);
2479                         break;
2480                 }
2481                 drbr_advance(ifp, br);
2482
2483                 enq++;
2484                 ETHER_BPF_MTAP(ifp, m);
2485         }
2486
2487         if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2488                 if (tries++ < VTNET_NOTIFY_RETRIES)
2489                         goto again;
2490
2491                 txq->vtntx_stats.vtxs_rescheduled++;
2492                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2493         }
2494
2495         return (0);
2496 }
2497
2498 static int
2499 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2500 {
2501         struct vtnet_softc *sc;
2502         struct vtnet_txq *txq;
2503         int i, npairs, error;
2504
2505         sc = ifp->if_softc;
2506         npairs = sc->vtnet_act_vq_pairs;
2507
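        /*
         * Pick the Tx queue from the mbuf's flow id when the stack supplied
         * one so a given flow stays on a single queue; otherwise fall back
         * to the current CPU.
         */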
2508         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2509                 i = m->m_pkthdr.flowid % npairs;
2510         else
2511                 i = curcpu % npairs;
2512
2513         txq = &sc->vtnet_txqs[i];
2514
2515         if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2516                 error = vtnet_txq_mq_start_locked(txq, m);
2517                 VTNET_TXQ_UNLOCK(txq);
2518         } else {
2519                 error = drbr_enqueue(ifp, txq->vtntx_br, m);
2520                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2521         }
2522
2523         return (error);
2524 }
2525
2526 static void
2527 vtnet_txq_tq_deferred(void *xtxq, int pending)
2528 {
2529         struct vtnet_softc *sc;
2530         struct vtnet_txq *txq;
2531
2532         txq = xtxq;
2533         sc = txq->vtntx_sc;
2534
2535         VTNET_TXQ_LOCK(txq);
2536         if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2537                 vtnet_txq_mq_start_locked(txq, NULL);
2538         VTNET_TXQ_UNLOCK(txq);
2539 }
2540
2541 #endif /* VTNET_LEGACY_TX */
2542
2543 static void
2544 vtnet_txq_start(struct vtnet_txq *txq)
2545 {
2546         struct vtnet_softc *sc;
2547         struct ifnet *ifp;
2548
2549         sc = txq->vtntx_sc;
2550         ifp = sc->vtnet_ifp;
2551
2552 #ifdef VTNET_LEGACY_TX
2553         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2554                 vtnet_start_locked(txq, ifp);
2555 #else
2556         if (!drbr_empty(ifp, txq->vtntx_br))
2557                 vtnet_txq_mq_start_locked(txq, NULL);
2558 #endif
2559 }
2560
2561 static void
2562 vtnet_txq_tq_intr(void *xtxq, int pending)
2563 {
2564         struct vtnet_softc *sc;
2565         struct vtnet_txq *txq;
2566         struct ifnet *ifp;
2567
2568         txq = xtxq;
2569         sc = txq->vtntx_sc;
2570         ifp = sc->vtnet_ifp;
2571
2572         VTNET_TXQ_LOCK(txq);
2573
2574         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2575                 VTNET_TXQ_UNLOCK(txq);
2576                 return;
2577         }
2578
2579         vtnet_txq_eof(txq);
2580         vtnet_txq_start(txq);
2581
2582         VTNET_TXQ_UNLOCK(txq);
2583 }
2584
2585 static int
2586 vtnet_txq_eof(struct vtnet_txq *txq)
2587 {
2588         struct virtqueue *vq;
2589         struct vtnet_tx_header *txhdr;
2590         struct mbuf *m;
2591         int deq;
2592
2593         vq = txq->vtntx_vq;
2594         deq = 0;
2595         VTNET_TXQ_LOCK_ASSERT(txq);
2596
2597         while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2598                 m = txhdr->vth_mbuf;
2599                 deq++;
2600
2601                 txq->vtntx_stats.vtxs_opackets++;
2602                 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2603                 if (m->m_flags & M_MCAST)
2604                         txq->vtntx_stats.vtxs_omcasts++;
2605
2606                 m_freem(m);
2607                 uma_zfree(vtnet_tx_header_zone, txhdr);
2608         }
2609
2610         if (virtqueue_empty(vq))
2611                 txq->vtntx_watchdog = 0;
2612
2613         return (deq);
2614 }
2615
2616 static void
2617 vtnet_tx_vq_intr(void *xtxq)
2618 {
2619         struct vtnet_softc *sc;
2620         struct vtnet_txq *txq;
2621         struct ifnet *ifp;
2622
2623         txq = xtxq;
2624         sc = txq->vtntx_sc;
2625         ifp = sc->vtnet_ifp;
2626
2627         if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2628                 /*
2629                  * Ignore this interrupt. Either this is a spurious interrupt
2630                  * or multiqueue without per-VQ MSIX so every queue needs to
2631                  * be polled (a brain dead configuration we could try harder
2632                  * to avoid).
2633                  */
2634                 vtnet_txq_disable_intr(txq);
2635                 return;
2636         }
2637
2638 #ifdef DEV_NETMAP
2639         if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
2640                 return;
2641 #endif /* DEV_NETMAP */
2642
2643         VTNET_TXQ_LOCK(txq);
2644
2645         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2646                 VTNET_TXQ_UNLOCK(txq);
2647                 return;
2648         }
2649
2650         vtnet_txq_eof(txq);
2651         vtnet_txq_start(txq);
2652
2653         VTNET_TXQ_UNLOCK(txq);
2654 }
2655
2656 static void
2657 vtnet_tx_start_all(struct vtnet_softc *sc)
2658 {
2659         struct vtnet_txq *txq;
2660         int i;
2661
2662         VTNET_CORE_LOCK_ASSERT(sc);
2663
2664         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2665                 txq = &sc->vtnet_txqs[i];
2666
2667                 VTNET_TXQ_LOCK(txq);
2668                 vtnet_txq_start(txq);
2669                 VTNET_TXQ_UNLOCK(txq);
2670         }
2671 }
2672
2673 #ifndef VTNET_LEGACY_TX
2674 static void
2675 vtnet_qflush(struct ifnet *ifp)
2676 {
2677         struct vtnet_softc *sc;
2678         struct vtnet_txq *txq;
2679         struct mbuf *m;
2680         int i;
2681
2682         sc = ifp->if_softc;
2683
2684         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2685                 txq = &sc->vtnet_txqs[i];
2686
2687                 VTNET_TXQ_LOCK(txq);
2688                 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2689                         m_freem(m);
2690                 VTNET_TXQ_UNLOCK(txq);
2691         }
2692
2693         if_qflush(ifp);
2694 }
2695 #endif
2696
2697 static int
2698 vtnet_watchdog(struct vtnet_txq *txq)
2699 {
2700         struct ifnet *ifp;
2701
2702         ifp = txq->vtntx_sc->vtnet_ifp;
2703
2704         VTNET_TXQ_LOCK(txq);
2705         if (txq->vtntx_watchdog == 1) {
2706                 /*
2707                  * Only drain completed frames if the watchdog is about to
2708                  * expire. If any frames were drained, there may be enough
2709                  * free descriptors now available to transmit queued frames.
2710                  * In that case, the timer will immediately be decremented
2711                  * below, but the timeout is generous enough that should not
2712                  * below, but the timeout is generous enough that this should not
2713                  */
2714                 if (vtnet_txq_eof(txq) != 0)
2715                         vtnet_txq_start(txq);
2716         }
2717
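        /*
         * The counter is armed to VTNET_TX_TIMEOUT by vtnet_txq_notify() and
         * decremented here from vtnet_tick()'s roughly once-per-second
         * callout, so a timeout is only reported after VTNET_TX_TIMEOUT
         * consecutive ticks without the queue draining.
         */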
2718         if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2719                 VTNET_TXQ_UNLOCK(txq);
2720                 return (0);
2721         }
2722         VTNET_TXQ_UNLOCK(txq);
2723
2724         if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
2725         return (1);
2726 }
2727
2728 static void
2729 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
2730     struct vtnet_txq_stats *txacc)
2731 {
2732
2733         bzero(rxacc, sizeof(struct vtnet_rxq_stats));
2734         bzero(txacc, sizeof(struct vtnet_txq_stats));
2735
2736         for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2737                 struct vtnet_rxq_stats *rxst;
2738                 struct vtnet_txq_stats *txst;
2739
2740                 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
2741                 rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
2742                 rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
2743                 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
2744                 rxacc->vrxs_csum += rxst->vrxs_csum;
2745                 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
2746                 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
2747
2748                 txst = &sc->vtnet_txqs[i].vtntx_stats;
2749                 txacc->vtxs_opackets += txst->vtxs_opackets;
2750                 txacc->vtxs_obytes += txst->vtxs_obytes;
2751                 txacc->vtxs_csum += txst->vtxs_csum;
2752                 txacc->vtxs_tso += txst->vtxs_tso;
2753                 txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
2754         }
2755 }
2756
2757 static uint64_t
2758 vtnet_get_counter(if_t ifp, ift_counter cnt)
2759 {
2760         struct vtnet_softc *sc;
2761         struct vtnet_rxq_stats rxaccum;
2762         struct vtnet_txq_stats txaccum;
2763
2764         sc = if_getsoftc(ifp);
2765         vtnet_accum_stats(sc, &rxaccum, &txaccum);
2766
2767         switch (cnt) {
2768         case IFCOUNTER_IPACKETS:
2769                 return (rxaccum.vrxs_ipackets);
2770         case IFCOUNTER_IQDROPS:
2771                 return (rxaccum.vrxs_iqdrops);
2772         case IFCOUNTER_IERRORS:
2773                 return (rxaccum.vrxs_ierrors);
2774         case IFCOUNTER_OPACKETS:
2775                 return (txaccum.vtxs_opackets);
2776 #ifndef VTNET_LEGACY_TX
2777         case IFCOUNTER_OBYTES:
2778                 return (txaccum.vtxs_obytes);
2779         case IFCOUNTER_OMCASTS:
2780                 return (txaccum.vtxs_omcasts);
2781 #endif
2782         default:
2783                 return (if_get_counter_default(ifp, cnt));
2784         }
2785 }
2786
2787 static void
2788 vtnet_tick(void *xsc)
2789 {
2790         struct vtnet_softc *sc;
2791         struct ifnet *ifp;
2792         int i, timedout;
2793
2794         sc = xsc;
2795         ifp = sc->vtnet_ifp;
2796         timedout = 0;
2797
2798         VTNET_CORE_LOCK_ASSERT(sc);
2799
2800         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
2801                 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
2802
2803         if (timedout != 0) {
2804                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2805                 vtnet_init_locked(sc, 0);
2806         } else
2807                 callout_schedule(&sc->vtnet_tick_ch, hz);
2808 }
2809
2810 static void
2811 vtnet_start_taskqueues(struct vtnet_softc *sc)
2812 {
2813         device_t dev;
2814         struct vtnet_rxq *rxq;
2815         struct vtnet_txq *txq;
2816         int i, error;
2817
2818         dev = sc->vtnet_dev;
2819
2820         /*
2821          * Errors here are very difficult to recover from - we cannot
2822          * easily fail because, if this is during boot, we will hang
2823          * when freeing any successfully started taskqueues because
2824          * the scheduler isn't up yet.
2825          *
2826          * Most drivers just ignore the return value - it only fails
2827          * with ENOMEM so an error is not likely.
2828          */
2829         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2830                 rxq = &sc->vtnet_rxqs[i];
2831                 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
2832                     "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
2833                 if (error) {
2834                         device_printf(dev, "failed to start rx taskq %d\n",
2835                             rxq->vtnrx_id);
2836                 }
2837
2838                 txq = &sc->vtnet_txqs[i];
2839                 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
2840                     "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
2841                 if (error) {
2842                         device_printf(dev, "failed to start tx taskq %d\n",
2843                             txq->vtntx_id);
2844                 }
2845         }
2846 }
2847
2848 static void
2849 vtnet_free_taskqueues(struct vtnet_softc *sc)
2850 {
2851         struct vtnet_rxq *rxq;
2852         struct vtnet_txq *txq;
2853         int i;
2854
2855         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2856                 rxq = &sc->vtnet_rxqs[i];
2857                 if (rxq->vtnrx_tq != NULL) {
2858                         taskqueue_free(rxq->vtnrx_tq);
2859                         rxq->vtnrx_tq = NULL;
2860                 }
2861
2862                 txq = &sc->vtnet_txqs[i];
2863                 if (txq->vtntx_tq != NULL) {
2864                         taskqueue_free(txq->vtntx_tq);
2865                         txq->vtntx_tq = NULL;
2866                 }
2867         }
2868 }
2869
2870 static void
2871 vtnet_drain_taskqueues(struct vtnet_softc *sc)
2872 {
2873         struct vtnet_rxq *rxq;
2874         struct vtnet_txq *txq;
2875         int i;
2876
2877         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2878                 rxq = &sc->vtnet_rxqs[i];
2879                 if (rxq->vtnrx_tq != NULL)
2880                         taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2881
2882                 txq = &sc->vtnet_txqs[i];
2883                 if (txq->vtntx_tq != NULL) {
2884                         taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
2885 #ifndef VTNET_LEGACY_TX
2886                         taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
2887 #endif
2888                 }
2889         }
2890 }
2891
2892 static void
2893 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
2894 {
2895         struct vtnet_rxq *rxq;
2896         struct vtnet_txq *txq;
2897         int i;
2898
2899         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2900                 rxq = &sc->vtnet_rxqs[i];
2901                 vtnet_rxq_free_mbufs(rxq);
2902
2903                 txq = &sc->vtnet_txqs[i];
2904                 vtnet_txq_free_mbufs(txq);
2905         }
2906 }
2907
2908 static void
2909 vtnet_stop_rendezvous(struct vtnet_softc *sc)
2910 {
2911         struct vtnet_rxq *rxq;
2912         struct vtnet_txq *txq;
2913         int i;
2914
2915         /*
2916          * Lock and unlock the per-queue mutex so we know the stop
2917          * state is visible. Doing only the active queues should be
2918          * sufficient, but it does not cost much extra to do all the
2919          * queues. Note we hold the core mutex here too.
2920          */
2921         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2922                 rxq = &sc->vtnet_rxqs[i];
2923                 VTNET_RXQ_LOCK(rxq);
2924                 VTNET_RXQ_UNLOCK(rxq);
2925
2926                 txq = &sc->vtnet_txqs[i];
2927                 VTNET_TXQ_LOCK(txq);
2928                 VTNET_TXQ_UNLOCK(txq);
2929         }
2930 }
2931
2932 static void
2933 vtnet_stop(struct vtnet_softc *sc)
2934 {
2935         device_t dev;
2936         struct ifnet *ifp;
2937
2938         dev = sc->vtnet_dev;
2939         ifp = sc->vtnet_ifp;
2940
2941         VTNET_CORE_LOCK_ASSERT(sc);
2942
2943         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2944         sc->vtnet_link_active = 0;
2945         callout_stop(&sc->vtnet_tick_ch);
2946
2947         /* Only advisory. */
2948         vtnet_disable_interrupts(sc);
2949
2950 #ifdef DEV_NETMAP
2951         /* Stop any pending txsync/rxsync and disable them. */
2952         netmap_disable_all_rings(ifp);
2953 #endif /* DEV_NETMAP */
2954
2955         /*
2956          * Stop the host adapter. This resets it to the pre-initialized
2957          * state. It will not generate any interrupts until after it is
2958          * reinitialized.
2959          */
2960         virtio_stop(dev);
2961         vtnet_stop_rendezvous(sc);
2962
2963         /* Free any mbufs left in the virtqueues. */
2964         vtnet_drain_rxtx_queues(sc);
2965 }
2966
2967 static int
2968 vtnet_virtio_reinit(struct vtnet_softc *sc)
2969 {
2970         device_t dev;
2971         struct ifnet *ifp;
2972         uint64_t features;
2973         int mask, error;
2974
2975         dev = sc->vtnet_dev;
2976         ifp = sc->vtnet_ifp;
2977         features = sc->vtnet_features;
2978
2979         mask = 0;
2980 #if defined(INET)
2981         mask |= IFCAP_RXCSUM;
2982 #endif
2983 #if defined (INET6)
2984         mask |= IFCAP_RXCSUM_IPV6;
2985 #endif
2986
2987         /*
2988          * Re-negotiate with the host, removing any disabled receive
2989          * features. Transmit features are disabled only on our side
2990          * via if_capenable and if_hwassist.
2991          */
2992
2993         if (ifp->if_capabilities & mask) {
2994                 /*
2995                  * We require both IPv4 and IPv6 offloading to be enabled
2996          * in order to negotiate it: VirtIO does not distinguish
2997                  * between the two.
2998                  */
2999                 if ((ifp->if_capenable & mask) != mask)
3000                         features &= ~VIRTIO_NET_F_GUEST_CSUM;
3001         }
3002
3003         if (ifp->if_capabilities & IFCAP_LRO) {
3004                 if ((ifp->if_capenable & IFCAP_LRO) == 0)
3005                         features &= ~VTNET_LRO_FEATURES;
3006         }
3007
3008         if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
3009                 if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
3010                         features &= ~VIRTIO_NET_F_CTRL_VLAN;
3011         }
3012
3013         error = virtio_reinit(dev, features);
3014         if (error)
3015                 device_printf(dev, "virtio reinit error %d\n", error);
3016
3017         return (error);
3018 }
3019
3020 static void
3021 vtnet_init_rx_filters(struct vtnet_softc *sc)
3022 {
3023         struct ifnet *ifp;
3024
3025         ifp = sc->vtnet_ifp;
3026
3027         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
3028                 /* Restore promiscuous and all-multicast modes. */
3029                 vtnet_rx_filter(sc);
3030                 /* Restore filtered MAC addresses. */
3031                 vtnet_rx_filter_mac(sc);
3032         }
3033
3034         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
3035                 vtnet_rx_filter_vlan(sc);
3036 }
3037
3038 static int
3039 vtnet_init_rx_queues(struct vtnet_softc *sc)
3040 {
3041         device_t dev;
3042         struct ifnet *ifp;
3043         struct vtnet_rxq *rxq;
3044         int i, clustersz, error;
3045
3046         dev = sc->vtnet_dev;
3047         ifp = sc->vtnet_ifp;
3048
3049         clustersz = vtnet_rx_cluster_size(sc, ifp->if_mtu);
3050         sc->vtnet_rx_clustersz = clustersz;
3051
3052         if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
3053                 sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
3054                     VTNET_MAX_RX_SIZE, clustersz);
3055                 KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
3056                     ("%s: too many rx mbufs %d for %d segments", __func__,
3057                     sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
3058         } else
3059                 sc->vtnet_rx_nmbufs = 1;
3060
3061         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3062                 rxq = &sc->vtnet_rxqs[i];
3063
3064                 /* Hold the lock to satisfy asserts. */
3065                 VTNET_RXQ_LOCK(rxq);
3066                 error = vtnet_rxq_populate(rxq);
3067                 VTNET_RXQ_UNLOCK(rxq);
3068
3069                 if (error) {
3070                         device_printf(dev, "cannot populate Rx queue %d\n", i);
3071                         return (error);
3072                 }
3073         }
3074
3075         return (0);
3076 }
3077
3078 static int
3079 vtnet_init_tx_queues(struct vtnet_softc *sc)
3080 {
3081         struct vtnet_txq *txq;
3082         int i;
3083
3084         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3085                 txq = &sc->vtnet_txqs[i];
3086                 txq->vtntx_watchdog = 0;
3087                 txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq);
3088 #ifdef DEV_NETMAP
3089                 netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0);
3090 #endif /* DEV_NETMAP */
3091         }
3092
3093         return (0);
3094 }
3095
3096 static int
3097 vtnet_init_rxtx_queues(struct vtnet_softc *sc)
3098 {
3099         int error;
3100
3101         error = vtnet_init_rx_queues(sc);
3102         if (error)
3103                 return (error);
3104
3105         error = vtnet_init_tx_queues(sc);
3106         if (error)
3107                 return (error);
3108
3109         return (0);
3110 }
3111
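/*
 * When multiqueue was negotiated, ask the host through the control
 * virtqueue to use the requested number of queue pairs, falling back
 * to a single pair if the command fails.
 */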
3112 static void
3113 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
3114 {
3115         device_t dev;
3116         int npairs;
3117
3118         dev = sc->vtnet_dev;
3119
3120         if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) {
3121                 sc->vtnet_act_vq_pairs = 1;
3122                 return;
3123         }
3124
3125         npairs = sc->vtnet_requested_vq_pairs;
3126
3127         if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3128                 device_printf(dev,
3129                     "cannot set active queue pairs to %d\n", npairs);
3130                 npairs = 1;
3131         }
3132
3133         sc->vtnet_act_vq_pairs = npairs;
3134 }
3135
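/*
 * Resynchronize the driver state with the host after a reinit: program
 * the MAC address, select the active queue pairs, rebuild if_hwassist
 * from if_capenable, restore the Rx filters, and repopulate the queues
 * before enabling interrupts.
 */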
3136 static int
3137 vtnet_reinit(struct vtnet_softc *sc)
3138 {
3139         struct ifnet *ifp;
3140         int error;
3141
3142         ifp = sc->vtnet_ifp;
3143
3144         /* Use the current MAC address. */
3145         bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3146         vtnet_set_macaddr(sc);
3147
3148         vtnet_set_active_vq_pairs(sc);
3149
3150         ifp->if_hwassist = 0;
3151         if (ifp->if_capenable & IFCAP_TXCSUM)
3152                 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
3153         if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3154                 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
3155         if (ifp->if_capenable & IFCAP_TSO4)
3156                 ifp->if_hwassist |= CSUM_IP_TSO;
3157         if (ifp->if_capenable & IFCAP_TSO6)
3158                 ifp->if_hwassist |= CSUM_IP6_TSO;
3159
3160         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3161                 vtnet_init_rx_filters(sc);
3162
3163         error = vtnet_init_rxtx_queues(sc);
3164         if (error)
3165                 return (error);
3166
3167         vtnet_enable_interrupts(sc);
3168         ifp->if_drv_flags |= IFF_DRV_RUNNING;
3169
3170         return (0);
3171 }
3172
3173 static void
3174 vtnet_init_locked(struct vtnet_softc *sc, int init_mode)
3175 {
3176         device_t dev;
3177         struct ifnet *ifp;
3178
3179         dev = sc->vtnet_dev;
3180         ifp = sc->vtnet_ifp;
3181
3182         VTNET_CORE_LOCK_ASSERT(sc);
3183
3184         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3185                 return;
3186
3187         vtnet_stop(sc);
3188
3189 #ifdef DEV_NETMAP
3190         /* Once stopped we can update the netmap flags, if necessary. */
3191         switch (init_mode) {
3192         case VTNET_INIT_NETMAP_ENTER:
3193                 nm_set_native_flags(NA(ifp));
3194                 break;
3195         case VTNET_INIT_NETMAP_EXIT:
3196                 nm_clear_native_flags(NA(ifp));
3197                 break;
3198         }
3199 #endif /* DEV_NETMAP */
3200
3201         /* Reinitialize with the host. */
3202         if (vtnet_virtio_reinit(sc) != 0)
3203                 goto fail;
3204
3205         if (vtnet_reinit(sc) != 0)
3206                 goto fail;
3207
3208         virtio_reinit_complete(dev);
3209
3210         vtnet_update_link_status(sc);
3211         callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3212
3213 #ifdef DEV_NETMAP
3214         /* Re-enable txsync/rxsync. */
3215         netmap_enable_all_rings(ifp);
3216 #endif /* DEV_NETMAP */
3217
3218         return;
3219
3220 fail:
3221         vtnet_stop(sc);
3222 }
3223
3224 static void
3225 vtnet_init(void *xsc)
3226 {
3227         struct vtnet_softc *sc;
3228
3229         sc = xsc;
3230
3231         VTNET_CORE_LOCK(sc);
3232         vtnet_init_locked(sc, 0);
3233         VTNET_CORE_UNLOCK(sc);
3234 }
3235
3236 static void
3237 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3238 {
3239
3240         /*
3241          * The control virtqueue is only polled and therefore it should
3242          * already be empty.
3243          */
3244         KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
3245             ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq));
3246 }
3247
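/*
 * Execute a command on the control virtqueue. The control virtqueue is
 * used synchronously under the core lock: the command buffers are
 * enqueued, the host is notified, and the result is polled for.
 */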
3248 static void
3249 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3250     struct sglist *sg, int readable, int writable)
3251 {
3252         struct virtqueue *vq;
3253
3254         vq = sc->vtnet_ctrl_vq;
3255
3256         VTNET_CORE_LOCK_ASSERT(sc);
3257         MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ);
3258
3259         if (!virtqueue_empty(vq))
3260                 return;
3261
3262         /*
3263          * Poll for the response; the command will likely have already
3264          * completed by the time the notify returns.
3265          */
3266         if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) {
3267                 virtqueue_notify(vq);
3268                 virtqueue_poll(vq, NULL);
3269         }
3270 }
3271
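/*
 * Set the primary MAC address with VIRTIO_NET_CTRL_MAC_ADDR_SET. The
 * command consists of three segments: the control header, the address,
 * and a writable ack byte for the host's response.
 */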
3272 static int
3273 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3274 {
3275         struct sglist_seg segs[3];
3276         struct sglist sg;
3277         struct {
3278                 struct virtio_net_ctrl_hdr hdr __aligned(2);
3279                 uint8_t pad1;
3280                 uint8_t addr[ETHER_ADDR_LEN] __aligned(8);
3281                 uint8_t pad2;
3282                 uint8_t ack;
3283         } s;
3284         int error;
3285
3286         error = 0;
3287         MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC);
3288
3289         s.hdr.class = VIRTIO_NET_CTRL_MAC;
3290         s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3291         bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN);
3292         s.ack = VIRTIO_NET_ERR;
3293
3294         sglist_init(&sg, nitems(segs), segs);
3295         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3296         error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN);
3297         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3298         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3299
3300         if (error == 0)
3301                 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3302
3303         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3304 }
3305
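/*
 * Tell the host how many virtqueue pairs to use with
 * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET.
 */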
3306 static int
3307 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3308 {
3309         struct sglist_seg segs[3];
3310         struct sglist sg;
3311         struct {
3312                 struct virtio_net_ctrl_hdr hdr __aligned(2);
3313                 uint8_t pad1;
3314                 struct virtio_net_ctrl_mq mq __aligned(2);
3315                 uint8_t pad2;
3316                 uint8_t ack;
3317         } s;
3318         int error;
3319
3320         error = 0;
3321         MPASS(sc->vtnet_flags & VTNET_FLAG_MQ);
3322
3323         s.hdr.class = VIRTIO_NET_CTRL_MQ;
3324         s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3325         s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs);
3326         s.ack = VIRTIO_NET_ERR;
3327
3328         sglist_init(&sg, nitems(segs), segs);
3329         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3330         error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3331         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3332         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3333
3334         if (error == 0)
3335                 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3336
3337         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3338 }
3339
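/*
 * Toggle one of the VIRTIO_NET_CTRL_RX receive modes, such as
 * promiscuous or all-multicast, on or off.
 */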
3340 static int
3341 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, int on)
3342 {
3343         struct sglist_seg segs[3];
3344         struct sglist sg;
3345         struct {
3346                 struct virtio_net_ctrl_hdr hdr __aligned(2);
3347                 uint8_t pad1;
3348                 uint8_t onoff;
3349                 uint8_t pad2;
3350                 uint8_t ack;
3351         } s;
3352         int error;
3353
3354         error = 0;
3355         MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3356
3357         s.hdr.class = VIRTIO_NET_CTRL_RX;
3358         s.hdr.cmd = cmd;
3359         s.onoff = !!on;
3360         s.ack = VIRTIO_NET_ERR;
3361
3362         sglist_init(&sg, nitems(segs), segs);
3363         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3364         error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3365         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3366         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3367
3368         if (error == 0)
3369                 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3370
3371         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3372 }
3373
3374 static int
3375 vtnet_set_promisc(struct vtnet_softc *sc, int on)
3376 {
3377         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3378 }
3379
3380 static int
3381 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
3382 {
3383         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3384 }
3385
3386 static void
3387 vtnet_rx_filter(struct vtnet_softc *sc)
3388 {
3389         device_t dev;
3390         struct ifnet *ifp;
3391
3392         dev = sc->vtnet_dev;
3393         ifp = sc->vtnet_ifp;
3394
3395         VTNET_CORE_LOCK_ASSERT(sc);
3396
3397         if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) {
3398                 device_printf(dev, "cannot %s promiscuous mode\n",
3399                     ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
3400         }
3401
3402         if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) {
3403                 device_printf(dev, "cannot %s all-multicast mode\n",
3404                     ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
3405         }
3406 }
3407
3408 static u_int
3409 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
3410 {
3411         struct vtnet_softc *sc = arg;
3412
3413         if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3414                 return (0);
3415
3416         if (ucnt < VTNET_MAX_MAC_ENTRIES)
3417                 bcopy(LLADDR(sdl),
3418                     &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
3419                     ETHER_ADDR_LEN);
3420
3421         return (1);
3422 }
3423
3424 static u_int
3425 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
3426 {
3427         struct vtnet_mac_filter *filter = arg;
3428
3429         if (mcnt < VTNET_MAX_MAC_ENTRIES)
3430                 bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
3431                     ETHER_ADDR_LEN);
3432
3433         return (1);
3434 }
3435
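/*
 * Program the host's unicast and multicast MAC filter tables. If either
 * table would overflow, fall back to promiscuous or all-multicast mode,
 * respectively.
 */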
3436 static void
3437 vtnet_rx_filter_mac(struct vtnet_softc *sc)
3438 {
3439         struct virtio_net_ctrl_hdr hdr __aligned(2);
3440         struct vtnet_mac_filter *filter;
3441         struct sglist_seg segs[4];
3442         struct sglist sg;
3443         struct ifnet *ifp;
3444         bool promisc, allmulti;
3445         u_int ucnt, mcnt;
3446         int error;
3447         uint8_t ack;
3448
3449         ifp = sc->vtnet_ifp;
3450         filter = sc->vtnet_mac_filter;
3451         error = 0;
3452
3453         MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3454         VTNET_CORE_LOCK_ASSERT(sc);
3455
3456         /* Unicast MAC addresses: */
3457         ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
3458         promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
3459
3460         if (promisc) {
3461                 ucnt = 0;
3462                 if_printf(ifp, "more than %d MAC addresses assigned, "
3463                     "falling back to promiscuous mode\n",
3464                     VTNET_MAX_MAC_ENTRIES);
3465         }
3466
3467         /* Multicast MAC addresses: */
3468         mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
3469         allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
3470
3471         if (allmulti) {
3472                 mcnt = 0;
3473                 if_printf(ifp, "more than %d multicast MAC addresses "
3474                     "assigned, falling back to all-multicast mode\n",
3475                     VTNET_MAX_MAC_ENTRIES);
3476         }
3477
3478         if (promisc && allmulti)
3479                 goto out;
3480
3481         filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt);
3482         filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt);
3483
3484         hdr.class = VIRTIO_NET_CTRL_MAC;
3485         hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3486         ack = VIRTIO_NET_ERR;
3487
3488         sglist_init(&sg, nitems(segs), segs);
3489         error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3490         error |= sglist_append(&sg, &filter->vmf_unicast,
3491             sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN);
3492         error |= sglist_append(&sg, &filter->vmf_multicast,
3493             sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN);
3494         error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3495         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3496
3497         if (error == 0)
3498                 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3499         if (ack != VIRTIO_NET_OK)
3500                 if_printf(ifp, "error setting host MAC filter table\n");
3501
3502 out:
3503         if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
3504                 if_printf(ifp, "cannot enable promiscuous mode\n");
3505         if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
3506                 if_printf(ifp, "cannot enable all-multicast mode\n");
3507 }
3508
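/*
 * Add or remove a single VLAN tag in the host's VLAN filter through the
 * control virtqueue.
 */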
3509 static int
3510 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3511 {
3512         struct sglist_seg segs[3];
3513         struct sglist sg;
3514         struct {
3515                 struct virtio_net_ctrl_hdr hdr __aligned(2);
3516                 uint8_t pad1;
3517                 uint16_t tag __aligned(2);
3518                 uint8_t pad2;
3519                 uint8_t ack;
3520         } s;
3521         int error;
3522
3523         error = 0;
3524         MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3525
3526         s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3527         s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3528         s.tag = vtnet_gtoh16(sc, tag);
3529         s.ack = VIRTIO_NET_ERR;
3530
3531         sglist_init(&sg, nitems(segs), segs);
3532         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3533         error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3534         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3535         MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3536
3537         if (error == 0)
3538                 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3539
3540         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3541 }
3542
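/*
 * Walk the shadow VLAN bitmap and re-enable the host filter for every
 * VLAN currently configured on the interface.
 */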
3543 static void
3544 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3545 {
3546         int i, bit;
3547         uint32_t w;
3548         uint16_t tag;
3549
3550         MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3551         VTNET_CORE_LOCK_ASSERT(sc);
3552
3553         /* Enable the filter for each configured VLAN. */
3554         for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3555                 w = sc->vtnet_vlan_filter[i];
3556
3557                 while ((bit = ffs(w) - 1) != -1) {
3558                         w &= ~(1 << bit);
3559                         tag = sizeof(w) * CHAR_BIT * i + bit;
3560
3561                         if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3562                                 device_printf(sc->vtnet_dev,
3563                                     "cannot enable VLAN %d filter\n", tag);
3564                         }
3565                 }
3566         }
3567 }
3568
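/*
 * Track VLAN registrations in a local bitmap, one bit per possible tag,
 * and mirror the change to the host filter table when hardware VLAN
 * filtering is enabled and the interface is running.
 */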
3569 static void
3570 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3571 {
3572         struct ifnet *ifp;
3573         int idx, bit;
3574
3575         ifp = sc->vtnet_ifp;
3576         idx = (tag >> 5) & 0x7F;
3577         bit = tag & 0x1F;
3578
3579         if (tag == 0 || tag > 4095)
3580                 return;
3581
3582         VTNET_CORE_LOCK(sc);
3583
3584         if (add)
3585                 sc->vtnet_vlan_filter[idx] |= (1 << bit);
3586         else
3587                 sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3588
3589         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
3590             ifp->if_drv_flags & IFF_DRV_RUNNING &&
3591             vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3592                 device_printf(sc->vtnet_dev,
3593                     "cannot %s VLAN %d %s the host filter table\n",
3594                     add ? "add" : "remove", tag, add ? "to" : "from");
3595         }
3596
3597         VTNET_CORE_UNLOCK(sc);
3598 }
3599
3600 static void
3601 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3602 {
3603
3604         if (ifp->if_softc != arg)
3605                 return;
3606
3607         vtnet_update_vlan_filter(arg, 1, tag);
3608 }
3609
3610 static void
3611 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3612 {
3613
3614         if (ifp->if_softc != arg)
3615                 return;
3616
3617         vtnet_update_vlan_filter(arg, 0, tag);
3618 }
3619
3620 static void
3621 vtnet_update_speed_duplex(struct vtnet_softc *sc)
3622 {
3623         device_t dev;
3624         struct ifnet *ifp;
3625         uint32_t speed;
3626
3627         dev = sc->vtnet_dev;
3628         ifp = sc->vtnet_ifp;
3629
3630         /* BMV: Ignore duplex. */
3631         if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0)
3632                 speed = -1;
3633         else
3634                 speed = virtio_read_dev_config_4(dev,
3635                     offsetof(struct virtio_net_config, speed));
3636
3637         if (speed != -1)
3638                 ifp->if_baudrate = IF_Mbps(speed);
3639 }
3640
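/*
 * Without link state support, assume the link is always up. Otherwise
 * read the status field from the device configuration.
 */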
3641 static int
3642 vtnet_is_link_up(struct vtnet_softc *sc)
3643 {
3644         device_t dev;
3645         struct ifnet *ifp;
3646         uint16_t status;
3647
3648         dev = sc->vtnet_dev;
3649         ifp = sc->vtnet_ifp;
3650
3651         if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
3652                 status = VIRTIO_NET_S_LINK_UP;
3653         else
3654                 status = virtio_read_dev_config_2(dev,
3655                     offsetof(struct virtio_net_config, status));
3656
3657         return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3658 }
3659
3660 static void
3661 vtnet_update_link_status(struct vtnet_softc *sc)
3662 {
3663         struct ifnet *ifp;
3664         int link;
3665
3666         ifp = sc->vtnet_ifp;
3667         VTNET_CORE_LOCK_ASSERT(sc);
3668         link = vtnet_is_link_up(sc);
3669
3670         /* Notify if the link status has changed. */
3671         if (link != 0 && sc->vtnet_link_active == 0) {
3672                 vtnet_update_speed_duplex(sc);
3673                 sc->vtnet_link_active = 1;
3674                 if_link_state_change(ifp, LINK_STATE_UP);
3675         } else if (link == 0 && sc->vtnet_link_active != 0) {
3676                 sc->vtnet_link_active = 0;
3677                 if_link_state_change(ifp, LINK_STATE_DOWN);
3678         }
3679 }
3680
3681 static int
3682 vtnet_ifmedia_upd(struct ifnet *ifp)
3683 {
3684         return (EOPNOTSUPP);
3685 }
3686
3687 static void
3688 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3689 {
3690         struct vtnet_softc *sc;
3691
3692         sc = ifp->if_softc;
3693
3694         ifmr->ifm_status = IFM_AVALID;
3695         ifmr->ifm_active = IFM_ETHER;
3696
3697         VTNET_CORE_LOCK(sc);
3698         if (vtnet_is_link_up(sc) != 0) {
3699                 ifmr->ifm_status |= IFM_ACTIVE;
3700                 ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
3701         } else
3702                 ifmr->ifm_active |= IFM_NONE;
3703         VTNET_CORE_UNLOCK(sc);
3704 }
3705
3706 static void
3707 vtnet_get_macaddr(struct vtnet_softc *sc)
3708 {
3709
3710         if (sc->vtnet_flags & VTNET_FLAG_MAC) {
3711                 virtio_read_device_config_array(sc->vtnet_dev,
3712                     offsetof(struct virtio_net_config, mac),
3713                     &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN);
3714         } else {
3715                 /* Generate a random locally administered unicast address. */
3716                 sc->vtnet_hwaddr[0] = 0xB2;
3717                 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
3718         }
3719 }
3720
3721 static void
3722 vtnet_set_macaddr(struct vtnet_softc *sc)
3723 {
3724         int error;
3725
3726         if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
3727                 error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr);
3728                 if (error)
3729                         if_printf(sc->vtnet_ifp, "unable to set MAC address\n");
3730                 return;
3731         }
3732
3733         /* MAC in config is read-only in modern VirtIO. */
3734         if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) {
3735                 for (int i = 0; i < ETHER_ADDR_LEN; i++) {
3736                         virtio_write_dev_config_1(sc->vtnet_dev,
3737                             offsetof(struct virtio_net_config, mac) + i,
3738                             sc->vtnet_hwaddr[i]);
3739                 }
3740         }
3741 }
3742
3743 static void
3744 vtnet_attached_set_macaddr(struct vtnet_softc *sc)
3745 {
3746
3747         /* Set the MAC address on the host if it was locally generated. */
3748         if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0)
3749                 vtnet_set_macaddr(sc);
3750 }
3751
3752 static void
3753 vtnet_vlan_tag_remove(struct mbuf *m)
3754 {
3755         struct ether_vlan_header *evh;
3756
3757         evh = mtod(m, struct ether_vlan_header *);
3758         m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
3759         m->m_flags |= M_VLANTAG;
3760
3761         /* Strip the 802.1Q header. */
3762         bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
3763             ETHER_HDR_LEN - ETHER_TYPE_LEN);
3764         m_adj(m, ETHER_VLAN_ENCAP_LEN);
3765 }
3766
3767 static void
3768 vtnet_set_rx_process_limit(struct vtnet_softc *sc)
3769 {
3770         int limit;
3771
3772         limit = vtnet_tunable_int(sc, "rx_process_limit",
3773             vtnet_rx_process_limit);
3774         if (limit < 0)
3775                 limit = INT_MAX;
3776         sc->vtnet_rx_process_limit = limit;
3777 }
3778
3779 static void
3780 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
3781     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
3782 {
3783         struct sysctl_oid *node;
3784         struct sysctl_oid_list *list;
3785         struct vtnet_rxq_stats *stats;
3786         char namebuf[16];
3787
3788         snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
3789         node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3790             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
3791         list = SYSCTL_CHILDREN(node);
3792
3793         stats = &rxq->vtnrx_stats;
3794
3795         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3796             &stats->vrxs_ipackets, "Receive packets");
3797         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3798             &stats->vrxs_ibytes, "Receive bytes");
3799         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3800             &stats->vrxs_iqdrops, "Receive drops");
3801         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3802             &stats->vrxs_ierrors, "Receive errors");
3803         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3804             &stats->vrxs_csum, "Receive checksum offloaded");
3805         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
3806             &stats->vrxs_csum_failed, "Receive checksum offload failed");
3807         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3808             &stats->vrxs_rescheduled,
3809             "Receive interrupt handler rescheduled");
3810 }
3811
3812 static void
3813 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
3814     struct sysctl_oid_list *child, struct vtnet_txq *txq)
3815 {
3816         struct sysctl_oid *node;
3817         struct sysctl_oid_list *list;
3818         struct vtnet_txq_stats *stats;
3819         char namebuf[16];
3820
3821         snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
3822         node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3823             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
3824         list = SYSCTL_CHILDREN(node);
3825
3826         stats = &txq->vtntx_stats;
3827
3828         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3829             &stats->vtxs_opackets, "Transmit packets");
3830         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3831             &stats->vtxs_obytes, "Transmit bytes");
3832         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3833             &stats->vtxs_omcasts, "Transmit multicasts");
3834         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3835             &stats->vtxs_csum, "Transmit checksum offloaded");
3836         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3837             &stats->vtxs_tso, "Transmit segmentation offloaded");
3838         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3839             &stats->vtxs_rescheduled,
3840             "Transmit interrupt handler rescheduled");
3841 }
3842
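/*
 * Export per-queue statistics under the device's sysctl tree, e.g.
 * dev.vtnet.0.rxq0.ipackets (assuming unit 0).
 */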
3843 static void
3844 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
3845 {
3846         device_t dev;
3847         struct sysctl_ctx_list *ctx;
3848         struct sysctl_oid *tree;
3849         struct sysctl_oid_list *child;
3850         int i;
3851
3852         dev = sc->vtnet_dev;
3853         ctx = device_get_sysctl_ctx(dev);
3854         tree = device_get_sysctl_tree(dev);
3855         child = SYSCTL_CHILDREN(tree);
3856
3857         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3858                 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
3859                 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
3860         }
3861 }
3862
3863 static void
3864 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
3865     struct sysctl_oid_list *child, struct vtnet_softc *sc)
3866 {
3867         struct vtnet_statistics *stats;
3868         struct vtnet_rxq_stats rxaccum;
3869         struct vtnet_txq_stats txaccum;
3870
3871         vtnet_accum_stats(sc, &rxaccum, &txaccum);
3872
3873         stats = &sc->vtnet_stats;
3874         stats->rx_csum_offloaded = rxaccum.vrxs_csum;
3875         stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
3876         stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
3877         stats->tx_csum_offloaded = txaccum.vtxs_csum;
3878         stats->tx_tso_offloaded = txaccum.vtxs_tso;
3879         stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
3880
3881         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
3882             CTLFLAG_RD, &stats->mbuf_alloc_failed,
3883             "Mbuf cluster allocation failures");
3884
3885         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
3886             CTLFLAG_RD, &stats->rx_frame_too_large,
3887             "Received frame larger than the mbuf chain");
3888         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
3889             CTLFLAG_RD, &stats->rx_enq_replacement_failed,
3890             "Enqueuing the replacement receive mbuf failed");
3891         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
3892             CTLFLAG_RD, &stats->rx_mergeable_failed,
3893             "Mergeable buffers receive failures");
3894         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
3895             CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
3896             "Received checksum offloaded buffer with unsupported "
3897             "Ethernet type");
3898         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
3899             CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
3900             "Received checksum offloaded buffer with incorrect IP protocol");
3901         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
3902             CTLFLAG_RD, &stats->rx_csum_bad_offset,
3903             "Received checksum offloaded buffer with incorrect offset");
3904         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
3905             CTLFLAG_RD, &stats->rx_csum_bad_proto,
3906             "Received checksum offloaded buffer with incorrect protocol");
3907         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
3908             CTLFLAG_RD, &stats->rx_csum_failed,
3909             "Received buffer checksum offload failed");
3910         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
3911             CTLFLAG_RD, &stats->rx_csum_offloaded,
3912             "Received buffer checksum offload succeeded");
3913         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
3914             CTLFLAG_RD, &stats->rx_task_rescheduled,
3915             "Times the receive interrupt task rescheduled itself");
3916
3917         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
3918             CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
3919             "Aborted transmit of checksum offloaded buffer with unknown "
3920             "Ethernet type");
3921         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
3922             CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
3923             "Aborted transmit of TSO buffer with unknown Ethernet type");
3924         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
3925             CTLFLAG_RD, &stats->tx_tso_not_tcp,
3926             "Aborted transmit of TSO buffer with non-TCP protocol");
3927         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
3928             CTLFLAG_RD, &stats->tx_defragged,
3929             "Transmit mbufs defragged");
3930         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
3931             CTLFLAG_RD, &stats->tx_defrag_failed,
3932             "Aborted transmit of buffer because defrag failed");
3933         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
3934             CTLFLAG_RD, &stats->tx_csum_offloaded,
3935             "Offloaded checksum of transmitted buffer");
3936         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
3937             CTLFLAG_RD, &stats->tx_tso_offloaded,
3938             "Segmentation offload of transmitted buffer");
3939         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
3940             CTLFLAG_RD, &stats->tx_task_rescheduled,
3941             "Times the transmit interrupt task rescheduled itself");
3942 }
3943
3944 static void
3945 vtnet_setup_sysctl(struct vtnet_softc *sc)
3946 {
3947         device_t dev;
3948         struct sysctl_ctx_list *ctx;
3949         struct sysctl_oid *tree;
3950         struct sysctl_oid_list *child;
3951
3952         dev = sc->vtnet_dev;
3953         ctx = device_get_sysctl_ctx(dev);
3954         tree = device_get_sysctl_tree(dev);
3955         child = SYSCTL_CHILDREN(tree);
3956
3957         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
3958             CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
3959             "Maximum number of supported virtqueue pairs");
3960         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs",
3961             CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0,
3962             "Number of requested virtqueue pairs");
3963         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
3964             CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
3965             "Number of active virtqueue pairs");
3966
3967         vtnet_setup_stat_sysctl(ctx, child, sc);
3968 }
3969
3970 static int
3971 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
3972 {
3973
3974         return (virtqueue_enable_intr(rxq->vtnrx_vq));
3975 }
3976
3977 static void
3978 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
3979 {
3980
3981         virtqueue_disable_intr(rxq->vtnrx_vq);
3982 }
3983
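/*
 * Tx completion interrupts are only useful when the ring is close to
 * full. If the free descriptor count has dropped below the queue's
 * interrupt threshold, arm a postponed interrupt; otherwise leave the
 * interrupt disabled.
 */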
3984 static int
3985 vtnet_txq_enable_intr(struct vtnet_txq *txq)
3986 {
3987         struct virtqueue *vq;
3988
3989         vq = txq->vtntx_vq;
3990
3991         if (vtnet_txq_below_threshold(txq) != 0)
3992                 return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
3993
3994         /*
3995          * The free count is above our threshold. Keep the Tx interrupt
3996          * disabled until the queue is fuller.
3997          */
3998         return (0);
3999 }
4000
4001 static void
4002 vtnet_txq_disable_intr(struct vtnet_txq *txq)
4003 {
4004
4005         virtqueue_disable_intr(txq->vtntx_vq);
4006 }
4007
4008 static void
4009 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
4010 {
4011         int i;
4012
4013         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4014                 vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
4015 }
4016
4017 static void
4018 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
4019 {
4020         int i;
4021
4022         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4023                 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
4024 }
4025
4026 static void
4027 vtnet_enable_interrupts(struct vtnet_softc *sc)
4028 {
4029
4030         vtnet_enable_rx_interrupts(sc);
4031         vtnet_enable_tx_interrupts(sc);
4032 }
4033
4034 static void
4035 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
4036 {
4037         int i;
4038
4039         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4040                 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
4041 }
4042
4043 static void
4044 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
4045 {
4046         int i;
4047
4048         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4049                 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
4050 }
4051
4052 static void
4053 vtnet_disable_interrupts(struct vtnet_softc *sc)
4054 {
4055
4056         vtnet_disable_rx_interrupts(sc);
4057         vtnet_disable_tx_interrupts(sc);
4058 }
4059
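/*
 * Fetch a per-device tunable such as "hw.vtnet.0.rx_process_limit",
 * returning the supplied default when the tunable is not set.
 */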
4060 static int
4061 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
4062 {
4063         char path[64];
4064
4065         snprintf(path, sizeof(path),
4066             "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
4067         TUNABLE_INT_FETCH(path, &def);
4068
4069         return (def);
4070 }
4071
4072 #ifdef DEBUGNET
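/*
 * Debugnet (netdump) support. Debugnet operates in a polled context, so
 * transmits go out on queue pair 0 and completions are reaped by
 * polling the Tx and Rx queues directly.
 */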
4073 static void
4074 vtnet_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
4075 {
4076         struct vtnet_softc *sc;
4077
4078         sc = if_getsoftc(ifp);
4079
4080         VTNET_CORE_LOCK(sc);
4081         *nrxr = sc->vtnet_max_vq_pairs;
4082         *ncl = DEBUGNET_MAX_IN_FLIGHT;
4083         *clsize = sc->vtnet_rx_clustersz;
4084         VTNET_CORE_UNLOCK(sc);
4085 }
4086
4087 static void
4088 vtnet_debugnet_event(struct ifnet *ifp __unused, enum debugnet_ev event __unused)
4089 {
4090 }
4091
4092 static int
4093 vtnet_debugnet_transmit(struct ifnet *ifp, struct mbuf *m)
4094 {
4095         struct vtnet_softc *sc;
4096         struct vtnet_txq *txq;
4097         int error;
4098
4099         sc = if_getsoftc(ifp);
4100         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4101             IFF_DRV_RUNNING)
4102                 return (EBUSY);
4103
4104         txq = &sc->vtnet_txqs[0];
4105         error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
4106         if (error == 0)
4107                 (void)vtnet_txq_notify(txq);
4108         return (error);
4109 }
4110
4111 static int
4112 vtnet_debugnet_poll(struct ifnet *ifp, int count)
4113 {
4114         struct vtnet_softc *sc;
4115         int i;
4116
4117         sc = if_getsoftc(ifp);
4118         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4119             IFF_DRV_RUNNING)
4120                 return (EBUSY);
4121
4122         (void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
4123         for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4124                 (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
4125         return (0);
4126 }
4127 #endif /* DEBUGNET */