FreeBSD: sys/dev/virtio/network/if_vtnet.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28
29 /* Driver for VirtIO network devices. */
30
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33
34 #include <sys/param.h>
35 #include <sys/eventhandler.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/sockio.h>
39 #include <sys/mbuf.h>
40 #include <sys/malloc.h>
41 #include <sys/module.h>
42 #include <sys/socket.h>
43 #include <sys/sysctl.h>
44 #include <sys/random.h>
45 #include <sys/sglist.h>
46 #include <sys/lock.h>
47 #include <sys/mutex.h>
48 #include <sys/taskqueue.h>
49 #include <sys/smp.h>
50 #include <machine/smp.h>
51
52 #include <vm/uma.h>
53
54 #include <net/debugnet.h>
55 #include <net/ethernet.h>
56 #include <net/pfil.h>
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/if_arp.h>
60 #include <net/if_dl.h>
61 #include <net/if_types.h>
62 #include <net/if_media.h>
63 #include <net/if_vlan_var.h>
64
65 #include <net/bpf.h>
66
67 #include <netinet/in_systm.h>
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #include <netinet/ip6.h>
71 #include <netinet6/ip6_var.h>
72 #include <netinet/udp.h>
73 #include <netinet/tcp.h>
74
75 #include <machine/bus.h>
76 #include <machine/resource.h>
77 #include <sys/bus.h>
78 #include <sys/rman.h>
79
80 #include <dev/virtio/virtio.h>
81 #include <dev/virtio/virtqueue.h>
82 #include <dev/virtio/network/virtio_net.h>
83 #include <dev/virtio/network/if_vtnetvar.h>
84 #include "virtio_if.h"
85
86 #include "opt_inet.h"
87 #include "opt_inet6.h"
88
89 static int      vtnet_modevent(module_t, int, void *);
90
91 static int      vtnet_probe(device_t);
92 static int      vtnet_attach(device_t);
93 static int      vtnet_detach(device_t);
94 static int      vtnet_suspend(device_t);
95 static int      vtnet_resume(device_t);
96 static int      vtnet_shutdown(device_t);
97 static int      vtnet_attach_completed(device_t);
98 static int      vtnet_config_change(device_t);
99
100 static void     vtnet_negotiate_features(struct vtnet_softc *);
101 static void     vtnet_setup_features(struct vtnet_softc *);
102 static int      vtnet_init_rxq(struct vtnet_softc *, int);
103 static int      vtnet_init_txq(struct vtnet_softc *, int);
104 static int      vtnet_alloc_rxtx_queues(struct vtnet_softc *);
105 static void     vtnet_free_rxtx_queues(struct vtnet_softc *);
106 static int      vtnet_alloc_rx_filters(struct vtnet_softc *);
107 static void     vtnet_free_rx_filters(struct vtnet_softc *);
108 static int      vtnet_alloc_virtqueues(struct vtnet_softc *);
109 static int      vtnet_setup_interface(struct vtnet_softc *);
110 static int      vtnet_change_mtu(struct vtnet_softc *, int);
111 static int      vtnet_ioctl(struct ifnet *, u_long, caddr_t);
112 static uint64_t vtnet_get_counter(struct ifnet *, ift_counter);
113
114 static int      vtnet_rxq_populate(struct vtnet_rxq *);
115 static void     vtnet_rxq_free_mbufs(struct vtnet_rxq *);
116 static struct mbuf *
117                 vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
118 static int      vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
119                     struct mbuf *, int);
120 static int      vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
121 static int      vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
122 static int      vtnet_rxq_new_buf(struct vtnet_rxq *);
123 static int      vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
124                      struct virtio_net_hdr *);
125 static void     vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
126 static void     vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
127 static int      vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
128 static void     vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
129                     struct virtio_net_hdr *);
130 static int      vtnet_rxq_eof(struct vtnet_rxq *);
131 static void     vtnet_rx_vq_intr(void *);
132 static void     vtnet_rxq_tq_intr(void *, int);
133
134 static int      vtnet_txq_below_threshold(struct vtnet_txq *);
135 static int      vtnet_txq_notify(struct vtnet_txq *);
136 static void     vtnet_txq_free_mbufs(struct vtnet_txq *);
137 static int      vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
138                     int *, int *, int *);
139 static int      vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
140                     int, struct virtio_net_hdr *);
141 static struct mbuf *
142                 vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
143                     struct virtio_net_hdr *);
144 static int      vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
145                     struct vtnet_tx_header *);
146 static int      vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
147 #ifdef VTNET_LEGACY_TX
148 static void     vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
149 static void     vtnet_start(struct ifnet *);
150 #else
151 static int      vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
152 static int      vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
153 static void     vtnet_txq_tq_deferred(void *, int);
154 #endif
155 static void     vtnet_txq_start(struct vtnet_txq *);
156 static void     vtnet_txq_tq_intr(void *, int);
157 static int      vtnet_txq_eof(struct vtnet_txq *);
158 static void     vtnet_tx_vq_intr(void *);
159 static void     vtnet_tx_start_all(struct vtnet_softc *);
160
161 #ifndef VTNET_LEGACY_TX
162 static void     vtnet_qflush(struct ifnet *);
163 #endif
164
165 static int      vtnet_watchdog(struct vtnet_txq *);
166 static void     vtnet_accum_stats(struct vtnet_softc *,
167                     struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
168 static void     vtnet_tick(void *);
169
170 static void     vtnet_start_taskqueues(struct vtnet_softc *);
171 static void     vtnet_free_taskqueues(struct vtnet_softc *);
172 static void     vtnet_drain_taskqueues(struct vtnet_softc *);
173
174 static void     vtnet_drain_rxtx_queues(struct vtnet_softc *);
175 static void     vtnet_stop_rendezvous(struct vtnet_softc *);
176 static void     vtnet_stop(struct vtnet_softc *);
177 static int      vtnet_virtio_reinit(struct vtnet_softc *);
178 static void     vtnet_init_rx_filters(struct vtnet_softc *);
179 static int      vtnet_init_rx_queues(struct vtnet_softc *);
180 static int      vtnet_init_tx_queues(struct vtnet_softc *);
181 static int      vtnet_init_rxtx_queues(struct vtnet_softc *);
182 static void     vtnet_set_active_vq_pairs(struct vtnet_softc *);
183 static int      vtnet_reinit(struct vtnet_softc *);
184 static void     vtnet_init_locked(struct vtnet_softc *);
185 static void     vtnet_init(void *);
186
187 static void     vtnet_free_ctrl_vq(struct vtnet_softc *);
188 static void     vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
189                     struct sglist *, int, int);
190 static int      vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
191 static int      vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
192 static int      vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
193 static int      vtnet_set_promisc(struct vtnet_softc *, int);
194 static int      vtnet_set_allmulti(struct vtnet_softc *, int);
195 static void     vtnet_attach_disable_promisc(struct vtnet_softc *);
196 static void     vtnet_rx_filter(struct vtnet_softc *);
197 static void     vtnet_rx_filter_mac(struct vtnet_softc *);
198 static int      vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
199 static void     vtnet_rx_filter_vlan(struct vtnet_softc *);
200 static void     vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
201 static void     vtnet_register_vlan(void *, struct ifnet *, uint16_t);
202 static void     vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);
203
204 static int      vtnet_is_link_up(struct vtnet_softc *);
205 static void     vtnet_update_link_status(struct vtnet_softc *);
206 static int      vtnet_ifmedia_upd(struct ifnet *);
207 static void     vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
208 static void     vtnet_get_hwaddr(struct vtnet_softc *);
209 static void     vtnet_set_hwaddr(struct vtnet_softc *);
210 static void     vtnet_vlan_tag_remove(struct mbuf *);
211 static void     vtnet_set_rx_process_limit(struct vtnet_softc *);
212 static void     vtnet_set_tx_intr_threshold(struct vtnet_softc *);
213
214 static void     vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
215                     struct sysctl_oid_list *, struct vtnet_rxq *);
216 static void     vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
217                     struct sysctl_oid_list *, struct vtnet_txq *);
218 static void     vtnet_setup_queue_sysctl(struct vtnet_softc *);
219 static void     vtnet_setup_sysctl(struct vtnet_softc *);
220
221 static int      vtnet_rxq_enable_intr(struct vtnet_rxq *);
222 static void     vtnet_rxq_disable_intr(struct vtnet_rxq *);
223 static int      vtnet_txq_enable_intr(struct vtnet_txq *);
224 static void     vtnet_txq_disable_intr(struct vtnet_txq *);
225 static void     vtnet_enable_rx_interrupts(struct vtnet_softc *);
226 static void     vtnet_enable_tx_interrupts(struct vtnet_softc *);
227 static void     vtnet_enable_interrupts(struct vtnet_softc *);
228 static void     vtnet_disable_rx_interrupts(struct vtnet_softc *);
229 static void     vtnet_disable_tx_interrupts(struct vtnet_softc *);
230 static void     vtnet_disable_interrupts(struct vtnet_softc *);
231
232 static int      vtnet_tunable_int(struct vtnet_softc *, const char *, int);
233
234 DEBUGNET_DEFINE(vtnet);
235
236 /* Tunables. */
237 static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
238     "VTNET driver parameters");
239 static int vtnet_csum_disable = 0;
240 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
241 SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
242     &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
243 static int vtnet_tso_disable = 0;
244 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
245 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, &vtnet_tso_disable,
246     0, "Disables TCP Segmentation Offload");
247 static int vtnet_lro_disable = 0;
248 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
249 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, &vtnet_lro_disable,
250     0, "Disables TCP Large Receive Offload");
251 static int vtnet_mq_disable = 0;
252 TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
253 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, &vtnet_mq_disable,
254     0, "Disables Multi Queue support");
255 static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
256 TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
257 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
258     &vtnet_mq_max_pairs, 0, "Sets the maximum number of Multi Queue pairs");
259 static int vtnet_rx_process_limit = 512;
260 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);
261 SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
262     &vtnet_rx_process_limit, 0,
263     "Limits the number of RX segments processed in a single pass");
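/*
 * Illustrative note (not part of the original source): these are read-only
 * tunables (CTLFLAG_RDTUN), so they are normally set before the driver
 * loads, e.g. in /boot/loader.conf:
 *
 *   hw.vtnet.csum_disable=1
 *   hw.vtnet.mq_max_pairs=4
 */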
264
265 static uma_zone_t vtnet_tx_header_zone;
266
267 static struct virtio_feature_desc vtnet_feature_desc[] = {
268         { VIRTIO_NET_F_CSUM,            "TxChecksum"    },
269         { VIRTIO_NET_F_GUEST_CSUM,      "RxChecksum"    },
270         { VIRTIO_NET_F_MAC,             "MacAddress"    },
271         { VIRTIO_NET_F_GSO,             "TxAllGSO"      },
272         { VIRTIO_NET_F_GUEST_TSO4,      "RxTSOv4"       },
273         { VIRTIO_NET_F_GUEST_TSO6,      "RxTSOv6"       },
274         { VIRTIO_NET_F_GUEST_ECN,       "RxECN"         },
275         { VIRTIO_NET_F_GUEST_UFO,       "RxUFO"         },
276         { VIRTIO_NET_F_HOST_TSO4,       "TxTSOv4"       },
277         { VIRTIO_NET_F_HOST_TSO6,       "TxTSOv6"       },
278         { VIRTIO_NET_F_HOST_ECN,        "TxTSOECN"      },
279         { VIRTIO_NET_F_HOST_UFO,        "TxUFO"         },
280         { VIRTIO_NET_F_MRG_RXBUF,       "MrgRxBuf"      },
281         { VIRTIO_NET_F_STATUS,          "Status"        },
282         { VIRTIO_NET_F_CTRL_VQ,         "ControlVq"     },
283         { VIRTIO_NET_F_CTRL_RX,         "RxMode"        },
284         { VIRTIO_NET_F_CTRL_VLAN,       "VLanFilter"    },
285         { VIRTIO_NET_F_CTRL_RX_EXTRA,   "RxModeExtra"   },
286         { VIRTIO_NET_F_GUEST_ANNOUNCE,  "GuestAnnounce" },
287         { VIRTIO_NET_F_MQ,              "Multiqueue"    },
288         { VIRTIO_NET_F_CTRL_MAC_ADDR,   "SetMacAddress" },
289
290         { 0, NULL }
291 };
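/*
 * Annotation (an assumption about the surrounding VirtIO bus code): the names
 * above are registered via virtio_set_feature_desc() in vtnet_attach() and are
 * used to pretty-print the negotiated feature bits at attach time, e.g. a
 * dmesg line along the lines of
 * "vtnet0: features: ... <MrgRxBuf,Status,ControlVq,RxMode>".
 */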
292
293 static device_method_t vtnet_methods[] = {
294         /* Device methods. */
295         DEVMETHOD(device_probe,                 vtnet_probe),
296         DEVMETHOD(device_attach,                vtnet_attach),
297         DEVMETHOD(device_detach,                vtnet_detach),
298         DEVMETHOD(device_suspend,               vtnet_suspend),
299         DEVMETHOD(device_resume,                vtnet_resume),
300         DEVMETHOD(device_shutdown,              vtnet_shutdown),
301
302         /* VirtIO methods. */
303         DEVMETHOD(virtio_attach_completed,      vtnet_attach_completed),
304         DEVMETHOD(virtio_config_change,         vtnet_config_change),
305
306         DEVMETHOD_END
307 };
308
309 #ifdef DEV_NETMAP
310 #include <dev/netmap/if_vtnet_netmap.h>
311 #endif /* DEV_NETMAP */
312
313 static driver_t vtnet_driver = {
314         "vtnet",
315         vtnet_methods,
316         sizeof(struct vtnet_softc)
317 };
318 static devclass_t vtnet_devclass;
319
320 DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass,
321     vtnet_modevent, 0);
322 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
323     vtnet_modevent, 0);
324 MODULE_VERSION(vtnet, 1);
325 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
326 #ifdef DEV_NETMAP
327 MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
328 #endif /* DEV_NETMAP */
329
330 VIRTIO_SIMPLE_PNPTABLE(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
331 VIRTIO_SIMPLE_PNPINFO(virtio_mmio, vtnet);
332 VIRTIO_SIMPLE_PNPINFO(virtio_pci, vtnet);
333
334 static int
335 vtnet_modevent(module_t mod, int type, void *unused)
336 {
337         int error = 0;
338         static int loaded = 0;
339
340         switch (type) {
341         case MOD_LOAD:
342                 if (loaded++ == 0) {
343                         vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
344                                 sizeof(struct vtnet_tx_header),
345                                 NULL, NULL, NULL, NULL, 0, 0);
346 #ifdef DEBUGNET
347                         /*
348                          * We need to allocate from this zone in the transmit path, so ensure
349                          * that we have at least one item per header available.
350                          * XXX: add a separate zone like we do for mbufs? Otherwise we may
351                          * end up allocating entire buckets.
352                          */
353                         uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
354                         uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
355 #endif
356                 }
357                 break;
358         case MOD_QUIESCE:
359                 if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
360                         error = EBUSY;
361                 break;
362         case MOD_UNLOAD:
363                 if (--loaded == 0) {
364                         uma_zdestroy(vtnet_tx_header_zone);
365                         vtnet_tx_header_zone = NULL;
366                 }
367                 break;
368         case MOD_SHUTDOWN:
369                 break;
370         default:
371                 error = EOPNOTSUPP;
372                 break;
373         }
374
375         return (error);
376 }
377
378 static int
379 vtnet_probe(device_t dev)
380 {
381         return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
382 }
383
384 static int
385 vtnet_attach(device_t dev)
386 {
387         struct vtnet_softc *sc;
388         int error;
389
390         sc = device_get_softc(dev);
391         sc->vtnet_dev = dev;
392
393         /* Register our feature descriptions. */
394         virtio_set_feature_desc(dev, vtnet_feature_desc);
395
396         VTNET_CORE_LOCK_INIT(sc);
397         callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
398
399         vtnet_setup_sysctl(sc);
400         vtnet_setup_features(sc);
401
402         error = vtnet_alloc_rx_filters(sc);
403         if (error) {
404                 device_printf(dev, "cannot allocate Rx filters\n");
405                 goto fail;
406         }
407
408         error = vtnet_alloc_rxtx_queues(sc);
409         if (error) {
410                 device_printf(dev, "cannot allocate queues\n");
411                 goto fail;
412         }
413
414         error = vtnet_alloc_virtqueues(sc);
415         if (error) {
416                 device_printf(dev, "cannot allocate virtqueues\n");
417                 goto fail;
418         }
419
420         error = vtnet_setup_interface(sc);
421         if (error) {
422                 device_printf(dev, "cannot setup interface\n");
423                 goto fail;
424         }
425
426         error = virtio_setup_intr(dev, INTR_TYPE_NET);
427         if (error) {
428                 device_printf(dev, "cannot setup virtqueue interrupts\n");
429                 /* BMV: This will crash if it happens during boot! */
430                 ether_ifdetach(sc->vtnet_ifp);
431                 goto fail;
432         }
433
434 #ifdef DEV_NETMAP
435         vtnet_netmap_attach(sc);
436 #endif /* DEV_NETMAP */
437
438         vtnet_start_taskqueues(sc);
439
440 fail:
441         if (error)
442                 vtnet_detach(dev);
443
444         return (error);
445 }
446
447 static int
448 vtnet_detach(device_t dev)
449 {
450         struct vtnet_softc *sc;
451         struct ifnet *ifp;
452
453         sc = device_get_softc(dev);
454         ifp = sc->vtnet_ifp;
455
456         if (device_is_attached(dev)) {
457                 VTNET_CORE_LOCK(sc);
458                 vtnet_stop(sc);
459                 VTNET_CORE_UNLOCK(sc);
460
461                 callout_drain(&sc->vtnet_tick_ch);
462                 vtnet_drain_taskqueues(sc);
463
464                 ether_ifdetach(ifp);
465         }
466
467 #ifdef DEV_NETMAP
468         netmap_detach(ifp);
469 #endif /* DEV_NETMAP */
470
471         vtnet_free_taskqueues(sc);
472
473         if (sc->vtnet_vlan_attach != NULL) {
474                 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
475                 sc->vtnet_vlan_attach = NULL;
476         }
477         if (sc->vtnet_vlan_detach != NULL) {
478                 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
479                 sc->vtnet_vlan_detach = NULL;
480         }
481
482         ifmedia_removeall(&sc->vtnet_media);
483
484         if (ifp != NULL) {
485                 if_free(ifp);
486                 sc->vtnet_ifp = NULL;
487         }
488
489         vtnet_free_rxtx_queues(sc);
490         vtnet_free_rx_filters(sc);
491
492         if (sc->vtnet_ctrl_vq != NULL)
493                 vtnet_free_ctrl_vq(sc);
494
495         VTNET_CORE_LOCK_DESTROY(sc);
496
497         return (0);
498 }
499
500 static int
501 vtnet_suspend(device_t dev)
502 {
503         struct vtnet_softc *sc;
504
505         sc = device_get_softc(dev);
506
507         VTNET_CORE_LOCK(sc);
508         vtnet_stop(sc);
509         sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
510         VTNET_CORE_UNLOCK(sc);
511
512         return (0);
513 }
514
515 static int
516 vtnet_resume(device_t dev)
517 {
518         struct vtnet_softc *sc;
519         struct ifnet *ifp;
520
521         sc = device_get_softc(dev);
522         ifp = sc->vtnet_ifp;
523
524         VTNET_CORE_LOCK(sc);
525         if (ifp->if_flags & IFF_UP)
526                 vtnet_init_locked(sc);
527         sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
528         VTNET_CORE_UNLOCK(sc);
529
530         return (0);
531 }
532
533 static int
534 vtnet_shutdown(device_t dev)
535 {
536
537         /*
538          * Suspend already does all of what we need to
539          * do here; we just never expect to be resumed.
540          */
541         return (vtnet_suspend(dev));
542 }
543
544 static int
545 vtnet_attach_completed(device_t dev)
546 {
547
548         vtnet_attach_disable_promisc(device_get_softc(dev));
549
550         return (0);
551 }
552
553 static int
554 vtnet_config_change(device_t dev)
555 {
556         struct vtnet_softc *sc;
557
558         sc = device_get_softc(dev);
559
560         VTNET_CORE_LOCK(sc);
561         vtnet_update_link_status(sc);
562         if (sc->vtnet_link_active != 0)
563                 vtnet_tx_start_all(sc);
564         VTNET_CORE_UNLOCK(sc);
565
566         return (0);
567 }
568
569 static void
570 vtnet_negotiate_features(struct vtnet_softc *sc)
571 {
572         device_t dev;
573         uint64_t mask, features;
574
575         dev = sc->vtnet_dev;
576         mask = 0;
577
578         /*
579          * TSO and LRO are only available when their corresponding checksum
580          * offload feature is also negotiated.
581          */
582         if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
583                 mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
584                 mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
585         }
586         if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
587                 mask |= VTNET_TSO_FEATURES;
588         if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
589                 mask |= VTNET_LRO_FEATURES;
590 #ifndef VTNET_LEGACY_TX
591         if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
592                 mask |= VIRTIO_NET_F_MQ;
593 #else
594         mask |= VIRTIO_NET_F_MQ;
595 #endif
596
597         features = VTNET_FEATURES & ~mask;
598         sc->vtnet_features = virtio_negotiate_features(dev, features);
599
600         if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
601             virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
602                 /*
603                  * LRO without mergeable buffers requires special care. This
604                  * is not ideal because every receive buffer must be large
605                  * enough to hold the maximum TCP packet, the Ethernet header,
606                  * and the virtio-net header. This requires up to 34 descriptors with
607                  * MCLBYTES clusters. If we do not have indirect descriptors,
608                  * LRO is disabled since the virtqueue will not contain very
609                  * many receive buffers.
610                  */
611                 if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
612                         device_printf(dev,
613                             "LRO disabled because neither mergeable buffers nor "
614                             "indirect descriptors were negotiated\n");
615
616                         features &= ~VTNET_LRO_FEATURES;
617                         sc->vtnet_features =
618                             virtio_negotiate_features(dev, features);
619                 } else
620                         sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
621         }
622 }
623
624 static void
625 vtnet_setup_features(struct vtnet_softc *sc)
626 {
627         device_t dev;
628
629         dev = sc->vtnet_dev;
630
631         vtnet_negotiate_features(sc);
632
633         if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
634                 sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
635         if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
636                 sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
637
638         if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
639                 /* This feature should always be negotiated. */
640                 sc->vtnet_flags |= VTNET_FLAG_MAC;
641         }
642
643         if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF))
644                 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
645
646         if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) ||
647             virtio_with_feature(dev, VIRTIO_F_VERSION_1))
648                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
649         else
650                 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
651
652         if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
653                 sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
654         else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
655                 sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
656         else
657                 sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;
658
659         if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
660             virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
661             virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
662                 sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
663         else
664                 sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;
665
666         if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
667                 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
668
669                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
670                         sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
671                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
672                         sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
673                 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
674                         sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
675         }
676
677         if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
678             sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
679                 sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
680                     offsetof(struct virtio_net_config, max_virtqueue_pairs));
681         } else
682                 sc->vtnet_max_vq_pairs = 1;
683
684         if (sc->vtnet_max_vq_pairs > 1) {
685                 /*
686                  * Limit the maximum number of queue pairs to the lower of
687                  * the number of CPUs and the configured maximum.
688                  * The actual number of queues that get used may be less.
689                  */
690                 int max;
691
692                 max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
693                 if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN) {
694                         if (max > mp_ncpus)
695                                 max = mp_ncpus;
696                         if (max > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
697                                 max = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX;
698                         if (max > 1) {
699                                 sc->vtnet_requested_vq_pairs = max;
700                                 sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
701                         }
702                 }
703         }
704 }
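/*
 * Worked example for the clamping above (illustrative, not from the original
 * source): on a 4-vCPU guest whose host offers VIRTIO_NET_F_MQ plus a control
 * virtqueue, and with hw.vtnet.mq_max_pairs left at its default of
 * VTNET_MAX_QUEUE_PAIRS (assumed here to be >= 4), the clamping yields
 * vtnet_requested_vq_pairs = 4 and sets VTNET_FLAG_MULTIQ; the number of
 * pairs actually used is settled later by vtnet_set_active_vq_pairs().
 */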
705
706 static int
707 vtnet_init_rxq(struct vtnet_softc *sc, int id)
708 {
709         struct vtnet_rxq *rxq;
710
711         rxq = &sc->vtnet_rxqs[id];
712
713         snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
714             device_get_nameunit(sc->vtnet_dev), id);
715         mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
716
717         rxq->vtnrx_sc = sc;
718         rxq->vtnrx_id = id;
719
720         rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
721         if (rxq->vtnrx_sg == NULL)
722                 return (ENOMEM);
723
724         NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
725         rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
726             taskqueue_thread_enqueue, &rxq->vtnrx_tq);
727
728         return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
729 }
730
731 static int
732 vtnet_init_txq(struct vtnet_softc *sc, int id)
733 {
734         struct vtnet_txq *txq;
735
736         txq = &sc->vtnet_txqs[id];
737
738         snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
739             device_get_nameunit(sc->vtnet_dev), id);
740         mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
741
742         txq->vtntx_sc = sc;
743         txq->vtntx_id = id;
744
745         txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
746         if (txq->vtntx_sg == NULL)
747                 return (ENOMEM);
748
749 #ifndef VTNET_LEGACY_TX
750         txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
751             M_NOWAIT, &txq->vtntx_mtx);
752         if (txq->vtntx_br == NULL)
753                 return (ENOMEM);
754
755         TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
756 #endif
757         TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
758         txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
759             taskqueue_thread_enqueue, &txq->vtntx_tq);
760         if (txq->vtntx_tq == NULL)
761                 return (ENOMEM);
762
763         return (0);
764 }
765
766 static int
767 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
768 {
769         int i, npairs, error;
770
771         npairs = sc->vtnet_max_vq_pairs;
772
773         sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
774             M_NOWAIT | M_ZERO);
775         sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
776             M_NOWAIT | M_ZERO);
777         if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
778                 return (ENOMEM);
779
780         for (i = 0; i < npairs; i++) {
781                 error = vtnet_init_rxq(sc, i);
782                 if (error)
783                         return (error);
784                 error = vtnet_init_txq(sc, i);
785                 if (error)
786                         return (error);
787         }
788
789         vtnet_setup_queue_sysctl(sc);
790
791         return (0);
792 }
793
794 static void
795 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
796 {
797
798         rxq->vtnrx_sc = NULL;
799         rxq->vtnrx_id = -1;
800
801         if (rxq->vtnrx_sg != NULL) {
802                 sglist_free(rxq->vtnrx_sg);
803                 rxq->vtnrx_sg = NULL;
804         }
805
806         if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
807                 mtx_destroy(&rxq->vtnrx_mtx);
808 }
809
810 static void
811 vtnet_destroy_txq(struct vtnet_txq *txq)
812 {
813
814         txq->vtntx_sc = NULL;
815         txq->vtntx_id = -1;
816
817         if (txq->vtntx_sg != NULL) {
818                 sglist_free(txq->vtntx_sg);
819                 txq->vtntx_sg = NULL;
820         }
821
822 #ifndef VTNET_LEGACY_TX
823         if (txq->vtntx_br != NULL) {
824                 buf_ring_free(txq->vtntx_br, M_DEVBUF);
825                 txq->vtntx_br = NULL;
826         }
827 #endif
828
829         if (mtx_initialized(&txq->vtntx_mtx) != 0)
830                 mtx_destroy(&txq->vtntx_mtx);
831 }
832
833 static void
834 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
835 {
836         int i;
837
838         if (sc->vtnet_rxqs != NULL) {
839                 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
840                         vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
841                 free(sc->vtnet_rxqs, M_DEVBUF);
842                 sc->vtnet_rxqs = NULL;
843         }
844
845         if (sc->vtnet_txqs != NULL) {
846                 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
847                         vtnet_destroy_txq(&sc->vtnet_txqs[i]);
848                 free(sc->vtnet_txqs, M_DEVBUF);
849                 sc->vtnet_txqs = NULL;
850         }
851 }
852
853 static int
854 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
855 {
856
857         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
858                 sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
859                     M_DEVBUF, M_NOWAIT | M_ZERO);
860                 if (sc->vtnet_mac_filter == NULL)
861                         return (ENOMEM);
862         }
863
864         if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
865                 sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
866                     VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
867                 if (sc->vtnet_vlan_filter == NULL)
868                         return (ENOMEM);
869         }
870
871         return (0);
872 }
873
874 static void
875 vtnet_free_rx_filters(struct vtnet_softc *sc)
876 {
877
878         if (sc->vtnet_mac_filter != NULL) {
879                 free(sc->vtnet_mac_filter, M_DEVBUF);
880                 sc->vtnet_mac_filter = NULL;
881         }
882
883         if (sc->vtnet_vlan_filter != NULL) {
884                 free(sc->vtnet_vlan_filter, M_DEVBUF);
885                 sc->vtnet_vlan_filter = NULL;
886         }
887 }
888
889 static int
890 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
891 {
892         device_t dev;
893         struct vq_alloc_info *info;
894         struct vtnet_rxq *rxq;
895         struct vtnet_txq *txq;
896         int i, idx, flags, nvqs, error;
897
898         dev = sc->vtnet_dev;
899         flags = 0;
900
901         nvqs = sc->vtnet_max_vq_pairs * 2;
902         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
903                 nvqs++;
904
905         info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
906         if (info == NULL)
907                 return (ENOMEM);
908
909         for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) {
910                 rxq = &sc->vtnet_rxqs[i];
911                 VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
912                     vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
913                     "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);
914
915                 txq = &sc->vtnet_txqs[i];
916                 VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
917                     vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
918                     "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
919         }
920
921         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
922                 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
923                     &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
924         }
925
926         /*
927          * Enable interrupt binding if this is multiqueue. This only matters
928          * when per-vq MSIX is available.
929          */
930         if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
931                 flags |= 0;
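        /*
         * Annotation (not in the original): "flags |= 0" above is a no-op, so
         * no special virtqueue-allocation flag is actually passed for the
         * multiqueue case; the preceding comment records the intent only.
         */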
932
933         error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
934         free(info, M_TEMP);
935
936         return (error);
937 }
938
939 static int
940 vtnet_setup_interface(struct vtnet_softc *sc)
941 {
942         device_t dev;
943         struct pfil_head_args pa;
944         struct ifnet *ifp;
945
946         dev = sc->vtnet_dev;
947
948         ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
949         if (ifp == NULL) {
950                 device_printf(dev, "cannot allocate ifnet structure\n");
951                 return (ENOSPC);
952         }
953
954         if_initname(ifp, device_get_name(dev), device_get_unit(dev));
955         ifp->if_baudrate = IF_Gbps(10); /* Approx. */
956         ifp->if_softc = sc;
957         ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
958             IFF_KNOWSEPOCH;
959         ifp->if_init = vtnet_init;
960         ifp->if_ioctl = vtnet_ioctl;
961         ifp->if_get_counter = vtnet_get_counter;
962 #ifndef VTNET_LEGACY_TX
963         ifp->if_transmit = vtnet_txq_mq_start;
964         ifp->if_qflush = vtnet_qflush;
965 #else
966         struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
967         ifp->if_start = vtnet_start;
968         IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
969         ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
970         IFQ_SET_READY(&ifp->if_snd);
971 #endif
972
973         ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
974             vtnet_ifmedia_sts);
975         ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
976         ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);
977
978         /* Read (or generate) the MAC address for the adapter. */
979         vtnet_get_hwaddr(sc);
980
981         ether_ifattach(ifp, sc->vtnet_hwaddr);
982
983         if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
984                 ifp->if_capabilities |= IFCAP_LINKSTATE;
985
986         /* Tell the upper layer(s) we support long frames. */
987         ifp->if_hdrlen = sizeof(struct ether_vlan_header);
988         ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;
989
990         if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
991                 ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;
992
993                 if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
994                         ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
995                         sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
996                 } else {
997                         if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
998                                 ifp->if_capabilities |= IFCAP_TSO4;
999                         if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
1000                                 ifp->if_capabilities |= IFCAP_TSO6;
1001                         if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
1002                                 sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
1003                 }
1004
1005                 if (ifp->if_capabilities & IFCAP_TSO)
1006                         ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
1007         }
1008
1009         if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
1010                 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;
1011
1012                 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
1013                     virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
1014                         ifp->if_capabilities |= IFCAP_LRO;
1015         }
1016
1017         if (ifp->if_capabilities & IFCAP_HWCSUM) {
1018                 /*
1019                  * VirtIO does not support VLAN tagging, but we can fake
1020                  * it by inserting and removing the 802.1Q header during
1021                  * transmit and receive. We are then able to do checksum
1022                  * offloading of VLAN frames.
1023                  */
1024                 ifp->if_capabilities |=
1025                     IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
1026         }
1027
1028         ifp->if_capenable = ifp->if_capabilities;
1029
1030         /*
1031          * Capabilities after here are not enabled by default.
1032          */
1033
1034         if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1035                 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;
1036
1037                 sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1038                     vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1039                 sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1040                     vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1041         }
1042
1043         vtnet_set_rx_process_limit(sc);
1044         vtnet_set_tx_intr_threshold(sc);
1045
1046         DEBUGNET_SET(ifp, vtnet);
1047
1048         pa.pa_version = PFIL_VERSION;
1049         pa.pa_flags = PFIL_IN;
1050         pa.pa_type = PFIL_TYPE_ETHERNET;
1051         pa.pa_headname = ifp->if_xname;
1052         sc->vtnet_pfil = pfil_head_register(&pa);
1053
1054         return (0);
1055 }
1056
1057 static int
1058 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
1059 {
1060         struct ifnet *ifp;
1061         int frame_size, clsize;
1062
1063         ifp = sc->vtnet_ifp;
1064
1065         if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
1066                 return (EINVAL);
1067
1068         frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
1069             new_mtu;
1070
1071         /*
1072          * Based on the new MTU (and hence frame size) determine which
1073          * cluster size is most appropriate for the receive queues.
1074          */
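        /*
         * Worked example (illustrative): with the 12-byte mergeable/V1
         * virtio-net header and an 18-byte ether_vlan_header, a 1500 byte
         * MTU gives frame_size = 12 + 18 + 1500 = 1530, which fits in a
         * standard MCLBYTES (2K) cluster; a 9000 byte MTU gives 9030 and
         * falls through to the jumbo cluster cases below.
         */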
1075         if (frame_size <= MCLBYTES) {
1076                 clsize = MCLBYTES;
1077         } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1078                 /* Avoid going past 9K jumbos. */
1079                 if (frame_size > MJUM9BYTES)
1080                         return (EINVAL);
1081                 clsize = MJUM9BYTES;
1082         } else
1083                 clsize = MJUMPAGESIZE;
1084
1085         ifp->if_mtu = new_mtu;
1086         sc->vtnet_rx_new_clsize = clsize;
1087
1088         if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1089                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1090                 vtnet_init_locked(sc);
1091         }
1092
1093         return (0);
1094 }
1095
1096 static int
1097 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1098 {
1099         struct vtnet_softc *sc;
1100         struct ifreq *ifr;
1101         int reinit, mask, error;
1102
1103         sc = ifp->if_softc;
1104         ifr = (struct ifreq *) data;
1105         error = 0;
1106
1107         switch (cmd) {
1108         case SIOCSIFMTU:
1109                 if (ifp->if_mtu != ifr->ifr_mtu) {
1110                         VTNET_CORE_LOCK(sc);
1111                         error = vtnet_change_mtu(sc, ifr->ifr_mtu);
1112                         VTNET_CORE_UNLOCK(sc);
1113                 }
1114                 break;
1115
1116         case SIOCSIFFLAGS:
1117                 VTNET_CORE_LOCK(sc);
1118                 if ((ifp->if_flags & IFF_UP) == 0) {
1119                         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1120                                 vtnet_stop(sc);
1121                 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1122                         if ((ifp->if_flags ^ sc->vtnet_if_flags) &
1123                             (IFF_PROMISC | IFF_ALLMULTI)) {
1124                                 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1125                                         vtnet_rx_filter(sc);
1126                                 else {
1127                                         ifp->if_flags |= IFF_PROMISC;
1128                                         if ((ifp->if_flags ^ sc->vtnet_if_flags)
1129                                             & IFF_ALLMULTI)
1130                                                 error = ENOTSUP;
1131                                 }
1132                         }
1133                 } else
1134                         vtnet_init_locked(sc);
1135
1136                 if (error == 0)
1137                         sc->vtnet_if_flags = ifp->if_flags;
1138                 VTNET_CORE_UNLOCK(sc);
1139                 break;
1140
1141         case SIOCADDMULTI:
1142         case SIOCDELMULTI:
1143                 if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
1144                         break;
1145                 VTNET_CORE_LOCK(sc);
1146                 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1147                         vtnet_rx_filter_mac(sc);
1148                 VTNET_CORE_UNLOCK(sc);
1149                 break;
1150
1151         case SIOCSIFMEDIA:
1152         case SIOCGIFMEDIA:
1153                 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1154                 break;
1155
1156         case SIOCSIFCAP:
1157                 VTNET_CORE_LOCK(sc);
1158                 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1159
1160                 if (mask & IFCAP_TXCSUM)
1161                         ifp->if_capenable ^= IFCAP_TXCSUM;
1162                 if (mask & IFCAP_TXCSUM_IPV6)
1163                         ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1164                 if (mask & IFCAP_TSO4)
1165                         ifp->if_capenable ^= IFCAP_TSO4;
1166                 if (mask & IFCAP_TSO6)
1167                         ifp->if_capenable ^= IFCAP_TSO6;
1168
1169                 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
1170                     IFCAP_VLAN_HWFILTER)) {
1171                         /* These Rx features require us to renegotiate. */
1172                         reinit = 1;
1173
1174                         if (mask & IFCAP_RXCSUM)
1175                                 ifp->if_capenable ^= IFCAP_RXCSUM;
1176                         if (mask & IFCAP_RXCSUM_IPV6)
1177                                 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1178                         if (mask & IFCAP_LRO)
1179                                 ifp->if_capenable ^= IFCAP_LRO;
1180                         if (mask & IFCAP_VLAN_HWFILTER)
1181                                 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
1182                 } else
1183                         reinit = 0;
1184
1185                 if (mask & IFCAP_VLAN_HWTSO)
1186                         ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
1187                 if (mask & IFCAP_VLAN_HWTAGGING)
1188                         ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
1189
1190                 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1191                         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1192                         vtnet_init_locked(sc);
1193                 }
1194
1195                 VTNET_CORE_UNLOCK(sc);
1196                 VLAN_CAPABILITIES(ifp);
1197
1198                 break;
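                /*
                 * Illustrative note (not from the original source): capability
                 * toggles from userland, e.g. "ifconfig vtnet0 -lro", arrive
                 * here as SIOCSIFCAP.  Per the comment above, the receive-side
                 * bits (RXCSUM, RXCSUM_IPV6, LRO, VLAN_HWFILTER) force a
                 * reinit of a running interface so they can be renegotiated,
                 * while the transmit-side bits take effect immediately.
                 */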
1199
1200         default:
1201                 error = ether_ioctl(ifp, cmd, data);
1202                 break;
1203         }
1204
1205         VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1206
1207         return (error);
1208 }
1209
1210 static int
1211 vtnet_rxq_populate(struct vtnet_rxq *rxq)
1212 {
1213         struct virtqueue *vq;
1214         int nbufs, error;
1215
1216 #ifdef DEV_NETMAP
1217         error = vtnet_netmap_rxq_populate(rxq);
1218         if (error >= 0)
1219                 return (error);
1220 #endif  /* DEV_NETMAP */
1221
1222         vq = rxq->vtnrx_vq;
1223         error = ENOSPC;
1224
1225         for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1226                 error = vtnet_rxq_new_buf(rxq);
1227                 if (error)
1228                         break;
1229         }
1230
1231         if (nbufs > 0) {
1232                 virtqueue_notify(vq);
1233                 /*
1234                  * EMSGSIZE signifies the virtqueue did not have enough
1235                  * entries available to hold the last mbuf. This is not
1236                  * an error.
1237                  */
1238                 if (error == EMSGSIZE)
1239                         error = 0;
1240         }
1241
1242         return (error);
1243 }
1244
1245 static void
1246 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1247 {
1248         struct virtqueue *vq;
1249         struct mbuf *m;
1250         int last;
1251 #ifdef DEV_NETMAP
1252         int netmap_bufs = vtnet_netmap_queue_on(rxq->vtnrx_sc, NR_RX,
1253                                                 rxq->vtnrx_id);
1254 #else  /* !DEV_NETMAP */
1255         int netmap_bufs = 0;
1256 #endif /* !DEV_NETMAP */
1257
1258         vq = rxq->vtnrx_vq;
1259         last = 0;
1260
1261         while ((m = virtqueue_drain(vq, &last)) != NULL) {
1262                 if (!netmap_bufs)
1263                         m_freem(m);
1264         }
1265
1266         KASSERT(virtqueue_empty(vq),
1267             ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1268 }
1269
1270 static struct mbuf *
1271 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1272 {
1273         struct mbuf *m_head, *m_tail, *m;
1274         int i, clsize;
1275
1276         clsize = sc->vtnet_rx_clsize;
1277
1278         KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1279             ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));
1280
1281         m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
1282         if (m_head == NULL)
1283                 goto fail;
1284
1285         m_head->m_len = clsize;
1286         m_tail = m_head;
1287
1288         /* Allocate the rest of the chain. */
1289         for (i = 1; i < nbufs; i++) {
1290                 m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
1291                 if (m == NULL)
1292                         goto fail;
1293
1294                 m->m_len = clsize;
1295                 m_tail->m_next = m;
1296                 m_tail = m;
1297         }
1298
1299         if (m_tailp != NULL)
1300                 *m_tailp = m_tail;
1301
1302         return (m_head);
1303
1304 fail:
1305         sc->vtnet_stats.mbuf_alloc_failed++;
1306         m_freem(m_head);
1307
1308         return (NULL);
1309 }
1310
1311 /*
1312  * Slow path for when LRO without mergeable buffers is negotiated.
1313  */
1314 static int
1315 vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1316     int len0)
1317 {
1318         struct vtnet_softc *sc;
1319         struct mbuf *m, *m_prev;
1320         struct mbuf *m_new, *m_tail;
1321         int len, clsize, nreplace, error;
1322
1323         sc = rxq->vtnrx_sc;
1324         clsize = sc->vtnet_rx_clsize;
1325
1326         m_prev = NULL;
1327         m_tail = NULL;
1328         nreplace = 0;
1329
1330         m = m0;
1331         len = len0;
1332
1333         /*
1334          * Since these mbuf chains are so large, we avoid allocating an
1335          * entire replacement chain if possible. When the received frame
1336          * did not consume the entire chain, the unused mbufs are moved
1337          * to the replacement chain.
1338          */
1339         while (len > 0) {
1340                 /*
1341                  * Something is seriously wrong if we received a frame
1342                  * larger than the chain. Drop it.
1343                  */
1344                 if (m == NULL) {
1345                         sc->vtnet_stats.rx_frame_too_large++;
1346                         return (EMSGSIZE);
1347                 }
1348
1349                 /* We always allocate the same cluster size. */
1350                 KASSERT(m->m_len == clsize,
1351                     ("%s: mbuf size %d is not the cluster size %d",
1352                     __func__, m->m_len, clsize));
1353
1354                 m->m_len = MIN(m->m_len, len);
1355                 len -= m->m_len;
1356
1357                 m_prev = m;
1358                 m = m->m_next;
1359                 nreplace++;
1360         }
1361
1362         KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
1363             ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
1364             sc->vtnet_rx_nmbufs));
1365
1366         m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1367         if (m_new == NULL) {
1368                 m_prev->m_len = clsize;
1369                 return (ENOBUFS);
1370         }
1371
1372         /*
1373          * Move any unused mbufs from the received chain onto the end
1374          * of the new chain.
1375          */
1376         if (m_prev->m_next != NULL) {
1377                 m_tail->m_next = m_prev->m_next;
1378                 m_prev->m_next = NULL;
1379         }
1380
1381         error = vtnet_rxq_enqueue_buf(rxq, m_new);
1382         if (error) {
1383                 /*
1384                  * BAD! We could not enqueue the replacement mbuf chain. We
1385                  * must restore the m0 chain to the original state if it was
1386                  * modified so we can subsequently discard it.
1387                  *
1388                  * NOTE: The replacement is supposed to be an identical copy
1389                  * to the one just dequeued so this is an unexpected error.
1390                  */
1391                 sc->vtnet_stats.rx_enq_replacement_failed++;
1392
1393                 if (m_tail->m_next != NULL) {
1394                         m_prev->m_next = m_tail->m_next;
1395                         m_tail->m_next = NULL;
1396                 }
1397
1398                 m_prev->m_len = clsize;
1399                 m_freem(m_new);
1400         }
1401
1402         return (error);
1403 }
1404
1405 static int
1406 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1407 {
1408         struct vtnet_softc *sc;
1409         struct mbuf *m_new;
1410         int error;
1411
1412         sc = rxq->vtnrx_sc;
1413
1414         KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1415             ("%s: chained mbuf without LRO_NOMRG", __func__));
1416
1417         if (m->m_next == NULL) {
1418                 /* Fast-path for the common case of just one mbuf. */
1419                 if (m->m_len < len)
1420                         return (EINVAL);
1421
1422                 m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1423                 if (m_new == NULL)
1424                         return (ENOBUFS);
1425
1426                 error = vtnet_rxq_enqueue_buf(rxq, m_new);
1427                 if (error) {
1428                         /*
1429                          * The new mbuf is supposed to be an identical
1430                          * copy of the one just dequeued so this is an
1431                          * unexpected error.
1432                          */
1433                         m_freem(m_new);
1434                         sc->vtnet_stats.rx_enq_replacement_failed++;
1435                 } else
1436                         m->m_len = len;
1437         } else
1438                 error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);
1439
1440         return (error);
1441 }
1442
1443 static int
1444 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1445 {
1446         struct vtnet_softc *sc;
1447         struct sglist *sg;
1448         struct vtnet_rx_header *rxhdr;
1449         uint8_t *mdata;
1450         int offset, error;
1451
1452         sc = rxq->vtnrx_sc;
1453         sg = rxq->vtnrx_sg;
1454         mdata = mtod(m, uint8_t *);
1455
1456         VTNET_RXQ_LOCK_ASSERT(rxq);
1457         KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
1458             ("%s: chained mbuf without LRO_NOMRG", __func__));
1459         KASSERT(m->m_len == sc->vtnet_rx_clsize,
1460             ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
1461              sc->vtnet_rx_clsize));
1462
1463         sglist_reset(sg);
1464         if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1465                 MPASS(sc->vtnet_hdr_size == sizeof(rxhdr->vrh_uhdr.hdr) ||
1466                     sc->vtnet_hdr_size == sizeof(rxhdr->vrh_uhdr.mhdr));
1467                 rxhdr = (struct vtnet_rx_header *) mdata;
1468                 sglist_append(sg, &rxhdr->vrh_uhdr, sc->vtnet_hdr_size);
1469                 offset = sizeof(struct vtnet_rx_header);
1470         } else
1471                 offset = 0;
1472
1473         sglist_append(sg, mdata + offset, m->m_len - offset);
1474         if (m->m_next != NULL) {
1475                 error = sglist_append_mbuf(sg, m->m_next);
1476                 MPASS(error == 0);
1477         }
1478
1479         error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg);
1480
1481         return (error);
1482 }
1483
1484 static int
1485 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1486 {
1487         struct vtnet_softc *sc;
1488         struct mbuf *m;
1489         int error;
1490
1491         sc = rxq->vtnrx_sc;
1492
1493         m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1494         if (m == NULL)
1495                 return (ENOBUFS);
1496
1497         error = vtnet_rxq_enqueue_buf(rxq, m);
1498         if (error)
1499                 m_freem(m);
1500
1501         return (error);
1502 }
1503
1504 /*
1505  * Use the checksum offset in the VirtIO header to set the
1506  * correct CSUM_* flags.
1507  */
1508 static int
1509 vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
1510     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1511 {
1512         struct vtnet_softc *sc;
1513 #if defined(INET) || defined(INET6)
1514         int offset = hdr->csum_start + hdr->csum_offset;
1515 #endif
1516
1517         sc = rxq->vtnrx_sc;
1518
1519         /* Only do a basic sanity check on the offset. */
1520         switch (eth_type) {
1521 #if defined(INET)
1522         case ETHERTYPE_IP:
1523                 if (__predict_false(offset < ip_start + sizeof(struct ip)))
1524                         return (1);
1525                 break;
1526 #endif
1527 #if defined(INET6)
1528         case ETHERTYPE_IPV6:
1529                 if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
1530                         return (1);
1531                 break;
1532 #endif
1533         default:
1534                 sc->vtnet_stats.rx_csum_bad_ethtype++;
1535                 return (1);
1536         }
1537
1538         /*
1539          * Use the offset to determine the appropriate CSUM_* flags. This is
1540          * a bit dirty, but we can get by with it since the checksum offsets
1541                  * happen to be different. We assume the host does not do IPv4
1542          * header checksum offloading.
1543          */
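        /*
         * For reference, with the standard BSD header layouts:
         * offsetof(struct udphdr, uh_sum) is 6 and
         * offsetof(struct tcphdr, th_sum) is 16, so the two cases below
         * cannot collide.
         */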
1544         switch (hdr->csum_offset) {
1545         case offsetof(struct udphdr, uh_sum):
1546         case offsetof(struct tcphdr, th_sum):
1547                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1548                 m->m_pkthdr.csum_data = 0xFFFF;
1549                 break;
1550         default:
1551                 sc->vtnet_stats.rx_csum_bad_offset++;
1552                 return (1);
1553         }
1554
1555         return (0);
1556 }
1557
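/*
 * Used for the VIRTIO_NET_HDR_F_DATA_VALID case, where the header
 * carries no checksum offsets: parse the IP or IPv6 header to find the
 * L4 protocol before setting the CSUM_* flags.
 */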
1558 static int
1559 vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
1560     uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
1561 {
1562         struct vtnet_softc *sc;
1563         int offset, proto;
1564
1565         sc = rxq->vtnrx_sc;
1566
1567         switch (eth_type) {
1568 #if defined(INET)
1569         case ETHERTYPE_IP: {
1570                 struct ip *ip;
1571                 if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
1572                         return (1);
1573                 ip = (struct ip *)(m->m_data + ip_start);
1574                 proto = ip->ip_p;
1575                 offset = ip_start + (ip->ip_hl << 2);
1576                 break;
1577         }
1578 #endif
1579 #if defined(INET6)
1580         case ETHERTYPE_IPV6:
1581                 if (__predict_false(m->m_len < ip_start +
1582                     sizeof(struct ip6_hdr)))
1583                         return (1);
1584                 offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
1585                 if (__predict_false(offset < 0))
1586                         return (1);
1587                 break;
1588 #endif
1589         default:
1590                 sc->vtnet_stats.rx_csum_bad_ethtype++;
1591                 return (1);
1592         }
1593
1594         switch (proto) {
1595         case IPPROTO_TCP:
1596                 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
1597                         return (1);
1598                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1599                 m->m_pkthdr.csum_data = 0xFFFF;
1600                 break;
1601         case IPPROTO_UDP:
1602                 if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
1603                         return (1);
1604                 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1605                 m->m_pkthdr.csum_data = 0xFFFF;
1606                 break;
1607         default:
1608                 /*
1609                  * For the remaining protocols, FreeBSD does not support
1610                  * checksum offloading, so the checksum will be recomputed.
1611                  */
1612 #if 0
1613                 if_printf(sc->vtnet_ifp, "%s: cksum offload of unsupported "
1614                     "protocol eth_type=%#x proto=%d csum_start=%d "
1615                     "csum_offset=%d\n", __func__, eth_type, proto,
1616                     hdr->csum_start, hdr->csum_offset);
1617 #endif
1618                 break;
1619         }
1620
1621         return (0);
1622 }
1623
1624 /*
1625  * Set the appropriate CSUM_* flags. Unfortunately, the information
1626  * provided is not directly useful to us. The VirtIO header gives the
1627  * offset of the checksum, which is all Linux needs, but this is not
1628  * how FreeBSD does things. We are forced to peek inside the packet
1629  * a bit.
1630  *
1631  * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
1632  * could accept the offsets and let the stack figure it out.
1633  */
1634 static int
1635 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1636     struct virtio_net_hdr *hdr)
1637 {
1638         struct ether_header *eh;
1639         struct ether_vlan_header *evh;
1640         uint16_t eth_type;
1641         int offset, error;
1642
1643         eh = mtod(m, struct ether_header *);
1644         eth_type = ntohs(eh->ether_type);
1645         if (eth_type == ETHERTYPE_VLAN) {
1646                 /* BMV: We should handle nested VLAN tags too. */
1647                 evh = mtod(m, struct ether_vlan_header *);
1648                 eth_type = ntohs(evh->evl_proto);
1649                 offset = sizeof(struct ether_vlan_header);
1650         } else
1651                 offset = sizeof(struct ether_header);
1652
1653         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1654                 error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
1655         else
1656                 error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);
1657
1658         return (error);
1659 }
1660
1661 static void
1662 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1663 {
1664         struct mbuf *m;
1665
1666         while (--nbufs > 0) {
1667                 m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1668                 if (m == NULL)
1669                         break;
1670                 vtnet_rxq_discard_buf(rxq, m);
1671         }
1672 }
1673
1674 static void
1675 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1676 {
1677         int error;
1678
1679         /*
1680          * Requeue the discarded mbuf. This should always be successful
1681          * since it was just dequeued.
1682          */
1683         error = vtnet_rxq_enqueue_buf(rxq, m);
1684         KASSERT(error == 0,
1685             ("%s: cannot requeue discarded mbuf %d", __func__, error));
1686 }
1687
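/*
 * Dequeue the remaining buffers of a mergeable receive chain and append
 * them to m_head. On any failure the partially assembled chain is freed
 * and rx_mergeable_failed is incremented.
 */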
1688 static int
1689 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1690 {
1691         struct vtnet_softc *sc;
1692         struct virtqueue *vq;
1693         struct mbuf *m, *m_tail;
1694         int len;
1695
1696         sc = rxq->vtnrx_sc;
1697         vq = rxq->vtnrx_vq;
1698         m_tail = m_head;
1699
1700         while (--nbufs > 0) {
1701                 m = virtqueue_dequeue(vq, &len);
1702                 if (m == NULL) {
1703                         rxq->vtnrx_stats.vrxs_ierrors++;
1704                         goto fail;
1705                 }
1706
1707                 if (vtnet_rxq_new_buf(rxq) != 0) {
1708                         rxq->vtnrx_stats.vrxs_iqdrops++;
1709                         vtnet_rxq_discard_buf(rxq, m);
1710                         if (nbufs > 1)
1711                                 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1712                         goto fail;
1713                 }
1714
1715                 if (m->m_len < len)
1716                         len = m->m_len;
1717
1718                 m->m_len = len;
1719                 m->m_flags &= ~M_PKTHDR;
1720
1721                 m_head->m_pkthdr.len += len;
1722                 m_tail->m_next = m;
1723                 m_tail = m;
1724         }
1725
1726         return (0);
1727
1728 fail:
1729         sc->vtnet_stats.rx_mergeable_failed++;
1730         m_freem(m_head);
1731
1732         return (1);
1733 }
1734
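/*
 * Complete receive processing for one frame: strip the 802.1Q header
 * when hardware VLAN tagging is enabled (adjusting csum_start to
 * match), set the flowid from the queue id, apply any checksum offload
 * flags, update the queue statistics, and pass the mbuf up the stack
 * with the queue lock temporarily dropped.
 */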
1735 static void
1736 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
1737     struct virtio_net_hdr *hdr)
1738 {
1739         struct vtnet_softc *sc;
1740         struct ifnet *ifp;
1741         struct ether_header *eh;
1742
1743         sc = rxq->vtnrx_sc;
1744         ifp = sc->vtnet_ifp;
1745
1746         if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
1747                 eh = mtod(m, struct ether_header *);
1748                 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
1749                         vtnet_vlan_tag_remove(m);
1750                         /*
1751                          * With the 802.1Q header removed, update the
1752                          * checksum starting location accordingly.
1753                          */
1754                         if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1755                                 hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
1756                 }
1757         }
1758
1759         m->m_pkthdr.flowid = rxq->vtnrx_id;
1760         M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
1761
1762         /*
1763          * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
1764          * distinction that Linux does. Need to reevaluate if performing
1765          * offloading for the NEEDS_CSUM case is really appropriate.
1766          */
1767         if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
1768             VIRTIO_NET_HDR_F_DATA_VALID)) {
1769                 if (vtnet_rxq_csum(rxq, m, hdr) == 0)
1770                         rxq->vtnrx_stats.vrxs_csum++;
1771                 else
1772                         rxq->vtnrx_stats.vrxs_csum_failed++;
1773         }
1774
1775         rxq->vtnrx_stats.vrxs_ipackets++;
1776         rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
1777
1778         VTNET_RXQ_UNLOCK(rxq);
1779         (*ifp->if_input)(ifp, m);
1780         VTNET_RXQ_LOCK(rxq);
1781 }
1782
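/*
 * Main receive loop: dequeue up to vtnet_rx_process_limit completed
 * buffers, run the pfil(9) hooks, replace each completed buffer with a
 * fresh one, assemble mergeable chains, save and strip the virtio_net
 * header, and hand the frame to vtnet_rxq_input(). Returns EAGAIN once
 * the process limit is exhausted, zero otherwise.
 */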
1783 static int
1784 vtnet_rxq_eof(struct vtnet_rxq *rxq)
1785 {
1786         struct virtio_net_hdr lhdr, *hdr;
1787         struct vtnet_softc *sc;
1788         struct ifnet *ifp;
1789         struct virtqueue *vq;
1790         struct mbuf *m, *mr;
1791         struct virtio_net_hdr_mrg_rxbuf *mhdr;
1792         int len, deq, nbufs, adjsz, count;
1793         pfil_return_t pfil;
1794         bool pfil_done;
1795
1796         sc = rxq->vtnrx_sc;
1797         vq = rxq->vtnrx_vq;
1798         ifp = sc->vtnet_ifp;
1799         hdr = &lhdr;
1800         deq = 0;
1801         count = sc->vtnet_rx_process_limit;
1802
1803         VTNET_RXQ_LOCK_ASSERT(rxq);
1804
1805         while (count-- > 0) {
1806                 m = virtqueue_dequeue(vq, &len);
1807                 if (m == NULL)
1808                         break;
1809                 deq++;
1810
1811                 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
1812                         rxq->vtnrx_stats.vrxs_ierrors++;
1813                         vtnet_rxq_discard_buf(rxq, m);
1814                         continue;
1815                 }
1816
1817                 if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
1818                         nbufs = 1;
1819                         adjsz = sizeof(struct vtnet_rx_header);
1820                         /*
1821                          * Account for our pad inserted between the header
1822                          * and the actual start of the frame.
1823                          */
1824                         len += VTNET_RX_HEADER_PAD;
1825                 } else {
1826                         mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
1827                         nbufs = mhdr->num_buffers;
1828                         adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
1829                 }
1830
1831                 /*
1832                  * If we have enough data in the first mbuf, run it through
1833                  * pfil as a memory buffer before dequeueing the rest.
1834                  */
1835                 if (PFIL_HOOKED_IN(sc->vtnet_pfil) &&
1836                     len - adjsz >= ETHER_HDR_LEN + max_protohdr) {
1837                         pfil = pfil_run_hooks(sc->vtnet_pfil,
1838                             m->m_data + adjsz, ifp,
1839                             (len - adjsz) | PFIL_MEMPTR | PFIL_IN, NULL);
1840                         switch (pfil) {
1841                         case PFIL_REALLOCED:
1842                                 mr = pfil_mem2mbuf(m->m_data + adjsz);
1843                                 vtnet_rxq_input(rxq, mr, hdr);
1844                                 /* FALLTHROUGH */
1845                         case PFIL_DROPPED:
1846                         case PFIL_CONSUMED:
1847                                 vtnet_rxq_discard_buf(rxq, m);
1848                                 if (nbufs > 1)
1849                                         vtnet_rxq_discard_merged_bufs(rxq,
1850                                             nbufs);
1851                                 continue;
1852                         default:
1853                                 KASSERT(pfil == PFIL_PASS,
1854                                     ("Filter returned %d!\n", pfil));
1855                         }
1856                         pfil_done = true;
1857                 } else
1858                         pfil_done = false;
1859
1860                 if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
1861                         rxq->vtnrx_stats.vrxs_iqdrops++;
1862                         vtnet_rxq_discard_buf(rxq, m);
1863                         if (nbufs > 1)
1864                                 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1865                         continue;
1866                 }
1867
1868                 m->m_pkthdr.len = len;
1869                 m->m_pkthdr.rcvif = ifp;
1870                 m->m_pkthdr.csum_flags = 0;
1871
1872                 if (nbufs > 1) {
1873                         /* Dequeue the rest of the chain. */
1874                         if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
1875                                 continue;
1876                 }
1877
1878                 /*
1879                  * Save a copy of the header before we strip it. For both mergeable
1880                  * and non-mergeable, the header is at the beginning of the
1881                  * mbuf data. We no longer need num_buffers, so always use a
1882                  * regular header.
1883                  *
1884                  * BMV: Is this memcpy() expensive? We know the mbuf data is
1885                  * still valid even after the m_adj().
1886                  */
1887                 memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
1888                 m_adj(m, adjsz);
1889
1890                 if (PFIL_HOOKED_IN(sc->vtnet_pfil) && pfil_done == false) {
1891                         pfil = pfil_run_hooks(sc->vtnet_pfil, &m, ifp, PFIL_IN,
1892                             NULL);
1893                         switch (pfil) {
1894                         case PFIL_DROPPED:
1895                         case PFIL_CONSUMED:
1896                                 continue;
1897                         default:
1898                                 KASSERT(pfil == PFIL_PASS,
1899                                     ("Filter returned %d!\n", pfil));
1900                         }
1901                 }
1902
1903                 vtnet_rxq_input(rxq, m, hdr);
1904
1905                 /* Must recheck after dropping the Rx lock. */
1906                 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1907                         break;
1908         }
1909
1910         if (deq > 0)
1911                 virtqueue_notify(vq);
1912
1913         return (count > 0 ? 0 : EAGAIN);
1914 }
1915
1916 static void
1917 vtnet_rx_vq_intr(void *xrxq)
1918 {
1919         struct vtnet_softc *sc;
1920         struct vtnet_rxq *rxq;
1921         struct ifnet *ifp;
1922         int tries, more;
1923 #ifdef DEV_NETMAP
1924         int nmirq;
1925 #endif /* DEV_NETMAP */
1926
1927         rxq = xrxq;
1928         sc = rxq->vtnrx_sc;
1929         ifp = sc->vtnet_ifp;
1930         tries = 0;
1931
1932         if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
1933                 /*
1934                  * Ignore this interrupt. Either this is a spurious interrupt
1935                  * or we are using multiqueue without per-VQ MSIX, so every
1936                  * queue needs to be polled (a brain dead configuration we
1937                  * could try harder to avoid).
1938                  */
1939                 vtnet_rxq_disable_intr(rxq);
1940                 return;
1941         }
1942
1943         VTNET_RXQ_LOCK(rxq);
1944
1945 #ifdef DEV_NETMAP
1946         /*
1947          * We call netmap_rx_irq() under lock to prevent concurrent calls.
1948          * This is not necessary to serialize the access to the RX vq, but
1949          * rather to avoid races that may happen if this interface is
1950          * attached to a VALE switch, which would cause received packets
1951          * to stall in the RX queue (nm_kr_tryget() could find the kring
1952          * busy when called from netmap_bwrap_intr_notify()).
1953          */
1954         nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
1955         if (nmirq != NM_IRQ_PASS) {
1956                 VTNET_RXQ_UNLOCK(rxq);
1957                 if (nmirq == NM_IRQ_RESCHED) {
1958                         taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1959                 }
1960                 return;
1961         }
1962 #endif /* DEV_NETMAP */
1963
1964 again:
1965         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
1966                 VTNET_RXQ_UNLOCK(rxq);
1967                 return;
1968         }
1969
1970         more = vtnet_rxq_eof(rxq);
1971         if (more || vtnet_rxq_enable_intr(rxq) != 0) {
1972                 if (!more)
1973                         vtnet_rxq_disable_intr(rxq);
1974                 /*
1975                  * This is an occasional condition or race (when !more),
1976                  * so retry a few times before scheduling the taskqueue.
1977                  */
1978                 if (tries++ < VTNET_INTR_DISABLE_RETRIES)
1979                         goto again;
1980
1981                 VTNET_RXQ_UNLOCK(rxq);
1982                 rxq->vtnrx_stats.vrxs_rescheduled++;
1983                 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
1984         } else
1985                 VTNET_RXQ_UNLOCK(rxq);
1986 }
1987
1988 static void
1989 vtnet_rxq_tq_intr(void *xrxq, int pending)
1990 {
1991         struct vtnet_softc *sc;
1992         struct vtnet_rxq *rxq;
1993         struct ifnet *ifp;
1994         int more;
1995 #ifdef DEV_NETMAP
1996         int nmirq;
1997 #endif /* DEV_NETMAP */
1998
1999         rxq = xrxq;
2000         sc = rxq->vtnrx_sc;
2001         ifp = sc->vtnet_ifp;
2002
2003         VTNET_RXQ_LOCK(rxq);
2004
2005 #ifdef DEV_NETMAP
2006         nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
2007         if (nmirq != NM_IRQ_PASS) {
2008                 VTNET_RXQ_UNLOCK(rxq);
2009                 if (nmirq == NM_IRQ_RESCHED) {
2010                         taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2011                 }
2012                 return;
2013         }
2014 #endif /* DEV_NETMAP */
2015
2016         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2017                 VTNET_RXQ_UNLOCK(rxq);
2018                 return;
2019         }
2020
2021         more = vtnet_rxq_eof(rxq);
2022         if (more || vtnet_rxq_enable_intr(rxq) != 0) {
2023                 if (!more)
2024                         vtnet_rxq_disable_intr(rxq);
2025                 rxq->vtnrx_stats.vrxs_rescheduled++;
2026                 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2027         }
2028
2029         VTNET_RXQ_UNLOCK(rxq);
2030 }
2031
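/*
 * Return nonzero when the number of free transmit descriptors is at or
 * below the configured interrupt threshold.
 */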
2032 static int
2033 vtnet_txq_below_threshold(struct vtnet_txq *txq)
2034 {
2035         struct vtnet_softc *sc;
2036         struct virtqueue *vq;
2037
2038         sc = txq->vtntx_sc;
2039         vq = txq->vtntx_vq;
2040
2041         return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
2042 }
2043
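/*
 * Kick the host after new frames have been enqueued. If re-enabling the
 * transmit interrupt races with additional completions, drain them
 * here; when that leaves the free descriptor count above the interrupt
 * threshold, disable the interrupt again and return nonzero so the
 * caller continues transmitting.
 */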
2044 static int
2045 vtnet_txq_notify(struct vtnet_txq *txq)
2046 {
2047         struct virtqueue *vq;
2048
2049         vq = txq->vtntx_vq;
2050
2051         txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2052         virtqueue_notify(vq);
2053
2054         if (vtnet_txq_enable_intr(txq) == 0)
2055                 return (0);
2056
2057         /*
2058          * Drain frames that were completed since last checked. If this
2059          * causes the queue to go above the threshold, the caller should
2060          * continue transmitting.
2061          */
2062         if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
2063                 virtqueue_disable_intr(vq);
2064                 return (1);
2065         }
2066
2067         return (0);
2068 }
2069
2070 static void
2071 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
2072 {
2073         struct virtqueue *vq;
2074         struct vtnet_tx_header *txhdr;
2075         int last;
2076 #ifdef DEV_NETMAP
2077         int netmap_bufs = vtnet_netmap_queue_on(txq->vtntx_sc, NR_TX,
2078                                                 txq->vtntx_id);
2079 #else  /* !DEV_NETMAP */
2080         int netmap_bufs = 0;
2081 #endif /* !DEV_NETMAP */
2082
2083         vq = txq->vtntx_vq;
2084         last = 0;
2085
2086         while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
2087                 if (!netmap_bufs) {
2088                         m_freem(txhdr->vth_mbuf);
2089                         uma_zfree(vtnet_tx_header_zone, txhdr);
2090                 }
2091         }
2092
2093         KASSERT(virtqueue_empty(vq),
2094             ("%s: mbufs remaining in tx queue %p", __func__, txq));
2095 }
2096
2097 /*
2098  * BMV: Much of this can go away once we finally have offsets in
2099  * the mbuf packet header. Bug andre@.
2100  */
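/*
 * Parse the Ethernet/VLAN and IP/IPv6 headers of an outbound frame to
 * determine the ethertype, the L4 protocol, and the offset at which the
 * L4 header begins; these feed the checksum and TSO fields of the
 * virtio_net header.
 */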
2101 static int
2102 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
2103     int *etype, int *proto, int *start)
2104 {
2105         struct vtnet_softc *sc;
2106         struct ether_vlan_header *evh;
2107         int offset;
2108
2109         sc = txq->vtntx_sc;
2110
2111         evh = mtod(m, struct ether_vlan_header *);
2112         if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2113                 /* BMV: We should handle nested VLAN tags too. */
2114                 *etype = ntohs(evh->evl_proto);
2115                 offset = sizeof(struct ether_vlan_header);
2116         } else {
2117                 *etype = ntohs(evh->evl_encap_proto);
2118                 offset = sizeof(struct ether_header);
2119         }
2120
2121         switch (*etype) {
2122 #if defined(INET)
2123         case ETHERTYPE_IP: {
2124                 struct ip *ip, iphdr;
2125                 if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2126                         m_copydata(m, offset, sizeof(struct ip),
2127                             (caddr_t) &iphdr);
2128                         ip = &iphdr;
2129                 } else
2130                         ip = (struct ip *)(m->m_data + offset);
2131                 *proto = ip->ip_p;
2132                 *start = offset + (ip->ip_hl << 2);
2133                 break;
2134         }
2135 #endif
2136 #if defined(INET6)
2137         case ETHERTYPE_IPV6:
2138                 *proto = -1;
2139                 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2140                 /* Assert the network stack sent us a valid packet. */
2141                 KASSERT(*start > offset,
2142                     ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2143                     *start, offset, *proto));
2144                 break;
2145 #endif
2146         default:
2147                 sc->vtnet_stats.tx_csum_bad_ethtype++;
2148                 return (EINVAL);
2149         }
2150
2151         return (0);
2152 }
2153
2154 static int
2155 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2156     int offset, struct virtio_net_hdr *hdr)
2157 {
2158         static struct timeval lastecn;
2159         static int curecn;
2160         struct vtnet_softc *sc;
2161         struct tcphdr *tcp, tcphdr;
2162
2163         sc = txq->vtntx_sc;
2164
2165         if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2166                 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2167                 tcp = &tcphdr;
2168         } else
2169                 tcp = (struct tcphdr *)(m->m_data + offset);
2170
2171         hdr->hdr_len = offset + (tcp->th_off << 2);
2172         hdr->gso_size = m->m_pkthdr.tso_segsz;
2173         hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2174             VIRTIO_NET_HDR_GSO_TCPV6;
2175
2176         if (tcp->th_flags & TH_CWR) {
2177                 /*
2178                  * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD,
2179                  * ECN support is not on a per-interface basis, but globally via
2180                  * the net.inet.tcp.ecn.enable sysctl knob. The default is off.
2181                  */
2182                 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2183                         if (ppsratecheck(&lastecn, &curecn, 1))
2184                                 if_printf(sc->vtnet_ifp,
2185                                     "TSO with ECN not negotiated with host\n");
2186                         return (ENOTSUP);
2187                 }
2188                 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2189         }
2190
2191         txq->vtntx_stats.vtxs_tso++;
2192
2193         return (0);
2194 }
2195
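/*
 * Translate the mbuf's csum_flags into the virtio_net header: request
 * NEEDS_CSUM with csum_start/csum_offset for checksum offload, and fill
 * in the GSO fields for TSO. Returns NULL, with the mbuf freed, if the
 * frame cannot be offloaded.
 */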
2196 static struct mbuf *
2197 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2198     struct virtio_net_hdr *hdr)
2199 {
2200         struct vtnet_softc *sc;
2201         int flags, etype, csum_start, proto, error;
2202
2203         sc = txq->vtntx_sc;
2204         flags = m->m_pkthdr.csum_flags;
2205
2206         error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2207         if (error)
2208                 goto drop;
2209
2210         if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) ||
2211             (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) {
2212                 /*
2213                  * We could compare the IP protocol vs the CSUM_ flag too,
2214                  * but that really should not be necessary.
2215                  */
2216                 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2217                 hdr->csum_start = csum_start;
2218                 hdr->csum_offset = m->m_pkthdr.csum_data;
2219                 txq->vtntx_stats.vtxs_csum++;
2220         }
2221
2222         if (flags & CSUM_TSO) {
2223                 if (__predict_false(proto != IPPROTO_TCP)) {
2224                         /* Likely failed to correctly parse the mbuf. */
2225                         sc->vtnet_stats.tx_tso_not_tcp++;
2226                         goto drop;
2227                 }
2228
2229                 KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM,
2230                     ("%s: mbuf %p TSO without checksum offload %#x",
2231                     __func__, m, flags));
2232
2233                 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2234                 if (error)
2235                         goto drop;
2236         }
2237
2238         return (m);
2239
2240 drop:
2241         m_freem(m);
2242         return (NULL);
2243 }
2244
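/*
 * Build the transmit sglist: the virtio_net header in its own segment
 * followed by the mbuf chain, all read-only to the device. If the chain
 * has too many segments for the sglist, attempt a single m_defrag()
 * before giving up with ENOBUFS.
 */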
2245 static int
2246 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2247     struct vtnet_tx_header *txhdr)
2248 {
2249         struct vtnet_softc *sc;
2250         struct virtqueue *vq;
2251         struct sglist *sg;
2252         struct mbuf *m;
2253         int error;
2254
2255         sc = txq->vtntx_sc;
2256         vq = txq->vtntx_vq;
2257         sg = txq->vtntx_sg;
2258         m = *m_head;
2259
2260         sglist_reset(sg);
2261         error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2262         KASSERT(error == 0 && sg->sg_nseg == 1,
2263             ("%s: error %d adding header to sglist", __func__, error));
2264
2265         error = sglist_append_mbuf(sg, m);
2266         if (error) {
2267                 m = m_defrag(m, M_NOWAIT);
2268                 if (m == NULL)
2269                         goto fail;
2270
2271                 *m_head = m;
2272                 sc->vtnet_stats.tx_defragged++;
2273
2274                 error = sglist_append_mbuf(sg, m);
2275                 if (error)
2276                         goto fail;
2277         }
2278
2279         txhdr->vth_mbuf = m;
2280         error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2281
2282         return (error);
2283
2284 fail:
2285         sc->vtnet_stats.tx_defrag_failed++;
2286         m_freem(*m_head);
2287         *m_head = NULL;
2288
2289         return (ENOBUFS);
2290 }
2291
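/*
 * Encapsulate one outbound frame: allocate a vtnet_tx_header, insert
 * any VLAN tag in software, apply checksum/TSO offload to the header,
 * and enqueue the result on the transmit virtqueue.
 */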
2292 static int
2293 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
2294 {
2295         struct vtnet_tx_header *txhdr;
2296         struct virtio_net_hdr *hdr;
2297         struct mbuf *m;
2298         int error;
2299
2300         m = *m_head;
2301         M_ASSERTPKTHDR(m);
2302
2303         txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
2304         if (txhdr == NULL) {
2305                 m_freem(m);
2306                 *m_head = NULL;
2307                 return (ENOMEM);
2308         }
2309
2310         /*
2311          * Always use the non-mergeable header, regardless of whether the
2312          * feature was negotiated. For transmit, num_buffers is always zero. The
2313          * vtnet_hdr_size is used to enqueue the correct header size.
2314          */
2315         hdr = &txhdr->vth_uhdr.hdr;
2316
2317         if (m->m_flags & M_VLANTAG) {
2318                 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2319                 if ((*m_head = m) == NULL) {
2320                         error = ENOBUFS;
2321                         goto fail;
2322                 }
2323                 m->m_flags &= ~M_VLANTAG;
2324         }
2325
2326         if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2327                 m = vtnet_txq_offload(txq, m, hdr);
2328                 if ((*m_head = m) == NULL) {
2329                         error = ENOBUFS;
2330                         goto fail;
2331                 }
2332         }
2333
2334         error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2335         if (error == 0)
2336                 return (0);
2337
2338 fail:
2339         uma_zfree(vtnet_tx_header_zone, txhdr);
2340
2341         return (error);
2342 }
2343
2344 #ifdef VTNET_LEGACY_TX
2345
2346 static void
2347 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp)
2348 {
2349         struct vtnet_softc *sc;
2350         struct virtqueue *vq;
2351         struct mbuf *m0;
2352         int tries, enq;
2353
2354         sc = txq->vtntx_sc;
2355         vq = txq->vtntx_vq;
2356         tries = 0;
2357
2358         VTNET_TXQ_LOCK_ASSERT(txq);
2359
2360         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2361             sc->vtnet_link_active == 0)
2362                 return;
2363
2364         vtnet_txq_eof(txq);
2365
2366 again:
2367         enq = 0;
2368
2369         while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
2370                 if (virtqueue_full(vq))
2371                         break;
2372
2373                 IFQ_DRV_DEQUEUE(&ifp->if_snd, m0);
2374                 if (m0 == NULL)
2375                         break;
2376
2377                 if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
2378                         if (m0 != NULL)
2379                                 IFQ_DRV_PREPEND(&ifp->if_snd, m0);
2380                         break;
2381                 }
2382
2383                 enq++;
2384                 ETHER_BPF_MTAP(ifp, m0);
2385         }
2386
2387         if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2388                 if (tries++ < VTNET_NOTIFY_RETRIES)
2389                         goto again;
2390
2391                 txq->vtntx_stats.vtxs_rescheduled++;
2392                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2393         }
2394 }
2395
2396 static void
2397 vtnet_start(struct ifnet *ifp)
2398 {
2399         struct vtnet_softc *sc;
2400         struct vtnet_txq *txq;
2401
2402         sc = ifp->if_softc;
2403         txq = &sc->vtnet_txqs[0];
2404
2405         VTNET_TXQ_LOCK(txq);
2406         vtnet_start_locked(txq, ifp);
2407         VTNET_TXQ_UNLOCK(txq);
2408 }
2409
2410 #else /* !VTNET_LEGACY_TX */
2411
2412 static int
2413 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2414 {
2415         struct vtnet_softc *sc;
2416         struct virtqueue *vq;
2417         struct buf_ring *br;
2418         struct ifnet *ifp;
2419         int enq, tries, error;
2420
2421         sc = txq->vtntx_sc;
2422         vq = txq->vtntx_vq;
2423         br = txq->vtntx_br;
2424         ifp = sc->vtnet_ifp;
2425         tries = 0;
2426         error = 0;
2427
2428         VTNET_TXQ_LOCK_ASSERT(txq);
2429
2430         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
2431             sc->vtnet_link_active == 0) {
2432                 if (m != NULL)
2433                         error = drbr_enqueue(ifp, br, m);
2434                 return (error);
2435         }
2436
2437         if (m != NULL) {
2438                 error = drbr_enqueue(ifp, br, m);
2439                 if (error)
2440                         return (error);
2441         }
2442
2443         vtnet_txq_eof(txq);
2444
2445 again:
2446         enq = 0;
2447
2448         while ((m = drbr_peek(ifp, br)) != NULL) {
2449                 if (virtqueue_full(vq)) {
2450                         drbr_putback(ifp, br, m);
2451                         break;
2452                 }
2453
2454                 if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
2455                         if (m != NULL)
2456                                 drbr_putback(ifp, br, m);
2457                         else
2458                                 drbr_advance(ifp, br);
2459                         break;
2460                 }
2461                 drbr_advance(ifp, br);
2462
2463                 enq++;
2464                 ETHER_BPF_MTAP(ifp, m);
2465         }
2466
2467         if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2468                 if (tries++ < VTNET_NOTIFY_RETRIES)
2469                         goto again;
2470
2471                 txq->vtntx_stats.vtxs_rescheduled++;
2472                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2473         }
2474
2475         return (0);
2476 }
2477
2478 static int
2479 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m)
2480 {
2481         struct vtnet_softc *sc;
2482         struct vtnet_txq *txq;
2483         int i, npairs, error;
2484
2485         sc = ifp->if_softc;
2486         npairs = sc->vtnet_act_vq_pairs;
2487
2488         /* Check if the flowid is set. */
2489         if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2490                 i = m->m_pkthdr.flowid % npairs;
2491         else
2492                 i = curcpu % npairs;
2493
2494         txq = &sc->vtnet_txqs[i];
2495
2496         if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2497                 error = vtnet_txq_mq_start_locked(txq, m);
2498                 VTNET_TXQ_UNLOCK(txq);
2499         } else {
2500                 error = drbr_enqueue(ifp, txq->vtntx_br, m);
2501                 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2502         }
2503
2504         return (error);
2505 }
2506
2507 static void
2508 vtnet_txq_tq_deferred(void *xtxq, int pending)
2509 {
2510         struct vtnet_softc *sc;
2511         struct vtnet_txq *txq;
2512
2513         txq = xtxq;
2514         sc = txq->vtntx_sc;
2515
2516         VTNET_TXQ_LOCK(txq);
2517         if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2518                 vtnet_txq_mq_start_locked(txq, NULL);
2519         VTNET_TXQ_UNLOCK(txq);
2520 }
2521
2522 #endif /* VTNET_LEGACY_TX */
2523
2524 static void
2525 vtnet_txq_start(struct vtnet_txq *txq)
2526 {
2527         struct vtnet_softc *sc;
2528         struct ifnet *ifp;
2529
2530         sc = txq->vtntx_sc;
2531         ifp = sc->vtnet_ifp;
2532
2533 #ifdef VTNET_LEGACY_TX
2534         if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd))
2535                 vtnet_start_locked(txq, ifp);
2536 #else
2537         if (!drbr_empty(ifp, txq->vtntx_br))
2538                 vtnet_txq_mq_start_locked(txq, NULL);
2539 #endif
2540 }
2541
2542 static void
2543 vtnet_txq_tq_intr(void *xtxq, int pending)
2544 {
2545         struct vtnet_softc *sc;
2546         struct vtnet_txq *txq;
2547         struct ifnet *ifp;
2548
2549         txq = xtxq;
2550         sc = txq->vtntx_sc;
2551         ifp = sc->vtnet_ifp;
2552
2553         VTNET_TXQ_LOCK(txq);
2554
2555         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2556                 VTNET_TXQ_UNLOCK(txq);
2557                 return;
2558         }
2559
2560         vtnet_txq_eof(txq);
2561         vtnet_txq_start(txq);
2562
2563         VTNET_TXQ_UNLOCK(txq);
2564 }
2565
2566 static int
2567 vtnet_txq_eof(struct vtnet_txq *txq)
2568 {
2569         struct virtqueue *vq;
2570         struct vtnet_tx_header *txhdr;
2571         struct mbuf *m;
2572         int deq;
2573
2574         vq = txq->vtntx_vq;
2575         deq = 0;
2576         VTNET_TXQ_LOCK_ASSERT(txq);
2577
2578         while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2579                 m = txhdr->vth_mbuf;
2580                 deq++;
2581
2582                 txq->vtntx_stats.vtxs_opackets++;
2583                 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2584                 if (m->m_flags & M_MCAST)
2585                         txq->vtntx_stats.vtxs_omcasts++;
2586
2587                 m_freem(m);
2588                 uma_zfree(vtnet_tx_header_zone, txhdr);
2589         }
2590
2591         if (virtqueue_empty(vq))
2592                 txq->vtntx_watchdog = 0;
2593
2594         return (deq);
2595 }
2596
2597 static void
2598 vtnet_tx_vq_intr(void *xtxq)
2599 {
2600         struct vtnet_softc *sc;
2601         struct vtnet_txq *txq;
2602         struct ifnet *ifp;
2603
2604         txq = xtxq;
2605         sc = txq->vtntx_sc;
2606         ifp = sc->vtnet_ifp;
2607
2608         if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2609                 /*
2610                  * Ignore this interrupt. Either this is a spurious interrupt
2611                  * or we are using multiqueue without per-VQ MSIX, so every
2612                  * queue needs to be polled (a brain dead configuration we
2613                  * could try harder to avoid).
2614                  */
2615                 vtnet_txq_disable_intr(txq);
2616                 return;
2617         }
2618
2619 #ifdef DEV_NETMAP
2620         if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
2621                 return;
2622 #endif /* DEV_NETMAP */
2623
2624         VTNET_TXQ_LOCK(txq);
2625
2626         if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2627                 VTNET_TXQ_UNLOCK(txq);
2628                 return;
2629         }
2630
2631         vtnet_txq_eof(txq);
2632         vtnet_txq_start(txq);
2633
2634         VTNET_TXQ_UNLOCK(txq);
2635 }
2636
2637 static void
2638 vtnet_tx_start_all(struct vtnet_softc *sc)
2639 {
2640         struct vtnet_txq *txq;
2641         int i;
2642
2643         VTNET_CORE_LOCK_ASSERT(sc);
2644
2645         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2646                 txq = &sc->vtnet_txqs[i];
2647
2648                 VTNET_TXQ_LOCK(txq);
2649                 vtnet_txq_start(txq);
2650                 VTNET_TXQ_UNLOCK(txq);
2651         }
2652 }
2653
2654 #ifndef VTNET_LEGACY_TX
2655 static void
2656 vtnet_qflush(struct ifnet *ifp)
2657 {
2658         struct vtnet_softc *sc;
2659         struct vtnet_txq *txq;
2660         struct mbuf *m;
2661         int i;
2662
2663         sc = ifp->if_softc;
2664
2665         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2666                 txq = &sc->vtnet_txqs[i];
2667
2668                 VTNET_TXQ_LOCK(txq);
2669                 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2670                         m_freem(m);
2671                 VTNET_TXQ_UNLOCK(txq);
2672         }
2673
2674         if_qflush(ifp);
2675 }
2676 #endif
2677
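/*
 * Per-queue transmit watchdog, driven from vtnet_tick(). Returns
 * nonzero once the timer expires, after which the caller reinitializes
 * the interface.
 */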
2678 static int
2679 vtnet_watchdog(struct vtnet_txq *txq)
2680 {
2681         struct ifnet *ifp;
2682
2683         ifp = txq->vtntx_sc->vtnet_ifp;
2684
2685         VTNET_TXQ_LOCK(txq);
2686         if (txq->vtntx_watchdog == 1) {
2687                 /*
2688                  * Only drain completed frames if the watchdog is about to
2689                  * expire. If any frames were drained, there may be enough
2690                  * free descriptors now available to transmit queued frames.
2691                  * In that case, the timer will immediately be decremented
2692                  * below, but the timeout is generous enough that this should
2693                  * not be a problem.
2694                  */
2695                 if (vtnet_txq_eof(txq) != 0)
2696                         vtnet_txq_start(txq);
2697         }
2698
2699         if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
2700                 VTNET_TXQ_UNLOCK(txq);
2701                 return (0);
2702         }
2703         VTNET_TXQ_UNLOCK(txq);
2704
2705         if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
2706         return (1);
2707 }
2708
2709 static void
2710 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
2711     struct vtnet_txq_stats *txacc)
2712 {
2713
2714         bzero(rxacc, sizeof(struct vtnet_rxq_stats));
2715         bzero(txacc, sizeof(struct vtnet_txq_stats));
2716
2717         for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2718                 struct vtnet_rxq_stats *rxst;
2719                 struct vtnet_txq_stats *txst;
2720
2721                 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
2722                 rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
2723                 rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
2724                 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
2725                 rxacc->vrxs_csum += rxst->vrxs_csum;
2726                 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
2727                 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
2728
2729                 txst = &sc->vtnet_txqs[i].vtntx_stats;
2730                 txacc->vtxs_opackets += txst->vtxs_opackets;
2731                 txacc->vtxs_obytes += txst->vtxs_obytes;
                /* Also accumulate omcasts so IFCOUNTER_OMCASTS is reported. */
                txacc->vtxs_omcasts += txst->vtxs_omcasts;
2732                 txacc->vtxs_csum += txst->vtxs_csum;
2733                 txacc->vtxs_tso += txst->vtxs_tso;
2734                 txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
2735         }
2736 }
2737
2738 static uint64_t
2739 vtnet_get_counter(if_t ifp, ift_counter cnt)
2740 {
2741         struct vtnet_softc *sc;
2742         struct vtnet_rxq_stats rxaccum;
2743         struct vtnet_txq_stats txaccum;
2744
2745         sc = if_getsoftc(ifp);
2746         vtnet_accum_stats(sc, &rxaccum, &txaccum);
2747
2748         switch (cnt) {
2749         case IFCOUNTER_IPACKETS:
2750                 return (rxaccum.vrxs_ipackets);
2751         case IFCOUNTER_IQDROPS:
2752                 return (rxaccum.vrxs_iqdrops);
2753         case IFCOUNTER_IERRORS:
2754                 return (rxaccum.vrxs_ierrors);
2755         case IFCOUNTER_OPACKETS:
2756                 return (txaccum.vtxs_opackets);
2757 #ifndef VTNET_LEGACY_TX
2758         case IFCOUNTER_OBYTES:
2759                 return (txaccum.vtxs_obytes);
2760         case IFCOUNTER_OMCASTS:
2761                 return (txaccum.vtxs_omcasts);
2762 #endif
2763         default:
2764                 return (if_get_counter_default(ifp, cnt));
2765         }
2766 }
2767
2768 static void
2769 vtnet_tick(void *xsc)
2770 {
2771         struct vtnet_softc *sc;
2772         struct ifnet *ifp;
2773         int i, timedout;
2774
2775         sc = xsc;
2776         ifp = sc->vtnet_ifp;
2777         timedout = 0;
2778
2779         VTNET_CORE_LOCK_ASSERT(sc);
2780
2781         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
2782                 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
2783
2784         if (timedout != 0) {
2785                 ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2786                 vtnet_init_locked(sc);
2787         } else
2788                 callout_schedule(&sc->vtnet_tick_ch, hz);
2789 }
2790
2791 static void
2792 vtnet_start_taskqueues(struct vtnet_softc *sc)
2793 {
2794         device_t dev;
2795         struct vtnet_rxq *rxq;
2796         struct vtnet_txq *txq;
2797         int i, error;
2798
2799         dev = sc->vtnet_dev;
2800
2801         /*
2802          * Errors here are very difficult to recover from - we cannot
2803          * easily fail because, if this is during boot, we will hang
2804          * when freeing any successfully started taskqueues since
2805          * the scheduler isn't up yet.
2806          *
2807          * Most drivers just ignore the return value - it only fails
2808          * with ENOMEM so an error is not likely.
2809          */
2810         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2811                 rxq = &sc->vtnet_rxqs[i];
2812                 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
2813                     "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
2814                 if (error) {
2815                         device_printf(dev, "failed to start rx taskq %d\n",
2816                             rxq->vtnrx_id);
2817                 }
2818
2819                 txq = &sc->vtnet_txqs[i];
2820                 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
2821                     "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
2822                 if (error) {
2823                         device_printf(dev, "failed to start tx taskq %d\n",
2824                             txq->vtntx_id);
2825                 }
2826         }
2827 }
2828
2829 static void
2830 vtnet_free_taskqueues(struct vtnet_softc *sc)
2831 {
2832         struct vtnet_rxq *rxq;
2833         struct vtnet_txq *txq;
2834         int i;
2835
2836         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2837                 rxq = &sc->vtnet_rxqs[i];
2838                 if (rxq->vtnrx_tq != NULL) {
2839                         taskqueue_free(rxq->vtnrx_tq);
2840                         rxq->vtnrx_tq = NULL;
2841                 }
2842
2843                 txq = &sc->vtnet_txqs[i];
2844                 if (txq->vtntx_tq != NULL) {
2845                         taskqueue_free(txq->vtntx_tq);
2846                         txq->vtntx_tq = NULL;
2847                 }
2848         }
2849 }
2850
2851 static void
2852 vtnet_drain_taskqueues(struct vtnet_softc *sc)
2853 {
2854         struct vtnet_rxq *rxq;
2855         struct vtnet_txq *txq;
2856         int i;
2857
2858         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2859                 rxq = &sc->vtnet_rxqs[i];
2860                 if (rxq->vtnrx_tq != NULL)
2861                         taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2862
2863                 txq = &sc->vtnet_txqs[i];
2864                 if (txq->vtntx_tq != NULL) {
2865                         taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
2866 #ifndef VTNET_LEGACY_TX
2867                         taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
2868 #endif
2869                 }
2870         }
2871 }
2872
2873 static void
2874 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
2875 {
2876         struct vtnet_rxq *rxq;
2877         struct vtnet_txq *txq;
2878         int i;
2879
2880         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2881                 rxq = &sc->vtnet_rxqs[i];
2882                 vtnet_rxq_free_mbufs(rxq);
2883
2884                 txq = &sc->vtnet_txqs[i];
2885                 vtnet_txq_free_mbufs(txq);
2886         }
2887 }
2888
2889 static void
2890 vtnet_stop_rendezvous(struct vtnet_softc *sc)
2891 {
2892         struct vtnet_rxq *rxq;
2893         struct vtnet_txq *txq;
2894         int i;
2895
2896         /*
2897          * Lock and unlock the per-queue mutex so we know the stop
2898          * state is visible. Doing only the active queues should be
2899          * sufficient, but it does not cost much extra to do all the
2900          * queues. Note we hold the core mutex here too.
2901          */
2902         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
2903                 rxq = &sc->vtnet_rxqs[i];
2904                 VTNET_RXQ_LOCK(rxq);
2905                 VTNET_RXQ_UNLOCK(rxq);
2906
2907                 txq = &sc->vtnet_txqs[i];
2908                 VTNET_TXQ_LOCK(txq);
2909                 VTNET_TXQ_UNLOCK(txq);
2910         }
2911 }
2912
2913 static void
2914 vtnet_stop(struct vtnet_softc *sc)
2915 {
2916         device_t dev;
2917         struct ifnet *ifp;
2918
2919         dev = sc->vtnet_dev;
2920         ifp = sc->vtnet_ifp;
2921
2922         VTNET_CORE_LOCK_ASSERT(sc);
2923
2924         ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
2925         sc->vtnet_link_active = 0;
2926         callout_stop(&sc->vtnet_tick_ch);
2927
2928         /* Only advisory. */
2929         vtnet_disable_interrupts(sc);
2930
2931         /*
2932          * Stop the host adapter. This resets it to the pre-initialized
2933          * state. It will not generate any interrupts until after it is
2934          * reinitialized.
2935          */
2936         virtio_stop(dev);
2937         vtnet_stop_rendezvous(sc);
2938
2939         /* Free any mbufs left in the virtqueues. */
2940         vtnet_drain_rxtx_queues(sc);
2941 }
2942
2943 static int
2944 vtnet_virtio_reinit(struct vtnet_softc *sc)
2945 {
2946         device_t dev;
2947         struct ifnet *ifp;
2948         uint64_t features;
2949         int mask, error;
2950
2951         dev = sc->vtnet_dev;
2952         ifp = sc->vtnet_ifp;
2953         features = sc->vtnet_features;
2954
2955         mask = 0;
2956 #if defined(INET)
2957         mask |= IFCAP_RXCSUM;
2958 #endif
2959 #if defined(INET6)
2960         mask |= IFCAP_RXCSUM_IPV6;
2961 #endif
2962
2963         /*
2964          * Re-negotiate with the host, removing any disabled receive
2965          * features. Transmit features are disabled only on our side
2966          * via if_capenable and if_hwassist.
2967          */
2968
2969         if (ifp->if_capabilities & mask) {
2970                 /*
2971                  * We require both IPv4 and IPv6 offloading to be enabled
2972                  * in order to negotiate it: VirtIO does not distinguish
2973                  * between the two.
2974                  */
2975                 if ((ifp->if_capenable & mask) != mask)
2976                         features &= ~VIRTIO_NET_F_GUEST_CSUM;
2977         }
2978
2979         if (ifp->if_capabilities & IFCAP_LRO) {
2980                 if ((ifp->if_capenable & IFCAP_LRO) == 0)
2981                         features &= ~VTNET_LRO_FEATURES;
2982         }
2983
2984         if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
2985                 if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
2986                         features &= ~VIRTIO_NET_F_CTRL_VLAN;
2987         }
2988
2989         error = virtio_reinit(dev, features);
2990         if (error)
2991                 device_printf(dev, "virtio reinit error %d\n", error);
2992
2993         return (error);
2994 }
2995
2996 static void
2997 vtnet_init_rx_filters(struct vtnet_softc *sc)
2998 {
2999         struct ifnet *ifp;
3000
3001         ifp = sc->vtnet_ifp;
3002
3003         if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
3004                 /* Restore promiscuous and all-multicast modes. */
3005                 vtnet_rx_filter(sc);
3006                 /* Restore filtered MAC addresses. */
3007                 vtnet_rx_filter_mac(sc);
3008         }
3009
3010         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
3011                 vtnet_rx_filter_vlan(sc);
3012 }
3013
3014 static int
3015 vtnet_init_rx_queues(struct vtnet_softc *sc)
3016 {
3017         device_t dev;
3018         struct vtnet_rxq *rxq;
3019         int i, clsize, error;
3020
3021         dev = sc->vtnet_dev;
3022
3023         /*
3024          * Use the new cluster size if one has been set (via an MTU
3025          * change). Otherwise, use the standard 2K clusters.
3026          *
3027          * BMV: It might make sense to use page sized clusters as
3028          * the default (depending on the features negotiated).
3029          */
3030         if (sc->vtnet_rx_new_clsize != 0) {
3031                 clsize = sc->vtnet_rx_new_clsize;
3032                 sc->vtnet_rx_new_clsize = 0;
3033         } else
3034                 clsize = MCLBYTES;
3035
3036         sc->vtnet_rx_clsize = clsize;
3037         sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);
3038
3039         KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS ||
3040             sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
3041             ("%s: too many rx mbufs %d for %d segments", __func__,
3042             sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
3043
3044         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3045                 rxq = &sc->vtnet_rxqs[i];
3046
3047                 /* Hold the lock to satisfy asserts. */
3048                 VTNET_RXQ_LOCK(rxq);
3049                 error = vtnet_rxq_populate(rxq);
3050                 VTNET_RXQ_UNLOCK(rxq);
3051
3052                 if (error) {
3053                         device_printf(dev,
3054                             "cannot allocate mbufs for Rx queue %d\n", i);
3055                         return (error);
3056                 }
3057         }
3058
3059         return (0);
3060 }
3061
3062 static int
3063 vtnet_init_tx_queues(struct vtnet_softc *sc)
3064 {
3065         struct vtnet_txq *txq;
3066         int i;
3067
3068         for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3069                 txq = &sc->vtnet_txqs[i];
3070                 txq->vtntx_watchdog = 0;
3071         }
3072
3073         return (0);
3074 }
3075
3076 static int
3077 vtnet_init_rxtx_queues(struct vtnet_softc *sc)
3078 {
3079         int error;
3080
3081         error = vtnet_init_rx_queues(sc);
3082         if (error)
3083                 return (error);
3084
3085         error = vtnet_init_tx_queues(sc);
3086         if (error)
3087                 return (error);
3088
3089         return (0);
3090 }
3091
3092 static void
3093 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
3094 {
3095         device_t dev;
3096         int npairs;
3097
3098         dev = sc->vtnet_dev;
3099
3100         if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) {
3101                 sc->vtnet_act_vq_pairs = 1;
3102                 return;
3103         }
3104
3105         npairs = sc->vtnet_requested_vq_pairs;
3106
3107         if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3108                 device_printf(dev,
3109                     "cannot set active queue pairs to %d\n", npairs);
3110                 npairs = 1;
3111         }
3112
3113         sc->vtnet_act_vq_pairs = npairs;
3114 }
3115
3116 static int
3117 vtnet_reinit(struct vtnet_softc *sc)
3118 {
3119         struct ifnet *ifp;
3120         int error;
3121
3122         ifp = sc->vtnet_ifp;
3123
3124         /* Use the current MAC address. */
3125         bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3126         vtnet_set_hwaddr(sc);
3127
3128         vtnet_set_active_vq_pairs(sc);
3129
3130         ifp->if_hwassist = 0;
3131         if (ifp->if_capenable & IFCAP_TXCSUM)
3132                 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD;
3133         if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
3134                 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6;
3135         if (ifp->if_capenable & IFCAP_TSO4)
3136                 ifp->if_hwassist |= CSUM_IP_TSO;
3137         if (ifp->if_capenable & IFCAP_TSO6)
3138                 ifp->if_hwassist |= CSUM_IP6_TSO;
3139
3140         if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3141                 vtnet_init_rx_filters(sc);
3142
3143         error = vtnet_init_rxtx_queues(sc);
3144         if (error)
3145                 return (error);
3146
3147         vtnet_enable_interrupts(sc);
3148         ifp->if_drv_flags |= IFF_DRV_RUNNING;
3149
3150         return (0);
3151 }
3152
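/*
 * Stop the interface, reinitialize the device with the host, and
 * restart the tick callout. Called with the core lock held.
 */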
3153 static void
3154 vtnet_init_locked(struct vtnet_softc *sc)
3155 {
3156         device_t dev;
3157         struct ifnet *ifp;
3158
3159         dev = sc->vtnet_dev;
3160         ifp = sc->vtnet_ifp;
3161
3162         VTNET_CORE_LOCK_ASSERT(sc);
3163
3164         if (ifp->if_drv_flags & IFF_DRV_RUNNING)
3165                 return;
3166
3167         vtnet_stop(sc);
3168
3169         /* Reinitialize with the host. */
3170         if (vtnet_virtio_reinit(sc) != 0)
3171                 goto fail;
3172
3173         if (vtnet_reinit(sc) != 0)
3174                 goto fail;
3175
3176         virtio_reinit_complete(dev);
3177
3178         vtnet_update_link_status(sc);
3179         callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3180
3181         return;
3182
3183 fail:
3184         vtnet_stop(sc);
3185 }
3186
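/* Acquire the core lock and initialize the interface. */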
3187 static void
3188 vtnet_init(void *xsc)
3189 {
3190         struct vtnet_softc *sc;
3191
3192         sc = xsc;
3193
3194         VTNET_CORE_LOCK(sc);
3195         vtnet_init_locked(sc);
3196         VTNET_CORE_UNLOCK(sc);
3197 }
3198
3199 static void
3200 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3201 {
3202         struct virtqueue *vq;
3203
3204         vq = sc->vtnet_ctrl_vq;
3205
3206         /*
3207          * The control virtqueue is only polled and therefore it should
3208          * already be empty.
3209          */
3210         KASSERT(virtqueue_empty(vq),
3211             ("%s: ctrl vq %p not empty", __func__, vq));
3212 }
3213
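/*
 * Enqueue a command on the control virtqueue, notify the host, and
 * poll for the response.
 */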
3214 static void
3215 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3216     struct sglist *sg, int readable, int writable)
3217 {
3218         struct virtqueue *vq;
3219
3220         vq = sc->vtnet_ctrl_vq;
3221
3222         VTNET_CORE_LOCK_ASSERT(sc);
3223         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ,
3224             ("%s: CTRL_VQ feature not negotiated", __func__));
3225
3226         if (!virtqueue_empty(vq))
3227                 return;
3228         if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0)
3229                 return;
3230
3231         /*
3232          * Poll for the response, but the command is likely already
3233          * done when we return from the notify.
3234          */
3235         virtqueue_notify(vq);
3236         virtqueue_poll(vq, NULL);
3237 }
3238
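/*
 * Set the primary MAC address through the control virtqueue.
 */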
3239 static int
3240 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3241 {
3242         struct virtio_net_ctrl_hdr hdr __aligned(2);
3243         struct sglist_seg segs[3];
3244         struct sglist sg;
3245         uint8_t ack;
3246         int error;
3247
3248         hdr.class = VIRTIO_NET_CTRL_MAC;
3249         hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3250         ack = VIRTIO_NET_ERR;
3251
3252         sglist_init(&sg, 3, segs);
3253         error = 0;
3254         error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3255         error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN);
3256         error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3257         KASSERT(error == 0 && sg.sg_nseg == 3,
3258             ("%s: error %d adding set MAC msg to sglist", __func__, error));
3259
3260         vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3261
3262         return (ack == VIRTIO_NET_OK ? 0 : EIO);
3263 }
3264
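/*
 * Request the number of virtqueue pairs the host should use.
 */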
3265 static int
3266 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3267 {
3268         struct sglist_seg segs[3];
3269         struct sglist sg;
3270         struct {
3271                 struct virtio_net_ctrl_hdr hdr;
3272                 uint8_t pad1;
3273                 struct virtio_net_ctrl_mq mq;
3274                 uint8_t pad2;
3275                 uint8_t ack;
3276         } s __aligned(2);
3277         int error;
3278
3279         s.hdr.class = VIRTIO_NET_CTRL_MQ;
3280         s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3281         s.mq.virtqueue_pairs = npairs;
3282         s.ack = VIRTIO_NET_ERR;
3283
3284         sglist_init(&sg, 3, segs);
3285         error = 0;
3286         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3287         error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3288         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3289         KASSERT(error == 0 && sg.sg_nseg == 3,
3290             ("%s: error %d adding MQ message to sglist", __func__, error));
3291
3292         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3293
3294         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3295 }
3296
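/*
 * Enable or disable an Rx mode (promiscuous or all-multicast) through
 * the control virtqueue.
 */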
3297 static int
3298 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on)
3299 {
3300         struct sglist_seg segs[3];
3301         struct sglist sg;
3302         struct {
3303                 struct virtio_net_ctrl_hdr hdr;
3304                 uint8_t pad1;
3305                 uint8_t onoff;
3306                 uint8_t pad2;
3307                 uint8_t ack;
3308         } s __aligned(2);
3309         int error;
3310
3311         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3312             ("%s: CTRL_RX feature not negotiated", __func__));
3313
3314         s.hdr.class = VIRTIO_NET_CTRL_RX;
3315         s.hdr.cmd = cmd;
3316         s.onoff = !!on;
3317         s.ack = VIRTIO_NET_ERR;
3318
3319         sglist_init(&sg, 3, segs);
3320         error = 0;
3321         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3322         error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3323         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3324         KASSERT(error == 0 && sg.sg_nseg == 3,
3325             ("%s: error %d adding Rx message to sglist", __func__, error));
3326
3327         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3328
3329         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3330 }
3331
3332 static int
3333 vtnet_set_promisc(struct vtnet_softc *sc, int on)
3334 {
3335
3336         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3337 }
3338
3339 static int
3340 vtnet_set_allmulti(struct vtnet_softc *sc, int on)
3341 {
3342
3343         return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3344 }
3345
3346 /*
3347  * The device defaults to promiscuous mode for backwards compatibility.
3348  * Turn it off at attach time if possible.
3349  */
3350 static void
3351 vtnet_attach_disable_promisc(struct vtnet_softc *sc)
3352 {
3353         struct ifnet *ifp;
3354
3355         ifp = sc->vtnet_ifp;
3356
3357         VTNET_CORE_LOCK(sc);
3358         if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) {
3359                 ifp->if_flags |= IFF_PROMISC;
3360         } else if (vtnet_set_promisc(sc, 0) != 0) {
3361                 ifp->if_flags |= IFF_PROMISC;
3362                 device_printf(sc->vtnet_dev,
3363                     "cannot disable default promiscuous mode\n");
3364         }
3365         VTNET_CORE_UNLOCK(sc);
3366 }
3367
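/*
 * Synchronize the promiscuous and all-multicast settings with the host.
 */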
3368 static void
3369 vtnet_rx_filter(struct vtnet_softc *sc)
3370 {
3371         device_t dev;
3372         struct ifnet *ifp;
3373
3374         dev = sc->vtnet_dev;
3375         ifp = sc->vtnet_ifp;
3376
3377         VTNET_CORE_LOCK_ASSERT(sc);
3378
3379         if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0)
3380                 device_printf(dev, "cannot %s promiscuous mode\n",
3381                     ifp->if_flags & IFF_PROMISC ? "enable" : "disable");
3382
3383         if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0)
3384                 device_printf(dev, "cannot %s all-multicast mode\n",
3385                     ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable");
3386 }
3387
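/*
 * if_foreach_lladdr() callback: copy each unicast address, other than
 * our own, into the unicast MAC filter table.
 */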
3388 static u_int
3389 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
3390 {
3391         struct vtnet_softc *sc = arg;
3392
3393         if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3394                 return (0);
3395
3396         if (ucnt < VTNET_MAX_MAC_ENTRIES)
3397                 bcopy(LLADDR(sdl),
3398                     &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
3399                     ETHER_ADDR_LEN);
3400
3401         return (1);
3402 }
3403
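/*
 * if_foreach_llmaddr() callback: copy each multicast address into the
 * multicast MAC filter table.
 */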
3404 static u_int
3405 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
3406 {
3407         struct vtnet_mac_filter *filter = arg;
3408
3409         if (mcnt < VTNET_MAX_MAC_ENTRIES)
3410                 bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
3411                     ETHER_ADDR_LEN);
3412
3413         return (1);
3414 }
3415
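/*
 * Build the unicast and multicast MAC filter tables and program them
 * into the host. If either table overflows, fall back to promiscuous
 * or all-multicast mode instead.
 */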
3416 static void
3417 vtnet_rx_filter_mac(struct vtnet_softc *sc)
3418 {
3419         struct virtio_net_ctrl_hdr hdr __aligned(2);
3420         struct vtnet_mac_filter *filter;
3421         struct sglist_seg segs[4];
3422         struct sglist sg;
3423         struct ifnet *ifp;
3424         bool promisc, allmulti;
3425         u_int ucnt, mcnt;
3426         int error;
3427         uint8_t ack;
3428
3429         ifp = sc->vtnet_ifp;
3430         filter = sc->vtnet_mac_filter;
3431
3432         VTNET_CORE_LOCK_ASSERT(sc);
3433         KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX,
3434             ("%s: CTRL_RX feature not negotiated", __func__));
3435
3436         /* Unicast MAC addresses: */
3437         ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
3438         promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
3439
3440         if (promisc) {
3441                 filter->vmf_unicast.nentries = 0;
3442                 if_printf(ifp, "more than %d MAC addresses assigned, "
3443                     "falling back to promiscuous mode\n",
3444                     VTNET_MAX_MAC_ENTRIES);
3445         } else
3446                 filter->vmf_unicast.nentries = ucnt;
3447
3448         /* Multicast MAC addresses: */
3449         mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
3450         allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
3451
3452         if (allmulti) {
3453                 filter->vmf_multicast.nentries = 0;
3454                 if_printf(ifp, "more than %d multicast MAC addresses "
3455                     "assigned, falling back to all-multicast mode\n",
3456                     VTNET_MAX_MAC_ENTRIES);
3457         } else
3458                 filter->vmf_multicast.nentries = mcnt;
3459
3460         if (promisc && allmulti)
3461                 goto out;
3462
3463         hdr.class = VIRTIO_NET_CTRL_MAC;
3464         hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3465         ack = VIRTIO_NET_ERR;
3466
3467         sglist_init(&sg, 4, segs);
3468         error = 0;
3469         error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3470         error |= sglist_append(&sg, &filter->vmf_unicast,
3471             sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN);
3472         error |= sglist_append(&sg, &filter->vmf_multicast,
3473             sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN);
3474         error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3475         KASSERT(error == 0 && sg.sg_nseg == 4,
3476             ("%s: error %d adding MAC filter msg to sglist", __func__, error));
3477
3478         vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3479
3480         if (ack != VIRTIO_NET_OK)
3481                 if_printf(ifp, "error setting host MAC filter table\n");
3482
3483 out:
3484         if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0)
3485                 if_printf(ifp, "cannot enable promiscuous mode\n");
3486         if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0)
3487                 if_printf(ifp, "cannot enable all-multicast mode\n");
3488 }
3489
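/*
 * Add or remove a single VLAN tag in the host's VLAN filter.
 */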
3490 static int
3491 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3492 {
3493         struct sglist_seg segs[3];
3494         struct sglist sg;
3495         struct {
3496                 struct virtio_net_ctrl_hdr hdr;
3497                 uint8_t pad1;
3498                 uint16_t tag;
3499                 uint8_t pad2;
3500                 uint8_t ack;
3501         } s __aligned(2);
3502         int error;
3503
3504         s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3505         s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3506         s.tag = tag;
3507         s.ack = VIRTIO_NET_ERR;
3508
3509         sglist_init(&sg, 3, segs);
3510         error = 0;
3511         error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3512         error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3513         error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3514         KASSERT(error == 0 && sg.sg_nseg == 3,
3515             ("%s: error %d adding VLAN message to sglist", __func__, error));
3516
3517         vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3518
3519         return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3520 }
3521
3522 static void
3523 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3524 {
3525         uint32_t w;
3526         uint16_t tag;
3527         int i, bit;
3528
3529         VTNET_CORE_LOCK_ASSERT(sc);
3530         KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER,
3531             ("%s: VLAN_FILTER feature not negotiated", __func__));
3532
3533         /* Enable the filter for each configured VLAN. */
3534         for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3535                 w = sc->vtnet_vlan_filter[i];
3536
3537                 while ((bit = ffs(w) - 1) != -1) {
3538                         w &= ~(1 << bit);
3539                         tag = sizeof(w) * CHAR_BIT * i + bit;
3540
3541                         if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3542                                 device_printf(sc->vtnet_dev,
3543                                     "cannot enable VLAN %d filter\n", tag);
3544                         }
3545                 }
3546         }
3547 }
3548
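/*
 * Update our VLAN filter bitmap and, when hardware VLAN filtering is
 * enabled and the interface is running, the host's filter as well.
 */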
3549 static void
3550 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3551 {
3552         struct ifnet *ifp;
3553         int idx, bit;
3554
3555         ifp = sc->vtnet_ifp;
3556         idx = (tag >> 5) & 0x7F;
3557         bit = tag & 0x1F;
3558
3559         if (tag == 0 || tag > 4095)
3560                 return;
3561
3562         VTNET_CORE_LOCK(sc);
3563
3564         if (add)
3565                 sc->vtnet_vlan_filter[idx] |= (1 << bit);
3566         else
3567                 sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3568
3569         if (ifp->if_capenable & IFCAP_VLAN_HWFILTER &&
3570             ifp->if_drv_flags & IFF_DRV_RUNNING &&
3571             vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3572                 device_printf(sc->vtnet_dev,
3573                     "cannot %s VLAN %d %s the host filter table\n",
3574                     add ? "add" : "remove", tag, add ? "to" : "from");
3575         }
3576
3577         VTNET_CORE_UNLOCK(sc);
3578 }
3579
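/*
 * Invoked when a VLAN is added to or removed from our interface;
 * update the VLAN filter accordingly.
 */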
3580 static void
3581 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3582 {
3583
3584         if (ifp->if_softc != arg)
3585                 return;
3586
3587         vtnet_update_vlan_filter(arg, 1, tag);
3588 }
3589
3590 static void
3591 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag)
3592 {
3593
3594         if (ifp->if_softc != arg)
3595                 return;
3596
3597         vtnet_update_vlan_filter(arg, 0, tag);
3598 }
3599
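/*
 * Read the link status from the device configuration, or assume the
 * link is up if the device does not report link state.
 */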
3600 static int
3601 vtnet_is_link_up(struct vtnet_softc *sc)
3602 {
3603         device_t dev;
3604         struct ifnet *ifp;
3605         uint16_t status;
3606
3607         dev = sc->vtnet_dev;
3608         ifp = sc->vtnet_ifp;
3609
3610         if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0)
3611                 status = VIRTIO_NET_S_LINK_UP;
3612         else
3613                 status = virtio_read_dev_config_2(dev,
3614                     offsetof(struct virtio_net_config, status));
3615
3616         return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3617 }
3618
3619 static void
3620 vtnet_update_link_status(struct vtnet_softc *sc)
3621 {
3622         struct ifnet *ifp;
3623         int link;
3624
3625         ifp = sc->vtnet_ifp;
3626
3627         VTNET_CORE_LOCK_ASSERT(sc);
3628         link = vtnet_is_link_up(sc);
3629
3630         /* Notify if the link status has changed. */
3631         if (link != 0 && sc->vtnet_link_active == 0) {
3632                 sc->vtnet_link_active = 1;
3633                 if_link_state_change(ifp, LINK_STATE_UP);
3634         } else if (link == 0 && sc->vtnet_link_active != 0) {
3635                 sc->vtnet_link_active = 0;
3636                 if_link_state_change(ifp, LINK_STATE_DOWN);
3637         }
3638 }
3639
3640 static int
3641 vtnet_ifmedia_upd(struct ifnet *ifp)
3642 {
3643         struct vtnet_softc *sc;
3644         struct ifmedia *ifm;
3645
3646         sc = ifp->if_softc;
3647         ifm = &sc->vtnet_media;
3648
3649         if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER)
3650                 return (EINVAL);
3651
3652         return (0);
3653 }
3654
3655 static void
3656 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
3657 {
3658         struct vtnet_softc *sc;
3659
3660         sc = ifp->if_softc;
3661
3662         ifmr->ifm_status = IFM_AVALID;
3663         ifmr->ifm_active = IFM_ETHER;
3664
3665         VTNET_CORE_LOCK(sc);
3666         if (vtnet_is_link_up(sc) != 0) {
3667                 ifmr->ifm_status |= IFM_ACTIVE;
3668                 ifmr->ifm_active |= VTNET_MEDIATYPE;
3669         } else
3670                 ifmr->ifm_active |= IFM_NONE;
3671         VTNET_CORE_UNLOCK(sc);
3672 }
3673
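/*
 * Program our MAC address into the device, preferring the control
 * virtqueue command when it is available.
 */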
3674 static void
3675 vtnet_set_hwaddr(struct vtnet_softc *sc)
3676 {
3677         device_t dev;
3678         int i;
3679
3680         dev = sc->vtnet_dev;
3681
3682         if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
3683                 if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0)
3684                         device_printf(dev, "unable to set MAC address\n");
3685         } else if (sc->vtnet_flags & VTNET_FLAG_MAC) {
3686                 for (i = 0; i < ETHER_ADDR_LEN; i++) {
3687                         virtio_write_dev_config_1(dev,
3688                             offsetof(struct virtio_net_config, mac) + i,
3689                             sc->vtnet_hwaddr[i]);
3690                 }
3691         }
3692 }
3693
3694 static void
3695 vtnet_get_hwaddr(struct vtnet_softc *sc)
3696 {
3697         device_t dev;
3698         int i;
3699
3700         dev = sc->vtnet_dev;
3701
3702         if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) {
3703                 /*
3704                  * Generate a random locally administered unicast address.
3705                  *
3706                  * It would be nice to generate the same MAC address across
3707                  * reboots, but it seems all the hosts currently available
3708                  * support the MAC feature, so this isn't too important.
3709                  */
3710                 sc->vtnet_hwaddr[0] = 0xB2;
3711                 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
3712                 vtnet_set_hwaddr(sc);
3713                 return;
3714         }
3715
3716         for (i = 0; i < ETHER_ADDR_LEN; i++) {
3717                 sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev,
3718                     offsetof(struct virtio_net_config, mac) + i);
3719         }
3720 }
3721
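/*
 * Move the VLAN tag from the Ethernet header into the mbuf packet
 * header and strip the 802.1Q encapsulation.
 */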
3722 static void
3723 vtnet_vlan_tag_remove(struct mbuf *m)
3724 {
3725         struct ether_vlan_header *evh;
3726
3727         evh = mtod(m, struct ether_vlan_header *);
3728         m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
3729         m->m_flags |= M_VLANTAG;
3730
3731         /* Strip the 802.1Q header. */
3732         bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
3733             ETHER_HDR_LEN - ETHER_TYPE_LEN);
3734         m_adj(m, ETHER_VLAN_ENCAP_LEN);
3735 }
3736
3737 static void
3738 vtnet_set_rx_process_limit(struct vtnet_softc *sc)
3739 {
3740         int limit;
3741
3742         limit = vtnet_tunable_int(sc, "rx_process_limit",
3743             vtnet_rx_process_limit);
3744         if (limit < 0)
3745                 limit = INT_MAX;
3746         sc->vtnet_rx_process_limit = limit;
3747 }
3748
3749 static void
3750 vtnet_set_tx_intr_threshold(struct vtnet_softc *sc)
3751 {
3752         int size, thresh;
3753
3754         size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq);
3755
3756         /*
3757          * The Tx interrupt is disabled until the queue free count falls
3758          * below our threshold. Completed frames are drained from the Tx
3759          * virtqueue before transmitting new frames and in the watchdog
3760          * callout, so the frequency of Tx interrupts is greatly reduced,
3761          * at the cost of not freeing mbufs as quickly as they otherwise
3762          * would be.
3763          *
3764          * N.B. We assume all the Tx queues are the same size.
3765          */
3766         thresh = size / 4;
3767
3768         /*
3769          * Without indirect descriptors, leave enough room for the largest
3770          * number of segments we handle.
3771          */
3772         if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
3773             thresh < sc->vtnet_tx_nsegs)
3774                 thresh = sc->vtnet_tx_nsegs;
3775
3776         sc->vtnet_tx_intr_thresh = thresh;
3777 }
3778
3779 static void
3780 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
3781     struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
3782 {
3783         struct sysctl_oid *node;
3784         struct sysctl_oid_list *list;
3785         struct vtnet_rxq_stats *stats;
3786         char namebuf[16];
3787
3788         snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
3789         node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3790             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
3791         list = SYSCTL_CHILDREN(node);
3792
3793         stats = &rxq->vtnrx_stats;
3794
3795         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD,
3796             &stats->vrxs_ipackets, "Receive packets");
3797         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD,
3798             &stats->vrxs_ibytes, "Receive bytes");
3799         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD,
3800             &stats->vrxs_iqdrops, "Receive drops");
3801         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD,
3802             &stats->vrxs_ierrors, "Receive errors");
3803         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3804             &stats->vrxs_csum, "Receive checksum offloaded");
3805         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD,
3806             &stats->vrxs_csum_failed, "Receive checksum offload failed");
3807         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3808             &stats->vrxs_rescheduled,
3809             "Receive interrupt handler rescheduled");
3810 }
3811
3812 static void
3813 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
3814     struct sysctl_oid_list *child, struct vtnet_txq *txq)
3815 {
3816         struct sysctl_oid *node;
3817         struct sysctl_oid_list *list;
3818         struct vtnet_txq_stats *stats;
3819         char namebuf[16];
3820
3821         snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
3822         node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
3823             CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
3824         list = SYSCTL_CHILDREN(node);
3825
3826         stats = &txq->vtntx_stats;
3827
3828         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD,
3829             &stats->vtxs_opackets, "Transmit packets");
3830         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD,
3831             &stats->vtxs_obytes, "Transmit bytes");
3832         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD,
3833             &stats->vtxs_omcasts, "Transmit multicasts");
3834         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD,
3835             &stats->vtxs_csum, "Transmit checksum offloaded");
3836         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD,
3837             &stats->vtxs_tso, "Transmit segmentation offloaded");
3838         SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD,
3839             &stats->vtxs_rescheduled,
3840             "Transmit interrupt handler rescheduled");
3841 }
3842
3843 static void
3844 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
3845 {
3846         device_t dev;
3847         struct sysctl_ctx_list *ctx;
3848         struct sysctl_oid *tree;
3849         struct sysctl_oid_list *child;
3850         int i;
3851
3852         dev = sc->vtnet_dev;
3853         ctx = device_get_sysctl_ctx(dev);
3854         tree = device_get_sysctl_tree(dev);
3855         child = SYSCTL_CHILDREN(tree);
3856
3857         for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3858                 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
3859                 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
3860         }
3861 }
3862
3863 static void
3864 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
3865     struct sysctl_oid_list *child, struct vtnet_softc *sc)
3866 {
3867         struct vtnet_statistics *stats;
3868         struct vtnet_rxq_stats rxaccum;
3869         struct vtnet_txq_stats txaccum;
3870
3871         vtnet_accum_stats(sc, &rxaccum, &txaccum);
3872
3873         stats = &sc->vtnet_stats;
3874         stats->rx_csum_offloaded = rxaccum.vrxs_csum;
3875         stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
3876         stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
3877         stats->tx_csum_offloaded = txaccum.vtxs_csum;
3878         stats->tx_tso_offloaded = txaccum.vtxs_tso;
3879         stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
3880
3881         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
3882             CTLFLAG_RD, &stats->mbuf_alloc_failed,
3883             "Mbuf cluster allocation failures");
3884
3885         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
3886             CTLFLAG_RD, &stats->rx_frame_too_large,
3887             "Received frame larger than the mbuf chain");
3888         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
3889             CTLFLAG_RD, &stats->rx_enq_replacement_failed,
3890             "Enqueuing the replacement receive mbuf failed");
3891         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
3892             CTLFLAG_RD, &stats->rx_mergeable_failed,
3893             "Mergeable buffers receive failures");
3894         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
3895             CTLFLAG_RD, &stats->rx_csum_bad_ethtype,
3896             "Received checksum offloaded buffer with unsupported "
3897             "Ethernet type");
3898         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
3899             CTLFLAG_RD, &stats->rx_csum_bad_ipproto,
3900             "Received checksum offloaded buffer with incorrect IP protocol");
3901         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
3902             CTLFLAG_RD, &stats->rx_csum_bad_offset,
3903             "Received checksum offloaded buffer with incorrect offset");
3904         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto",
3905             CTLFLAG_RD, &stats->rx_csum_bad_proto,
3906             "Received checksum offloaded buffer with incorrect protocol");
3907         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed",
3908             CTLFLAG_RD, &stats->rx_csum_failed,
3909             "Received buffer checksum offload failed");
3910         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded",
3911             CTLFLAG_RD, &stats->rx_csum_offloaded,
3912             "Received buffer checksum offload succeeded");
3913         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled",
3914             CTLFLAG_RD, &stats->rx_task_rescheduled,
3915             "Times the receive interrupt task rescheduled itself");
3916
3917         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype",
3918             CTLFLAG_RD, &stats->tx_csum_bad_ethtype,
3919             "Aborted transmit of checksum offloaded buffer with unknown "
3920             "Ethernet type");
3921         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype",
3922             CTLFLAG_RD, &stats->tx_tso_bad_ethtype,
3923             "Aborted transmit of TSO buffer with unknown Ethernet type");
3924         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
3925             CTLFLAG_RD, &stats->tx_tso_not_tcp,
3926             "Aborted transmit of TSO buffer with non-TCP protocol");
3927         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
3928             CTLFLAG_RD, &stats->tx_defragged,
3929             "Transmit mbufs defragged");
3930         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
3931             CTLFLAG_RD, &stats->tx_defrag_failed,
3932             "Aborted transmit of buffer because defrag failed");
3933         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
3934             CTLFLAG_RD, &stats->tx_csum_offloaded,
3935             "Offloaded checksum of transmitted buffer");
3936         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
3937             CTLFLAG_RD, &stats->tx_tso_offloaded,
3938             "Segmentation offload of transmitted buffer");
3939         SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
3940             CTLFLAG_RD, &stats->tx_task_rescheduled,
3941             "Times the transmit interrupt task rescheduled itself");
3942 }
3943
3944 static void
3945 vtnet_setup_sysctl(struct vtnet_softc *sc)
3946 {
3947         device_t dev;
3948         struct sysctl_ctx_list *ctx;
3949         struct sysctl_oid *tree;
3950         struct sysctl_oid_list *child;
3951
3952         dev = sc->vtnet_dev;
3953         ctx = device_get_sysctl_ctx(dev);
3954         tree = device_get_sysctl_tree(dev);
3955         child = SYSCTL_CHILDREN(tree);
3956
3957         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
3958             CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
3959             "Maximum number of supported virtqueue pairs");
3960         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "requested_vq_pairs",
3961             CTLFLAG_RD, &sc->vtnet_requested_vq_pairs, 0,
3962             "Requested number of virtqueue pairs");
3963         SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
3964             CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
3965             "Number of active virtqueue pairs");
3966
3967         vtnet_setup_stat_sysctl(ctx, child, sc);
3968 }
3969
3970 static int
3971 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
3972 {
3973
3974         return (virtqueue_enable_intr(rxq->vtnrx_vq));
3975 }
3976
3977 static void
3978 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
3979 {
3980
3981         virtqueue_disable_intr(rxq->vtnrx_vq);
3982 }
3983
3984 static int
3985 vtnet_txq_enable_intr(struct vtnet_txq *txq)
3986 {
3987         struct virtqueue *vq;
3988
3989         vq = txq->vtntx_vq;
3990
3991         if (vtnet_txq_below_threshold(txq) != 0)
3992                 return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
3993
3994         /*
3995          * The free count is above our threshold. Keep the Tx interrupt
3996          * disabled until the queue fills further.
3997          */
3998         return (0);
3999 }
4000
4001 static void
4002 vtnet_txq_disable_intr(struct vtnet_txq *txq)
4003 {
4004
4005         virtqueue_disable_intr(txq->vtntx_vq);
4006 }
4007
4008 static void
4009 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
4010 {
4011         int i;
4012
4013         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4014                 vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]);
4015 }
4016
4017 static void
4018 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
4019 {
4020         int i;
4021
4022         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4023                 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
4024 }
4025
4026 static void
4027 vtnet_enable_interrupts(struct vtnet_softc *sc)
4028 {
4029
4030         vtnet_enable_rx_interrupts(sc);
4031         vtnet_enable_tx_interrupts(sc);
4032 }
4033
4034 static void
4035 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
4036 {
4037         int i;
4038
4039         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4040                 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
4041 }
4042
4043 static void
4044 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
4045 {
4046         int i;
4047
4048         for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4049                 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
4050 }
4051
4052 static void
4053 vtnet_disable_interrupts(struct vtnet_softc *sc)
4054 {
4055
4056         vtnet_disable_rx_interrupts(sc);
4057         vtnet_disable_tx_interrupts(sc);
4058 }
4059
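/*
 * Fetch a per-device tunable, hw.vtnet.<unit>.<knob>, falling back to
 * the supplied default value.
 */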
4060 static int
4061 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
4062 {
4063         char path[64];
4064
4065         snprintf(path, sizeof(path),
4066             "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
4067         TUNABLE_INT_FETCH(path, &def);
4068
4069         return (def);
4070 }
4071
4072 #ifdef DEBUGNET
4073 static void
4074 vtnet_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
4075 {
4076         struct vtnet_softc *sc;
4077
4078         sc = if_getsoftc(ifp);
4079
4080         VTNET_CORE_LOCK(sc);
4081         *nrxr = sc->vtnet_max_vq_pairs;
4082         *ncl = DEBUGNET_MAX_IN_FLIGHT;
4083         *clsize = sc->vtnet_rx_clsize;
4084         VTNET_CORE_UNLOCK(sc);
4085 }
4086
4087 static void
4088 vtnet_debugnet_event(struct ifnet *ifp __unused, enum debugnet_ev event __unused)
4089 {
4090 }
4091
4092 static int
4093 vtnet_debugnet_transmit(struct ifnet *ifp, struct mbuf *m)
4094 {
4095         struct vtnet_softc *sc;
4096         struct vtnet_txq *txq;
4097         int error;
4098
4099         sc = if_getsoftc(ifp);
4100         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4101             IFF_DRV_RUNNING)
4102                 return (EBUSY);
4103
4104         txq = &sc->vtnet_txqs[0];
4105         error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
4106         if (error == 0)
4107                 (void)vtnet_txq_notify(txq);
4108         return (error);
4109 }
4110
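/*
 * Drain completed transmits on the first transmit queue and process
 * received frames on every receive queue.
 */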
4111 static int
4112 vtnet_debugnet_poll(struct ifnet *ifp, int count)
4113 {
4114         struct vtnet_softc *sc;
4115         int i;
4116
4117         sc = if_getsoftc(ifp);
4118         if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4119             IFF_DRV_RUNNING)
4120                 return (EBUSY);
4121
4122         (void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
4123         for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4124                 (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
4125         return (0);
4126 }
4127 #endif /* DEBUGNET */