2 * Copyright (c) 2010-2012 Citrix Inc.
3 * Copyright (c) 2009-2012,2016 Microsoft Corp.
4 * Copyright (c) 2012 NetApp Inc.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * Copyright (c) 2004-2006 Kip Macy
31 * All rights reserved.
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
55 #include <sys/cdefs.h>
56 __FBSDID("$FreeBSD$");
58 #include "opt_inet6.h"
61 #include <sys/param.h>
63 #include <sys/kernel.h>
64 #include <sys/limits.h>
65 #include <sys/malloc.h>
67 #include <sys/module.h>
69 #include <sys/queue.h>
72 #include <sys/socket.h>
73 #include <sys/sockio.h>
75 #include <sys/sysctl.h>
76 #include <sys/systm.h>
77 #include <sys/taskqueue.h>
78 #include <sys/buf_ring.h>
80 #include <machine/atomic.h>
81 #include <machine/in_cksum.h>
84 #include <net/ethernet.h>
86 #include <net/if_arp.h>
87 #include <net/if_media.h>
88 #include <net/if_types.h>
89 #include <net/if_var.h>
90 #include <net/if_vlan_var.h>
91 #include <net/rndis.h>
93 #include <netinet/in_systm.h>
94 #include <netinet/in.h>
95 #include <netinet/ip.h>
96 #include <netinet/ip6.h>
97 #include <netinet/tcp.h>
98 #include <netinet/tcp_lro.h>
99 #include <netinet/udp.h>
101 #include <dev/hyperv/include/hyperv.h>
102 #include <dev/hyperv/include/hyperv_busdma.h>
103 #include <dev/hyperv/include/vmbus.h>
104 #include <dev/hyperv/include/vmbus_xact.h>
106 #include <dev/hyperv/netvsc/ndis.h>
107 #include <dev/hyperv/netvsc/if_hnreg.h>
108 #include <dev/hyperv/netvsc/if_hnvar.h>
109 #include <dev/hyperv/netvsc/hn_nvs.h>
110 #include <dev/hyperv/netvsc/hn_rndis.h>
112 #include "vmbus_if.h"
114 #define HN_IFSTART_SUPPORT
116 #define HN_RING_CNT_DEF_MAX 8
118 /* YYY should get it from the underlying channel */
119 #define HN_TX_DESC_CNT 512
121 #define HN_RNDIS_PKT_LEN \
122 (sizeof(struct rndis_packet_msg) + \
123 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \
124 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \
125 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \
126 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
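/*
 * Worst-case size of an RNDIS packet message: the message header plus
 * every per-packet-info this driver may append, i.e. hash value, VLAN,
 * and LSOv2 or TX checksum.
 */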
127 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE
128 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE
130 #define HN_TX_DATA_BOUNDARY PAGE_SIZE
131 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET
132 #define HN_TX_DATA_SEGSIZE PAGE_SIZE
133 /* -1 for RNDIS packet message */
134 #define HN_TX_DATA_SEGCNT_MAX (HN_GPACNT_MAX - 1)
136 #define HN_DIRECT_TX_SIZE_DEF 128
138 #define HN_EARLY_TXEOF_THRESH 8
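/*
 * After this many TX completions, kick the txeof task so that OACTIVE
 * can be cleared early; see hn_txpkt_done().
 */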
140 #define HN_PKTBUF_LEN_DEF (16 * 1024)
142 #define HN_LROENT_CNT_DEF 128
144 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU)
145 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU)
146 /* YYY 2*MTU is a bit rough, but should be good enough. */
147 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu)
149 #define HN_LRO_ACKCNT_DEF 1
151 #define HN_LOCK_INIT(sc) \
152 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
153 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock)
154 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED)
155 #define HN_LOCK(sc) sx_xlock(&(sc)->hn_lock)
156 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock)
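/*
 * The softc lock is an sx lock, since code holding it, e.g. the
 * NVS/RNDIS transaction paths, may sleep.
 */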
158 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
159 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP)
160 #define HN_CSUM_IP_HWASSIST(sc) \
161 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
162 #define HN_CSUM_IP6_HWASSIST(sc) \
163 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)
166 #ifndef HN_USE_TXDESC_BUFRING
167 SLIST_ENTRY(hn_txdesc) link;
170 struct hn_tx_ring *txr;
172 uint32_t flags; /* HN_TXD_FLAG_ */
173 struct hn_nvs_sendctx send_ctx;
177 bus_dmamap_t data_dmap;
179 bus_addr_t rndis_pkt_paddr;
180 struct rndis_packet_msg *rndis_pkt;
181 bus_dmamap_t rndis_pkt_dmap;
184 #define HN_TXD_FLAG_ONLIST 0x0001
185 #define HN_TXD_FLAG_DMAMAP 0x0002
194 #define HN_RXINFO_VLAN 0x0001
195 #define HN_RXINFO_CSUM 0x0002
196 #define HN_RXINFO_HASHINF 0x0004
197 #define HN_RXINFO_HASHVAL 0x0008
198 #define HN_RXINFO_ALL \
199 (HN_RXINFO_VLAN | \
200 HN_RXINFO_CSUM | \
201 HN_RXINFO_HASHINF | \
202 HN_RXINFO_HASHVAL)
204 #define HN_NDIS_VLAN_INFO_INVALID 0xffffffff
205 #define HN_NDIS_RXCSUM_INFO_INVALID 0
206 #define HN_NDIS_HASH_INFO_INVALID 0
208 static int hn_probe(device_t);
209 static int hn_attach(device_t);
210 static int hn_detach(device_t);
211 static int hn_shutdown(device_t);
212 static void hn_chan_callback(struct vmbus_channel *,
215 static void hn_init(void *);
216 static int hn_ioctl(struct ifnet *, u_long, caddr_t);
217 #ifdef HN_IFSTART_SUPPORT
218 static void hn_start(struct ifnet *);
220 static int hn_transmit(struct ifnet *, struct mbuf *);
221 static void hn_xmit_qflush(struct ifnet *);
222 static int hn_ifmedia_upd(struct ifnet *);
223 static void hn_ifmedia_sts(struct ifnet *,
224 struct ifmediareq *);
226 static int hn_rndis_rxinfo(const void *, int,
228 static void hn_rndis_rx_data(struct hn_rx_ring *,
230 static void hn_rndis_rx_status(struct hn_softc *,
233 static void hn_nvs_handle_notify(struct hn_softc *,
234 const struct vmbus_chanpkt_hdr *);
235 static void hn_nvs_handle_comp(struct hn_softc *,
236 struct vmbus_channel *,
237 const struct vmbus_chanpkt_hdr *);
238 static void hn_nvs_handle_rxbuf(struct hn_rx_ring *,
239 struct vmbus_channel *,
240 const struct vmbus_chanpkt_hdr *);
241 static void hn_nvs_ack_rxbuf(struct hn_rx_ring *,
242 struct vmbus_channel *, uint64_t);
244 #if __FreeBSD_version >= 1100099
245 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
246 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
248 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
249 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
250 #if __FreeBSD_version < 1100095
251 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
253 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
255 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
256 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
257 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
258 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
259 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
260 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
261 static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
262 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
263 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
264 static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
266 static void hn_stop(struct hn_softc *);
267 static void hn_init_locked(struct hn_softc *);
268 static int hn_chan_attach(struct hn_softc *,
269 struct vmbus_channel *);
270 static void hn_chan_detach(struct hn_softc *,
271 struct vmbus_channel *);
272 static int hn_attach_subchans(struct hn_softc *);
273 static void hn_detach_allchans(struct hn_softc *);
274 static void hn_chan_rollup(struct hn_rx_ring *,
275 struct hn_tx_ring *);
276 static void hn_set_ring_inuse(struct hn_softc *, int);
277 static int hn_synth_attach(struct hn_softc *, int);
278 static void hn_synth_detach(struct hn_softc *);
279 static int hn_synth_alloc_subchans(struct hn_softc *,
281 static void hn_suspend(struct hn_softc *);
282 static void hn_suspend_data(struct hn_softc *);
283 static void hn_suspend_mgmt(struct hn_softc *);
284 static void hn_resume(struct hn_softc *);
285 static void hn_resume_data(struct hn_softc *);
286 static void hn_resume_mgmt(struct hn_softc *);
287 static void hn_suspend_mgmt_taskfunc(void *, int);
288 static void hn_chan_drain(struct vmbus_channel *);
290 static void hn_update_link_status(struct hn_softc *);
291 static void hn_change_network(struct hn_softc *);
292 static void hn_link_taskfunc(void *, int);
293 static void hn_netchg_init_taskfunc(void *, int);
294 static void hn_netchg_status_taskfunc(void *, int);
295 static void hn_link_status(struct hn_softc *);
297 static int hn_create_rx_data(struct hn_softc *, int);
298 static void hn_destroy_rx_data(struct hn_softc *);
299 static int hn_check_iplen(const struct mbuf *, int);
300 static int hn_set_rxfilter(struct hn_softc *);
301 static int hn_rss_reconfig(struct hn_softc *);
302 static void hn_rss_ind_fixup(struct hn_softc *, int);
303 static int hn_rxpkt(struct hn_rx_ring *, const void *,
304 int, const struct hn_rxinfo *);
306 static int hn_tx_ring_create(struct hn_softc *, int);
307 static void hn_tx_ring_destroy(struct hn_tx_ring *);
308 static int hn_create_tx_data(struct hn_softc *, int);
309 static void hn_fixup_tx_data(struct hn_softc *);
310 static void hn_destroy_tx_data(struct hn_softc *);
311 static void hn_txdesc_dmamap_destroy(struct hn_txdesc *);
312 static int hn_encap(struct hn_tx_ring *,
313 struct hn_txdesc *, struct mbuf **);
314 static int hn_txpkt(struct ifnet *, struct hn_tx_ring *,
316 static void hn_set_chim_size(struct hn_softc *, int);
317 static void hn_set_tso_maxsize(struct hn_softc *, int, int);
318 static bool hn_tx_ring_pending(struct hn_tx_ring *);
319 static void hn_tx_ring_qflush(struct hn_tx_ring *);
320 static void hn_resume_tx(struct hn_softc *, int);
321 static int hn_get_txswq_depth(const struct hn_tx_ring *);
322 static void hn_txpkt_done(struct hn_nvs_sendctx *,
323 struct hn_softc *, struct vmbus_channel *,
325 static int hn_txpkt_sglist(struct hn_tx_ring *,
327 static int hn_txpkt_chim(struct hn_tx_ring *,
329 static int hn_xmit(struct hn_tx_ring *, int);
330 static void hn_xmit_taskfunc(void *, int);
331 static void hn_xmit_txeof(struct hn_tx_ring *);
332 static void hn_xmit_txeof_taskfunc(void *, int);
333 #ifdef HN_IFSTART_SUPPORT
334 static int hn_start_locked(struct hn_tx_ring *, int);
335 static void hn_start_taskfunc(void *, int);
336 static void hn_start_txeof(struct hn_tx_ring *);
337 static void hn_start_txeof_taskfunc(void *, int);
340 SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
341 "Hyper-V network interface");
343 /* Trust TCP segment verification on the host side. */
344 static int hn_trust_hosttcp = 1;
345 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
346 &hn_trust_hosttcp, 0,
347 "Trust tcp segement verification on host side, "
348 "when csum info is missing (global setting)");
350 /* Trust UDP datagram verification on the host side. */
351 static int hn_trust_hostudp = 1;
352 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
353 &hn_trust_hostudp, 0,
354 "Trust udp datagram verification on host side, "
355 "when csum info is missing (global setting)");
357 /* Trust IP packet verification on the host side. */
358 static int hn_trust_hostip = 1;
359 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
361 "Trust ip packet verification on host side, "
362 "when csum info is missing (global setting)");
364 /* Limit TSO burst size */
365 static int hn_tso_maxlen = IP_MAXPACKET;
366 SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
367 &hn_tso_maxlen, 0, "TSO burst limit");
369 /* Limit chimney send size */
370 static int hn_tx_chimney_size = 0;
371 SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
372 &hn_tx_chimney_size, 0, "Chimney send packet size limit");
374 /* Limit the size of packet for direct transmission */
375 static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
376 SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
377 &hn_direct_tx_size, 0, "Size of the packet for direct transmission");
379 /* # of LRO entries per RX ring */
380 #if defined(INET) || defined(INET6)
381 #if __FreeBSD_version >= 1100095
382 static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
383 SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
384 &hn_lro_entry_count, 0, "LRO entry count");
388 /* Use shared TX taskqueue */
389 static int hn_share_tx_taskq = 0;
390 SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN,
391 &hn_share_tx_taskq, 0, "Enable shared TX taskqueue");
393 #ifndef HN_USE_TXDESC_BUFRING
394 static int hn_use_txdesc_bufring = 0;
396 static int hn_use_txdesc_bufring = 1;
398 SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
399 &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");
401 /* Bind TX taskqueue to the target CPU */
402 static int hn_bind_tx_taskq = -1;
403 SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN,
404 &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu");
406 #ifdef HN_IFSTART_SUPPORT
407 /* Use ifnet.if_start instead of ifnet.if_transmit */
408 static int hn_use_if_start = 0;
409 SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
410 &hn_use_if_start, 0, "Use if_start TX method");
413 /* # of channels to use */
414 static int hn_chan_cnt = 0;
415 SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
417 "# of channels to use; each channel has one RX ring and one TX ring");
419 /* # of transmit rings to use */
420 static int hn_tx_ring_cnt = 0;
421 SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
422 &hn_tx_ring_cnt, 0, "# of TX rings to use");
424 /* Software TX ring depth */
425 static int hn_tx_swq_depth = 0;
426 SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
427 &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");
429 /* Enable sorted LRO, and the depth of the per-channel mbuf queue */
430 #if __FreeBSD_version >= 1100095
431 static u_int hn_lro_mbufq_depth = 0;
432 SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
433 &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
436 static u_int hn_cpu_index; /* next CPU for channel */
437 static struct taskqueue *hn_tx_taskq; /* shared TX taskqueue */
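/*
 * Default RSS key: the 40-byte Toeplitz key that Microsoft documents
 * as the NDIS RSS default.
 */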
439 static const uint8_t
440 hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
441 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
442 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
443 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
444 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
445 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
446 };
448 static device_method_t hn_methods[] = {
449 /* Device interface */
450 DEVMETHOD(device_probe, hn_probe),
451 DEVMETHOD(device_attach, hn_attach),
452 DEVMETHOD(device_detach, hn_detach),
453 DEVMETHOD(device_shutdown, hn_shutdown),
455 DEVMETHOD_END
456 };
457 static driver_t hn_driver = {
458 "hn",
459 hn_methods,
460 sizeof(struct hn_softc)
461 };
463 static devclass_t hn_devclass;
465 DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
466 MODULE_VERSION(hn, 1);
467 MODULE_DEPEND(hn, vmbus, 1, 1, 1);
469 #if __FreeBSD_version >= 1100099
470 static void
471 hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
472 {
473 int i;
475 for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
476 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
477 }
478 #endif
480 static int
481 hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
482 {
484 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
485 txd->chim_size == 0, ("invalid rndis sglist txd"));
486 return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
487 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
488 }
490 static int
491 hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
492 {
493 struct hn_nvs_rndis rndis;
495 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
496 txd->chim_size > 0, ("invalid rndis chim txd"));
498 rndis.nvs_type = HN_NVS_TYPE_RNDIS;
499 rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
500 rndis.nvs_chim_idx = txd->chim_index;
501 rndis.nvs_chim_sz = txd->chim_size;
503 return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
504 &rndis, sizeof(rndis), &txd->send_ctx));
505 }
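/*
 * Allocate a chimney sending buffer slot lock-free: ffsl() locates a
 * clear bit in the shared bitmap and atomic_testandset_long() claims
 * it; if another ring wins the race, the scan simply moves on.
 */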
507 static __inline uint32_t
508 hn_chim_alloc(struct hn_softc *sc)
509 {
510 int i, bmap_cnt = sc->hn_chim_bmap_cnt;
511 u_long *bmap = sc->hn_chim_bmap;
512 uint32_t ret = HN_NVS_CHIM_IDX_INVALID;
514 for (i = 0; i < bmap_cnt; ++i) {
515 int idx;
517 idx = ffsl(~bmap[i]);
518 if (idx == 0)
519 continue;
521 --idx; /* ffsl is 1-based */
522 KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
523 ("invalid i %d and idx %d", i, idx));
525 if (atomic_testandset_long(&bmap[i], idx))
526 continue;
528 ret = i * LONG_BIT + idx;
529 break;
530 }
531 return (ret);
532 }
534 static void
535 hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
536 {
537 u_long mask;
538 uint32_t idx;
540 idx = chim_idx / LONG_BIT;
541 KASSERT(idx < sc->hn_chim_bmap_cnt,
542 ("invalid chimney index 0x%x", chim_idx));
544 mask = 1UL << (chim_idx % LONG_BIT);
545 KASSERT(sc->hn_chim_bmap[idx] & mask,
546 ("index bitmap 0x%lx, chimney index %u, "
547 "bitmap idx %d, bitmask 0x%lx",
548 sc->hn_chim_bmap[idx], chim_idx, idx, mask));
550 atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
553 #if defined(INET6) || defined(INET)
554 /*
555 * NOTE: If this function fails, m_head will be freed.
556 */
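/*
 * NDIS LSOv2 expects the TCP checksum field to be seeded with the
 * pseudo header checksum, computed without the TCP length; install it
 * here before the packet is handed to the host.
 */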
557 static __inline struct mbuf *
558 hn_tso_fixup(struct mbuf *m_head)
560 struct ether_vlan_header *evl;
564 KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));
566 #define PULLUP_HDR(m, len) \
568 if (__predict_false((m)->m_len < (len))) { \
569 (m) = m_pullup((m), (len)); \
575 PULLUP_HDR(m_head, sizeof(*evl));
576 evl = mtod(m_head, struct ether_vlan_header *);
577 if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
578 ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
580 ehlen = ETHER_HDR_LEN;
583 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
587 PULLUP_HDR(m_head, ehlen + sizeof(*ip));
588 ip = mtodo(m_head, ehlen);
589 iphlen = ip->ip_hl << 2;
591 PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
592 th = mtodo(m_head, ehlen + iphlen);
596 th->th_sum = in_pseudo(ip->ip_src.s_addr,
597 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
600 #if defined(INET6) && defined(INET)
607 PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
608 ip6 = mtodo(m_head, ehlen);
609 if (ip6->ip6_nxt != IPPROTO_TCP) {
614 PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
615 th = mtodo(m_head, ehlen + sizeof(*ip6));
618 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
625 #endif /* INET6 || INET */
627 static int
628 hn_set_rxfilter(struct hn_softc *sc)
629 {
630 struct ifnet *ifp = sc->hn_ifp;
631 uint32_t filter;
632 int error = 0;
636 if (ifp->if_flags & IFF_PROMISC) {
637 filter = NDIS_PACKET_TYPE_PROMISCUOUS;
639 filter = NDIS_PACKET_TYPE_DIRECTED;
640 if (ifp->if_flags & IFF_BROADCAST)
641 filter |= NDIS_PACKET_TYPE_BROADCAST;
644 * See the comment in SIOCADDMULTI/SIOCDELMULTI.
646 /* TODO: support multicast list */
647 if ((ifp->if_flags & IFF_ALLMULTI) ||
648 !TAILQ_EMPTY(&ifp->if_multiaddrs))
649 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
651 /* Always enable ALLMULTI */
652 filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
656 if (sc->hn_rx_filter != filter) {
657 error = hn_rndis_set_rxfilter(sc, filter);
659 sc->hn_rx_filter = filter;
664 static int
665 hn_get_txswq_depth(const struct hn_tx_ring *txr)
666 {
668 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
669 if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
670 return txr->hn_txdesc_cnt;
671 return hn_tx_swq_depth;
672 }
674 static int
675 hn_rss_reconfig(struct hn_softc *sc)
676 {
677 int error;
681 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
688 * Direct reconfiguration by setting the UNCHG flags does
689 * _not_ work properly.
692 if_printf(sc->hn_ifp, "disable RSS\n");
693 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
695 if_printf(sc->hn_ifp, "RSS disable failed\n");
700 * Reenable the RSS w/ the updated RSS key or indirect
704 if_printf(sc->hn_ifp, "reconfig RSS\n");
705 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
707 if_printf(sc->hn_ifp, "RSS reconfig failed\n");
713 static void
714 hn_rss_ind_fixup(struct hn_softc *sc, int nchan)
715 {
716 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
717 int i;
719 KASSERT(nchan > 1, ("invalid # of channels %d", nchan));
722 * Check indirect table to make sure that all channels in it
725 for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
726 if (rss->rss_ind[i] >= nchan) {
727 if_printf(sc->hn_ifp,
728 "RSS indirect table %d fixup: %u -> %d\n",
729 i, rss->rss_ind[i], nchan - 1);
730 rss->rss_ind[i] = nchan - 1;
735 static int
736 hn_ifmedia_upd(struct ifnet *ifp __unused)
737 {
739 return (EOPNOTSUPP);
740 }
742 static void
743 hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
744 {
745 struct hn_softc *sc = ifp->if_softc;
747 ifmr->ifm_status = IFM_AVALID;
748 ifmr->ifm_active = IFM_ETHER;
750 if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
751 ifmr->ifm_active |= IFM_NONE;
752 return;
753 }
754 ifmr->ifm_status |= IFM_ACTIVE;
755 ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
756 }
758 /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
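/*
 * NOTE: The first three fields of a hyperv_guid are stored
 * little-endian, hence the byte-swapped layout below.
 */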
759 static const struct hyperv_guid g_net_vsc_device_type = {
760 .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
761 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
762 };
764 static int
765 hn_probe(device_t dev)
766 {
768 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
769 &g_net_vsc_device_type) == 0) {
770 device_set_desc(dev, "Hyper-V Network Interface");
771 return BUS_PROBE_DEFAULT;
772 }
773 return ENXIO;
774 }
776 static void
777 hn_cpuset_setthread_task(void *xmask, int pending __unused)
778 {
779 cpuset_t *mask = xmask;
780 int error;
782 error = cpuset_setthread(curthread->td_tid, mask);
783 if (error)
784 panic("curthread=%ju: can't pin; error=%d",
785 (uintmax_t)curthread->td_tid, error);
789 static int
790 hn_attach(device_t dev)
791 {
792 struct hn_softc *sc = device_get_softc(dev);
793 struct sysctl_oid_list *child;
794 struct sysctl_ctx_list *ctx;
795 uint8_t eaddr[ETHER_ADDR_LEN];
796 struct ifnet *ifp = NULL;
797 int error, ring_cnt, tx_ring_cnt;
800 sc->hn_prichan = vmbus_get_channel(dev);
804 * Setup taskqueue for transmission.
806 if (hn_tx_taskq == NULL) {
807 sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
808 taskqueue_thread_enqueue, &sc->hn_tx_taskq);
809 taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx",
810 device_get_nameunit(dev));
811 if (hn_bind_tx_taskq >= 0) {
812 int cpu = hn_bind_tx_taskq;
813 struct task cpuset_task;
814 cpuset_t cpu_set;
816 if (cpu > mp_ncpus - 1)
818 CPU_SETOF(cpu, &cpu_set);
819 TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task,
820 &cpu_set);
821 taskqueue_enqueue(sc->hn_tx_taskq, &cpuset_task);
822 taskqueue_drain(sc->hn_tx_taskq, &cpuset_task);
825 sc->hn_tx_taskq = hn_tx_taskq;
829 * Set up the taskqueue for management tasks, e.g. link status.
831 sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
832 taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
833 taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
834 device_get_nameunit(dev));
835 TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
836 TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
837 TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
838 hn_netchg_status_taskfunc, sc);
841 * Allocate the ifnet and set up its name early, so that if_printf
842 * can be used by functions which will be called after
843 * ether_ifattach().
844 */
845 ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
847 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
850 * Initialize ifmedia earlier so that it can be unconditionally
851 * destroyed, if error happened later on.
853 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
856 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
857 * to use (tx_ring_cnt).
860 * The # of RX rings to use is same as the # of channels to use.
862 ring_cnt = hn_chan_cnt;
866 if (ring_cnt > HN_RING_CNT_DEF_MAX)
867 ring_cnt = HN_RING_CNT_DEF_MAX;
868 } else if (ring_cnt > mp_ncpus) {
872 tx_ring_cnt = hn_tx_ring_cnt;
873 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
874 tx_ring_cnt = ring_cnt;
875 #ifdef HN_IFSTART_SUPPORT
876 if (hn_use_if_start) {
877 /* ifnet.if_start only needs one TX ring. */
883 * Set the leader CPU for channels.
885 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
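/*
 * hn_cpu_index is advanced atomically, so each device instance gets
 * its own, non-overlapping run of leader CPUs.
 */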
888 * Create enough TX/RX rings, even if only limited number of
889 * channels can be allocated.
891 error = hn_create_tx_data(sc, tx_ring_cnt);
894 error = hn_create_rx_data(sc, ring_cnt);
899 * Create transaction context for NVS and RNDIS transactions.
901 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
902 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
903 if (sc->hn_xact == NULL)
907 * Attach the synthetic parts, i.e. NVS and RNDIS.
909 error = hn_synth_attach(sc, ETHERMTU);
913 error = hn_rndis_get_eaddr(sc, eaddr);
917 #if __FreeBSD_version >= 1100099
918 if (sc->hn_rx_ring_inuse > 1) {
920 * Reduce TCP segment aggregation limit for multiple
921 * RX rings to increase ACK timeliness.
923 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
928 * Fix up TX-related settings after the synthetic parts are attached.
930 hn_fixup_tx_data(sc);
932 ctx = device_get_sysctl_ctx(dev);
933 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
934 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
935 &sc->hn_nvs_ver, 0, "NVS version");
936 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
937 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
938 hn_ndis_version_sysctl, "A", "NDIS version");
939 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
940 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
941 hn_caps_sysctl, "A", "capabilities");
942 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
943 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
944 hn_hwassist_sysctl, "A", "hwassist");
945 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
946 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
947 hn_rxfilter_sysctl, "A", "rxfilter");
948 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
949 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
950 hn_rss_hash_sysctl, "A", "RSS hash");
951 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
952 CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
953 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
954 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
955 hn_rss_key_sysctl, "IU", "RSS key");
956 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
957 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
958 hn_rss_ind_sysctl, "IU", "RSS indirect table");
961 * Setup the ifmedia, which has been initialized earlier.
963 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
964 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
965 /* XXX ifmedia_set really should do this for us */
966 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;
969 * Setup the ifnet for this interface.
973 ifp->if_baudrate = IF_Gbps(10);
975 /* if_baudrate is 32 bits on 32-bit systems. */
976 ifp->if_baudrate = IF_Gbps(1);
978 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
979 ifp->if_ioctl = hn_ioctl;
980 ifp->if_init = hn_init;
981 #ifdef HN_IFSTART_SUPPORT
982 if (hn_use_if_start) {
983 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);
985 ifp->if_start = hn_start;
986 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
987 ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
988 IFQ_SET_READY(&ifp->if_snd);
992 ifp->if_transmit = hn_transmit;
993 ifp->if_qflush = hn_xmit_qflush;
996 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
998 /* We can't distinguish IPv6 packets from IPv4 packets on the RX path. */
999 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
1001 if (sc->hn_caps & HN_CAP_VLAN) {
1002 /* XXX not sure about VLAN_MTU. */
1003 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
1006 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
1007 if (ifp->if_hwassist & HN_CSUM_IP_MASK)
1008 ifp->if_capabilities |= IFCAP_TXCSUM;
1009 if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
1010 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
1011 if (sc->hn_caps & HN_CAP_TSO4) {
1012 ifp->if_capabilities |= IFCAP_TSO4;
1013 ifp->if_hwassist |= CSUM_IP_TSO;
1015 if (sc->hn_caps & HN_CAP_TSO6) {
1016 ifp->if_capabilities |= IFCAP_TSO6;
1017 ifp->if_hwassist |= CSUM_IP6_TSO;
1020 /* Enable all available capabilities by default. */
1021 ifp->if_capenable = ifp->if_capabilities;
1023 if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
1024 hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
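/*
 * Each vmbus GPA entry covers at most one page, so the TSO segment
 * size is capped at PAGE_SIZE, with at most HN_TX_DATA_SEGCNT_MAX
 * segments per packet.
 */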
1025 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
1026 ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
1029 ether_ifattach(ifp, eaddr);
1031 if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
1032 if_printf(ifp, "TSO segcnt %u segsz %u\n",
1033 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
1036 /* Inform the upper layer about the long frame support. */
1037 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
1040 * Kick off link status check.
1042 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
1043 hn_update_link_status(sc);
1047 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
1048 hn_synth_detach(sc);
1053 static int
1054 hn_detach(device_t dev)
1055 {
1056 struct hn_softc *sc = device_get_softc(dev);
1057 struct ifnet *ifp = sc->hn_ifp;
1059 if (device_is_attached(dev)) {
1061 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
1062 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1066 * hn_stop() only suspends data, so the management parts
1067 * have to be suspended manually here.
1068 */
1069 hn_suspend_mgmt(sc);
1070 hn_synth_detach(sc);
1073 ether_ifdetach(ifp);
1076 ifmedia_removeall(&sc->hn_media);
1077 hn_destroy_rx_data(sc);
1078 hn_destroy_tx_data(sc);
1080 if (sc->hn_tx_taskq != hn_tx_taskq)
1081 taskqueue_free(sc->hn_tx_taskq);
1082 taskqueue_free(sc->hn_mgmt_taskq0);
1084 if (sc->hn_xact != NULL)
1085 vmbus_xact_ctx_destroy(sc->hn_xact);
1089 HN_LOCK_DESTROY(sc);
1093 static int
1094 hn_shutdown(device_t dev)
1095 {
1097 return (0);
1098 }
1100 static void
1101 hn_link_status(struct hn_softc *sc)
1102 {
1103 uint32_t link_status;
1106 error = hn_rndis_get_linkstatus(sc, &link_status);
1108 /* XXX what to do? */
1112 if (link_status == NDIS_MEDIA_STATE_CONNECTED)
1113 sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
1115 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
1116 if_link_state_change(sc->hn_ifp,
1117 (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
1118 LINK_STATE_UP : LINK_STATE_DOWN);
1122 hn_link_taskfunc(void *xsc, int pending __unused)
1124 struct hn_softc *sc = xsc;
1126 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
1132 hn_netchg_init_taskfunc(void *xsc, int pending __unused)
1134 struct hn_softc *sc = xsc;
1136 /* Prevent any link status checks from running. */
1137 sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;
1140 * Fake up a [link down --> link up] state change; a 5 second
1141 * delay is used, which closely simulates the miibus reaction
1142 * to a link down event.
1143 */
1144 sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
1145 if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
1146 taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
1147 &sc->hn_netchg_status, 5 * hz);
1151 hn_netchg_status_taskfunc(void *xsc, int pending __unused)
1153 struct hn_softc *sc = xsc;
1155 /* Re-allow link status checks. */
1156 sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
1161 hn_update_link_status(struct hn_softc *sc)
1164 if (sc->hn_mgmt_taskq != NULL)
1165 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
1169 hn_change_network(struct hn_softc *sc)
1172 if (sc->hn_mgmt_taskq != NULL)
1173 taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
1177 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
1178 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
1180 struct mbuf *m = *m_head;
1183 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));
1185 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
1186 m, segs, nsegs, BUS_DMA_NOWAIT);
1187 if (error == EFBIG) {
1190 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
1194 *m_head = m = m_new;
1195 txr->hn_tx_collapsed++;
1197 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
1198 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
1201 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
1202 BUS_DMASYNC_PREWRITE);
1203 txd->flags |= HN_TXD_FLAG_DMAMAP;
1209 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
1212 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
1213 ("put an onlist txd %#x", txd->flags));
1215 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
1216 if (atomic_fetchadd_int(&txd->refs, -1) != 1)
1219 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
1220 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
1221 ("chim txd uses dmamap"));
1222 hn_chim_free(txr->hn_sc, txd->chim_index);
1223 txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
1224 } else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
1225 bus_dmamap_sync(txr->hn_tx_data_dtag,
1226 txd->data_dmap, BUS_DMASYNC_POSTWRITE);
1227 bus_dmamap_unload(txr->hn_tx_data_dtag,
1229 txd->flags &= ~HN_TXD_FLAG_DMAMAP;
1232 if (txd->m != NULL) {
1237 txd->flags |= HN_TXD_FLAG_ONLIST;
1238 #ifndef HN_USE_TXDESC_BUFRING
1239 mtx_lock_spin(&txr->hn_txlist_spin);
1240 KASSERT(txr->hn_txdesc_avail >= 0 &&
1241 txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
1242 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
1243 txr->hn_txdesc_avail++;
1244 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
1245 mtx_unlock_spin(&txr->hn_txlist_spin);
1247 atomic_add_int(&txr->hn_txdesc_avail, 1);
1248 buf_ring_enqueue(txr->hn_txdesc_br, txd);
1254 static __inline struct hn_txdesc *
1255 hn_txdesc_get(struct hn_tx_ring *txr)
1257 struct hn_txdesc *txd;
1259 #ifndef HN_USE_TXDESC_BUFRING
1260 mtx_lock_spin(&txr->hn_txlist_spin);
1261 txd = SLIST_FIRST(&txr->hn_txlist);
1263 KASSERT(txr->hn_txdesc_avail > 0,
1264 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
1265 txr->hn_txdesc_avail--;
1266 SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
1268 mtx_unlock_spin(&txr->hn_txlist_spin);
1270 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
1274 #ifdef HN_USE_TXDESC_BUFRING
1275 atomic_subtract_int(&txr->hn_txdesc_avail, 1);
1277 KASSERT(txd->m == NULL && txd->refs == 0 &&
1278 txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
1279 (txd->flags & HN_TXD_FLAG_ONLIST) &&
1280 (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
1281 txd->flags &= ~HN_TXD_FLAG_ONLIST;
1287 static __inline void
1288 hn_txdesc_hold(struct hn_txdesc *txd)
1291 /* 0->1 transition will never work */
1292 KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs));
1293 atomic_add_int(&txd->refs, 1);
1297 hn_tx_ring_pending(struct hn_tx_ring *txr)
1299 bool pending = false;
1301 #ifndef HN_USE_TXDESC_BUFRING
1302 mtx_lock_spin(&txr->hn_txlist_spin);
1303 if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
1305 mtx_unlock_spin(&txr->hn_txlist_spin);
1307 if (!buf_ring_full(txr->hn_txdesc_br))
1313 static __inline void
1314 hn_txeof(struct hn_tx_ring *txr)
1316 txr->hn_has_txeof = 0;
1321 hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
1322 struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
1324 struct hn_txdesc *txd = sndc->hn_cbarg;
1325 struct hn_tx_ring *txr;
1328 KASSERT(txr->hn_chan == chan,
1329 ("channel mismatch, on chan%u, should be chan%u",
1330 vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan)));
1332 txr->hn_has_txeof = 1;
1333 hn_txdesc_put(txr, txd);
1335 ++txr->hn_txdone_cnt;
1336 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
1337 txr->hn_txdone_cnt = 0;
1338 if (txr->hn_oactive)
1344 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
1346 #if defined(INET) || defined(INET6)
1347 struct lro_ctrl *lro = &rxr->hn_lro;
1348 struct lro_entry *queued;
1350 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1351 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1352 tcp_lro_flush(lro, queued);
1358 * 'txr' could be NULL, if multiple channels and the
1359 * ifnet.if_start method are enabled.
1360 */
1361 if (txr == NULL || !txr->hn_has_txeof)
1364 txr->hn_txdone_cnt = 0;
1368 static __inline uint32_t
1369 hn_rndis_pktmsg_offset(uint32_t ofs)
1372 KASSERT(ofs >= sizeof(struct rndis_packet_msg),
1373 ("invalid RNDIS packet msg offset %u", ofs));
1374 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
1377 static __inline void *
1378 hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
1379 size_t pi_dlen, uint32_t pi_type)
1381 const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
1382 struct rndis_pktinfo *pi;
1384 KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
1385 ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));
1388 * Per-packet-info does not move; it only grows.
1391 * rm_pktinfooffset in this phase counts from the beginning
1392 * of rndis_packet_msg.
1394 KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
1395 ("%u pktinfo overflows RNDIS packet msg", pi_type));
1396 pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
1397 pkt->rm_pktinfolen);
1398 pkt->rm_pktinfolen += pi_size;
1400 pi->rm_size = pi_size;
1401 pi->rm_type = pi_type;
1402 pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;
1404 /* Data immediately follow per-packet-info. */
1405 pkt->rm_dataoffset += pi_size;
1407 /* Update RNDIS packet msg length */
1408 pkt->rm_len += pi_size;
1410 return (pi->rm_data);
1415 * If this function fails, then both txd and m_head0 will be freed.
1418 hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
1420 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
1421 int error, nsegs, i;
1422 struct mbuf *m_head = *m_head0;
1423 struct rndis_packet_msg *pkt;
1428 pkt = txd->rndis_pkt;
1429 if (m_head->m_pkthdr.len + HN_RNDIS_PKT_LEN < txr->hn_chim_size) {
1431 * This packet is small enough to fit into a chimney sending
1432 * buffer. Try allocating one chimney sending buffer now.
1434 txr->hn_tx_chimney_tried++;
1435 txd->chim_index = hn_chim_alloc(txr->hn_sc);
1436 if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
1437 chim = txr->hn_sc->hn_chim +
1438 (txd->chim_index * txr->hn_sc->hn_chim_szmax);
1440 * Directly fill the chimney sending buffer w/ the
1441 * RNDIS packet message.
1447 pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
1448 pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len;
1449 pkt->rm_dataoffset = sizeof(*pkt);
1450 pkt->rm_datalen = m_head->m_pkthdr.len;
1451 pkt->rm_pktinfooffset = sizeof(*pkt);
1452 pkt->rm_pktinfolen = 0;
1454 if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
1456 * Set the hash value for this packet, so that the host could
1457 * dispatch the TX done event for this packet back to this TX
1460 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
1461 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
1462 *pi_data = txr->hn_tx_idx;
1465 if (m_head->m_flags & M_VLANTAG) {
1466 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
1467 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
1468 *pi_data = NDIS_VLAN_INFO_MAKE(
1469 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
1470 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
1471 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
1474 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1475 #if defined(INET6) || defined(INET)
1476 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
1477 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
1479 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
1480 *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0,
1481 m_head->m_pkthdr.tso_segsz);
1484 #if defined(INET6) && defined(INET)
1489 *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0,
1490 m_head->m_pkthdr.tso_segsz);
1493 #endif /* INET6 || INET */
1494 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
1495 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
1496 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
1497 if (m_head->m_pkthdr.csum_flags &
1498 (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
1499 *pi_data = NDIS_TXCSUM_INFO_IPV6;
1501 *pi_data = NDIS_TXCSUM_INFO_IPV4;
1502 if (m_head->m_pkthdr.csum_flags & CSUM_IP)
1503 *pi_data |= NDIS_TXCSUM_INFO_IPCS;
1506 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
1507 *pi_data |= NDIS_TXCSUM_INFO_TCPCS;
1508 else if (m_head->m_pkthdr.csum_flags &
1509 (CSUM_IP_UDP | CSUM_IP6_UDP))
1510 *pi_data |= NDIS_TXCSUM_INFO_UDPCS;
1513 pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
1514 /* Convert RNDIS packet message offsets */
1515 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset);
1516 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);
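/*
 * From this point on, rm_dataoffset and rm_pktinfooffset are relative
 * to the rm_dataoffset field, which is how RNDIS defines these
 * offsets.
 */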
1519 * Fast path: Chimney sending.
1522 KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
1523 ("chimney buffer is not used"));
1524 KASSERT(pkt == chim, ("RNDIS pkt not in chimney buffer"));
1526 m_copydata(m_head, 0, m_head->m_pkthdr.len,
1527 ((uint8_t *)chim) + pktlen);
1529 txd->chim_size = pkt->rm_len;
1530 txr->hn_gpa_cnt = 0;
1531 txr->hn_tx_chimney++;
1532 txr->hn_sendpkt = hn_txpkt_chim;
1535 KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
1536 ("chimney buffer is used"));
1537 KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));
1539 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
1544 * This mbuf is not linked w/ the txd yet, so free it now.
1549 freed = hn_txdesc_put(txr, txd);
1551 ("fail to free txd upon txdma error"));
1553 txr->hn_txdma_failed++;
1554 if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1);
1559 /* +1 RNDIS packet message */
1560 txr->hn_gpa_cnt = nsegs + 1;
1562 /* send packet with page buffer */
1563 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
1564 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
1565 txr->hn_gpa[0].gpa_len = pktlen;
1568 * Fill the page buffers with mbuf info after the page
1569 * buffer for RNDIS packet message.
1571 for (i = 0; i < nsegs; ++i) {
1572 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];
1574 gpa->gpa_page = atop(segs[i].ds_addr);
1575 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
1576 gpa->gpa_len = segs[i].ds_len;
1579 txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
1581 txr->hn_sendpkt = hn_txpkt_sglist;
1585 /* Set the completion routine */
1586 hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);
1593 * If this function fails, then txd will be freed, but the mbuf
1594 * associated w/ the txd will _not_ be freed.
1597 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
1599 int error, send_failed = 0;
1603 * Make sure that txd is not freed before ETHER_BPF_MTAP.
1605 hn_txdesc_hold(txd);
1606 error = txr->hn_sendpkt(txr, txd);
1608 ETHER_BPF_MTAP(ifp, txd->m);
1609 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
1610 #ifdef HN_IFSTART_SUPPORT
1611 if (!hn_use_if_start)
1614 if_inc_counter(ifp, IFCOUNTER_OBYTES,
1615 txd->m->m_pkthdr.len);
1616 if (txd->m->m_flags & M_MCAST)
1617 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1621 hn_txdesc_put(txr, txd);
1623 if (__predict_false(error)) {
1627 * This should happen only very rarely.
1629 * XXX Too many RX to be acked or too many sideband
1630 * commands to run? Ask netvsc_channel_rollup()
1631 * to kick start later.
1633 txr->hn_has_txeof = 1;
1635 txr->hn_send_failed++;
1638 * Try sending again after hn_has_txeof is set,
1639 * in case we missed the last
1640 * netvsc_channel_rollup().
1641 */
1644 if_printf(ifp, "send failed\n");
1647 * Caller will perform further processing on the
1648 * associated mbuf, so don't free it in hn_txdesc_put();
1649 * only unload it from the DMA map in hn_txdesc_put(),
1650 * if it was loaded.
1651 */
1653 freed = hn_txdesc_put(txr, txd);
1655 ("fail to free txd upon send error"));
1657 txr->hn_send_failed++;
1663 * Append the specified data to the indicated mbuf chain.
1664 * Extend the mbuf chain if the new data does not fit in
1665 * existing space.
1666 *
1667 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
1668 * There should be an equivalent in the kernel mbuf code,
1669 * but there does not appear to be one yet.
1671 * Differs from m_append() in that additional mbufs are
1672 * allocated with cluster size MJUMPAGESIZE, and filled
1673 * accordingly.
1674 *
1675 * Return 1 if able to complete the job; otherwise 0.
1678 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
1681 int remainder, space;
1683 for (m = m0; m->m_next != NULL; m = m->m_next)
1686 space = M_TRAILINGSPACE(m);
1689 * Copy into available space.
1691 if (space > remainder)
1693 bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
1698 while (remainder > 0) {
1700 * Allocate a new mbuf; could check space
1701 * and allocate a cluster instead.
1703 n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
1706 n->m_len = min(MJUMPAGESIZE, remainder);
1707 bcopy(cp, mtod(n, caddr_t), n->m_len);
1709 remainder -= n->m_len;
1713 if (m0->m_flags & M_PKTHDR)
1714 m0->m_pkthdr.len += len - remainder;
1716 return (remainder == 0);
1719 #if defined(INET) || defined(INET6)
1721 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
1723 #if __FreeBSD_version >= 1100095
1724 if (hn_lro_mbufq_depth) {
1725 tcp_lro_queue_mbuf(lc, m);
1729 return tcp_lro_rx(lc, m, 0);
1734 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
1735 const struct hn_rxinfo *info)
1737 struct ifnet *ifp = rxr->hn_ifp;
1739 int size, do_lro = 0, do_csum = 1;
1740 int hash_type = M_HASHTYPE_OPAQUE;
1742 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1746 * Bail out if the packet contains more data than the configured MTU.
1748 if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) {
1750 } else if (dlen <= MHLEN) {
1751 m_new = m_gethdr(M_NOWAIT, MT_DATA);
1752 if (m_new == NULL) {
1753 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
1756 memcpy(mtod(m_new, void *), data, dlen);
1757 m_new->m_pkthdr.len = m_new->m_len = dlen;
1758 rxr->hn_small_pkts++;
1761 * Get an mbuf with a cluster. For packets 2K or less,
1762 * get a standard 2K cluster. For anything larger, get a
1763 * 4K cluster. Any buffers larger than 4K can cause problems
1764 * if looped around to the Hyper-V TX channel, so avoid them.
1767 if (dlen > MCLBYTES) {
1769 size = MJUMPAGESIZE;
1772 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
1773 if (m_new == NULL) {
1774 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
1778 hv_m_append(m_new, dlen, data);
1780 m_new->m_pkthdr.rcvif = ifp;
1782 if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
1785 /* receive side checksum offload */
1786 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
1787 /* IP csum offload */
1788 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
1789 m_new->m_pkthdr.csum_flags |=
1790 (CSUM_IP_CHECKED | CSUM_IP_VALID);
1794 /* TCP/UDP csum offload */
1795 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
1796 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
1797 m_new->m_pkthdr.csum_flags |=
1798 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1799 m_new->m_pkthdr.csum_data = 0xffff;
1800 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
1808 * As of this writing (Oct 28th, 2016), the host side will only
1809 * turn on TCPCS_OK and IPCS_OK even for UDP datagrams, so
1810 * the do_lro setting here is actually _not_ accurate.  We
1811 * depend on the RSS hash type check to reset do_lro.
1812 */
1813 if ((info->csum_info &
1814 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
1815 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
1818 const struct ether_header *eh;
1823 if (m_new->m_len < hoff)
1825 eh = mtod(m_new, struct ether_header *);
1826 etype = ntohs(eh->ether_type);
1827 if (etype == ETHERTYPE_VLAN) {
1828 const struct ether_vlan_header *evl;
1830 hoff = sizeof(*evl);
1831 if (m_new->m_len < hoff)
1833 evl = mtod(m_new, struct ether_vlan_header *);
1834 etype = ntohs(evl->evl_proto);
1837 if (etype == ETHERTYPE_IP) {
1840 pr = hn_check_iplen(m_new, hoff);
1841 if (pr == IPPROTO_TCP) {
1843 (rxr->hn_trust_hcsum &
1844 HN_TRUST_HCSUM_TCP)) {
1845 rxr->hn_csum_trusted++;
1846 m_new->m_pkthdr.csum_flags |=
1847 (CSUM_IP_CHECKED | CSUM_IP_VALID |
1848 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1849 m_new->m_pkthdr.csum_data = 0xffff;
1852 } else if (pr == IPPROTO_UDP) {
1854 (rxr->hn_trust_hcsum &
1855 HN_TRUST_HCSUM_UDP)) {
1856 rxr->hn_csum_trusted++;
1857 m_new->m_pkthdr.csum_flags |=
1858 (CSUM_IP_CHECKED | CSUM_IP_VALID |
1859 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1860 m_new->m_pkthdr.csum_data = 0xffff;
1862 } else if (pr != IPPROTO_DONE && do_csum &&
1863 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
1864 rxr->hn_csum_trusted++;
1865 m_new->m_pkthdr.csum_flags |=
1866 (CSUM_IP_CHECKED | CSUM_IP_VALID);
1871 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
1872 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
1873 NDIS_VLAN_INFO_ID(info->vlan_info),
1874 NDIS_VLAN_INFO_PRI(info->vlan_info),
1875 NDIS_VLAN_INFO_CFI(info->vlan_info));
1876 m_new->m_flags |= M_VLANTAG;
1879 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
1881 m_new->m_pkthdr.flowid = info->hash_value;
1882 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
1883 NDIS_HASH_FUNCTION_TOEPLITZ) {
1884 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK);
1887 /*
1888 * do_lro is reset if the hash type is not TCP
1889 * related.  See the comment in the above csum_flags
1890 * setup section.
1891 */
1892 switch (type) {
1893 case NDIS_HASH_IPV4:
1894 hash_type = M_HASHTYPE_RSS_IPV4;
1895 do_lro = 0;
1896 break;
1898 case NDIS_HASH_TCP_IPV4:
1899 hash_type = M_HASHTYPE_RSS_TCP_IPV4;
1900 break;
1902 case NDIS_HASH_IPV6:
1903 hash_type = M_HASHTYPE_RSS_IPV6;
1904 do_lro = 0;
1905 break;
1907 case NDIS_HASH_IPV6_EX:
1908 hash_type = M_HASHTYPE_RSS_IPV6_EX;
1909 do_lro = 0;
1910 break;
1912 case NDIS_HASH_TCP_IPV6:
1913 hash_type = M_HASHTYPE_RSS_TCP_IPV6;
1914 break;
1916 case NDIS_HASH_TCP_IPV6_EX:
1917 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
1918 break;
1919 }
1920 }
1921 } else {
1922 m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
1923 }
1924 M_HASHTYPE_SET(m_new, hash_type);
1927 * Note: Moved RX completion back to hv_nv_on_receive() so all
1928 * messages (not just data messages) will trigger a response.
1934 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
1935 #if defined(INET) || defined(INET6)
1936 struct lro_ctrl *lro = &rxr->hn_lro;
1939 rxr->hn_lro_tried++;
1940 if (hn_lro_rx(lro, m_new) == 0) {
1948 /* We're not holding the lock here, so don't release it */
1949 (*ifp->if_input)(ifp, m_new);
1955 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1957 struct hn_softc *sc = ifp->if_softc;
1958 struct ifreq *ifr = (struct ifreq *)data;
1959 int mask, error = 0;
1963 if (ifr->ifr_mtu > HN_MTU_MAX) {
1970 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
1975 if ((sc->hn_caps & HN_CAP_MTU) == 0) {
1976 /* Can't change MTU */
1982 if (ifp->if_mtu == ifr->ifr_mtu) {
1988 * Suspend this interface before the synthetic parts
1989 * are detached.
1990 */
1991 hn_suspend(sc);
1993 /*
1994 * Detach the synthetic parts, i.e. NVS and RNDIS.
1995 */
1996 hn_synth_detach(sc);
1999 * Reattach the synthetic parts, i.e. NVS and RNDIS,
2000 * with the new MTU setting.
2002 error = hn_synth_attach(sc, ifr->ifr_mtu);
2009 * Commit the requested MTU, after the synthetic parts
2010 * have been successfully attached.
2012 ifp->if_mtu = ifr->ifr_mtu;
2015 * Make sure that various parameters based on MTU are
2016 * still valid, after the MTU change.
2018 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
2019 hn_set_chim_size(sc, sc->hn_chim_szmax);
2020 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
2021 #if __FreeBSD_version >= 1100099
2022 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
2023 HN_LRO_LENLIM_MIN(ifp))
2024 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
2028 * All done! Resume the interface now.
2038 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2043 if (ifp->if_flags & IFF_UP) {
2044 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2045 hn_set_rxfilter(sc);
2049 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2052 sc->hn_if_flags = ifp->if_flags;
2059 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2061 if (mask & IFCAP_TXCSUM) {
2062 ifp->if_capenable ^= IFCAP_TXCSUM;
2063 if (ifp->if_capenable & IFCAP_TXCSUM)
2064 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
2066 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
2068 if (mask & IFCAP_TXCSUM_IPV6) {
2069 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2070 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2071 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
2073 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
2076 /* TODO: flip RNDIS offload parameters for RXCSUM. */
2077 if (mask & IFCAP_RXCSUM)
2078 ifp->if_capenable ^= IFCAP_RXCSUM;
2080 /* We can't distinguish IPv6 packets from IPv4 packets on the RX path. */
2081 if (mask & IFCAP_RXCSUM_IPV6)
2082 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2085 if (mask & IFCAP_LRO)
2086 ifp->if_capenable ^= IFCAP_LRO;
2088 if (mask & IFCAP_TSO4) {
2089 ifp->if_capenable ^= IFCAP_TSO4;
2090 if (ifp->if_capenable & IFCAP_TSO4)
2091 ifp->if_hwassist |= CSUM_IP_TSO;
2093 ifp->if_hwassist &= ~CSUM_IP_TSO;
2095 if (mask & IFCAP_TSO6) {
2096 ifp->if_capenable ^= IFCAP_TSO6;
2097 if (ifp->if_capenable & IFCAP_TSO6)
2098 ifp->if_hwassist |= CSUM_IP6_TSO;
2100 ifp->if_hwassist &= ~CSUM_IP6_TSO;
2111 * Updating the multicast list uses a mutex, while setting
2112 * the RNDIS RX filter sleeps.  We work around this by
2113 * always enabling ALLMULTI.  ALLMULTI would actually always
2114 * be on, even if we supported SIOCADDMULTI/SIOCDELMULTI,
2115 * since we don't support multicast address list
2116 * configuration for this interface.
2117 */
2120 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2124 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2125 hn_set_rxfilter(sc);
2133 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
2137 error = ether_ioctl(ifp, cmd, data);
2144 hn_stop(struct hn_softc *sc)
2146 struct ifnet *ifp = sc->hn_ifp;
2151 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
2152 ("synthetic parts were not attached"));
2154 /* Clear RUNNING bit _before_ hn_suspend_data() */
2155 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
2156 hn_suspend_data(sc);
2158 /* Clear OACTIVE bit. */
2159 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
2160 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
2161 sc->hn_tx_ring[i].hn_oactive = 0;
2165 hn_init_locked(struct hn_softc *sc)
2167 struct ifnet *ifp = sc->hn_ifp;
2172 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
2175 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2178 /* Configure RX filter */
2179 hn_set_rxfilter(sc);
2181 /* Clear OACTIVE bit. */
2182 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
2183 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
2184 sc->hn_tx_ring[i].hn_oactive = 0;
2186 /* Clear TX 'suspended' bit. */
2187 hn_resume_tx(sc, sc->hn_tx_ring_inuse);
2189 /* Everything is ready; unleash! */
2190 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
2196 struct hn_softc *sc = xsc;
2203 #if __FreeBSD_version >= 1100099
2206 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
2208 struct hn_softc *sc = arg1;
2209 unsigned int lenlim;
2212 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
2213 error = sysctl_handle_int(oidp, &lenlim, 0, req);
2214 if (error || req->newptr == NULL)
2218 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
2219 lenlim > TCP_LRO_LENGTH_MAX) {
2223 hn_set_lro_lenlim(sc, lenlim);
2230 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
2232 struct hn_softc *sc = arg1;
2233 int ackcnt, error, i;
2236 * lro_ackcnt_lim is the append count limit;
2237 * +1 turns it into the aggregation limit.
2238 */
2239 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
2240 error = sysctl_handle_int(oidp, &ackcnt, 0, req);
2241 if (error || req->newptr == NULL)
2244 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
2248 * Convert aggregation limit back to append
2249 * count limit.
2250 */
2251 --ackcnt;
2253 for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
2254 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
2262 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
2264 struct hn_softc *sc = arg1;
2269 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
2272 error = sysctl_handle_int(oidp, &on, 0, req);
2273 if (error || req->newptr == NULL)
2277 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2278 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
2281 rxr->hn_trust_hcsum |= hcsum;
2283 rxr->hn_trust_hcsum &= ~hcsum;
2290 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
2292 struct hn_softc *sc = arg1;
2293 int chim_size, error;
2295 chim_size = sc->hn_tx_ring[0].hn_chim_size;
2296 error = sysctl_handle_int(oidp, &chim_size, 0, req);
2297 if (error || req->newptr == NULL)
2300 if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
2304 hn_set_chim_size(sc, chim_size);
2309 #if __FreeBSD_version < 1100095
2311 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
2313 struct hn_softc *sc = arg1;
2314 int ofs = arg2, i, error;
2315 struct hn_rx_ring *rxr;
2319 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2320 rxr = &sc->hn_rx_ring[i];
2321 stat += *((int *)((uint8_t *)rxr + ofs));
2324 error = sysctl_handle_64(oidp, &stat, 0, req);
2325 if (error || req->newptr == NULL)
2328 /* Zero out this stat. */
2329 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2330 rxr = &sc->hn_rx_ring[i];
2331 *((int *)((uint8_t *)rxr + ofs)) = 0;
2337 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
2339 struct hn_softc *sc = arg1;
2340 int ofs = arg2, i, error;
2341 struct hn_rx_ring *rxr;
2345 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2346 rxr = &sc->hn_rx_ring[i];
2347 stat += *((uint64_t *)((uint8_t *)rxr + ofs));
2350 error = sysctl_handle_64(oidp, &stat, 0, req);
2351 if (error || req->newptr == NULL)
2354 /* Zero out this stat. */
2355 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2356 rxr = &sc->hn_rx_ring[i];
2357 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
2365 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2367 struct hn_softc *sc = arg1;
2368 int ofs = arg2, i, error;
2369 struct hn_rx_ring *rxr;
2373 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2374 rxr = &sc->hn_rx_ring[i];
2375 stat += *((u_long *)((uint8_t *)rxr + ofs));
2378 error = sysctl_handle_long(oidp, &stat, 0, req);
2379 if (error || req->newptr == NULL)
2382 /* Zero out this stat. */
2383 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2384 rxr = &sc->hn_rx_ring[i];
2385 *((u_long *)((uint8_t *)rxr + ofs)) = 0;
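/*
 * Editor's note: the hn_*_stat_*_sysctl() handlers above and below all
 * follow one pattern -- a read sums the per-ring counters into a single
 * value, and any write zeroes the counter on every ring.  E.g., assuming
 * the device is hn0:
 *
 *	# sysctl dev.hn.0.lro_queued	(report the aggregate)
 *	# sysctl dev.hn.0.lro_queued=0	(reset it on all RX rings)
 */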
2391 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2393 struct hn_softc *sc = arg1;
2394 int ofs = arg2, i, error;
2395 struct hn_tx_ring *txr;
2399 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
2400 txr = &sc->hn_tx_ring[i];
2401 stat += *((u_long *)((uint8_t *)txr + ofs));
2404 error = sysctl_handle_long(oidp, &stat, 0, req);
2405 if (error || req->newptr == NULL)
2408 /* Zero out this stat. */
2409 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
2410 txr = &sc->hn_tx_ring[i];
2411 *((u_long *)((uint8_t *)txr + ofs)) = 0;
2417 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
2419 struct hn_softc *sc = arg1;
2420 int ofs = arg2, i, error, conf;
2421 struct hn_tx_ring *txr;
2423 txr = &sc->hn_tx_ring[0];
2424 conf = *((int *)((uint8_t *)txr + ofs));
2426 error = sysctl_handle_int(oidp, &conf, 0, req);
2427 if (error || req->newptr == NULL)
2431 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
2432 txr = &sc->hn_tx_ring[i];
2433 *((int *)((uint8_t *)txr + ofs)) = conf;
2441 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
2443 struct hn_softc *sc = arg1;
2446 snprintf(verstr, sizeof(verstr), "%u.%u",
2447 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
2448 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
2449 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
2453 hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
2455 struct hn_softc *sc = arg1;
2462 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS);
2463 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req);
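/*
 * The kernel's "%b" format, used by the caps/hwassist/rxfilter/hash
 * handlers here, decodes a bit mask with a description string: the first
 * character selects the output base (\20, i.e. 16) and each following
 * \<bit#>NAME pair names a bit.  E.g. printing 0x3 with
 * "\20\1IPCS\2TCP4CS" yields "3<IPCS,TCP4CS>".  (These bit names are
 * illustrative; HN_CAP_BITS defines the real ones.)
 */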
2467 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
2469 struct hn_softc *sc = arg1;
2470 char assist_str[128];
2474 hwassist = sc->hn_ifp->if_hwassist;
2476 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS);
2477 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req);
2481 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS)
2483 struct hn_softc *sc = arg1;
2484 char filter_str[128];
2488 filter = sc->hn_rx_filter;
2490 snprintf(filter_str, sizeof(filter_str), "%b", filter,
2492 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req);
2496 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
2498 struct hn_softc *sc = arg1;
2503 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
2504 if (error || req->newptr == NULL)
2507 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
2510 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
2512 if (sc->hn_rx_ring_inuse > 1) {
2513 error = hn_rss_reconfig(sc);
2515 /* Not RSS capable, at least for now; just save the RSS key. */
2524 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
2526 struct hn_softc *sc = arg1;
2531 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
2532 if (error || req->newptr == NULL)
2536 * Don't allow RSS indirect table changes if this interface
2537 * is not currently RSS capable.
2539 if (sc->hn_rx_ring_inuse == 1) {
2544 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
2547 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
2549 hn_rss_ind_fixup(sc, sc->hn_rx_ring_inuse);
2550 error = hn_rss_reconfig(sc);
2557 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS)
2559 struct hn_softc *sc = arg1;
2564 hash = sc->hn_rss_hash;
2566 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
2567 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
2571 hn_check_iplen(const struct mbuf *m, int hoff)
2573 const struct ip *ip;
2574 int len, iphlen, iplen;
2575 const struct tcphdr *th;
2576 int thoff; /* TCP data offset */
2578 len = hoff + sizeof(struct ip);
2580 /* The packet must be at least the size of an IP header. */
2581 if (m->m_pkthdr.len < len)
2582 return IPPROTO_DONE;
2584 /* The fixed IP header must reside completely in the first mbuf. */
2586 return IPPROTO_DONE;
2588 ip = mtodo(m, hoff);
2590 /* Bound check the packet's stated IP header length. */
2591 iphlen = ip->ip_hl << 2;
2592 if (iphlen < sizeof(struct ip)) /* minimum header length */
2593 return IPPROTO_DONE;
2595 * The full IP header must reside completely in the first mbuf.
2596 if (m->m_len < hoff + iphlen)
2597 return IPPROTO_DONE;
2599 iplen = ntohs(ip->ip_len);
2602 * Check that the amount of data in the buffers is at least
2603 * as much as the IP header would have us expect.
2605 if (m->m_pkthdr.len < hoff + iplen)
2606 return IPPROTO_DONE;
2609 * Ignore IP fragments.
2611 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
2612 return IPPROTO_DONE;
2615 * The TCP/IP or UDP/IP header must be entirely contained within
2616 * the first fragment of a packet.
2620 if (iplen < iphlen + sizeof(struct tcphdr))
2621 return IPPROTO_DONE;
2622 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
2623 return IPPROTO_DONE;
2624 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
2625 thoff = th->th_off << 2;
2626 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
2627 return IPPROTO_DONE;
2628 if (m->m_len < hoff + iphlen + thoff)
2629 return IPPROTO_DONE;
2632 if (iplen < iphlen + sizeof(struct udphdr))
2633 return IPPROTO_DONE;
2634 if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
2635 return IPPROTO_DONE;
2639 return IPPROTO_DONE;
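/*
 * Worked example: with hoff == 14 (an Ethernet header), a minimal
 * 20-byte IP header and a minimal 20-byte TCP header, the checks above
 * require the first mbuf to hold at least 14 + 20 + 20 == 54 bytes for
 * the packet to pass.
 */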
2646 hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
2648 struct sysctl_oid_list *child;
2649 struct sysctl_ctx_list *ctx;
2650 device_t dev = sc->hn_dev;
2651 #if defined(INET) || defined(INET6)
2652 #if __FreeBSD_version >= 1100095
2659 * Create RXBUF for reception.
2662 * - It is shared by all channels.
2663 * - A large enough buffer is allocated; certain versions of NVS
2664 * may further limit the usable space.
2666 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
2667 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma,
2668 BUS_DMA_WAITOK | BUS_DMA_ZERO);
2669 if (sc->hn_rxbuf == NULL) {
2670 device_printf(sc->hn_dev, "allocate rxbuf failed\n");
2674 sc->hn_rx_ring_cnt = ring_cnt;
2675 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
2677 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
2678 M_DEVBUF, M_WAITOK | M_ZERO);
2680 #if defined(INET) || defined(INET6)
2681 #if __FreeBSD_version >= 1100095
2682 lroent_cnt = hn_lro_entry_count;
2683 if (lroent_cnt < TCP_LRO_ENTRIES)
2684 lroent_cnt = TCP_LRO_ENTRIES;
2686 device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
2688 #endif /* INET || INET6 */
2690 ctx = device_get_sysctl_ctx(dev);
2691 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
2693 /* Create dev.hn.UNIT.rx sysctl tree */
2694 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
2695 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
2697 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2698 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
2700 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
2701 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE,
2702 &rxr->hn_br_dma, BUS_DMA_WAITOK);
2703 if (rxr->hn_br == NULL) {
2704 device_printf(dev, "allocate bufring failed\n");
2708 if (hn_trust_hosttcp)
2709 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
2710 if (hn_trust_hostudp)
2711 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
2712 if (hn_trust_hostip)
2713 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
2714 rxr->hn_ifp = sc->hn_ifp;
2715 if (i < sc->hn_tx_ring_cnt)
2716 rxr->hn_txr = &sc->hn_tx_ring[i];
2717 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF;
2718 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK);
2720 rxr->hn_rxbuf = sc->hn_rxbuf;
2725 #if defined(INET) || defined(INET6)
2726 #if __FreeBSD_version >= 1100095
2727 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
2728 hn_lro_mbufq_depth);
2730 tcp_lro_init(&rxr->hn_lro);
2731 rxr->hn_lro.ifp = sc->hn_ifp;
2733 #if __FreeBSD_version >= 1100099
2734 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
2735 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
2737 #endif /* INET || INET6 */
2739 if (sc->hn_rx_sysctl_tree != NULL) {
2743 * Create per RX ring sysctl tree:
2744 * dev.hn.UNIT.rx.RINGID
2746 snprintf(name, sizeof(name), "%d", i);
2747 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
2748 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
2749 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
2751 if (rxr->hn_rx_sysctl_tree != NULL) {
2752 SYSCTL_ADD_ULONG(ctx,
2753 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
2754 OID_AUTO, "packets", CTLFLAG_RW,
2755 &rxr->hn_pkts, "# of packets received");
2756 SYSCTL_ADD_ULONG(ctx,
2757 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
2758 OID_AUTO, "rss_pkts", CTLFLAG_RW,
2760 "# of packets w/ RSS info received");
2762 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
2763 OID_AUTO, "pktbuf_len", CTLFLAG_RD,
2764 &rxr->hn_pktbuf_len, 0,
2765 "Temporary channel packet buffer length");
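/*
 * With the per-ring tree above, individual ring counters are reachable
 * as, e.g. (assuming device hn0, ring 0):
 *
 *	# sysctl dev.hn.0.rx.0.packets
 */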
2770 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
2771 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2772 __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
2773 #if __FreeBSD_version < 1100095
2774 hn_rx_stat_int_sysctl,
2776 hn_rx_stat_u64_sysctl,
2778 "LU", "LRO queued");
2779 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
2780 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2781 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
2782 #if __FreeBSD_version < 1100095
2783 hn_rx_stat_int_sysctl,
2785 hn_rx_stat_u64_sysctl,
2787 "LU", "LRO flushed");
2788 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
2789 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2790 __offsetof(struct hn_rx_ring, hn_lro_tried),
2791 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
2792 #if __FreeBSD_version >= 1100099
2793 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
2794 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
2795 hn_lro_lenlim_sysctl, "IU",
2796 "Max # of data bytes to be aggregated by LRO");
2797 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
2798 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
2799 hn_lro_ackcnt_sysctl, "I",
2800 "Max # of ACKs to be aggregated by LRO");
2802 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
2803 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
2804 hn_trust_hcsum_sysctl, "I",
2805 "Trust tcp segment verification on host side, "
2806 "when csum info is missing");
2807 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
2808 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
2809 hn_trust_hcsum_sysctl, "I",
2810 "Trust udp datagram verification on host side, "
2811 "when csum info is missing");
2812 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
2813 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
2814 hn_trust_hcsum_sysctl, "I",
2815 "Trust ip packet verification on host side, "
2816 "when csum info is missing");
2817 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
2818 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2819 __offsetof(struct hn_rx_ring, hn_csum_ip),
2820 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
2821 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
2822 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2823 __offsetof(struct hn_rx_ring, hn_csum_tcp),
2824 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
2825 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
2826 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2827 __offsetof(struct hn_rx_ring, hn_csum_udp),
2828 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
2829 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
2830 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2831 __offsetof(struct hn_rx_ring, hn_csum_trusted),
2832 hn_rx_stat_ulong_sysctl, "LU",
2833 "# of packets that we trust host's csum verification");
2834 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
2835 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2836 __offsetof(struct hn_rx_ring, hn_small_pkts),
2837 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
2838 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed",
2839 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2840 __offsetof(struct hn_rx_ring, hn_ack_failed),
2841 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures");
2842 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
2843 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
2844 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
2845 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
2851 hn_destroy_rx_data(struct hn_softc *sc)
2855 if (sc->hn_rxbuf != NULL) {
2856 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
2857 sc->hn_rxbuf = NULL;
2860 if (sc->hn_rx_ring_cnt == 0)
2863 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2864 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
2866 if (rxr->hn_br == NULL)
2868 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
2871 #if defined(INET) || defined(INET6)
2872 tcp_lro_free(&rxr->hn_lro);
2874 free(rxr->hn_pktbuf, M_DEVBUF);
2876 free(sc->hn_rx_ring, M_DEVBUF);
2877 sc->hn_rx_ring = NULL;
2879 sc->hn_rx_ring_cnt = 0;
2880 sc->hn_rx_ring_inuse = 0;
2884 hn_tx_ring_create(struct hn_softc *sc, int id)
2886 struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
2887 device_t dev = sc->hn_dev;
2888 bus_dma_tag_t parent_dtag;
2892 txr->hn_tx_idx = id;
2894 #ifndef HN_USE_TXDESC_BUFRING
2895 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
2897 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
2899 txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
2900 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
2901 M_DEVBUF, M_WAITOK | M_ZERO);
2902 #ifndef HN_USE_TXDESC_BUFRING
2903 SLIST_INIT(&txr->hn_txlist);
2905 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF,
2906 M_WAITOK, &txr->hn_tx_lock);
2909 txr->hn_tx_taskq = sc->hn_tx_taskq;
2911 #ifdef HN_IFSTART_SUPPORT
2912 if (hn_use_if_start) {
2913 txr->hn_txeof = hn_start_txeof;
2914 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
2915 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
2921 txr->hn_txeof = hn_xmit_txeof;
2922 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
2923 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
2925 br_depth = hn_get_txswq_depth(txr);
2926 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF,
2927 M_WAITOK, &txr->hn_tx_lock);
2930 txr->hn_direct_tx_size = hn_direct_tx_size;
2933 * Always schedule transmission instead of trying to do direct
2934 * transmission; this gives the best performance so far.
2936 txr->hn_sched_tx = 1;
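/*
 * Editor's note: with hn_sched_tx set, the transmit entry points below
 * (hn_transmit()/hn_start()) skip the direct-send attempt and
 * immediately hand hn_tx_task to the TX taskqueue.
 */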
2938 parent_dtag = bus_get_dma_tag(dev);
2940 /* DMA tag for RNDIS packet messages. */
2941 error = bus_dma_tag_create(parent_dtag, /* parent */
2942 HN_RNDIS_PKT_ALIGN, /* alignment */
2943 HN_RNDIS_PKT_BOUNDARY, /* boundary */
2944 BUS_SPACE_MAXADDR, /* lowaddr */
2945 BUS_SPACE_MAXADDR, /* highaddr */
2946 NULL, NULL, /* filter, filterarg */
2947 HN_RNDIS_PKT_LEN, /* maxsize */
2949 HN_RNDIS_PKT_LEN, /* maxsegsize */
2951 NULL, /* lockfunc */
2952 NULL, /* lockfuncarg */
2953 &txr->hn_tx_rndis_dtag);
2955 device_printf(dev, "failed to create rndis dmatag\n");
2959 /* DMA tag for data. */
2960 error = bus_dma_tag_create(parent_dtag, /* parent */
2962 HN_TX_DATA_BOUNDARY, /* boundary */
2963 BUS_SPACE_MAXADDR, /* lowaddr */
2964 BUS_SPACE_MAXADDR, /* highaddr */
2965 NULL, NULL, /* filter, filterarg */
2966 HN_TX_DATA_MAXSIZE, /* maxsize */
2967 HN_TX_DATA_SEGCNT_MAX, /* nsegments */
2968 HN_TX_DATA_SEGSIZE, /* maxsegsize */
2970 NULL, /* lockfunc */
2971 NULL, /* lockfuncarg */
2972 &txr->hn_tx_data_dtag);
2974 device_printf(dev, "failed to create data dmatag\n");
2978 for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
2979 struct hn_txdesc *txd = &txr->hn_txdesc[i];
2982 txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
2985 * Allocate and load RNDIS packet message.
2987 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
2988 (void **)&txd->rndis_pkt,
2989 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
2990 &txd->rndis_pkt_dmap);
2993 "failed to allocate rndis_packet_msg, %d\n", i);
2997 error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
2998 txd->rndis_pkt_dmap,
2999 txd->rndis_pkt, HN_RNDIS_PKT_LEN,
3000 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
3004 "failed to load rndis_packet_msg, %d\n", i);
3005 bus_dmamem_free(txr->hn_tx_rndis_dtag,
3006 txd->rndis_pkt, txd->rndis_pkt_dmap);
3010 /* DMA map for TX data. */
3011 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
3015 "failed to allocate tx data dmamap\n");
3016 bus_dmamap_unload(txr->hn_tx_rndis_dtag,
3017 txd->rndis_pkt_dmap);
3018 bus_dmamem_free(txr->hn_tx_rndis_dtag,
3019 txd->rndis_pkt, txd->rndis_pkt_dmap);
3023 /* All set; put it on the list. */
3024 txd->flags |= HN_TXD_FLAG_ONLIST;
3025 #ifndef HN_USE_TXDESC_BUFRING
3026 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
3028 buf_ring_enqueue(txr->hn_txdesc_br, txd);
3031 txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
3033 if (sc->hn_tx_sysctl_tree != NULL) {
3034 struct sysctl_oid_list *child;
3035 struct sysctl_ctx_list *ctx;
3039 * Create per TX ring sysctl tree:
3040 * dev.hn.UNIT.tx.RINGID
3042 ctx = device_get_sysctl_ctx(dev);
3043 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
3045 snprintf(name, sizeof(name), "%d", id);
3046 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
3047 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3049 if (txr->hn_tx_sysctl_tree != NULL) {
3050 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
3052 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
3053 CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
3054 "# of available TX descs");
3055 #ifdef HN_IFSTART_SUPPORT
3056 if (!hn_use_if_start)
3059 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
3060 CTLFLAG_RD, &txr->hn_oactive, 0,
3063 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
3064 CTLFLAG_RW, &txr->hn_pkts,
3065 "# of packets transmitted");
3073 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
3075 struct hn_tx_ring *txr = txd->txr;
3077 KASSERT(txd->m == NULL, ("still has mbuf installed"));
3078 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
3080 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
3081 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt,
3082 txd->rndis_pkt_dmap);
3083 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
3087 hn_tx_ring_destroy(struct hn_tx_ring *txr)
3089 struct hn_txdesc *txd;
3091 if (txr->hn_txdesc == NULL)
3094 #ifndef HN_USE_TXDESC_BUFRING
3095 while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) {
3096 SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
3097 hn_txdesc_dmamap_destroy(txd);
3100 mtx_lock(&txr->hn_tx_lock);
3101 while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL)
3102 hn_txdesc_dmamap_destroy(txd);
3103 mtx_unlock(&txr->hn_tx_lock);
3106 if (txr->hn_tx_data_dtag != NULL)
3107 bus_dma_tag_destroy(txr->hn_tx_data_dtag);
3108 if (txr->hn_tx_rndis_dtag != NULL)
3109 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
3111 #ifdef HN_USE_TXDESC_BUFRING
3112 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF);
3115 free(txr->hn_txdesc, M_DEVBUF);
3116 txr->hn_txdesc = NULL;
3118 if (txr->hn_mbuf_br != NULL)
3119 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF);
3121 #ifndef HN_USE_TXDESC_BUFRING
3122 mtx_destroy(&txr->hn_txlist_spin);
3124 mtx_destroy(&txr->hn_tx_lock);
3128 hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
3130 struct sysctl_oid_list *child;
3131 struct sysctl_ctx_list *ctx;
3135 * Create TXBUF for chimney sending.
3137 * NOTE: It is shared by all channels.
3139 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev),
3140 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma,
3141 BUS_DMA_WAITOK | BUS_DMA_ZERO);
3142 if (sc->hn_chim == NULL) {
3143 device_printf(sc->hn_dev, "allocate txbuf failed\n");
3147 sc->hn_tx_ring_cnt = ring_cnt;
3148 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
3150 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
3151 M_DEVBUF, M_WAITOK | M_ZERO);
3153 ctx = device_get_sysctl_ctx(sc->hn_dev);
3154 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
3156 /* Create dev.hn.UNIT.tx sysctl tree */
3157 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
3158 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3160 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3163 error = hn_tx_ring_create(sc, i);
3168 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
3169 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3170 __offsetof(struct hn_tx_ring, hn_no_txdescs),
3171 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
3172 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
3173 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3174 __offsetof(struct hn_tx_ring, hn_send_failed),
3175 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failures");
3176 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
3177 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3178 __offsetof(struct hn_tx_ring, hn_txdma_failed),
3179 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
3180 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
3181 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3182 __offsetof(struct hn_tx_ring, hn_tx_collapsed),
3183 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
3184 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
3185 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3186 __offsetof(struct hn_tx_ring, hn_tx_chimney),
3187 hn_tx_stat_ulong_sysctl, "LU", "# of chimney sends");
3188 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
3189 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3190 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
3191 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
3192 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
3193 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
3194 "# of total TX descs");
3195 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
3196 CTLFLAG_RD, &sc->hn_chim_szmax, 0,
3197 "Chimney send packet size upper boundary");
3198 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
3199 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3200 hn_chim_size_sysctl, "I", "Chimney send packet size limit");
3201 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
3202 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3203 __offsetof(struct hn_tx_ring, hn_direct_tx_size),
3204 hn_tx_conf_int_sysctl, "I",
3205 "Size of the packet for direct transmission");
3206 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
3207 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3208 __offsetof(struct hn_tx_ring, hn_sched_tx),
3209 hn_tx_conf_int_sysctl, "I",
3210 "Always schedule transmission "
3211 "instead of doing direct transmission");
3212 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
3213 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
3214 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
3215 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
3221 hn_set_chim_size(struct hn_softc *sc, int chim_size)
3225 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
3226 sc->hn_tx_ring[i].hn_chim_size = chim_size;
3230 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
3232 struct ifnet *ifp = sc->hn_ifp;
3235 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
3238 KASSERT(sc->hn_ndis_tso_sgmin >= 2,
3239 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin));
3240 tso_minlen = sc->hn_ndis_tso_sgmin * mtu;
3242 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen &&
3243 sc->hn_ndis_tso_szmax <= IP_MAXPACKET,
3244 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax));
3246 if (tso_maxlen < tso_minlen)
3247 tso_maxlen = tso_minlen;
3248 else if (tso_maxlen > IP_MAXPACKET)
3249 tso_maxlen = IP_MAXPACKET;
3250 if (tso_maxlen > sc->hn_ndis_tso_szmax)
3251 tso_maxlen = sc->hn_ndis_tso_szmax;
3252 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3254 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax);
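/*
 * Worked example with illustrative numbers: mtu 1500 and
 * hn_ndis_tso_sgmin 2 give tso_minlen 3000.  A requested tso_maxlen of
 * IP_MAXPACKET (65535) clamped by an NDIS szmax of, say, 62780 leaves
 * 62780; subtracting the 18-byte Ethernet + VLAN encapsulation
 * (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN) yields if_hw_tsomax 62762.
 */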
3258 hn_fixup_tx_data(struct hn_softc *sc)
3260 uint64_t csum_assist;
3263 hn_set_chim_size(sc, sc->hn_chim_szmax);
3264 if (hn_tx_chimney_size > 0 &&
3265 hn_tx_chimney_size < sc->hn_chim_szmax)
3266 hn_set_chim_size(sc, hn_tx_chimney_size);
3269 if (sc->hn_caps & HN_CAP_IPCS)
3270 csum_assist |= CSUM_IP;
3271 if (sc->hn_caps & HN_CAP_TCP4CS)
3272 csum_assist |= CSUM_IP_TCP;
3273 if (sc->hn_caps & HN_CAP_UDP4CS)
3274 csum_assist |= CSUM_IP_UDP;
3276 if (sc->hn_caps & HN_CAP_TCP6CS)
3277 csum_assist |= CSUM_IP6_TCP;
3278 if (sc->hn_caps & HN_CAP_UDP6CS)
3279 csum_assist |= CSUM_IP6_UDP;
3281 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3282 sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
3284 if (sc->hn_caps & HN_CAP_HASHVAL) {
3286 * Support HASHVAL pktinfo on TX path.
3289 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
3290 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3291 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
3296 hn_destroy_tx_data(struct hn_softc *sc)
3300 if (sc->hn_chim != NULL) {
3301 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
3305 if (sc->hn_tx_ring_cnt == 0)
3308 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3309 hn_tx_ring_destroy(&sc->hn_tx_ring[i]);
3311 free(sc->hn_tx_ring, M_DEVBUF);
3312 sc->hn_tx_ring = NULL;
3314 sc->hn_tx_ring_cnt = 0;
3315 sc->hn_tx_ring_inuse = 0;
3318 #ifdef HN_IFSTART_SUPPORT
3321 hn_start_taskfunc(void *xtxr, int pending __unused)
3323 struct hn_tx_ring *txr = xtxr;
3325 mtx_lock(&txr->hn_tx_lock);
3326 hn_start_locked(txr, 0);
3327 mtx_unlock(&txr->hn_tx_lock);
3331 hn_start_locked(struct hn_tx_ring *txr, int len)
3333 struct hn_softc *sc = txr->hn_sc;
3334 struct ifnet *ifp = sc->hn_ifp;
3336 KASSERT(hn_use_if_start,
3337 ("hn_start_locked is called, when if_start is disabled"));
3338 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
3339 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
3341 if (__predict_false(txr->hn_suspended))
3344 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
3348 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
3349 struct hn_txdesc *txd;
3350 struct mbuf *m_head;
3353 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
3357 if (len > 0 && m_head->m_pkthdr.len > len) {
3359 * This send could be time consuming; let the caller
3360 * dispatch this packet (and any follow-up packets)
3361 * to the TX taskqueue.
3363 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
3367 #if defined(INET6) || defined(INET)
3368 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
3369 m_head = hn_tso_fixup(m_head);
3370 if (__predict_false(m_head == NULL)) {
3371 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
3377 txd = hn_txdesc_get(txr);
3379 txr->hn_no_txdescs++;
3380 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
3381 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
3385 error = hn_encap(txr, txd, &m_head);
3387 /* Both txd and m_head are freed */
3391 error = hn_txpkt(ifp, txr, txd);
3392 if (__predict_false(error)) {
3393 /* txd is freed, but m_head is not */
3394 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
3395 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
3403 hn_start(struct ifnet *ifp)
3405 struct hn_softc *sc = ifp->if_softc;
3406 struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
3408 if (txr->hn_sched_tx)
3411 if (mtx_trylock(&txr->hn_tx_lock)) {
3414 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
3415 mtx_unlock(&txr->hn_tx_lock);
3420 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
3424 hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
3426 struct hn_tx_ring *txr = xtxr;
3428 mtx_lock(&txr->hn_tx_lock);
3429 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
3430 hn_start_locked(txr, 0);
3431 mtx_unlock(&txr->hn_tx_lock);
3435 hn_start_txeof(struct hn_tx_ring *txr)
3437 struct hn_softc *sc = txr->hn_sc;
3438 struct ifnet *ifp = sc->hn_ifp;
3440 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
3442 if (txr->hn_sched_tx)
3445 if (mtx_trylock(&txr->hn_tx_lock)) {
3448 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
3449 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
3450 mtx_unlock(&txr->hn_tx_lock);
3452 taskqueue_enqueue(txr->hn_tx_taskq,
3458 * Release the OACTIVE earlier, in the hope that
3459 * others could catch up.  The task will clear the
3460 * flag again with the hn_tx_lock held to avoid possible races.
3463 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
3464 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
3468 #endif /* HN_IFSTART_SUPPORT */
3471 hn_xmit(struct hn_tx_ring *txr, int len)
3473 struct hn_softc *sc = txr->hn_sc;
3474 struct ifnet *ifp = sc->hn_ifp;
3475 struct mbuf *m_head;
3477 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
3478 #ifdef HN_IFSTART_SUPPORT
3479 KASSERT(hn_use_if_start == 0,
3480 ("hn_xmit is called, when if_start is enabled"));
3483 if (__predict_false(txr->hn_suspended))
3486 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
3489 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
3490 struct hn_txdesc *txd;
3493 if (len > 0 && m_head->m_pkthdr.len > len) {
3495 * This send could be time consuming; let the caller
3496 * dispatch this packet (and any follow-up packets)
3497 * to the TX taskqueue.
3499 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
3503 txd = hn_txdesc_get(txr);
3505 txr->hn_no_txdescs++;
3506 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
3507 txr->hn_oactive = 1;
3511 error = hn_encap(txr, txd, &m_head);
3513 /* Both txd and m_head are freed; discard */
3514 drbr_advance(ifp, txr->hn_mbuf_br);
3518 error = hn_txpkt(ifp, txr, txd);
3519 if (__predict_false(error)) {
3520 /* txd is freed, but m_head is not */
3521 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
3522 txr->hn_oactive = 1;
3527 drbr_advance(ifp, txr->hn_mbuf_br);
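/*
 * Editor's note on the drbr protocol used above: drbr_peek() returns
 * the head mbuf but leaves it in the ring; drbr_putback() reinstalls a
 * (possibly modified) mbuf at the head before bailing out; and
 * drbr_advance() actually consumes the peeked entry once the send has
 * been handed off.
 */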
3533 hn_transmit(struct ifnet *ifp, struct mbuf *m)
3535 struct hn_softc *sc = ifp->if_softc;
3536 struct hn_tx_ring *txr;
3539 #if defined(INET6) || defined(INET)
3541 * Perform TSO packet header fixup now, since the TSO
3542 * packet header should be cache-hot.
3544 if (m->m_pkthdr.csum_flags & CSUM_TSO) {
3545 m = hn_tso_fixup(m);
3546 if (__predict_false(m == NULL)) {
3547 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
3554 * Select the TX ring based on flowid
3556 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
3557 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
3558 txr = &sc->hn_tx_ring[idx];
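/*
 * E.g. an mbuf carrying RSS flowid 0x2a on a device with 4 TX rings in
 * use lands on ring 0x2a % 4 == 2.
 */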
3560 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
3562 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
3566 if (txr->hn_oactive)
3569 if (txr->hn_sched_tx)
3572 if (mtx_trylock(&txr->hn_tx_lock)) {
3575 sched = hn_xmit(txr, txr->hn_direct_tx_size);
3576 mtx_unlock(&txr->hn_tx_lock);
3581 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
3586 hn_tx_ring_qflush(struct hn_tx_ring *txr)
3590 mtx_lock(&txr->hn_tx_lock);
3591 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
3593 mtx_unlock(&txr->hn_tx_lock);
3597 hn_xmit_qflush(struct ifnet *ifp)
3599 struct hn_softc *sc = ifp->if_softc;
3602 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
3603 hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
3608 hn_xmit_txeof(struct hn_tx_ring *txr)
3611 if (txr->hn_sched_tx)
3614 if (mtx_trylock(&txr->hn_tx_lock)) {
3617 txr->hn_oactive = 0;
3618 sched = hn_xmit(txr, txr->hn_direct_tx_size);
3619 mtx_unlock(&txr->hn_tx_lock);
3621 taskqueue_enqueue(txr->hn_tx_taskq,
3627 * Release the oactive earlier, in the hope that
3628 * others could catch up.  The task will clear the
3629 * oactive again with the hn_tx_lock held to avoid possible races.
3632 txr->hn_oactive = 0;
3633 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
3638 hn_xmit_taskfunc(void *xtxr, int pending __unused)
3640 struct hn_tx_ring *txr = xtxr;
3642 mtx_lock(&txr->hn_tx_lock);
3644 mtx_unlock(&txr->hn_tx_lock);
3648 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
3650 struct hn_tx_ring *txr = xtxr;
3652 mtx_lock(&txr->hn_tx_lock);
3653 txr->hn_oactive = 0;
3655 mtx_unlock(&txr->hn_tx_lock);
3659 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
3661 struct vmbus_chan_br cbr;
3662 struct hn_rx_ring *rxr;
3663 struct hn_tx_ring *txr = NULL;
3666 idx = vmbus_chan_subidx(chan);
3669 * Link this channel to RX/TX ring.
3671 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
3672 ("invalid channel index %d, should be >= 0 && < %d",
3673 idx, sc->hn_rx_ring_inuse));
3674 rxr = &sc->hn_rx_ring[idx];
3675 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
3676 ("RX ring %d already attached", idx));
3677 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
3680 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
3681 idx, vmbus_chan_id(chan));
3684 if (idx < sc->hn_tx_ring_inuse) {
3685 txr = &sc->hn_tx_ring[idx];
3686 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
3687 ("TX ring %d already attached", idx));
3688 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
3690 txr->hn_chan = chan;
3692 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
3693 idx, vmbus_chan_id(chan));
3697 /* Bind this channel to a proper CPU. */
3698 vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus);
3703 cbr.cbr = rxr->hn_br;
3704 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
3705 cbr.cbr_txsz = HN_TXBR_SIZE;
3706 cbr.cbr_rxsz = HN_RXBR_SIZE;
3707 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
3709 if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
3710 vmbus_chan_id(chan), error);
3711 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
3713 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
3719 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
3721 struct hn_rx_ring *rxr;
3724 idx = vmbus_chan_subidx(chan);
3727 * Unlink this channel from the RX/TX ring.
3729 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
3730 ("invalid channel index %d, should be >= 0 && < %d",
3731 idx, sc->hn_rx_ring_inuse));
3732 rxr = &sc->hn_rx_ring[idx];
3733 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
3734 ("RX ring %d is not attached", idx));
3735 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
3737 if (idx < sc->hn_tx_ring_inuse) {
3738 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
3740 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
3741 ("TX ring %d is not attached", idx));
3742 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
3746 * Close this channel.
3749 * Channel closing does _not_ destroy the target channel.
3751 vmbus_chan_close(chan);
3755 hn_attach_subchans(struct hn_softc *sc)
3757 struct vmbus_channel **subchans;
3758 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
3761 if (subchan_cnt == 0)
3764 /* Attach the sub-channels. */
3765 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
3766 for (i = 0; i < subchan_cnt; ++i) {
3767 error = hn_chan_attach(sc, subchans[i]);
3771 vmbus_subchan_rel(subchans, subchan_cnt);
3774 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
3777 if_printf(sc->hn_ifp, "%d sub-channels attached\n",
3785 hn_detach_allchans(struct hn_softc *sc)
3787 struct vmbus_channel **subchans;
3788 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
3791 if (subchan_cnt == 0)
3794 /* Detach the sub-channels. */
3795 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
3796 for (i = 0; i < subchan_cnt; ++i)
3797 hn_chan_detach(sc, subchans[i]);
3798 vmbus_subchan_rel(subchans, subchan_cnt);
3802 * Detach the primary channel, _after_ all sub-channels are detached.
3805 hn_chan_detach(sc, sc->hn_prichan);
3807 /* Wait for sub-channels to be destroyed, if any. */
3808 vmbus_subchan_drain(sc->hn_prichan);
3811 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
3812 KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
3813 HN_RX_FLAG_ATTACHED) == 0,
3814 ("%dth RX ring is still attached", i));
3816 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3817 KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
3818 HN_TX_FLAG_ATTACHED) == 0,
3819 ("%dth TX ring is still attached", i));
3825 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
3827 struct vmbus_channel **subchans;
3828 int nchan, rxr_cnt, error;
3830 nchan = *nsubch + 1;
3833 * Multiple RX/TX rings are not requested.
3840 * Query RSS capabilities, e.g. # of RX rings, and # of indirect table entries.
3843 error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
3845 /* No RSS; this is benign. */
3850 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
3854 if (nchan > rxr_cnt)
3857 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
3863 * Allocate sub-channels from NVS.
3865 *nsubch = nchan - 1;
3866 error = hn_nvs_alloc_subchans(sc, nsubch);
3867 if (error || *nsubch == 0) {
3868 /* Failed to allocate sub-channels. */
3874 * Wait for all sub-channels to become ready before moving on.
3876 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
3877 vmbus_subchan_rel(subchans, *nsubch);
3882 hn_synth_attach(struct hn_softc *sc, int mtu)
3884 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
3885 int error, nsubch, nchan, i;
3888 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
3889 ("synthetic parts were attached"));
3891 /* Save capabilities for later verification. */
3892 old_caps = sc->hn_caps;
3895 /* Clear RSS stuffs. */
3896 sc->hn_rss_ind_size = 0;
3897 sc->hn_rss_hash = 0;
3900 * Attach the primary channel _before_ attaching NVS and RNDIS.
3902 error = hn_chan_attach(sc, sc->hn_prichan);
3909 error = hn_nvs_attach(sc, mtu);
3914 * Attach RNDIS _after_ NVS is attached.
3916 error = hn_rndis_attach(sc, mtu);
3921 * Make sure capabilities are not changed.
3923 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
3924 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
3925 old_caps, sc->hn_caps);
3926 /* Restore old capabilities and abort. */
3927 sc->hn_caps = old_caps;
3932 * Allocate sub-channels for multi-TX/RX rings.
3935 * The # of RX rings that can be used is equivalent to the # of
3936 * channels to be requested.
3938 nsubch = sc->hn_rx_ring_cnt - 1;
3939 error = hn_synth_alloc_subchans(sc, &nsubch);
3945 /* Only the primary channel can be used; done */
3950 * Configure RSS key and indirect table _after_ all sub-channels
3954 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
3956 * RSS key is not set yet; set it to the default RSS key.
3959 if_printf(sc->hn_ifp, "setup default RSS key\n");
3960 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
3961 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
3964 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
3966 * RSS indirect table is not set yet; set it up in round-robin fashion.
3970 if_printf(sc->hn_ifp, "setup default RSS indirect "
3973 for (i = 0; i < NDIS_HASH_INDCNT; ++i)
3974 rss->rss_ind[i] = i % nchan;
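/*
 * E.g. with nchan == 4 the indirect table becomes
 * 0, 1, 2, 3, 0, 1, 2, 3, ... for all NDIS_HASH_INDCNT entries.
 */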
3975 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
3978 * # of usable channels may be changed, so we have to
3979 * make sure that all entries in the RSS indirect table are valid.
3982 hn_rss_ind_fixup(sc, nchan);
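/*
 * Editor's sketch of what such a fixup could look like (an assumption;
 * hn_rss_ind_fixup()'s actual body is not shown here): remap any entry
 * pointing beyond the usable channels back into range.
 */
#if 0
	for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
		if (rss->rss_ind[i] >= nchan)
			rss->rss_ind[i] %= nchan;
	}
#endif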
3985 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
3988 * Failed to configure RSS key or indirect table; only
3989 * the primary channel can be used.
3995 * Set the # of TX/RX rings that could be used according to
3996 * the # of channels that NVS offered.
3998 hn_set_ring_inuse(sc, nchan);
4001 * Attach the sub-channels, if any.
4003 error = hn_attach_subchans(sc);
4007 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
4013 * The interface must have been suspended through hn_suspend() before
4014 * this function gets called.
4017 hn_synth_detach(struct hn_softc *sc)
4021 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
4022 ("synthetic parts were not attached"));
4024 /* Detach the RNDIS first. */
4025 hn_rndis_detach(sc);
4030 /* Detach all of the channels. */
4031 hn_detach_allchans(sc);
4033 sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
4037 hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
4039 KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
4040 ("invalid ring count %d", ring_cnt));
4042 if (sc->hn_tx_ring_cnt > ring_cnt)
4043 sc->hn_tx_ring_inuse = ring_cnt;
4045 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
4046 sc->hn_rx_ring_inuse = ring_cnt;
4049 if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
4050 sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
4055 hn_chan_drain(struct vmbus_channel *chan)
4058 while (!vmbus_chan_rx_empty(chan) || !vmbus_chan_tx_empty(chan))
4060 vmbus_chan_intr_drain(chan);
4064 hn_suspend_data(struct hn_softc *sc)
4066 struct vmbus_channel **subch = NULL;
4074 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
4075 struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
4077 mtx_lock(&txr->hn_tx_lock);
4078 txr->hn_suspended = 1;
4079 mtx_unlock(&txr->hn_tx_lock);
4080 /* No one is able to send more packets now. */
4082 /* Wait for all pending sends to finish. */
4083 while (hn_tx_ring_pending(txr))
4084 pause("hnwtx", 1 /* 1 tick */);
4086 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
4087 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
4091 * Disable RX by clearing RX filter.
4093 sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
4094 hn_rndis_set_rxfilter(sc, sc->hn_rx_filter);
4097 * Give RNDIS enough time to flush all pending data packets.
4099 pause("waitrx", (200 * hz) / 1000);
4102 * Drain RX/TX bufrings and interrupts.
4104 nsubch = sc->hn_rx_ring_inuse - 1;
4106 subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
4108 if (subch != NULL) {
4109 for (i = 0; i < nsubch; ++i)
4110 hn_chan_drain(subch[i]);
4112 hn_chan_drain(sc->hn_prichan);
4115 vmbus_subchan_rel(subch, nsubch);
4119 hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
4122 ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
4126 hn_suspend_mgmt(struct hn_softc *sc)
4133 * Make sure that hn_mgmt_taskq0 can no longer be accessed
4134 * through hn_mgmt_taskq.
4136 TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
4137 vmbus_chan_run_task(sc->hn_prichan, &task);
4140 * Make sure that all pending management tasks are completed.
4142 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
4143 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
4144 taskqueue_drain_all(sc->hn_mgmt_taskq0);
4148 hn_suspend(struct hn_softc *sc)
4151 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
4152 hn_suspend_data(sc);
4153 hn_suspend_mgmt(sc);
4157 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
4161 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
4162 ("invalid TX ring count %d", tx_ring_cnt));
4164 for (i = 0; i < tx_ring_cnt; ++i) {
4165 struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
4167 mtx_lock(&txr->hn_tx_lock);
4168 txr->hn_suspended = 0;
4169 mtx_unlock(&txr->hn_tx_lock);
4174 hn_resume_data(struct hn_softc *sc)
4183 hn_set_rxfilter(sc);
4186 * Make sure to clear suspend status on "all" TX rings,
4187 * since hn_tx_ring_inuse can be changed after
4188 * hn_suspend_data().
4190 hn_resume_tx(sc, sc->hn_tx_ring_cnt);
4192 #ifdef HN_IFSTART_SUPPORT
4193 if (!hn_use_if_start)
4197 * Flush unused drbrs, since hn_tx_ring_inuse may be changed after hn_suspend_data().
4200 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
4201 hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
4207 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
4208 struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
4211 * Use txeof task, so that any pending oactive can be cleared properly.
4214 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
4219 hn_resume_mgmt(struct hn_softc *sc)
4222 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
4225 * Kick off network change detection, if it was pending.
4226 * If no network change was pending, start link status
4227 * checks, which are more lightweight than network change detection.
4230 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
4231 hn_change_network(sc);
4233 hn_update_link_status(sc);
4237 hn_resume(struct hn_softc *sc)
4240 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
4246 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
4248 const struct rndis_status_msg *msg;
4251 if (dlen < sizeof(*msg)) {
4252 if_printf(sc->hn_ifp, "invalid RNDIS status\n");
4257 switch (msg->rm_status) {
4258 case RNDIS_STATUS_MEDIA_CONNECT:
4259 case RNDIS_STATUS_MEDIA_DISCONNECT:
4260 hn_update_link_status(sc);
4263 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
4264 /* Not really useful; ignore. */
4267 case RNDIS_STATUS_NETWORK_CHANGE:
4268 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
4269 if (dlen < ofs + msg->rm_stbuflen ||
4270 msg->rm_stbuflen < sizeof(uint32_t)) {
4271 if_printf(sc->hn_ifp, "network changed\n");
4275 memcpy(&change, ((const uint8_t *)msg) + ofs,
4277 if_printf(sc->hn_ifp, "network changed, change %u\n",
4280 hn_change_network(sc);
4284 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
4291 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
4293 const struct rndis_pktinfo *pi = info_data;
4296 while (info_dlen != 0) {
4300 if (__predict_false(info_dlen < sizeof(*pi)))
4302 if (__predict_false(info_dlen < pi->rm_size))
4304 info_dlen -= pi->rm_size;
4306 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
4308 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
4310 dlen = pi->rm_size - pi->rm_pktinfooffset;
4313 switch (pi->rm_type) {
4314 case NDIS_PKTINFO_TYPE_VLAN:
4315 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
4317 info->vlan_info = *((const uint32_t *)data);
4318 mask |= HN_RXINFO_VLAN;
4321 case NDIS_PKTINFO_TYPE_CSUM:
4322 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
4324 info->csum_info = *((const uint32_t *)data);
4325 mask |= HN_RXINFO_CSUM;
4328 case HN_NDIS_PKTINFO_TYPE_HASHVAL:
4329 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
4331 info->hash_value = *((const uint32_t *)data);
4332 mask |= HN_RXINFO_HASHVAL;
4335 case HN_NDIS_PKTINFO_TYPE_HASHINF:
4336 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
4338 info->hash_info = *((const uint32_t *)data);
4339 mask |= HN_RXINFO_HASHINF;
4346 if (mask == HN_RXINFO_ALL) {
4347 /* All found; done */
4351 pi = (const struct rndis_pktinfo *)
4352 ((const uint8_t *)pi + pi->rm_size);
4357 * - If there is no hash value, invalidate the hash info.
4359 if ((mask & HN_RXINFO_HASHVAL) == 0)
4360 info->hash_info = HN_NDIS_HASH_INFO_INVALID;
4364 static __inline bool
4365 hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
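/*
 * Returns true when [off, off + len) and [check_off, check_off +
 * check_len) intersect.  E.g. [10, 15) vs [15, 20) do not overlap
 * (10 + 5 <= 15), while [10, 16) vs [15, 20) do; equal offsets are
 * always treated as overlapping.
 */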
4368 if (off < check_off) {
4369 if (__predict_true(off + len <= check_off))
4371 } else if (off > check_off) {
4372 if (__predict_true(check_off + check_len <= off))
4379 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
4381 const struct rndis_packet_msg *pkt;
4382 struct hn_rxinfo info;
4383 int data_off, pktinfo_off, data_len, pktinfo_len;
4388 if (__predict_false(dlen < sizeof(*pkt))) {
4389 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
4394 if (__predict_false(dlen < pkt->rm_len)) {
4395 if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
4396 "dlen %d, msglen %u\n", dlen, pkt->rm_len);
4399 if (__predict_false(pkt->rm_len <
4400 pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
4401 if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
4402 "msglen %u, data %u, oob %u, pktinfo %u\n",
4403 pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
4404 pkt->rm_pktinfolen);
4407 if (__predict_false(pkt->rm_datalen == 0)) {
4408 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
4415 #define IS_OFFSET_INVALID(ofs) \
4416 ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \
4417 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))
4419 /* XXX Hyper-V does not meet data offset alignment requirement */
4420 if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
4421 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4422 "data offset %u\n", pkt->rm_dataoffset);
4425 if (__predict_false(pkt->rm_oobdataoffset > 0 &&
4426 IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
4427 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4428 "oob offset %u\n", pkt->rm_oobdataoffset);
4431 if (__predict_true(pkt->rm_pktinfooffset > 0) &&
4432 __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
4433 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4434 "pktinfo offset %u\n", pkt->rm_pktinfooffset);
4438 #undef IS_OFFSET_INVALID
4440 data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
4441 data_len = pkt->rm_datalen;
4442 pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
4443 pktinfo_len = pkt->rm_pktinfolen;
4446 * Check OOB coverage.
4448 if (__predict_false(pkt->rm_oobdatalen != 0)) {
4449 int oob_off, oob_len;
4451 if_printf(rxr->hn_ifp, "got oobdata\n");
4452 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
4453 oob_len = pkt->rm_oobdatalen;
4455 if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
4456 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4457 "oob overflow, msglen %u, oob abs %d len %d\n",
4458 pkt->rm_len, oob_off, oob_len);
4463 * Check against data.
4465 if (hn_rndis_check_overlap(oob_off, oob_len,
4466 data_off, data_len)) {
4467 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4468 "oob overlaps data, oob abs %d len %d, "
4469 "data abs %d len %d\n",
4470 oob_off, oob_len, data_off, data_len);
4475 * Check against pktinfo.
4477 if (pktinfo_len != 0 &&
4478 hn_rndis_check_overlap(oob_off, oob_len,
4479 pktinfo_off, pktinfo_len)) {
4480 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4481 "oob overlaps pktinfo, oob abs %d len %d, "
4482 "pktinfo abs %d len %d\n",
4483 oob_off, oob_len, pktinfo_off, pktinfo_len);
4489 * Check per-packet-info coverage and find useful per-packet-info.
4491 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
4492 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
4493 info.hash_info = HN_NDIS_HASH_INFO_INVALID;
4494 if (__predict_true(pktinfo_len != 0)) {
4498 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
4499 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4500 "pktinfo overflow, msglen %u, "
4501 "pktinfo abs %d len %d\n",
4502 pkt->rm_len, pktinfo_off, pktinfo_len);
4507 * Check packet info coverage.
4509 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
4510 data_off, data_len);
4511 if (__predict_false(overlap)) {
4512 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4513 "pktinfo overlap data, pktinfo abs %d len %d, "
4514 "data abs %d len %d\n",
4515 pktinfo_off, pktinfo_len, data_off, data_len);
4520 * Find useful per-packet-info.
4522 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
4523 pktinfo_len, &info);
4524 if (__predict_false(error)) {
4525 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
4531 if (__predict_false(data_off + data_len > pkt->rm_len)) {
4532 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4533 "data overflow, msglen %u, data abs %d len %d\n",
4534 pkt->rm_len, data_off, data_len);
4537 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
4540 static __inline void
4541 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
4543 const struct rndis_msghdr *hdr;
4545 if (__predict_false(dlen < sizeof(*hdr))) {
4546 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
4551 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
4552 /* Hot data path. */
4553 hn_rndis_rx_data(rxr, data, dlen);
4558 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
4559 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
4561 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
4565 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
4567 const struct hn_nvs_hdr *hdr;
4569 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
4570 if_printf(sc->hn_ifp, "invalid nvs notify\n");
4573 hdr = VMBUS_CHANPKT_CONST_DATA(pkt);
4575 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
4576 /* Useless; ignore */
4579 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
4583 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
4584 const struct vmbus_chanpkt_hdr *pkt)
4586 struct hn_nvs_sendctx *sndc;
4588 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
4589 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
4590 VMBUS_CHANPKT_DATALEN(pkt));
4593 * 'sndc' CAN NOT be accessed anymore, since it can be freed by its callback.
4599 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
4600 const struct vmbus_chanpkt_hdr *pkthdr)
4602 const struct vmbus_chanpkt_rxbuf *pkt;
4603 const struct hn_nvs_hdr *nvs_hdr;
4606 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
4607 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
4610 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);
4612 /* Make sure that this is a RNDIS message. */
4613 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
4614 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
4619 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
4620 if (__predict_false(hlen < sizeof(*pkt))) {
4621 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
4624 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;
4626 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
4627 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
4632 count = pkt->cp_rxbuf_cnt;
4633 if (__predict_false(hlen <
4634 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
4635 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
4639 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
4640 for (i = 0; i < count; ++i) {
4643 ofs = pkt->cp_rxbuf[i].rb_ofs;
4644 len = pkt->cp_rxbuf[i].rb_len;
4645 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
4646 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
4647 "ofs %d, len %d\n", i, ofs, len);
4650 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
4654 * Ack the consumed RXBUF associated w/ this channel packet,
4655 * so that this RXBUF can be recycled by the hypervisor.
4657 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
4661 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
4664 struct hn_nvs_rndis_ack ack;
4667 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
4668 ack.nvs_status = HN_NVS_STATUS_OK;
4672 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
4673 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
4674 if (__predict_false(error == EAGAIN)) {
4677 * This should _not_ happen in the real world, since the
4678 * consumption of the TX bufring from the TX path is controlled.
4681 if (rxr->hn_ack_failed == 0)
4682 if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
4683 rxr->hn_ack_failed++;
4690 if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
4695 hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
4697 struct hn_rx_ring *rxr = xrxr;
4698 struct hn_softc *sc = rxr->hn_ifp->if_softc;
4701 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
4704 pktlen = rxr->hn_pktbuf_len;
4705 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
4706 if (__predict_false(error == ENOBUFS)) {
4711 * Expand channel packet buffer.
4714 * Use M_WAITOK, since an allocation failure cannot be tolerated here.
4717 nlen = rxr->hn_pktbuf_len * 2;
4718 while (nlen < pktlen)
4720 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);
4722 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
4723 rxr->hn_pktbuf_len, nlen);
4725 free(rxr->hn_pktbuf, M_DEVBUF);
4726 rxr->hn_pktbuf = nbuf;
4727 rxr->hn_pktbuf_len = nlen;
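/*
 * E.g. a 512-byte pktbuf faced with a 1300-byte channel packet doubles
 * to 1024 and then to 2048 before the receive is retried.
 */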
4730 } else if (__predict_false(error == EAGAIN)) {
4731 /* No more channel packets; done! */
4734 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));
4736 switch (pkt->cph_type) {
4737 case VMBUS_CHANPKT_TYPE_COMP:
4738 hn_nvs_handle_comp(sc, chan, pkt);
4741 case VMBUS_CHANPKT_TYPE_RXBUF:
4742 hn_nvs_handle_rxbuf(rxr, chan, pkt);
4745 case VMBUS_CHANPKT_TYPE_INBAND:
4746 hn_nvs_handle_notify(sc, pkt);
4750 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
4755 hn_chan_rollup(rxr, rxr->hn_txr);
4759 hn_tx_taskq_create(void *arg __unused)
4762 if (vm_guest != VM_GUEST_HV)
4765 if (!hn_share_tx_taskq)
4768 hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
4769 taskqueue_thread_enqueue, &hn_tx_taskq);
4770 taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx");
4771 if (hn_bind_tx_taskq >= 0) {
4772 int cpu = hn_bind_tx_taskq;
4773 struct task cpuset_task;
4776 if (cpu > mp_ncpus - 1)
4778 CPU_SETOF(cpu, &cpu_set);
4779 TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, &cpu_set);
4780 taskqueue_enqueue(hn_tx_taskq, &cpuset_task);
4781 taskqueue_drain(hn_tx_taskq, &cpuset_task);
4784 SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND,
4785 hn_tx_taskq_create, NULL);
4788 hn_tx_taskq_destroy(void *arg __unused)
4791 if (hn_tx_taskq != NULL)
4792 taskqueue_free(hn_tx_taskq);
4794 SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND,
4795 hn_tx_taskq_destroy, NULL);