/*-
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet6.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/buf_ring.h>

#include <machine/atomic.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_vlan_var.h>
#include <net/rndis.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>

#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>

#include "vmbus_if.h"
#define HN_IFSTART_SUPPORT

#define HN_RING_CNT_DEF_MAX		8

/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT			512

#define HN_RNDIS_PKT_LEN					\
	(sizeof(struct rndis_packet_msg) +			\
	 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) +	\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
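/*
 * NOTE:
 * HN_RNDIS_PKT_LEN pre-reserves per-packet-info space for every metadata
 * type this driver may attach on TX: the hash value, the VLAN tag, LSO
 * (TSO) and TX checksum information.  hn_encap() only appends the
 * pktinfos a given packet actually needs, so the header it builds is
 * usually smaller than this worst-case size.
 */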
#define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE

#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)

#define HN_DIRECT_TX_SIZE_DEF		128

#define HN_EARLY_TXEOF_THRESH		8

#define HN_PKTBUF_LEN_DEF		(16 * 1024)

#define HN_LROENT_CNT_DEF		128

#define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp)		(2 * (ifp)->if_mtu)

#define HN_LRO_ACKCNT_DEF		1

#define HN_LOCK_INIT(sc)					\
	sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc)		sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc)		sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc)			sx_xlock(&(sc)->hn_lock)
#define HN_UNLOCK(sc)			sx_xunlock(&(sc)->hn_lock)

#define HN_CSUM_IP_MASK			(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK		(CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc)		\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc)	\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)
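/*
 * NOTE:
 * TX ring 0 is used as the representative ring in the two macros above;
 * the checksum-assist settings appear to be configured identically on
 * all TX rings (see hn_fixup_tx_data()), so any ring would do.
 */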
struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
	SLIST_ENTRY(hn_txdesc)		link;
#endif
	struct mbuf			*m;
	struct hn_tx_ring		*txr;
	int				refs;
	uint32_t			flags;	/* HN_TXD_FLAG_ */
	struct hn_nvs_sendctx		send_ctx;
	uint32_t			chim_index;
	int				chim_size;

	bus_dmamap_t			data_dmap;

	bus_addr_t			rndis_pkt_paddr;
	struct rndis_packet_msg		*rndis_pkt;
	bus_dmamap_t			rndis_pkt_dmap;
};

#define HN_TXD_FLAG_ONLIST		0x0001
#define HN_TXD_FLAG_DMAMAP		0x0002
struct hn_rxinfo {
	uint32_t			vlan_info;
	uint32_t			csum_info;
	uint32_t			hash_info;
	uint32_t			hash_value;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0
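/*
 * The _INVALID values above are sentinels: hn_rxpkt() treats a field of
 * struct hn_rxinfo that still holds its sentinel as "not supplied by the
 * host" and falls back to software handling for that piece of metadata.
 */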
static int			hn_probe(device_t);
static int			hn_attach(device_t);
static int			hn_detach(device_t);
static int			hn_shutdown(device_t);
static void			hn_chan_callback(struct vmbus_channel *,
				    void *);

static void			hn_init(void *);
static int			hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void			hn_start(struct ifnet *);
#endif
static int			hn_transmit(struct ifnet *, struct mbuf *);
static void			hn_xmit_qflush(struct ifnet *);
static int			hn_ifmedia_upd(struct ifnet *);
static void			hn_ifmedia_sts(struct ifnet *,
				    struct ifmediareq *);

static int			hn_rndis_rxinfo(const void *, int,
				    struct hn_rxinfo *);
static void			hn_rndis_rx_data(struct hn_rx_ring *,
				    const void *, int);
static void			hn_rndis_rx_status(struct hn_softc *,
				    const void *, int);

static void			hn_nvs_handle_notify(struct hn_softc *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_handle_comp(struct hn_softc *,
				    struct vmbus_channel *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_handle_rxbuf(struct hn_rx_ring *,
				    struct vmbus_channel *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_ack_rxbuf(struct hn_rx_ring *,
				    struct vmbus_channel *, uint64_t);

#if __FreeBSD_version >= 1100099
static int			hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int			hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
#else
static int			hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);

static void			hn_stop(struct hn_softc *);
static void			hn_init_locked(struct hn_softc *);
static int			hn_chan_attach(struct hn_softc *,
				    struct vmbus_channel *);
static void			hn_chan_detach(struct hn_softc *,
				    struct vmbus_channel *);
static int			hn_attach_subchans(struct hn_softc *);
static void			hn_detach_allchans(struct hn_softc *);
static void			hn_chan_rollup(struct hn_rx_ring *,
				    struct hn_tx_ring *);
static void			hn_set_ring_inuse(struct hn_softc *, int);
static int			hn_synth_attach(struct hn_softc *, int);
static void			hn_synth_detach(struct hn_softc *);
static int			hn_synth_alloc_subchans(struct hn_softc *,
				    int *);
static void			hn_suspend(struct hn_softc *);
static void			hn_suspend_data(struct hn_softc *);
static void			hn_suspend_mgmt(struct hn_softc *);
static void			hn_resume(struct hn_softc *);
static void			hn_resume_data(struct hn_softc *);
static void			hn_resume_mgmt(struct hn_softc *);
static void			hn_suspend_mgmt_taskfunc(void *, int);
static void			hn_chan_drain(struct vmbus_channel *);

static void			hn_update_link_status(struct hn_softc *);
static void			hn_change_network(struct hn_softc *);
static void			hn_link_taskfunc(void *, int);
static void			hn_netchg_init_taskfunc(void *, int);
static void			hn_netchg_status_taskfunc(void *, int);
static void			hn_link_status(struct hn_softc *);

static int			hn_create_rx_data(struct hn_softc *, int);
static void			hn_destroy_rx_data(struct hn_softc *);
static int			hn_check_iplen(const struct mbuf *, int);
static int			hn_set_rxfilter(struct hn_softc *);
static int			hn_rss_reconfig(struct hn_softc *);
static void			hn_rss_ind_fixup(struct hn_softc *, int);
static int			hn_rxpkt(struct hn_rx_ring *, const void *,
				    int, const struct hn_rxinfo *);

static int			hn_tx_ring_create(struct hn_softc *, int);
static void			hn_tx_ring_destroy(struct hn_tx_ring *);
static int			hn_create_tx_data(struct hn_softc *, int);
static void			hn_fixup_tx_data(struct hn_softc *);
static void			hn_destroy_tx_data(struct hn_softc *);
static void			hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static int			hn_encap(struct hn_tx_ring *,
				    struct hn_txdesc *, struct mbuf **);
static int			hn_txpkt(struct ifnet *, struct hn_tx_ring *,
				    struct hn_txdesc *);
static void			hn_set_chim_size(struct hn_softc *, int);
static void			hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool			hn_tx_ring_pending(struct hn_tx_ring *);
static void			hn_tx_ring_qflush(struct hn_tx_ring *);
static void			hn_resume_tx(struct hn_softc *, int);
static int			hn_get_txswq_depth(const struct hn_tx_ring *);
static void			hn_txpkt_done(struct hn_nvs_sendctx *,
				    struct hn_softc *, struct vmbus_channel *,
				    const void *, int);
static int			hn_txpkt_sglist(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_txpkt_chim(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_xmit(struct hn_tx_ring *, int);
static void			hn_xmit_taskfunc(void *, int);
static void			hn_xmit_txeof(struct hn_tx_ring *);
static void			hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int			hn_start_locked(struct hn_tx_ring *, int);
static void			hn_start_taskfunc(void *, int);
static void			hn_start_txeof(struct hn_tx_ring *);
static void			hn_start_txeof_taskfunc(void *, int);
#endif
SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust TCP segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust TCP segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust UDP datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust UDP datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust IP packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust IP packet verification on host side, "
    "when csum info is missing (global setting)");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

/* Use shared TX taskqueue */
static int hn_share_tx_taskq = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN,
    &hn_share_tx_taskq, 0, "Enable shared TX taskqueue");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

/* Bind TX taskqueue to the target CPU */
static int hn_bind_tx_taskq = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN,
    &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

static u_int			hn_cpu_index;	/* next CPU for channel */
static struct taskqueue		*hn_tx_taskq;	/* shared TX taskqueue */
static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
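/*
 * The default RSS key above is the 40-byte sample Toeplitz key published
 * in Microsoft's RSS specification, which many RSS-capable drivers also
 * use as their out-of-the-box default.  It can be replaced at runtime
 * through the rss_key sysctl handled by hn_rss_key_sysctl() below.
 */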
static device_method_t hn_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		hn_probe),
	DEVMETHOD(device_attach,	hn_attach),
	DEVMETHOD(device_detach,	hn_detach),
	DEVMETHOD(device_shutdown,	hn_shutdown),
	DEVMETHOD_END
};

static driver_t hn_driver = {
	"hn",
	hn_methods,
	sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);
#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
	int i;

	for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
		sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif

static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size == 0, ("invalid rndis sglist txd"));
	return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
	    &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}
static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
	struct hn_nvs_rndis rndis;

	KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size > 0, ("invalid rndis chim txd"));

	rndis.nvs_type = HN_NVS_TYPE_RNDIS;
	rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
	rndis.nvs_chim_idx = txd->chim_index;
	rndis.nvs_chim_sz = txd->chim_size;

	return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
	    &rndis, sizeof(rndis), &txd->send_ctx));
}
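/*
 * Chimney ("copy") buffer allocation.  The shared chimney region is
 * carved into sc->hn_chim_cnt slots of sc->hn_chim_szmax bytes each,
 * tracked by a bitmap of u_long words: ffsl(~bmap[i]) locates a clear
 * (free) bit and atomic_testandset_long() claims it, so the allocator
 * below is lock-free; losing a race simply moves the scan on to the
 * next bitmap word.
 */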
static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
	int i, bmap_cnt = sc->hn_chim_bmap_cnt;
	u_long *bmap = sc->hn_chim_bmap;
	uint32_t ret = HN_NVS_CHIM_IDX_INVALID;

	for (i = 0; i < bmap_cnt; ++i) {
		int idx;

		idx = ffsl(~bmap[i]);
		if (idx == 0)
			continue;

		--idx; /* ffsl is 1-based */
		KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
		    ("invalid i %d and idx %d", i, idx));

		if (atomic_testandset_long(&bmap[i], idx))
			continue;

		ret = i * LONG_BIT + idx;
		break;
	}
	return (ret);
}
static void
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
{
	u_long mask;
	uint32_t idx;

	idx = chim_idx / LONG_BIT;
	KASSERT(idx < sc->hn_chim_bmap_cnt,
	    ("invalid chimney index 0x%x", chim_idx));

	mask = 1UL << (chim_idx % LONG_BIT);
	KASSERT(sc->hn_chim_bmap[idx] & mask,
	    ("index bitmap 0x%lx, chimney index %u, "
	     "bitmap idx %d, bitmask 0x%lx",
	     sc->hn_chim_bmap[idx], chim_idx, idx, mask));

	atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
}
static int
hn_set_rxfilter(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	uint32_t filter;
	int error = 0;

	HN_LOCK_ASSERT(sc);

	if (ifp->if_flags & IFF_PROMISC) {
		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
	} else {
		filter = NDIS_PACKET_TYPE_DIRECTED;
		if (ifp->if_flags & IFF_BROADCAST)
			filter |= NDIS_PACKET_TYPE_BROADCAST;
#ifdef notyet
		/*
		 * See the comment in SIOCADDMULTI/SIOCDELMULTI.
		 */
		/* TODO: support multicast list */
		if ((ifp->if_flags & IFF_ALLMULTI) ||
		    !TAILQ_EMPTY(&ifp->if_multiaddrs))
			filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
#else
		/* Always enable ALLMULTI */
		filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
#endif
	}

	if (sc->hn_rx_filter != filter) {
		error = hn_rndis_set_rxfilter(sc, filter);
		if (!error)
			sc->hn_rx_filter = filter;
	}
	return (error);
}
static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{

	KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
	if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
		return txr->hn_txdesc_cnt;
	return hn_tx_swq_depth;
}
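/*
 * In effect, the hw.hn.tx_swq_depth tunable is only honored when it
 * exceeds the number of TX descriptors on the ring; anything smaller
 * (including the default 0) falls back to the TX descriptor count, so
 * the software queue can never be shallower than the descriptor pool.
 */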
static int
hn_rss_reconfig(struct hn_softc *sc)
{
	int error;

	HN_LOCK_ASSERT(sc);

	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
		return (ENXIO);

	/*
	 * Disable RSS first.
	 *
	 * NOTE:
	 * Direct reconfiguration by setting the UNCHG flags does
	 * _not_ work properly.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "disable RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS disable failed\n");
		return (error);
	}

	/*
	 * Reenable the RSS w/ the updated RSS key or indirect
	 * table.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "reconfig RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS reconfig failed\n");
		return (error);
	}
	return (0);
}
static void
hn_rss_ind_fixup(struct hn_softc *sc, int nchan)
{
	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int i;

	KASSERT(nchan > 1, ("invalid # of channels %d", nchan));

	/*
	 * Check indirect table to make sure that all channels in it
	 * can be used.
	 */
	for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
		if (rss->rss_ind[i] >= nchan) {
			if_printf(sc->hn_ifp,
			    "RSS indirect table %d fixup: %u -> %d\n",
			    i, rss->rss_ind[i], nchan - 1);
			rss->rss_ind[i] = nchan - 1;
		}
	}
}
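/*
 * Example: if the host grants only 4 channels while a saved indirect
 * table still references ring 6, every out-of-range entry is clamped to
 * the last usable ring (nchan - 1 == 3).  Clamping, rather than
 * redistributing the whole table, presumably keeps the user-configured
 * mapping intact for all in-range entries.
 */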
static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{

	return EOPNOTSUPP;
}

static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct hn_softc *sc = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
		ifmr->ifm_active |= IFM_NONE;
		return;
	}
	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}
/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
static const struct hyperv_guid g_net_vsc_device_type = {
	.hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
};

static int
hn_probe(device_t dev)
{

	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
	    &g_net_vsc_device_type) == 0) {
		device_set_desc(dev, "Hyper-V Network Interface");
		return BUS_PROBE_DEFAULT;
	}
	return ENXIO;
}
static void
hn_cpuset_setthread_task(void *xmask, int pending __unused)
{
	cpuset_t *mask = xmask;
	int error;

	error = cpuset_setthread(curthread->td_tid, mask);
	if (error) {
		panic("curthread=%ju: can't pin; error=%d",
		    (uintmax_t)curthread->td_tid, error);
	}
}
static int
hn_attach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct ifnet *ifp = NULL;
	int error, ring_cnt, tx_ring_cnt;

	sc->hn_dev = dev;
	sc->hn_prichan = vmbus_get_channel(dev);
	HN_LOCK_INIT(sc);

	/*
	 * Setup taskqueue for transmission.
	 */
	if (hn_tx_taskq == NULL) {
		sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &sc->hn_tx_taskq);
		taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx",
		    device_get_nameunit(dev));
		if (hn_bind_tx_taskq >= 0) {
			int cpu = hn_bind_tx_taskq;
			struct task cpuset_task;
			cpuset_t cpu_set;

			if (cpu > mp_ncpus - 1)
				cpu = mp_ncpus - 1;
			CPU_SETOF(cpu, &cpu_set);
			TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task,
			    &cpu_set);
			taskqueue_enqueue(sc->hn_tx_taskq, &cpuset_task);
			taskqueue_drain(sc->hn_tx_taskq, &cpuset_task);
		}
	} else {
		sc->hn_tx_taskq = hn_tx_taskq;
	}

	/*
	 * Setup taskqueue for management tasks, e.g. link status.
	 */
	sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
	taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
	TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
	TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
	    hn_netchg_status_taskfunc, sc);

	/*
	 * Allocate ifnet and setup its name earlier, so that if_printf
	 * can be used by functions, which will be called after
	 * ether_ifattach().
	 */
	ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/*
	 * Initialize ifmedia earlier so that it can be unconditionally
	 * destroyed, if error happened later on.
	 */
	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);

	/*
	 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
	 * to use (tx_ring_cnt).
	 *
	 * NOTE:
	 * The # of RX rings to use is same as the # of channels to use.
	 */
	ring_cnt = hn_chan_cnt;
	if (ring_cnt <= 0) {
		/* Default */
		ring_cnt = mp_ncpus;
		if (ring_cnt > HN_RING_CNT_DEF_MAX)
			ring_cnt = HN_RING_CNT_DEF_MAX;
	} else if (ring_cnt > mp_ncpus) {
		ring_cnt = mp_ncpus;
	}

	tx_ring_cnt = hn_tx_ring_cnt;
	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
		tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		/* ifnet.if_start only needs one TX ring. */
		tx_ring_cnt = 1;
	}
#endif

	/*
	 * Set the leader CPU for channels.
	 */
	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;

	/*
	 * Create enough TX/RX rings, even if only limited number of
	 * channels can be allocated.
	 */
	error = hn_create_tx_data(sc, tx_ring_cnt);
	if (error)
		goto failed;
	error = hn_create_rx_data(sc, ring_cnt);
	if (error)
		goto failed;

	/*
	 * Create transaction context for NVS and RNDIS transactions.
	 */
	sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
	    HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
	if (sc->hn_xact == NULL) {
		error = ENXIO;
		goto failed;
	}

	/*
	 * Attach the synthetic parts, i.e. NVS and RNDIS.
	 */
	error = hn_synth_attach(sc, ETHERMTU);
	if (error)
		goto failed;

	error = hn_rndis_get_eaddr(sc, eaddr);
	if (error)
		goto failed;

#if __FreeBSD_version >= 1100099
	if (sc->hn_rx_ring_inuse > 1) {
		/*
		 * Reduce TCP segment aggregation limit for multiple
		 * RX rings to increase ACK timeliness.
		 */
		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
	}
#endif

	/*
	 * Fixup TX stuff after synthetic parts are attached.
	 */
	hn_fixup_tx_data(sc);

	ctx = device_get_sysctl_ctx(dev);
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
	    &sc->hn_nvs_ver, 0, "NVS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_ndis_version_sysctl, "A", "NDIS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_caps_sysctl, "A", "capabilities");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_hwassist_sysctl, "A", "hwassist");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rxfilter_sysctl, "A", "rxfilter");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_hash_sysctl, "A", "RSS hash");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
	    CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_key_sysctl, "IU", "RSS key");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_ind_sysctl, "IU", "RSS indirect table");

	/*
	 * Setup the ifmedia, which has been initialized earlier.
	 */
	ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
	/* XXX ifmedia_set really should do this for us */
	sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;

	/*
	 * Setup the ifnet for this interface.
	 */

#if __FreeBSD_version >= 1100045
	ifp->if_baudrate = IF_Gbps(10);
#else
	/* if_baudrate is 32bits on 32bit system. */
	ifp->if_baudrate = IF_Gbps(1);
#endif
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = hn_ioctl;
	ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);

		ifp->if_start = hn_start;
		IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
		ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
		IFQ_SET_READY(&ifp->if_snd);
	} else
#endif
	{
		ifp->if_transmit = hn_transmit;
		ifp->if_qflush = hn_xmit_qflush;
	}

	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
#ifdef foo
	/* We can't distinguish IPv6 packets from IPv4 packets on RX path. */
	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif
	if (sc->hn_caps & HN_CAP_VLAN) {
		/* XXX not sure about VLAN_MTU. */
		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	}

	ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
	if (ifp->if_hwassist & HN_CSUM_IP_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM;
	if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
	if (sc->hn_caps & HN_CAP_TSO4) {
		ifp->if_capabilities |= IFCAP_TSO4;
		ifp->if_hwassist |= CSUM_IP_TSO;
	}
	if (sc->hn_caps & HN_CAP_TSO6) {
		ifp->if_capabilities |= IFCAP_TSO6;
		ifp->if_hwassist |= CSUM_IP6_TSO;
	}

	/* Enable all available capabilities by default. */
	ifp->if_capenable = ifp->if_capabilities;

	if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
		hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
		ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
		ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
	}

	ether_ifattach(ifp, eaddr);

	if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
		if_printf(ifp, "TSO segcnt %u segsz %u\n",
		    ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
	}

	/* Inform the upper layer about the long frame support. */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);

	/*
	 * Kick off link status check.
	 */
	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
	hn_update_link_status(sc);

	return (0);
failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
		hn_synth_detach(sc);
	hn_detach(dev);
	return (error);
}
static int
hn_detach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = sc->hn_ifp;

	if (device_is_attached(dev)) {
		HN_LOCK(sc);
		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				hn_stop(sc);
			/*
			 * NOTE:
			 * hn_stop() only suspends data, so management
			 * stuff has to be suspended manually here.
			 */
			hn_suspend_mgmt(sc);
			hn_synth_detach(sc);
		}
		HN_UNLOCK(sc);
		ether_ifdetach(ifp);
	}

	ifmedia_removeall(&sc->hn_media);
	hn_destroy_rx_data(sc);
	hn_destroy_tx_data(sc);

	if (sc->hn_tx_taskq != hn_tx_taskq)
		taskqueue_free(sc->hn_tx_taskq);
	taskqueue_free(sc->hn_mgmt_taskq0);

	if (sc->hn_xact != NULL)
		vmbus_xact_ctx_destroy(sc->hn_xact);

	if_free(ifp);

	HN_LOCK_DESTROY(sc);
	return (0);
}
static int
hn_shutdown(device_t dev)
{

	return (0);
}

static void
hn_link_status(struct hn_softc *sc)
{
	uint32_t link_status;
	int error;

	error = hn_rndis_get_linkstatus(sc, &link_status);
	if (error) {
		/* XXX what to do? */
		return;
	}

	if (link_status == NDIS_MEDIA_STATE_CONNECTED)
		sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
	else
		sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp,
	    (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
	    LINK_STATE_UP : LINK_STATE_DOWN);
}
static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		return;
	hn_link_status(sc);
}

static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Prevent any link status checks from running. */
	sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;

	/*
	 * Fake up a [link down --> link up] state change; 5 seconds
	 * delay is used, which closely simulates miibus reaction
	 * upon link down event.
	 */
	sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
	taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
	    &sc->hn_netchg_status, 5 * hz);
}

static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Re-allow link status checks. */
	sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
	hn_link_status(sc);
}

static void
hn_update_link_status(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}

static void
hn_change_network(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}
static int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m = *m_head;
	int error;

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

	error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
	    m, segs, nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		struct mbuf *m_new;

		m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
		if (m_new == NULL)
			return ENOBUFS;
		else
			*m_head = m = m_new;
		txr->hn_tx_collapsed++;

		error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
	}
	if (!error) {
		bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
		    BUS_DMASYNC_PREWRITE);
		txd->flags |= HN_TXD_FLAG_DMAMAP;
	}
	return error;
}
static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
	    ("put an onlist txd %#x", txd->flags));

	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
		return 0;

	if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
		    ("chim txd uses dmamap"));
		hn_chim_free(txr->hn_sc, txd->chim_index);
		txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
	} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
		bus_dmamap_sync(txr->hn_tx_data_dtag,
		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(txr->hn_tx_data_dtag,
		    txd->data_dmap);
		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
	}

	if (txd->m != NULL) {
		m_freem(txd->m);
		txd->m = NULL;
	}

	txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	KASSERT(txr->hn_txdesc_avail >= 0 &&
	    txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
	    ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
	txr->hn_txdesc_avail++;
	SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	atomic_add_int(&txr->hn_txdesc_avail, 1);
	buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif

	return 1;
}
static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
{
	struct hn_txdesc *txd;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	txd = SLIST_FIRST(&txr->hn_txlist);
	if (txd != NULL) {
		KASSERT(txr->hn_txdesc_avail > 0,
		    ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
		txr->hn_txdesc_avail--;
		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
	}
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
#endif

	if (txd != NULL) {
#ifdef HN_USE_TXDESC_BUFRING
		atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif
		KASSERT(txd->m == NULL && txd->refs == 0 &&
		    txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
		    (txd->flags & HN_TXD_FLAG_ONLIST) &&
		    (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
		txd->flags &= ~HN_TXD_FLAG_ONLIST;
		txd->refs = 1;
	}
	return txd;
}
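/*
 * TX descriptor reference counting: hn_txdesc_get() hands out a txd with
 * refs == 1; hn_txpkt() takes an extra reference across the actual send
 * (see hn_txdesc_hold()) so the descriptor survives until ETHER_BPF_MTAP,
 * and the send completion (hn_txpkt_done()) drops the other reference.
 * Only the final hn_txdesc_put(), on the 1 -> 0 transition, actually
 * recycles the descriptor.
 */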
static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
{

	/* 0->1 transition will never work */
	KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs));
	atomic_add_int(&txd->refs, 1);
}

static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
	bool pending = false;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
		pending = true;
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	if (!buf_ring_full(txr->hn_txdesc_br))
		pending = true;
#endif
	return (pending);
}
static __inline void
hn_txeof(struct hn_tx_ring *txr)
{
	txr->hn_has_txeof = 0;
	txr->hn_txeof(txr);
}

static void
hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
    struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
{
	struct hn_txdesc *txd = sndc->hn_cbarg;
	struct hn_tx_ring *txr;

	txr = txd->txr;
	KASSERT(txr->hn_chan == chan,
	    ("channel mismatch, on chan%u, should be chan%u",
	     vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan)));

	txr->hn_has_txeof = 1;
	hn_txdesc_put(txr, txd);

	++txr->hn_txdone_cnt;
	if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
		txr->hn_txdone_cnt = 0;
		if (txr->hn_oactive)
			hn_txeof(txr);
	}
}
static void
hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
{
#if defined(INET) || defined(INET6)
	struct lro_ctrl *lro = &rxr->hn_lro;
	struct lro_entry *queued;

	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif

	/*
	 * NOTE:
	 * 'txr' could be NULL, if multiple channels and
	 * ifnet.if_start method are enabled.
	 */
	if (txr == NULL || !txr->hn_has_txeof)
		return;

	txr->hn_txdone_cnt = 0;
	hn_txeof(txr);
}
static __inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{

	KASSERT(ofs >= sizeof(struct rndis_packet_msg),
	    ("invalid RNDIS packet msg offset %u", ofs));
	return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
}

static __inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
    size_t pi_dlen, uint32_t pi_type)
{
	const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
	    ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * rm_pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
	    ("%u pktinfo overflows RNDIS packet msg", pi_type));
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
	    pkt->rm_pktinfolen);
	pkt->rm_pktinfolen += pi_size;

	pi->rm_size = pi_size;
	pi->rm_type = pi_type;
	pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;

	/* Data immediately follow per-packet-info. */
	pkt->rm_dataoffset += pi_size;

	/* Update RNDIS packet msg length */
	pkt->rm_len += pi_size;

	return (pi->rm_data);
}
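/*
 * Rough layout of the RNDIS packet message built by hn_encap() with the
 * helper above; the offsets shown are from the "counting from the
 * beginning of rndis_packet_msg" phase, before the final
 * hn_rndis_pktmsg_offset() conversion rebases them:
 *
 *	+----------------------------+ 0
 *	| struct rndis_packet_msg    |
 *	+----------------------------+ rm_pktinfooffset
 *	| pktinfo (hash, VLAN, ...)  | rm_pktinfolen bytes, grows per append
 *	+----------------------------+ rm_dataoffset
 *	| packet data (mbuf chain)   | rm_datalen bytes
 *	+----------------------------+
 */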
/*
 * NOTE:
 * If this function fails, then both txd and m_head0 will be freed.
 */
static int
hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
{
	bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
	int error, nsegs, i;
	struct mbuf *m_head = *m_head0;
	struct rndis_packet_msg *pkt;
	uint32_t *pi_data;
	int pktlen;

	/*
	 * extension points to the area reserved for the
	 * rndis_filter_packet, which is placed just after
	 * the netvsc_packet (and rppi struct, if present;
	 * length is updated later).
	 */
	pkt = txd->rndis_pkt;
	pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
	pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len;
	pkt->rm_dataoffset = sizeof(*pkt);
	pkt->rm_datalen = m_head->m_pkthdr.len;
	pkt->rm_pktinfooffset = sizeof(*pkt);
	pkt->rm_pktinfolen = 0;

	if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
		/*
		 * Set the hash value for this packet, so that the host could
		 * dispatch the TX done event for this packet back to this TX
		 * ring's channel.
		 */
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
		*pi_data = txr->hn_tx_idx;
	}

	if (m_head->m_flags & M_VLANTAG) {
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = NDIS_VLAN_INFO_MAKE(
		    EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
		    EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
		    EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
	}

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
#if defined(INET6) || defined(INET)
		struct ether_vlan_header *eh;
		int ether_len;

		/*
		 * XXX need m_pullup and use mtodo
		 */
		eh = mtod(m_head, struct ether_vlan_header*);
		if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
			ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		else
			ether_len = ETHER_HDR_LEN;

		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
#ifdef INET
		if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
			struct ip *ip =
			    (struct ip *)(m_head->m_data + ether_len);
			unsigned long iph_len = ip->ip_hl << 2;
			struct tcphdr *th =
			    (struct tcphdr *)((caddr_t)ip + iph_len);

			ip->ip_len = 0;
			ip->ip_sum = 0;
			th->th_sum = in_pseudo(ip->ip_src.s_addr,
			    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(0,
			    m_head->m_pkthdr.tso_segsz);
		}
#endif
#if defined(INET6) && defined(INET)
		else
#endif
#ifdef INET6
		{
			struct ip6_hdr *ip6 = (struct ip6_hdr *)
			    (m_head->m_data + ether_len);
			struct tcphdr *th = (struct tcphdr *)(ip6 + 1);

			ip6->ip6_plen = 0;
			th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(0,
			    m_head->m_pkthdr.tso_segsz);
		}
#endif
#endif	/* INET6 || INET */
	} else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
		if (m_head->m_pkthdr.csum_flags &
		    (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
			*pi_data = NDIS_TXCSUM_INFO_IPV6;
		} else {
			*pi_data = NDIS_TXCSUM_INFO_IPV4;
			if (m_head->m_pkthdr.csum_flags & CSUM_IP)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
			*pi_data |= NDIS_TXCSUM_INFO_TCPCS;
		else if (m_head->m_pkthdr.csum_flags &
		    (CSUM_IP_UDP | CSUM_IP6_UDP))
			*pi_data |= NDIS_TXCSUM_INFO_UDPCS;
	}

	pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
	/* Convert RNDIS packet message offsets */
	pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset);
	pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);

	/*
	 * Chimney send, if the packet could fit into one chimney buffer.
	 */
	if (pkt->rm_len < txr->hn_chim_size) {
		txr->hn_tx_chimney_tried++;
		txd->chim_index = hn_chim_alloc(txr->hn_sc);
		if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
			uint8_t *dest = txr->hn_sc->hn_chim +
			    (txd->chim_index * txr->hn_sc->hn_chim_szmax);

			memcpy(dest, pkt, pktlen);
			dest += pktlen;
			m_copydata(m_head, 0, m_head->m_pkthdr.len, dest);

			txd->chim_size = pkt->rm_len;
			txr->hn_gpa_cnt = 0;
			txr->hn_tx_chimney++;
			txr->hn_sendpkt = hn_txpkt_chim;
			goto done;
		}
	}

	error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
	if (__predict_false(error)) {
		int freed;

		/*
		 * This mbuf is not linked w/ the txd yet, so free it now.
		 */
		m_freem(m_head);
		*m_head0 = NULL;

		freed = hn_txdesc_put(txr, txd);
		KASSERT(freed != 0,
		    ("fail to free txd upon txdma error"));

		txr->hn_txdma_failed++;
		if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1);
		return error;
	}
	*m_head0 = m_head;

	/* +1 RNDIS packet message */
	txr->hn_gpa_cnt = nsegs + 1;

	/* send packet with page buffer */
	txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
	txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
	txr->hn_gpa[0].gpa_len = pktlen;

	/*
	 * Fill the page buffers with mbuf info after the page
	 * buffer for RNDIS packet message.
	 */
	for (i = 0; i < nsegs; ++i) {
		struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];

		gpa->gpa_page = atop(segs[i].ds_addr);
		gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
		gpa->gpa_len = segs[i].ds_len;
	}

	txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
	txd->chim_size = 0;
	txr->hn_sendpkt = hn_txpkt_sglist;
done:
	txd->m = m_head;

	/* Set the completion routine */
	hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);

	return 0;
}
/*
 * NOTE:
 * If this function fails, then txd will be freed, but the mbuf
 * associated w/ the txd will _not_ be freed.
 */
static int
hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
	int error, send_failed = 0;

again:
	/*
	 * Make sure that txd is not freed before ETHER_BPF_MTAP.
	 */
	hn_txdesc_hold(txd);
	error = txr->hn_sendpkt(txr, txd);
	if (!error) {
		ETHER_BPF_MTAP(ifp, txd->m);
		if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
#ifdef HN_IFSTART_SUPPORT
		if (!hn_use_if_start)
#endif
		{
			if_inc_counter(ifp, IFCOUNTER_OBYTES,
			    txd->m->m_pkthdr.len);
			if (txd->m->m_flags & M_MCAST)
				if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
		}
	}
	hn_txdesc_put(txr, txd);

	if (__predict_false(error)) {
		int freed;

		/*
		 * This should "really rarely" happen.
		 *
		 * XXX Too many RX to be acked or too many sideband
		 * commands to run?  Ask netvsc_channel_rollup()
		 * to kick start later.
		 */
		txr->hn_has_txeof = 1;
		if (!send_failed) {
			txr->hn_send_failed++;
			send_failed = 1;
			/*
			 * Try sending again after set hn_has_txeof;
			 * in case that we missed the last
			 * netvsc_channel_rollup().
			 */
			goto again;
		}
		if_printf(ifp, "send failed\n");

		/*
		 * Caller will perform further processing on the
		 * associated mbuf, so don't free it in hn_txdesc_put();
		 * only unload it from the DMA map in hn_txdesc_put(),
		 * if it was loaded.
		 */
		txd->m = NULL;
		freed = hn_txdesc_put(txr, txd);
		KASSERT(freed,
		    ("fail to free txd upon send error"));

		txr->hn_send_failed++;
	}
	return error;
}
/*
 * Append the specified data to the indicated mbuf chain.
 * Extend the mbuf chain if the new data does not fit in
 * existing space.
 *
 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
 * There should be an equivalent in the kernel mbuf code,
 * but there does not appear to be one yet.
 *
 * Differs from m_append() in that additional mbufs are
 * allocated with cluster size MJUMPAGESIZE, and filled
 * accordingly.
 *
 * Return 1 if able to complete the job; otherwise 0.
 */
static int
hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
{
	struct mbuf *m, *n;
	int remainder, space;

	for (m = m0; m->m_next != NULL; m = m->m_next)
		;
	remainder = len;
	space = M_TRAILINGSPACE(m);
	if (space > 0) {
		/*
		 * Copy into available space.
		 */
		if (space > remainder)
			space = remainder;
		bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
		m->m_len += space;
		cp += space;
		remainder -= space;
	}
	while (remainder > 0) {
		/*
		 * Allocate a new mbuf; could check space
		 * and allocate a cluster instead.
		 */
		n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
		if (n == NULL)
			break;
		n->m_len = min(MJUMPAGESIZE, remainder);
		bcopy(cp, mtod(n, caddr_t), n->m_len);
		cp += n->m_len;
		remainder -= n->m_len;
		m->m_next = n;
		m = n;
	}
	if (m0->m_flags & M_PKTHDR)
		m0->m_pkthdr.len += len - remainder;

	return (remainder == 0);
}
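/*
 * Typical use (see hn_rxpkt() below): an mbuf with a 2K or 4K cluster is
 * allocated first, then hv_m_append() copies the host-provided RX buffer
 * into it, growing the chain with extra MJUMPAGESIZE clusters only when
 * the data does not fit; a return value of 0 indicates an mbuf
 * allocation failure partway through the copy.
 */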
#if defined(INET) || defined(INET6)
static __inline int
hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
{
#if __FreeBSD_version >= 1100095
	if (hn_lro_mbufq_depth) {
		tcp_lro_queue_mbuf(lc, m);
		return 0;
	}
#endif
	return tcp_lro_rx(lc, m, 0);
}
#endif
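/*
 * With hw.hn.lro_mbufq_depth set (and a new enough kernel), mbufs are
 * queued into LRO and flushed in sorted batches, which tends to improve
 * aggregation; otherwise tcp_lro_rx() aggregates inline.  Either way, a
 * non-zero return from hn_lro_rx() makes hn_rxpkt() fall back to plain
 * if_input() delivery.
 */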
static int
hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
    const struct hn_rxinfo *info)
{
	struct ifnet *ifp = rxr->hn_ifp;
	struct mbuf *m_new;
	int size, do_lro = 0, do_csum = 1;
	int hash_type = M_HASHTYPE_OPAQUE;

	if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
		return (0);

	/*
	 * Bail out if packet contains more data than configured MTU.
	 */
	if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) {
		return (0);
	} else if (dlen <= MHLEN) {
		m_new = m_gethdr(M_NOWAIT, MT_DATA);
		if (m_new == NULL) {
			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
			return (0);
		}
		memcpy(mtod(m_new, void *), data, dlen);
		m_new->m_pkthdr.len = m_new->m_len = dlen;
		rxr->hn_small_pkts++;
	} else {
		/*
		 * Get an mbuf with a cluster.  For packets 2K or less,
		 * get a standard 2K cluster.  For anything larger, get a
		 * 4K cluster.  Any buffers larger than 4K can cause problems
		 * if looped around to the Hyper-V TX channel, so avoid them.
		 */
		size = MCLBYTES;
		if (dlen > MCLBYTES) {
			/* 4096 */
			size = MJUMPAGESIZE;
		}

		m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
		if (m_new == NULL) {
			if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
			return (0);
		}

		hv_m_append(m_new, dlen, data);
	}
	m_new->m_pkthdr.rcvif = ifp;

	if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
		do_csum = 0;

	/* receive side checksum offload */
	if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
		/* IP csum offload */
		if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
			m_new->m_pkthdr.csum_flags |=
			    (CSUM_IP_CHECKED | CSUM_IP_VALID);
			rxr->hn_csum_ip++;
		}

		/* TCP/UDP csum offload */
		if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
		     NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
			m_new->m_pkthdr.csum_flags |=
			    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
			m_new->m_pkthdr.csum_data = 0xffff;
			if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
				rxr->hn_csum_tcp++;
			else
				rxr->hn_csum_udp++;
		}

		/*
		 * XXX
		 * As of this write (Oct 28th, 2016), host side will turn
		 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so
		 * the do_lro setting here is actually _not_ accurate.  We
		 * depend on the RSS hash type check to reset do_lro.
		 */
		if ((info->csum_info &
		     (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
		    (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
			do_lro = 1;
	} else {
		const struct ether_header *eh;
		uint16_t etype;
		int hoff;

		hoff = sizeof(*eh);
		if (m_new->m_len < hoff)
			goto skip;
		eh = mtod(m_new, struct ether_header *);
		etype = ntohs(eh->ether_type);
		if (etype == ETHERTYPE_VLAN) {
			const struct ether_vlan_header *evl;

			hoff = sizeof(*evl);
			if (m_new->m_len < hoff)
				goto skip;
			evl = mtod(m_new, struct ether_vlan_header *);
			etype = ntohs(evl->evl_proto);
		}

		if (etype == ETHERTYPE_IP) {
			int pr;

			pr = hn_check_iplen(m_new, hoff);
			if (pr == IPPROTO_TCP) {
				if (do_csum &&
				    (rxr->hn_trust_hcsum &
				     HN_TRUST_HCSUM_TCP)) {
					rxr->hn_csum_trusted++;
					m_new->m_pkthdr.csum_flags |=
					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
					m_new->m_pkthdr.csum_data = 0xffff;
				}
				do_lro = 1;
			} else if (pr == IPPROTO_UDP) {
				if (do_csum &&
				    (rxr->hn_trust_hcsum &
				     HN_TRUST_HCSUM_UDP)) {
					rxr->hn_csum_trusted++;
					m_new->m_pkthdr.csum_flags |=
					   (CSUM_IP_CHECKED | CSUM_IP_VALID |
					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
					m_new->m_pkthdr.csum_data = 0xffff;
				}
			} else if (pr != IPPROTO_DONE && do_csum &&
			    (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
				rxr->hn_csum_trusted++;
				m_new->m_pkthdr.csum_flags |=
				    (CSUM_IP_CHECKED | CSUM_IP_VALID);
			}
		}
	}
skip:
	if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
		m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
		    NDIS_VLAN_INFO_ID(info->vlan_info),
		    NDIS_VLAN_INFO_PRI(info->vlan_info),
		    NDIS_VLAN_INFO_CFI(info->vlan_info));
		m_new->m_flags |= M_VLANTAG;
	}

	if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
		rxr->hn_rss_pkts++;
		m_new->m_pkthdr.flowid = info->hash_value;
		if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
		    NDIS_HASH_FUNCTION_TOEPLITZ) {
			uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK);

			switch (type) {
			/*
			 * do_lro is reset, if the hash types are not TCP
			 * related.  See the comment in the above csum_flags
			 * setup section.
			 */
			case NDIS_HASH_IPV4:
				hash_type = M_HASHTYPE_RSS_IPV4;
				do_lro = 0;
				break;

			case NDIS_HASH_TCP_IPV4:
				hash_type = M_HASHTYPE_RSS_TCP_IPV4;
				break;

			case NDIS_HASH_IPV6:
				hash_type = M_HASHTYPE_RSS_IPV6;
				do_lro = 0;
				break;

			case NDIS_HASH_IPV6_EX:
				hash_type = M_HASHTYPE_RSS_IPV6_EX;
				do_lro = 0;
				break;

			case NDIS_HASH_TCP_IPV6:
				hash_type = M_HASHTYPE_RSS_TCP_IPV6;
				break;

			case NDIS_HASH_TCP_IPV6_EX:
				hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
				break;
			}
		}
	} else {
		m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
	}
	M_HASHTYPE_SET(m_new, hash_type);

	/*
	 * Note:  Moved RX completion back to hv_nv_on_receive() so all
	 * messages (not just data messages) will trigger a response.
	 */

	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
	rxr->hn_pkts++;

	if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
#if defined(INET) || defined(INET6)
		struct lro_ctrl *lro = &rxr->hn_lro;

		if (lro->lro_cnt) {
			rxr->hn_lro_tried++;
			if (hn_lro_rx(lro, m_new) == 0) {
				/* DONE! */
				return 0;
			}
		}
#endif
	}

	/* We're not holding the lock here, so don't release it */
	(*ifp->if_input)(ifp, m_new);

	return (0);
}
static int
hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct hn_softc *sc = ifp->if_softc;
	struct ifreq *ifr = (struct ifreq *)data;
	int mask, error = 0;

	switch (cmd) {
	case SIOCSIFMTU:
		if (ifr->ifr_mtu > HN_MTU_MAX) {
			error = EINVAL;
			break;
		}

		HN_LOCK(sc);

		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
			HN_UNLOCK(sc);
			break;
		}

		if ((sc->hn_caps & HN_CAP_MTU) == 0) {
			/* Can't change MTU */
			HN_UNLOCK(sc);
			error = EOPNOTSUPP;
			break;
		}

		if (ifp->if_mtu == ifr->ifr_mtu) {
			HN_UNLOCK(sc);
			break;
		}

		/*
		 * Suspend this interface before the synthetic parts
		 * are ripped.
		 */
		hn_suspend(sc);

		/*
		 * Detach the synthetic parts, i.e. NVS and RNDIS.
		 */
		hn_synth_detach(sc);

		/*
		 * Reattach the synthetic parts, i.e. NVS and RNDIS,
		 * with the new MTU setting.
		 */
		error = hn_synth_attach(sc, ifr->ifr_mtu);
		if (error) {
			HN_UNLOCK(sc);
			break;
		}

		/*
		 * Commit the requested MTU, after the synthetic parts
		 * have been successfully attached.
		 */
		ifp->if_mtu = ifr->ifr_mtu;

		/*
		 * Make sure that various parameters based on MTU are
		 * still valid, after the MTU change.
		 */
		if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
			hn_set_chim_size(sc, sc->hn_chim_szmax);
		hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
#if __FreeBSD_version >= 1100099
		if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
		    HN_LRO_LENLIM_MIN(ifp))
			hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
#endif

		/*
		 * All done!  Resume the interface now.
		 */
		hn_resume(sc);

		HN_UNLOCK(sc);
		break;

	case SIOCSIFFLAGS:
		HN_LOCK(sc);

		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
			HN_UNLOCK(sc);
			break;
		}

		if (ifp->if_flags & IFF_UP) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				hn_set_rxfilter(sc);
			else
				hn_init_locked(sc);
		} else {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				hn_stop(sc);
		}
		sc->hn_if_flags = ifp->if_flags;

		HN_UNLOCK(sc);
		break;

	case SIOCSIFCAP:
		HN_LOCK(sc);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;

		if (mask & IFCAP_TXCSUM) {
			ifp->if_capenable ^= IFCAP_TXCSUM;
			if (ifp->if_capenable & IFCAP_TXCSUM)
				ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
			else
				ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
		}
		if (mask & IFCAP_TXCSUM_IPV6) {
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
			if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
				ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
			else
				ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
		}

		/* TODO: flip RNDIS offload parameters for RXCSUM. */
		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
#ifdef foo
		/* We can't distinguish IPv6 packets from IPv4 packets on RX path. */
		if (mask & IFCAP_RXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
#endif

		if (mask & IFCAP_LRO)
			ifp->if_capenable ^= IFCAP_LRO;

		if (mask & IFCAP_TSO4) {
			ifp->if_capenable ^= IFCAP_TSO4;
			if (ifp->if_capenable & IFCAP_TSO4)
				ifp->if_hwassist |= CSUM_IP_TSO;
			else
				ifp->if_hwassist &= ~CSUM_IP_TSO;
		}
		if (mask & IFCAP_TSO6) {
			ifp->if_capenable ^= IFCAP_TSO6;
			if (ifp->if_capenable & IFCAP_TSO6)
				ifp->if_hwassist |= CSUM_IP6_TSO;
			else
				ifp->if_hwassist &= ~CSUM_IP6_TSO;
		}

		HN_UNLOCK(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
#ifdef notyet
		/*
		 * XXX
		 * Multicast uses mutex, while RNDIS RX filter setting
		 * sleeps.  We workaround this by always enabling
		 * ALLMULTI.  ALLMULTI would actually always be on, even
		 * if we supported the SIOCADDMULTI/SIOCDELMULTI, since
		 * we don't support multicast address list configuration
		 * for this driver.
		 */
		HN_LOCK(sc);

		if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
			HN_UNLOCK(sc);
			break;
		}
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			hn_set_rxfilter(sc);

		HN_UNLOCK(sc);
#endif
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}
	return (error);
}
static void
hn_stop(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	int i;

	HN_LOCK_ASSERT(sc);

	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
	    ("synthetic parts were not attached"));

	/* Clear RUNNING bit _before_ hn_suspend_data() */
	atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
	hn_suspend_data(sc);

	/* Clear OACTIVE bit. */
	atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
		sc->hn_tx_ring[i].hn_oactive = 0;
}

static void
hn_init_locked(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	int i;

	HN_LOCK_ASSERT(sc);

	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
		return;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
		return;

	/* Configure RX filter */
	hn_set_rxfilter(sc);

	/* Clear OACTIVE bit. */
	atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
		sc->hn_tx_ring[i].hn_oactive = 0;

	/* Clear TX 'suspended' bit. */
	hn_resume_tx(sc, sc->hn_tx_ring_inuse);

	/* Everything is ready; unleash! */
	atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
}
static void
hn_init(void *xsc)
{
	struct hn_softc *sc = xsc;

	HN_LOCK(sc);
	hn_init_locked(sc);
	HN_UNLOCK(sc);
}
#if __FreeBSD_version >= 1100099

static int
hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	unsigned int lenlim;
	int error;

	lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
	error = sysctl_handle_int(oidp, &lenlim, 0, req);
	if (error || req->newptr == NULL)
		return error;

	HN_LOCK(sc);
	if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
	    lenlim > TCP_LRO_LENGTH_MAX) {
		HN_UNLOCK(sc);
		return EINVAL;
	}
	hn_set_lro_lenlim(sc, lenlim);
	HN_UNLOCK(sc);

	return 0;
}

static int
hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int ackcnt, error, i;

	/*
	 * lro_ackcnt_lim is append count limit,
	 * +1 to turn it into aggregation limit.
	 */
	ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
	error = sysctl_handle_int(oidp, &ackcnt, 0, req);
	if (error || req->newptr == NULL)
		return error;

	if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
		return EINVAL;

	/*
	 * Convert aggregation limit back to append
	 * count limit.
	 */
	--ackcnt;
	HN_LOCK(sc);
	for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
		sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
	HN_UNLOCK(sc);
	return 0;
}

#endif

static int
hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int hcsum = arg2;
	int on, error, i;

	on = 0;
	if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
		on = 1;

	error = sysctl_handle_int(oidp, &on, 0, req);
	if (error || req->newptr == NULL)
		return error;

	HN_LOCK(sc);
	for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
		struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];

		if (on)
			rxr->hn_trust_hcsum |= hcsum;
		else
			rxr->hn_trust_hcsum &= ~hcsum;
	}
	HN_UNLOCK(sc);
	return 0;
}

static int
hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int chim_size, error;

	chim_size = sc->hn_tx_ring[0].hn_chim_size;
	error = sysctl_handle_int(oidp, &chim_size, 0, req);
	if (error || req->newptr == NULL)
		return error;

	if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
		return EINVAL;

	HN_LOCK(sc);
	hn_set_chim_size(sc, chim_size);
	HN_UNLOCK(sc);
	return 0;
}
#if __FreeBSD_version < 1100095
static int
hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int ofs = arg2, i, error;
	struct hn_rx_ring *rxr;
	uint64_t stat;

	stat = 0;
	for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
		rxr = &sc->hn_rx_ring[i];
		stat += *((int *)((uint8_t *)rxr + ofs));
	}

	error = sysctl_handle_64(oidp, &stat, 0, req);
	if (error || req->newptr == NULL)
		return error;

	/* Zero out this stat. */
	for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
		rxr = &sc->hn_rx_ring[i];
		*((int *)((uint8_t *)rxr + ofs)) = 0;
	}
	return 0;
}
#else
static int
hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int ofs = arg2, i, error;
	struct hn_rx_ring *rxr;
	uint64_t stat;

	stat = 0;
	for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
		rxr = &sc->hn_rx_ring[i];
		stat += *((uint64_t *)((uint8_t *)rxr + ofs));
	}

	error = sysctl_handle_64(oidp, &stat, 0, req);
	if (error || req->newptr == NULL)
		return error;

	/* Zero out this stat. */
	for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
		rxr = &sc->hn_rx_ring[i];
		*((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
	}
	return 0;
}
#endif

static int
hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
{
	struct hn_softc *sc = arg1;
	int ofs = arg2, i, error;
	struct hn_rx_ring *rxr;
	u_long stat;

	stat = 0;
	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		rxr = &sc->hn_rx_ring[i];
		stat += *((u_long *)((uint8_t *)rxr + ofs));
	}

	error = sysctl_handle_long(oidp, &stat, 0, req);
	if (error || req->newptr == NULL)
		return error;

	/* Zero out this stat. */
	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		rxr = &sc->hn_rx_ring[i];
		*((u_long *)((uint8_t *)rxr + ofs)) = 0;
	}
	return 0;
}
2333 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2335 struct hn_softc *sc = arg1;
2336 int ofs = arg2, i, error;
2337 struct hn_tx_ring *txr;
2341 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
2342 txr = &sc->hn_tx_ring[i];
2343 stat += *((u_long *)((uint8_t *)txr + ofs));
2346 error = sysctl_handle_long(oidp, &stat, 0, req);
2347 if (error || req->newptr == NULL)
2350 /* Zero out this stat. */
2351 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
2352 txr = &sc->hn_tx_ring[i];
2353 *((u_long *)((uint8_t *)txr + ofs)) = 0;
2359 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
2361 struct hn_softc *sc = arg1;
2362 int ofs = arg2, i, error, conf;
2363 struct hn_tx_ring *txr;
2365 txr = &sc->hn_tx_ring[0];
2366 conf = *((int *)((uint8_t *)txr + ofs));
2368 error = sysctl_handle_int(oidp, &conf, 0, req);
2369 if (error || req->newptr == NULL)
2373 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
2374 txr = &sc->hn_tx_ring[i];
2375 *((int *)((uint8_t *)txr + ofs)) = conf;
2383 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
2385 struct hn_softc *sc = arg1;
2388 snprintf(verstr, sizeof(verstr), "%u.%u",
2389 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
2390 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
2391 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
2395 hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
2397 struct hn_softc *sc = arg1;
2404 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS);
2405 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req);
2409 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
2411 struct hn_softc *sc = arg1;
2412 char assist_str[128];
2416 hwassist = sc->hn_ifp->if_hwassist;
2418 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS);
2419 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req);
2423 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS)
2425 struct hn_softc *sc = arg1;
2426 char filter_str[128];
2430 filter = sc->hn_rx_filter;
2432 snprintf(filter_str, sizeof(filter_str), "%b", filter,
2434 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req);
2438 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
2440 struct hn_softc *sc = arg1;
2445 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
2446 if (error || req->newptr == NULL)
2449 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
2452 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
2454 if (sc->hn_rx_ring_inuse > 1) {
2455 error = hn_rss_reconfig(sc);
2457 /* Not RSS capable, at least for now; just save the RSS key. */
2466 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
2468 struct hn_softc *sc = arg1;
2473 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
2474 if (error || req->newptr == NULL)
2478	 * Don't allow RSS indirect table changes, if this interface is not
2479	 * currently RSS capable.
2481 if (sc->hn_rx_ring_inuse == 1) {
2486 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
2489 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
2491 hn_rss_ind_fixup(sc, sc->hn_rx_ring_inuse);
2492 error = hn_rss_reconfig(sc);
2499 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS)
2501 struct hn_softc *sc = arg1;
2506 hash = sc->hn_rss_hash;
2508 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
2509 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
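/*
 * Sanity check an inbound IPv4 packet before LRO: the fixed and full
 * IP header, and the complete TCP/UDP header, must reside in the
 * first mbuf, the packet must not be an IP fragment, and all stated
 * lengths must be consistent.  IPPROTO_DONE is returned for packets
 * that must bypass further processing.
 */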
2513 hn_check_iplen(const struct mbuf *m, int hoff)
2515 const struct ip *ip;
2516 int len, iphlen, iplen;
2517 const struct tcphdr *th;
2518 int thoff; /* TCP data offset */
2520 len = hoff + sizeof(struct ip);
2522 /* The packet must be at least the size of an IP header. */
2523 if (m->m_pkthdr.len < len)
2524 return IPPROTO_DONE;
2526 /* The fixed IP header must reside completely in the first mbuf. */
2528 return IPPROTO_DONE;
2530 ip = mtodo(m, hoff);
2532 /* Bound check the packet's stated IP header length. */
2533 iphlen = ip->ip_hl << 2;
2534 if (iphlen < sizeof(struct ip)) /* minimum header length */
2535 return IPPROTO_DONE;
2537	/* The full IP header must reside completely in the first mbuf. */
2538 if (m->m_len < hoff + iphlen)
2539 return IPPROTO_DONE;
2541 iplen = ntohs(ip->ip_len);
2544	 * Check that the amount of data in the buffers is at
2545	 * least as much as the IP header would have us expect.
2547 if (m->m_pkthdr.len < hoff + iplen)
2548 return IPPROTO_DONE;
2551 * Ignore IP fragments.
2553 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
2554 return IPPROTO_DONE;
2557 * The TCP/IP or UDP/IP header must be entirely contained within
2558 * the first fragment of a packet.
2562 if (iplen < iphlen + sizeof(struct tcphdr))
2563 return IPPROTO_DONE;
2564 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
2565 return IPPROTO_DONE;
2566 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
2567 thoff = th->th_off << 2;
2568 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
2569 return IPPROTO_DONE;
2570 if (m->m_len < hoff + iphlen + thoff)
2571 return IPPROTO_DONE;
2574 if (iplen < iphlen + sizeof(struct udphdr))
2575 return IPPROTO_DONE;
2576 if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
2577 return IPPROTO_DONE;
2581 return IPPROTO_DONE;
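/*
 * Allocate the RXBUF shared by all channels, create the RX rings and
 * their per-channel bufrings, configure LRO and host-checksum trust,
 * and attach the dev.hn.UNIT.rx sysctl tree.
 */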
2588 hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
2590 struct sysctl_oid_list *child;
2591 struct sysctl_ctx_list *ctx;
2592 device_t dev = sc->hn_dev;
2593 #if defined(INET) || defined(INET6)
2594 #if __FreeBSD_version >= 1100095
2601 * Create RXBUF for reception.
2604 * - It is shared by all channels.
2605	 * - A large enough buffer is allocated; certain versions of NVS
2606	 *   may further limit the usable space.
2608 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
2609 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma,
2610 BUS_DMA_WAITOK | BUS_DMA_ZERO);
2611 if (sc->hn_rxbuf == NULL) {
2612 device_printf(sc->hn_dev, "allocate rxbuf failed\n");
2616 sc->hn_rx_ring_cnt = ring_cnt;
2617 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
2619 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
2620 M_DEVBUF, M_WAITOK | M_ZERO);
2622 #if defined(INET) || defined(INET6)
2623 #if __FreeBSD_version >= 1100095
2624 lroent_cnt = hn_lro_entry_count;
2625 if (lroent_cnt < TCP_LRO_ENTRIES)
2626 lroent_cnt = TCP_LRO_ENTRIES;
2628 device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
2630 #endif /* INET || INET6 */
2632 ctx = device_get_sysctl_ctx(dev);
2633 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
2635 /* Create dev.hn.UNIT.rx sysctl tree */
2636 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
2637 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
2639 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2640 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
2642 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
2643 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE,
2644 &rxr->hn_br_dma, BUS_DMA_WAITOK);
2645 if (rxr->hn_br == NULL) {
2646 device_printf(dev, "allocate bufring failed\n");
2650 if (hn_trust_hosttcp)
2651 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
2652 if (hn_trust_hostudp)
2653 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
2654 if (hn_trust_hostip)
2655 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
2656 rxr->hn_ifp = sc->hn_ifp;
2657 if (i < sc->hn_tx_ring_cnt)
2658 rxr->hn_txr = &sc->hn_tx_ring[i];
2659 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF;
2660 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK);
2662 rxr->hn_rxbuf = sc->hn_rxbuf;
2667 #if defined(INET) || defined(INET6)
2668 #if __FreeBSD_version >= 1100095
2669 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
2670 hn_lro_mbufq_depth);
2672 tcp_lro_init(&rxr->hn_lro);
2673 rxr->hn_lro.ifp = sc->hn_ifp;
2675 #if __FreeBSD_version >= 1100099
2676 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
2677 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
2679 #endif /* INET || INET6 */
2681 if (sc->hn_rx_sysctl_tree != NULL) {
2685 * Create per RX ring sysctl tree:
2686 * dev.hn.UNIT.rx.RINGID
2688 snprintf(name, sizeof(name), "%d", i);
2689 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
2690 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
2691 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
2693 if (rxr->hn_rx_sysctl_tree != NULL) {
2694 SYSCTL_ADD_ULONG(ctx,
2695 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
2696 OID_AUTO, "packets", CTLFLAG_RW,
2697 &rxr->hn_pkts, "# of packets received");
2698 SYSCTL_ADD_ULONG(ctx,
2699 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
2700 OID_AUTO, "rss_pkts", CTLFLAG_RW,
2702 "# of packets w/ RSS info received");
2704 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
2705 OID_AUTO, "pktbuf_len", CTLFLAG_RD,
2706 &rxr->hn_pktbuf_len, 0,
2707 "Temporary channel packet buffer length");
2712 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
2713 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2714 __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
2715 #if __FreeBSD_version < 1100095
2716 hn_rx_stat_int_sysctl,
2718 hn_rx_stat_u64_sysctl,
2720 "LU", "LRO queued");
2721 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
2722 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2723 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
2724 #if __FreeBSD_version < 1100095
2725 hn_rx_stat_int_sysctl,
2727 hn_rx_stat_u64_sysctl,
2729 "LU", "LRO flushed");
2730 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
2731 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2732 __offsetof(struct hn_rx_ring, hn_lro_tried),
2733 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
2734 #if __FreeBSD_version >= 1100099
2735 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
2736 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
2737 hn_lro_lenlim_sysctl, "IU",
2738 "Max # of data bytes to be aggregated by LRO");
2739 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
2740 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
2741 hn_lro_ackcnt_sysctl, "I",
2742 "Max # of ACKs to be aggregated by LRO");
2744 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
2745 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
2746 hn_trust_hcsum_sysctl, "I",
2747	    "Trust tcp segment verification on host side, "
2748 "when csum info is missing");
2749 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
2750 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
2751 hn_trust_hcsum_sysctl, "I",
2752 "Trust udp datagram verification on host side, "
2753 "when csum info is missing");
2754 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
2755 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
2756 hn_trust_hcsum_sysctl, "I",
2757 "Trust ip packet verification on host side, "
2758 "when csum info is missing");
2759 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
2760 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2761 __offsetof(struct hn_rx_ring, hn_csum_ip),
2762 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
2763 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
2764 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2765 __offsetof(struct hn_rx_ring, hn_csum_tcp),
2766 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
2767 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
2768 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2769 __offsetof(struct hn_rx_ring, hn_csum_udp),
2770 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
2771 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
2772 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2773 __offsetof(struct hn_rx_ring, hn_csum_trusted),
2774 hn_rx_stat_ulong_sysctl, "LU",
2775 "# of packets that we trust host's csum verification");
2776 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
2777 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2778 __offsetof(struct hn_rx_ring, hn_small_pkts),
2779 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
2780 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed",
2781 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
2782 __offsetof(struct hn_rx_ring, hn_ack_failed),
2783 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures");
2784 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
2785 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
2786 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
2787 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
2793 hn_destroy_rx_data(struct hn_softc *sc)
2797 if (sc->hn_rxbuf != NULL) {
2798 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
2799 sc->hn_rxbuf = NULL;
2802 if (sc->hn_rx_ring_cnt == 0)
2805 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2806 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
2808 if (rxr->hn_br == NULL)
2810 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
2813 #if defined(INET) || defined(INET6)
2814 tcp_lro_free(&rxr->hn_lro);
2816 free(rxr->hn_pktbuf, M_DEVBUF);
2818 free(sc->hn_rx_ring, M_DEVBUF);
2819 sc->hn_rx_ring = NULL;
2821 sc->hn_rx_ring_cnt = 0;
2822 sc->hn_rx_ring_inuse = 0;
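/*
 * Create one TX ring: allocate its TX descriptors, the DMA tags and
 * maps for the RNDIS packet messages and the packet data, and attach
 * the per-ring dev.hn.UNIT.tx.RINGID sysctl tree.
 */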
2826 hn_tx_ring_create(struct hn_softc *sc, int id)
2828 struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
2829 device_t dev = sc->hn_dev;
2830 bus_dma_tag_t parent_dtag;
2834 txr->hn_tx_idx = id;
2836 #ifndef HN_USE_TXDESC_BUFRING
2837 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
2839 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
2841 txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
2842 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
2843 M_DEVBUF, M_WAITOK | M_ZERO);
2844 #ifndef HN_USE_TXDESC_BUFRING
2845 SLIST_INIT(&txr->hn_txlist);
2847 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF,
2848 M_WAITOK, &txr->hn_tx_lock);
2851 txr->hn_tx_taskq = sc->hn_tx_taskq;
2853 #ifdef HN_IFSTART_SUPPORT
2854 if (hn_use_if_start) {
2855 txr->hn_txeof = hn_start_txeof;
2856 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
2857 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
2863 txr->hn_txeof = hn_xmit_txeof;
2864 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
2865 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
2867 br_depth = hn_get_txswq_depth(txr);
2868 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF,
2869 M_WAITOK, &txr->hn_tx_lock);
2872 txr->hn_direct_tx_size = hn_direct_tx_size;
2875	 * Always schedule transmission instead of trying to do direct
2876	 * transmission; this gives the best performance so far.
2878 txr->hn_sched_tx = 1;
2880 parent_dtag = bus_get_dma_tag(dev);
2882 /* DMA tag for RNDIS packet messages. */
2883 error = bus_dma_tag_create(parent_dtag, /* parent */
2884 HN_RNDIS_PKT_ALIGN, /* alignment */
2885 HN_RNDIS_PKT_BOUNDARY, /* boundary */
2886 BUS_SPACE_MAXADDR, /* lowaddr */
2887 BUS_SPACE_MAXADDR, /* highaddr */
2888 NULL, NULL, /* filter, filterarg */
2889 HN_RNDIS_PKT_LEN, /* maxsize */
2891 HN_RNDIS_PKT_LEN, /* maxsegsize */
2893 NULL, /* lockfunc */
2894 NULL, /* lockfuncarg */
2895 &txr->hn_tx_rndis_dtag);
2897 device_printf(dev, "failed to create rndis dmatag\n");
2901 /* DMA tag for data. */
2902 error = bus_dma_tag_create(parent_dtag, /* parent */
2904 HN_TX_DATA_BOUNDARY, /* boundary */
2905 BUS_SPACE_MAXADDR, /* lowaddr */
2906 BUS_SPACE_MAXADDR, /* highaddr */
2907 NULL, NULL, /* filter, filterarg */
2908 HN_TX_DATA_MAXSIZE, /* maxsize */
2909 HN_TX_DATA_SEGCNT_MAX, /* nsegments */
2910 HN_TX_DATA_SEGSIZE, /* maxsegsize */
2912 NULL, /* lockfunc */
2913 NULL, /* lockfuncarg */
2914 &txr->hn_tx_data_dtag);
2916 device_printf(dev, "failed to create data dmatag\n");
2920 for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
2921 struct hn_txdesc *txd = &txr->hn_txdesc[i];
2924 txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
2927 * Allocate and load RNDIS packet message.
2929 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
2930 (void **)&txd->rndis_pkt,
2931 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
2932 &txd->rndis_pkt_dmap);
2935 "failed to allocate rndis_packet_msg, %d\n", i);
2939 error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
2940 txd->rndis_pkt_dmap,
2941 txd->rndis_pkt, HN_RNDIS_PKT_LEN,
2942 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
2946 "failed to load rndis_packet_msg, %d\n", i);
2947 bus_dmamem_free(txr->hn_tx_rndis_dtag,
2948 txd->rndis_pkt, txd->rndis_pkt_dmap);
2952 /* DMA map for TX data. */
2953 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
2957 "failed to allocate tx data dmamap\n");
2958 bus_dmamap_unload(txr->hn_tx_rndis_dtag,
2959 txd->rndis_pkt_dmap);
2960 bus_dmamem_free(txr->hn_tx_rndis_dtag,
2961 txd->rndis_pkt, txd->rndis_pkt_dmap);
2965		/* All set; put it on the list. */
2966 txd->flags |= HN_TXD_FLAG_ONLIST;
2967 #ifndef HN_USE_TXDESC_BUFRING
2968 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
2970 buf_ring_enqueue(txr->hn_txdesc_br, txd);
2973 txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
2975 if (sc->hn_tx_sysctl_tree != NULL) {
2976 struct sysctl_oid_list *child;
2977 struct sysctl_ctx_list *ctx;
2981 * Create per TX ring sysctl tree:
2982 * dev.hn.UNIT.tx.RINGID
2984 ctx = device_get_sysctl_ctx(dev);
2985 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
2987 snprintf(name, sizeof(name), "%d", id);
2988 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
2989 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
2991 if (txr->hn_tx_sysctl_tree != NULL) {
2992 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
2994 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
2995 CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
2996 "# of available TX descs");
2997 #ifdef HN_IFSTART_SUPPORT
2998 if (!hn_use_if_start)
3001 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
3002 CTLFLAG_RD, &txr->hn_oactive, 0,
3005 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
3006 CTLFLAG_RW, &txr->hn_pkts,
3007 "# of packets transmitted");
3015 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
3017 struct hn_tx_ring *txr = txd->txr;
3019 KASSERT(txd->m == NULL, ("still has mbuf installed"));
3020 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
3022 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
3023 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt,
3024 txd->rndis_pkt_dmap);
3025 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
3029 hn_tx_ring_destroy(struct hn_tx_ring *txr)
3031 struct hn_txdesc *txd;
3033 if (txr->hn_txdesc == NULL)
3036 #ifndef HN_USE_TXDESC_BUFRING
3037 while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) {
3038 SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
3039 hn_txdesc_dmamap_destroy(txd);
3042 mtx_lock(&txr->hn_tx_lock);
3043 while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL)
3044 hn_txdesc_dmamap_destroy(txd);
3045 mtx_unlock(&txr->hn_tx_lock);
3048 if (txr->hn_tx_data_dtag != NULL)
3049 bus_dma_tag_destroy(txr->hn_tx_data_dtag);
3050 if (txr->hn_tx_rndis_dtag != NULL)
3051 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
3053 #ifdef HN_USE_TXDESC_BUFRING
3054 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF);
3057 free(txr->hn_txdesc, M_DEVBUF);
3058 txr->hn_txdesc = NULL;
3060 if (txr->hn_mbuf_br != NULL)
3061 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF);
3063 #ifndef HN_USE_TXDESC_BUFRING
3064 mtx_destroy(&txr->hn_txlist_spin);
3066 mtx_destroy(&txr->hn_tx_lock);
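/*
 * Allocate the TXBUF shared by all channels for chimney sending,
 * create all TX rings, and attach the aggregate TX statistics and
 * tuning sysctls.
 */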
3070 hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
3072 struct sysctl_oid_list *child;
3073 struct sysctl_ctx_list *ctx;
3077 * Create TXBUF for chimney sending.
3079 * NOTE: It is shared by all channels.
3081 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev),
3082 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma,
3083 BUS_DMA_WAITOK | BUS_DMA_ZERO);
3084 if (sc->hn_chim == NULL) {
3085 device_printf(sc->hn_dev, "allocate txbuf failed\n");
3089 sc->hn_tx_ring_cnt = ring_cnt;
3090 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
3092 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
3093 M_DEVBUF, M_WAITOK | M_ZERO);
3095 ctx = device_get_sysctl_ctx(sc->hn_dev);
3096 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
3098 /* Create dev.hn.UNIT.tx sysctl tree */
3099 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
3100 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3102 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3105 error = hn_tx_ring_create(sc, i);
3110 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
3111 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3112 __offsetof(struct hn_tx_ring, hn_no_txdescs),
3113 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
3114 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
3115 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3116 __offsetof(struct hn_tx_ring, hn_send_failed),
3117	    hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failures");
3118 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
3119 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3120 __offsetof(struct hn_tx_ring, hn_txdma_failed),
3121	    hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failures");
3122 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
3123 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3124 __offsetof(struct hn_tx_ring, hn_tx_collapsed),
3125	    hn_tx_stat_ulong_sysctl, "LU", "# of TX mbufs collapsed");
3126 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
3127 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3128 __offsetof(struct hn_tx_ring, hn_tx_chimney),
3129 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
3130 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
3131 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3132 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
3133 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
3134 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
3135 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
3136 "# of total TX descs");
3137 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
3138 CTLFLAG_RD, &sc->hn_chim_szmax, 0,
3139 "Chimney send packet size upper boundary");
3140 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
3141 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3142 hn_chim_size_sysctl, "I", "Chimney send packet size limit");
3143 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
3144 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3145 __offsetof(struct hn_tx_ring, hn_direct_tx_size),
3146 hn_tx_conf_int_sysctl, "I",
3147 "Size of the packet for direct transmission");
3148 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
3149 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3150 __offsetof(struct hn_tx_ring, hn_sched_tx),
3151 hn_tx_conf_int_sysctl, "I",
3152 "Always schedule transmission "
3153 "instead of doing direct transmission");
3154 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
3155 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
3156 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
3157 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
3163 hn_set_chim_size(struct hn_softc *sc, int chim_size)
3167 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
3168 sc->hn_tx_ring[i].hn_chim_size = chim_size;
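/*
 * Clamp the TSO burst size between the NDIS imposed minimum
 * (hn_ndis_tso_sgmin * MTU) and maximum (hn_ndis_tso_szmax, itself
 * capped at IP_MAXPACKET), then deduct the Ethernet/VLAN header.
 */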
3172 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
3174 struct ifnet *ifp = sc->hn_ifp;
3177 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
3180 KASSERT(sc->hn_ndis_tso_sgmin >= 2,
3181 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin));
3182 tso_minlen = sc->hn_ndis_tso_sgmin * mtu;
3184 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen &&
3185 sc->hn_ndis_tso_szmax <= IP_MAXPACKET,
3186 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax));
3188 if (tso_maxlen < tso_minlen)
3189 tso_maxlen = tso_minlen;
3190 else if (tso_maxlen > IP_MAXPACKET)
3191 tso_maxlen = IP_MAXPACKET;
3192 if (tso_maxlen > sc->hn_ndis_tso_szmax)
3193 tso_maxlen = sc->hn_ndis_tso_szmax;
3194 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3196 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax);
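/*
 * Propagate the negotiated TX parameters to the TX rings: the
 * chimney size, the checksum offload assists implied by hn_caps,
 * and the HASHVAL pktinfo flag.
 */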
3200 hn_fixup_tx_data(struct hn_softc *sc)
3202 uint64_t csum_assist;
3205 hn_set_chim_size(sc, sc->hn_chim_szmax);
3206 if (hn_tx_chimney_size > 0 &&
3207 hn_tx_chimney_size < sc->hn_chim_szmax)
3208 hn_set_chim_size(sc, hn_tx_chimney_size);
3211 if (sc->hn_caps & HN_CAP_IPCS)
3212 csum_assist |= CSUM_IP;
3213 if (sc->hn_caps & HN_CAP_TCP4CS)
3214 csum_assist |= CSUM_IP_TCP;
3215 if (sc->hn_caps & HN_CAP_UDP4CS)
3216 csum_assist |= CSUM_IP_UDP;
3218 if (sc->hn_caps & HN_CAP_TCP6CS)
3219 csum_assist |= CSUM_IP6_TCP;
3220 if (sc->hn_caps & HN_CAP_UDP6CS)
3221 csum_assist |= CSUM_IP6_UDP;
3223 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3224 sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
3226 if (sc->hn_caps & HN_CAP_HASHVAL) {
3228 * Support HASHVAL pktinfo on TX path.
3231 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
3232 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3233 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
3238 hn_destroy_tx_data(struct hn_softc *sc)
3242 if (sc->hn_chim != NULL) {
3243 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
3247 if (sc->hn_tx_ring_cnt == 0)
3250 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3251 hn_tx_ring_destroy(&sc->hn_tx_ring[i]);
3253 free(sc->hn_tx_ring, M_DEVBUF);
3254 sc->hn_tx_ring = NULL;
3256 sc->hn_tx_ring_cnt = 0;
3257 sc->hn_tx_ring_inuse = 0;
3260 #ifdef HN_IFSTART_SUPPORT
3263 hn_start_taskfunc(void *xtxr, int pending __unused)
3265 struct hn_tx_ring *txr = xtxr;
3267 mtx_lock(&txr->hn_tx_lock);
3268 hn_start_locked(txr, 0);
3269 mtx_unlock(&txr->hn_tx_lock);
3273 hn_start_locked(struct hn_tx_ring *txr, int len)
3275 struct hn_softc *sc = txr->hn_sc;
3276 struct ifnet *ifp = sc->hn_ifp;
3278 KASSERT(hn_use_if_start,
3279	    ("hn_start_locked is called when if_start is disabled"));
3280 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
3281 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
3283 if (__predict_false(txr->hn_suspended))
3286 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
3290 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
3291 struct hn_txdesc *txd;
3292 struct mbuf *m_head;
3295 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
3299 if (len > 0 && m_head->m_pkthdr.len > len) {
3301			 * This send could be time consuming; let callers
3302			 * dispatch this packet (and any follow-up
3303			 * packets) to the tx taskqueue.
3305 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
3309 txd = hn_txdesc_get(txr);
3311 txr->hn_no_txdescs++;
3312 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
3313 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
3317 error = hn_encap(txr, txd, &m_head);
3319 /* Both txd and m_head are freed */
3323 error = hn_txpkt(ifp, txr, txd);
3324 if (__predict_false(error)) {
3325 /* txd is freed, but m_head is not */
3326 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
3327 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
3335 hn_start(struct ifnet *ifp)
3337 struct hn_softc *sc = ifp->if_softc;
3338 struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
3340 if (txr->hn_sched_tx)
3343 if (mtx_trylock(&txr->hn_tx_lock)) {
3346 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
3347 mtx_unlock(&txr->hn_tx_lock);
3352 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
3356 hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
3358 struct hn_tx_ring *txr = xtxr;
3360 mtx_lock(&txr->hn_tx_lock);
3361 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
3362 hn_start_locked(txr, 0);
3363 mtx_unlock(&txr->hn_tx_lock);
3367 hn_start_txeof(struct hn_tx_ring *txr)
3369 struct hn_softc *sc = txr->hn_sc;
3370 struct ifnet *ifp = sc->hn_ifp;
3372 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
3374 if (txr->hn_sched_tx)
3377 if (mtx_trylock(&txr->hn_tx_lock)) {
3380 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
3381 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
3382 mtx_unlock(&txr->hn_tx_lock);
3384 taskqueue_enqueue(txr->hn_tx_taskq,
3390		 * Release the OACTIVE bit earlier, in the hope that
3391		 * others can catch up.  The task will clear the
3392		 * flag again with the hn_tx_lock held to avoid
3393		 * possible races.
3394		 */
3395 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
3396 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
3400 #endif /* HN_IFSTART_SUPPORT */
3403 hn_xmit(struct hn_tx_ring *txr, int len)
3405 struct hn_softc *sc = txr->hn_sc;
3406 struct ifnet *ifp = sc->hn_ifp;
3407 struct mbuf *m_head;
3409 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
3410 #ifdef HN_IFSTART_SUPPORT
3411 KASSERT(hn_use_if_start == 0,
3412	    ("hn_xmit is called when if_start is enabled"));
3415 if (__predict_false(txr->hn_suspended))
3418 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
3421 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
3422 struct hn_txdesc *txd;
3425 if (len > 0 && m_head->m_pkthdr.len > len) {
3427			 * This send could be time consuming; let callers
3428			 * dispatch this packet (and any follow-up
3429			 * packets) to the tx taskqueue.
3431 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
3435 txd = hn_txdesc_get(txr);
3437 txr->hn_no_txdescs++;
3438 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
3439 txr->hn_oactive = 1;
3443 error = hn_encap(txr, txd, &m_head);
3445 /* Both txd and m_head are freed; discard */
3446 drbr_advance(ifp, txr->hn_mbuf_br);
3450 error = hn_txpkt(ifp, txr, txd);
3451 if (__predict_false(error)) {
3452 /* txd is freed, but m_head is not */
3453 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
3454 txr->hn_oactive = 1;
3459 drbr_advance(ifp, txr->hn_mbuf_br);
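/*
 * if_transmit method: select a TX ring based on the mbuf's RSS
 * flowid, enqueue the mbuf to that ring's drbr, then either transmit
 * directly or defer to the per-ring TX task.
 */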
3465 hn_transmit(struct ifnet *ifp, struct mbuf *m)
3467 struct hn_softc *sc = ifp->if_softc;
3468 struct hn_tx_ring *txr;
3472 * Select the TX ring based on flowid
3474 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
3475 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
3476 txr = &sc->hn_tx_ring[idx];
3478 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
3480 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
3484 if (txr->hn_oactive)
3487 if (txr->hn_sched_tx)
3490 if (mtx_trylock(&txr->hn_tx_lock)) {
3493 sched = hn_xmit(txr, txr->hn_direct_tx_size);
3494 mtx_unlock(&txr->hn_tx_lock);
3499 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
3504 hn_tx_ring_qflush(struct hn_tx_ring *txr)
3508 mtx_lock(&txr->hn_tx_lock);
3509 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
3511 mtx_unlock(&txr->hn_tx_lock);
3515 hn_xmit_qflush(struct ifnet *ifp)
3517 struct hn_softc *sc = ifp->if_softc;
3520 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
3521 hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
3526 hn_xmit_txeof(struct hn_tx_ring *txr)
3529 if (txr->hn_sched_tx)
3532 if (mtx_trylock(&txr->hn_tx_lock)) {
3535 txr->hn_oactive = 0;
3536 sched = hn_xmit(txr, txr->hn_direct_tx_size);
3537 mtx_unlock(&txr->hn_tx_lock);
3539 taskqueue_enqueue(txr->hn_tx_taskq,
3545		 * Release the oactive earlier, in the hope that
3546		 * others can catch up.  The task will clear the
3547		 * oactive again with the hn_tx_lock held to avoid
3548		 * possible races.
3549		 */
3550 txr->hn_oactive = 0;
3551 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
3556 hn_xmit_taskfunc(void *xtxr, int pending __unused)
3558 struct hn_tx_ring *txr = xtxr;
3560 mtx_lock(&txr->hn_tx_lock);
3562 mtx_unlock(&txr->hn_tx_lock);
3566 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
3568 struct hn_tx_ring *txr = xtxr;
3570 mtx_lock(&txr->hn_tx_lock);
3571 txr->hn_oactive = 0;
3573 mtx_unlock(&txr->hn_tx_lock);
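/*
 * Associate a vmbus channel with the RX/TX ring pair selected by its
 * sub-channel index, bind the channel to a CPU, and open it on the
 * ring's bufring pages.
 */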
3577 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
3579 struct vmbus_chan_br cbr;
3580 struct hn_rx_ring *rxr;
3581 struct hn_tx_ring *txr = NULL;
3584 idx = vmbus_chan_subidx(chan);
3587 * Link this channel to RX/TX ring.
3589 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
3590	    ("invalid channel index %d, should be >= 0 && < %d",
3591 idx, sc->hn_rx_ring_inuse));
3592 rxr = &sc->hn_rx_ring[idx];
3593 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
3594 ("RX ring %d already attached", idx));
3595 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
3598 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
3599 idx, vmbus_chan_id(chan));
3602 if (idx < sc->hn_tx_ring_inuse) {
3603 txr = &sc->hn_tx_ring[idx];
3604 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
3605 ("TX ring %d already attached", idx));
3606 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
3608 txr->hn_chan = chan;
3610 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
3611 idx, vmbus_chan_id(chan));
3615 /* Bind this channel to a proper CPU. */
3616 vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus);
3621 cbr.cbr = rxr->hn_br;
3622 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
3623 cbr.cbr_txsz = HN_TXBR_SIZE;
3624 cbr.cbr_rxsz = HN_RXBR_SIZE;
3625 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
3627 if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
3628 vmbus_chan_id(chan), error);
3629 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
3631 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
3637 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
3639 struct hn_rx_ring *rxr;
3642 idx = vmbus_chan_subidx(chan);
3645	 * Unlink this channel from the RX/TX ring.
3647 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
3648	    ("invalid channel index %d, should be >= 0 && < %d",
3649 idx, sc->hn_rx_ring_inuse));
3650 rxr = &sc->hn_rx_ring[idx];
3651 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
3652 ("RX ring %d is not attached", idx));
3653 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
3655 if (idx < sc->hn_tx_ring_inuse) {
3656 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
3658 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
3659		    ("TX ring %d is not attached", idx));
3660 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
3664 * Close this channel.
3667 * Channel closing does _not_ destroy the target channel.
3669 vmbus_chan_close(chan);
3673 hn_attach_subchans(struct hn_softc *sc)
3675 struct vmbus_channel **subchans;
3676 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
3679 if (subchan_cnt == 0)
3682 /* Attach the sub-channels. */
3683 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
3684 for (i = 0; i < subchan_cnt; ++i) {
3685 error = hn_chan_attach(sc, subchans[i]);
3689 vmbus_subchan_rel(subchans, subchan_cnt);
3692 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
3695 if_printf(sc->hn_ifp, "%d sub-channels attached\n",
3703 hn_detach_allchans(struct hn_softc *sc)
3705 struct vmbus_channel **subchans;
3706 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
3709 if (subchan_cnt == 0)
3712 /* Detach the sub-channels. */
3713 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
3714 for (i = 0; i < subchan_cnt; ++i)
3715 hn_chan_detach(sc, subchans[i]);
3716 vmbus_subchan_rel(subchans, subchan_cnt);
3720	 * Detach the primary channel, _after_ all sub-channels
3721	 * are detached.
3722	 */
3723 hn_chan_detach(sc, sc->hn_prichan);
3725 /* Wait for sub-channels to be destroyed, if any. */
3726 vmbus_subchan_drain(sc->hn_prichan);
3729 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
3730 KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
3731 HN_RX_FLAG_ATTACHED) == 0,
3732 ("%dth RX ring is still attached", i));
3734 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3735 KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
3736 HN_TX_FLAG_ATTACHED) == 0,
3737 ("%dth TX ring is still attached", i));
3743 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
3745 struct vmbus_channel **subchans;
3746 int nchan, rxr_cnt, error;
3748 nchan = *nsubch + 1;
3751 * Multiple RX/TX rings are not requested.
3758	 * Query RSS capabilities, e.g. # of RX rings, and # of indirect
3759	 * table size.
3760	 */
3761 error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
3763 /* No RSS; this is benign. */
3768 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
3772 if (nchan > rxr_cnt)
3775 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
3781 * Allocate sub-channels from NVS.
3783 *nsubch = nchan - 1;
3784 error = hn_nvs_alloc_subchans(sc, nsubch);
3785 if (error || *nsubch == 0) {
3786 /* Failed to allocate sub-channels. */
3792 * Wait for all sub-channels to become ready before moving on.
3794 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
3795 vmbus_subchan_rel(subchans, *nsubch);
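/*
 * Attach the synthetic parts, i.e. NVS and RNDIS: open the primary
 * channel, attach NVS and then RNDIS, allocate and attach the
 * sub-channels, and configure the RSS key and indirect table when
 * more than one channel is usable.
 */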
3800 hn_synth_attach(struct hn_softc *sc, int mtu)
3802 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
3803 int error, nsubch, nchan, i;
3806 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
3807 ("synthetic parts were attached"));
3809 /* Save capabilities for later verification. */
3810 old_caps = sc->hn_caps;
3813	/* Clear RSS state. */
3814 sc->hn_rss_ind_size = 0;
3815 sc->hn_rss_hash = 0;
3818 * Attach the primary channel _before_ attaching NVS and RNDIS.
3820 error = hn_chan_attach(sc, sc->hn_prichan);
3827 error = hn_nvs_attach(sc, mtu);
3832 * Attach RNDIS _after_ NVS is attached.
3834 error = hn_rndis_attach(sc, mtu);
3839 * Make sure capabilities are not changed.
3841 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
3842 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
3843 old_caps, sc->hn_caps);
3844 /* Restore old capabilities and abort. */
3845 sc->hn_caps = old_caps;
3850 * Allocate sub-channels for multi-TX/RX rings.
3853 * The # of RX rings that can be used is equivalent to the # of
3854 * channels to be requested.
3856 nsubch = sc->hn_rx_ring_cnt - 1;
3857 error = hn_synth_alloc_subchans(sc, &nsubch);
3863 /* Only the primary channel can be used; done */
3868	 * Configure RSS key and indirect table _after_ all sub-channels
3869	 * are allocated.
3870	 */
3872 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
3874 * RSS key is not set yet; set it to the default RSS key.
3877 if_printf(sc->hn_ifp, "setup default RSS key\n");
3878 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
3879 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
3882 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
3884		 * RSS indirect table is not set yet; set it up in
3885		 * round-robin fashion.
3886		 */
3888 if_printf(sc->hn_ifp, "setup default RSS indirect "
3891 for (i = 0; i < NDIS_HASH_INDCNT; ++i)
3892 rss->rss_ind[i] = i % nchan;
3893 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
3896 * # of usable channels may be changed, so we have to
3897		 * make sure that all entries in the RSS indirect
3898		 * table are valid.
3899		 */
3900 hn_rss_ind_fixup(sc, nchan);
3903 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
3906 * Failed to configure RSS key or indirect table; only
3907 * the primary channel can be used.
3913 * Set the # of TX/RX rings that could be used according to
3914 * the # of channels that NVS offered.
3916 hn_set_ring_inuse(sc, nchan);
3919 * Attach the sub-channels, if any.
3921 error = hn_attach_subchans(sc);
3925 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
3931	 * The interface must have been suspended through hn_suspend(), before
3932	 * this function gets called.
3935 hn_synth_detach(struct hn_softc *sc)
3939 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
3940 ("synthetic parts were not attached"));
3942 /* Detach the RNDIS first. */
3943 hn_rndis_detach(sc);
3948 /* Detach all of the channels. */
3949 hn_detach_allchans(sc);
3951 sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
3955 hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
3957 KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
3958 ("invalid ring count %d", ring_cnt));
3960 if (sc->hn_tx_ring_cnt > ring_cnt)
3961 sc->hn_tx_ring_inuse = ring_cnt;
3963 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
3964 sc->hn_rx_ring_inuse = ring_cnt;
3967 if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
3968 sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
3973 hn_chan_drain(struct vmbus_channel *chan)
3976 while (!vmbus_chan_rx_empty(chan) || !vmbus_chan_tx_empty(chan))
3978 vmbus_chan_intr_drain(chan);
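/*
 * Quiesce the data path: suspend all TX rings and wait for their
 * pending sends and tasks to complete, clear the RX filter, then
 * drain the channel bufrings and interrupts.
 */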
3982 hn_suspend_data(struct hn_softc *sc)
3984 struct vmbus_channel **subch = NULL;
3992 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
3993 struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
3995 mtx_lock(&txr->hn_tx_lock);
3996 txr->hn_suspended = 1;
3997 mtx_unlock(&txr->hn_tx_lock);
3998		/* No one can send more packets now. */
4000 /* Wait for all pending sends to finish. */
4001 while (hn_tx_ring_pending(txr))
4002 pause("hnwtx", 1 /* 1 tick */);
4004 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
4005 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
4009 * Disable RX by clearing RX filter.
4011 sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
4012 hn_rndis_set_rxfilter(sc, sc->hn_rx_filter);
4015 * Give RNDIS enough time to flush all pending data packets.
4017 pause("waitrx", (200 * hz) / 1000);
4020 * Drain RX/TX bufrings and interrupts.
4022 nsubch = sc->hn_rx_ring_inuse - 1;
4024 subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
4026 if (subch != NULL) {
4027 for (i = 0; i < nsubch; ++i)
4028 hn_chan_drain(subch[i]);
4030 hn_chan_drain(sc->hn_prichan);
4033 vmbus_subchan_rel(subch, nsubch);
4037 hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
4040 ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
4044 hn_suspend_mgmt(struct hn_softc *sc)
4051	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
4052 * through hn_mgmt_taskq.
4054 TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
4055 vmbus_chan_run_task(sc->hn_prichan, &task);
4058 * Make sure that all pending management tasks are completed.
4060 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
4061 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
4062 taskqueue_drain_all(sc->hn_mgmt_taskq0);
4066 hn_suspend(struct hn_softc *sc)
4069 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
4070 hn_suspend_data(sc);
4071 hn_suspend_mgmt(sc);
4075 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
4079 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
4080 ("invalid TX ring count %d", tx_ring_cnt));
4082 for (i = 0; i < tx_ring_cnt; ++i) {
4083 struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
4085 mtx_lock(&txr->hn_tx_lock);
4086 txr->hn_suspended = 0;
4087 mtx_unlock(&txr->hn_tx_lock);
4092 hn_resume_data(struct hn_softc *sc)
4101 hn_set_rxfilter(sc);
4104 * Make sure to clear suspend status on "all" TX rings,
4105 * since hn_tx_ring_inuse can be changed after
4106 * hn_suspend_data().
4108 hn_resume_tx(sc, sc->hn_tx_ring_cnt);
4110 #ifdef HN_IFSTART_SUPPORT
4111 if (!hn_use_if_start)
4115	 * Flush unused drbrs, since hn_tx_ring_inuse may be
4116	 * changed.
4117	 */
4118 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
4119 hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
4125 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
4126 struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
4129		 * Use txeof task, so that any pending oactive can be
4130		 * cleared properly.
4131		 */
4132 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
4137 hn_resume_mgmt(struct hn_softc *sc)
4140 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
4143 * Kick off network change detection, if it was pending.
4144 * If no network change was pending, start link status
4145	 * checks, which are more lightweight than network change
4146	 * detection.
4147	 */
4148 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
4149 hn_change_network(sc);
4151 hn_update_link_status(sc);
4155 hn_resume(struct hn_softc *sc)
4158 if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
4164 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
4166 const struct rndis_status_msg *msg;
4169 if (dlen < sizeof(*msg)) {
4170 if_printf(sc->hn_ifp, "invalid RNDIS status\n");
4175 switch (msg->rm_status) {
4176 case RNDIS_STATUS_MEDIA_CONNECT:
4177 case RNDIS_STATUS_MEDIA_DISCONNECT:
4178 hn_update_link_status(sc);
4181 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
4182 /* Not really useful; ignore. */
4185 case RNDIS_STATUS_NETWORK_CHANGE:
4186 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
4187 if (dlen < ofs + msg->rm_stbuflen ||
4188 msg->rm_stbuflen < sizeof(uint32_t)) {
4189 if_printf(sc->hn_ifp, "network changed\n");
4193 memcpy(&change, ((const uint8_t *)msg) + ofs,
4195 if_printf(sc->hn_ifp, "network changed, change %u\n",
4198 hn_change_network(sc);
4202 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
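/*
 * Walk the RNDIS per-packet-info elements, validating the size,
 * alignment and offset of each one, and extract the VLAN, checksum
 * and hash information into 'info'.  The walk stops early once all
 * interesting elements have been found; a malformed element aborts
 * it.
 */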
4209 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
4211 const struct rndis_pktinfo *pi = info_data;
4214 while (info_dlen != 0) {
4218 if (__predict_false(info_dlen < sizeof(*pi)))
4220 if (__predict_false(info_dlen < pi->rm_size))
4222 info_dlen -= pi->rm_size;
4224 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
4226 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
4228 dlen = pi->rm_size - pi->rm_pktinfooffset;
4231 switch (pi->rm_type) {
4232 case NDIS_PKTINFO_TYPE_VLAN:
4233 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
4235 info->vlan_info = *((const uint32_t *)data);
4236 mask |= HN_RXINFO_VLAN;
4239 case NDIS_PKTINFO_TYPE_CSUM:
4240 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
4242 info->csum_info = *((const uint32_t *)data);
4243 mask |= HN_RXINFO_CSUM;
4246 case HN_NDIS_PKTINFO_TYPE_HASHVAL:
4247 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
4249 info->hash_value = *((const uint32_t *)data);
4250 mask |= HN_RXINFO_HASHVAL;
4253 case HN_NDIS_PKTINFO_TYPE_HASHINF:
4254 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
4256 info->hash_info = *((const uint32_t *)data);
4257 mask |= HN_RXINFO_HASHINF;
4264 if (mask == HN_RXINFO_ALL) {
4265 /* All found; done */
4269 pi = (const struct rndis_pktinfo *)
4270 ((const uint8_t *)pi + pi->rm_size);
4275 * - If there is no hash value, invalidate the hash info.
4277 if ((mask & HN_RXINFO_HASHVAL) == 0)
4278 info->hash_info = HN_NDIS_HASH_INFO_INVALID;
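/*
 * Returns true if the byte ranges [off, off + len) and
 * [check_off, check_off + check_len) overlap.
 */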
4282 static __inline bool
4283 hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
4286 if (off < check_off) {
4287 if (__predict_true(off + len <= check_off))
4289 } else if (off > check_off) {
4290 if (__predict_true(check_off + check_len <= off))
4297 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
4299 const struct rndis_packet_msg *pkt;
4300 struct hn_rxinfo info;
4301 int data_off, pktinfo_off, data_len, pktinfo_len;
4306 if (__predict_false(dlen < sizeof(*pkt))) {
4307 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
4312 if (__predict_false(dlen < pkt->rm_len)) {
4313 if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
4314 "dlen %d, msglen %u\n", dlen, pkt->rm_len);
4317 if (__predict_false(pkt->rm_len <
4318 pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
4319 if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
4320 "msglen %u, data %u, oob %u, pktinfo %u\n",
4321 pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
4322 pkt->rm_pktinfolen);
4325 if (__predict_false(pkt->rm_datalen == 0)) {
4326 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
4333 #define IS_OFFSET_INVALID(ofs) \
4334 ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \
4335 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))
4337 /* XXX Hyper-V does not meet data offset alignment requirement */
4338 if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
4339 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4340 "data offset %u\n", pkt->rm_dataoffset);
4343 if (__predict_false(pkt->rm_oobdataoffset > 0 &&
4344 IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
4345 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4346 "oob offset %u\n", pkt->rm_oobdataoffset);
4349 if (__predict_true(pkt->rm_pktinfooffset > 0) &&
4350 __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
4351 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4352 "pktinfo offset %u\n", pkt->rm_pktinfooffset);
4356 #undef IS_OFFSET_INVALID
4358 data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
4359 data_len = pkt->rm_datalen;
4360 pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
4361 pktinfo_len = pkt->rm_pktinfolen;
4364 * Check OOB coverage.
4366 if (__predict_false(pkt->rm_oobdatalen != 0)) {
4367 int oob_off, oob_len;
4369 if_printf(rxr->hn_ifp, "got oobdata\n");
4370 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
4371 oob_len = pkt->rm_oobdatalen;
4373 if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
4374 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4375 "oob overflow, msglen %u, oob abs %d len %d\n",
4376 pkt->rm_len, oob_off, oob_len);
4381 * Check against data.
4383 if (hn_rndis_check_overlap(oob_off, oob_len,
4384 data_off, data_len)) {
4385 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4386 "oob overlaps data, oob abs %d len %d, "
4387 "data abs %d len %d\n",
4388 oob_off, oob_len, data_off, data_len);
4393 * Check against pktinfo.
4395 if (pktinfo_len != 0 &&
4396 hn_rndis_check_overlap(oob_off, oob_len,
4397 pktinfo_off, pktinfo_len)) {
4398 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4399 "oob overlaps pktinfo, oob abs %d len %d, "
4400 "pktinfo abs %d len %d\n",
4401 oob_off, oob_len, pktinfo_off, pktinfo_len);
4407 * Check per-packet-info coverage and find useful per-packet-info.
4409 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
4410 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
4411 info.hash_info = HN_NDIS_HASH_INFO_INVALID;
4412 if (__predict_true(pktinfo_len != 0)) {
4416 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
4417 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4418 "pktinfo overflow, msglen %u, "
4419 "pktinfo abs %d len %d\n",
4420 pkt->rm_len, pktinfo_off, pktinfo_len);
4425 * Check packet info coverage.
4427 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
4428 data_off, data_len);
4429 if (__predict_false(overlap)) {
4430 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4431			    "pktinfo overlaps data, pktinfo abs %d len %d, "
4432 "data abs %d len %d\n",
4433 pktinfo_off, pktinfo_len, data_off, data_len);
4438 * Find useful per-packet-info.
4440 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
4441 pktinfo_len, &info);
4442 if (__predict_false(error)) {
4443 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
4449 if (__predict_false(data_off + data_len > pkt->rm_len)) {
4450 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
4451 "data overflow, msglen %u, data abs %d len %d\n",
4452 pkt->rm_len, data_off, data_len);
4455 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
4458 static __inline void
4459 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
4461 const struct rndis_msghdr *hdr;
4463 if (__predict_false(dlen < sizeof(*hdr))) {
4464 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
4469 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
4470 /* Hot data path. */
4471 hn_rndis_rx_data(rxr, data, dlen);
4476 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
4477 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
4479 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
4483 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
4485 const struct hn_nvs_hdr *hdr;
4487 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
4488 if_printf(sc->hn_ifp, "invalid nvs notify\n");
4491 hdr = VMBUS_CHANPKT_CONST_DATA(pkt);
4493 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
4494 /* Useless; ignore */
4497 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
4501 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
4502 const struct vmbus_chanpkt_hdr *pkt)
4504 struct hn_nvs_sendctx *sndc;
4506 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
4507 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
4508 VMBUS_CHANPKT_DATALEN(pkt));
4511	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
4512	 * its callback.
4513	 */
4517 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
4518 const struct vmbus_chanpkt_hdr *pkthdr)
4520 const struct vmbus_chanpkt_rxbuf *pkt;
4521 const struct hn_nvs_hdr *nvs_hdr;
4524 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
4525 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
4528 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);
4530	/* Make sure that this is an RNDIS message. */
4531 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
4532 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
4537 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
4538 if (__predict_false(hlen < sizeof(*pkt))) {
4539 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
4542 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;
4544 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
4545 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
4550 count = pkt->cp_rxbuf_cnt;
4551 if (__predict_false(hlen <
4552 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
4553 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
4557 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
4558 for (i = 0; i < count; ++i) {
4561 ofs = pkt->cp_rxbuf[i].rb_ofs;
4562 len = pkt->cp_rxbuf[i].rb_len;
4563 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
4564 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
4565 "ofs %d, len %d\n", i, ofs, len);
4568 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
4572 * Ack the consumed RXBUF associated w/ this channel packet,
4573 * so that this RXBUF can be recycled by the hypervisor.
4575 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
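/*
 * Ack the consumed RXBUF back to the host, retrying if the channel's
 * TX bufring is transiently full (EAGAIN), which should not happen
 * in practice.
 */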
4579 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
4582 struct hn_nvs_rndis_ack ack;
4585 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
4586 ack.nvs_status = HN_NVS_STATUS_OK;
4590 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
4591 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
4592 if (__predict_false(error == EAGAIN)) {
4595		 * This should _not_ happen in the real world, since the
4596		 * consumption of the TX bufring from the TX path is
4597		 * controlled.
4598		 */
4599 if (rxr->hn_ack_failed == 0)
4600 if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
4601 rxr->hn_ack_failed++;
4608 if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
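/*
 * Per-channel interrupt callback: receive channel packets into the
 * per-ring packet buffer, expanding it on ENOBUFS, and dispatch
 * completions, RXBUF packets and inband notifies until the channel
 * is empty.
 */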
4613 hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
4615 struct hn_rx_ring *rxr = xrxr;
4616 struct hn_softc *sc = rxr->hn_ifp->if_softc;
4619 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
4622 pktlen = rxr->hn_pktbuf_len;
4623 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
4624 if (__predict_false(error == ENOBUFS)) {
4629 * Expand channel packet buffer.
4632 * Use M_WAITOK here, since allocation failure
4635 nlen = rxr->hn_pktbuf_len * 2;
4636 while (nlen < pktlen)
4638 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);
4640 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
4641 rxr->hn_pktbuf_len, nlen);
4643 free(rxr->hn_pktbuf, M_DEVBUF);
4644 rxr->hn_pktbuf = nbuf;
4645 rxr->hn_pktbuf_len = nlen;
4648 } else if (__predict_false(error == EAGAIN)) {
4649 /* No more channel packets; done! */
4652 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));
4654 switch (pkt->cph_type) {
4655 case VMBUS_CHANPKT_TYPE_COMP:
4656 hn_nvs_handle_comp(sc, chan, pkt);
4659 case VMBUS_CHANPKT_TYPE_RXBUF:
4660 hn_nvs_handle_rxbuf(rxr, chan, pkt);
4663 case VMBUS_CHANPKT_TYPE_INBAND:
4664 hn_nvs_handle_notify(sc, pkt);
4668 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
4673 hn_chan_rollup(rxr, rxr->hn_txr);
4677 hn_tx_taskq_create(void *arg __unused)
4680 if (vm_guest != VM_GUEST_HV)
4683 if (!hn_share_tx_taskq)
4686 hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
4687 taskqueue_thread_enqueue, &hn_tx_taskq);
4688 taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx");
4689 if (hn_bind_tx_taskq >= 0) {
4690 int cpu = hn_bind_tx_taskq;
4691 struct task cpuset_task;
4694 if (cpu > mp_ncpus - 1)
4696 CPU_SETOF(cpu, &cpu_set);
4697 TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, &cpu_set);
4698 taskqueue_enqueue(hn_tx_taskq, &cpuset_task);
4699 taskqueue_drain(hn_tx_taskq, &cpuset_task);
4702 SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND,
4703 hn_tx_taskq_create, NULL);
4706 hn_tx_taskq_destroy(void *arg __unused)
4709 if (hn_tx_taskq != NULL)
4710 taskqueue_free(hn_tx_taskq);
4712 SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND,
4713 hn_tx_taskq_destroy, NULL);