/*-
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_hn.h"
#include "opt_inet6.h"
#include "opt_inet.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/buf_ring.h>
#include <sys/eventhandler.h>

#include <machine/atomic.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_vlan_var.h>
#include <net/rndis.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>

#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>

#include "vmbus_if.h"

#define HN_IFSTART_SUPPORT

#define HN_RING_CNT_DEF_MAX		8

/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT			512

#define HN_RNDIS_PKT_LEN					\
    (sizeof(struct rndis_packet_msg) +				\
     HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) +		\
     HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +		\
     HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +		\
     HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
#define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE

#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)

#define HN_DIRECT_TX_SIZE_DEF		128

#define HN_EARLY_TXEOF_THRESH		8

#define HN_PKTBUF_LEN_DEF		(16 * 1024)

#define HN_LROENT_CNT_DEF		128

#define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp)		(2 * (ifp)->if_mtu)

#define HN_LRO_ACKCNT_DEF		1

#define HN_LOCK_INIT(sc)				\
    sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc)	sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc)	sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc)					\
do {							\
    while (sx_try_xlock(&(sc)->hn_lock) == 0)		\
        DELAY(1000);					\
} while (0)
#define HN_UNLOCK(sc)		sx_xunlock(&(sc)->hn_lock)

#define HN_CSUM_IP_MASK		(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK	(CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc)		\
    ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc)	\
    ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)

#define HN_PKTSIZE_MIN(align)		\
    roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
    HN_RNDIS_PKT_LEN, (align))
#define HN_PKTSIZE(m, align)		\
    roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))
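/*
 * Worked example (an added note, not from the original sources): with
 * CACHE_LINE_SIZE == 64, a full 1514-byte Ethernet frame consumes
 * HN_PKTSIZE(m, 64) == roundup2(1514 + HN_RNDIS_PKT_LEN, 64) bytes of
 * chimney sending buffer space, i.e. the payload plus the reserved
 * RNDIS packet message header, rounded up to the aggregation alignment.
 */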

#define HN_RING_IDX2CPU(sc, idx)	(((sc)->hn_cpu + (idx)) % mp_ncpus)

struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
    SLIST_ENTRY(hn_txdesc)	link;
#endif
    STAILQ_ENTRY(hn_txdesc)	agg_link;

    /* Aggregated txdescs, in sending order. */
    STAILQ_HEAD(, hn_txdesc)	agg_list;

    /* The oldest packet, if transmission aggregation happens. */
    struct mbuf			*m;
    struct hn_tx_ring		*txr;
    int				refs;
    uint32_t			flags;	/* HN_TXD_FLAG_ */
    struct hn_nvs_sendctx	send_ctx;
    uint32_t			chim_index;
    int				chim_size;

    bus_dmamap_t		data_dmap;

    bus_addr_t			rndis_pkt_paddr;
    struct rndis_packet_msg	*rndis_pkt;
    bus_dmamap_t		rndis_pkt_dmap;
};

#define HN_TXD_FLAG_ONLIST	0x0001
#define HN_TXD_FLAG_DMAMAP	0x0002
#define HN_TXD_FLAG_ONAGG	0x0004

struct hn_rxinfo {
    uint32_t			vlan_info;
    uint32_t			csum_info;
    uint32_t			hash_info;
    uint32_t			hash_value;
};

struct hn_update_vf {
    struct hn_rx_ring		*rxr;
    struct ifnet		*vf;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
    (HN_RXINFO_VLAN |			\
     HN_RXINFO_CSUM |			\
     HN_RXINFO_HASHINF |		\
     HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0

static int hn_probe(device_t);
static int hn_attach(device_t);
static int hn_detach(device_t);
static int hn_shutdown(device_t);
static void hn_chan_callback(struct vmbus_channel *,
    void *);

static void hn_init(void *);
static int hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void hn_start(struct ifnet *);
#endif
static int hn_transmit(struct ifnet *, struct mbuf *);
static void hn_xmit_qflush(struct ifnet *);
static int hn_ifmedia_upd(struct ifnet *);
static void hn_ifmedia_sts(struct ifnet *,
    struct ifmediareq *);

static int hn_rndis_rxinfo(const void *, int,
    struct hn_rxinfo *);
static void hn_rndis_rx_data(struct hn_rx_ring *,
    const void *, int);
static void hn_rndis_rx_status(struct hn_softc *,
    const void *, int);
static void hn_rndis_init_fixat(struct hn_softc *, int);

static void hn_nvs_handle_notify(struct hn_softc *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_handle_comp(struct hn_softc *,
    struct vmbus_channel *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_handle_rxbuf(struct hn_rx_ring *,
    struct vmbus_channel *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_ack_rxbuf(struct hn_rx_ring *,
    struct vmbus_channel *, uint64_t);

#if __FreeBSD_version >= 1100099
static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
#else
static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS);

static void hn_stop(struct hn_softc *, bool);
static void hn_init_locked(struct hn_softc *);
static int hn_chan_attach(struct hn_softc *,
    struct vmbus_channel *);
static void hn_chan_detach(struct hn_softc *,
    struct vmbus_channel *);
static int hn_attach_subchans(struct hn_softc *);
static void hn_detach_allchans(struct hn_softc *);
static void hn_chan_rollup(struct hn_rx_ring *,
    struct hn_tx_ring *);
static void hn_set_ring_inuse(struct hn_softc *, int);
static int hn_synth_attach(struct hn_softc *, int);
static void hn_synth_detach(struct hn_softc *);
static int hn_synth_alloc_subchans(struct hn_softc *,
    int *);
static bool hn_synth_attachable(const struct hn_softc *);
static void hn_suspend(struct hn_softc *);
static void hn_suspend_data(struct hn_softc *);
static void hn_suspend_mgmt(struct hn_softc *);
static void hn_resume(struct hn_softc *);
static void hn_resume_data(struct hn_softc *);
static void hn_resume_mgmt(struct hn_softc *);
static void hn_suspend_mgmt_taskfunc(void *, int);
static void hn_chan_drain(struct hn_softc *,
    struct vmbus_channel *);
static void hn_disable_rx(struct hn_softc *);
static void hn_drain_rxtx(struct hn_softc *, int);
static void hn_polling(struct hn_softc *, u_int);
static void hn_chan_polling(struct vmbus_channel *, u_int);

static void hn_update_link_status(struct hn_softc *);
static void hn_change_network(struct hn_softc *);
static void hn_link_taskfunc(void *, int);
static void hn_netchg_init_taskfunc(void *, int);
static void hn_netchg_status_taskfunc(void *, int);
static void hn_link_status(struct hn_softc *);

static int hn_create_rx_data(struct hn_softc *, int);
static void hn_destroy_rx_data(struct hn_softc *);
static int hn_check_iplen(const struct mbuf *, int);
static int hn_set_rxfilter(struct hn_softc *, uint32_t);
static int hn_rxfilter_config(struct hn_softc *);
static int hn_rss_reconfig(struct hn_softc *);
static void hn_rss_ind_fixup(struct hn_softc *);
static int hn_rxpkt(struct hn_rx_ring *, const void *,
    int, const struct hn_rxinfo *);

static int hn_tx_ring_create(struct hn_softc *, int);
static void hn_tx_ring_destroy(struct hn_tx_ring *);
static int hn_create_tx_data(struct hn_softc *, int);
static void hn_fixup_tx_data(struct hn_softc *);
static void hn_destroy_tx_data(struct hn_softc *);
static void hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static void hn_txdesc_gc(struct hn_tx_ring *,
    struct hn_txdesc *);
static int hn_encap(struct ifnet *, struct hn_tx_ring *,
    struct hn_txdesc *, struct mbuf **);
static int hn_txpkt(struct ifnet *, struct hn_tx_ring *,
    struct hn_txdesc *);
static void hn_set_chim_size(struct hn_softc *, int);
static void hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool hn_tx_ring_pending(struct hn_tx_ring *);
static void hn_tx_ring_qflush(struct hn_tx_ring *);
static void hn_resume_tx(struct hn_softc *, int);
static void hn_set_txagg(struct hn_softc *);
static void *hn_try_txagg(struct ifnet *,
    struct hn_tx_ring *, struct hn_txdesc *,
    int);
static int hn_get_txswq_depth(const struct hn_tx_ring *);
static void hn_txpkt_done(struct hn_nvs_sendctx *,
    struct hn_softc *, struct vmbus_channel *,
    const void *, int);
static int hn_txpkt_sglist(struct hn_tx_ring *,
    struct hn_txdesc *);
static int hn_txpkt_chim(struct hn_tx_ring *,
    struct hn_txdesc *);
static int hn_xmit(struct hn_tx_ring *, int);
static void hn_xmit_taskfunc(void *, int);
static void hn_xmit_txeof(struct hn_tx_ring *);
static void hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int hn_start_locked(struct hn_tx_ring *, int);
static void hn_start_taskfunc(void *, int);
static void hn_start_txeof(struct hn_tx_ring *);
static void hn_start_txeof_taskfunc(void *, int);
#endif

SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust tcp segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust tcp segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust udp datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust udp datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust ip packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust ip packet verification on host side, "
    "when csum info is missing (global setting)");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP	0
#define HN_TX_TASKQ_M_GLOBAL	1
#define HN_TX_TASKQ_M_EVTTQ	2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");

static u_int hn_cpu_index;			/* next CPU for channel */
static struct taskqueue **hn_tx_taskque;	/* shared TX taskqueues */

static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
    0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
    0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
    0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
    0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
    0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
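/*
 * Added note: these 40 bytes are the widely used default Toeplitz hash
 * key; the same constants appear in Microsoft's RSS documentation and
 * in several other drivers.
 */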

static device_method_t hn_methods[] = {
    /* Device interface */
    DEVMETHOD(device_probe,		hn_probe),
    DEVMETHOD(device_attach,		hn_attach),
    DEVMETHOD(device_detach,		hn_detach),
    DEVMETHOD(device_shutdown,		hn_shutdown),
    DEVMETHOD_END
};

static driver_t hn_driver = {
    "hn",
    hn_methods,
    sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);

#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
    int i;

    for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
        sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif

static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
        txd->chim_size == 0, ("invalid rndis sglist txd"));
    return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
        &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}

static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
    struct hn_nvs_rndis rndis;

    KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
        txd->chim_size > 0, ("invalid rndis chim txd"));

    rndis.nvs_type = HN_NVS_TYPE_RNDIS;
    rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
    rndis.nvs_chim_idx = txd->chim_index;
    rndis.nvs_chim_sz = txd->chim_size;

    return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
        &rndis, sizeof(rndis), &txd->send_ctx));
}

static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
    int i, bmap_cnt = sc->hn_chim_bmap_cnt;
    u_long *bmap = sc->hn_chim_bmap;
    uint32_t ret = HN_NVS_CHIM_IDX_INVALID;

    for (i = 0; i < bmap_cnt; ++i) {
        int idx;

        idx = ffsl(~bmap[i]);
        if (idx == 0)
            continue;

        --idx; /* ffsl is 1-based */
        KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
            ("invalid i %d and idx %d", i, idx));

        if (atomic_testandset_long(&bmap[i], idx))
            continue;

        ret = i * LONG_BIT + idx;
        break;
    }
    return (ret);
}

static void
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
{
    u_long mask;
    uint32_t idx;

    idx = chim_idx / LONG_BIT;
    KASSERT(idx < sc->hn_chim_bmap_cnt,
        ("invalid chimney index 0x%x", chim_idx));

    mask = 1UL << (chim_idx % LONG_BIT);
    KASSERT(sc->hn_chim_bmap[idx] & mask,
        ("index bitmap 0x%lx, chimney index %u, "
         "bitmap idx %d, bitmask 0x%lx",
         sc->hn_chim_bmap[idx], chim_idx, idx, mask));

    atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
}
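
/*
 * Illustrative pairing of the two chimney helpers above; this is a
 * sketch added for exposition, not code taken from this driver.  A
 * sender claims one chimney slot, fills the corresponding sub-buffer
 * of sc->hn_chim, and releases the slot once the host acknowledges
 * the send (normally from the TX completion path).
 */
#if 0
    uint32_t idx = hn_chim_alloc(sc);

    if (idx != HN_NVS_CHIM_IDX_INVALID) {
        uint8_t *slot = sc->hn_chim + idx * sc->hn_chim_szmax;

        /* ... copy at most sc->hn_chim_szmax bytes into 'slot',
         * then post the chimney send ... */
        hn_chim_free(sc, idx);
    }
#endif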

#if defined(INET6) || defined(INET)

#define PULLUP_HDR(m, len)				\
do {							\
    if (__predict_false((m)->m_len < (len))) {		\
        (m) = m_pullup((m), (len));			\
        if ((m) == NULL)				\
            return (NULL);				\
    }							\
} while (0)

/*
 * NOTE: If this function failed, the m_head would be freed.
 */
static __inline struct mbuf *
hn_tso_fixup(struct mbuf *m_head)
{
    struct ether_vlan_header *evl;
    struct tcphdr *th;
    int ehlen;

    KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));

    PULLUP_HDR(m_head, sizeof(*evl));
    evl = mtod(m_head, struct ether_vlan_header *);
    if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
        ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    else
        ehlen = ETHER_HDR_LEN;

#ifdef INET
    if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
        struct ip *ip;
        int iphlen;

        PULLUP_HDR(m_head, ehlen + sizeof(*ip));
        ip = mtodo(m_head, ehlen);
        iphlen = ip->ip_hl << 2;

        PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
        th = mtodo(m_head, ehlen + iphlen);

        ip->ip_len = 0;
        ip->ip_sum = 0;
        th->th_sum = in_pseudo(ip->ip_src.s_addr,
            ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    }
#endif
#if defined(INET6) && defined(INET)
    else
#endif
#ifdef INET6
    {
        struct ip6_hdr *ip6;

        PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
        ip6 = mtodo(m_head, ehlen);
        if (ip6->ip6_nxt != IPPROTO_TCP) {
            m_freem(m_head);
            return (NULL);
        }

        PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
        th = mtodo(m_head, ehlen + sizeof(*ip6));

        ip6->ip6_plen = 0;
        th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
    }
#endif
    return (m_head);
}

/*
 * NOTE: If this function failed, the m_head would be freed.
 */
static __inline struct mbuf *
hn_check_tcpsyn(struct mbuf *m_head, int *tcpsyn)
{
    const struct ether_vlan_header *evl;
    const struct tcphdr *th;
    int ehlen;

    *tcpsyn = 0;

    PULLUP_HDR(m_head, sizeof(*evl));
    evl = mtod(m_head, const struct ether_vlan_header *);
    if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
        ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    else
        ehlen = ETHER_HDR_LEN;

#ifdef INET
    if (m_head->m_pkthdr.csum_flags & CSUM_IP_TCP) {
        const struct ip *ip;
        int iphlen;

        PULLUP_HDR(m_head, ehlen + sizeof(*ip));
        ip = mtodo(m_head, ehlen);
        iphlen = ip->ip_hl << 2;

        PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
        th = mtodo(m_head, ehlen + iphlen);
        if (th->th_flags & TH_SYN)
            *tcpsyn = 1;
    }
#endif
#if defined(INET6) && defined(INET)
    else
#endif
#ifdef INET6
    {
        const struct ip6_hdr *ip6;

        PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
        ip6 = mtodo(m_head, ehlen);
        if (ip6->ip6_nxt != IPPROTO_TCP)
            return (m_head);

        PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
        th = mtodo(m_head, ehlen + sizeof(*ip6));
        if (th->th_flags & TH_SYN)
            *tcpsyn = 1;
    }
#endif
    return (m_head);
}

#endif	/* INET6 || INET */

static int
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
    int error = 0;

    HN_LOCK_ASSERT(sc);

    if (sc->hn_rx_filter != filter) {
        error = hn_rndis_set_rxfilter(sc, filter);
        if (!error)
            sc->hn_rx_filter = filter;
    }
    return (error);
}

static int
hn_rxfilter_config(struct hn_softc *sc)
{
    struct ifnet *ifp = sc->hn_ifp;
    uint32_t filter;

    HN_LOCK_ASSERT(sc);

    if ((ifp->if_flags & IFF_PROMISC) ||
        (sc->hn_flags & HN_FLAG_VF)) {
        filter = NDIS_PACKET_TYPE_PROMISCUOUS;
    } else {
        filter = NDIS_PACKET_TYPE_DIRECTED;
        if (ifp->if_flags & IFF_BROADCAST)
            filter |= NDIS_PACKET_TYPE_BROADCAST;
        /* TODO: support multicast list */
        if ((ifp->if_flags & IFF_ALLMULTI) ||
            !TAILQ_EMPTY(&ifp->if_multiaddrs))
            filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
    }
    return (hn_set_rxfilter(sc, filter));
}

static void
hn_set_txagg(struct hn_softc *sc)
{
    uint32_t size, pkts;
    int i;

    /*
     * Setup aggregation size.
     */
    if (sc->hn_agg_size < 0)
        size = UINT32_MAX;
    else
        size = sc->hn_agg_size;

    if (sc->hn_rndis_agg_size < size)
        size = sc->hn_rndis_agg_size;

    /* NOTE: We only aggregate packets using chimney sending buffers. */
    if (size > (uint32_t)sc->hn_chim_szmax)
        size = sc->hn_chim_szmax;

    if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
        /* Disable */
        size = 0;
        pkts = 0;
        goto done;
    }

    /* NOTE: Type of the per TX ring setting is 'int'. */
    if (size > INT_MAX)
        size = INT_MAX;

    /*
     * Setup aggregation packet count.
     */
    if (sc->hn_agg_pkts < 0)
        pkts = UINT32_MAX;
    else
        pkts = sc->hn_agg_pkts;

    if (sc->hn_rndis_agg_pkts < pkts)
        pkts = sc->hn_rndis_agg_pkts;

    if (pkts <= 1) {
        /* Disable */
        size = 0;
        pkts = 0;
        goto done;
    }

    /* NOTE: Type of the per TX ring setting is 'short'. */
    if (pkts > SHRT_MAX)
        pkts = SHRT_MAX;

done:
    /* NOTE: Type of the per TX ring setting is 'short'. */
    if (sc->hn_rndis_agg_align > SHRT_MAX) {
        /* Disable */
        size = 0;
        pkts = 0;
    }

    if (bootverbose) {
        if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
            size, pkts, sc->hn_rndis_agg_align);
    }

    for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
        struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

        mtx_lock(&txr->hn_tx_lock);
        txr->hn_agg_szmax = size;
        txr->hn_agg_pktmax = pkts;
        txr->hn_agg_align = sc->hn_rndis_agg_align;
        mtx_unlock(&txr->hn_tx_lock);
    }
}

static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{

    KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
    if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
        return txr->hn_txdesc_cnt;
    return hn_tx_swq_depth;
}

static int
hn_rss_reconfig(struct hn_softc *sc)
{
    int error;

    HN_LOCK_ASSERT(sc);

    if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
        return (ENXIO);

    /*
     * Disable RSS first.
     *
     * NOTE:
     * Direct reconfiguration by setting the UNCHG flags does
     * _not_ work properly.
     */
    if (bootverbose)
        if_printf(sc->hn_ifp, "disable RSS\n");
    error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
    if (error) {
        if_printf(sc->hn_ifp, "RSS disable failed\n");
        return (error);
    }

    /*
     * Reenable the RSS w/ the updated RSS key or indirect
     * table.
     */
    if (bootverbose)
        if_printf(sc->hn_ifp, "reconfig RSS\n");
    error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
    if (error) {
        if_printf(sc->hn_ifp, "RSS reconfig failed\n");
        return (error);
    }
    return (0);
}

static void
hn_rss_ind_fixup(struct hn_softc *sc)
{
    struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
    int i, nchan;

    nchan = sc->hn_rx_ring_inuse;
    KASSERT(nchan > 1, ("invalid # of channels %d", nchan));

    /*
     * Check indirect table to make sure that all channels in it
     * can be used.
     */
    for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
        if (rss->rss_ind[i] >= nchan) {
            if_printf(sc->hn_ifp,
                "RSS indirect table %d fixup: %u -> %d\n",
                i, rss->rss_ind[i], nchan - 1);
            rss->rss_ind[i] = nchan - 1;
        }
    }
}

static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{

    return (EOPNOTSUPP);
}

static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
    struct hn_softc *sc = ifp->if_softc;

    ifmr->ifm_status = IFM_AVALID;
    ifmr->ifm_active = IFM_ETHER;

    if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
        ifmr->ifm_active |= IFM_NONE;
        return;
    }
    ifmr->ifm_status |= IFM_ACTIVE;
    ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}

static void
hn_update_vf_task(void *arg, int pending __unused)
{
    struct hn_update_vf *uv = arg;

    uv->rxr->hn_vf = uv->vf;
}

static void
hn_update_vf(struct hn_softc *sc, struct ifnet *vf)
{
    struct hn_rx_ring *rxr;
    struct hn_update_vf uv;
    struct task task;
    int i;

    HN_LOCK_ASSERT(sc);

    TASK_INIT(&task, 0, hn_update_vf_task, &uv);

    for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
        rxr = &sc->hn_rx_ring[i];

        if (i < sc->hn_rx_ring_inuse) {
            uv.rxr = rxr;
            uv.vf = vf;
            vmbus_chan_run_task(rxr->hn_chan, &task);
        } else {
            rxr->hn_vf = vf;
        }
    }
}

static void
hn_set_vf(struct hn_softc *sc, struct ifnet *ifp, bool vf)
{
    struct ifnet *hn_ifp;

    HN_LOCK(sc);

    if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
        goto out;

    hn_ifp = sc->hn_ifp;

    if (ifp == hn_ifp)
        goto out;

    if (ifp->if_alloctype != IFT_ETHER)
        goto out;

    /* Ignore lagg/vlan interfaces */
    if (strcmp(ifp->if_dname, "lagg") == 0 ||
        strcmp(ifp->if_dname, "vlan") == 0)
        goto out;

    if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)
        goto out;

    /* Now we're sure 'ifp' is a real VF device. */
    if (vf) {
        if (sc->hn_flags & HN_FLAG_VF)
            goto out;

        sc->hn_flags |= HN_FLAG_VF;
        hn_rxfilter_config(sc);
    } else {
        if (!(sc->hn_flags & HN_FLAG_VF))
            goto out;

        sc->hn_flags &= ~HN_FLAG_VF;
        if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
            hn_rxfilter_config(sc);
        else
            hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
    }

    hn_nvs_set_datapath(sc,
        vf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTHETIC);

    hn_update_vf(sc, vf ? ifp : NULL);

    if (vf) {
        hn_suspend_mgmt(sc);
        sc->hn_link_flags &=
            ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
        if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
    } else {
        hn_resume_mgmt(sc);
    }

    devctl_notify("HYPERV_NIC_VF", if_name(hn_ifp),
        vf ? "VF_UP" : "VF_DOWN", NULL);

    if (bootverbose)
        if_printf(hn_ifp, "Data path is switched %s %s\n",
            vf ? "to" : "from", if_name(ifp));
out:
    HN_UNLOCK(sc);
}

static void
hn_ifnet_event(void *arg, struct ifnet *ifp, int event)
{
    if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN)
        return;
    hn_set_vf(arg, ifp, event == IFNET_EVENT_UP);
}

static void
hn_ifaddr_event(void *arg, struct ifnet *ifp)
{
    hn_set_vf(arg, ifp, ifp->if_flags & IFF_UP);
}

/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
static const struct hyperv_guid g_net_vsc_device_type = {
    .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
    0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
};

static int
hn_probe(device_t dev)
{

    if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
        &g_net_vsc_device_type) == 0) {
        device_set_desc(dev, "Hyper-V Network Interface");
        return BUS_PROBE_DEFAULT;
    }
    return ENXIO;
}

static int
hn_attach(device_t dev)
{
    struct hn_softc *sc = device_get_softc(dev);
    struct sysctl_oid_list *child;
    struct sysctl_ctx_list *ctx;
    uint8_t eaddr[ETHER_ADDR_LEN];
    struct ifnet *ifp = NULL;
    int error, ring_cnt, tx_ring_cnt;

    sc->hn_dev = dev;
    sc->hn_prichan = vmbus_get_channel(dev);
    HN_LOCK_INIT(sc);

    /*
     * Initialize these tunables once.
     */
    sc->hn_agg_size = hn_tx_agg_size;
    sc->hn_agg_pkts = hn_tx_agg_pkts;

    /*
     * Setup taskqueue for transmission.
     */
    if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
        int i;

        sc->hn_tx_taskqs =
            malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
            M_DEVBUF, M_WAITOK);
        for (i = 0; i < hn_tx_taskq_cnt; ++i) {
            sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
                M_WAITOK, taskqueue_thread_enqueue,
                &sc->hn_tx_taskqs[i]);
            taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
                "%s tx%d", device_get_nameunit(dev), i);
        }
    } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
        sc->hn_tx_taskqs = hn_tx_taskque;
    }

    /*
     * Setup taskqueue for management tasks, e.g. link status.
     */
    sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
        taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
    taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
        device_get_nameunit(dev));
    TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
    TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
    TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
        hn_netchg_status_taskfunc, sc);

    /*
     * Allocate ifnet and setup its name earlier, so that if_printf
     * can be used by functions, which will be called after
     * ether_ifattach().
     */
    ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
    ifp->if_softc = sc;
    if_initname(ifp, device_get_name(dev), device_get_unit(dev));

    /*
     * Initialize ifmedia earlier so that it can be unconditionally
     * destroyed, if error happened later on.
     */
    ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);

    /*
     * Figure out the # of RX rings (ring_cnt) and the # of TX rings
     * to use (tx_ring_cnt).
     *
     * NOTE:
     * The # of RX rings to use is same as the # of channels to use.
     */
    ring_cnt = hn_chan_cnt;
    if (ring_cnt <= 0) {
        /* Default */
        ring_cnt = mp_ncpus;
        if (ring_cnt > HN_RING_CNT_DEF_MAX)
            ring_cnt = HN_RING_CNT_DEF_MAX;
    } else if (ring_cnt > mp_ncpus) {
        ring_cnt = mp_ncpus;
    }

    tx_ring_cnt = hn_tx_ring_cnt;
    if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
        tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
    if (hn_use_if_start) {
        /* ifnet.if_start only needs one TX ring. */
        tx_ring_cnt = 1;
    }
#endif

    /*
     * Set the leader CPU for channels.
     */
    sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;

    /*
     * Create enough TX/RX rings, even if only limited number of
     * channels can be allocated.
     */
    error = hn_create_tx_data(sc, tx_ring_cnt);
    if (error)
        goto failed;
    error = hn_create_rx_data(sc, ring_cnt);
    if (error)
        goto failed;

    /*
     * Create transaction context for NVS and RNDIS transactions.
     */
    sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
        HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
    if (sc->hn_xact == NULL) {
        error = ENXIO;
        goto failed;
    }

    /*
     * Install orphan handler for the revocation of this device's
     * primary channel.
     *
     * NOTE:
     * The processing order is critical here:
     * Install the orphan handler, _before_ testing whether this
     * device's primary channel has been revoked or not.
     */
    vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
    if (vmbus_chan_is_revoked(sc->hn_prichan)) {
        error = ENXIO;
        goto failed;
    }

    /*
     * Attach the synthetic parts, i.e. NVS and RNDIS.
     */
    error = hn_synth_attach(sc, ETHERMTU);
    if (error)
        goto failed;

    error = hn_rndis_get_eaddr(sc, eaddr);
    if (error)
        goto failed;

#if __FreeBSD_version >= 1100099
    if (sc->hn_rx_ring_inuse > 1) {
        /*
         * Reduce TCP segment aggregation limit for multiple
         * RX rings to increase ACK timeliness.
         */
        hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
    }
#endif

    /*
     * Fixup TX settings after the synthetic parts are attached.
     */
    hn_fixup_tx_data(sc);

    ctx = device_get_sysctl_ctx(dev);
    child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
        &sc->hn_nvs_ver, 0, "NVS version");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_ndis_version_sysctl, "A", "NDIS version");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_caps_sysctl, "A", "capabilities");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_hwassist_sysctl, "A", "hwassist");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_rxfilter_sysctl, "A", "rxfilter");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_rss_hash_sysctl, "A", "RSS hash");
    SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
        CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
        CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_rss_key_sysctl, "IU", "RSS key");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
        CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_rss_ind_sysctl, "IU", "RSS indirect table");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
        CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
        "RNDIS offered packet transmission aggregation size limit");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
        CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
        "RNDIS offered packet transmission aggregation count limit");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
        CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
        "RNDIS packet transmission aggregation alignment");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_txagg_size_sysctl, "I",
        "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_txagg_pkts_sysctl, "I",
        "Packet transmission aggregation packets, "
        "0 -- disable, -1 -- auto");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
        CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_polling_sysctl, "I",
        "Polling frequency: [100,1000000], 0 disable polling");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_vf_sysctl, "A", "Virtual Function's name");

    /*
     * Setup the ifmedia, which has been initialized earlier.
     */
    ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
    ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
    /* XXX ifmedia_set really should do this for us */
    sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;

    /*
     * Setup the ifnet for this interface.
     */

#if __FreeBSD_version >= 1100045
    /* 64bits of if_baudrate is available */
    ifp->if_baudrate = IF_Gbps(10);
#else
    /* if_baudrate is 32bits on 32bit system. */
    ifp->if_baudrate = IF_Gbps(1);
#endif
    ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
    ifp->if_ioctl = hn_ioctl;
    ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT
    if (hn_use_if_start) {
        int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);

        ifp->if_start = hn_start;
        IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
        ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
        IFQ_SET_READY(&ifp->if_snd);
    } else
#endif
    {
        ifp->if_transmit = hn_transmit;
        ifp->if_qflush = hn_xmit_qflush;
    }

    ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
#ifdef foo
    /* We can't diff IPv6 packets from IPv4 packets on RX path. */
    ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif
    if (sc->hn_caps & HN_CAP_VLAN) {
        /* XXX not sure about VLAN_MTU. */
        ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
    }

    ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
    if (ifp->if_hwassist & HN_CSUM_IP_MASK)
        ifp->if_capabilities |= IFCAP_TXCSUM;
    if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
        ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
    if (sc->hn_caps & HN_CAP_TSO4) {
        ifp->if_capabilities |= IFCAP_TSO4;
        ifp->if_hwassist |= CSUM_IP_TSO;
    }
    if (sc->hn_caps & HN_CAP_TSO6) {
        ifp->if_capabilities |= IFCAP_TSO6;
        ifp->if_hwassist |= CSUM_IP6_TSO;
    }

    /* Enable all available capabilities by default. */
    ifp->if_capenable = ifp->if_capabilities;

    /*
     * Disable IPv6 TSO and TXCSUM by default, they still can
     * be enabled through SIOCSIFCAP.
     */
    ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
    ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);

    if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
        hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
        ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
        ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
    }

    ether_ifattach(ifp, eaddr);

    if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
        if_printf(ifp, "TSO segcnt %u segsz %u\n",
            ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
    }

    /* Inform the upper layer about the long frame support. */
    ifp->if_hdrlen = sizeof(struct ether_vlan_header);

    /*
     * Kick off link status check.
     */
    sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
    hn_update_link_status(sc);

    sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
        hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
    sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
        hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);

    return (0);

failed:
    if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
        hn_synth_detach(sc);
    hn_detach(dev);
    return (error);
}

static int
hn_detach(device_t dev)
{
    struct hn_softc *sc = device_get_softc(dev);
    struct ifnet *ifp = sc->hn_ifp;

    if (sc->hn_ifaddr_evthand != NULL)
        EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
    if (sc->hn_ifnet_evthand != NULL)
        EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);

    if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
        /*
         * In case that the vmbus missed the orphan handler
         * installation.
         */
        vmbus_xact_ctx_orphan(sc->hn_xact);
    }

    if (device_is_attached(dev)) {
        HN_LOCK(sc);
        if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
            if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                hn_stop(sc, true);
            /*
             * NOTE:
             * hn_stop() only suspends data, so management
             * tasks have to be suspended manually here.
             */
            hn_suspend_mgmt(sc);
            hn_synth_detach(sc);
        }
        HN_UNLOCK(sc);
        ether_ifdetach(ifp);
    }

    ifmedia_removeall(&sc->hn_media);
    hn_destroy_rx_data(sc);
    hn_destroy_tx_data(sc);

    if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
        int i;

        for (i = 0; i < hn_tx_taskq_cnt; ++i)
            taskqueue_free(sc->hn_tx_taskqs[i]);
        free(sc->hn_tx_taskqs, M_DEVBUF);
    }
    taskqueue_free(sc->hn_mgmt_taskq0);

    if (sc->hn_xact != NULL) {
        /*
         * Uninstall the orphan handler _before_ the xact is
         * destructed.
         */
        vmbus_chan_unset_orphan(sc->hn_prichan);
        vmbus_xact_ctx_destroy(sc->hn_xact);
    }

    if_free(ifp);

    HN_LOCK_DESTROY(sc);
    return (0);
}

static int
hn_shutdown(device_t dev)
{

    return (0);
}

static void
hn_link_status(struct hn_softc *sc)
{
    uint32_t link_status;
    int error;

    error = hn_rndis_get_linkstatus(sc, &link_status);
    if (error) {
        /* XXX what to do? */
        return;
    }

    if (link_status == NDIS_MEDIA_STATE_CONNECTED)
        sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
    else
        sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
    if_link_state_change(sc->hn_ifp,
        (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
        LINK_STATE_UP : LINK_STATE_DOWN);
}

static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
    struct hn_softc *sc = xsc;

    if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
        return;
    hn_link_status(sc);
}

static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
    struct hn_softc *sc = xsc;

    /* Prevent any link status checks from running. */
    sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;

    /*
     * Fake up a [link down --> link up] state change; 5 seconds
     * delay is used, which closely simulates miibus reaction
     * upon link down event.
     */
    sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
    if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
    taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
        &sc->hn_netchg_status, 5 * hz);
}

static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
    struct hn_softc *sc = xsc;

    /* Re-allow link status checks. */
    sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
    hn_link_status(sc);
}

static void
hn_update_link_status(struct hn_softc *sc)
{

    if (sc->hn_mgmt_taskq != NULL)
        taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}

static void
hn_change_network(struct hn_softc *sc)
{

    if (sc->hn_mgmt_taskq != NULL)
        taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}

static __inline int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
    struct mbuf *m = *m_head;
    int error;

    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

    error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
        m, segs, nsegs, BUS_DMA_NOWAIT);
    if (error == EFBIG) {
        struct mbuf *m_new;

        m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
        if (m_new == NULL)
            return ENOBUFS;
        else
            *m_head = m = m_new;
        txr->hn_tx_collapsed++;

        error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
            txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
    }
    if (!error) {
        bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
            BUS_DMASYNC_PREWRITE);
        txd->flags |= HN_TXD_FLAG_DMAMAP;
    }
    return error;
}

static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

    KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
        ("put an onlist txd %#x", txd->flags));
    KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
        ("put an onagg txd %#x", txd->flags));

    KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
    if (atomic_fetchadd_int(&txd->refs, -1) != 1)
        return 0;

    if (!STAILQ_EMPTY(&txd->agg_list)) {
        struct hn_txdesc *tmp_txd;

        while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
            int freed;

            KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
                ("recursive aggregation on aggregated txdesc"));
            KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
                ("not aggregated txdesc"));
            KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
                ("aggregated txdesc uses dmamap"));
            KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
                ("aggregated txdesc consumes "
                 "chimney sending buffer"));
            KASSERT(tmp_txd->chim_size == 0,
                ("aggregated txdesc has non-zero "
                 "chimney sending size"));

            STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
            tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
            freed = hn_txdesc_put(txr, tmp_txd);
            KASSERT(freed, ("failed to free aggregated txdesc"));
        }
    }

    if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
        KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
            ("chim txd uses dmamap"));
        hn_chim_free(txr->hn_sc, txd->chim_index);
        txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
        txd->chim_size = 0;
    } else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
        bus_dmamap_sync(txr->hn_tx_data_dtag,
            txd->data_dmap, BUS_DMASYNC_POSTWRITE);
        bus_dmamap_unload(txr->hn_tx_data_dtag,
            txd->data_dmap);
        txd->flags &= ~HN_TXD_FLAG_DMAMAP;
    }

    if (txd->m != NULL) {
        m_freem(txd->m);
        txd->m = NULL;
    }

    txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    KASSERT(txr->hn_txdesc_avail >= 0 &&
        txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
        ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
    txr->hn_txdesc_avail++;
    SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
    mtx_unlock_spin(&txr->hn_txlist_spin);
#else	/* HN_USE_TXDESC_BUFRING */
#ifdef HN_DEBUG
    atomic_add_int(&txr->hn_txdesc_avail, 1);
#endif
    buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif	/* !HN_USE_TXDESC_BUFRING */

    return 1;
}

static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
{
    struct hn_txdesc *txd;

#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    txd = SLIST_FIRST(&txr->hn_txlist);
    if (txd != NULL) {
        KASSERT(txr->hn_txdesc_avail > 0,
            ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
        txr->hn_txdesc_avail--;
        SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
    }
    mtx_unlock_spin(&txr->hn_txlist_spin);
#else
    txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
#endif

    if (txd != NULL) {
#ifdef HN_USE_TXDESC_BUFRING
#ifdef HN_DEBUG
        atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif
#endif	/* HN_USE_TXDESC_BUFRING */
        KASSERT(txd->m == NULL && txd->refs == 0 &&
            STAILQ_EMPTY(&txd->agg_list) &&
            txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
            txd->chim_size == 0 &&
            (txd->flags & HN_TXD_FLAG_ONLIST) &&
            (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
            (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
        txd->flags &= ~HN_TXD_FLAG_ONLIST;
        txd->refs = 1;
    }
    return txd;
}

static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
{

    /* 0->1 transition will never work */
    KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
    atomic_add_int(&txd->refs, 1);
}

static __inline void
hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
{

    KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
        ("recursive aggregation on aggregating txdesc"));
    KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
        ("already aggregated"));
    KASSERT(STAILQ_EMPTY(&txd->agg_list),
        ("recursive aggregation on to-be-aggregated txdesc"));

    txd->flags |= HN_TXD_FLAG_ONAGG;
    STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
}

static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
    bool pending = false;

#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
        pending = true;
    mtx_unlock_spin(&txr->hn_txlist_spin);
#else
    if (!buf_ring_full(txr->hn_txdesc_br))
        pending = true;
#endif
    return (pending);
}

static __inline void
hn_txeof(struct hn_tx_ring *txr)
{
    txr->hn_has_txeof = 0;
    txr->hn_txeof(txr);
}

static void
hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
    struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
{
    struct hn_txdesc *txd = sndc->hn_cbarg;
    struct hn_tx_ring *txr;

    txr = txd->txr;
    KASSERT(txr->hn_chan == chan,
        ("channel mismatch, on chan%u, should be chan%u",
         vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));

    txr->hn_has_txeof = 1;
    hn_txdesc_put(txr, txd);

    ++txr->hn_txdone_cnt;
    if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
        txr->hn_txdone_cnt = 0;
        if (txr->hn_oactive)
            hn_txeof(txr);
    }
}

static void
hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
{
#if defined(INET) || defined(INET6)
    struct lro_ctrl *lro = &rxr->hn_lro;
    struct lro_entry *queued;

    while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
        SLIST_REMOVE_HEAD(&lro->lro_active, next);
        tcp_lro_flush(lro, queued);
    }
#endif

    /*
     * NOTE:
     * 'txr' could be NULL, if multiple channels and
     * ifnet.if_start method are enabled.
     */
    if (txr == NULL || !txr->hn_has_txeof)
        return;

    txr->hn_txdone_cnt = 0;
    hn_txeof(txr);
}

static __inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{

    KASSERT(ofs >= sizeof(struct rndis_packet_msg),
        ("invalid RNDIS packet msg offset %u", ofs));
    return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
}
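
/*
 * Added note: RNDIS per-message offsets are counted from the
 * rm_dataoffset field (8 bytes into the message, past rm_type and
 * rm_len), which is why the helper above subtracts
 * __offsetof(struct rndis_packet_msg, rm_dataoffset).  For example, a
 * payload that starts right after a pktinfo-less header has
 * rm_dataoffset == hn_rndis_pktmsg_offset(sizeof(struct rndis_packet_msg)).
 */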

static __inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
    size_t pi_dlen, uint32_t pi_type)
{
    const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
    struct rndis_pktinfo *pi;

    KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
        ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));

    /*
     * Per-packet-info does not move; it only grows.
     *
     * NOTE:
     * rm_pktinfooffset in this phase counts from the beginning
     * of rndis_packet_msg.
     */
    KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
        ("%u pktinfo overflows RNDIS packet msg", pi_type));
    pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
        pkt->rm_pktinfolen);
    pkt->rm_pktinfolen += pi_size;

    pi->rm_size = pi_size;
    pi->rm_type = pi_type;
    pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;

    return (pi->rm_data);
}
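
/*
 * Typical call sequence for hn_rndis_pktinfo_append(), mirroring what
 * hn_encap() does further below (shown here only as a usage sketch):
 * reserve room for a 32-bit hash-value pktinfo and store the TX ring
 * index in it.
 */
#if 0
    uint32_t *pi_data;

    pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
        HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
    *pi_data = txr->hn_tx_idx;
#endif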

static __inline int
hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr)
{
    struct hn_txdesc *txd;
    struct mbuf *m;
    int error, pkts;

    txd = txr->hn_agg_txd;
    KASSERT(txd != NULL, ("no aggregate txdesc"));

    /*
     * Since hn_txpkt() will reset this temporary stat, save
     * it now, so that oerrors can be updated properly, if
     * hn_txpkt() ever fails.
     */
    pkts = txr->hn_stat_pkts;

    /*
     * Since txd's mbuf will _not_ be freed upon hn_txpkt()
     * failure, save it for later freeing, if hn_txpkt() ever
     * fails.
     */
    m = txd->m;
    error = hn_txpkt(ifp, txr, txd);
    if (__predict_false(error)) {
        /* txd is freed, but m is not. */
        m_freem(m);

        txr->hn_flush_failed++;
        if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts);
    }

    /* Reset all aggregation states. */
    txr->hn_agg_txd = NULL;
    txr->hn_agg_szleft = 0;
    txr->hn_agg_pktleft = 0;
    txr->hn_agg_prevpkt = NULL;

    return (error);
}

static void *
hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
    int pktsize)
{
    void *chim;

    if (txr->hn_agg_txd != NULL) {
        if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) {
            struct hn_txdesc *agg_txd = txr->hn_agg_txd;
            struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt;
            int olen;

            /*
             * Update the previous RNDIS packet's total length,
             * it can be increased due to the mandatory alignment
             * padding for this RNDIS packet.  And update the
             * aggregating txdesc's chimney sending buffer size
             * accordingly.
             *
             * XXX
             * Zero-out the padding, as required by the RNDIS spec.
             */
            olen = pkt->rm_len;
            pkt->rm_len = roundup2(olen, txr->hn_agg_align);
            agg_txd->chim_size += pkt->rm_len - olen;

            /* Link this txdesc to the parent. */
            hn_txdesc_agg(agg_txd, txd);

            chim = (uint8_t *)pkt + pkt->rm_len;
            /* Save the current packet for later fixup. */
            txr->hn_agg_prevpkt = chim;

            txr->hn_agg_pktleft--;
            txr->hn_agg_szleft -= pktsize;
            if (txr->hn_agg_szleft <=
                HN_PKTSIZE_MIN(txr->hn_agg_align)) {
                /*
                 * Probably can't aggregate more packets,
                 * flush this aggregating txdesc proactively.
                 */
                txr->hn_agg_pktleft = 0;
            }
            /* Done! */
            return (chim);
        }
        hn_flush_txagg(ifp, txr);
    }
    KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));

    txr->hn_tx_chimney_tried++;
    txd->chim_index = hn_chim_alloc(txr->hn_sc);
    if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID)
        return (NULL);
    txr->hn_tx_chimney++;

    chim = txr->hn_sc->hn_chim +
        (txd->chim_index * txr->hn_sc->hn_chim_szmax);

    if (txr->hn_agg_pktmax > 1 &&
        txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) {
        txr->hn_agg_txd = txd;
        txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1;
        txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize;
        txr->hn_agg_prevpkt = chim;
    }
    return (chim);
}

/*
 * NOTE:
 * If this function fails, then both txd and m_head0 will be freed.
 */
static int
hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head0)
{
    bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
    int error, nsegs, i;
    struct mbuf *m_head = *m_head0;
    struct rndis_packet_msg *pkt;
    uint32_t *pi_data;
    void *chim = NULL;
    int pkt_hlen, pkt_size;

    pkt = txd->rndis_pkt;
    pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align);
    if (pkt_size < txr->hn_chim_size) {
        chim = hn_try_txagg(ifp, txr, txd, pkt_size);
        if (chim != NULL)
            pkt = chim;
    } else {
        if (txr->hn_agg_txd != NULL)
            hn_flush_txagg(ifp, txr);
    }

    pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
    pkt->rm_len = m_head->m_pkthdr.len;
    pkt->rm_dataoffset = 0;
    pkt->rm_datalen = m_head->m_pkthdr.len;
    pkt->rm_oobdataoffset = 0;
    pkt->rm_oobdatalen = 0;
    pkt->rm_oobdataelements = 0;
    pkt->rm_pktinfooffset = sizeof(*pkt);
    pkt->rm_pktinfolen = 0;
    pkt->rm_vchandle = 0;
    pkt->rm_reserved = 0;

    if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
        /*
         * Set the hash value for this packet, so that the host could
         * dispatch the TX done event for this packet back to this TX
         * ring's channel.
         */
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
        *pi_data = txr->hn_tx_idx;
    }

    if (m_head->m_flags & M_VLANTAG) {
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
        *pi_data = NDIS_VLAN_INFO_MAKE(
            EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
            EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
            EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
    }

    if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
#if defined(INET6) || defined(INET)
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
#ifdef INET
        if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
            *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0,
                m_head->m_pkthdr.tso_segsz);
        }
#endif
#if defined(INET6) && defined(INET)
        else
#endif
#ifdef INET6
        {
            *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0,
                m_head->m_pkthdr.tso_segsz);
        }
#endif
#endif	/* INET6 || INET */
    } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
        if (m_head->m_pkthdr.csum_flags &
            (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
            *pi_data = NDIS_TXCSUM_INFO_IPV6;
        } else {
            *pi_data = NDIS_TXCSUM_INFO_IPV4;
            if (m_head->m_pkthdr.csum_flags & CSUM_IP)
                *pi_data |= NDIS_TXCSUM_INFO_IPCS;
        }

        if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
            *pi_data |= NDIS_TXCSUM_INFO_TCPCS;
        else if (m_head->m_pkthdr.csum_flags &
            (CSUM_IP_UDP | CSUM_IP6_UDP))
            *pi_data |= NDIS_TXCSUM_INFO_UDPCS;
    }

    pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
    /* Fixup RNDIS packet message total length */
    pkt->rm_len += pkt_hlen;
    /* Convert RNDIS packet message offsets */
    pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
    pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);

    /*
     * Fast path: Chimney sending.
     */
    if (chim != NULL) {
        struct hn_txdesc *tgt_txd = txd;

        if (txr->hn_agg_txd != NULL) {
            tgt_txd = txr->hn_agg_txd;
#ifdef INVARIANTS
            *m_head0 = NULL;
#endif
        }

        KASSERT(pkt == chim,
            ("RNDIS pkt not in chimney sending buffer"));
        KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
            ("chimney sending buffer is not used"));
        tgt_txd->chim_size += pkt->rm_len;

        m_copydata(m_head, 0, m_head->m_pkthdr.len,
            ((uint8_t *)chim) + pkt_hlen);

        txr->hn_gpa_cnt = 0;
        txr->hn_sendpkt = hn_txpkt_chim;
        goto done;
    }

    KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc"));
    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
        ("chimney buffer is used"));
    KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));

    error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
    if (__predict_false(error)) {
        int freed;

        /*
         * This mbuf is not linked w/ the txd yet, so free it now.
         */
        m_freem(m_head);
        *m_head0 = NULL;

        freed = hn_txdesc_put(txr, txd);
        KASSERT(freed != 0,
            ("fail to free txd upon txdma error"));

        txr->hn_txdma_failed++;
        if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
        return error;
    }
    *m_head0 = m_head;

    /* +1 RNDIS packet message */
    txr->hn_gpa_cnt = nsegs + 1;

    /* send packet with page buffer */
    txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
    txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
    txr->hn_gpa[0].gpa_len = pkt_hlen;

    /*
     * Fill the page buffers with mbuf info after the page
     * buffer for RNDIS packet message.
     */
    for (i = 0; i < nsegs; ++i) {
        struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];

        gpa->gpa_page = atop(segs[i].ds_addr);
        gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
        gpa->gpa_len = segs[i].ds_len;
    }

    txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
    txd->chim_size = 0;
    txr->hn_sendpkt = hn_txpkt_sglist;
done:
    txd->m = m_head;

    /* Set the completion routine */
    hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);

    /* Update temporary stats for later use. */
    txr->hn_stat_pkts++;
    txr->hn_stat_size += m_head->m_pkthdr.len;
    if (m_head->m_flags & M_MCAST)
        txr->hn_stat_mcasts++;

    return 0;
}

2147 * If this function fails, then txd will be freed, but the mbuf
2148 * associated w/ the txd will _not_ be freed.
2151 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
2153 int error, send_failed = 0, has_bpf;
2156 has_bpf = bpf_peers_present(ifp->if_bpf);
2159 * Make sure that this txd and any aggregated txds are not
2160 * freed before ETHER_BPF_MTAP.
2162 hn_txdesc_hold(txd);
2164 error = txr->hn_sendpkt(txr, txd);
2167 const struct hn_txdesc *tmp_txd;
2169 ETHER_BPF_MTAP(ifp, txd->m);
2170 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
2171 ETHER_BPF_MTAP(ifp, tmp_txd->m);
2174 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts);
2175 #ifdef HN_IFSTART_SUPPORT
2176 if (!hn_use_if_start)
2179 if_inc_counter(ifp, IFCOUNTER_OBYTES,
2180 txr->hn_stat_size);
2181 if (txr->hn_stat_mcasts != 0) {
2182 if_inc_counter(ifp, IFCOUNTER_OMCASTS,
2183 txr->hn_stat_mcasts);
2186 txr->hn_pkts += txr->hn_stat_pkts;
2190 hn_txdesc_put(txr, txd);
2192 if (__predict_false(error)) {
2196 * This should happen only very rarely.
2198 * XXX Too many RX packets to be ACKed or too many
2199 * sideband commands to run? Ask netvsc_channel_rollup()
2200 * to kick start later.
2202 txr->hn_has_txeof = 1;
2204 txr->hn_send_failed++;
2207 * Try sending again after setting hn_has_txeof,
2208 * in case we missed the last
2209 * netvsc_channel_rollup().
2213 if_printf(ifp, "send failed\n");
2216 * Caller will perform further processing on the
2217 * associated mbuf, so don't free it in hn_txdesc_put();
2218 * only unload it from the DMA map in hn_txdesc_put(),
2222 freed = hn_txdesc_put(txr, txd);
2224 ("fail to free txd upon send error"));
2226 txr->hn_send_failed++;
2229 /* Reset temporary stats, after this sending is done. */
2230 txr->hn_stat_size = 0;
2231 txr->hn_stat_pkts = 0;
2232 txr->hn_stat_mcasts = 0;
2238 * Append the specified data to the indicated mbuf chain.
2239 * Extend the mbuf chain if the new data does not fit in
2240 * existing space.
2242 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
2243 * There should be an equivalent in the kernel mbuf code,
2244 * but there does not appear to be one yet.
2246 * Differs from m_append() in that additional mbufs are
2247 * allocated with cluster size MJUMPAGESIZE, and filled
2248 * accordingly.
2250 * Return 1 if able to complete the job; otherwise 0.
2253 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
2256 int remainder, space;
2258 for (m = m0; m->m_next != NULL; m = m->m_next)
2259 ;
2260 remainder = len;
2261 space = M_TRAILINGSPACE(m);
2262 if (space > 0) {
2264 * Copy into available space.
2266 if (space > remainder)
2267 space = remainder;
2268 bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
2269 m->m_len += space;
2270 cp += space;
2271 remainder -= space;
2272 }
2273 while (remainder > 0) {
2275 * Allocate a new mbuf; could check space
2276 * and allocate a cluster instead.
2278 n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
2279 if (n == NULL)
2280 break;
2281 n->m_len = min(MJUMPAGESIZE, remainder);
2282 bcopy(cp, mtod(n, caddr_t), n->m_len);
2283 cp += n->m_len;
2284 remainder -= n->m_len;
2285 m->m_next = n;
2286 m = n;
2287 }
2288 if (m0->m_flags & M_PKTHDR)
2289 m0->m_pkthdr.len += len - remainder;
2291 return (remainder == 0);
2294 #if defined(INET) || defined(INET6)
2296 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
2298 #if __FreeBSD_version >= 1100095
2299 if (hn_lro_mbufq_depth) {
2300 tcp_lro_queue_mbuf(lc, m);
2301 return 0;
2302 }
2303 #endif
2304 return tcp_lro_rx(lc, m, 0);
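/*
 * NOTE:
 * tcp_lro_queue_mbuf() defers the mbuf to the LRO mbuf queue,
 * which is sorted and flushed in batches; this is expected to
 * aggregate better than calling tcp_lro_rx() directly for each
 * packet.
 */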
2309 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
2310 const struct hn_rxinfo *info)
2314 int size, do_lro = 0, do_csum = 1;
2315 int hash_type = M_HASHTYPE_OPAQUE;
2317 /* If the VF is active, inject the packet through the VF */
2318 ifp = rxr->hn_vf ? rxr->hn_vf : rxr->hn_ifp;
2320 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2323 * See the NOTE of hn_rndis_init_fixat(). This
2324 * function can be reached immediately after the
2325 * RNDIS is initialized but before the ifnet is
2326 * set up on the hn_attach() path; drop the unexpected
2327 * packets.
2332 if (dlen <= MHLEN) {
2333 m_new = m_gethdr(M_NOWAIT, MT_DATA);
2334 if (m_new == NULL) {
2335 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
2336 return (0);
2337 }
2338 memcpy(mtod(m_new, void *), data, dlen);
2339 m_new->m_pkthdr.len = m_new->m_len = dlen;
2340 rxr->hn_small_pkts++;
2341 } else {
2343 * Get an mbuf with a cluster. For packets 2K or less,
2344 * get a standard 2K cluster. For anything larger, get a
2345 * 4K cluster. Any buffers larger than 4K can cause problems
2346 * if looped around to the Hyper-V TX channel, so avoid them.
2348 size = MCLBYTES;
2349 if (dlen > MCLBYTES) {
2351 size = MJUMPAGESIZE;
2354 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
2355 if (m_new == NULL) {
2356 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
2357 return (0);
2358 }
2360 hv_m_append(m_new, dlen, data);
2362 m_new->m_pkthdr.rcvif = ifp;
2364 if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
2365 do_csum = 0;
2367 /* receive side checksum offload */
2368 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
2369 /* IP csum offload */
2370 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
2371 m_new->m_pkthdr.csum_flags |=
2372 (CSUM_IP_CHECKED | CSUM_IP_VALID);
2373 rxr->hn_csum_ip++;
2374 }
2376 /* TCP/UDP csum offload */
2377 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
2378 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
2379 m_new->m_pkthdr.csum_flags |=
2380 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2381 m_new->m_pkthdr.csum_data = 0xffff;
2382 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
2383 rxr->hn_csum_tcp++;
2384 else
2385 rxr->hn_csum_udp++;
2386 }
2390 * As of this write (Oct 28th, 2016), host side will turn
2391 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so
2392 * the do_lro setting here is actually _not_ accurate. We
2393 * depend on the RSS hash type check to reset do_lro.
2395 if ((info->csum_info &
2396 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
2397 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
2398 do_lro = 1;
2399 } else {
2400 const struct ether_header *eh;
2401 uint16_t etype;
2402 int hoff;
2404 hoff = sizeof(*eh);
2405 if (m_new->m_len < hoff)
2406 goto skip;
2407 eh = mtod(m_new, struct ether_header *);
2408 etype = ntohs(eh->ether_type);
2409 if (etype == ETHERTYPE_VLAN) {
2410 const struct ether_vlan_header *evl;
2412 hoff = sizeof(*evl);
2413 if (m_new->m_len < hoff)
2414 goto skip;
2415 evl = mtod(m_new, struct ether_vlan_header *);
2416 etype = ntohs(evl->evl_proto);
2419 if (etype == ETHERTYPE_IP) {
2420 int pr;
2422 pr = hn_check_iplen(m_new, hoff);
2423 if (pr == IPPROTO_TCP) {
2424 if (do_csum &&
2425 (rxr->hn_trust_hcsum &
2426 HN_TRUST_HCSUM_TCP)) {
2427 rxr->hn_csum_trusted++;
2428 m_new->m_pkthdr.csum_flags |=
2429 (CSUM_IP_CHECKED | CSUM_IP_VALID |
2430 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2431 m_new->m_pkthdr.csum_data = 0xffff;
2434 } else if (pr == IPPROTO_UDP) {
2435 if (do_csum &&
2436 (rxr->hn_trust_hcsum &
2437 HN_TRUST_HCSUM_UDP)) {
2438 rxr->hn_csum_trusted++;
2439 m_new->m_pkthdr.csum_flags |=
2440 (CSUM_IP_CHECKED | CSUM_IP_VALID |
2441 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2442 m_new->m_pkthdr.csum_data = 0xffff;
2444 } else if (pr != IPPROTO_DONE && do_csum &&
2445 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
2446 rxr->hn_csum_trusted++;
2447 m_new->m_pkthdr.csum_flags |=
2448 (CSUM_IP_CHECKED | CSUM_IP_VALID);
2452 skip:
2453 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
2454 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
2455 NDIS_VLAN_INFO_ID(info->vlan_info),
2456 NDIS_VLAN_INFO_PRI(info->vlan_info),
2457 NDIS_VLAN_INFO_CFI(info->vlan_info));
2458 m_new->m_flags |= M_VLANTAG;
2461 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
2463 m_new->m_pkthdr.flowid = info->hash_value;
2464 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
2465 NDIS_HASH_FUNCTION_TOEPLITZ) {
2466 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK);
2470 * do_lro is reset if the hash types are not TCP
2471 * related. See the comment in the above csum_flags
2472 * setup section.
2475 case NDIS_HASH_IPV4:
2476 hash_type = M_HASHTYPE_RSS_IPV4;
2477 do_lro = 0;
2478 break;
2480 case NDIS_HASH_TCP_IPV4:
2481 hash_type = M_HASHTYPE_RSS_TCP_IPV4;
2482 break;
2484 case NDIS_HASH_IPV6:
2485 hash_type = M_HASHTYPE_RSS_IPV6;
2486 do_lro = 0;
2487 break;
2489 case NDIS_HASH_IPV6_EX:
2490 hash_type = M_HASHTYPE_RSS_IPV6_EX;
2491 do_lro = 0;
2492 break;
2494 case NDIS_HASH_TCP_IPV6:
2495 hash_type = M_HASHTYPE_RSS_TCP_IPV6;
2496 break;
2498 case NDIS_HASH_TCP_IPV6_EX:
2499 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
2500 break;
2501 }
2502 }
2503 } else {
2504 m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
2505 }
2506 M_HASHTYPE_SET(m_new, hash_type);
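/*
 * NOTE:
 * The flowid recorded here is also consumed by hn_transmit(),
 * which selects the TX ring as flowid % hn_tx_ring_inuse, so a
 * flow stays on the same RX/TX ring pair.
 */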
2509 * Note: Moved RX completion back to hv_nv_on_receive() so all
2510 * messages (not just data messages) will trigger a response.
2516 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
2517 #if defined(INET) || defined(INET6)
2518 struct lro_ctrl *lro = &rxr->hn_lro;
2521 rxr->hn_lro_tried++;
2522 if (hn_lro_rx(lro, m_new) == 0) {
2530 /* We're not holding the lock here, so don't release it */
2531 (*ifp->if_input)(ifp, m_new);
2537 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2539 struct hn_softc *sc = ifp->if_softc;
2540 struct ifreq *ifr = (struct ifreq *)data;
2541 int mask, error = 0;
2545 if (ifr->ifr_mtu > HN_MTU_MAX) {
2552 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2557 if ((sc->hn_caps & HN_CAP_MTU) == 0) {
2558 /* Can't change MTU */
2564 if (ifp->if_mtu == ifr->ifr_mtu) {
2570 * Suspend this interface before the synthetic parts
2571 * are detached.
2576 * Detach the synthetic parts, i.e. NVS and RNDIS.
2578 hn_synth_detach(sc);
2581 * Reattach the synthetic parts, i.e. NVS and RNDIS,
2582 * with the new MTU setting.
2584 error = hn_synth_attach(sc, ifr->ifr_mtu);
2591 * Commit the requested MTU, after the synthetic parts
2592 * have been successfully attached.
2594 ifp->if_mtu = ifr->ifr_mtu;
2597 * Make sure that various parameters based on MTU are
2598 * still valid, after the MTU change.
2600 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
2601 hn_set_chim_size(sc, sc->hn_chim_szmax);
2602 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
2603 #if __FreeBSD_version >= 1100099
2604 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
2605 HN_LRO_LENLIM_MIN(ifp))
2606 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
2610 * All done! Resume the interface now.
2620 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2625 if (ifp->if_flags & IFF_UP) {
2626 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2628 * Caller might hold a mutex, e.g.
2629 * bpf; use busy-wait for the RNDIS
2630 * reply.
2633 hn_rxfilter_config(sc);
2639 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2642 sc->hn_if_flags = ifp->if_flags;
2649 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2651 if (mask & IFCAP_TXCSUM) {
2652 ifp->if_capenable ^= IFCAP_TXCSUM;
2653 if (ifp->if_capenable & IFCAP_TXCSUM)
2654 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
2656 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
2658 if (mask & IFCAP_TXCSUM_IPV6) {
2659 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2660 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2661 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
2663 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
2666 /* TODO: flip RNDIS offload parameters for RXCSUM. */
2667 if (mask & IFCAP_RXCSUM)
2668 ifp->if_capenable ^= IFCAP_RXCSUM;
2670 /* We can't distinguish IPv6 packets from IPv4 packets on the RX path. */
2671 if (mask & IFCAP_RXCSUM_IPV6)
2672 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2675 if (mask & IFCAP_LRO)
2676 ifp->if_capenable ^= IFCAP_LRO;
2678 if (mask & IFCAP_TSO4) {
2679 ifp->if_capenable ^= IFCAP_TSO4;
2680 if (ifp->if_capenable & IFCAP_TSO4)
2681 ifp->if_hwassist |= CSUM_IP_TSO;
2683 ifp->if_hwassist &= ~CSUM_IP_TSO;
2685 if (mask & IFCAP_TSO6) {
2686 ifp->if_capenable ^= IFCAP_TSO6;
2687 if (ifp->if_capenable & IFCAP_TSO6)
2688 ifp->if_hwassist |= CSUM_IP6_TSO;
2690 ifp->if_hwassist &= ~CSUM_IP6_TSO;
2700 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2704 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2706 * Multicast updates hold a mutex; use busy-wait for
2707 * the RNDIS reply.
2710 hn_rxfilter_config(sc);
2719 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
2723 error = ether_ioctl(ifp, cmd, data);
2730 hn_stop(struct hn_softc *sc, bool detaching)
2732 struct ifnet *ifp = sc->hn_ifp;
2737 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
2738 ("synthetic parts were not attached"));
2740 /* Disable polling. */
2743 /* Clear RUNNING bit _before_ hn_suspend_data() */
2744 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
2745 hn_suspend_data(sc);
2747 /* Clear OACTIVE bit. */
2748 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
2749 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
2750 sc->hn_tx_ring[i].hn_oactive = 0;
2753 * If the VF is active, make sure the filter is not 0, even if
2754 * the synthetic NIC is down.
2756 if (!detaching && (sc->hn_flags & HN_FLAG_VF))
2757 hn_rxfilter_config(sc);
2761 hn_init_locked(struct hn_softc *sc)
2763 struct ifnet *ifp = sc->hn_ifp;
2768 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
2771 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2774 /* Configure RX filter */
2775 hn_rxfilter_config(sc);
2777 /* Clear OACTIVE bit. */
2778 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
2779 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
2780 sc->hn_tx_ring[i].hn_oactive = 0;
2782 /* Clear TX 'suspended' bit. */
2783 hn_resume_tx(sc, sc->hn_tx_ring_inuse);
2785 /* Everything is ready; unleash! */
2786 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
2788 /* Re-enable polling if requested. */
2789 if (sc->hn_pollhz > 0)
2790 hn_polling(sc, sc->hn_pollhz);
2796 struct hn_softc *sc = xsc;
2803 #if __FreeBSD_version >= 1100099
2806 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
2808 struct hn_softc *sc = arg1;
2809 unsigned int lenlim;
2812 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
2813 error = sysctl_handle_int(oidp, &lenlim, 0, req);
2814 if (error || req->newptr == NULL)
2818 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
2819 lenlim > TCP_LRO_LENGTH_MAX) {
2823 hn_set_lro_lenlim(sc, lenlim);
2830 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
2832 struct hn_softc *sc = arg1;
2833 int ackcnt, error, i;
2836 * lro_ackcnt_lim is the append count limit;
2837 * +1 turns it into the aggregation limit.
2839 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
2840 error = sysctl_handle_int(oidp, &ackcnt, 0, req);
2841 if (error || req->newptr == NULL)
2844 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
2848 * Convert the aggregation limit back to the
2849 * append count limit.
2853 for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
2854 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
2862 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
2864 struct hn_softc *sc = arg1;
2869 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
2870 on = 1;
2872 error = sysctl_handle_int(oidp, &on, 0, req);
2873 if (error || req->newptr == NULL)
2877 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2878 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
2880 if (on)
2881 rxr->hn_trust_hcsum |= hcsum;
2882 else
2883 rxr->hn_trust_hcsum &= ~hcsum;
2890 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
2892 struct hn_softc *sc = arg1;
2893 int chim_size, error;
2895 chim_size = sc->hn_tx_ring[0].hn_chim_size;
2896 error = sysctl_handle_int(oidp, &chim_size, 0, req);
2897 if (error || req->newptr == NULL)
2900 if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
2904 hn_set_chim_size(sc, chim_size);
2909 #if __FreeBSD_version < 1100095
2911 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
2913 struct hn_softc *sc = arg1;
2914 int ofs = arg2, i, error;
2915 struct hn_rx_ring *rxr;
2919 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2920 rxr = &sc->hn_rx_ring[i];
2921 stat += *((int *)((uint8_t *)rxr + ofs));
2924 error = sysctl_handle_64(oidp, &stat, 0, req);
2925 if (error || req->newptr == NULL)
2928 /* Zero out this stat. */
2929 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2930 rxr = &sc->hn_rx_ring[i];
2931 *((int *)((uint8_t *)rxr + ofs)) = 0;
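/*
 * NOTE:
 * These stat handlers aggregate the per-ring counters on read;
 * writing any value through the sysctl resets the counters.
 * arg2 carries the byte offset of the counter within the ring
 * structure.
 */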
2937 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
2939 struct hn_softc *sc = arg1;
2940 int ofs = arg2, i, error;
2941 struct hn_rx_ring *rxr;
2945 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2946 rxr = &sc->hn_rx_ring[i];
2947 stat += *((uint64_t *)((uint8_t *)rxr + ofs));
2950 error = sysctl_handle_64(oidp, &stat, 0, req);
2951 if (error || req->newptr == NULL)
2954 /* Zero out this stat. */
2955 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2956 rxr = &sc->hn_rx_ring[i];
2957 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
2965 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2967 struct hn_softc *sc = arg1;
2968 int ofs = arg2, i, error;
2969 struct hn_rx_ring *rxr;
2973 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2974 rxr = &sc->hn_rx_ring[i];
2975 stat += *((u_long *)((uint8_t *)rxr + ofs));
2978 error = sysctl_handle_long(oidp, &stat, 0, req);
2979 if (error || req->newptr == NULL)
2982 /* Zero out this stat. */
2983 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2984 rxr = &sc->hn_rx_ring[i];
2985 *((u_long *)((uint8_t *)rxr + ofs)) = 0;
2991 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2993 struct hn_softc *sc = arg1;
2994 int ofs = arg2, i, error;
2995 struct hn_tx_ring *txr;
2999 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3000 txr = &sc->hn_tx_ring[i];
3001 stat += *((u_long *)((uint8_t *)txr + ofs));
3004 error = sysctl_handle_long(oidp, &stat, 0, req);
3005 if (error || req->newptr == NULL)
3008 /* Zero out this stat. */
3009 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3010 txr = &sc->hn_tx_ring[i];
3011 *((u_long *)((uint8_t *)txr + ofs)) = 0;
3017 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
3019 struct hn_softc *sc = arg1;
3020 int ofs = arg2, i, error, conf;
3021 struct hn_tx_ring *txr;
3023 txr = &sc->hn_tx_ring[0];
3024 conf = *((int *)((uint8_t *)txr + ofs));
3026 error = sysctl_handle_int(oidp, &conf, 0, req);
3027 if (error || req->newptr == NULL)
3031 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3032 txr = &sc->hn_tx_ring[i];
3033 *((int *)((uint8_t *)txr + ofs)) = conf;
3041 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS)
3043 struct hn_softc *sc = arg1;
3046 size = sc->hn_agg_size;
3047 error = sysctl_handle_int(oidp, &size, 0, req);
3048 if (error || req->newptr == NULL)
3052 sc->hn_agg_size = size;
3060 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS)
3062 struct hn_softc *sc = arg1;
3065 pkts = sc->hn_agg_pkts;
3066 error = sysctl_handle_int(oidp, &pkts, 0, req);
3067 if (error || req->newptr == NULL)
3071 sc->hn_agg_pkts = pkts;
3079 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS)
3081 struct hn_softc *sc = arg1;
3084 pkts = sc->hn_tx_ring[0].hn_agg_pktmax;
3085 return (sysctl_handle_int(oidp, &pkts, 0, req));
3089 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS)
3091 struct hn_softc *sc = arg1;
3094 align = sc->hn_tx_ring[0].hn_agg_align;
3095 return (sysctl_handle_int(oidp, &align, 0, req));
3099 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz)
3102 vmbus_chan_poll_disable(chan);
3104 vmbus_chan_poll_enable(chan, pollhz);
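/*
 * NOTE:
 * pollhz == 0 switches the channel back to interrupt-driven
 * operation; a non-zero pollhz makes the channel's event
 * taskqueue poll the VMBus ring at the given frequency instead.
 */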
3108 hn_polling(struct hn_softc *sc, u_int pollhz)
3110 int nsubch = sc->hn_rx_ring_inuse - 1;
3115 struct vmbus_channel **subch;
3118 subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
3119 for (i = 0; i < nsubch; ++i)
3120 hn_chan_polling(subch[i], pollhz);
3121 vmbus_subchan_rel(subch, nsubch);
3123 hn_chan_polling(sc->hn_prichan, pollhz);
3127 hn_polling_sysctl(SYSCTL_HANDLER_ARGS)
3129 struct hn_softc *sc = arg1;
3132 pollhz = sc->hn_pollhz;
3133 error = sysctl_handle_int(oidp, &pollhz, 0, req);
3134 if (error || req->newptr == NULL)
3138 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX))
3142 if (sc->hn_pollhz != pollhz) {
3143 sc->hn_pollhz = pollhz;
3144 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) &&
3145 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
3146 hn_polling(sc, sc->hn_pollhz);
3154 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
3156 struct hn_softc *sc = arg1;
3159 snprintf(verstr, sizeof(verstr), "%u.%u",
3160 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
3161 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
3162 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
3166 hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
3168 struct hn_softc *sc = arg1;
3175 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS);
3176 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req);
3180 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
3182 struct hn_softc *sc = arg1;
3183 char assist_str[128];
3187 hwassist = sc->hn_ifp->if_hwassist;
3189 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS);
3190 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req);
3194 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS)
3196 struct hn_softc *sc = arg1;
3197 char filter_str[128];
3201 filter = sc->hn_rx_filter;
3203 snprintf(filter_str, sizeof(filter_str), "%b", filter,
3205 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req);
3209 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
3211 struct hn_softc *sc = arg1;
3216 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
3217 if (error || req->newptr == NULL)
3220 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
3223 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
3225 if (sc->hn_rx_ring_inuse > 1) {
3226 error = hn_rss_reconfig(sc);
3228 /* Not RSS capable, at least for now; just save the RSS key. */
3237 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
3239 struct hn_softc *sc = arg1;
3244 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
3245 if (error || req->newptr == NULL)
3249 * Don't allow RSS indirect table changes if this interface is not
3250 * currently RSS capable.
3252 if (sc->hn_rx_ring_inuse == 1) {
3257 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
3260 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
3262 hn_rss_ind_fixup(sc);
3263 error = hn_rss_reconfig(sc);
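/*
 * NOTE:
 * Each RSS indirect table entry selects an RX ring;
 * hn_rss_ind_fixup() clamps entries pointing beyond
 * hn_rx_ring_inuse before hn_rss_reconfig() pushes the new
 * key/table to the RNDIS device.
 */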
3270 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS)
3272 struct hn_softc *sc = arg1;
3277 hash = sc->hn_rss_hash;
3279 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
3280 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
3284 hn_vf_sysctl(SYSCTL_HANDLER_ARGS)
3286 struct hn_softc *sc = arg1;
3292 vf = sc->hn_rx_ring[0].hn_vf;
3294 snprintf(vf_name, sizeof(vf_name), "%s", if_name(vf));
3296 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req);
3300 hn_check_iplen(const struct mbuf *m, int hoff)
3302 const struct ip *ip;
3303 int len, iphlen, iplen;
3304 const struct tcphdr *th;
3305 int thoff; /* TCP data offset */
3307 len = hoff + sizeof(struct ip);
3309 /* The packet must be at least the size of an IP header. */
3310 if (m->m_pkthdr.len < len)
3311 return IPPROTO_DONE;
3313 /* The fixed IP header must reside completely in the first mbuf. */
3314 if (m->m_len < len)
3315 return IPPROTO_DONE;
3317 ip = mtodo(m, hoff);
3319 /* Bound check the packet's stated IP header length. */
3320 iphlen = ip->ip_hl << 2;
3321 if (iphlen < sizeof(struct ip)) /* minimum header length */
3322 return IPPROTO_DONE;
3324 /* The full IP header must reside completely in the one mbuf. */
3325 if (m->m_len < hoff + iphlen)
3326 return IPPROTO_DONE;
3328 iplen = ntohs(ip->ip_len);
3331 * Check that the amount of data in the buffers is
3332 * at least as much as the IP header would have us expect.
3334 if (m->m_pkthdr.len < hoff + iplen)
3335 return IPPROTO_DONE;
3338 * Ignore IP fragments.
3340 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
3341 return IPPROTO_DONE;
3344 * The TCP/IP or UDP/IP header must be entirely contained within
3345 * the first fragment of a packet.
3347 switch (ip->ip_p) {
3348 case IPPROTO_TCP:
3349 if (iplen < iphlen + sizeof(struct tcphdr))
3350 return IPPROTO_DONE;
3351 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
3352 return IPPROTO_DONE;
3353 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
3354 thoff = th->th_off << 2;
3355 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
3356 return IPPROTO_DONE;
3357 if (m->m_len < hoff + iphlen + thoff)
3358 return IPPROTO_DONE;
3359 break;
3360 case IPPROTO_UDP:
3361 if (iplen < iphlen + sizeof(struct udphdr))
3362 return IPPROTO_DONE;
3363 if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
3364 return IPPROTO_DONE;
3365 break;
3366 default:
3367 if (iplen < iphlen)
3368 return IPPROTO_DONE;
3369 break;
3370 }
3372 return ip->ip_p;
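/*
 * NOTE:
 * hn_check_iplen() returns the IP protocol only when the full
 * IP and TCP/UDP headers are present and sane, so the RX path
 * can safely trust host-side checksum verification;
 * IPPROTO_DONE means the packet could not be validated.
 */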
3375 hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
3377 struct sysctl_oid_list *child;
3378 struct sysctl_ctx_list *ctx;
3379 device_t dev = sc->hn_dev;
3380 #if defined(INET) || defined(INET6)
3381 #if __FreeBSD_version >= 1100095
3388 * Create RXBUF for reception.
3391 * - It is shared by all channels.
3392 * - A large enough buffer is allocated; certain versions of NVS
3393 * may further limit the usable space.
3395 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
3396 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma,
3397 BUS_DMA_WAITOK | BUS_DMA_ZERO);
3398 if (sc->hn_rxbuf == NULL) {
3399 device_printf(sc->hn_dev, "allocate rxbuf failed\n");
3403 sc->hn_rx_ring_cnt = ring_cnt;
3404 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
3406 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
3407 M_DEVBUF, M_WAITOK | M_ZERO);
3409 #if defined(INET) || defined(INET6)
3410 #if __FreeBSD_version >= 1100095
3411 lroent_cnt = hn_lro_entry_count;
3412 if (lroent_cnt < TCP_LRO_ENTRIES)
3413 lroent_cnt = TCP_LRO_ENTRIES;
3415 device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
3417 #endif /* INET || INET6 */
3419 ctx = device_get_sysctl_ctx(dev);
3420 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
3422 /* Create dev.hn.UNIT.rx sysctl tree */
3423 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
3424 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3426 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
3427 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
3429 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
3430 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE,
3431 &rxr->hn_br_dma, BUS_DMA_WAITOK);
3432 if (rxr->hn_br == NULL) {
3433 device_printf(dev, "allocate bufring failed\n");
3437 if (hn_trust_hosttcp)
3438 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
3439 if (hn_trust_hostudp)
3440 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
3441 if (hn_trust_hostip)
3442 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
3443 rxr->hn_ifp = sc->hn_ifp;
3444 if (i < sc->hn_tx_ring_cnt)
3445 rxr->hn_txr = &sc->hn_tx_ring[i];
3446 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF;
3447 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK);
3449 rxr->hn_rxbuf = sc->hn_rxbuf;
3454 #if defined(INET) || defined(INET6)
3455 #if __FreeBSD_version >= 1100095
3456 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
3457 hn_lro_mbufq_depth);
3459 tcp_lro_init(&rxr->hn_lro);
3460 rxr->hn_lro.ifp = sc->hn_ifp;
3462 #if __FreeBSD_version >= 1100099
3463 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
3464 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
3466 #endif /* INET || INET6 */
3468 if (sc->hn_rx_sysctl_tree != NULL) {
3472 * Create per RX ring sysctl tree:
3473 * dev.hn.UNIT.rx.RINGID
3475 snprintf(name, sizeof(name), "%d", i);
3476 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
3477 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
3478 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3480 if (rxr->hn_rx_sysctl_tree != NULL) {
3481 SYSCTL_ADD_ULONG(ctx,
3482 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3483 OID_AUTO, "packets", CTLFLAG_RW,
3484 &rxr->hn_pkts, "# of packets received");
3485 SYSCTL_ADD_ULONG(ctx,
3486 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3487 OID_AUTO, "rss_pkts", CTLFLAG_RW,
3489 "# of packets w/ RSS info received");
3491 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3492 OID_AUTO, "pktbuf_len", CTLFLAG_RD,
3493 &rxr->hn_pktbuf_len, 0,
3494 "Temporary channel packet buffer length");
3499 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
3500 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3501 __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
3502 #if __FreeBSD_version < 1100095
3503 hn_rx_stat_int_sysctl,
3505 hn_rx_stat_u64_sysctl,
3507 "LU", "LRO queued");
3508 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
3509 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3510 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
3511 #if __FreeBSD_version < 1100095
3512 hn_rx_stat_int_sysctl,
3514 hn_rx_stat_u64_sysctl,
3516 "LU", "LRO flushed");
3517 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
3518 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3519 __offsetof(struct hn_rx_ring, hn_lro_tried),
3520 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
3521 #if __FreeBSD_version >= 1100099
3522 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
3523 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3524 hn_lro_lenlim_sysctl, "IU",
3525 "Max # of data bytes to be aggregated by LRO");
3526 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
3527 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3528 hn_lro_ackcnt_sysctl, "I",
3529 "Max # of ACKs to be aggregated by LRO");
3531 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
3532 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
3533 hn_trust_hcsum_sysctl, "I",
3534 "Trust tcp segement verification on host side, "
3535 "when csum info is missing");
3536 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
3537 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
3538 hn_trust_hcsum_sysctl, "I",
3539 "Trust udp datagram verification on host side, "
3540 "when csum info is missing");
3541 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
3542 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
3543 hn_trust_hcsum_sysctl, "I",
3544 "Trust ip packet verification on host side, "
3545 "when csum info is missing");
3546 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
3547 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3548 __offsetof(struct hn_rx_ring, hn_csum_ip),
3549 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
3550 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
3551 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3552 __offsetof(struct hn_rx_ring, hn_csum_tcp),
3553 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
3554 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
3555 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3556 __offsetof(struct hn_rx_ring, hn_csum_udp),
3557 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
3558 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
3559 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3560 __offsetof(struct hn_rx_ring, hn_csum_trusted),
3561 hn_rx_stat_ulong_sysctl, "LU",
3562 "# of packets that we trust host's csum verification");
3563 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
3564 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3565 __offsetof(struct hn_rx_ring, hn_small_pkts),
3566 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
3567 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed",
3568 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3569 __offsetof(struct hn_rx_ring, hn_ack_failed),
3570 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures");
3571 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
3572 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
3573 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
3574 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
3580 hn_destroy_rx_data(struct hn_softc *sc)
3584 if (sc->hn_rxbuf != NULL) {
3585 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0)
3586 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
3588 device_printf(sc->hn_dev, "RXBUF is referenced\n");
3589 sc->hn_rxbuf = NULL;
3592 if (sc->hn_rx_ring_cnt == 0)
3595 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
3596 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
3598 if (rxr->hn_br == NULL)
3600 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) {
3601 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
3603 device_printf(sc->hn_dev,
3604 "%dth channel bufring is referenced", i);
3608 #if defined(INET) || defined(INET6)
3609 tcp_lro_free(&rxr->hn_lro);
3611 free(rxr->hn_pktbuf, M_DEVBUF);
3613 free(sc->hn_rx_ring, M_DEVBUF);
3614 sc->hn_rx_ring = NULL;
3616 sc->hn_rx_ring_cnt = 0;
3617 sc->hn_rx_ring_inuse = 0;
3621 hn_tx_ring_create(struct hn_softc *sc, int id)
3623 struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
3624 device_t dev = sc->hn_dev;
3625 bus_dma_tag_t parent_dtag;
3629 txr->hn_tx_idx = id;
3631 #ifndef HN_USE_TXDESC_BUFRING
3632 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
3634 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
3636 txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
3637 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
3638 M_DEVBUF, M_WAITOK | M_ZERO);
3639 #ifndef HN_USE_TXDESC_BUFRING
3640 SLIST_INIT(&txr->hn_txlist);
3642 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF,
3643 M_WAITOK, &txr->hn_tx_lock);
3646 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) {
3647 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ(
3648 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id));
3650 txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt];
3653 #ifdef HN_IFSTART_SUPPORT
3654 if (hn_use_if_start) {
3655 txr->hn_txeof = hn_start_txeof;
3656 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
3657 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
3663 txr->hn_txeof = hn_xmit_txeof;
3664 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
3665 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
3667 br_depth = hn_get_txswq_depth(txr);
3668 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF,
3669 M_WAITOK, &txr->hn_tx_lock);
3672 txr->hn_direct_tx_size = hn_direct_tx_size;
3675 * Always schedule transmission instead of trying to do direct
3676 * transmission. This one gives the best performance so far.
3678 txr->hn_sched_tx = 1;
3680 parent_dtag = bus_get_dma_tag(dev);
3682 /* DMA tag for RNDIS packet messages. */
3683 error = bus_dma_tag_create(parent_dtag, /* parent */
3684 HN_RNDIS_PKT_ALIGN, /* alignment */
3685 HN_RNDIS_PKT_BOUNDARY, /* boundary */
3686 BUS_SPACE_MAXADDR, /* lowaddr */
3687 BUS_SPACE_MAXADDR, /* highaddr */
3688 NULL, NULL, /* filter, filterarg */
3689 HN_RNDIS_PKT_LEN, /* maxsize */
3691 HN_RNDIS_PKT_LEN, /* maxsegsize */
3693 NULL, /* lockfunc */
3694 NULL, /* lockfuncarg */
3695 &txr->hn_tx_rndis_dtag);
3697 device_printf(dev, "failed to create rndis dmatag\n");
3701 /* DMA tag for data. */
3702 error = bus_dma_tag_create(parent_dtag, /* parent */
3704 HN_TX_DATA_BOUNDARY, /* boundary */
3705 BUS_SPACE_MAXADDR, /* lowaddr */
3706 BUS_SPACE_MAXADDR, /* highaddr */
3707 NULL, NULL, /* filter, filterarg */
3708 HN_TX_DATA_MAXSIZE, /* maxsize */
3709 HN_TX_DATA_SEGCNT_MAX, /* nsegments */
3710 HN_TX_DATA_SEGSIZE, /* maxsegsize */
3712 NULL, /* lockfunc */
3713 NULL, /* lockfuncarg */
3714 &txr->hn_tx_data_dtag);
3716 device_printf(dev, "failed to create data dmatag\n");
3720 for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
3721 struct hn_txdesc *txd = &txr->hn_txdesc[i];
3724 txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
3725 STAILQ_INIT(&txd->agg_list);
3728 * Allocate and load RNDIS packet message.
3730 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
3731 (void **)&txd->rndis_pkt,
3732 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
3733 &txd->rndis_pkt_dmap);
3736 "failed to allocate rndis_packet_msg, %d\n", i);
3740 error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
3741 txd->rndis_pkt_dmap,
3742 txd->rndis_pkt, HN_RNDIS_PKT_LEN,
3743 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
3747 "failed to load rndis_packet_msg, %d\n", i);
3748 bus_dmamem_free(txr->hn_tx_rndis_dtag,
3749 txd->rndis_pkt, txd->rndis_pkt_dmap);
3753 /* DMA map for TX data. */
3754 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
3758 "failed to allocate tx data dmamap\n");
3759 bus_dmamap_unload(txr->hn_tx_rndis_dtag,
3760 txd->rndis_pkt_dmap);
3761 bus_dmamem_free(txr->hn_tx_rndis_dtag,
3762 txd->rndis_pkt, txd->rndis_pkt_dmap);
3766 /* All set, put it to list */
3767 txd->flags |= HN_TXD_FLAG_ONLIST;
3768 #ifndef HN_USE_TXDESC_BUFRING
3769 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
3771 buf_ring_enqueue(txr->hn_txdesc_br, txd);
3774 txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
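/*
 * NOTE:
 * Each txdesc owns a pre-allocated and pre-mapped RNDIS packet
 * message buffer plus a DMA map for the mbuf data, so no DMA
 * allocation has to happen on the transmit hot path.
 */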
3776 if (sc->hn_tx_sysctl_tree != NULL) {
3777 struct sysctl_oid_list *child;
3778 struct sysctl_ctx_list *ctx;
3782 * Create per TX ring sysctl tree:
3783 * dev.hn.UNIT.tx.RINGID
3785 ctx = device_get_sysctl_ctx(dev);
3786 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
3788 snprintf(name, sizeof(name), "%d", id);
3789 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
3790 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3792 if (txr->hn_tx_sysctl_tree != NULL) {
3793 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
3796 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
3797 CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
3798 "# of available TX descs");
3800 #ifdef HN_IFSTART_SUPPORT
3801 if (!hn_use_if_start)
3804 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
3805 CTLFLAG_RD, &txr->hn_oactive, 0,
3808 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
3809 CTLFLAG_RW, &txr->hn_pkts,
3810 "# of packets transmitted");
3811 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends",
3812 CTLFLAG_RW, &txr->hn_sends, "# of sends");
3820 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
3822 struct hn_tx_ring *txr = txd->txr;
3824 KASSERT(txd->m == NULL, ("still has mbuf installed"));
3825 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
3827 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
3828 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt,
3829 txd->rndis_pkt_dmap);
3830 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
3834 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd)
3837 KASSERT(txd->refs == 0 || txd->refs == 1,
3838 ("invalid txd refs %d", txd->refs));
3840 /* Aggregated txds will be freed by their aggregating txd. */
3841 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) {
3844 freed = hn_txdesc_put(txr, txd);
3845 KASSERT(freed, ("can't free txdesc"));
3850 hn_tx_ring_destroy(struct hn_tx_ring *txr)
3854 if (txr->hn_txdesc == NULL)
3859 * Because the freeing of aggregated txds will be deferred
3860 * to the aggregating txd, two passes are used here:
3861 * - The first pass GCes any pending txds. This GC is necessary,
3862 * since if the channels are revoked, the hypervisor will not
3863 * deliver send-done for all pending txds.
3864 * - The second pass frees the busdma resources, i.e. after all
3865 * txds are freed.
3867 for (i = 0; i < txr->hn_txdesc_cnt; ++i)
3868 hn_txdesc_gc(txr, &txr->hn_txdesc[i]);
3869 for (i = 0; i < txr->hn_txdesc_cnt; ++i)
3870 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]);
3872 if (txr->hn_tx_data_dtag != NULL)
3873 bus_dma_tag_destroy(txr->hn_tx_data_dtag);
3874 if (txr->hn_tx_rndis_dtag != NULL)
3875 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
3877 #ifdef HN_USE_TXDESC_BUFRING
3878 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF);
3881 free(txr->hn_txdesc, M_DEVBUF);
3882 txr->hn_txdesc = NULL;
3884 if (txr->hn_mbuf_br != NULL)
3885 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF);
3887 #ifndef HN_USE_TXDESC_BUFRING
3888 mtx_destroy(&txr->hn_txlist_spin);
3890 mtx_destroy(&txr->hn_tx_lock);
3894 hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
3896 struct sysctl_oid_list *child;
3897 struct sysctl_ctx_list *ctx;
3901 * Create TXBUF for chimney sending.
3903 * NOTE: It is shared by all channels.
3905 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev),
3906 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma,
3907 BUS_DMA_WAITOK | BUS_DMA_ZERO);
3908 if (sc->hn_chim == NULL) {
3909 device_printf(sc->hn_dev, "allocate txbuf failed\n");
3913 sc->hn_tx_ring_cnt = ring_cnt;
3914 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
3916 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
3917 M_DEVBUF, M_WAITOK | M_ZERO);
3919 ctx = device_get_sysctl_ctx(sc->hn_dev);
3920 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
3922 /* Create dev.hn.UNIT.tx sysctl tree */
3923 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
3924 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3926 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3929 error = hn_tx_ring_create(sc, i);
3934 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
3935 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3936 __offsetof(struct hn_tx_ring, hn_no_txdescs),
3937 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
3938 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
3939 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3940 __offsetof(struct hn_tx_ring, hn_send_failed),
3941 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure");
3942 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
3943 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3944 __offsetof(struct hn_tx_ring, hn_txdma_failed),
3945 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
3946 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed",
3947 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3948 __offsetof(struct hn_tx_ring, hn_flush_failed),
3949 hn_tx_stat_ulong_sysctl, "LU",
3950 "# of packet transmission aggregation flush failure");
3951 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
3952 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3953 __offsetof(struct hn_tx_ring, hn_tx_collapsed),
3954 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
3955 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
3956 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3957 __offsetof(struct hn_tx_ring, hn_tx_chimney),
3958 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
3959 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
3960 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3961 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
3962 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
3963 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
3964 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
3965 "# of total TX descs");
3966 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
3967 CTLFLAG_RD, &sc->hn_chim_szmax, 0,
3968 "Chimney send packet size upper boundary");
3969 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
3970 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3971 hn_chim_size_sysctl, "I", "Chimney send packet size limit");
3972 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
3973 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3974 __offsetof(struct hn_tx_ring, hn_direct_tx_size),
3975 hn_tx_conf_int_sysctl, "I",
3976 "Size of the packet for direct transmission");
3977 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
3978 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3979 __offsetof(struct hn_tx_ring, hn_sched_tx),
3980 hn_tx_conf_int_sysctl, "I",
3981 "Always schedule transmission "
3982 "instead of doing direct transmission");
3983 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
3984 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
3985 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
3986 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
3987 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax",
3988 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0,
3989 "Applied packet transmission aggregation size");
3990 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax",
3991 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
3992 hn_txagg_pktmax_sysctl, "I",
3993 "Applied packet transmission aggregation packets");
3994 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align",
3995 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
3996 hn_txagg_align_sysctl, "I",
3997 "Applied packet transmission aggregation alignment");
4003 hn_set_chim_size(struct hn_softc *sc, int chim_size)
4007 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
4008 sc->hn_tx_ring[i].hn_chim_size = chim_size;
4012 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
4014 struct ifnet *ifp = sc->hn_ifp;
4017 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
4020 KASSERT(sc->hn_ndis_tso_sgmin >= 2,
4021 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin));
4022 tso_minlen = sc->hn_ndis_tso_sgmin * mtu;
4024 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen &&
4025 sc->hn_ndis_tso_szmax <= IP_MAXPACKET,
4026 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax));
4028 if (tso_maxlen < tso_minlen)
4029 tso_maxlen = tso_minlen;
4030 else if (tso_maxlen > IP_MAXPACKET)
4031 tso_maxlen = IP_MAXPACKET;
4032 if (tso_maxlen > sc->hn_ndis_tso_szmax)
4033 tso_maxlen = sc->hn_ndis_tso_szmax;
4034 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
4036 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax);
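/*
 * NOTE:
 * The NDIS TSO size limit presumably covers the whole frame,
 * so room for an Ethernet header plus a VLAN tag is reserved
 * when deriving if_hw_tsomax above.
 */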
4040 hn_fixup_tx_data(struct hn_softc *sc)
4042 uint64_t csum_assist;
4045 hn_set_chim_size(sc, sc->hn_chim_szmax);
4046 if (hn_tx_chimney_size > 0 &&
4047 hn_tx_chimney_size < sc->hn_chim_szmax)
4048 hn_set_chim_size(sc, hn_tx_chimney_size);
4051 if (sc->hn_caps & HN_CAP_IPCS)
4052 csum_assist |= CSUM_IP;
4053 if (sc->hn_caps & HN_CAP_TCP4CS)
4054 csum_assist |= CSUM_IP_TCP;
4055 if (sc->hn_caps & HN_CAP_UDP4CS)
4056 csum_assist |= CSUM_IP_UDP;
4057 if (sc->hn_caps & HN_CAP_TCP6CS)
4058 csum_assist |= CSUM_IP6_TCP;
4059 if (sc->hn_caps & HN_CAP_UDP6CS)
4060 csum_assist |= CSUM_IP6_UDP;
4061 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
4062 sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
4064 if (sc->hn_caps & HN_CAP_HASHVAL) {
4066 * Support HASHVAL pktinfo on TX path.
4069 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
4070 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
4071 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
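/*
 * NOTE:
 * With HN_TX_FLAG_HASHVAL set, the TX path can append the mbuf
 * flowid as a HASHVAL pktinfo, so the host sees the RSS hash
 * value the guest computed for the packet.
 */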
4076 hn_destroy_tx_data(struct hn_softc *sc)
4080 if (sc->hn_chim != NULL) {
4081 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) {
4082 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
4084 device_printf(sc->hn_dev,
4085 "chimney sending buffer is referenced");
4090 if (sc->hn_tx_ring_cnt == 0)
4093 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
4094 hn_tx_ring_destroy(&sc->hn_tx_ring[i]);
4096 free(sc->hn_tx_ring, M_DEVBUF);
4097 sc->hn_tx_ring = NULL;
4099 sc->hn_tx_ring_cnt = 0;
4100 sc->hn_tx_ring_inuse = 0;
4103 #ifdef HN_IFSTART_SUPPORT
4106 hn_start_taskfunc(void *xtxr, int pending __unused)
4108 struct hn_tx_ring *txr = xtxr;
4110 mtx_lock(&txr->hn_tx_lock);
4111 hn_start_locked(txr, 0);
4112 mtx_unlock(&txr->hn_tx_lock);
4116 hn_start_locked(struct hn_tx_ring *txr, int len)
4118 struct hn_softc *sc = txr->hn_sc;
4119 struct ifnet *ifp = sc->hn_ifp;
4122 KASSERT(hn_use_if_start,
4123 ("hn_start_locked is called, when if_start is disabled"));
4124 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
4125 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
4126 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
4128 if (__predict_false(txr->hn_suspended))
4131 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4135 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
4136 struct hn_txdesc *txd;
4137 struct mbuf *m_head;
4140 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
4144 if (len > 0 && m_head->m_pkthdr.len > len) {
4146 * This sending could be time-consuming; let callers
4147 * dispatch this packet sending (and the sending of any
4148 * follow-up packets) to the tx taskqueue.
4150 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4155 #if defined(INET6) || defined(INET)
4156 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
4157 m_head = hn_tso_fixup(m_head);
4158 if (__predict_false(m_head == NULL)) {
4159 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
4165 txd = hn_txdesc_get(txr);
4167 txr->hn_no_txdescs++;
4168 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4169 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4173 error = hn_encap(ifp, txr, txd, &m_head);
4175 /* Both txd and m_head are freed */
4176 KASSERT(txr->hn_agg_txd == NULL,
4177 ("encap failed w/ pending aggregating txdesc"));
4181 if (txr->hn_agg_pktleft == 0) {
4182 if (txr->hn_agg_txd != NULL) {
4183 KASSERT(m_head == NULL,
4184 ("pending mbuf for aggregating txdesc"));
4185 error = hn_flush_txagg(ifp, txr);
4186 if (__predict_false(error)) {
4187 atomic_set_int(&ifp->if_drv_flags,
4192 KASSERT(m_head != NULL, ("mbuf was freed"));
4193 error = hn_txpkt(ifp, txr, txd);
4194 if (__predict_false(error)) {
4195 /* txd is freed, but m_head is not */
4196 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4197 atomic_set_int(&ifp->if_drv_flags,
4205 KASSERT(txr->hn_agg_txd != NULL,
4206 ("no aggregating txdesc"));
4207 KASSERT(m_head == NULL,
4208 ("pending mbuf for aggregating txdesc"));
4213 /* Flush pending aggregated transmission. */
4214 if (txr->hn_agg_txd != NULL)
4215 hn_flush_txagg(ifp, txr);
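/*
 * NOTE:
 * hn_encap() may merge small packets into one aggregating
 * txdesc (hn_agg_txd) instead of sending them one by one;
 * anything still pending when the loop exits is flushed above.
 */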
4220 hn_start(struct ifnet *ifp)
4222 struct hn_softc *sc = ifp->if_softc;
4223 struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
4225 if (txr->hn_sched_tx)
4228 if (mtx_trylock(&txr->hn_tx_lock)) {
4231 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
4232 mtx_unlock(&txr->hn_tx_lock);
4237 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
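/*
 * NOTE:
 * Direct transmission is attempted only when the TX ring lock
 * can be taken without blocking; if the lock is contended, or
 * hn_sched_tx is set, the work is deferred to the per-ring
 * taskqueue instead.
 */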
4241 hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
4243 struct hn_tx_ring *txr = xtxr;
4245 mtx_lock(&txr->hn_tx_lock);
4246 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
4247 hn_start_locked(txr, 0);
4248 mtx_unlock(&txr->hn_tx_lock);
4252 hn_start_txeof(struct hn_tx_ring *txr)
4254 struct hn_softc *sc = txr->hn_sc;
4255 struct ifnet *ifp = sc->hn_ifp;
4257 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
4259 if (txr->hn_sched_tx)
4262 if (mtx_trylock(&txr->hn_tx_lock)) {
4265 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4266 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
4267 mtx_unlock(&txr->hn_tx_lock);
4269 taskqueue_enqueue(txr->hn_tx_taskq,
4275 * Release the OACTIVE earlier, in the hope that
4276 * others can catch up. The task will clear the
4277 * flag again with the hn_tx_lock held to avoid
4278 * possible races.
4280 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4281 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
4285 #endif /* HN_IFSTART_SUPPORT */
4288 hn_xmit(struct hn_tx_ring *txr, int len)
4290 struct hn_softc *sc = txr->hn_sc;
4291 struct ifnet *ifp = sc->hn_ifp;
4292 struct mbuf *m_head;
4295 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
4296 #ifdef HN_IFSTART_SUPPORT
4297 KASSERT(hn_use_if_start == 0,
4298 ("hn_xmit is called, when if_start is enabled"));
4300 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
4302 if (__predict_false(txr->hn_suspended))
4305 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
4308 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
4309 struct hn_txdesc *txd;
4312 if (len > 0 && m_head->m_pkthdr.len > len) {
4314 * This sending could be time-consuming; let callers
4315 * dispatch this packet sending (and the sending of any
4316 * follow-up packets) to the tx taskqueue.
4318 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
4323 txd = hn_txdesc_get(txr);
4325 txr->hn_no_txdescs++;
4326 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
4327 txr->hn_oactive = 1;
4331 error = hn_encap(ifp, txr, txd, &m_head);
4333 /* Both txd and m_head are freed; discard */
4334 KASSERT(txr->hn_agg_txd == NULL,
4335 ("encap failed w/ pending aggregating txdesc"));
4336 drbr_advance(ifp, txr->hn_mbuf_br);
4340 if (txr->hn_agg_pktleft == 0) {
4341 if (txr->hn_agg_txd != NULL) {
4342 KASSERT(m_head == NULL,
4343 ("pending mbuf for aggregating txdesc"));
4344 error = hn_flush_txagg(ifp, txr);
4345 if (__predict_false(error)) {
4346 txr->hn_oactive = 1;
4350 KASSERT(m_head != NULL, ("mbuf was freed"));
4351 error = hn_txpkt(ifp, txr, txd);
4352 if (__predict_false(error)) {
4353 /* txd is freed, but m_head is not */
4354 drbr_putback(ifp, txr->hn_mbuf_br,
4356 txr->hn_oactive = 1;
4363 KASSERT(txr->hn_agg_txd != NULL,
4364 ("no aggregating txdesc"));
4365 KASSERT(m_head == NULL,
4366 ("pending mbuf for aggregating txdesc"));
4371 drbr_advance(ifp, txr->hn_mbuf_br);
4374 /* Flush pending aggregated transmission. */
4375 if (txr->hn_agg_txd != NULL)
4376 hn_flush_txagg(ifp, txr);
4381 hn_transmit(struct ifnet *ifp, struct mbuf *m)
4383 struct hn_softc *sc = ifp->if_softc;
4384 struct hn_tx_ring *txr;
4387 #if defined(INET6) || defined(INET)
4389 * Perform TSO packet header fixup now, since the TSO
4390 * packet header should be cache-hot.
4392 if (m->m_pkthdr.csum_flags & CSUM_TSO) {
4393 m = hn_tso_fixup(m);
4394 if (__predict_false(m == NULL)) {
4395 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
4402 * Select the TX ring based on flowid
4404 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
4405 #if defined(INET6) || defined(INET)
4408 if (m->m_pkthdr.len < 128 &&
4409 (m->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP)) &&
4410 (m->m_pkthdr.csum_flags & CSUM_TSO) == 0) {
4411 m = hn_check_tcpsyn(m, &tcpsyn);
4412 if (__predict_false(m == NULL)) {
4414 IFCOUNTER_OERRORS, 1);
4419 const int tcpsyn = 0;
4421 if (tcpsyn)
4422 idx = 0;
4423 else
4424 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
4426 txr = &sc->hn_tx_ring[idx];
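/*
 * NOTE:
 * A flow is mapped to a TX ring by taking the flowid modulo
 * the number of rings in use, except that small TCP SYN
 * segments detected by hn_check_tcpsyn() above are steered to
 * ring 0.
 */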
4428 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
4430 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
4434 if (txr->hn_oactive)
4437 if (txr->hn_sched_tx)
4440 if (mtx_trylock(&txr->hn_tx_lock)) {
4443 sched = hn_xmit(txr, txr->hn_direct_tx_size);
4444 mtx_unlock(&txr->hn_tx_lock);
4449 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
4454 hn_tx_ring_qflush(struct hn_tx_ring *txr)
4458 mtx_lock(&txr->hn_tx_lock);
4459 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
4461 mtx_unlock(&txr->hn_tx_lock);
4465 hn_xmit_qflush(struct ifnet *ifp)
4467 struct hn_softc *sc = ifp->if_softc;
4470 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
4471 hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
4476 hn_xmit_txeof(struct hn_tx_ring *txr)
4479 if (txr->hn_sched_tx)
4482 if (mtx_trylock(&txr->hn_tx_lock)) {
4485 txr->hn_oactive = 0;
4486 sched = hn_xmit(txr, txr->hn_direct_tx_size);
4487 mtx_unlock(&txr->hn_tx_lock);
4489 taskqueue_enqueue(txr->hn_tx_taskq,
4495 * Release the oactive earlier, in the hope that
4496 * others can catch up. The task will clear the
4497 * oactive again with the hn_tx_lock held to avoid
4498 * possible races.
4500 txr->hn_oactive = 0;
4501 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
4506 hn_xmit_taskfunc(void *xtxr, int pending __unused)
4508 struct hn_tx_ring *txr = xtxr;
4510 mtx_lock(&txr->hn_tx_lock);
4512 mtx_unlock(&txr->hn_tx_lock);
4516 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
4518 struct hn_tx_ring *txr = xtxr;
4520 mtx_lock(&txr->hn_tx_lock);
4521 txr->hn_oactive = 0;
4523 mtx_unlock(&txr->hn_tx_lock);
4527 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
4529 struct vmbus_chan_br cbr;
4530 struct hn_rx_ring *rxr;
4531 struct hn_tx_ring *txr = NULL;
4534 idx = vmbus_chan_subidx(chan);
4537 * Link this channel to RX/TX ring.
4539 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
4540 ("invalid channel index %d, should > 0 && < %d",
4541 idx, sc->hn_rx_ring_inuse));
4542 rxr = &sc->hn_rx_ring[idx];
4543 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
4544 ("RX ring %d already attached", idx));
4545 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
4546 rxr->hn_chan = chan;
4549 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
4550 idx, vmbus_chan_id(chan));
4553 if (idx < sc->hn_tx_ring_inuse) {
4554 txr = &sc->hn_tx_ring[idx];
4555 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
4556 ("TX ring %d already attached", idx));
4557 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
4559 txr->hn_chan = chan;
4561 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
4562 idx, vmbus_chan_id(chan));
4566 /* Bind this channel to a proper CPU. */
4567 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));
4572 cbr.cbr = rxr->hn_br;
4573 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
4574 cbr.cbr_txsz = HN_TXBR_SIZE;
4575 cbr.cbr_rxsz = HN_RXBR_SIZE;
4576 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
4578 if (error == EISCONN) {
4579 if_printf(sc->hn_ifp, "bufring is connected after "
4580 "chan%u open failure\n", vmbus_chan_id(chan));
4581 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
4583 if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
4584 vmbus_chan_id(chan), error);
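/*
 * NOTE:
 * EISCONN here means the host side still references the
 * bufring, e.g. after the channel was revoked; it is marked
 * with HN_RX_FLAG_BR_REF so the bufring pages are not freed
 * while the host may still access them.
 */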
4591 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
4593 struct hn_rx_ring *rxr;
4596 idx = vmbus_chan_subidx(chan);
4599 * Link this channel to RX/TX ring.
4601 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
4602 ("invalid channel index %d, should > 0 && < %d",
4603 idx, sc->hn_rx_ring_inuse));
4604 rxr = &sc->hn_rx_ring[idx];
4605 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
4606 ("RX ring %d is not attached", idx));
4607 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
4609 if (idx < sc->hn_tx_ring_inuse) {
4610 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
4612 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
4613 ("TX ring %d is not attached attached", idx));
4614 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
4618 * Close this channel.
4621 * Channel closing does _not_ destroy the target channel.
4623 error = vmbus_chan_close_direct(chan);
4624 if (error == EISCONN) {
4625 if_printf(sc->hn_ifp, "chan%u bufring is connected "
4626 "after being closed\n", vmbus_chan_id(chan));
4627 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
4629 if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
4630 vmbus_chan_id(chan), error);
4635 hn_attach_subchans(struct hn_softc *sc)
4637 struct vmbus_channel **subchans;
4638 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
4641 KASSERT(subchan_cnt > 0, ("no sub-channels"));
4643 /* Attach the sub-channels. */
4644 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
4645 for (i = 0; i < subchan_cnt; ++i) {
4648 error1 = hn_chan_attach(sc, subchans[i]);
4651 /* Move on; all channels will be detached later. */
4654 vmbus_subchan_rel(subchans, subchan_cnt);
4657 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
4660 if_printf(sc->hn_ifp, "%d sub-channels attached\n",
4668 hn_detach_allchans(struct hn_softc *sc)
4670 struct vmbus_channel **subchans;
4671 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
4674 if (subchan_cnt == 0)
4677 /* Detach the sub-channels. */
4678 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
4679 for (i = 0; i < subchan_cnt; ++i)
4680 hn_chan_detach(sc, subchans[i]);
4681 vmbus_subchan_rel(subchans, subchan_cnt);
4685 * Detach the primary channel, _after_ all sub-channels
4688 hn_chan_detach(sc, sc->hn_prichan);
4690 /* Wait for sub-channels to be destroyed, if any. */
4691 vmbus_subchan_drain(sc->hn_prichan);
4694 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
4695 KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
4696 HN_RX_FLAG_ATTACHED) == 0,
4697 ("%dth RX ring is still attached", i));
4699 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
4700 KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
4701 HN_TX_FLAG_ATTACHED) == 0,
4702 ("%dth TX ring is still attached", i));
4708 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
4710 struct vmbus_channel **subchans;
4711 int nchan, rxr_cnt, error;
4713 nchan = *nsubch + 1;
4716 * Multiple RX/TX rings are not requested.
4723 * Query RSS capabilities, e.g. # of RX rings, and # of indirect table entries.
4726 error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
4728 /* No RSS; this is benign. */
4733 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
4737 if (nchan > rxr_cnt)
4738 nchan = rxr_cnt;
4739 if (nchan == 1) {
4740 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
4746 * Allocate sub-channels from NVS.
4748 *nsubch = nchan - 1;
4749 error = hn_nvs_alloc_subchans(sc, nsubch);
4750 if (error || *nsubch == 0) {
4751 /* Failed to allocate sub-channels. */
4757 * Wait for all sub-channels to become ready before moving on.
4759 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
4760 vmbus_subchan_rel(subchans, *nsubch);
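/*
 * Worked example for the negotiation above (hypothetical numbers):
 * with hn_rx_ring_cnt == 8 the caller asks for 7 sub-channels, so
 * nchan starts at 8; if RSS caps only offer rxr_cnt == 4 RX rings,
 * nchan is clamped to 4 and 3 sub-channels are requested from NVS,
 * which may still grant fewer in *nsubch.
 */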
4765 hn_synth_attachable(const struct hn_softc *sc)
4769 if (sc->hn_flags & HN_FLAG_ERRORS)
4772 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
4773 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
4775 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
4782 * Make sure that the RX filter is zero after the successful
4783 * RNDIS initialization.
4786 * Under certain conditions on certain versions of Hyper-V,
4787 * the RNDIS rxfilter is _not_ zero on the hypervisor side
4788 * after the successful RNDIS initialization, which breaks
4789 * the assumption of any following code (well, it breaks the
4790 * RNDIS API contract actually). Clear the RNDIS rxfilter
4791 * explicitly, drain packets sneaking through, and drain the
4792 * interrupt taskqueues scheduled due to the stealth packets.
4795 hn_rndis_init_fixat(struct hn_softc *sc, int nchan)
4798 hn_disable_rx(sc);
4799 hn_drain_rxtx(sc, nchan);
4803 hn_synth_attach(struct hn_softc *sc, int mtu)
4805 #define ATTACHED_NVS 0x0002
4806 #define ATTACHED_RNDIS 0x0004
4808 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
4809 int error, nsubch, nchan = 1, i, rndis_inited;
4810 uint32_t old_caps, attached = 0;
4812 KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
4813 ("synthetic parts were attached"));
4815 if (!hn_synth_attachable(sc))
4818 /* Save capabilities for later verification. */
4819 old_caps = sc->hn_caps;
4822 /* Clear RSS stuffs. */
4823 sc->hn_rss_ind_size = 0;
4824 sc->hn_rss_hash = 0;
4827 * Attach the primary channel _before_ attaching NVS and RNDIS.
4829 error = hn_chan_attach(sc, sc->hn_prichan);
4836 error = hn_nvs_attach(sc, mtu);
4839 attached |= ATTACHED_NVS;
4842 * Attach RNDIS _after_ NVS is attached.
4844 error = hn_rndis_attach(sc, mtu, &rndis_inited);
4846 attached |= ATTACHED_RNDIS;
4851 * Make sure capabilities are not changed.
4853 if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
4854 if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
4855 old_caps, sc->hn_caps);
4861 * Allocate sub-channels for multi-TX/RX rings.
4864 * The # of RX rings that can be used is equivalent to the # of
4865 * channels to be requested.
4867 nsubch = sc->hn_rx_ring_cnt - 1;
4868 error = hn_synth_alloc_subchans(sc, &nsubch);
4871 /* NOTE: _Full_ synthetic parts detach is required now. */
4872 sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;
4875 * Set the # of TX/RX rings that could be used according to
4876 * the # of channels that NVS offered.
4879 hn_set_ring_inuse(sc, nchan);
4881 /* Only the primary channel can be used; done */
4886 * Attach the sub-channels.
4888 * NOTE: hn_set_ring_inuse() _must_ have been called.
4890 error = hn_attach_subchans(sc);
4895 * Configure RSS key and indirect table _after_ all sub-channels
4898 if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
4900 * RSS key is not set yet; set it to the default RSS key.
4903 if_printf(sc->hn_ifp, "setup default RSS key\n");
4904 memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
4905 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
4908 if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
4910 * RSS indirect table is not set yet; set it up in round-robin fashion.
4914 if_printf(sc->hn_ifp, "setup default RSS indirect table\n");
4917 for (i = 0; i < NDIS_HASH_INDCNT; ++i)
4918 rss->rss_ind[i] = i % nchan;
4919 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
4922 * # of usable channels may be changed, so we have to
4923 * make sure that all entries in RSS indirect table are valid.
4926 * NOTE: hn_set_ring_inuse() _must_ have been called.
4928 hn_rss_ind_fixup(sc);
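#if 0
		/*
		 * Minimal sketch of the fixup (an assumption about what
		 * hn_rss_ind_fixup() does, not its actual implementation):
		 * fold any indirect table entry that points past the
		 * usable rings back into [0, hn_rx_ring_inuse).
		 */
		for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
			if (rss->rss_ind[i] >= sc->hn_rx_ring_inuse)
				rss->rss_ind[i] %= sc->hn_rx_ring_inuse;
		}
#endif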
4931 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
4936 * Fixup transmission aggregation setup.
4939 hn_rndis_init_fixat(sc, nchan);
4943 if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
4944 hn_rndis_init_fixat(sc, nchan);
4945 hn_synth_detach(sc);
4947 if (attached & ATTACHED_RNDIS) {
4948 hn_rndis_init_fixat(sc, nchan);
4949 hn_rndis_detach(sc);
4951 if (attached & ATTACHED_NVS)
4953 hn_chan_detach(sc, sc->hn_prichan);
4954 /* Restore old capabilities. */
4955 sc->hn_caps = old_caps;
4959 #undef ATTACHED_RNDIS
4960 #undef ATTACHED_NVS
4965 * The interface must have been suspended through hn_suspend(), before
4966 * this function gets called.
4969 hn_synth_detach(struct hn_softc *sc)
4972 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
4973 ("synthetic parts were not attached"));
4975 /* Detach the RNDIS first. */
4976 hn_rndis_detach(sc);
4981 /* Detach all of the channels. */
4982 hn_detach_allchans(sc);
4984 sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
4988 hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
4990 KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
4991 ("invalid ring count %d", ring_cnt));
4993 if (sc->hn_tx_ring_cnt > ring_cnt)
4994 sc->hn_tx_ring_inuse = ring_cnt;
4995 else
4996 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
4997 sc->hn_rx_ring_inuse = ring_cnt;
5000 if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
5001 sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
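/*
 * e.g. if NVS granted ring_cnt == 4 channels but only 2 TX rings were
 * allocated, hn_tx_ring_inuse becomes 2 while hn_rx_ring_inuse becomes
 * 4: RX rings map 1:1 onto channels, TX rings need not.
 */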
5006 hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
5011 * The TX bufring will not be drained by the hypervisor if the
5012 * primary channel is revoked.
5014 while (!vmbus_chan_rx_empty(chan) ||
5015 (!vmbus_chan_is_revoked(sc->hn_prichan) &&
5016 !vmbus_chan_tx_empty(chan)))
5018 vmbus_chan_intr_drain(chan);
5022 hn_disable_rx(struct hn_softc *sc)
5026 * Disable RX by clearing RX filter forcefully.
5028 sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
5029 hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */
5032 * Give RNDIS enough time to flush all pending data packets.
5034 pause("waitrx", (200 * hz) / 1000);
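	/* NOTE: (200 * hz) / 1000 above is simply 200ms converted to ticks for pause(9). */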
5039 * RX/TX _must_ have been suspended/disabled before this function is called.
5043 hn_drain_rxtx(struct hn_softc *sc, int nchan)
5045 struct vmbus_channel **subch = NULL;
5049 * Drain RX/TX bufrings and interrupts.
5053 subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
5055 if (subch != NULL) {
5058 for (i = 0; i < nsubch; ++i)
5059 hn_chan_drain(sc, subch[i]);
5061 hn_chan_drain(sc, sc->hn_prichan);
5064 vmbus_subchan_rel(subch, nsubch);
5068 hn_suspend_data(struct hn_softc *sc)
5070 struct hn_tx_ring *txr;
5078 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
5079 txr = &sc->hn_tx_ring[i];
5081 mtx_lock(&txr->hn_tx_lock);
5082 txr->hn_suspended = 1;
5083 mtx_unlock(&txr->hn_tx_lock);
5084 /* No one is able to send more packets now. */
5087 * Wait for all pending sends to finish.
5090 * We will _not_ receive all pending send-done notifications if the
5091 * primary channel is revoked.
5093 while (hn_tx_ring_pending(txr) &&
5094 !vmbus_chan_is_revoked(sc->hn_prichan))
5095 pause("hnwtx", 1 /* 1 tick */);
5106 hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);
5109 * Drain any pending TX tasks.
5112 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX
5113 * tasks must be drained _after_ it.
5115 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
5116 txr = &sc->hn_tx_ring[i];
5118 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
5119 taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
5124 hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
5127 ((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
5131 hn_suspend_mgmt(struct hn_softc *sc)
5138 * Make sure that hn_mgmt_taskq0 can no longer be accessed
5139 * through hn_mgmt_taskq.
5141 TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
5142 vmbus_chan_run_task(sc->hn_prichan, &task);
5145 * Make sure that all pending management tasks are completed.
5147 taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
5148 taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
5149 taskqueue_drain_all(sc->hn_mgmt_taskq0);
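/*
 * NOTE:
 * Clearing hn_mgmt_taskq from a task run in the primary channel's own
 * task context (vmbus_chan_run_task() above) guarantees the channel
 * callback can no longer enqueue new management tasks, so the drains
 * above cannot race with new submissions.
 */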
5153 hn_suspend(struct hn_softc *sc)
5156 /* Disable polling. */
5159 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
5160 (sc->hn_flags & HN_FLAG_VF))
5161 hn_suspend_data(sc);
5162 hn_suspend_mgmt(sc);
5166 hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
5170 KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
5171 ("invalid TX ring count %d", tx_ring_cnt));
5173 for (i = 0; i < tx_ring_cnt; ++i) {
5174 struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
5176 mtx_lock(&txr->hn_tx_lock);
5177 txr->hn_suspended = 0;
5178 mtx_unlock(&txr->hn_tx_lock);
5183 hn_resume_data(struct hn_softc *sc)
5192 hn_rxfilter_config(sc);
5195 * Make sure to clear suspend status on "all" TX rings,
5196 * since hn_tx_ring_inuse can be changed after
5197 * hn_suspend_data().
5199 hn_resume_tx(sc, sc->hn_tx_ring_cnt);
5201 #ifdef HN_IFSTART_SUPPORT
5202 if (!hn_use_if_start)
5206 * Flush unused drbrs, since hn_tx_ring_inuse may have been reduced.
5209 for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
5210 hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
5216 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
5217 struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
5220 * Use the txeof task, so that any pending oactive can be cleared properly.
5223 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
5228 hn_resume_mgmt(struct hn_softc *sc)
5231 sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
5234 * Kick off network change detection, if it was pending.
5235 * If no network change was pending, start link status
5236 * checks, which are more lightweight than network change detection.
5239 if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
5240 hn_change_network(sc);
5241 else
5242 hn_update_link_status(sc);
5246 hn_resume(struct hn_softc *sc)
5249 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
5250 (sc->hn_flags & HN_FLAG_VF))
5251 hn_resume_data(sc);
5254 * When the VF is activated, the synthetic interface is changed
5255 * to DOWN in hn_set_vf(). Here, if the VF is still active, we
5256 * don't call hn_resume_mgmt() until the VF is deactivated in hn_set_vf().
5259 if (!(sc->hn_flags & HN_FLAG_VF))
5260 hn_resume_mgmt(sc);
5263 * Re-enable polling if this interface is running and
5264 * the polling is requested.
5266 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
5267 hn_polling(sc, sc->hn_pollhz);
5271 hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
5273 const struct rndis_status_msg *msg;
5276 if (dlen < sizeof(*msg)) {
5277 if_printf(sc->hn_ifp, "invalid RNDIS status\n");
5282 switch (msg->rm_status) {
5283 case RNDIS_STATUS_MEDIA_CONNECT:
5284 case RNDIS_STATUS_MEDIA_DISCONNECT:
5285 hn_update_link_status(sc);
5288 case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
5289 /* Not really useful; ignore. */
5292 case RNDIS_STATUS_NETWORK_CHANGE:
5293 ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
5294 if (dlen < ofs + msg->rm_stbuflen ||
5295 msg->rm_stbuflen < sizeof(uint32_t)) {
5296 if_printf(sc->hn_ifp, "network changed\n");
5300 memcpy(&change, ((const uint8_t *)msg) + ofs,
5302 if_printf(sc->hn_ifp, "network changed, change %u\n",
5305 hn_change_network(sc);
5309 if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
5316 hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
5318 const struct rndis_pktinfo *pi = info_data;
5321 while (info_dlen != 0) {
5325 if (__predict_false(info_dlen < sizeof(*pi)))
5327 if (__predict_false(info_dlen < pi->rm_size))
5329 info_dlen -= pi->rm_size;
5331 if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
5333 if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
5335 dlen = pi->rm_size - pi->rm_pktinfooffset;
5336 data = pi->rm_data;
5338 switch (pi->rm_type) {
5339 case NDIS_PKTINFO_TYPE_VLAN:
5340 if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
5342 info->vlan_info = *((const uint32_t *)data);
5343 mask |= HN_RXINFO_VLAN;
5346 case NDIS_PKTINFO_TYPE_CSUM:
5347 if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
5349 info->csum_info = *((const uint32_t *)data);
5350 mask |= HN_RXINFO_CSUM;
5353 case HN_NDIS_PKTINFO_TYPE_HASHVAL:
5354 if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
5356 info->hash_value = *((const uint32_t *)data);
5357 mask |= HN_RXINFO_HASHVAL;
5360 case HN_NDIS_PKTINFO_TYPE_HASHINF:
5361 if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
5363 info->hash_info = *((const uint32_t *)data);
5364 mask |= HN_RXINFO_HASHINF;
5371 if (mask == HN_RXINFO_ALL) {
5372 /* All found; done */
5376 pi = (const struct rndis_pktinfo *)
5377 ((const uint8_t *)pi + pi->rm_size);
5382 * - If there is no hash value, invalidate the hash info.
5384 if ((mask & HN_RXINFO_HASHVAL) == 0)
5385 info->hash_info = HN_NDIS_HASH_INFO_INVALID;
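/*
 * Per-packet-info layout walked above (per the rndis_pktinfo
 * definition in <net/rndis.h>): each element occupies rm_size bytes
 * and carries its payload rm_pktinfooffset bytes from the element
 * start, hence the size/offset sanity checks in the loop:
 *
 *	| rm_size | rm_type | rm_pktinfooffset | ... payload ... |
 */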
5389 static __inline bool
5390 hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
5393 if (off < check_off) {
5394 if (__predict_true(off + len <= check_off))
5395 return (false);
5396 } else if (off > check_off) {
5397 if (__predict_true(check_off + check_len <= off))
5398 return (false);
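	/*
	 * e.g. off 0, len 16 vs. check_off 16, check_len 4:
	 * 0 + 16 <= 16, so the two ranges merely touch and do not
	 * overlap.
	 */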
5404 hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
5406 const struct rndis_packet_msg *pkt;
5407 struct hn_rxinfo info;
5408 int data_off, pktinfo_off, data_len, pktinfo_len;
5413 if (__predict_false(dlen < sizeof(*pkt))) {
5414 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
5419 if (__predict_false(dlen < pkt->rm_len)) {
5420 if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
5421 "dlen %d, msglen %u\n", dlen, pkt->rm_len);
5424 if (__predict_false(pkt->rm_len <
5425 pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
5426 if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
5427 "msglen %u, data %u, oob %u, pktinfo %u\n",
5428 pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
5429 pkt->rm_pktinfolen);
5432 if (__predict_false(pkt->rm_datalen == 0)) {
5433 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
5440 #define IS_OFFSET_INVALID(ofs) \
5441 ((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN || \
5442 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))
5444 /* XXX Hyper-V does not meet data offset alignment requirement */
5445 if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
5446 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
5447 "data offset %u\n", pkt->rm_dataoffset);
5450 if (__predict_false(pkt->rm_oobdataoffset > 0 &&
5451 IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
5452 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
5453 "oob offset %u\n", pkt->rm_oobdataoffset);
5456 if (__predict_true(pkt->rm_pktinfooffset > 0) &&
5457 __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
5458 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
5459 "pktinfo offset %u\n", pkt->rm_pktinfooffset);
5463 #undef IS_OFFSET_INVALID
5465 data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
5466 data_len = pkt->rm_datalen;
5467 pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
5468 pktinfo_len = pkt->rm_pktinfolen;
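	/*
	 * NOTE:
	 * Offsets carried in a REMOTE_NDIS_PACKET_MSG are relative to the
	 * rm_dataoffset field rather than to the start of the message;
	 * RNDIS_PACKET_MSG_OFFSET_ABS() converts them into absolute
	 * offsets from the message start, which is what all of the
	 * overflow/overlap checks below operate on.
	 */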
5471 * Check OOB coverage.
5473 if (__predict_false(pkt->rm_oobdatalen != 0)) {
5474 int oob_off, oob_len;
5476 if_printf(rxr->hn_ifp, "got oobdata\n");
5477 oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
5478 oob_len = pkt->rm_oobdatalen;
5480 if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
5481 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
5482 "oob overflow, msglen %u, oob abs %d len %d\n",
5483 pkt->rm_len, oob_off, oob_len);
5488 * Check against data.
5490 if (hn_rndis_check_overlap(oob_off, oob_len,
5491 data_off, data_len)) {
5492 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
5493 "oob overlaps data, oob abs %d len %d, "
5494 "data abs %d len %d\n",
5495 oob_off, oob_len, data_off, data_len);
5500 * Check against pktinfo.
5502 if (pktinfo_len != 0 &&
5503 hn_rndis_check_overlap(oob_off, oob_len,
5504 pktinfo_off, pktinfo_len)) {
5505 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
5506 "oob overlaps pktinfo, oob abs %d len %d, "
5507 "pktinfo abs %d len %d\n",
5508 oob_off, oob_len, pktinfo_off, pktinfo_len);
5514 * Check per-packet-info coverage and find useful per-packet-info.
5516 info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
5517 info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
5518 info.hash_info = HN_NDIS_HASH_INFO_INVALID;
5519 if (__predict_true(pktinfo_len != 0)) {
5523 if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
5524 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
5525 "pktinfo overflow, msglen %u, "
5526 "pktinfo abs %d len %d\n",
5527 pkt->rm_len, pktinfo_off, pktinfo_len);
5532 * Check packet info coverage.
5534 overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
5535 data_off, data_len);
5536 if (__predict_false(overlap)) {
5537 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
5538 "pktinfo overlaps data, pktinfo abs %d len %d, "
5539 "data abs %d len %d\n",
5540 pktinfo_off, pktinfo_len, data_off, data_len);
5545 * Find useful per-packet-info.
5547 error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
5548 pktinfo_len, &info);
5549 if (__predict_false(error)) {
5550 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
5556 if (__predict_false(data_off + data_len > pkt->rm_len)) {
5557 if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
5558 "data overflow, msglen %u, data abs %d len %d\n",
5559 pkt->rm_len, data_off, data_len);
5562 hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
5565 static __inline void
5566 hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
5568 const struct rndis_msghdr *hdr;
5570 if (__predict_false(dlen < sizeof(*hdr))) {
5571 if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
5576 if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
5577 /* Hot data path. */
5578 hn_rndis_rx_data(rxr, data, dlen);
5583 if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
5584 hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
5585 else
5586 hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
5590 hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
5592 const struct hn_nvs_hdr *hdr;
5594 if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
5595 if_printf(sc->hn_ifp, "invalid nvs notify\n");
5598 hdr = VMBUS_CHANPKT_CONST_DATA(pkt);
5600 if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
5601 /* Useless; ignore */
5604 if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
5608 hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
5609 const struct vmbus_chanpkt_hdr *pkt)
5611 struct hn_nvs_sendctx *sndc;
5613 sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
5614 sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
5615 VMBUS_CHANPKT_DATALEN(pkt));
5618 * 'sndc' CAN NOT be accessed anymore, since it can be freed by its callback.
5624 hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
5625 const struct vmbus_chanpkt_hdr *pkthdr)
5627 const struct vmbus_chanpkt_rxbuf *pkt;
5628 const struct hn_nvs_hdr *nvs_hdr;
5631 if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
5632 if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
5635 nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);
5637 /* Make sure that this is a RNDIS message. */
5638 if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
5639 if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
5644 hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
5645 if (__predict_false(hlen < sizeof(*pkt))) {
5646 if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
5649 pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;
5651 if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
5652 if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
5657 count = pkt->cp_rxbuf_cnt;
5658 if (__predict_false(hlen <
5659 __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
5660 if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
5664 /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
5665 for (i = 0; i < count; ++i) {
5668 ofs = pkt->cp_rxbuf[i].rb_ofs;
5669 len = pkt->cp_rxbuf[i].rb_len;
5670 if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
5671 if_printf(rxr->hn_ifp, "%dth RNDIS msg overflows rxbuf, "
5672 "ofs %d, len %d\n", i, ofs, len);
5675 hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
5679 * Ack the consumed RXBUF associated w/ this channel packet,
5680 * so that this RXBUF can be recycled by the hypervisor.
5682 hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
5686 hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
5689 struct hn_nvs_rndis_ack ack;
5692 ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
5693 ack.nvs_status = HN_NVS_STATUS_OK;
5697 error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
5698 VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
5699 if (__predict_false(error == EAGAIN)) {
5702 * This should _not_ happen in the real world, since the
5703 * consumption of the TX bufring from the TX path is
5706 if (rxr->hn_ack_failed == 0)
5707 if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
5708 rxr->hn_ack_failed++;
5715 if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
5720 hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
5722 struct hn_rx_ring *rxr = xrxr;
5723 struct hn_softc *sc = rxr->hn_ifp->if_softc;
5726 struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
5729 pktlen = rxr->hn_pktbuf_len;
5730 error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
5731 if (__predict_false(error == ENOBUFS)) {
5736 * Expand channel packet buffer.
5739 * Use M_WAITOK here, since allocation failure cannot be handled gracefully at this point.
5742 nlen = rxr->hn_pktbuf_len * 2;
5743 while (nlen < pktlen)
5744 nlen *= 2;
5745 nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);
5747 if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
5748 rxr->hn_pktbuf_len, nlen);
5750 free(rxr->hn_pktbuf, M_DEVBUF);
5751 rxr->hn_pktbuf = nbuf;
5752 rxr->hn_pktbuf_len = nlen;
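			/*
			 * e.g. a 4KB hn_pktbuf facing a 9KB channel packet:
			 * nlen doubles 4KB -> 8KB -> 16KB, the old buffer is
			 * freed above, and the receive is then retried with
			 * the enlarged buffer.
			 */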
5755 } else if (__predict_false(error == EAGAIN)) {
5756 /* No more channel packets; done! */
5759 KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));
5761 switch (pkt->cph_type) {
5762 case VMBUS_CHANPKT_TYPE_COMP:
5763 hn_nvs_handle_comp(sc, chan, pkt);
5766 case VMBUS_CHANPKT_TYPE_RXBUF:
5767 hn_nvs_handle_rxbuf(rxr, chan, pkt);
5770 case VMBUS_CHANPKT_TYPE_INBAND:
5771 hn_nvs_handle_notify(sc, pkt);
5775 if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
5780 hn_chan_rollup(rxr, rxr->hn_txr);
5784 hn_tx_taskq_create(void *arg __unused)
5789 * Fix the # of TX taskqueues.
5791 if (hn_tx_taskq_cnt <= 0)
5792 hn_tx_taskq_cnt = 1;
5793 else if (hn_tx_taskq_cnt > mp_ncpus)
5794 hn_tx_taskq_cnt = mp_ncpus;
5797 * Fix the TX taskqueue mode.
5799 switch (hn_tx_taskq_mode) {
5800 case HN_TX_TASKQ_M_INDEP:
5801 case HN_TX_TASKQ_M_GLOBAL:
5802 case HN_TX_TASKQ_M_EVTTQ:
5805 hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
5809 if (vm_guest != VM_GUEST_HV)
5812 if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
5815 hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
5816 M_DEVBUF, M_WAITOK);
5817 for (i = 0; i < hn_tx_taskq_cnt; ++i) {
5818 hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
5819 taskqueue_thread_enqueue, &hn_tx_taskque[i]);
5820 taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
5824 SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND,
5825 hn_tx_taskq_create, NULL);
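/*
 * NOTE:
 * The shared TX taskqueues are created once at SYSINIT time and torn
 * down by the matching SYSUNINIT below; hn_tx_taskq_create() returns
 * early, leaving hn_tx_taskque NULL, on non-Hyper-V guests and
 * whenever the global taskqueue mode is not selected.
 */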
5828 hn_tx_taskq_destroy(void *arg __unused)
5831 if (hn_tx_taskque != NULL) {
5834 for (i = 0; i < hn_tx_taskq_cnt; ++i)
5835 taskqueue_free(hn_tx_taskque[i]);
5836 free(hn_tx_taskque, M_DEVBUF);
5839 SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND,
5840 hn_tx_taskq_destroy, NULL);