/*-
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet6.h"
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/buf_ring.h>
#include <sys/eventhandler.h>
#include <machine/atomic.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_vlan_var.h>
#include <net/rndis.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>
#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>

#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>

#include "vmbus_if.h"
#define HN_IFSTART_SUPPORT

#define HN_RING_CNT_DEF_MAX		8
/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT			512

#define HN_RNDIS_PKT_LEN					\
	(sizeof(struct rndis_packet_msg) +			\
	 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) +	\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
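/*
 * HN_RNDIS_PKT_LEN is thus the worst-case RNDIS header this driver can
 * build: the fixed rndis_packet_msg plus one per-packet-info each for
 * the hash value, VLAN tag, LSOv2 and TX checksum -- the four pktinfos
 * hn_encap() may append.
 */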
#define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE

#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)
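/*
 * One guest physical address entry is reserved for the RNDIS packet
 * message itself (txr->hn_gpa[0] in hn_encap()), hence the -1 above.
 */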
#define HN_DIRECT_TX_SIZE_DEF		128

#define HN_EARLY_TXEOF_THRESH		8

#define HN_PKTBUF_LEN_DEF		(16 * 1024)

#define HN_LROENT_CNT_DEF		128

#define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp)		(2 * (ifp)->if_mtu)

#define HN_LRO_ACKCNT_DEF		1
#define HN_LOCK_INIT(sc)					\
	sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc)	sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc)	sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc)					\
do {							\
	while (sx_try_xlock(&(sc)->hn_lock) == 0)	\
		DELAY(1000);				\
} while (0)
#define HN_UNLOCK(sc)		sx_xunlock(&(sc)->hn_lock)
#define HN_CSUM_IP_MASK		(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK	(CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc)		\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc)	\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)

#define HN_PKTSIZE_MIN(align)		\
	roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
	    HN_RNDIS_PKT_LEN, (align))
#define HN_PKTSIZE(m, align)		\
	roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))
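/*
 * Worked example (illustrative numbers only): for a 1514-byte frame,
 * a 32-byte aggregation alignment and an assumed 96-byte
 * HN_RNDIS_PKT_LEN, HN_PKTSIZE() yields roundup2(1514 + 96, 32) =
 * 1632 bytes consumed in the chimney sending buffer.
 */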
#define HN_RING_IDX2CPU(sc, idx)	(((sc)->hn_cpu + (idx)) % mp_ncpus)
struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
	SLIST_ENTRY(hn_txdesc)		link;
#endif
	STAILQ_ENTRY(hn_txdesc)		agg_link;

	/* Aggregated txdescs, in sending order. */
	STAILQ_HEAD(, hn_txdesc)	agg_list;

	/* The oldest packet, if transmission aggregation happens. */
	struct mbuf			*m;
	struct hn_tx_ring		*txr;
	int				refs;
	uint32_t			flags;	/* HN_TXD_FLAG_ */
	struct hn_nvs_sendctx		send_ctx;
	uint32_t			chim_index;
	int				chim_size;

	bus_dmamap_t			data_dmap;

	bus_addr_t			rndis_pkt_paddr;
	struct rndis_packet_msg		*rndis_pkt;
	bus_dmamap_t			rndis_pkt_dmap;
};
#define HN_TXD_FLAG_ONLIST		0x0001
#define HN_TXD_FLAG_DMAMAP		0x0002
#define HN_TXD_FLAG_ONAGG		0x0004
struct hn_update_vf {
	struct hn_rx_ring	*rxr;
	struct ifnet		*vf;
};
#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)
#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0
static int			hn_probe(device_t);
static int			hn_attach(device_t);
static int			hn_detach(device_t);
static int			hn_shutdown(device_t);
static void			hn_chan_callback(struct vmbus_channel *,
				    void *);

static void			hn_init(void *);
static int			hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void			hn_start(struct ifnet *);
#endif
static int			hn_transmit(struct ifnet *, struct mbuf *);
static void			hn_xmit_qflush(struct ifnet *);
static int			hn_ifmedia_upd(struct ifnet *);
static void			hn_ifmedia_sts(struct ifnet *,
				    struct ifmediareq *);
static int			hn_rndis_rxinfo(const void *, int,
				    struct hn_rxinfo *);
static void			hn_rndis_rx_data(struct hn_rx_ring *,
				    const void *, int);
static void			hn_rndis_rx_status(struct hn_softc *,
				    const void *, int);

static void			hn_nvs_handle_notify(struct hn_softc *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_handle_comp(struct hn_softc *,
				    struct vmbus_channel *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_handle_rxbuf(struct hn_rx_ring *,
				    struct vmbus_channel *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_ack_rxbuf(struct hn_rx_ring *,
				    struct vmbus_channel *, uint64_t);
#if __FreeBSD_version >= 1100099
static int			hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int			hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
#else
static int			hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_vf_sysctl(SYSCTL_HANDLER_ARGS);
static void			hn_stop(struct hn_softc *, bool);
static void			hn_init_locked(struct hn_softc *);
static int			hn_chan_attach(struct hn_softc *,
				    struct vmbus_channel *);
static void			hn_chan_detach(struct hn_softc *,
				    struct vmbus_channel *);
static int			hn_attach_subchans(struct hn_softc *);
static void			hn_detach_allchans(struct hn_softc *);
static void			hn_chan_rollup(struct hn_rx_ring *,
				    struct hn_tx_ring *);
static void			hn_set_ring_inuse(struct hn_softc *, int);
static int			hn_synth_attach(struct hn_softc *, int);
static void			hn_synth_detach(struct hn_softc *);
static int			hn_synth_alloc_subchans(struct hn_softc *,
				    int *);
static bool			hn_synth_attachable(const struct hn_softc *);
static void			hn_suspend(struct hn_softc *);
static void			hn_suspend_data(struct hn_softc *);
static void			hn_suspend_mgmt(struct hn_softc *);
static void			hn_resume(struct hn_softc *);
static void			hn_resume_data(struct hn_softc *);
static void			hn_resume_mgmt(struct hn_softc *);
static void			hn_suspend_mgmt_taskfunc(void *, int);
static void			hn_chan_drain(struct hn_softc *,
				    struct vmbus_channel *);
static void			hn_polling(struct hn_softc *, u_int);
static void			hn_chan_polling(struct vmbus_channel *, u_int);

static void			hn_update_link_status(struct hn_softc *);
static void			hn_change_network(struct hn_softc *);
static void			hn_link_taskfunc(void *, int);
static void			hn_netchg_init_taskfunc(void *, int);
static void			hn_netchg_status_taskfunc(void *, int);
static void			hn_link_status(struct hn_softc *);
static int			hn_create_rx_data(struct hn_softc *, int);
static void			hn_destroy_rx_data(struct hn_softc *);
static int			hn_check_iplen(const struct mbuf *, int);
static int			hn_set_rxfilter(struct hn_softc *, uint32_t);
static int			hn_rxfilter_config(struct hn_softc *);
static int			hn_rss_reconfig(struct hn_softc *);
static void			hn_rss_ind_fixup(struct hn_softc *);
static int			hn_rxpkt(struct hn_rx_ring *, const void *,
				    int, const struct hn_rxinfo *);
static int			hn_tx_ring_create(struct hn_softc *, int);
static void			hn_tx_ring_destroy(struct hn_tx_ring *);
static int			hn_create_tx_data(struct hn_softc *, int);
static void			hn_fixup_tx_data(struct hn_softc *);
static void			hn_destroy_tx_data(struct hn_softc *);
static void			hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static void			hn_txdesc_gc(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_encap(struct ifnet *, struct hn_tx_ring *,
				    struct hn_txdesc *, struct mbuf **);
static int			hn_txpkt(struct ifnet *, struct hn_tx_ring *,
				    struct hn_txdesc *);
static void			hn_set_chim_size(struct hn_softc *, int);
static void			hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool			hn_tx_ring_pending(struct hn_tx_ring *);
static void			hn_tx_ring_qflush(struct hn_tx_ring *);
static void			hn_resume_tx(struct hn_softc *, int);
static void			hn_set_txagg(struct hn_softc *);
static void			*hn_try_txagg(struct ifnet *,
				    struct hn_tx_ring *, struct hn_txdesc *,
				    int);
static int			hn_get_txswq_depth(const struct hn_tx_ring *);
static void			hn_txpkt_done(struct hn_nvs_sendctx *,
				    struct hn_softc *, struct vmbus_channel *,
				    const void *, int);
static int			hn_txpkt_sglist(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_txpkt_chim(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_xmit(struct hn_tx_ring *, int);
static void			hn_xmit_taskfunc(void *, int);
static void			hn_xmit_txeof(struct hn_tx_ring *);
static void			hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int			hn_start_locked(struct hn_tx_ring *, int);
static void			hn_start_taskfunc(void *, int);
static void			hn_start_txeof(struct hn_tx_ring *);
static void			hn_start_txeof_taskfunc(void *, int);
#endif
SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");
/* Trust TCP segment verification on the host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust TCP segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust UDP datagram verification on the host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust UDP datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust IP packet verification on the host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust IP packet verification on host side, "
    "when csum info is missing (global setting)");
/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");
/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif
static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP	0
#define HN_TX_TASKQ_M_GLOBAL	1
#define HN_TX_TASKQ_M_EVTTQ	2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");
#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");
#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif
/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");
/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif
/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");
static u_int hn_cpu_index;	/* next CPU for channel */
static struct taskqueue **hn_tx_taskque;	/* shared TX taskqueues */

static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
static device_method_t hn_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		hn_probe),
	DEVMETHOD(device_attach,	hn_attach),
	DEVMETHOD(device_detach,	hn_detach),
	DEVMETHOD(device_shutdown,	hn_shutdown),
	DEVMETHOD_END
};

static driver_t hn_driver = {
	"hn",
	hn_methods,
	sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);
#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
	int i;

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
		sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif
static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size == 0, ("invalid rndis sglist txd"));
	return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
	    &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}
static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
	struct hn_nvs_rndis rndis;

	KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size > 0, ("invalid rndis chim txd"));

	rndis.nvs_type = HN_NVS_TYPE_RNDIS;
	rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
	rndis.nvs_chim_idx = txd->chim_index;
	rndis.nvs_chim_sz = txd->chim_size;

	return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
	    &rndis, sizeof(rndis), &txd->send_ctx));
}
static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
	int i, bmap_cnt = sc->hn_chim_bmap_cnt;
	u_long *bmap = sc->hn_chim_bmap;
	uint32_t ret = HN_NVS_CHIM_IDX_INVALID;

	for (i = 0; i < bmap_cnt; ++i) {
		int idx;

		idx = ffsl(~bmap[i]);
		if (idx == 0)
			continue;

		--idx; /* ffsl is 1-based */
		KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
		    ("invalid i %d and idx %d", i, idx));

		if (atomic_testandset_long(&bmap[i], idx))
			continue;

		ret = i * LONG_BIT + idx;
		break;
	}
	return (ret);
}
static void
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
{
	u_long mask;
	uint32_t idx;

	idx = chim_idx / LONG_BIT;
	KASSERT(idx < sc->hn_chim_bmap_cnt,
	    ("invalid chimney index 0x%x", chim_idx));

	mask = 1UL << (chim_idx % LONG_BIT);
	KASSERT(sc->hn_chim_bmap[idx] & mask,
	    ("index bitmap 0x%lx, chimney index %u, "
	     "bitmap idx %d, bitmask 0x%lx",
	     sc->hn_chim_bmap[idx], chim_idx, idx, mask));

	atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
}
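/*
 * Usage sketch (not driver code): chimney slots are claimed lock-free
 * with hn_chim_alloc() and returned with hn_chim_free().  In this file
 * the pairing is hn_try_txagg() -> hn_chim_alloc() on the send path
 * and hn_txdesc_put() -> hn_chim_free() on completion:
 *
 *	uint32_t idx = hn_chim_alloc(sc);
 *	if (idx != HN_NVS_CHIM_IDX_INVALID) {
 *		(copy the RNDIS packet into sc->hn_chim +
 *		 idx * sc->hn_chim_szmax, send it, then)
 *		hn_chim_free(sc, idx);
 *	}
 */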
#if defined(INET6) || defined(INET)
/*
 * NOTE: If this function fails, m_head will have been freed.
 */
static __inline struct mbuf *
hn_tso_fixup(struct mbuf *m_head)
{
	struct ether_vlan_header *evl;
	struct tcphdr *th;
	int ehlen;

	KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));

#define PULLUP_HDR(m, len)				\
do {							\
	if (__predict_false((m)->m_len < (len))) {	\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (NULL);			\
	}						\
} while (0)

	PULLUP_HDR(m_head, sizeof(*evl));
	evl = mtod(m_head, struct ether_vlan_header *);
	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehlen = ETHER_HDR_LEN;

#ifdef INET
	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
		struct ip *ip;
		int iphlen;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
		ip = mtodo(m_head, ehlen);
		iphlen = ip->ip_hl << 2;

		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
		th = mtodo(m_head, ehlen + iphlen);

		ip->ip_len = 0;
		ip->ip_sum = 0;
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	}
#endif
#if defined(INET6) && defined(INET)
	else
#endif
#ifdef INET6
	{
		struct ip6_hdr *ip6;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
		ip6 = mtodo(m_head, ehlen);
		if (ip6->ip6_nxt != IPPROTO_TCP) {
			m_freem(m_head);
			return (NULL);
		}

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
		th = mtodo(m_head, ehlen + sizeof(*ip6));

		ip6->ip6_plen = 0;
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
	}
#endif
	return (m_head);

#undef PULLUP_HDR
}
#endif	/* INET6 || INET */
static int
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
	int error = 0;

	HN_LOCK_ASSERT(sc);

	if (sc->hn_rx_filter != filter) {
		error = hn_rndis_set_rxfilter(sc, filter);
		if (!error)
			sc->hn_rx_filter = filter;
	}
	return (error);
}

static int
hn_rxfilter_config(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	uint32_t filter;

	HN_LOCK_ASSERT(sc);

	if ((ifp->if_flags & IFF_PROMISC) ||
	    (sc->hn_flags & HN_FLAG_VF)) {
		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
	} else {
		filter = NDIS_PACKET_TYPE_DIRECTED;
		if (ifp->if_flags & IFF_BROADCAST)
			filter |= NDIS_PACKET_TYPE_BROADCAST;
		/* TODO: support multicast list */
		if ((ifp->if_flags & IFF_ALLMULTI) ||
		    !TAILQ_EMPTY(&ifp->if_multiaddrs))
			filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
	}
	return (hn_set_rxfilter(sc, filter));
}
static void
hn_set_txagg(struct hn_softc *sc)
{
	uint32_t size, pkts;
	int i;

	/*
	 * Setup aggregation size.
	 */
	if (sc->hn_agg_size < 0)
		size = UINT32_MAX;
	else
		size = sc->hn_agg_size;
	if (sc->hn_rndis_agg_size < size)
		size = sc->hn_rndis_agg_size;
	/* NOTE: We only aggregate packets using chimney sending buffers. */
	if (size > (uint32_t)sc->hn_chim_szmax)
		size = sc->hn_chim_szmax;
	if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
		/* Disable aggregation. */
		size = 0;
		pkts = 0;
		goto done;
	}
	/* NOTE: Type of the per TX ring setting is 'int'. */
	if (size > INT_MAX)
		size = INT_MAX;

	/*
	 * Setup aggregation packet count.
	 */
	if (sc->hn_agg_pkts < 0)
		pkts = UINT32_MAX;
	else
		pkts = sc->hn_agg_pkts;
	if (sc->hn_rndis_agg_pkts < pkts)
		pkts = sc->hn_rndis_agg_pkts;
	if (pkts <= 1) {
		/* Disable aggregation. */
		size = 0;
		pkts = 0;
		goto done;
	}
	/* NOTE: Type of the per TX ring setting is 'short'. */
	if (pkts > SHRT_MAX)
		pkts = SHRT_MAX;

done:
	/* NOTE: Type of the per TX ring setting is 'short'. */
	if (sc->hn_rndis_agg_align > SHRT_MAX) {
		/* Disable aggregation. */
		size = 0;
		pkts = 0;
	}

	if (bootverbose) {
		if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
		    size, pkts, sc->hn_rndis_agg_align);
	}

	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_agg_szmax = size;
		txr->hn_agg_pktmax = pkts;
		txr->hn_agg_align = sc->hn_rndis_agg_align;
		mtx_unlock(&txr->hn_tx_lock);
	}
}
static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{

	KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
	if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
		return txr->hn_txdesc_cnt;
	return hn_tx_swq_depth;
}
static int
hn_rss_reconfig(struct hn_softc *sc)
{
	int error;

	HN_LOCK_ASSERT(sc);

	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
		return (ENXIO);

	/*
	 * Disable RSS first.
	 *
	 * NOTE:
	 * Direct reconfiguration by setting the UNCHG flags does
	 * _not_ work properly.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "disable RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS disable failed\n");
		return (error);
	}

	/*
	 * Reenable the RSS w/ the updated RSS key or indirect
	 * table.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "reconfig RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS reconfig failed\n");
		return (error);
	}
	return (0);
}
static void
hn_rss_ind_fixup(struct hn_softc *sc)
{
	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int i, nchan;

	nchan = sc->hn_rx_ring_inuse;
	KASSERT(nchan > 1, ("invalid # of channels %d", nchan));

	/*
	 * Check indirect table to make sure that all channels in it
	 * can be used.
	 */
	for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
		if (rss->rss_ind[i] >= nchan) {
			if_printf(sc->hn_ifp,
			    "RSS indirect table %d fixup: %u -> %d\n",
			    i, rss->rss_ind[i], nchan - 1);
			rss->rss_ind[i] = nchan - 1;
		}
	}
}
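/*
 * Example (illustrative): if the indirect table was built for 8
 * channels but only 4 RX rings remain in use, every entry of 4..7 is
 * clamped to 3, so each hash bucket still maps to a valid channel.
 */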
static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{

	return EOPNOTSUPP;
}

static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct hn_softc *sc = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
		ifmr->ifm_active |= IFM_NONE;
		return;
	}
	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}
static void
hn_update_vf_task(void *arg, int pending __unused)
{
	struct hn_update_vf *uv = arg;

	uv->rxr->hn_vf = uv->vf;
}
static void
hn_update_vf(struct hn_softc *sc, struct ifnet *vf)
{
	struct hn_rx_ring *rxr;
	struct hn_update_vf uv;
	struct task task;
	int i;

	HN_LOCK_ASSERT(sc);

	TASK_INIT(&task, 0, hn_update_vf_task, &uv);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		rxr = &sc->hn_rx_ring[i];

		if (i < sc->hn_rx_ring_inuse) {
			uv.rxr = rxr;
			uv.vf = vf;
			vmbus_chan_run_task(rxr->hn_chan, &task);
		} else {
			rxr->hn_vf = vf;
		}
	}
}
static void
hn_set_vf(struct hn_softc *sc, struct ifnet *ifp, bool vf)
{
	struct ifnet *hn_ifp;

	HN_LOCK(sc);

	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
		goto out;

	hn_ifp = sc->hn_ifp;
	if (ifp == hn_ifp)
		goto out;

	if (ifp->if_alloctype != IFT_ETHER)
		goto out;

	/* Ignore lagg/vlan interfaces */
	if (strcmp(ifp->if_dname, "lagg") == 0 ||
	    strcmp(ifp->if_dname, "vlan") == 0)
		goto out;

	if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)
		goto out;

	/* Now we're sure 'ifp' is a real VF device. */
	if (vf) {
		if (sc->hn_flags & HN_FLAG_VF)
			goto out;

		sc->hn_flags |= HN_FLAG_VF;
		hn_rxfilter_config(sc);
	} else {
		if (!(sc->hn_flags & HN_FLAG_VF))
			goto out;

		sc->hn_flags &= ~HN_FLAG_VF;
		if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
			hn_rxfilter_config(sc);
		else
			hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
	}

	hn_nvs_set_datapath(sc,
	    vf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTHETIC);

	hn_update_vf(sc, vf ? ifp : NULL);

	if (vf) {
		hn_suspend_mgmt(sc);
		sc->hn_link_flags &=
		    ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
		if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
	} else {
		hn_resume_mgmt(sc);
	}

	devctl_notify("HYPERV_NIC_VF", if_name(hn_ifp),
	    vf ? "VF_UP" : "VF_DOWN", NULL);

	if (bootverbose) {
		if_printf(hn_ifp, "Data path is switched %s %s\n",
		    vf ? "to" : "from", if_name(ifp));
	}
out:
	HN_UNLOCK(sc);
}
static void
hn_ifnet_event(void *arg, struct ifnet *ifp, int event)
{

	if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN)
		return;
	hn_set_vf(arg, ifp, event == IFNET_EVENT_UP);
}

static void
hn_ifaddr_event(void *arg, struct ifnet *ifp)
{

	hn_set_vf(arg, ifp, ifp->if_flags & IFF_UP);
}
/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
static const struct hyperv_guid g_net_vsc_device_type = {
	.hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
	    0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
};
static int
hn_probe(device_t dev)
{

	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
	    &g_net_vsc_device_type) == 0) {
		device_set_desc(dev, "Hyper-V Network Interface");
		return BUS_PROBE_DEFAULT;
	}
	return ENXIO;
}
static int
hn_attach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct ifnet *ifp = NULL;
	int error, ring_cnt, tx_ring_cnt;

	sc->hn_dev = dev;
	sc->hn_prichan = vmbus_get_channel(dev);
	HN_LOCK_INIT(sc);
	/*
	 * Initialize these tunables once.
	 */
	sc->hn_agg_size = hn_tx_agg_size;
	sc->hn_agg_pkts = hn_tx_agg_pkts;
	/*
	 * Setup taskqueue for transmission.
	 */
	if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
		int i;

		sc->hn_tx_taskqs =
		    malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
		    M_DEVBUF, M_WAITOK);
		for (i = 0; i < hn_tx_taskq_cnt; ++i) {
			sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
			    M_WAITOK, taskqueue_thread_enqueue,
			    &sc->hn_tx_taskqs[i]);
			taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
			    "%s tx%d", device_get_nameunit(dev), i);
		}
	} else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
		sc->hn_tx_taskqs = hn_tx_taskque;
	}
	/*
	 * Setup taskqueue for management tasks, e.g. link status.
	 */
	sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
	taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
	TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
	TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
	    hn_netchg_status_taskfunc, sc);
	/*
	 * Allocate ifnet and setup its name earlier, so that if_printf
	 * can be used by functions, which will be called after
	 * ether_ifattach().
	 */
	ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/*
	 * Initialize ifmedia earlier so that it can be unconditionally
	 * destroyed, if error happened later on.
	 */
	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
	/*
	 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
	 * to use (tx_ring_cnt).
	 *
	 * NOTE:
	 * The # of RX rings to use is same as the # of channels to use.
	 */
	ring_cnt = hn_chan_cnt;
	if (ring_cnt <= 0) {
		/* Default */
		ring_cnt = mp_ncpus;
		if (ring_cnt > HN_RING_CNT_DEF_MAX)
			ring_cnt = HN_RING_CNT_DEF_MAX;
	} else if (ring_cnt > mp_ncpus) {
		ring_cnt = mp_ncpus;
	}

	tx_ring_cnt = hn_tx_ring_cnt;
	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
		tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		/* ifnet.if_start only needs one TX ring. */
		tx_ring_cnt = 1;
	}
#endif

	/*
	 * Set the leader CPU for channels.
	 */
	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
	/*
	 * Create enough TX/RX rings, even if only limited number of
	 * channels can be allocated.
	 */
	error = hn_create_tx_data(sc, tx_ring_cnt);
	if (error)
		goto failed;
	error = hn_create_rx_data(sc, ring_cnt);
	if (error)
		goto failed;

	/*
	 * Create transaction context for NVS and RNDIS transactions.
	 */
	sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
	    HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
	if (sc->hn_xact == NULL) {
		error = ENXIO;
		goto failed;
	}
	/*
	 * Install orphan handler for the revocation of this device's
	 * primary channel.
	 *
	 * NOTE:
	 * The processing order is critical here:
	 * Install the orphan handler, _before_ testing whether this
	 * device's primary channel has been revoked or not.
	 */
	vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
	if (vmbus_chan_is_revoked(sc->hn_prichan)) {
		error = ENXIO;
		goto failed;
	}

	/*
	 * Attach the synthetic parts, i.e. NVS and RNDIS.
	 */
	error = hn_synth_attach(sc, ETHERMTU);
	if (error)
		goto failed;
	error = hn_rndis_get_eaddr(sc, eaddr);
	if (error)
		goto failed;

#if __FreeBSD_version >= 1100099
	if (sc->hn_rx_ring_inuse > 1) {
		/*
		 * Reduce TCP segment aggregation limit for multiple
		 * RX rings to increase ACK timeliness.
		 */
		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
	}
#endif

	/*
	 * Fixup TX stuffs after synthetic parts are attached.
	 */
	hn_fixup_tx_data(sc);
	ctx = device_get_sysctl_ctx(dev);
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
	    &sc->hn_nvs_ver, 0, "NVS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_ndis_version_sysctl, "A", "NDIS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_caps_sysctl, "A", "capabilities");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_hwassist_sysctl, "A", "hwassist");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rxfilter_sysctl, "A", "rxfilter");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_hash_sysctl, "A", "RSS hash");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
	    CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_key_sysctl, "IU", "RSS key");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_ind_sysctl, "IU", "RSS indirect table");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
	    CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
	    "RNDIS offered packet transmission aggregation size limit");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
	    CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
	    "RNDIS offered packet transmission aggregation count limit");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
	    CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
	    "RNDIS packet transmission aggregation alignment");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_txagg_size_sysctl, "I",
	    "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_txagg_pkts_sysctl, "I",
	    "Packet transmission aggregation packets, "
	    "0 -- disable, -1 -- auto");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_polling_sysctl, "I",
	    "Polling frequency: [100,1000000], 0 disable polling");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_vf_sysctl, "A", "Virtual Function's name");
	/*
	 * Setup the ifmedia, which has been initialized earlier.
	 */
	ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
	/* XXX ifmedia_set really should do this for us */
	sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;
	/*
	 * Setup the ifnet for this interface.
	 */

#ifdef __LP64__
	ifp->if_baudrate = IF_Gbps(10);
#else
	/* if_baudrate is 32bits on 32bit system. */
	ifp->if_baudrate = IF_Gbps(1);
#endif
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = hn_ioctl;
	ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);

		ifp->if_start = hn_start;
		IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
		ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
		IFQ_SET_READY(&ifp->if_snd);
	} else
#endif
	{
		ifp->if_transmit = hn_transmit;
		ifp->if_qflush = hn_xmit_qflush;
	}
	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
#ifdef foo
	/* We can't diff IPv6 packets from IPv4 packets on RX path. */
	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif
	if (sc->hn_caps & HN_CAP_VLAN) {
		/* XXX not sure about VLAN_MTU. */
		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	}
	ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
	if (ifp->if_hwassist & HN_CSUM_IP_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM;
	if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
	if (sc->hn_caps & HN_CAP_TSO4) {
		ifp->if_capabilities |= IFCAP_TSO4;
		ifp->if_hwassist |= CSUM_IP_TSO;
	}
	if (sc->hn_caps & HN_CAP_TSO6) {
		ifp->if_capabilities |= IFCAP_TSO6;
		ifp->if_hwassist |= CSUM_IP6_TSO;
	}

	/* Enable all available capabilities by default. */
	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Disable IPv6 TSO and TXCSUM by default, they still can
	 * be enabled through SIOCSIFCAP.
	 */
	ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
	ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);

	if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
		hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
		ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
		ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
	}
	ether_ifattach(ifp, eaddr);

	if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
		if_printf(ifp, "TSO segcnt %u segsz %u\n",
		    ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
	}

	/* Inform the upper layer about the long frame support. */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
	/*
	 * Kick off link status check.
	 */
	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
	hn_update_link_status(sc);

	sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
	    hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
	sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
	    hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);

	return (0);
failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
		hn_synth_detach(sc);
	hn_detach(dev);
	return (error);
}
static int
hn_detach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = sc->hn_ifp;

	if (sc->hn_ifaddr_evthand != NULL)
		EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
	if (sc->hn_ifnet_evthand != NULL)
		EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);

	if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
		/*
		 * In case that the vmbus missed the orphan handler
		 * installation.
		 */
		vmbus_xact_ctx_orphan(sc->hn_xact);
	}
	if (device_is_attached(dev)) {
		HN_LOCK(sc);
		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				hn_stop(sc, true);
			/*
			 * NOTE:
			 * hn_stop() only suspends data, so management
			 * stuffs have to be suspended manually here.
			 */
			hn_suspend_mgmt(sc);
			hn_synth_detach(sc);
		}
		HN_UNLOCK(sc);
		ether_ifdetach(ifp);
	}
	ifmedia_removeall(&sc->hn_media);
	hn_destroy_rx_data(sc);
	hn_destroy_tx_data(sc);

	if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(sc->hn_tx_taskqs[i]);
		free(sc->hn_tx_taskqs, M_DEVBUF);
	}
	taskqueue_free(sc->hn_mgmt_taskq0);

	if (sc->hn_xact != NULL) {
		/*
		 * Uninstall the orphan handler _before_ the xact is
		 * destructed.
		 */
		vmbus_chan_unset_orphan(sc->hn_prichan);
		vmbus_xact_ctx_destroy(sc->hn_xact);
	}

	if_free(ifp);

	HN_LOCK_DESTROY(sc);
	return (0);
}
static int
hn_shutdown(device_t dev)
{

	return (0);
}
static void
hn_link_status(struct hn_softc *sc)
{
	uint32_t link_status;
	int error;

	error = hn_rndis_get_linkstatus(sc, &link_status);
	if (error) {
		/* XXX what to do? */
		return;
	}

	if (link_status == NDIS_MEDIA_STATE_CONNECTED)
		sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
	else
		sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp,
	    (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
	    LINK_STATE_UP : LINK_STATE_DOWN);
}
static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		return;
	hn_link_status(sc);
}

static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Prevent any link status checks from running. */
	sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;

	/*
	 * Fake up a [link down --> link up] state change; 5 seconds
	 * delay is used, which closely simulates miibus reaction
	 * upon link down event.
	 */
	sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
	taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
	    &sc->hn_netchg_status, 5 * hz);
}
static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Re-allow link status checks. */
	sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
	hn_link_status(sc);
}

static void
hn_update_link_status(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}

static void
hn_change_network(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}
static __inline int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m = *m_head;
	int error;

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

	error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
	    m, segs, nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		struct mbuf *m_new;

		m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
		if (m_new == NULL)
			return ENOBUFS;
		else
			*m_head = m = m_new;
		txr->hn_tx_collapsed++;

		error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
	}
	if (!error) {
		bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
		    BUS_DMASYNC_PREWRITE);
		txd->flags |= HN_TXD_FLAG_DMAMAP;
	}
	return error;
}
static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
	    ("put an onlist txd %#x", txd->flags));
	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("put an onagg txd %#x", txd->flags));

	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
		return 0;

	if (!STAILQ_EMPTY(&txd->agg_list)) {
		struct hn_txdesc *tmp_txd;

		while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
			int freed;

			KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
			    ("recursive aggregation on aggregated txdesc"));
			KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
			    ("not aggregated txdesc"));
			KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
			    ("aggregated txdesc uses dmamap"));
			KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
			    ("aggregated txdesc consumes "
			     "chimney sending buffer"));
			KASSERT(tmp_txd->chim_size == 0,
			    ("aggregated txdesc has non-zero "
			     "chimney sending size"));

			STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
			tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
			freed = hn_txdesc_put(txr, tmp_txd);
			KASSERT(freed, ("failed to free aggregated txdesc"));
		}
	}

	if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
		    ("chim txd uses dmamap"));
		hn_chim_free(txr->hn_sc, txd->chim_index);
		txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
		txd->chim_size = 0;
	} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
		bus_dmamap_sync(txr->hn_tx_data_dtag,
		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(txr->hn_tx_data_dtag,
		    txd->data_dmap);
		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
	}

	if (txd->m != NULL) {
		m_freem(txd->m);
		txd->m = NULL;
	}

	txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	KASSERT(txr->hn_txdesc_avail >= 0 &&
	    txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
	    ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
	txr->hn_txdesc_avail++;
	SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else	/* HN_USE_TXDESC_BUFRING */
	atomic_add_int(&txr->hn_txdesc_avail, 1);
	buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif	/* !HN_USE_TXDESC_BUFRING */

	return 1;
}
static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
{
	struct hn_txdesc *txd;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	txd = SLIST_FIRST(&txr->hn_txlist);
	if (txd != NULL) {
		KASSERT(txr->hn_txdesc_avail > 0,
		    ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
		txr->hn_txdesc_avail--;
		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
	}
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
#endif

	if (txd != NULL) {
#ifdef HN_USE_TXDESC_BUFRING
		atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif	/* HN_USE_TXDESC_BUFRING */
		KASSERT(txd->m == NULL && txd->refs == 0 &&
		    STAILQ_EMPTY(&txd->agg_list) &&
		    txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
		    txd->chim_size == 0 &&
		    (txd->flags & HN_TXD_FLAG_ONLIST) &&
		    (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
		    (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
		txd->flags &= ~HN_TXD_FLAG_ONLIST;
		txd->refs = 1;
	}
	return txd;
}
static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
{

	/* 0->1 transition will never work */
	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	atomic_add_int(&txd->refs, 1);
}

static __inline void
hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
{

	KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("recursive aggregation on aggregating txdesc"));

	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("already aggregated"));
	KASSERT(STAILQ_EMPTY(&txd->agg_list),
	    ("recursive aggregation on to-be-aggregated txdesc"));

	txd->flags |= HN_TXD_FLAG_ONAGG;
	STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
}
static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
	bool pending = false;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
		pending = true;
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	if (!buf_ring_full(txr->hn_txdesc_br))
		pending = true;
#endif
	return (pending);
}

static __inline void
hn_txeof(struct hn_tx_ring *txr)
{
	txr->hn_has_txeof = 0;
	txr->hn_txeof(txr);
}
static void
hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
    struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
{
	struct hn_txdesc *txd = sndc->hn_cbarg;
	struct hn_tx_ring *txr;

	txr = txd->txr;
	KASSERT(txr->hn_chan == chan,
	    ("channel mismatch, on chan%u, should be chan%u",
	     vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));

	txr->hn_has_txeof = 1;
	hn_txdesc_put(txr, txd);

	++txr->hn_txdone_cnt;
	if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
		txr->hn_txdone_cnt = 0;
		if (txr->hn_oactive)
			hn_txeof(txr);
	}
}
static void
hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
{
#if defined(INET) || defined(INET6)
	struct lro_ctrl *lro = &rxr->hn_lro;
	struct lro_entry *queued;

	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif

	/*
	 * NOTE:
	 * 'txr' could be NULL, if multiple channels and
	 * ifnet.if_start method are enabled.
	 */
	if (txr == NULL || !txr->hn_has_txeof)
		return;

	txr->hn_txdone_cnt = 0;
	hn_txeof(txr);
}
static __inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{

	KASSERT(ofs >= sizeof(struct rndis_packet_msg),
	    ("invalid RNDIS packet msg offset %u", ofs));
	return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
}
static __inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
    size_t pi_dlen, uint32_t pi_type)
{
	const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
	    ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * rm_pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
	    ("%u pktinfo overflows RNDIS packet msg", pi_type));
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
	    pkt->rm_pktinfolen);
	pkt->rm_pktinfolen += pi_size;

	pi->rm_size = pi_size;
	pi->rm_type = pi_type;
	pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;

	return (pi->rm_data);
}
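/*
 * Typical use, as in hn_encap() below: append a pktinfo, then write the
 * metadata through the returned data pointer, e.g.
 *
 *	pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
 *	    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
 *	*pi_data = NDIS_VLAN_INFO_MAKE(...);
 */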
static int
hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr)
{
	struct hn_txdesc *txd;
	struct mbuf *m;
	int error, pkts;

	txd = txr->hn_agg_txd;
	KASSERT(txd != NULL, ("no aggregate txdesc"));

	/*
	 * Since hn_txpkt() will reset this temporary stat, save
	 * it now, so that oerrors can be updated properly, if
	 * hn_txpkt() ever fails.
	 */
	pkts = txr->hn_stat_pkts;

	/*
	 * Since txd's mbuf will _not_ be freed upon hn_txpkt()
	 * failure, save it for later freeing, if hn_txpkt() ever
	 * fails.
	 */
	m = txd->m;
	error = hn_txpkt(ifp, txr, txd);
	if (__predict_false(error)) {
		/* txd is freed, but m is not. */
		m_freem(m);

		txr->hn_flush_failed++;
		if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts);
	}

	/* Reset all aggregation states. */
	txr->hn_agg_txd = NULL;
	txr->hn_agg_szleft = 0;
	txr->hn_agg_pktleft = 0;
	txr->hn_agg_prevpkt = NULL;

	return (error);
}
static void *
hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
    int pktsize)
{
	void *chim;

	if (txr->hn_agg_txd != NULL) {
		if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) {
			struct hn_txdesc *agg_txd = txr->hn_agg_txd;
			struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt;
			int olen;

			/*
			 * Update the previous RNDIS packet's total length,
			 * it can be increased due to the mandatory alignment
			 * padding for this RNDIS packet.  And update the
			 * aggregating txdesc's chimney sending buffer size
			 * accordingly.
			 *
			 * NOTE:
			 * Zero-out the padding, as required by the RNDIS spec.
			 */
			olen = pkt->rm_len;
			pkt->rm_len = roundup2(olen, txr->hn_agg_align);
			agg_txd->chim_size += pkt->rm_len - olen;

			/* Link this txdesc to the parent. */
			hn_txdesc_agg(agg_txd, txd);

			chim = (uint8_t *)pkt + pkt->rm_len;
			/* Save the current packet for later fixup. */
			txr->hn_agg_prevpkt = chim;

			txr->hn_agg_pktleft--;
			txr->hn_agg_szleft -= pktsize;
			if (txr->hn_agg_szleft <=
			    HN_PKTSIZE_MIN(txr->hn_agg_align)) {
				/*
				 * Probably can't aggregate more packets,
				 * flush this aggregating txdesc proactively.
				 */
				txr->hn_agg_pktleft = 0;
			}
			/* Done! */
			return (chim);
		}
		hn_flush_txagg(ifp, txr);
	}
	KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));

	txr->hn_tx_chimney_tried++;
	txd->chim_index = hn_chim_alloc(txr->hn_sc);
	if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID)
		return (NULL);
	txr->hn_tx_chimney++;

	chim = txr->hn_sc->hn_chim +
	    (txd->chim_index * txr->hn_sc->hn_chim_szmax);

	if (txr->hn_agg_pktmax > 1 &&
	    txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) {
		txr->hn_agg_txd = txd;
		txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1;
		txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize;
		txr->hn_agg_prevpkt = chim;
	}
	return (chim);
}
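/*
 * hn_try_txagg() therefore returns a pointer into a chimney sending
 * buffer (either this txdesc's own slot, or space borrowed from the
 * currently aggregating txdesc) where hn_encap() builds the RNDIS
 * message, or NULL when no chimney slot is available and the caller
 * must fall back to the sglist path.
 */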
/*
 * NOTE:
 * If this function fails, then both txd and m_head0 will be freed.
 */
static int
hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head0)
{
	bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
	int error, nsegs, i;
	struct mbuf *m_head = *m_head0;
	struct rndis_packet_msg *pkt;
	uint32_t *pi_data;
	void *chim = NULL;
	int pkt_hlen, pkt_size;

	pkt = txd->rndis_pkt;
	pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align);
	if (pkt_size < txr->hn_chim_size) {
		chim = hn_try_txagg(ifp, txr, txd, pkt_size);
		if (chim != NULL)
			pkt = chim;
	} else {
		if (txr->hn_agg_txd != NULL)
			hn_flush_txagg(ifp, txr);
	}
	pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
	pkt->rm_len = m_head->m_pkthdr.len;
	pkt->rm_dataoffset = 0;
	pkt->rm_datalen = m_head->m_pkthdr.len;
	pkt->rm_oobdataoffset = 0;
	pkt->rm_oobdatalen = 0;
	pkt->rm_oobdataelements = 0;
	pkt->rm_pktinfooffset = sizeof(*pkt);
	pkt->rm_pktinfolen = 0;
	pkt->rm_vchandle = 0;
	pkt->rm_reserved = 0;
	if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
		/*
		 * Set the hash value for this packet, so that the host could
		 * dispatch the TX done event for this packet back to this TX
		 * ring's channel.
		 */
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
		*pi_data = txr->hn_tx_idx;
	}

	if (m_head->m_flags & M_VLANTAG) {
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = NDIS_VLAN_INFO_MAKE(
		    EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
		    EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
		    EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
	}
	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
#if defined(INET6) || defined(INET)
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
#ifdef INET
		if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(0,
			    m_head->m_pkthdr.tso_segsz);
		}
#endif
#if defined(INET6) && defined(INET)
		else
#endif
#ifdef INET6
		{
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(0,
			    m_head->m_pkthdr.tso_segsz);
		}
#endif
#endif	/* INET6 || INET */
	} else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
		if (m_head->m_pkthdr.csum_flags &
		    (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
			*pi_data = NDIS_TXCSUM_INFO_IPV6;
		} else {
			*pi_data = NDIS_TXCSUM_INFO_IPV4;
			if (m_head->m_pkthdr.csum_flags & CSUM_IP)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
			*pi_data |= NDIS_TXCSUM_INFO_TCPCS;
		else if (m_head->m_pkthdr.csum_flags &
		    (CSUM_IP_UDP | CSUM_IP6_UDP))
			*pi_data |= NDIS_TXCSUM_INFO_UDPCS;
	}
	pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
	/* Fixup RNDIS packet message total length */
	pkt->rm_len += pkt_hlen;
	/* Convert RNDIS packet message offsets */
	pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
	pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);
	/*
	 * Fast path: Chimney sending.
	 */
	if (chim != NULL) {
		struct hn_txdesc *tgt_txd = txd;

		if (txr->hn_agg_txd != NULL)
			tgt_txd = txr->hn_agg_txd;

		KASSERT(pkt == chim,
		    ("RNDIS pkt not in chimney sending buffer"));
		KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
		    ("chimney sending buffer is not used"));
		tgt_txd->chim_size += pkt->rm_len;

		m_copydata(m_head, 0, m_head->m_pkthdr.len,
		    ((uint8_t *)chim) + pkt_hlen);

		txr->hn_gpa_cnt = 0;
		txr->hn_sendpkt = hn_txpkt_chim;
		goto done;
	}
	KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc"));
	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
	    ("chimney buffer is used"));
	KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));

	error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
	if (__predict_false(error)) {
		int freed;

		/*
		 * This mbuf is not linked w/ the txd yet, so free it now.
		 */
		m_freem(m_head);
		*m_head0 = NULL;

		freed = hn_txdesc_put(txr, txd);
		KASSERT(freed != 0,
		    ("fail to free txd upon txdma error"));

		txr->hn_txdma_failed++;
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return error;
	}
	*m_head0 = m_head;
	/* +1 RNDIS packet message */
	txr->hn_gpa_cnt = nsegs + 1;

	/* send packet with page buffer */
	txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
	txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
	txr->hn_gpa[0].gpa_len = pkt_hlen;

	/*
	 * Fill the page buffers with mbuf info after the page
	 * buffer for RNDIS packet message.
	 */
	for (i = 0; i < nsegs; ++i) {
		struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];

		gpa->gpa_page = atop(segs[i].ds_addr);
		gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
		gpa->gpa_len = segs[i].ds_len;
	}

	txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
	txd->chim_size = 0;
	txr->hn_sendpkt = hn_txpkt_sglist;
done:
	txd->m = m_head;
	/* Set the completion routine */
	hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);

	/* Update temporary stats for later use. */
	txr->hn_stat_pkts++;
	txr->hn_stat_size += m_head->m_pkthdr.len;
	if (m_head->m_flags & M_MCAST)
		txr->hn_stat_mcasts++;

	return 0;
}
/*
 * NOTE:
 * If this function fails, then txd will be freed, but the mbuf
 * associated w/ the txd will _not_ be freed.
 */
static int
hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
	int error, send_failed = 0, has_bpf;

again:
	has_bpf = bpf_peers_present(ifp->if_bpf);
	/*
	 * Make sure that this txd and any aggregated txds are not
	 * freed before ETHER_BPF_MTAP.
	 */
	hn_txdesc_hold(txd);
	error = txr->hn_sendpkt(txr, txd);
	if (!error) {
		if (has_bpf) {
			const struct hn_txdesc *tmp_txd;

			ETHER_BPF_MTAP(ifp, txd->m);
			STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
				ETHER_BPF_MTAP(ifp, tmp_txd->m);
		}

		if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts);
#ifdef HN_IFSTART_SUPPORT
		if (!hn_use_if_start)
#endif
		{
			if_inc_counter(ifp, IFCOUNTER_OBYTES,
			    txr->hn_stat_size);
			if (txr->hn_stat_mcasts != 0) {
				if_inc_counter(ifp, IFCOUNTER_OMCASTS,
				    txr->hn_stat_mcasts);
			}
		}
		txr->hn_pkts += txr->hn_stat_pkts;
	}
	hn_txdesc_put(txr, txd);
	if (__predict_false(error)) {
		int freed;

		/*
		 * This should "really rarely" happen.
		 *
		 * XXX Too many RX to be acked or too many sideband
		 * commands to run?  Ask netvsc_channel_rollup()
		 * to kick start later.
		 */
		txr->hn_has_txeof = 1;
		if (!send_failed) {
			txr->hn_send_failed++;
			send_failed = 1;
			/*
			 * Try sending again after set hn_has_txeof;
			 * in case that we missed the last
			 * netvsc_channel_rollup().
			 */
			goto again;
		}
		if_printf(ifp, "send failed\n");

		/*
		 * Caller will perform further processing on the
		 * associated mbuf, so don't free it in hn_txdesc_put();
		 * only unload it from the DMA map in hn_txdesc_put(),
		 * if it was loaded.
		 */
		txd->m = NULL;
		freed = hn_txdesc_put(txr, txd);
		KASSERT(freed != 0,
		    ("fail to free txd upon send error"));

		txr->hn_send_failed++;
	}

	/* Reset temporary stats, after this sending is done. */
	txr->hn_stat_size = 0;
	txr->hn_stat_pkts = 0;
	txr->hn_stat_mcasts = 0;

	return (error);
}
* Append the specified data to the indicated mbuf chain;
* extend the mbuf chain if the new data does not fit in
* existing space.
*
* This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
* There should be an equivalent in the kernel mbuf code,
* but there does not appear to be one yet.
*
* Differs from m_append() in that additional mbufs are
* allocated with cluster size MJUMPAGESIZE, and filled
* accordingly.
*
* Return 1 if able to complete the job; otherwise 0.
hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
int remainder, space;
for (m = m0; m->m_next != NULL; m = m->m_next)
space = M_TRAILINGSPACE(m);
* Copy into available space.
if (space > remainder)
bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
while (remainder > 0) {
* Allocate a new mbuf; could check space
* and allocate a cluster instead.
n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
n->m_len = min(MJUMPAGESIZE, remainder);
bcopy(cp, mtod(n, caddr_t), n->m_len);
remainder -= n->m_len;
if (m0->m_flags & M_PKTHDR)
m0->m_pkthdr.len += len - remainder;
return (remainder == 0);
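/*
 * Editor's sketch of a typical hv_m_append() call (illustrative
 * only; mirrors the RX path in hn_rxpkt() below, where the data
 * pointer and dlen come from the host):
 *
 *	struct mbuf *m = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, MCLBYTES);
 *	if (m != NULL && !hv_m_append(m, dlen, data)) {
 *		m_freem(m);	// ran out of clusters mid-copy
 *		m = NULL;
 *	}
 */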
#if defined(INET) || defined(INET6)
hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
#if __FreeBSD_version >= 1100095
if (hn_lro_mbufq_depth) {
tcp_lro_queue_mbuf(lc, m);
return tcp_lro_rx(lc, m, 0);
hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
const struct hn_rxinfo *info)
int size, do_lro = 0, do_csum = 1;
int hash_type = M_HASHTYPE_OPAQUE;
/* If the VF is active, inject the packet through the VF */
ifp = rxr->hn_vf ? rxr->hn_vf : rxr->hn_ifp;
if (dlen <= MHLEN) {
m_new = m_gethdr(M_NOWAIT, MT_DATA);
if (m_new == NULL) {
if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
memcpy(mtod(m_new, void *), data, dlen);
m_new->m_pkthdr.len = m_new->m_len = dlen;
rxr->hn_small_pkts++;
* Get an mbuf with a cluster.  For packets 2K or less,
* get a standard 2K cluster.  For anything larger, get a
* 4K cluster.  Any buffers larger than 4K can cause problems
* if looped around to the Hyper-V TX channel, so avoid them.
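/*
 * Editor's note (sizes assume the stock kernel constants, e.g.
 * MCLBYTES == 2048 and MJUMPAGESIZE == PAGE_SIZE on x86):
 *
 *	dlen <= MHLEN      plain header mbuf, data memcpy'ed in
 *	dlen <= MCLBYTES   2KB cluster
 *	dlen >  MCLBYTES   4KB (page-size) cluster
 */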
if (dlen > MCLBYTES) {
size = MJUMPAGESIZE;
m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
if (m_new == NULL) {
if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
hv_m_append(m_new, dlen, data);
m_new->m_pkthdr.rcvif = ifp;
if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
/* receive side checksum offload */
if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
/* IP csum offload */
if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID);
/* TCP/UDP csum offload */
if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
m_new->m_pkthdr.csum_flags |=
(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m_new->m_pkthdr.csum_data = 0xffff;
if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
* As of this writing (Oct 28th, 2016), the host side will turn
* on only TCPCS_OK and IPCS_OK even for UDP datagrams, so
* the do_lro setting here is actually _not_ accurate.  We
* depend on the RSS hash type check to reset do_lro.
if ((info->csum_info &
(NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
(NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
const struct ether_header *eh;
if (m_new->m_len < hoff)
eh = mtod(m_new, struct ether_header *);
etype = ntohs(eh->ether_type);
if (etype == ETHERTYPE_VLAN) {
const struct ether_vlan_header *evl;
hoff = sizeof(*evl);
if (m_new->m_len < hoff)
evl = mtod(m_new, struct ether_vlan_header *);
etype = ntohs(evl->evl_proto);
if (etype == ETHERTYPE_IP) {
pr = hn_check_iplen(m_new, hoff);
if (pr == IPPROTO_TCP) {
(rxr->hn_trust_hcsum &
HN_TRUST_HCSUM_TCP)) {
rxr->hn_csum_trusted++;
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID |
CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m_new->m_pkthdr.csum_data = 0xffff;
} else if (pr == IPPROTO_UDP) {
(rxr->hn_trust_hcsum &
HN_TRUST_HCSUM_UDP)) {
rxr->hn_csum_trusted++;
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID |
CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
m_new->m_pkthdr.csum_data = 0xffff;
} else if (pr != IPPROTO_DONE && do_csum &&
(rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
rxr->hn_csum_trusted++;
m_new->m_pkthdr.csum_flags |=
(CSUM_IP_CHECKED | CSUM_IP_VALID);
if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
NDIS_VLAN_INFO_ID(info->vlan_info),
NDIS_VLAN_INFO_PRI(info->vlan_info),
NDIS_VLAN_INFO_CFI(info->vlan_info));
m_new->m_flags |= M_VLANTAG;
if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
m_new->m_pkthdr.flowid = info->hash_value;
if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
NDIS_HASH_FUNCTION_TOEPLITZ) {
uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK);
* do_lro is reset if the hash types are not TCP
* related.  See the comment in the csum_flags
* setup section above.
case NDIS_HASH_IPV4:
hash_type = M_HASHTYPE_RSS_IPV4;
case NDIS_HASH_TCP_IPV4:
hash_type = M_HASHTYPE_RSS_TCP_IPV4;
case NDIS_HASH_IPV6:
hash_type = M_HASHTYPE_RSS_IPV6;
case NDIS_HASH_IPV6_EX:
hash_type = M_HASHTYPE_RSS_IPV6_EX;
case NDIS_HASH_TCP_IPV6:
hash_type = M_HASHTYPE_RSS_TCP_IPV6;
case NDIS_HASH_TCP_IPV6_EX:
hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
M_HASHTYPE_SET(m_new, hash_type);
* Note: Moved RX completion back to hv_nv_on_receive() so all
* messages (not just data messages) will trigger a response.
if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
#if defined(INET) || defined(INET6)
struct lro_ctrl *lro = &rxr->hn_lro;
rxr->hn_lro_tried++;
if (hn_lro_rx(lro, m_new) == 0) {
/* We're not holding the lock here, so don't release it */
(*ifp->if_input)(ifp, m_new);
hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
struct hn_softc *sc = ifp->if_softc;
struct ifreq *ifr = (struct ifreq *)data;
int mask, error = 0;
if (ifr->ifr_mtu > HN_MTU_MAX) {
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
if ((sc->hn_caps & HN_CAP_MTU) == 0) {
/* Can't change MTU */
if (ifp->if_mtu == ifr->ifr_mtu) {
* Suspend this interface before the synthetic parts
* are detached.
* Detach the synthetic parts, i.e. NVS and RNDIS.
hn_synth_detach(sc);
* Reattach the synthetic parts, i.e. NVS and RNDIS,
* with the new MTU setting.
error = hn_synth_attach(sc, ifr->ifr_mtu);
* Commit the requested MTU, after the synthetic parts
* have been successfully attached.
ifp->if_mtu = ifr->ifr_mtu;
* Make sure that various parameters based on MTU are
* still valid, after the MTU change.
if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
hn_set_chim_size(sc, sc->hn_chim_szmax);
hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
#if __FreeBSD_version >= 1100099
if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
HN_LRO_LENLIM_MIN(ifp))
hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
* All done!  Resume the interface now.
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
if (ifp->if_flags & IFF_UP) {
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
* Caller might hold a mutex, e.g.
* bpf; use busy-wait for the RNDIS
* reply.
hn_rxfilter_config(sc);
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
sc->hn_if_flags = ifp->if_flags;
mask = ifr->ifr_reqcap ^ ifp->if_capenable;
if (mask & IFCAP_TXCSUM) {
ifp->if_capenable ^= IFCAP_TXCSUM;
if (ifp->if_capenable & IFCAP_TXCSUM)
ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
if (mask & IFCAP_TXCSUM_IPV6) {
ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
/* TODO: flip RNDIS offload parameters for RXCSUM. */
if (mask & IFCAP_RXCSUM)
ifp->if_capenable ^= IFCAP_RXCSUM;
/* We can't distinguish IPv6 packets from IPv4 packets on the RX path. */
if (mask & IFCAP_RXCSUM_IPV6)
ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
if (mask & IFCAP_LRO)
ifp->if_capenable ^= IFCAP_LRO;
if (mask & IFCAP_TSO4) {
ifp->if_capenable ^= IFCAP_TSO4;
if (ifp->if_capenable & IFCAP_TSO4)
ifp->if_hwassist |= CSUM_IP_TSO;
ifp->if_hwassist &= ~CSUM_IP_TSO;
if (mask & IFCAP_TSO6) {
ifp->if_capenable ^= IFCAP_TSO6;
if (ifp->if_capenable & IFCAP_TSO6)
ifp->if_hwassist |= CSUM_IP6_TSO;
ifp->if_hwassist &= ~CSUM_IP6_TSO;
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
* Multicast uses mutex; use busy-wait for
* the RNDIS reply.
hn_rxfilter_config(sc);
error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
error = ether_ioctl(ifp, cmd, data);
hn_stop(struct hn_softc *sc, bool detaching)
struct ifnet *ifp = sc->hn_ifp;
KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
("synthetic parts were not attached"));
/* Disable polling. */
/* Clear RUNNING bit _before_ hn_suspend_data() */
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
hn_suspend_data(sc);
/* Clear OACTIVE bit. */
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
sc->hn_tx_ring[i].hn_oactive = 0;
* If the VF is active, make sure the filter is not 0, even if
* the synthetic NIC is down.
if (!detaching && (sc->hn_flags & HN_FLAG_VF))
hn_rxfilter_config(sc);
hn_init_locked(struct hn_softc *sc)
struct ifnet *ifp = sc->hn_ifp;
if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
if (ifp->if_drv_flags & IFF_DRV_RUNNING)
/* Configure RX filter */
hn_rxfilter_config(sc);
/* Clear OACTIVE bit. */
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
sc->hn_tx_ring[i].hn_oactive = 0;
/* Clear TX 'suspended' bit. */
hn_resume_tx(sc, sc->hn_tx_ring_inuse);
/* Everything is ready; unleash! */
atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
/* Re-enable polling if requested. */
if (sc->hn_pollhz > 0)
hn_polling(sc, sc->hn_pollhz);
struct hn_softc *sc = xsc;
#if __FreeBSD_version >= 1100099
hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
unsigned int lenlim;
lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
error = sysctl_handle_int(oidp, &lenlim, 0, req);
if (error || req->newptr == NULL)
if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
lenlim > TCP_LRO_LENGTH_MAX) {
hn_set_lro_lenlim(sc, lenlim);
hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
int ackcnt, error, i;
* lro_ackcnt_lim is the append count limit;
* +1 turns it into the aggregation limit.
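* Editor's worked example (illustrative): lro_ackcnt_lim == 1
* allows one appended ACK, i.e. two ACKs aggregated in total,
* which is why the valid range checked below is
* [2, TCP_LRO_ACKCNT_MAX + 1].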
ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
error = sysctl_handle_int(oidp, &ackcnt, 0, req);
if (error || req->newptr == NULL)
if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
* Convert the aggregation limit back to the append
* count limit.
--ackcnt;
for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
error = sysctl_handle_int(oidp, &on, 0, req);
if (error || req->newptr == NULL)
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
rxr->hn_trust_hcsum |= hcsum;
rxr->hn_trust_hcsum &= ~hcsum;
hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
int chim_size, error;
chim_size = sc->hn_tx_ring[0].hn_chim_size;
error = sysctl_handle_int(oidp, &chim_size, 0, req);
if (error || req->newptr == NULL)
if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
hn_set_chim_size(sc, chim_size);
#if __FreeBSD_version < 1100095
hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
int ofs = arg2, i, error;
struct hn_rx_ring *rxr;
for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
rxr = &sc->hn_rx_ring[i];
stat += *((int *)((uint8_t *)rxr + ofs));
error = sysctl_handle_64(oidp, &stat, 0, req);
if (error || req->newptr == NULL)
/* Zero out this stat. */
for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
rxr = &sc->hn_rx_ring[i];
*((int *)((uint8_t *)rxr + ofs)) = 0;
hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
int ofs = arg2, i, error;
struct hn_rx_ring *rxr;
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
stat += *((uint64_t *)((uint8_t *)rxr + ofs));
error = sysctl_handle_64(oidp, &stat, 0, req);
if (error || req->newptr == NULL)
/* Zero out this stat. */
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
*((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
int ofs = arg2, i, error;
struct hn_rx_ring *rxr;
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
stat += *((u_long *)((uint8_t *)rxr + ofs));
error = sysctl_handle_long(oidp, &stat, 0, req);
if (error || req->newptr == NULL)
/* Zero out this stat. */
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
rxr = &sc->hn_rx_ring[i];
*((u_long *)((uint8_t *)rxr + ofs)) = 0;
hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
int ofs = arg2, i, error;
struct hn_tx_ring *txr;
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
stat += *((u_long *)((uint8_t *)txr + ofs));
error = sysctl_handle_long(oidp, &stat, 0, req);
if (error || req->newptr == NULL)
/* Zero out this stat. */
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
*((u_long *)((uint8_t *)txr + ofs)) = 0;
hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
int ofs = arg2, i, error, conf;
struct hn_tx_ring *txr;
txr = &sc->hn_tx_ring[0];
conf = *((int *)((uint8_t *)txr + ofs));
error = sysctl_handle_int(oidp, &conf, 0, req);
if (error || req->newptr == NULL)
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
txr = &sc->hn_tx_ring[i];
*((int *)((uint8_t *)txr + ofs)) = conf;
hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
size = sc->hn_agg_size;
error = sysctl_handle_int(oidp, &size, 0, req);
if (error || req->newptr == NULL)
sc->hn_agg_size = size;
hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
pkts = sc->hn_agg_pkts;
error = sysctl_handle_int(oidp, &pkts, 0, req);
if (error || req->newptr == NULL)
sc->hn_agg_pkts = pkts;
hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
pkts = sc->hn_tx_ring[0].hn_agg_pktmax;
return (sysctl_handle_int(oidp, &pkts, 0, req));
hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
align = sc->hn_tx_ring[0].hn_agg_align;
return (sysctl_handle_int(oidp, &align, 0, req));
hn_chan_polling(struct vmbus_channel *chan, u_int pollhz)
vmbus_chan_poll_disable(chan);
vmbus_chan_poll_enable(chan, pollhz);
hn_polling(struct hn_softc *sc, u_int pollhz)
int nsubch = sc->hn_rx_ring_inuse - 1;
struct vmbus_channel **subch;
subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
for (i = 0; i < nsubch; ++i)
hn_chan_polling(subch[i], pollhz);
vmbus_subchan_rel(subch, nsubch);
hn_chan_polling(sc->hn_prichan, pollhz);
hn_polling_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
pollhz = sc->hn_pollhz;
error = sysctl_handle_int(oidp, &pollhz, 0, req);
if (error || req->newptr == NULL)
(pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX))
if (sc->hn_pollhz != pollhz) {
sc->hn_pollhz = pollhz;
if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) &&
(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
hn_polling(sc, sc->hn_pollhz);
hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
snprintf(verstr, sizeof(verstr), "%u.%u",
HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS);
return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req);
hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
char assist_str[128];
hwassist = sc->hn_ifp->if_hwassist;
snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS);
return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req);
hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
char filter_str[128];
filter = sc->hn_rx_filter;
snprintf(filter_str, sizeof(filter_str), "%b", filter,
return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req);
hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
if (error || req->newptr == NULL)
error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
if (sc->hn_rx_ring_inuse > 1) {
error = hn_rss_reconfig(sc);
/* Not RSS capable, at least for now; just save the RSS key. */
hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
if (error || req->newptr == NULL)
* Don't allow RSS indirect table changes if this interface
* is not currently RSS capable.
if (sc->hn_rx_ring_inuse == 1) {
error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
sc->hn_flags |= HN_FLAG_HAS_RSSIND;
hn_rss_ind_fixup(sc);
error = hn_rss_reconfig(sc);
hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
hash = sc->hn_rss_hash;
snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
hn_vf_sysctl(SYSCTL_HANDLER_ARGS)
struct hn_softc *sc = arg1;
vf = sc->hn_rx_ring[0].hn_vf;
snprintf(vf_name, sizeof(vf_name), "%s", if_name(vf));
return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req);
hn_check_iplen(const struct mbuf *m, int hoff)
const struct ip *ip;
int len, iphlen, iplen;
const struct tcphdr *th;
int thoff; /* TCP data offset */
len = hoff + sizeof(struct ip);
/* The packet must be at least the size of an IP header. */
if (m->m_pkthdr.len < len)
return IPPROTO_DONE;
/* The fixed IP header must reside completely in the first mbuf. */
return IPPROTO_DONE;
ip = mtodo(m, hoff);
/* Bound check the packet's stated IP header length. */
iphlen = ip->ip_hl << 2;
if (iphlen < sizeof(struct ip)) /* minimum header length */
return IPPROTO_DONE;
/* The full IP header must reside completely in the one mbuf. */
if (m->m_len < hoff + iphlen)
return IPPROTO_DONE;
iplen = ntohs(ip->ip_len);
* Check that the amount of data in the buffers is at least
* as much as the IP header would have us expect.
if (m->m_pkthdr.len < hoff + iplen)
return IPPROTO_DONE;
* Ignore IP fragments.
if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
return IPPROTO_DONE;
* The TCP/IP or UDP/IP header must be entirely contained within
* the first fragment of a packet.
if (iplen < iphlen + sizeof(struct tcphdr))
return IPPROTO_DONE;
if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
return IPPROTO_DONE;
th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
thoff = th->th_off << 2;
if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
return IPPROTO_DONE;
if (m->m_len < hoff + iphlen + thoff)
return IPPROTO_DONE;
if (iplen < iphlen + sizeof(struct udphdr))
return IPPROTO_DONE;
if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
return IPPROTO_DONE;
return IPPROTO_DONE;
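/*
 * Editor's note (summarizes the checks above, illustrative only):
 * hn_check_iplen() returns IPPROTO_TCP or IPPROTO_UDP only when the
 * IP and transport headers are sane, contiguous in the first mbuf,
 * and the packet is not a fragment; anything else yields
 * IPPROTO_DONE.  A caller that trusts the host's verification can
 * then do, as in hn_rxpkt() above:
 *
 *	if (hn_check_iplen(m, hoff) == IPPROTO_TCP)
 *		m->m_pkthdr.csum_flags |=
 *		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
 */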
hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
device_t dev = sc->hn_dev;
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
* Create RXBUF for reception.
* - It is shared by all channels.
* - A large enough buffer is allocated; certain versions of NVS
*   may further limit the usable space.
sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma,
BUS_DMA_WAITOK | BUS_DMA_ZERO);
if (sc->hn_rxbuf == NULL) {
device_printf(sc->hn_dev, "allocate rxbuf failed\n");
sc->hn_rx_ring_cnt = ring_cnt;
sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
M_DEVBUF, M_WAITOK | M_ZERO);
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
lroent_cnt = hn_lro_entry_count;
if (lroent_cnt < TCP_LRO_ENTRIES)
lroent_cnt = TCP_LRO_ENTRIES;
device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
#endif /* INET || INET6 */
ctx = device_get_sysctl_ctx(dev);
child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
/* Create dev.hn.UNIT.rx sysctl tree */
sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE,
&rxr->hn_br_dma, BUS_DMA_WAITOK);
if (rxr->hn_br == NULL) {
device_printf(dev, "allocate bufring failed\n");
if (hn_trust_hosttcp)
rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
if (hn_trust_hostudp)
rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
if (hn_trust_hostip)
rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
rxr->hn_ifp = sc->hn_ifp;
if (i < sc->hn_tx_ring_cnt)
rxr->hn_txr = &sc->hn_tx_ring[i];
rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF;
rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK);
rxr->hn_rxbuf = sc->hn_rxbuf;
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
hn_lro_mbufq_depth);
tcp_lro_init(&rxr->hn_lro);
rxr->hn_lro.ifp = sc->hn_ifp;
#if __FreeBSD_version >= 1100099
rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
#endif /* INET || INET6 */
if (sc->hn_rx_sysctl_tree != NULL) {
* Create per RX ring sysctl tree:
* dev.hn.UNIT.rx.RINGID
snprintf(name, sizeof(name), "%d", i);
rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
if (rxr->hn_rx_sysctl_tree != NULL) {
SYSCTL_ADD_ULONG(ctx,
SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
OID_AUTO, "packets", CTLFLAG_RW,
&rxr->hn_pkts, "# of packets received");
SYSCTL_ADD_ULONG(ctx,
SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
OID_AUTO, "rss_pkts", CTLFLAG_RW,
"# of packets w/ RSS info received");
SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
OID_AUTO, "pktbuf_len", CTLFLAG_RD,
&rxr->hn_pktbuf_len, 0,
"Temporary channel packet buffer length");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_lro.lro_queued),
#if __FreeBSD_version < 1100095
hn_rx_stat_int_sysctl,
hn_rx_stat_u64_sysctl,
"LU", "LRO queued");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
#if __FreeBSD_version < 1100095
hn_rx_stat_int_sysctl,
hn_rx_stat_u64_sysctl,
"LU", "LRO flushed");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_lro_tried),
hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
#if __FreeBSD_version >= 1100099
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_lro_lenlim_sysctl, "IU",
"Max # of data bytes to be aggregated by LRO");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_lro_ackcnt_sysctl, "I",
"Max # of ACKs to be aggregated by LRO");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
hn_trust_hcsum_sysctl, "I",
"Trust TCP segment verification on host side, "
"when csum info is missing");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
hn_trust_hcsum_sysctl, "I",
"Trust UDP datagram verification on host side, "
"when csum info is missing");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
hn_trust_hcsum_sysctl, "I",
"Trust IP packet verification on host side, "
"when csum info is missing");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_ip),
hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_tcp),
hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_udp),
hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_csum_trusted),
hn_rx_stat_ulong_sysctl, "LU",
"# of packets for which we trusted the host's csum verification");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_small_pkts),
hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_rx_ring, hn_ack_failed),
hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
hn_destroy_rx_data(struct hn_softc *sc)
if (sc->hn_rxbuf != NULL) {
if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0)
hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
device_printf(sc->hn_dev, "RXBUF is referenced\n");
sc->hn_rxbuf = NULL;
if (sc->hn_rx_ring_cnt == 0)
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
if (rxr->hn_br == NULL)
if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) {
hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
device_printf(sc->hn_dev,
"%dth channel bufring is referenced", i);
#if defined(INET) || defined(INET6)
tcp_lro_free(&rxr->hn_lro);
free(rxr->hn_pktbuf, M_DEVBUF);
free(sc->hn_rx_ring, M_DEVBUF);
sc->hn_rx_ring = NULL;
sc->hn_rx_ring_cnt = 0;
sc->hn_rx_ring_inuse = 0;
hn_tx_ring_create(struct hn_softc *sc, int id)
struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
device_t dev = sc->hn_dev;
bus_dma_tag_t parent_dtag;
txr->hn_tx_idx = id;
#ifndef HN_USE_TXDESC_BUFRING
mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
M_DEVBUF, M_WAITOK | M_ZERO);
#ifndef HN_USE_TXDESC_BUFRING
SLIST_INIT(&txr->hn_txlist);
txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF,
M_WAITOK, &txr->hn_tx_lock);
if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) {
txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ(
device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id));
txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt];
#ifdef HN_IFSTART_SUPPORT
if (hn_use_if_start) {
txr->hn_txeof = hn_start_txeof;
TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
txr->hn_txeof = hn_xmit_txeof;
TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
br_depth = hn_get_txswq_depth(txr);
txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF,
M_WAITOK, &txr->hn_tx_lock);
txr->hn_direct_tx_size = hn_direct_tx_size;
* Always schedule transmission instead of trying to do direct
* transmission.  This one gives the best performance so far.
txr->hn_sched_tx = 1;
parent_dtag = bus_get_dma_tag(dev);
/* DMA tag for RNDIS packet messages. */
error = bus_dma_tag_create(parent_dtag, /* parent */
HN_RNDIS_PKT_ALIGN, /* alignment */
HN_RNDIS_PKT_BOUNDARY, /* boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
HN_RNDIS_PKT_LEN, /* maxsize */
HN_RNDIS_PKT_LEN, /* maxsegsize */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&txr->hn_tx_rndis_dtag);
device_printf(dev, "failed to create rndis dmatag\n");
/* DMA tag for data. */
error = bus_dma_tag_create(parent_dtag, /* parent */
HN_TX_DATA_BOUNDARY, /* boundary */
BUS_SPACE_MAXADDR, /* lowaddr */
BUS_SPACE_MAXADDR, /* highaddr */
NULL, NULL, /* filter, filterarg */
HN_TX_DATA_MAXSIZE, /* maxsize */
HN_TX_DATA_SEGCNT_MAX, /* nsegments */
HN_TX_DATA_SEGSIZE, /* maxsegsize */
NULL, /* lockfunc */
NULL, /* lockfuncarg */
&txr->hn_tx_data_dtag);
device_printf(dev, "failed to create data dmatag\n");
for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
struct hn_txdesc *txd = &txr->hn_txdesc[i];
txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
STAILQ_INIT(&txd->agg_list);
* Allocate and load RNDIS packet message.
error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
(void **)&txd->rndis_pkt,
BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
&txd->rndis_pkt_dmap);
"failed to allocate rndis_packet_msg, %d\n", i);
error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
txd->rndis_pkt_dmap,
txd->rndis_pkt, HN_RNDIS_PKT_LEN,
hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
"failed to load rndis_packet_msg, %d\n", i);
bus_dmamem_free(txr->hn_tx_rndis_dtag,
txd->rndis_pkt, txd->rndis_pkt_dmap);
/* DMA map for TX data. */
error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
"failed to allocate tx data dmamap\n");
bus_dmamap_unload(txr->hn_tx_rndis_dtag,
txd->rndis_pkt_dmap);
bus_dmamem_free(txr->hn_tx_rndis_dtag,
txd->rndis_pkt, txd->rndis_pkt_dmap);
/* All set, put it to list */
txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
buf_ring_enqueue(txr->hn_txdesc_br, txd);
txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
if (sc->hn_tx_sysctl_tree != NULL) {
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
* Create per TX ring sysctl tree:
* dev.hn.UNIT.tx.RINGID
ctx = device_get_sysctl_ctx(dev);
child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
snprintf(name, sizeof(name), "%d", id);
txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
if (txr->hn_tx_sysctl_tree != NULL) {
child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
"# of available TX descs");
#ifdef HN_IFSTART_SUPPORT
if (!hn_use_if_start)
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
CTLFLAG_RD, &txr->hn_oactive, 0,
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
CTLFLAG_RW, &txr->hn_pkts,
"# of packets transmitted");
SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends",
CTLFLAG_RW, &txr->hn_sends, "# of sends");
hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
struct hn_tx_ring *txr = txd->txr;
KASSERT(txd->m == NULL, ("still has mbuf installed"));
KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt,
txd->rndis_pkt_dmap);
bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd)
KASSERT(txd->refs == 0 || txd->refs == 1,
("invalid txd refs %d", txd->refs));
/* Aggregated txds will be freed by their aggregating txd. */
if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) {
freed = hn_txdesc_put(txr, txd);
KASSERT(freed, ("can't free txdesc"));
hn_tx_ring_destroy(struct hn_tx_ring *txr)
if (txr->hn_txdesc == NULL)
* Because the freeing of aggregated txds will be deferred
* to the aggregating txd, two passes are used here:
* - The first pass GCes any pending txds.  This GC is necessary,
*   since if the channels are revoked, the hypervisor will not
*   deliver send-done for all pending txds.
* - The second pass frees the busdma resources, i.e. after all
*   txds have been GCed.
for (i = 0; i < txr->hn_txdesc_cnt; ++i)
hn_txdesc_gc(txr, &txr->hn_txdesc[i]);
for (i = 0; i < txr->hn_txdesc_cnt; ++i)
hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]);
if (txr->hn_tx_data_dtag != NULL)
bus_dma_tag_destroy(txr->hn_tx_data_dtag);
if (txr->hn_tx_rndis_dtag != NULL)
bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
#ifdef HN_USE_TXDESC_BUFRING
buf_ring_free(txr->hn_txdesc_br, M_DEVBUF);
free(txr->hn_txdesc, M_DEVBUF);
txr->hn_txdesc = NULL;
if (txr->hn_mbuf_br != NULL)
buf_ring_free(txr->hn_mbuf_br, M_DEVBUF);
#ifndef HN_USE_TXDESC_BUFRING
mtx_destroy(&txr->hn_txlist_spin);
mtx_destroy(&txr->hn_tx_lock);
hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
struct sysctl_oid_list *child;
struct sysctl_ctx_list *ctx;
* Create TXBUF for chimney sending.
* NOTE: It is shared by all channels.
sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev),
PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma,
BUS_DMA_WAITOK | BUS_DMA_ZERO);
if (sc->hn_chim == NULL) {
device_printf(sc->hn_dev, "allocate txbuf failed\n");
sc->hn_tx_ring_cnt = ring_cnt;
sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
M_DEVBUF, M_WAITOK | M_ZERO);
ctx = device_get_sysctl_ctx(sc->hn_dev);
child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
/* Create dev.hn.UNIT.tx sysctl tree */
sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
error = hn_tx_ring_create(sc, i);
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_no_txdescs),
hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_send_failed),
hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failures");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_txdma_failed),
hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failures");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_flush_failed),
hn_tx_stat_ulong_sysctl, "LU",
"# of packet transmission aggregation flush failures");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_tx_collapsed),
hn_tx_stat_ulong_sysctl, "LU", "# of TX mbufs collapsed");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_tx_chimney),
hn_tx_stat_ulong_sysctl, "LU", "# of chimney sends");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
"# of total TX descs");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
CTLFLAG_RD, &sc->hn_chim_szmax, 0,
"Chimney send packet size upper boundary");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
hn_chim_size_sysctl, "I", "Chimney send packet size limit");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_direct_tx_size),
hn_tx_conf_int_sysctl, "I",
"Size of the packet for direct transmission");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
__offsetof(struct hn_tx_ring, hn_sched_tx),
hn_tx_conf_int_sysctl, "I",
"Always schedule transmission "
"instead of doing direct transmission");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax",
CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0,
"Applied packet transmission aggregation size");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax",
CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_txagg_pktmax_sysctl, "I",
"Applied packet transmission aggregation packets");
SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align",
CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
hn_txagg_align_sysctl, "I",
"Applied packet transmission aggregation alignment");
hn_set_chim_size(struct hn_softc *sc, int chim_size)
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
sc->hn_tx_ring[i].hn_chim_size = chim_size;
hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
struct ifnet *ifp = sc->hn_ifp;
if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
KASSERT(sc->hn_ndis_tso_sgmin >= 2,
("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin));
tso_minlen = sc->hn_ndis_tso_sgmin * mtu;
KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen &&
sc->hn_ndis_tso_szmax <= IP_MAXPACKET,
("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax));
if (tso_maxlen < tso_minlen)
tso_maxlen = tso_minlen;
else if (tso_maxlen > IP_MAXPACKET)
tso_maxlen = IP_MAXPACKET;
if (tso_maxlen > sc->hn_ndis_tso_szmax)
tso_maxlen = sc->hn_ndis_tso_szmax;
ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax);
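/*
 * Editor's worked example (illustrative, using common values):
 * with hn_ndis_tso_sgmin == 2 and mtu == 1500, tso_minlen is 3000;
 * a requested tso_maxlen of 65535 (IP_MAXPACKET) then yields
 *
 *	if_hw_tsomax = 65535 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN)
 *	             = 65535 - 18 = 65517
 *
 * bytes per TSO burst, unless the host advertises a smaller
 * hn_ndis_tso_szmax.
 */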
hn_fixup_tx_data(struct hn_softc *sc)
uint64_t csum_assist;
hn_set_chim_size(sc, sc->hn_chim_szmax);
if (hn_tx_chimney_size > 0 &&
hn_tx_chimney_size < sc->hn_chim_szmax)
hn_set_chim_size(sc, hn_tx_chimney_size);
if (sc->hn_caps & HN_CAP_IPCS)
csum_assist |= CSUM_IP;
if (sc->hn_caps & HN_CAP_TCP4CS)
csum_assist |= CSUM_IP_TCP;
if (sc->hn_caps & HN_CAP_UDP4CS)
csum_assist |= CSUM_IP_UDP;
if (sc->hn_caps & HN_CAP_TCP6CS)
csum_assist |= CSUM_IP6_TCP;
if (sc->hn_caps & HN_CAP_UDP6CS)
csum_assist |= CSUM_IP6_UDP;
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
if (sc->hn_caps & HN_CAP_HASHVAL) {
* Support HASHVAL pktinfo on TX path.
if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
hn_destroy_tx_data(struct hn_softc *sc)
if (sc->hn_chim != NULL) {
if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) {
hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
device_printf(sc->hn_dev,
"chimney sending buffer is referenced");
if (sc->hn_tx_ring_cnt == 0)
for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
hn_tx_ring_destroy(&sc->hn_tx_ring[i]);
free(sc->hn_tx_ring, M_DEVBUF);
sc->hn_tx_ring = NULL;
sc->hn_tx_ring_cnt = 0;
sc->hn_tx_ring_inuse = 0;
#ifdef HN_IFSTART_SUPPORT
hn_start_taskfunc(void *xtxr, int pending __unused)
struct hn_tx_ring *txr = xtxr;
mtx_lock(&txr->hn_tx_lock);
hn_start_locked(txr, 0);
mtx_unlock(&txr->hn_tx_lock);
hn_start_locked(struct hn_tx_ring *txr, int len)
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
KASSERT(hn_use_if_start,
("hn_start_locked is called, when if_start is disabled"));
KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
if (__predict_false(txr->hn_suspended))
if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
struct hn_txdesc *txd;
struct mbuf *m_head;
IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
if (len > 0 && m_head->m_pkthdr.len > len) {
* This sending could be time consuming; let callers
* dispatch this packet sending (and sending of any
* follow-up packets) to the TX taskqueue.
IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
#if defined(INET6) || defined(INET)
if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
m_head = hn_tso_fixup(m_head);
if (__predict_false(m_head == NULL)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
txd = hn_txdesc_get(txr);
txr->hn_no_txdescs++;
IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
error = hn_encap(ifp, txr, txd, &m_head);
/* Both txd and m_head are freed */
KASSERT(txr->hn_agg_txd == NULL,
("encap failed w/ pending aggregating txdesc"));
if (txr->hn_agg_pktleft == 0) {
if (txr->hn_agg_txd != NULL) {
KASSERT(m_head == NULL,
("pending mbuf for aggregating txdesc"));
error = hn_flush_txagg(ifp, txr);
if (__predict_false(error)) {
atomic_set_int(&ifp->if_drv_flags,
KASSERT(m_head != NULL, ("mbuf was freed"));
error = hn_txpkt(ifp, txr, txd);
if (__predict_false(error)) {
/* txd is freed, but m_head is not */
IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
atomic_set_int(&ifp->if_drv_flags,
KASSERT(txr->hn_agg_txd != NULL,
("no aggregating txdesc"));
KASSERT(m_head == NULL,
("pending mbuf for aggregating txdesc"));
/* Flush pending aggregated transmission. */
if (txr->hn_agg_txd != NULL)
hn_flush_txagg(ifp, txr);
hn_start(struct ifnet *ifp)
struct hn_softc *sc = ifp->if_softc;
struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
if (txr->hn_sched_tx)
if (mtx_trylock(&txr->hn_tx_lock)) {
sched = hn_start_locked(txr, txr->hn_direct_tx_size);
mtx_unlock(&txr->hn_tx_lock);
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
struct hn_tx_ring *txr = xtxr;
mtx_lock(&txr->hn_tx_lock);
atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
hn_start_locked(txr, 0);
mtx_unlock(&txr->hn_tx_lock);
hn_start_txeof(struct hn_tx_ring *txr)
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
if (txr->hn_sched_tx)
if (mtx_trylock(&txr->hn_tx_lock)) {
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
sched = hn_start_locked(txr, txr->hn_direct_tx_size);
mtx_unlock(&txr->hn_tx_lock);
taskqueue_enqueue(txr->hn_tx_taskq,
* Release OACTIVE earlier, in the hope that
* others can catch up.  The task will clear the
* flag again, with the hn_tx_lock held, to avoid
* possible races.
atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
#endif /* HN_IFSTART_SUPPORT */
hn_xmit(struct hn_tx_ring *txr, int len)
struct hn_softc *sc = txr->hn_sc;
struct ifnet *ifp = sc->hn_ifp;
struct mbuf *m_head;
mtx_assert(&txr->hn_tx_lock, MA_OWNED);
#ifdef HN_IFSTART_SUPPORT
KASSERT(hn_use_if_start == 0,
("hn_xmit is called, when if_start is enabled"));
KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
if (__predict_false(txr->hn_suspended))
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
struct hn_txdesc *txd;
if (len > 0 && m_head->m_pkthdr.len > len) {
* This sending could be time consuming; let callers
* dispatch this packet sending (and sending of any
* follow-up packets) to the TX taskqueue.
drbr_putback(ifp, txr->hn_mbuf_br, m_head);
txd = hn_txdesc_get(txr);
txr->hn_no_txdescs++;
drbr_putback(ifp, txr->hn_mbuf_br, m_head);
txr->hn_oactive = 1;
error = hn_encap(ifp, txr, txd, &m_head);
/* Both txd and m_head are freed; discard */
KASSERT(txr->hn_agg_txd == NULL,
("encap failed w/ pending aggregating txdesc"));
drbr_advance(ifp, txr->hn_mbuf_br);
if (txr->hn_agg_pktleft == 0) {
if (txr->hn_agg_txd != NULL) {
KASSERT(m_head == NULL,
("pending mbuf for aggregating txdesc"));
error = hn_flush_txagg(ifp, txr);
if (__predict_false(error)) {
txr->hn_oactive = 1;
KASSERT(m_head != NULL, ("mbuf was freed"));
error = hn_txpkt(ifp, txr, txd);
if (__predict_false(error)) {
/* txd is freed, but m_head is not */
drbr_putback(ifp, txr->hn_mbuf_br,
txr->hn_oactive = 1;
KASSERT(txr->hn_agg_txd != NULL,
("no aggregating txdesc"));
KASSERT(m_head == NULL,
("pending mbuf for aggregating txdesc"));
drbr_advance(ifp, txr->hn_mbuf_br);
/* Flush pending aggregated transmission. */
if (txr->hn_agg_txd != NULL)
hn_flush_txagg(ifp, txr);
hn_transmit(struct ifnet *ifp, struct mbuf *m)
struct hn_softc *sc = ifp->if_softc;
struct hn_tx_ring *txr;
#if defined(INET6) || defined(INET)
* Perform TSO packet header fixup now, since the TSO
* packet header should be cache-hot.
if (m->m_pkthdr.csum_flags & CSUM_TSO) {
m = hn_tso_fixup(m);
if (__predict_false(m == NULL)) {
if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
* Select the TX ring based on flowid
if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
txr = &sc->hn_tx_ring[idx];
error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
if (txr->hn_oactive)
if (txr->hn_sched_tx)
if (mtx_trylock(&txr->hn_tx_lock)) {
sched = hn_xmit(txr, txr->hn_direct_tx_size);
mtx_unlock(&txr->hn_tx_lock);
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
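/*
 * Editor's note (illustrative): the flowid-based ring selection in
 * hn_transmit() above is a simple modulo, so e.g. with 4 TX rings
 * in use a packet with flowid 7 lands on ring 3:
 *
 *	idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
 */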
hn_tx_ring_qflush(struct hn_tx_ring *txr)
mtx_lock(&txr->hn_tx_lock);
while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
mtx_unlock(&txr->hn_tx_lock);
hn_xmit_qflush(struct ifnet *ifp)
struct hn_softc *sc = ifp->if_softc;
for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
hn_xmit_txeof(struct hn_tx_ring *txr)
if (txr->hn_sched_tx)
if (mtx_trylock(&txr->hn_tx_lock)) {
txr->hn_oactive = 0;
sched = hn_xmit(txr, txr->hn_direct_tx_size);
mtx_unlock(&txr->hn_tx_lock);
taskqueue_enqueue(txr->hn_tx_taskq,
* Release oactive earlier, in the hope that
* others can catch up.  The task will clear
* oactive again, with the hn_tx_lock held, to
* avoid possible races.
txr->hn_oactive = 0;
taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
hn_xmit_taskfunc(void *xtxr, int pending __unused)
struct hn_tx_ring *txr = xtxr;
mtx_lock(&txr->hn_tx_lock);
mtx_unlock(&txr->hn_tx_lock);
hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
struct hn_tx_ring *txr = xtxr;
mtx_lock(&txr->hn_tx_lock);
txr->hn_oactive = 0;
mtx_unlock(&txr->hn_tx_lock);
hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
struct vmbus_chan_br cbr;
struct hn_rx_ring *rxr;
struct hn_tx_ring *txr = NULL;
idx = vmbus_chan_subidx(chan);
* Link this channel to RX/TX ring.
KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
("invalid channel index %d, should be >= 0 && < %d",
idx, sc->hn_rx_ring_inuse));
rxr = &sc->hn_rx_ring[idx];
KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
("RX ring %d already attached", idx));
rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
rxr->hn_chan = chan;
if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
idx, vmbus_chan_id(chan));
if (idx < sc->hn_tx_ring_inuse) {
txr = &sc->hn_tx_ring[idx];
KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
("TX ring %d already attached", idx));
txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
txr->hn_chan = chan;
if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
idx, vmbus_chan_id(chan));
/* Bind this channel to a proper CPU. */
vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));
cbr.cbr = rxr->hn_br;
cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
cbr.cbr_txsz = HN_TXBR_SIZE;
cbr.cbr_rxsz = HN_RXBR_SIZE;
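/*
 * Editor's note (matches the allocation in hn_create_rx_data()
 * above): hn_br is one DMA allocation of HN_TXBR_SIZE +
 * HN_RXBR_SIZE bytes, and the channel bufring is carved out of
 * it as:
 *
 *	[0, HN_TXBR_SIZE)                            TX bufring
 *	[HN_TXBR_SIZE, HN_TXBR_SIZE + HN_RXBR_SIZE)  RX bufring
 */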
error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
if (error == EISCONN) {
if_printf(sc->hn_ifp, "bufring is connected after "
"chan%u open failure\n", vmbus_chan_id(chan));
rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
vmbus_chan_id(chan), error);
hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
struct hn_rx_ring *rxr;
idx = vmbus_chan_subidx(chan);
* Unlink this channel from the RX/TX ring.
KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
("invalid channel index %d, should be >= 0 && < %d",
idx, sc->hn_rx_ring_inuse));
rxr = &sc->hn_rx_ring[idx];
KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
("RX ring %d is not attached", idx));
rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
if (idx < sc->hn_tx_ring_inuse) {
struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
("TX ring %d is not attached", idx));
txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
* Close this channel.
* Channel closing does _not_ destroy the target channel.
error = vmbus_chan_close_direct(chan);
if (error == EISCONN) {
if_printf(sc->hn_ifp, "chan%u bufring is connected "
"after being closed\n", vmbus_chan_id(chan));
rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
vmbus_chan_id(chan), error);
hn_attach_subchans(struct hn_softc *sc)
struct vmbus_channel **subchans;
int subchan_cnt = sc->hn_rx_ring_inuse - 1;
KASSERT(subchan_cnt > 0, ("no sub-channels"));
/* Attach the sub-channels. */
subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
for (i = 0; i < subchan_cnt; ++i) {
error1 = hn_chan_attach(sc, subchans[i]);
/* Move on; all channels will be detached later. */
vmbus_subchan_rel(subchans, subchan_cnt);
if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
if_printf(sc->hn_ifp, "%d sub-channels attached\n",
hn_detach_allchans(struct hn_softc *sc)
struct vmbus_channel **subchans;
int subchan_cnt = sc->hn_rx_ring_inuse - 1;
if (subchan_cnt == 0)
/* Detach the sub-channels. */
subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
for (i = 0; i < subchan_cnt; ++i)
hn_chan_detach(sc, subchans[i]);
vmbus_subchan_rel(subchans, subchan_cnt);
* Detach the primary channel, _after_ all sub-channels
* are detached.
hn_chan_detach(sc, sc->hn_prichan);
/* Wait for sub-channels to be destroyed, if any. */
vmbus_subchan_drain(sc->hn_prichan);
for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
HN_RX_FLAG_ATTACHED) == 0,
("%dth RX ring is still attached", i));
for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
HN_TX_FLAG_ATTACHED) == 0,
("%dth TX ring is still attached", i));
hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
struct vmbus_channel **subchans;
int nchan, rxr_cnt, error;
nchan = *nsubch + 1;
* Multiple RX/TX rings are not requested.
* Query RSS capabilities, e.g. # of RX rings, and the indirect
* table size.
error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
/* No RSS; this is benign. */
if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
if (nchan > rxr_cnt)
if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
* Allocate sub-channels from NVS.
*nsubch = nchan - 1;
error = hn_nvs_alloc_subchans(sc, nsubch);
if (error || *nsubch == 0) {
/* Failed to allocate sub-channels. */
* Wait for all sub-channels to become ready before moving on.
subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
vmbus_subchan_rel(subchans, *nsubch);
4672 hn_synth_attachable(const struct hn_softc *sc)
4676 if (sc->hn_flags & HN_FLAG_ERRORS)
4679 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
4680 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
4682 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
static int
hn_synth_attach(struct hn_softc *sc, int mtu)
{
#define ATTACHED_NVS		0x0002
#define ATTACHED_RNDIS		0x0004

	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int error, nsubch, nchan, i;
	uint32_t old_caps, attached = 0;

	KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
	    ("synthetic parts were attached"));

	if (!hn_synth_attachable(sc))
		return (ENXIO);

	/* Save capabilities for later verification. */
	old_caps = sc->hn_caps;
	sc->hn_caps = 0;

	/* Clear RSS stuffs. */
	sc->hn_rss_ind_size = 0;
	sc->hn_rss_hash = 0;

	/*
	 * Attach the primary channel _before_ attaching NVS and RNDIS.
	 */
	error = hn_chan_attach(sc, sc->hn_prichan);
	if (error)
		goto failed;

	/*
	 * Attach NVS.
	 */
	error = hn_nvs_attach(sc, mtu);
	if (error)
		goto failed;
	attached |= ATTACHED_NVS;

	/*
	 * Attach RNDIS _after_ NVS is attached.
	 */
	error = hn_rndis_attach(sc, mtu);
	if (error)
		goto failed;
	attached |= ATTACHED_RNDIS;

	/*
	 * Make sure capabilities are not changed.
	 */
	if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
		if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
		    old_caps, sc->hn_caps);
		error = ENXIO;
		goto failed;
	}

	/*
	 * Allocate sub-channels for multi-TX/RX rings.
	 *
	 * NOTE:
	 * The # of RX rings that can be used is equivalent to the # of
	 * channels to be requested.
	 */
	nsubch = sc->hn_rx_ring_cnt - 1;
	error = hn_synth_alloc_subchans(sc, &nsubch);
	if (error)
		goto failed;
	/* NOTE: _Full_ synthetic parts detach is required now. */
	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;

	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	nchan = nsubch + 1;
	hn_set_ring_inuse(sc, nchan);
	if (nchan == 1) {
		/* Only the primary channel can be used; done */
		goto back;
	}

	/*
	 * Attach the sub-channels.
	 *
	 * NOTE: hn_set_ring_inuse() _must_ have been called.
	 */
	error = hn_attach_subchans(sc);
	if (error)
		goto failed;

	/*
	 * Configure RSS key and indirect table _after_ all sub-channels
	 * are attached.
	 */
	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
		/*
		 * RSS key is not set yet; set it to the default RSS key.
		 */
		if (bootverbose)
			if_printf(sc->hn_ifp, "setup default RSS key\n");
		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
	}
	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
		/*
		 * RSS indirect table is not set yet; set it up in
		 * round-robin fashion.
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "setup default RSS indirect "
			    "table\n");
		}
		for (i = 0; i < NDIS_HASH_INDCNT; ++i)
			rss->rss_ind[i] = i % nchan;
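		/*
		 * E.g. with nchan = 4 the indirect table becomes
		 * 0 1 2 3 0 1 2 3 ..., spreading the hash buckets
		 * evenly across all usable channels.
		 */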
		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
	} else {
		/*
		 * # of usable channels may be changed, so we have to
		 * make sure that all entries in RSS indirect table
		 * are valid.
		 *
		 * NOTE: hn_set_ring_inuse() _must_ have been called.
		 */
		hn_rss_ind_fixup(sc);
	}

	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error)
		goto failed;
back:
	/*
	 * Fixup transmission aggregation setup.
	 */
	hn_set_txagg(sc);
	return (0);

failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
		hn_synth_detach(sc);
	} else {
		if (attached & ATTACHED_RNDIS)
			hn_rndis_detach(sc);
		if (attached & ATTACHED_NVS)
			hn_nvs_detach(sc);
		hn_chan_detach(sc, sc->hn_prichan);
		/* Restore old capabilities. */
		sc->hn_caps = old_caps;
	}
	return (error);

#undef ATTACHED_RNDIS
#undef ATTACHED_NVS
}
/*
 * NOTE:
 * The interface must have been suspended through hn_suspend(), before
 * this function gets called.
 */
static void
hn_synth_detach(struct hn_softc *sc)
{

	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
	    ("synthetic parts were not attached"));

	/* Detach the RNDIS first. */
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}
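
/*
 * E.g. with 8 TX and 8 RX rings created but only ring_cnt = 3 channels
 * granted, 3 RX rings and 3 TX rings are marked in-use below;
 * hn_tx_ring_inuse never exceeds the granted channel count.
 */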
static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{

	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}
static void
hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{

	/*
	 * NOTE:
	 * The TX bufring will not be drained by the hypervisor,
	 * if the primary channel is revoked.
	 */
	while (!vmbus_chan_rx_empty(chan) ||
	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
	     !vmbus_chan_tx_empty(chan)))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}
static void
hn_suspend_data(struct hn_softc *sc)
{
	struct vmbus_channel **subch = NULL;
	struct hn_tx_ring *txr;
	int i, nsubch;

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one is able to send more packets now. */

		/*
		 * Wait for all pending sends to finish.
		 *
		 * NOTE:
		 * We will _not_ receive all pending send-done, if the
		 * primary channel is revoked.
		 */
		while (hn_tx_ring_pending(txr) &&
		    !vmbus_chan_is_revoked(sc->hn_prichan))
			pause("hnwtx", 1 /* 1 tick */);
	}

	/*
	 * Disable RX by clearing RX filter.
	 */
	hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	nsubch = sc->hn_rx_ring_inuse - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);

	if (subch != NULL) {
		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(sc, subch[i]);
	}
	hn_chan_drain(sc, sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);

	/*
	 * Drain any pending TX tasks.
	 *
	 * NOTE:
	 * The above hn_chan_drain() can dispatch TX tasks, so the TX
	 * tasks will have to be drained _after_ the above hn_chan_drain()
	 * calls.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}
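
/*
 * hn_mgmt_taskq is cleared by a task run on the primary channel's task
 * queue (see hn_suspend_mgmt() below), so the clearing is serialized
 * with the channel callback path that schedules management tasks
 * through hn_mgmt_taskq.
 */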
static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{

	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}

static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
	 */
	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
	taskqueue_drain_all(sc->hn_mgmt_taskq0);
}
static void
hn_suspend(struct hn_softc *sc)
{

	/* Disable polling. */
	hn_polling(sc, 0);

	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_VF))
		hn_suspend_data(sc);
	hn_suspend_mgmt(sc);
}
static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
	int i;

	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
	    ("invalid TX ring count %d", tx_ring_cnt));

	for (i = 0; i < tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 0;
		mtx_unlock(&txr->hn_tx_lock);
	}
}
static void
hn_resume_data(struct hn_softc *sc)
{
	int i;

	/*
	 * Re-enable RX.
	 */
	hn_rxfilter_config(sc);

	/*
	 * Make sure to clear suspend status on "all" TX rings,
	 * since hn_tx_ring_inuse can be changed after
	 * hn_suspend_data().
	 */
	hn_resume_tx(sc, sc->hn_tx_ring_cnt);

#ifdef HN_IFSTART_SUPPORT
	if (!hn_use_if_start)
#endif
	{
		/*
		 * Flush unused drbrs, since hn_tx_ring_inuse may be
		 * reduced.
		 */
		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	}

	/*
	 * Kick start TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		/*
		 * Use txeof task, so that any pending oactive can be
		 * cleared properly.
		 */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}
static void
hn_resume_mgmt(struct hn_softc *sc)
{

	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;

	/*
	 * Kick off network change detection, if it was pending.
	 * If no network change was pending, start link status
	 * checks, which is more lightweight than network change
	 * detection.
	 */
	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		hn_change_network(sc);
	else
		hn_update_link_status(sc);
}
static void
hn_resume(struct hn_softc *sc)
{

	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_VF))
		hn_resume_data(sc);

	/*
	 * When the VF is activated, the synthetic interface is changed
	 * to DOWN in hn_set_vf().  Here, if the VF is still active, we
	 * don't call hn_resume_mgmt() until the VF is deactivated in
	 * hn_set_vf().
	 */
	if (!(sc->hn_flags & HN_FLAG_VF))
		hn_resume_mgmt(sc);

	/*
	 * Re-enable polling if this interface is running and
	 * the polling is requested.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
		hn_polling(sc, sc->hn_pollhz);
}
static void
hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
{
	const struct rndis_status_msg *msg;
	int ofs;

	if (dlen < sizeof(*msg)) {
		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
		return;
	}
	msg = data;

	switch (msg->rm_status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		hn_update_link_status(sc);
		break;

	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
		/* Not really useful; ignore. */
		break;

	case RNDIS_STATUS_NETWORK_CHANGE:
		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
		if (dlen < ofs + msg->rm_stbuflen ||
		    msg->rm_stbuflen < sizeof(uint32_t)) {
			if_printf(sc->hn_ifp, "network changed\n");
		} else {
			uint32_t change;

			memcpy(&change, ((const uint8_t *)msg) + ofs,
			    sizeof(change));
			if_printf(sc->hn_ifp, "network changed, change %u\n",
			    change);
		}
		hn_change_network(sc);
		break;

	default:
		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
		    msg->rm_status);
		break;
	}
}
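
/*
 * Per-packet-info is a chain of variable-size rndis_pktinfo records;
 * each record carries { rm_size, rm_type, rm_pktinfooffset } followed
 * by its payload, and the next record starts rm_size bytes after the
 * current one.  The walk below stops early once the VLAN, checksum and
 * hash information have all been found.
 */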
static int
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (__predict_false(info_dlen < sizeof(*pi)))
			return (EINVAL);
		if (__predict_false(info_dlen < pi->rm_size))
			return (EINVAL);
		info_dlen -= pi->rm_size;

		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return (EINVAL);
		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
			return (EINVAL);
		dlen = pi->rm_size - pi->rm_pktinfooffset;
		data = pi->rm_data;

		switch (pi->rm_type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
				return (EINVAL);
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
				return (EINVAL);
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHVAL:
			if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
				return (EINVAL);
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHINF:
			if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
				return (EINVAL);
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL) {
			/* All found; done */
			break;
		}
next:
		pi = (const struct rndis_pktinfo *)
		    ((const uint8_t *)pi + pi->rm_size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if ((mask & HN_RXINFO_HASHVAL) == 0)
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return (0);
}
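
/*
 * Ranges are disjoint only when one provably ends at or before the
 * start of the other: e.g. [0, 10) vs [10, 4) do not overlap, while
 * [0, 10) vs [8, 4) do; equal offsets always count as overlapping.
 */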
static __inline bool
hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
{

	if (off < check_off) {
		if (__predict_true(off + len <= check_off))
			return (false);
	} else if (off > check_off) {
		if (__predict_true(check_off + check_len <= off))
			return (false);
	}
	return (true);
}
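
/*
 * An RNDIS packet message carries up to three regions after its
 * header: data, OOB data and per-packet-info.  The rm_*offset fields
 * are relative to the rm_dataoffset field, hence the
 * RNDIS_PACKET_MSG_OFFSET_ABS() conversions below; every region must
 * lie within rm_len and the regions must not overlap one another.
 */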
static void
hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_packet_msg *pkt;
	struct hn_rxinfo info;
	int data_off, pktinfo_off, data_len, pktinfo_len;

	/*
	 * Check length.
	 */
	if (__predict_false(dlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
		return;
	}
	pkt = data;

	if (__predict_false(dlen < pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
		return;
	}
	if (__predict_false(pkt->rm_len <
	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
		    "msglen %u, data %u, oob %u, pktinfo %u\n",
		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
		    pkt->rm_pktinfolen);
		return;
	}
	if (__predict_false(pkt->rm_datalen == 0)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
		return;
	}

	/*
	 * Check offsets.
	 */
#define IS_OFFSET_INVALID(ofs)			\
	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))

	/* XXX Hyper-V does not meet data offset alignment requirement */
	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data offset %u\n", pkt->rm_dataoffset);
		return;
	}
	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "oob offset %u\n", pkt->rm_oobdataoffset);
		return;
	}
	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
		return;
	}

#undef IS_OFFSET_INVALID

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
	data_len = pkt->rm_datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
	pktinfo_len = pkt->rm_pktinfolen;

	/*
	 * Check OOB coverage.
	 */
	if (__predict_false(pkt->rm_oobdatalen != 0)) {
		int oob_off, oob_len;

		if_printf(rxr->hn_ifp, "got oobdata\n");
		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
		oob_len = pkt->rm_oobdatalen;

		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overflow, msglen %u, oob abs %d len %d\n",
			    pkt->rm_len, oob_off, oob_len);
			return;
		}

		/*
		 * Check against data.
		 */
		if (hn_rndis_check_overlap(oob_off, oob_len,
		    data_off, data_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps data, oob abs %d len %d, "
			    "data abs %d len %d\n",
			    oob_off, oob_len, data_off, data_len);
			return;
		}

		/*
		 * Check against pktinfo.
		 */
		if (pktinfo_len != 0 &&
		    hn_rndis_check_overlap(oob_off, oob_len,
		    pktinfo_off, pktinfo_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps pktinfo, oob abs %d len %d, "
			    "pktinfo abs %d len %d\n",
			    oob_off, oob_len, pktinfo_off, pktinfo_len);
			return;
		}
	}

	/*
	 * Check per-packet-info coverage and find useful per-packet-info.
	 */
	info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
	info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
	info.hash_info = HN_NDIS_HASH_INFO_INVALID;
	if (__predict_true(pktinfo_len != 0)) {
		bool overlap;
		int error;

		if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overflow, msglen %u, "
			    "pktinfo abs %d len %d\n",
			    pkt->rm_len, pktinfo_off, pktinfo_len);
			return;
		}

		/*
		 * Check packet info coverage.
		 */
		overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
		    data_off, data_len);
		if (__predict_false(overlap)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overlaps data, pktinfo abs %d len %d, "
			    "data abs %d len %d\n",
			    pktinfo_off, pktinfo_len, data_off, data_len);
			return;
		}

		/*
		 * Find useful per-packet-info.
		 */
		error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
		    pktinfo_len, &info);
		if (__predict_false(error)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
			    "pktinfo\n");
			return;
		}
	}

	if (__predict_false(data_off + data_len > pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data overflow, msglen %u, data abs %d len %d\n",
		    pkt->rm_len, data_off, data_len);
		return;
	}
	hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
}
static __inline void
hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_msghdr *hdr;

	if (__predict_false(dlen < sizeof(*hdr))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
		return;
	}
	hdr = data;

	if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
		/* Hot data path. */
		hn_rndis_rx_data(rxr, data, dlen);
		/* Done! */
		return;
	}

	if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
		hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
	else
		hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
}
static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
	const struct hn_nvs_hdr *hdr;

	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
		if_printf(sc->hn_ifp, "invalid nvs notify\n");
		return;
	}
	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);

	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
		/* Useless; ignore */
		return;
	}
	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}
static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkt)
{
	struct hn_nvs_sendctx *sndc;

	sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
	    VMBUS_CHANPKT_DATALEN(pkt));
	/*
	 * NOTE:
	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
	 * its callback.
	 */
}
static void
hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkthdr)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr;
	int count, i, hlen;

	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
		return;
	}
	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);

	/* Make sure that this is a RNDIS message. */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}
		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
	}

	/*
	 * Ack the consumed RXBUF associated w/ this channel packet,
	 * so that this RXBUF can be recycled by the hypervisor.
	 */
	hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
}
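
/*
 * Send the completion ack for an RXBUF channel packet.  The ack must
 * carry the original transaction id; until it is sent, the hypervisor
 * considers the RXBUF region busy and will not recycle it for new
 * RNDIS messages.
 */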
static void
hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    uint64_t tid)
{
	struct hn_nvs_rndis_ack ack;
	int retries, error;

	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
	ack.nvs_status = HN_NVS_STATUS_OK;

	retries = 0;
again:
	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
	if (__predict_false(error == EAGAIN)) {
		/*
		 * NOTE:
		 * This should _not_ happen in real world, since the
		 * consumption of the TX bufring from the TX path is
		 * controlled.
		 */
		if (rxr->hn_ack_failed == 0)
			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
		rxr->hn_ack_failed++;
		retries++;
		if (retries < 10) {
			DELAY(100);
			goto again;
		}
		/* RXBUF leaks! */
		if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
	}
}
static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;

	for (;;) {
		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
		int error, pktlen;

		pktlen = rxr->hn_pktbuf_len;
		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
		if (__predict_false(error == ENOBUFS)) {
			void *nbuf;
			int nlen;

			/*
			 * Expand channel packet buffer.
			 *
			 * XXX
			 * Use M_WAITOK here, since allocation failure
			 * is fatal.
			 */
			nlen = rxr->hn_pktbuf_len * 2;
			while (nlen < pktlen)
				nlen *= 2;
			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);

			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
			    rxr->hn_pktbuf_len, nlen);

			free(rxr->hn_pktbuf, M_DEVBUF);
			rxr->hn_pktbuf = nbuf;
			rxr->hn_pktbuf_len = nlen;
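			/*
			 * E.g. a 4KB pktbuf facing an 11KB channel
			 * packet is doubled twice to 16KB; pktlen was
			 * updated by the failed vmbus_chan_recv_pkt().
			 */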
			/* Retry! */
			continue;
		} else if (__predict_false(error == EAGAIN)) {
			/* No more channel packets; done! */
			break;
		}
		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			hn_nvs_handle_comp(sc, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(rxr, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(sc, pkt);
			break;

		default:
			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
			    pkt->cph_type);
			break;
		}
	}
	hn_chan_rollup(rxr, rxr->hn_txr);
}
static void
hn_tx_taskq_create(void *arg __unused)
{
	int i;

	/*
	 * Fix the # of TX taskqueues.
	 */
	if (hn_tx_taskq_cnt <= 0)
		hn_tx_taskq_cnt = 1;
	else if (hn_tx_taskq_cnt > mp_ncpus)
		hn_tx_taskq_cnt = mp_ncpus;

	/*
	 * Fix the TX taskqueue mode.
	 */
	switch (hn_tx_taskq_mode) {
	case HN_TX_TASKQ_M_INDEP:
	case HN_TX_TASKQ_M_GLOBAL:
	case HN_TX_TASKQ_M_EVTTQ:
		break;
	default:
		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
		break;
	}

	if (vm_guest != VM_GUEST_HV)
		return;

	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
		return;

	/* Create the global TX taskqueues. */
	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
		    "hn tx%d", i);
	}
}
SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_create, NULL);
static void
hn_tx_taskq_destroy(void *arg __unused)
{

	if (hn_tx_taskque != NULL) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(hn_tx_taskque[i]);
		free(hn_tx_taskque, M_DEVBUF);
	}
}
SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_destroy, NULL);