 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/buf_ring.h>
#include <sys/eventhandler.h>

#include <machine/atomic.h>
#include <machine/in_cksum.h>

#include <net/ethernet.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_vlan_var.h>
#include <net/rndis.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>

#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>

#include "vmbus_if.h"

#define HN_IFSTART_SUPPORT

#define HN_RING_CNT_DEF_MAX     8

/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT          512
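/*
 * Worst-case size of the RNDIS packet message prepended to each TX
 * packet: the fixed header plus the four per-packet-info blobs this
 * driver may attach (hash value, VLAN, LSOv2 and TX checksum).
 */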
#define HN_RNDIS_PKT_LEN                                        \
    (sizeof(struct rndis_packet_msg) +                          \
     HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) +           \
     HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +               \
     HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +               \
     HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
#define HN_RNDIS_PKT_BOUNDARY   PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN      CACHE_LINE_SIZE

#define HN_TX_DATA_BOUNDARY     PAGE_SIZE
#define HN_TX_DATA_MAXSIZE      IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE      PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX   (HN_GPACNT_MAX - 1)

#define HN_DIRECT_TX_SIZE_DEF   128

#define HN_EARLY_TXEOF_THRESH   8

#define HN_PKTBUF_LEN_DEF       (16 * 1024)

#define HN_LROENT_CNT_DEF       128

#define HN_LRO_LENLIM_MULTIRX_DEF   (12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF           (25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp)      (2 * (ifp)->if_mtu)

#define HN_LRO_ACKCNT_DEF       1

#define HN_LOCK_INIT(sc)                                        \
    sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc)     sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc)      sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc)                                             \
do {                                                            \
    while (sx_try_xlock(&(sc)->hn_lock) == 0)                   \
        DELAY(1000);                                            \
} while (0)
#define HN_UNLOCK(sc)           sx_xunlock(&(sc)->hn_lock)
#define HN_CSUM_IP_MASK         (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK        (CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc)                                 \
    ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc)                                \
    ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)
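/*
 * Estimated size of a packet as it lands in a chimney sending buffer:
 * payload length plus the worst-case RNDIS packet message, rounded up
 * to the given aggregation alignment.
 */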
#define HN_PKTSIZE_MIN(align)                                   \
    roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
        HN_RNDIS_PKT_LEN, (align))
#define HN_PKTSIZE(m, align)                                    \
    roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))

#define HN_RING_IDX2CPU(sc, idx)    (((sc)->hn_cpu + (idx)) % mp_ncpus)

struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
    SLIST_ENTRY(hn_txdesc) link;
#endif
    STAILQ_ENTRY(hn_txdesc) agg_link;

    /* Aggregated txdescs, in sending order. */
    STAILQ_HEAD(, hn_txdesc) agg_list;

    /* The oldest packet, if transmission aggregation happens. */
    struct hn_tx_ring *txr;
    uint32_t flags;     /* HN_TXD_FLAG_ */
    struct hn_nvs_sendctx send_ctx;
    bus_dmamap_t data_dmap;

    bus_addr_t rndis_pkt_paddr;
    struct rndis_packet_msg *rndis_pkt;
    bus_dmamap_t rndis_pkt_dmap;
};

#define HN_TXD_FLAG_ONLIST      0x0001
#define HN_TXD_FLAG_DMAMAP      0x0002
#define HN_TXD_FLAG_ONAGG       0x0004

struct hn_update_vf {
    struct hn_rx_ring *rxr;
    struct ifnet *vf;
};

#define HN_RXINFO_VLAN          0x0001
#define HN_RXINFO_CSUM          0x0002
#define HN_RXINFO_HASHINF       0x0004
#define HN_RXINFO_HASHVAL       0x0008
#define HN_RXINFO_ALL                                           \
    (HN_RXINFO_VLAN |                                           \
     HN_RXINFO_CSUM |                                           \
     HN_RXINFO_HASHINF |                                        \
     HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID   0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID 0
#define HN_NDIS_HASH_INFO_INVALID   0
static int hn_probe(device_t);
static int hn_attach(device_t);
static int hn_detach(device_t);
static int hn_shutdown(device_t);
static void hn_chan_callback(struct vmbus_channel *,

static void hn_init(void *);
static int hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void hn_start(struct ifnet *);
static int hn_transmit(struct ifnet *, struct mbuf *);
static void hn_xmit_qflush(struct ifnet *);
static int hn_ifmedia_upd(struct ifnet *);
static void hn_ifmedia_sts(struct ifnet *,
    struct ifmediareq *);

static int hn_rndis_rxinfo(const void *, int,
static void hn_rndis_rx_data(struct hn_rx_ring *,
static void hn_rndis_rx_status(struct hn_softc *,

static void hn_nvs_handle_notify(struct hn_softc *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_handle_comp(struct hn_softc *,
    struct vmbus_channel *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_handle_rxbuf(struct hn_rx_ring *,
    struct vmbus_channel *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_ack_rxbuf(struct hn_rx_ring *,
    struct vmbus_channel *, uint64_t);

#if __FreeBSD_version >= 1100099
static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS);

static void hn_stop(struct hn_softc *, bool);
static void hn_init_locked(struct hn_softc *);
static int hn_chan_attach(struct hn_softc *,
    struct vmbus_channel *);
static void hn_chan_detach(struct hn_softc *,
    struct vmbus_channel *);
static int hn_attach_subchans(struct hn_softc *);
static void hn_detach_allchans(struct hn_softc *);
static void hn_chan_rollup(struct hn_rx_ring *,
    struct hn_tx_ring *);
static void hn_set_ring_inuse(struct hn_softc *, int);
static int hn_synth_attach(struct hn_softc *, int);
static void hn_synth_detach(struct hn_softc *);
static int hn_synth_alloc_subchans(struct hn_softc *,
static bool hn_synth_attachable(const struct hn_softc *);
static void hn_suspend(struct hn_softc *);
static void hn_suspend_data(struct hn_softc *);
static void hn_suspend_mgmt(struct hn_softc *);
static void hn_resume(struct hn_softc *);
static void hn_resume_data(struct hn_softc *);
static void hn_resume_mgmt(struct hn_softc *);
static void hn_suspend_mgmt_taskfunc(void *, int);
static void hn_chan_drain(struct hn_softc *,
    struct vmbus_channel *);
static void hn_polling(struct hn_softc *, u_int);
static void hn_chan_polling(struct vmbus_channel *, u_int);

static void hn_update_link_status(struct hn_softc *);
static void hn_change_network(struct hn_softc *);
static void hn_link_taskfunc(void *, int);
static void hn_netchg_init_taskfunc(void *, int);
static void hn_netchg_status_taskfunc(void *, int);
static void hn_link_status(struct hn_softc *);
static int hn_create_rx_data(struct hn_softc *, int);
static void hn_destroy_rx_data(struct hn_softc *);
static int hn_check_iplen(const struct mbuf *, int);
static int hn_set_rxfilter(struct hn_softc *, uint32_t);
static int hn_rxfilter_config(struct hn_softc *);
static int hn_rss_reconfig(struct hn_softc *);
static void hn_rss_ind_fixup(struct hn_softc *);
static int hn_rxpkt(struct hn_rx_ring *, const void *,
    int, const struct hn_rxinfo *);

static int hn_tx_ring_create(struct hn_softc *, int);
static void hn_tx_ring_destroy(struct hn_tx_ring *);
static int hn_create_tx_data(struct hn_softc *, int);
static void hn_fixup_tx_data(struct hn_softc *);
static void hn_destroy_tx_data(struct hn_softc *);
static void hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static void hn_txdesc_gc(struct hn_tx_ring *,
static int hn_encap(struct ifnet *, struct hn_tx_ring *,
    struct hn_txdesc *, struct mbuf **);
static int hn_txpkt(struct ifnet *, struct hn_tx_ring *,
static void hn_set_chim_size(struct hn_softc *, int);
static void hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool hn_tx_ring_pending(struct hn_tx_ring *);
static void hn_tx_ring_qflush(struct hn_tx_ring *);
static void hn_resume_tx(struct hn_softc *, int);
static void hn_set_txagg(struct hn_softc *);
static void *hn_try_txagg(struct ifnet *,
    struct hn_tx_ring *, struct hn_txdesc *,
static int hn_get_txswq_depth(const struct hn_tx_ring *);
static void hn_txpkt_done(struct hn_nvs_sendctx *,
    struct hn_softc *, struct vmbus_channel *,
static int hn_txpkt_sglist(struct hn_tx_ring *,
static int hn_txpkt_chim(struct hn_tx_ring *,
static int hn_xmit(struct hn_tx_ring *, int);
static void hn_xmit_taskfunc(void *, int);
static void hn_xmit_txeof(struct hn_tx_ring *);
static void hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int hn_start_locked(struct hn_tx_ring *, int);
static void hn_start_taskfunc(void *, int);
static void hn_start_txeof(struct hn_tx_ring *);
static void hn_start_txeof_taskfunc(void *, int);

SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");
/* Trust TCP segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust TCP segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust UDP datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust UDP datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust IP packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust IP packet verification on host side, "
    "when csum info is missing (global setting)");
/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");

static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP     0
#define HN_TX_TASKQ_M_GLOBAL    1
#define HN_TX_TASKQ_M_EVTTQ     2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");
/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");
/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");

static u_int hn_cpu_index;              /* next CPU for channel */
static struct taskqueue **hn_tx_taskque;/* shared TX taskqueues */
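/*
 * Default RSS key.  This appears to be the standard 40-byte Toeplitz
 * key suggested by Microsoft's RSS documentation and shipped as the
 * default by many other drivers.
 */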
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
    0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
    0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
    0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
    0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
    0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};

static device_method_t hn_methods[] = {
    /* Device interface */
    DEVMETHOD(device_probe,     hn_probe),
    DEVMETHOD(device_attach,    hn_attach),
    DEVMETHOD(device_detach,    hn_detach),
    DEVMETHOD(device_shutdown,  hn_shutdown),
    DEVMETHOD_END
};

static driver_t hn_driver = {
    "hn",
    hn_methods,
    sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);

#if __FreeBSD_version >= 1100099
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
    for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
        sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;

hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
        txd->chim_size == 0, ("invalid rndis sglist txd"));
    return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
        &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
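/*
 * Allocate a chimney sending buffer slot by scanning the bitmap for a
 * clear bit; atomic_testandset_long() makes the claim lock-free, so
 * concurrent TX rings can allocate slots in parallel.
 */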
static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
    int i, bmap_cnt = sc->hn_chim_bmap_cnt;
    u_long *bmap = sc->hn_chim_bmap;
    uint32_t ret = HN_NVS_CHIM_IDX_INVALID;

    for (i = 0; i < bmap_cnt; ++i) {
        idx = ffsl(~bmap[i]);

        --idx; /* ffsl is 1-based */
        KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
            ("invalid i %d and idx %d", i, idx));

        if (atomic_testandset_long(&bmap[i], idx))

        ret = i * LONG_BIT + idx;
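/*
 * Return a chimney sending buffer slot to the bitmap.  The KASSERTs
 * below catch double frees and out-of-range indices.
 */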
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
    idx = chim_idx / LONG_BIT;
    KASSERT(idx < sc->hn_chim_bmap_cnt,
        ("invalid chimney index 0x%x", chim_idx));

    mask = 1UL << (chim_idx % LONG_BIT);
    KASSERT(sc->hn_chim_bmap[idx] & mask,
        ("index bitmap 0x%lx, chimney index %u, "
         "bitmap idx %d, bitmask 0x%lx",
         sc->hn_chim_bmap[idx], chim_idx, idx, mask));

    atomic_clear_long(&sc->hn_chim_bmap[idx], mask);

#if defined(INET6) || defined(INET)

/*
 * NOTE: If this function fails, m_head is freed.
 */
static __inline struct mbuf *
hn_tso_fixup(struct mbuf *m_head)
    struct ether_vlan_header *evl;

    KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));
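/*
 * PULLUP_HDR() guarantees that the first 'len' bytes of the chain sit
 * contiguously in the leading mbuf; m_pullup() may substitute a new
 * chain head, and frees the chain when it fails.
 */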
#define PULLUP_HDR(m, len)                              \
    if (__predict_false((m)->m_len < (len))) {          \
        (m) = m_pullup((m), (len));                     \

    PULLUP_HDR(m_head, sizeof(*evl));
    evl = mtod(m_head, struct ether_vlan_header *);
    if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
        ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    else
        ehlen = ETHER_HDR_LEN;

    if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
        PULLUP_HDR(m_head, ehlen + sizeof(*ip));
        ip = mtodo(m_head, ehlen);
        iphlen = ip->ip_hl << 2;

        PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
        th = mtodo(m_head, ehlen + iphlen);

        th->th_sum = in_pseudo(ip->ip_src.s_addr,
            ip->ip_dst.s_addr, htons(IPPROTO_TCP));

#if defined(INET6) && defined(INET)
        PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
        ip6 = mtodo(m_head, ehlen);
        if (ip6->ip6_nxt != IPPROTO_TCP) {

        PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
        th = mtodo(m_head, ehlen + sizeof(*ip6));

        th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);

#endif  /* INET6 || INET */
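/*
 * Program the RNDIS RX filter only when it actually changes;
 * sc->hn_rx_filter caches the last value accepted by the host.
 */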
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
    if (sc->hn_rx_filter != filter) {
        error = hn_rndis_set_rxfilter(sc, filter);
            sc->hn_rx_filter = filter;

hn_rxfilter_config(struct hn_softc *sc)
    struct ifnet *ifp = sc->hn_ifp;

    if ((ifp->if_flags & IFF_PROMISC) ||
        (sc->hn_flags & HN_FLAG_VF)) {
        filter = NDIS_PACKET_TYPE_PROMISCUOUS;
    } else {
        filter = NDIS_PACKET_TYPE_DIRECTED;
        if (ifp->if_flags & IFF_BROADCAST)
            filter |= NDIS_PACKET_TYPE_BROADCAST;
        /* TODO: support multicast list */
        if ((ifp->if_flags & IFF_ALLMULTI) ||
            !TAILQ_EMPTY(&ifp->if_multiaddrs))
            filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;

    return (hn_set_rxfilter(sc, filter));
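/*
 * Compute the effective TX aggregation limits: start from the global
 * tunables, clamp them by what RNDIS offered and by the chimney
 * sending buffer size, then propagate the results to every TX ring
 * under its TX lock.
 */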
hn_set_txagg(struct hn_softc *sc)
    /*
     * Setup aggregation size.
     */
    if (sc->hn_agg_size < 0)
        size = sc->hn_agg_size;

    if (sc->hn_rndis_agg_size < size)
        size = sc->hn_rndis_agg_size;

    /* NOTE: We only aggregate packets using chimney sending buffers. */
    if (size > (uint32_t)sc->hn_chim_szmax)
        size = sc->hn_chim_szmax;

    if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
    /* NOTE: Type of the per TX ring setting is 'int'. */

    /*
     * Setup aggregation packet count.
     */
    if (sc->hn_agg_pkts < 0)
        pkts = sc->hn_agg_pkts;

    if (sc->hn_rndis_agg_pkts < pkts)
        pkts = sc->hn_rndis_agg_pkts;

    /* NOTE: Type of the per TX ring setting is 'short'. */

    /* NOTE: Type of the per TX ring setting is 'short'. */
    if (sc->hn_rndis_agg_align > SHRT_MAX) {

    if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
        size, pkts, sc->hn_rndis_agg_align);

    for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
        struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

        mtx_lock(&txr->hn_tx_lock);
        txr->hn_agg_szmax = size;
        txr->hn_agg_pktmax = pkts;
        txr->hn_agg_align = sc->hn_rndis_agg_align;
        mtx_unlock(&txr->hn_tx_lock);

hn_get_txswq_depth(const struct hn_tx_ring *txr)
    KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
    if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
        return txr->hn_txdesc_cnt;
    return hn_tx_swq_depth;

hn_rss_reconfig(struct hn_softc *sc)
    if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)

    /*
     * Direct reconfiguration by setting the UNCHG flags does
     * _not_ work properly.
     */
    if_printf(sc->hn_ifp, "disable RSS\n");
    error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
        if_printf(sc->hn_ifp, "RSS disable failed\n");

    /*
     * Reenable the RSS w/ the updated RSS key or indirect
     */
    if_printf(sc->hn_ifp, "reconfig RSS\n");
    error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
        if_printf(sc->hn_ifp, "RSS reconfig failed\n");

hn_rss_ind_fixup(struct hn_softc *sc)
    struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;

    nchan = sc->hn_rx_ring_inuse;
    KASSERT(nchan > 1, ("invalid # of channels %d", nchan));

    /*
     * Check indirect table to make sure that all channels in it
     */
    for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
        if (rss->rss_ind[i] >= nchan) {
            if_printf(sc->hn_ifp,
                "RSS indirect table %d fixup: %u -> %d\n",
                i, rss->rss_ind[i], nchan - 1);
            rss->rss_ind[i] = nchan - 1;

hn_ifmedia_upd(struct ifnet *ifp __unused)

hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
    struct hn_softc *sc = ifp->if_softc;

    ifmr->ifm_status = IFM_AVALID;
    ifmr->ifm_active = IFM_ETHER;

    if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
        ifmr->ifm_active |= IFM_NONE;

    ifmr->ifm_status |= IFM_ACTIVE;
    ifmr->ifm_active |= IFM_10G_T | IFM_FDX;

hn_update_vf_task(void *arg, int pending __unused)
    struct hn_update_vf *uv = arg;

    uv->rxr->hn_vf = uv->vf;
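/*
 * Point each RX ring at the VF ifnet (or back at the synthetic path
 * when 'vf' is NULL).  The update is run as a task on each in-use
 * channel, so it is serialized with that channel's RX processing.
 */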
hn_update_vf(struct hn_softc *sc, struct ifnet *vf)
    struct hn_rx_ring *rxr;
    struct hn_update_vf uv;

    TASK_INIT(&task, 0, hn_update_vf_task, &uv);

    for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
        rxr = &sc->hn_rx_ring[i];

        if (i < sc->hn_rx_ring_inuse) {
            vmbus_chan_run_task(rxr->hn_chan, &task);

hn_set_vf(struct hn_softc *sc, struct ifnet *ifp, bool vf)
    struct ifnet *hn_ifp;

    if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))

    if (ifp->if_alloctype != IFT_ETHER)

    /* Ignore lagg/vlan interfaces */
    if (strcmp(ifp->if_dname, "lagg") == 0 ||
        strcmp(ifp->if_dname, "vlan") == 0)

    if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)

    /* Now we're sure 'ifp' is a real VF device. */
    if (sc->hn_flags & HN_FLAG_VF)

    sc->hn_flags |= HN_FLAG_VF;
    hn_rxfilter_config(sc);

    if (!(sc->hn_flags & HN_FLAG_VF))

    sc->hn_flags &= ~HN_FLAG_VF;
    if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
        hn_rxfilter_config(sc);
        hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);

    hn_nvs_set_datapath(sc,
        vf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTHETIC);

    hn_update_vf(sc, vf ? ifp : NULL);

    sc->hn_link_flags &= ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
    if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);

    if_printf(hn_ifp, "Data path is switched %s %s\n",
        vf ? "to" : "from", if_name(ifp));

hn_ifnet_event(void *arg, struct ifnet *ifp, int event)
    if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN)

    hn_set_vf(arg, ifp, event == IFNET_EVENT_UP);

hn_ifaddr_event(void *arg, struct ifnet *ifp)
    hn_set_vf(arg, ifp, ifp->if_flags & IFF_UP);

/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
static const struct hyperv_guid g_net_vsc_device_type = {
    .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
    0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}

hn_probe(device_t dev)
    if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
        &g_net_vsc_device_type) == 0) {
        device_set_desc(dev, "Hyper-V Network Interface");
        return BUS_PROBE_DEFAULT;

hn_attach(device_t dev)
    struct hn_softc *sc = device_get_softc(dev);
    struct sysctl_oid_list *child;
    struct sysctl_ctx_list *ctx;
    uint8_t eaddr[ETHER_ADDR_LEN];
    struct ifnet *ifp = NULL;
    int error, ring_cnt, tx_ring_cnt;

    sc->hn_prichan = vmbus_get_channel(dev);

    /*
     * Initialize these tunables once.
     */
    sc->hn_agg_size = hn_tx_agg_size;
    sc->hn_agg_pkts = hn_tx_agg_pkts;

    /*
     * Setup taskqueue for transmission.
     */
    if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
        sc->hn_tx_taskqs =
            malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
            M_DEVBUF, M_WAITOK);
        for (i = 0; i < hn_tx_taskq_cnt; ++i) {
            sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
                M_WAITOK, taskqueue_thread_enqueue,
                &sc->hn_tx_taskqs[i]);
            taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
                "%s tx%d", device_get_nameunit(dev), i);
    } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
        sc->hn_tx_taskqs = hn_tx_taskque;
    /*
     * Setup taskqueue for management tasks, e.g. link status.
     */
    sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
        taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
    taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
        device_get_nameunit(dev));
    TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
    TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
    TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
        hn_netchg_status_taskfunc, sc);
    /*
     * Allocate ifnet and set up its name early, so that if_printf
     * can be used by functions which will be called after
     */
    ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
    if_initname(ifp, device_get_name(dev), device_get_unit(dev));

    /*
     * Initialize ifmedia early so that it can be unconditionally
     * destroyed if an error happens later on.
     */
    ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);

    /*
     * Figure out the # of RX rings (ring_cnt) and the # of TX rings
     * to use (tx_ring_cnt).
     */

    /*
     * The # of RX rings to use is the same as the # of channels to use.
     */
    ring_cnt = hn_chan_cnt;
    if (ring_cnt <= 0) {
        ring_cnt = mp_ncpus;
        if (ring_cnt > HN_RING_CNT_DEF_MAX)
            ring_cnt = HN_RING_CNT_DEF_MAX;
    } else if (ring_cnt > mp_ncpus) {
        ring_cnt = mp_ncpus;

    tx_ring_cnt = hn_tx_ring_cnt;
    if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
        tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
    if (hn_use_if_start) {
        /* ifnet.if_start only needs one TX ring. */

    /*
     * Set the leader CPU for channels.
     */
    sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
    /*
     * Create enough TX/RX rings, even if only a limited number of
     * channels can be allocated.
     */
    error = hn_create_tx_data(sc, tx_ring_cnt);
    error = hn_create_rx_data(sc, ring_cnt);
    /*
     * Create transaction context for NVS and RNDIS transactions.
     */
    sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
        HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
    if (sc->hn_xact == NULL) {
    /*
     * Install orphan handler for the revocation of this device's
     *
     * The processing order is critical here:
     * Install the orphan handler _before_ testing whether this
     * device's primary channel has been revoked or not.
     */
    vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
    if (vmbus_chan_is_revoked(sc->hn_prichan)) {
    /*
     * Attach the synthetic parts, i.e. NVS and RNDIS.
     */
    error = hn_synth_attach(sc, ETHERMTU);

    error = hn_rndis_get_eaddr(sc, eaddr);

#if __FreeBSD_version >= 1100099
    if (sc->hn_rx_ring_inuse > 1) {
        /*
         * Reduce TCP segment aggregation limit for multiple
         * RX rings to increase ACK timeliness.
         */
        hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
    /*
     * Fix up TX stuff after the synthetic parts are attached.
     */
    hn_fixup_tx_data(sc);
    ctx = device_get_sysctl_ctx(dev);
    child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
        &sc->hn_nvs_ver, 0, "NVS version");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_ndis_version_sysctl, "A", "NDIS version");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_caps_sysctl, "A", "capabilities");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_hwassist_sysctl, "A", "hwassist");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_rxfilter_sysctl, "A", "rxfilter");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_rss_hash_sysctl, "A", "RSS hash");
    SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
        CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
        CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_rss_key_sysctl, "IU", "RSS key");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
        CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_rss_ind_sysctl, "IU", "RSS indirect table");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
        CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
        "RNDIS offered packet transmission aggregation size limit");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
        CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
        "RNDIS offered packet transmission aggregation count limit");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
        CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
        "RNDIS packet transmission aggregation alignment");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_txagg_size_sysctl, "I",
        "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_txagg_pkts_sysctl, "I",
        "Packet transmission aggregation packets, "
        "0 -- disable, -1 -- auto");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
        CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_polling_sysctl, "I",
        "Polling frequency: [100,1000000], 0 disable polling");
    /*
     * Setup the ifmedia, which has been initialized earlier.
     */
    ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
    ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
    /* XXX ifmedia_set really should do this for us */
    sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;
    /*
     * Setup the ifnet for this interface.
     */
    ifp->if_baudrate = IF_Gbps(10);
    /* if_baudrate is 32 bits on 32-bit systems. */
    ifp->if_baudrate = IF_Gbps(1);
    ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
    ifp->if_ioctl = hn_ioctl;
    ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT
    if (hn_use_if_start) {
        int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);

        ifp->if_start = hn_start;
        IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
        ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
        IFQ_SET_READY(&ifp->if_snd);

        ifp->if_transmit = hn_transmit;
        ifp->if_qflush = hn_xmit_qflush;

    ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
    /* We can't distinguish IPv6 packets from IPv4 packets on the RX path. */
    ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
    if (sc->hn_caps & HN_CAP_VLAN) {
        /* XXX not sure about VLAN_MTU. */
        ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;

    ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
    if (ifp->if_hwassist & HN_CSUM_IP_MASK)
        ifp->if_capabilities |= IFCAP_TXCSUM;
    if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
        ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
    if (sc->hn_caps & HN_CAP_TSO4) {
        ifp->if_capabilities |= IFCAP_TSO4;
        ifp->if_hwassist |= CSUM_IP_TSO;
    if (sc->hn_caps & HN_CAP_TSO6) {
        ifp->if_capabilities |= IFCAP_TSO6;
        ifp->if_hwassist |= CSUM_IP6_TSO;

    /* Enable all available capabilities by default. */
    ifp->if_capenable = ifp->if_capabilities;
    /*
     * Disable IPv6 TSO and TXCSUM by default; they can still
     * be enabled through SIOCSIFCAP.
     */
    ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
    ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);
    if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
        hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
        ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
        ifp->if_hw_tsomaxsegsize = PAGE_SIZE;

    ether_ifattach(ifp, eaddr);

    if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
        if_printf(ifp, "TSO segcnt %u segsz %u\n",
            ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);

    /* Inform the upper layer about the long frame support. */
    ifp->if_hdrlen = sizeof(struct ether_vlan_header);

    /*
     * Kick off link status check.
     */
    sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
    hn_update_link_status(sc);

    sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
        hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);

    sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
        hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);
    if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
        hn_synth_detach(sc);

hn_detach(device_t dev)
    struct hn_softc *sc = device_get_softc(dev);
    struct ifnet *ifp = sc->hn_ifp;

    if (sc->hn_ifaddr_evthand != NULL)
        EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
    if (sc->hn_ifnet_evthand != NULL)
        EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);

    if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
        /*
         * In case the vmbus missed the orphan handler
         */
        vmbus_xact_ctx_orphan(sc->hn_xact);

    if (device_is_attached(dev)) {
        if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
            if (ifp->if_drv_flags & IFF_DRV_RUNNING)

            /*
             * hn_stop() only suspends data, so management
             * stuff has to be suspended manually here.
             */
            hn_suspend_mgmt(sc);
            hn_synth_detach(sc);

        ether_ifdetach(ifp);

    ifmedia_removeall(&sc->hn_media);
    hn_destroy_rx_data(sc);
    hn_destroy_tx_data(sc);

    if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
        for (i = 0; i < hn_tx_taskq_cnt; ++i)
            taskqueue_free(sc->hn_tx_taskqs[i]);
        free(sc->hn_tx_taskqs, M_DEVBUF);

    taskqueue_free(sc->hn_mgmt_taskq0);

    if (sc->hn_xact != NULL) {
        /*
         * Uninstall the orphan handler _before_ the xact is
         */
        vmbus_chan_unset_orphan(sc->hn_prichan);
        vmbus_xact_ctx_destroy(sc->hn_xact);

    HN_LOCK_DESTROY(sc);

hn_shutdown(device_t dev)

hn_link_status(struct hn_softc *sc)
    uint32_t link_status;

    error = hn_rndis_get_linkstatus(sc, &link_status);
        /* XXX what to do? */

    if (link_status == NDIS_MEDIA_STATE_CONNECTED)
        sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
    else
        sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
    if_link_state_change(sc->hn_ifp,
        (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
        LINK_STATE_UP : LINK_STATE_DOWN);

hn_link_taskfunc(void *xsc, int pending __unused)
    struct hn_softc *sc = xsc;

    if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)

hn_netchg_init_taskfunc(void *xsc, int pending __unused)
    struct hn_softc *sc = xsc;

    /* Prevent any link status checks from running. */
    sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;
    /*
     * Fake up a [link down --> link up] state change; a 5 second
     * delay is used, which closely simulates the miibus reaction
     * to a link down event.
     */
    sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
    if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
    taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
        &sc->hn_netchg_status, 5 * hz);
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
    struct hn_softc *sc = xsc;

    /* Re-allow link status checks. */
    sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;

hn_update_link_status(struct hn_softc *sc)
    if (sc->hn_mgmt_taskq != NULL)
        taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);

hn_change_network(struct hn_softc *sc)
    if (sc->hn_mgmt_taskq != NULL)
        taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);

hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
    struct mbuf *m = *m_head;

    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

    error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
        m, segs, nsegs, BUS_DMA_NOWAIT);
    if (error == EFBIG) {
        m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
        *m_head = m = m_new;
        txr->hn_tx_collapsed++;

        error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
            txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);

    bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
        BUS_DMASYNC_PREWRITE);
    txd->flags |= HN_TXD_FLAG_DMAMAP;
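/*
 * Drop one reference on 'txd'.  The final release tears down any
 * aggregated descriptors, returns the chimney slot or unloads the DMA
 * map, frees the mbuf and puts the descriptor back onto the free
 * list/buf_ring.  Returns nonzero iff the descriptor was freed.
 */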
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
    KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
        ("put an onlist txd %#x", txd->flags));
    KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
        ("put an onagg txd %#x", txd->flags));

    KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
    if (atomic_fetchadd_int(&txd->refs, -1) != 1)

    if (!STAILQ_EMPTY(&txd->agg_list)) {
        struct hn_txdesc *tmp_txd;

        while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
            KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
1541 ("resursive aggregation on aggregated txdesc"));
            KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
                ("not aggregated txdesc"));
            KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
                ("aggregated txdesc uses dmamap"));
            KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
                ("aggregated txdesc consumes "
                 "chimney sending buffer"));
            KASSERT(tmp_txd->chim_size == 0,
                ("aggregated txdesc has non-zero "
                 "chimney sending size"));

            STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
            tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
            freed = hn_txdesc_put(txr, tmp_txd);
            KASSERT(freed, ("failed to free aggregated txdesc"));

    if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
        KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
            ("chim txd uses dmamap"));
        hn_chim_free(txr->hn_sc, txd->chim_index);
        txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
    } else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
        bus_dmamap_sync(txr->hn_tx_data_dtag,
            txd->data_dmap, BUS_DMASYNC_POSTWRITE);
        bus_dmamap_unload(txr->hn_tx_data_dtag,
        txd->flags &= ~HN_TXD_FLAG_DMAMAP;

    if (txd->m != NULL) {

    txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    KASSERT(txr->hn_txdesc_avail >= 0 &&
        txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
        ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
    txr->hn_txdesc_avail++;
    SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
    mtx_unlock_spin(&txr->hn_txlist_spin);
#else   /* HN_USE_TXDESC_BUFRING */
    atomic_add_int(&txr->hn_txdesc_avail, 1);
    buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif  /* !HN_USE_TXDESC_BUFRING */

static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
    struct hn_txdesc *txd;

#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    txd = SLIST_FIRST(&txr->hn_txlist);
        KASSERT(txr->hn_txdesc_avail > 0,
            ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
        txr->hn_txdesc_avail--;
        SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
    mtx_unlock_spin(&txr->hn_txlist_spin);
    txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);

#ifdef HN_USE_TXDESC_BUFRING
        atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif  /* HN_USE_TXDESC_BUFRING */
        KASSERT(txd->m == NULL && txd->refs == 0 &&
            STAILQ_EMPTY(&txd->agg_list) &&
            txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
            txd->chim_size == 0 &&
            (txd->flags & HN_TXD_FLAG_ONLIST) &&
            (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
            (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
        txd->flags &= ~HN_TXD_FLAG_ONLIST;

static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
    /* 0->1 transition will never work */
    KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
    atomic_add_int(&txd->refs, 1);

static __inline void
hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
    KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
        ("recursive aggregation on aggregating txdesc"));

    KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
        ("already aggregated"));
    KASSERT(STAILQ_EMPTY(&txd->agg_list),
        ("recursive aggregation on to-be-aggregated txdesc"));

    txd->flags |= HN_TXD_FLAG_ONAGG;
    STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);

hn_tx_ring_pending(struct hn_tx_ring *txr)
    bool pending = false;

#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
    mtx_unlock_spin(&txr->hn_txlist_spin);
    if (!buf_ring_full(txr->hn_txdesc_br))

static __inline void
hn_txeof(struct hn_tx_ring *txr)
    txr->hn_has_txeof = 0;

hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
    struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
    struct hn_txdesc *txd = sndc->hn_cbarg;
    struct hn_tx_ring *txr;

    KASSERT(txr->hn_chan == chan,
        ("channel mismatch, on chan%u, should be chan%u",
         vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));

    txr->hn_has_txeof = 1;
    hn_txdesc_put(txr, txd);

    ++txr->hn_txdone_cnt;
    if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
        txr->hn_txdone_cnt = 0;
        if (txr->hn_oactive)

hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
#if defined(INET) || defined(INET6)
    struct lro_ctrl *lro = &rxr->hn_lro;
    struct lro_entry *queued;

    while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
        SLIST_REMOVE_HEAD(&lro->lro_active, next);
        tcp_lro_flush(lro, queued);

    /*
     * 'txr' could be NULL, if multiple channels and
     * ifnet.if_start method are enabled.
     */
    if (txr == NULL || !txr->hn_has_txeof)

    txr->hn_txdone_cnt = 0;
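/*
 * RNDIS packet message offsets are relative to the rm_dataoffset
 * field, not to the start of the message; convert the internally
 * used absolute offsets before handing the message to the host.
 */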
static __inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
    KASSERT(ofs >= sizeof(struct rndis_packet_msg),
        ("invalid RNDIS packet msg offset %u", ofs));
    return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));

static __inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
    size_t pi_dlen, uint32_t pi_type)
    const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
    struct rndis_pktinfo *pi;

    KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
        ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));

    /*
     * Per-packet-info does not move; it only grows.
     *
     * rm_pktinfooffset in this phase counts from the beginning
     * of rndis_packet_msg.
     */
    KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
        ("%u pktinfo overflows RNDIS packet msg", pi_type));
    pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
        pkt->rm_pktinfolen);
    pkt->rm_pktinfolen += pi_size;

    pi->rm_size = pi_size;
    pi->rm_type = pi_type;
    pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;

    /* Data immediately follow per-packet-info. */
    pkt->rm_dataoffset += pi_size;

    /* Update RNDIS packet msg length */
    pkt->rm_len += pi_size;

    return (pi->rm_data);

hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr)
    struct hn_txdesc *txd;

    txd = txr->hn_agg_txd;
    KASSERT(txd != NULL, ("no aggregate txdesc"));
    /*
     * Since hn_txpkt() will reset this temporary stat, save
     * it now, so that oerrors can be updated properly if
     * hn_txpkt() ever fails.
     */
    pkts = txr->hn_stat_pkts;

    /*
     * Since txd's mbuf will _not_ be freed upon hn_txpkt()
     * failure, save it for later freeing if hn_txpkt() ever
     */
    error = hn_txpkt(ifp, txr, txd);
    if (__predict_false(error)) {
        /* txd is freed, but m is not. */

        txr->hn_flush_failed++;
        if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts);

    /* Reset all aggregation states. */
    txr->hn_agg_txd = NULL;
    txr->hn_agg_szleft = 0;
    txr->hn_agg_pktleft = 0;
    txr->hn_agg_prevpkt = NULL;
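/*
 * Try to place the RNDIS packet for 'txd' into a chimney sending
 * buffer.  If an aggregating txdesc is open and has both packet and
 * size budget left, append to it; otherwise flush it and, when the
 * packet is small enough, start a new aggregation.  Returns a pointer
 * into the chimney buffer, or NULL to fall back to the sglist path.
 */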
hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
    if (txr->hn_agg_txd != NULL) {
        if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) {
            struct hn_txdesc *agg_txd = txr->hn_agg_txd;
            struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt;
            /*
             * Update the previous RNDIS packet's total length;
             * it can be increased due to the mandatory alignment
             * padding for this RNDIS packet.  And update the
             * aggregating txdesc's chimney sending buffer size
             *
             * Zero-out the padding, as required by the RNDIS spec.
             */
            pkt->rm_len = roundup2(olen, txr->hn_agg_align);
            agg_txd->chim_size += pkt->rm_len - olen;

            /* Link this txdesc to the parent. */
            hn_txdesc_agg(agg_txd, txd);

            chim = (uint8_t *)pkt + pkt->rm_len;
            /* Save the current packet for later fixup. */
            txr->hn_agg_prevpkt = chim;

            txr->hn_agg_pktleft--;
            txr->hn_agg_szleft -= pktsize;
            if (txr->hn_agg_szleft <=
                HN_PKTSIZE_MIN(txr->hn_agg_align)) {
                /*
                 * Probably can't aggregate more packets;
                 * flush this aggregating txdesc proactively.
                 */
                txr->hn_agg_pktleft = 0;
        hn_flush_txagg(ifp, txr);

    KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));

    txr->hn_tx_chimney_tried++;
    txd->chim_index = hn_chim_alloc(txr->hn_sc);
    if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID)
    txr->hn_tx_chimney++;

    chim = txr->hn_sc->hn_chim +
        (txd->chim_index * txr->hn_sc->hn_chim_szmax);

    if (txr->hn_agg_pktmax > 1 &&
        txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) {
        txr->hn_agg_txd = txd;
        txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1;
        txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize;
        txr->hn_agg_prevpkt = chim;

/*
 * If this function fails, then both txd and m_head0 will be freed.
 */
hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head0)
    bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
    int error, nsegs, i;
    struct mbuf *m_head = *m_head0;
    struct rndis_packet_msg *pkt;
    int pkt_hlen, pkt_size;

    pkt = txd->rndis_pkt;
    pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align);
    if (pkt_size < txr->hn_chim_size) {
        chim = hn_try_txagg(ifp, txr, txd, pkt_size);
        if (txr->hn_agg_txd != NULL)
            hn_flush_txagg(ifp, txr);

    pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
    pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len;
    pkt->rm_dataoffset = sizeof(*pkt);
    pkt->rm_datalen = m_head->m_pkthdr.len;
    pkt->rm_oobdataoffset = 0;
    pkt->rm_oobdatalen = 0;
    pkt->rm_oobdataelements = 0;
    pkt->rm_pktinfooffset = sizeof(*pkt);
    pkt->rm_pktinfolen = 0;
    pkt->rm_vchandle = 0;
    pkt->rm_reserved = 0;

    if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
        /*
         * Set the hash value for this packet, so that the host could
         * dispatch the TX done event for this packet back to this TX
         */
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
        *pi_data = txr->hn_tx_idx;

    if (m_head->m_flags & M_VLANTAG) {
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
        *pi_data = NDIS_VLAN_INFO_MAKE(
            EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
            EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
            EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));

    if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
#if defined(INET6) || defined(INET)
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
        if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
            *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0,
                m_head->m_pkthdr.tso_segsz);
#if defined(INET6) && defined(INET)
            *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0,
                m_head->m_pkthdr.tso_segsz);
#endif  /* INET6 || INET */
    } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
        if (m_head->m_pkthdr.csum_flags &
            (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
            *pi_data = NDIS_TXCSUM_INFO_IPV6;
        } else {
            *pi_data = NDIS_TXCSUM_INFO_IPV4;
            if (m_head->m_pkthdr.csum_flags & CSUM_IP)
                *pi_data |= NDIS_TXCSUM_INFO_IPCS;

        if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
            *pi_data |= NDIS_TXCSUM_INFO_TCPCS;
        else if (m_head->m_pkthdr.csum_flags &
            (CSUM_IP_UDP | CSUM_IP6_UDP))
            *pi_data |= NDIS_TXCSUM_INFO_UDPCS;

    pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
    /* Convert RNDIS packet message offsets */
    pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset);
    pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);

    /*
     * Fast path: Chimney sending.
     */
        struct hn_txdesc *tgt_txd = txd;

        if (txr->hn_agg_txd != NULL) {
            tgt_txd = txr->hn_agg_txd;

        KASSERT(pkt == chim,
            ("RNDIS pkt not in chimney sending buffer"));
        KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
            ("chimney sending buffer is not used"));
        tgt_txd->chim_size += pkt->rm_len;

        m_copydata(m_head, 0, m_head->m_pkthdr.len,
            ((uint8_t *)chim) + pkt_hlen);

        txr->hn_gpa_cnt = 0;
        txr->hn_sendpkt = hn_txpkt_chim;
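    /*
     * Slow path: build a GPA (guest physical address) list covering
     * the RNDIS packet message plus every mbuf segment, and let the
     * host pull the payload directly from guest memory.
     */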
    KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc"));
    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
        ("chimney buffer is used"));
    KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));

    error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
    if (__predict_false(error)) {
        /*
         * This mbuf is not linked w/ the txd yet, so free it now.
         */
        freed = hn_txdesc_put(txr, txd);
        KASSERT(freed,
            ("fail to free txd upon txdma error"));

        txr->hn_txdma_failed++;
        if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);

    /* +1 RNDIS packet message */
    txr->hn_gpa_cnt = nsegs + 1;

    /* send packet with page buffer */
    txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
    txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
    txr->hn_gpa[0].gpa_len = pkt_hlen;

    /*
     * Fill the page buffers with mbuf info after the page
     * buffer for RNDIS packet message.
     */
    for (i = 0; i < nsegs; ++i) {
        struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];

        gpa->gpa_page = atop(segs[i].ds_addr);
        gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
        gpa->gpa_len = segs[i].ds_len;

    txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
    txr->hn_sendpkt = hn_txpkt_sglist;

    /* Set the completion routine */
    hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);

    /* Update temporary stats for later use. */
    txr->hn_stat_pkts++;
    txr->hn_stat_size += m_head->m_pkthdr.len;
    if (m_head->m_flags & M_MCAST)
        txr->hn_stat_mcasts++;
/*
 * If this function fails, then txd will be freed, but the mbuf
 * associated w/ the txd will _not_ be freed.
 */
hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
    int error, send_failed = 0, has_bpf;

    has_bpf = bpf_peers_present(ifp->if_bpf);

    /*
     * Make sure that this txd and any aggregated txds are not
     * freed before ETHER_BPF_MTAP.
     */
    hn_txdesc_hold(txd);

    error = txr->hn_sendpkt(txr, txd);
        const struct hn_txdesc *tmp_txd;

        ETHER_BPF_MTAP(ifp, txd->m);
        STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
            ETHER_BPF_MTAP(ifp, tmp_txd->m);

    if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts);
#ifdef HN_IFSTART_SUPPORT
    if (!hn_use_if_start)
        if_inc_counter(ifp, IFCOUNTER_OBYTES,
        if (txr->hn_stat_mcasts != 0) {
            if_inc_counter(ifp, IFCOUNTER_OMCASTS,
                txr->hn_stat_mcasts);

    txr->hn_pkts += txr->hn_stat_pkts;

    hn_txdesc_put(txr, txd);

    if (__predict_false(error)) {
        /*
         * This should "really rarely" happen.
         *
         * XXX Too many RX to be acked or too many sideband
         * commands to run?  Ask netvsc_channel_rollup()
         * to kick start later.
         */
        txr->hn_has_txeof = 1;

        txr->hn_send_failed++;
        /*
         * Try sending again after setting hn_has_txeof,
         * in case we missed the last
         * netvsc_channel_rollup().
         */
        if_printf(ifp, "send failed\n");

        /*
         * Caller will perform further processing on the
         * associated mbuf, so don't free it in hn_txdesc_put();
         * only unload it from the DMA map in hn_txdesc_put(),
         */
        freed = hn_txdesc_put(txr, txd);
        KASSERT(freed,
            ("fail to free txd upon send error"));

        txr->hn_send_failed++;

    /* Reset temporary stats, after this sending is done. */
    txr->hn_stat_size = 0;
    txr->hn_stat_pkts = 0;
    txr->hn_stat_mcasts = 0;
2174 * Append the specified data to the indicated mbuf chain,
2175 * Extend the mbuf chain if the new data does not fit in
2178 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
2179 * There should be an equivalent in the kernel mbuf code,
2180 * but there does not appear to be one yet.
2182 * Differs from m_append() in that additional mbufs are
2183 * allocated with cluster size MJUMPAGESIZE, and filled accordingly.
2186 * Return 1 if able to complete the job; otherwise 0.
2189 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
2192 int remainder, space;
2194 for (m = m0; m->m_next != NULL; m = m->m_next)
2197 space = M_TRAILINGSPACE(m);
2200 * Copy into available space.
2202 if (space > remainder)
2204 bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
2209 while (remainder > 0) {
2211 * Allocate a new mbuf; could check space
2212 * and allocate a cluster instead.
2214 n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
2217 n->m_len = min(MJUMPAGESIZE, remainder);
2218 bcopy(cp, mtod(n, caddr_t), n->m_len);
2220 remainder -= n->m_len;
2224 if (m0->m_flags & M_PKTHDR)
2225 m0->m_pkthdr.len += len - remainder;
2227 return (remainder == 0);
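
/*
 * Illustrative sketch, not part of the driver: wrap a received buffer
 * into a fresh mbuf chain with hv_m_append().  The helper name and the
 * caller-supplied buffer are hypothetical; compiled out via #if 0.
 */
#if 0
static struct mbuf *
example_buf_to_mchain(const void *buf, int len)
{
	struct mbuf *m;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	/* hv_m_append() returns 1 on success, 0 on allocation failure. */
	if (!hv_m_append(m, len, buf)) {
		m_freem(m);
		return (NULL);
	}
	/* m_pkthdr.len has been advanced by hv_m_append(). */
	return (m);
}
#endif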
2230 #if defined(INET) || defined(INET6)
2232 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
2234 #if __FreeBSD_version >= 1100095
2235 if (hn_lro_mbufq_depth) {
2236 tcp_lro_queue_mbuf(lc, m);
2240 return tcp_lro_rx(lc, m, 0);
2245 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
2246 const struct hn_rxinfo *info)
2250 int size, do_lro = 0, do_csum = 1;
2251 int hash_type = M_HASHTYPE_OPAQUE;
2253 /* If the VF is active, inject the packet through the VF */
2254 ifp = rxr->hn_vf ? rxr->hn_vf : rxr->hn_ifp;
2256 if (dlen <= MHLEN) {
2257 m_new = m_gethdr(M_NOWAIT, MT_DATA);
2258 if (m_new == NULL) {
2259 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
2262 memcpy(mtod(m_new, void *), data, dlen);
2263 m_new->m_pkthdr.len = m_new->m_len = dlen;
2264 rxr->hn_small_pkts++;
2267 * Get an mbuf with a cluster. For packets 2K or less,
2268 * get a standard 2K cluster. For anything larger, get a
2269 * 4K cluster. Any buffers larger than 4K can cause problems
2270 * if looped around to the Hyper-V TX channel, so avoid them.
2273 if (dlen > MCLBYTES) {
2275 size = MJUMPAGESIZE;
2278 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
2279 if (m_new == NULL) {
2280 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
2284 hv_m_append(m_new, dlen, data);
2286 m_new->m_pkthdr.rcvif = ifp;
2288 if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
2291 /* receive side checksum offload */
2292 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
2293 /* IP csum offload */
2294 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
2295 m_new->m_pkthdr.csum_flags |=
2296 (CSUM_IP_CHECKED | CSUM_IP_VALID);
2300 /* TCP/UDP csum offload */
2301 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
2302 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
2303 m_new->m_pkthdr.csum_flags |=
2304 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2305 m_new->m_pkthdr.csum_data = 0xffff;
2306 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
2314 * As of this writing (Oct 28th, 2016), the host side will turn
2315 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so
2316 * the do_lro setting here is actually _not_ accurate. We
2317 * depend on the RSS hash type check to reset do_lro.
2319 if ((info->csum_info &
2320 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
2321 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
2324 const struct ether_header *eh;
2329 if (m_new->m_len < hoff)
2331 eh = mtod(m_new, struct ether_header *);
2332 etype = ntohs(eh->ether_type);
2333 if (etype == ETHERTYPE_VLAN) {
2334 const struct ether_vlan_header *evl;
2336 hoff = sizeof(*evl);
2337 if (m_new->m_len < hoff)
2339 evl = mtod(m_new, struct ether_vlan_header *);
2340 etype = ntohs(evl->evl_proto);
2343 if (etype == ETHERTYPE_IP) {
2346 pr = hn_check_iplen(m_new, hoff);
2347 if (pr == IPPROTO_TCP) {
2349 (rxr->hn_trust_hcsum &
2350 HN_TRUST_HCSUM_TCP)) {
2351 rxr->hn_csum_trusted++;
2352 m_new->m_pkthdr.csum_flags |=
2353 (CSUM_IP_CHECKED | CSUM_IP_VALID |
2354 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2355 m_new->m_pkthdr.csum_data = 0xffff;
2358 } else if (pr == IPPROTO_UDP) {
2360 (rxr->hn_trust_hcsum &
2361 HN_TRUST_HCSUM_UDP)) {
2362 rxr->hn_csum_trusted++;
2363 m_new->m_pkthdr.csum_flags |=
2364 (CSUM_IP_CHECKED | CSUM_IP_VALID |
2365 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2366 m_new->m_pkthdr.csum_data = 0xffff;
2368 } else if (pr != IPPROTO_DONE && do_csum &&
2369 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
2370 rxr->hn_csum_trusted++;
2371 m_new->m_pkthdr.csum_flags |=
2372 (CSUM_IP_CHECKED | CSUM_IP_VALID);
2377 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
2378 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
2379 NDIS_VLAN_INFO_ID(info->vlan_info),
2380 NDIS_VLAN_INFO_PRI(info->vlan_info),
2381 NDIS_VLAN_INFO_CFI(info->vlan_info));
2382 m_new->m_flags |= M_VLANTAG;
2385 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
2387 m_new->m_pkthdr.flowid = info->hash_value;
2388 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
2389 NDIS_HASH_FUNCTION_TOEPLITZ) {
2390 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK);
2394 * do_lro is reset if the hash types are not TCP
2395 * related. See the comment in the above csum_flags
2399 case NDIS_HASH_IPV4:
2400 hash_type = M_HASHTYPE_RSS_IPV4;
2404 case NDIS_HASH_TCP_IPV4:
2405 hash_type = M_HASHTYPE_RSS_TCP_IPV4;
2408 case NDIS_HASH_IPV6:
2409 hash_type = M_HASHTYPE_RSS_IPV6;
2413 case NDIS_HASH_IPV6_EX:
2414 hash_type = M_HASHTYPE_RSS_IPV6_EX;
2418 case NDIS_HASH_TCP_IPV6:
2419 hash_type = M_HASHTYPE_RSS_TCP_IPV6;
2422 case NDIS_HASH_TCP_IPV6_EX:
2423 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
2428 m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
2430 M_HASHTYPE_SET(m_new, hash_type);
2433 * Note: Moved RX completion back to hv_nv_on_receive() so all
2434 * messages (not just data messages) will trigger a response.
2440 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
2441 #if defined(INET) || defined(INET6)
2442 struct lro_ctrl *lro = &rxr->hn_lro;
2445 rxr->hn_lro_tried++;
2446 if (hn_lro_rx(lro, m_new) == 0) {
2454 /* We're not holding the lock here, so don't release it */
2455 (*ifp->if_input)(ifp, m_new);
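
/*
 * Minimal sketch, not part of the driver, of the L2 header-offset logic
 * hn_rxpkt() uses above: hoff starts at the plain Ethernet header size
 * and is bumped to the VLAN-encapsulated size when the outer ethertype
 * is 802.1Q.  Compiled out via #if 0.
 */
#if 0
static int
example_l2_hoff(const struct mbuf *m)
{
	const struct ether_header *eh;

	if (m->m_len < (int)sizeof(*eh))
		return (-1);		/* header not contiguous */
	eh = mtod(m, const struct ether_header *);
	if (ntohs(eh->ether_type) == ETHERTYPE_VLAN)
		return (sizeof(struct ether_vlan_header));	/* 18 bytes */
	return (sizeof(struct ether_header));			/* 14 bytes */
}
#endif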
2461 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2463 struct hn_softc *sc = ifp->if_softc;
2464 struct ifreq *ifr = (struct ifreq *)data;
2465 int mask, error = 0;
2469 if (ifr->ifr_mtu > HN_MTU_MAX) {
2476 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2481 if ((sc->hn_caps & HN_CAP_MTU) == 0) {
2482 /* Can't change MTU */
2488 if (ifp->if_mtu == ifr->ifr_mtu) {
2494 * Suspend this interface before the synthetic parts are detached.
2500 * Detach the synthetic parts, i.e. NVS and RNDIS.
2502 hn_synth_detach(sc);
2505 * Reattach the synthetic parts, i.e. NVS and RNDIS,
2506 * with the new MTU setting.
2508 error = hn_synth_attach(sc, ifr->ifr_mtu);
2515 * Commit the requested MTU, after the synthetic parts
2516 * have been successfully attached.
2518 ifp->if_mtu = ifr->ifr_mtu;
2521 * Make sure that various parameters based on MTU are
2522 * still valid, after the MTU change.
2524 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
2525 hn_set_chim_size(sc, sc->hn_chim_szmax);
2526 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
2527 #if __FreeBSD_version >= 1100099
2528 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
2529 HN_LRO_LENLIM_MIN(ifp))
2530 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
2534 * All done! Resume the interface now.
2544 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2549 if (ifp->if_flags & IFF_UP) {
2550 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2552 * Caller might hold a mutex, e.g.
2553 * bpf; use busy-wait for the RNDIS
2557 hn_rxfilter_config(sc);
2563 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2566 sc->hn_if_flags = ifp->if_flags;
2573 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2575 if (mask & IFCAP_TXCSUM) {
2576 ifp->if_capenable ^= IFCAP_TXCSUM;
2577 if (ifp->if_capenable & IFCAP_TXCSUM)
2578 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
2580 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
2582 if (mask & IFCAP_TXCSUM_IPV6) {
2583 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2584 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2585 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
2587 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
2590 /* TODO: flip RNDIS offload parameters for RXCSUM. */
2591 if (mask & IFCAP_RXCSUM)
2592 ifp->if_capenable ^= IFCAP_RXCSUM;
2594 /* We can't diff IPv6 packets from IPv4 packets on RX path. */
2595 if (mask & IFCAP_RXCSUM_IPV6)
2596 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2599 if (mask & IFCAP_LRO)
2600 ifp->if_capenable ^= IFCAP_LRO;
2602 if (mask & IFCAP_TSO4) {
2603 ifp->if_capenable ^= IFCAP_TSO4;
2604 if (ifp->if_capenable & IFCAP_TSO4)
2605 ifp->if_hwassist |= CSUM_IP_TSO;
2607 ifp->if_hwassist &= ~CSUM_IP_TSO;
2609 if (mask & IFCAP_TSO6) {
2610 ifp->if_capenable ^= IFCAP_TSO6;
2611 if (ifp->if_capenable & IFCAP_TSO6)
2612 ifp->if_hwassist |= CSUM_IP6_TSO;
2614 ifp->if_hwassist &= ~CSUM_IP6_TSO;
2624 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2628 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2630 * Multicast uses mutex; use busy-wait for
2634 hn_rxfilter_config(sc);
2643 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
2647 error = ether_ioctl(ifp, cmd, data);
2654 hn_stop(struct hn_softc *sc, bool detaching)
2656 struct ifnet *ifp = sc->hn_ifp;
2661 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
2662 ("synthetic parts were not attached"));
2664 /* Disable polling. */
2667 /* Clear RUNNING bit _before_ hn_suspend_data() */
2668 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
2669 hn_suspend_data(sc);
2671 /* Clear OACTIVE bit. */
2672 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
2673 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
2674 sc->hn_tx_ring[i].hn_oactive = 0;
2677 * If the VF is active, make sure the filter is not 0, even if
2678 * the synthetic NIC is down.
2680 if (!detaching && (sc->hn_flags & HN_FLAG_VF))
2681 hn_rxfilter_config(sc);
2685 hn_init_locked(struct hn_softc *sc)
2687 struct ifnet *ifp = sc->hn_ifp;
2692 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
2695 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2698 /* Configure RX filter */
2699 hn_rxfilter_config(sc);
2701 /* Clear OACTIVE bit. */
2702 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
2703 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
2704 sc->hn_tx_ring[i].hn_oactive = 0;
2706 /* Clear TX 'suspended' bit. */
2707 hn_resume_tx(sc, sc->hn_tx_ring_inuse);
2709 /* Everything is ready; unleash! */
2710 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
2712 /* Re-enable polling if requested. */
2713 if (sc->hn_pollhz > 0)
2714 hn_polling(sc, sc->hn_pollhz);
2720 struct hn_softc *sc = xsc;
2727 #if __FreeBSD_version >= 1100099
2730 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
2732 struct hn_softc *sc = arg1;
2733 unsigned int lenlim;
2736 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
2737 error = sysctl_handle_int(oidp, &lenlim, 0, req);
2738 if (error || req->newptr == NULL)
2742 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
2743 lenlim > TCP_LRO_LENGTH_MAX) {
2747 hn_set_lro_lenlim(sc, lenlim);
2754 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
2756 struct hn_softc *sc = arg1;
2757 int ackcnt, error, i;
2760 * lro_ackcnt_lim is the append count limit;
2761 * +1 turns it into the aggregation limit.
2763 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
2764 error = sysctl_handle_int(oidp, &ackcnt, 0, req);
2765 if (error || req->newptr == NULL)
2768 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
2772 * Convert the aggregation limit back to the append count limit.
2777 for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
2778 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
2786 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
2788 struct hn_softc *sc = arg1;
2793 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
2796 error = sysctl_handle_int(oidp, &on, 0, req);
2797 if (error || req->newptr == NULL)
2801 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2802 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
2805 rxr->hn_trust_hcsum |= hcsum;
2807 rxr->hn_trust_hcsum &= ~hcsum;
2814 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
2816 struct hn_softc *sc = arg1;
2817 int chim_size, error;
2819 chim_size = sc->hn_tx_ring[0].hn_chim_size;
2820 error = sysctl_handle_int(oidp, &chim_size, 0, req);
2821 if (error || req->newptr == NULL)
2824 if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
2828 hn_set_chim_size(sc, chim_size);
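
/*
 * Minimal sketch, not part of the driver, of the sysctl read-modify
 * pattern the handlers in this file follow: export the current value,
 * and only validate and apply a new one when the request carries one
 * (req->newptr != NULL).  The handler name is hypothetical; compiled
 * out via #if 0.
 */
#if 0
static int
example_int_sysctl(SYSCTL_HANDLER_ARGS)
{
	int val = *(int *)arg1, error;

	error = sysctl_handle_int(oidp, &val, 0, req);
	if (error || req->newptr == NULL)
		return (error);
	if (val <= 0)		/* reject nonsensical values */
		return (EINVAL);
	*(int *)arg1 = val;
	return (0);
}
#endif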
2833 #if __FreeBSD_version < 1100095
2835 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
2837 struct hn_softc *sc = arg1;
2838 int ofs = arg2, i, error;
2839 struct hn_rx_ring *rxr;
2843 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2844 rxr = &sc->hn_rx_ring[i];
2845 stat += *((int *)((uint8_t *)rxr + ofs));
2848 error = sysctl_handle_64(oidp, &stat, 0, req);
2849 if (error || req->newptr == NULL)
2852 /* Zero out this stat. */
2853 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2854 rxr = &sc->hn_rx_ring[i];
2855 *((int *)((uint8_t *)rxr + ofs)) = 0;
2861 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
2863 struct hn_softc *sc = arg1;
2864 int ofs = arg2, i, error;
2865 struct hn_rx_ring *rxr;
2869 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2870 rxr = &sc->hn_rx_ring[i];
2871 stat += *((uint64_t *)((uint8_t *)rxr + ofs));
2874 error = sysctl_handle_64(oidp, &stat, 0, req);
2875 if (error || req->newptr == NULL)
2878 /* Zero out this stat. */
2879 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2880 rxr = &sc->hn_rx_ring[i];
2881 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
2889 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2891 struct hn_softc *sc = arg1;
2892 int ofs = arg2, i, error;
2893 struct hn_rx_ring *rxr;
2897 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2898 rxr = &sc->hn_rx_ring[i];
2899 stat += *((u_long *)((uint8_t *)rxr + ofs));
2902 error = sysctl_handle_long(oidp, &stat, 0, req);
2903 if (error || req->newptr == NULL)
2906 /* Zero out this stat. */
2907 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2908 rxr = &sc->hn_rx_ring[i];
2909 *((u_long *)((uint8_t *)rxr + ofs)) = 0;
2915 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2917 struct hn_softc *sc = arg1;
2918 int ofs = arg2, i, error;
2919 struct hn_tx_ring *txr;
2923 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
2924 txr = &sc->hn_tx_ring[i];
2925 stat += *((u_long *)((uint8_t *)txr + ofs));
2928 error = sysctl_handle_long(oidp, &stat, 0, req);
2929 if (error || req->newptr == NULL)
2932 /* Zero out this stat. */
2933 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
2934 txr = &sc->hn_tx_ring[i];
2935 *((u_long *)((uint8_t *)txr + ofs)) = 0;
2941 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
2943 struct hn_softc *sc = arg1;
2944 int ofs = arg2, i, error, conf;
2945 struct hn_tx_ring *txr;
2947 txr = &sc->hn_tx_ring[0];
2948 conf = *((int *)((uint8_t *)txr + ofs));
2950 error = sysctl_handle_int(oidp, &conf, 0, req);
2951 if (error || req->newptr == NULL)
2955 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
2956 txr = &sc->hn_tx_ring[i];
2957 *((int *)((uint8_t *)txr + ofs)) = conf;
2965 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS)
2967 struct hn_softc *sc = arg1;
2970 size = sc->hn_agg_size;
2971 error = sysctl_handle_int(oidp, &size, 0, req);
2972 if (error || req->newptr == NULL)
2976 sc->hn_agg_size = size;
2984 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS)
2986 struct hn_softc *sc = arg1;
2989 pkts = sc->hn_agg_pkts;
2990 error = sysctl_handle_int(oidp, &pkts, 0, req);
2991 if (error || req->newptr == NULL)
2995 sc->hn_agg_pkts = pkts;
3003 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS)
3005 struct hn_softc *sc = arg1;
3008 pkts = sc->hn_tx_ring[0].hn_agg_pktmax;
3009 return (sysctl_handle_int(oidp, &pkts, 0, req));
3013 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS)
3015 struct hn_softc *sc = arg1;
3018 align = sc->hn_tx_ring[0].hn_agg_align;
3019 return (sysctl_handle_int(oidp, &align, 0, req));
3023 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz)
3026 vmbus_chan_poll_disable(chan);
3028 vmbus_chan_poll_enable(chan, pollhz);
3032 hn_polling(struct hn_softc *sc, u_int pollhz)
3034 int nsubch = sc->hn_rx_ring_inuse - 1;
3039 struct vmbus_channel **subch;
3042 subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
3043 for (i = 0; i < nsubch; ++i)
3044 hn_chan_polling(subch[i], pollhz);
3045 vmbus_subchan_rel(subch, nsubch);
3047 hn_chan_polling(sc->hn_prichan, pollhz);
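
/*
 * Usage sketch (sysctl name and unit number hypothetical): polling is
 * driven by a per-device sysctl that feeds hn_polling_sysctl() below,
 * e.g. `sysctl dev.hn.0.polling_hz=10000' to poll at 10kHz and
 * `sysctl dev.hn.0.polling_hz=0' to return to interrupt mode; hn_polling()
 * then walks the primary channel plus all sub-channels.
 */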
3051 hn_polling_sysctl(SYSCTL_HANDLER_ARGS)
3053 struct hn_softc *sc = arg1;
3056 pollhz = sc->hn_pollhz;
3057 error = sysctl_handle_int(oidp, &pollhz, 0, req);
3058 if (error || req->newptr == NULL)
3062 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX))
3066 if (sc->hn_pollhz != pollhz) {
3067 sc->hn_pollhz = pollhz;
3068 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) &&
3069 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
3070 hn_polling(sc, sc->hn_pollhz);
3078 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
3080 struct hn_softc *sc = arg1;
3083 snprintf(verstr, sizeof(verstr), "%u.%u",
3084 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
3085 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
3086 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
3090 hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
3092 struct hn_softc *sc = arg1;
3099 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS);
3100 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req);
3104 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
3106 struct hn_softc *sc = arg1;
3107 char assist_str[128];
3111 hwassist = sc->hn_ifp->if_hwassist;
3113 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS);
3114 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req);
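
/*
 * Note on the "%b" conversions used above: this kernel printf extension
 * renders a value alongside the bit names from a description string, so
 * the sysctl output reads like "3<IP,TCP>" (hypothetical bits) instead
 * of a bare number.
 */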
3118 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS)
3120 struct hn_softc *sc = arg1;
3121 char filter_str[128];
3125 filter = sc->hn_rx_filter;
3127 snprintf(filter_str, sizeof(filter_str), "%b", filter,
3129 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req);
3133 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
3135 struct hn_softc *sc = arg1;
3140 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
3141 if (error || req->newptr == NULL)
3144 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
3147 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
3149 if (sc->hn_rx_ring_inuse > 1) {
3150 error = hn_rss_reconfig(sc);
3152 /* Not RSS capable, at least for now; just save the RSS key. */
3161 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
3163 struct hn_softc *sc = arg1;
3168 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
3169 if (error || req->newptr == NULL)
3173 * Don't allow RSS indirect table changes if this interface is not
3174 * currently RSS capable.
3176 if (sc->hn_rx_ring_inuse == 1) {
3181 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
3184 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
3186 hn_rss_ind_fixup(sc);
3187 error = hn_rss_reconfig(sc);
3194 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS)
3196 struct hn_softc *sc = arg1;
3201 hash = sc->hn_rss_hash;
3203 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
3204 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
3208 hn_check_iplen(const struct mbuf *m, int hoff)
3210 const struct ip *ip;
3211 int len, iphlen, iplen;
3212 const struct tcphdr *th;
3213 int thoff; /* TCP data offset */
3215 len = hoff + sizeof(struct ip);
3217 /* The packet must be at least the size of an IP header. */
3218 if (m->m_pkthdr.len < len)
3219 return IPPROTO_DONE;
3221 /* The fixed IP header must reside completely in the first mbuf. */
3223 return IPPROTO_DONE;
3225 ip = mtodo(m, hoff);
3227 /* Bound check the packet's stated IP header length. */
3228 iphlen = ip->ip_hl << 2;
3229 if (iphlen < sizeof(struct ip)) /* minimum header length */
3230 return IPPROTO_DONE;
3232 * The full IP header must reside completely in the first mbuf.
3233 if (m->m_len < hoff + iphlen)
3234 return IPPROTO_DONE;
3236 iplen = ntohs(ip->ip_len);
3239 * Check that the amount of data in the buffers is at
3240 * least as much as the IP header would have us expect.
3242 if (m->m_pkthdr.len < hoff + iplen)
3243 return IPPROTO_DONE;
3246 * Ignore IP fragments.
3248 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
3249 return IPPROTO_DONE;
3252 * The TCP/IP or UDP/IP header must be entirely contained within
3253 * the first fragment of a packet.
3257 if (iplen < iphlen + sizeof(struct tcphdr))
3258 return IPPROTO_DONE;
3259 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
3260 return IPPROTO_DONE;
3261 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
3262 thoff = th->th_off << 2;
3263 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
3264 return IPPROTO_DONE;
3265 if (m->m_len < hoff + iphlen + thoff)
3266 return IPPROTO_DONE;
3269 if (iplen < iphlen + sizeof(struct udphdr))
3270 return IPPROTO_DONE;
3271 if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
3272 return IPPROTO_DONE;
3276 return IPPROTO_DONE;
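
/*
 * Illustrative sketch, not part of the driver: how hn_check_iplen() is
 * used on the RX path above.  It returns the upper-layer protocol only
 * when every header length check passes, so trusting the host's
 * checksum claim can be gated on its result.  Helper name and hoff
 * value are hypothetical; compiled out via #if 0.
 */
#if 0
static int
example_trust_host_tcpcsum(struct mbuf *m)
{
	int hoff = sizeof(struct ether_header);	/* no VLAN tag */

	return (hn_check_iplen(m, hoff) == IPPROTO_TCP);
}
#endif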
3283 hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
3285 struct sysctl_oid_list *child;
3286 struct sysctl_ctx_list *ctx;
3287 device_t dev = sc->hn_dev;
3288 #if defined(INET) || defined(INET6)
3289 #if __FreeBSD_version >= 1100095
3296 * Create RXBUF for reception.
3299 * - It is shared by all channels.
3300 * - A large enough buffer is allocated; certain versions of NVS
3301 * may further limit the usable space.
3303 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
3304 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma,
3305 BUS_DMA_WAITOK | BUS_DMA_ZERO);
3306 if (sc->hn_rxbuf == NULL) {
3307 device_printf(sc->hn_dev, "allocate rxbuf failed\n");
3311 sc->hn_rx_ring_cnt = ring_cnt;
3312 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
3314 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
3315 M_DEVBUF, M_WAITOK | M_ZERO);
3317 #if defined(INET) || defined(INET6)
3318 #if __FreeBSD_version >= 1100095
3319 lroent_cnt = hn_lro_entry_count;
3320 if (lroent_cnt < TCP_LRO_ENTRIES)
3321 lroent_cnt = TCP_LRO_ENTRIES;
3323 device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
3325 #endif /* INET || INET6 */
3327 ctx = device_get_sysctl_ctx(dev);
3328 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
3330 /* Create dev.hn.UNIT.rx sysctl tree */
3331 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
3332 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3334 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
3335 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
3337 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
3338 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE,
3339 &rxr->hn_br_dma, BUS_DMA_WAITOK);
3340 if (rxr->hn_br == NULL) {
3341 device_printf(dev, "allocate bufring failed\n");
3345 if (hn_trust_hosttcp)
3346 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
3347 if (hn_trust_hostudp)
3348 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
3349 if (hn_trust_hostip)
3350 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
3351 rxr->hn_ifp = sc->hn_ifp;
3352 if (i < sc->hn_tx_ring_cnt)
3353 rxr->hn_txr = &sc->hn_tx_ring[i];
3354 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF;
3355 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK);
3357 rxr->hn_rxbuf = sc->hn_rxbuf;
3362 #if defined(INET) || defined(INET6)
3363 #if __FreeBSD_version >= 1100095
3364 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
3365 hn_lro_mbufq_depth);
3367 tcp_lro_init(&rxr->hn_lro);
3368 rxr->hn_lro.ifp = sc->hn_ifp;
3370 #if __FreeBSD_version >= 1100099
3371 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
3372 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
3374 #endif /* INET || INET6 */
3376 if (sc->hn_rx_sysctl_tree != NULL) {
3380 * Create per RX ring sysctl tree:
3381 * dev.hn.UNIT.rx.RINGID
3383 snprintf(name, sizeof(name), "%d", i);
3384 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
3385 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
3386 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3388 if (rxr->hn_rx_sysctl_tree != NULL) {
3389 SYSCTL_ADD_ULONG(ctx,
3390 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3391 OID_AUTO, "packets", CTLFLAG_RW,
3392 &rxr->hn_pkts, "# of packets received");
3393 SYSCTL_ADD_ULONG(ctx,
3394 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3395 OID_AUTO, "rss_pkts", CTLFLAG_RW,
3397 "# of packets w/ RSS info received");
3399 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3400 OID_AUTO, "pktbuf_len", CTLFLAG_RD,
3401 &rxr->hn_pktbuf_len, 0,
3402 "Temporary channel packet buffer length");
3407 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
3408 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3409 __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
3410 #if __FreeBSD_version < 1100095
3411 hn_rx_stat_int_sysctl,
3413 hn_rx_stat_u64_sysctl,
3415 "LU", "LRO queued");
3416 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
3417 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3418 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
3419 #if __FreeBSD_version < 1100095
3420 hn_rx_stat_int_sysctl,
3422 hn_rx_stat_u64_sysctl,
3424 "LU", "LRO flushed");
3425 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
3426 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3427 __offsetof(struct hn_rx_ring, hn_lro_tried),
3428 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
3429 #if __FreeBSD_version >= 1100099
3430 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
3431 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3432 hn_lro_lenlim_sysctl, "IU",
3433 "Max # of data bytes to be aggregated by LRO");
3434 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
3435 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3436 hn_lro_ackcnt_sysctl, "I",
3437 "Max # of ACKs to be aggregated by LRO");
3439 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
3440 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
3441 hn_trust_hcsum_sysctl, "I",
3442 "Trust tcp segement verification on host side, "
3443 "when csum info is missing");
3444 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
3445 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
3446 hn_trust_hcsum_sysctl, "I",
3447 "Trust udp datagram verification on host side, "
3448 "when csum info is missing");
3449 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
3450 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
3451 hn_trust_hcsum_sysctl, "I",
3452 "Trust ip packet verification on host side, "
3453 "when csum info is missing");
3454 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
3455 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3456 __offsetof(struct hn_rx_ring, hn_csum_ip),
3457 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
3458 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
3459 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3460 __offsetof(struct hn_rx_ring, hn_csum_tcp),
3461 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
3462 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
3463 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3464 __offsetof(struct hn_rx_ring, hn_csum_udp),
3465 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
3466 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
3467 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3468 __offsetof(struct hn_rx_ring, hn_csum_trusted),
3469 hn_rx_stat_ulong_sysctl, "LU",
3470 "# of packets that we trust host's csum verification");
3471 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
3472 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3473 __offsetof(struct hn_rx_ring, hn_small_pkts),
3474 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
3475 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed",
3476 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3477 __offsetof(struct hn_rx_ring, hn_ack_failed),
3478 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures");
3479 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
3480 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
3481 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
3482 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
3488 hn_destroy_rx_data(struct hn_softc *sc)
3492 if (sc->hn_rxbuf != NULL) {
3493 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0)
3494 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
3496 device_printf(sc->hn_dev, "RXBUF is referenced\n");
3497 sc->hn_rxbuf = NULL;
3500 if (sc->hn_rx_ring_cnt == 0)
3503 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
3504 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
3506 if (rxr->hn_br == NULL)
3508 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) {
3509 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
3511 device_printf(sc->hn_dev,
3512 "%dth channel bufring is referenced", i);
3516 #if defined(INET) || defined(INET6)
3517 tcp_lro_free(&rxr->hn_lro);
3519 free(rxr->hn_pktbuf, M_DEVBUF);
3521 free(sc->hn_rx_ring, M_DEVBUF);
3522 sc->hn_rx_ring = NULL;
3524 sc->hn_rx_ring_cnt = 0;
3525 sc->hn_rx_ring_inuse = 0;
3529 hn_tx_ring_create(struct hn_softc *sc, int id)
3531 struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
3532 device_t dev = sc->hn_dev;
3533 bus_dma_tag_t parent_dtag;
3537 txr->hn_tx_idx = id;
3539 #ifndef HN_USE_TXDESC_BUFRING
3540 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
3542 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
3544 txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
3545 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
3546 M_DEVBUF, M_WAITOK | M_ZERO);
3547 #ifndef HN_USE_TXDESC_BUFRING
3548 SLIST_INIT(&txr->hn_txlist);
3550 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF,
3551 M_WAITOK, &txr->hn_tx_lock);
3554 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) {
3555 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ(
3556 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id));
3558 txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt];
3561 #ifdef HN_IFSTART_SUPPORT
3562 if (hn_use_if_start) {
3563 txr->hn_txeof = hn_start_txeof;
3564 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
3565 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
3571 txr->hn_txeof = hn_xmit_txeof;
3572 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
3573 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
3575 br_depth = hn_get_txswq_depth(txr);
3576 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF,
3577 M_WAITOK, &txr->hn_tx_lock);
3580 txr->hn_direct_tx_size = hn_direct_tx_size;
3583 * Always schedule transmission instead of trying to do direct
3584 * transmission. This one gives the best performance so far.
3586 txr->hn_sched_tx = 1;
3588 parent_dtag = bus_get_dma_tag(dev);
3590 /* DMA tag for RNDIS packet messages. */
3591 error = bus_dma_tag_create(parent_dtag, /* parent */
3592 HN_RNDIS_PKT_ALIGN, /* alignment */
3593 HN_RNDIS_PKT_BOUNDARY, /* boundary */
3594 BUS_SPACE_MAXADDR, /* lowaddr */
3595 BUS_SPACE_MAXADDR, /* highaddr */
3596 NULL, NULL, /* filter, filterarg */
3597 HN_RNDIS_PKT_LEN, /* maxsize */
3599 HN_RNDIS_PKT_LEN, /* maxsegsize */
3601 NULL, /* lockfunc */
3602 NULL, /* lockfuncarg */
3603 &txr->hn_tx_rndis_dtag);
3605 device_printf(dev, "failed to create rndis dmatag\n");
3609 /* DMA tag for data. */
3610 error = bus_dma_tag_create(parent_dtag, /* parent */
3612 HN_TX_DATA_BOUNDARY, /* boundary */
3613 BUS_SPACE_MAXADDR, /* lowaddr */
3614 BUS_SPACE_MAXADDR, /* highaddr */
3615 NULL, NULL, /* filter, filterarg */
3616 HN_TX_DATA_MAXSIZE, /* maxsize */
3617 HN_TX_DATA_SEGCNT_MAX, /* nsegments */
3618 HN_TX_DATA_SEGSIZE, /* maxsegsize */
3620 NULL, /* lockfunc */
3621 NULL, /* lockfuncarg */
3622 &txr->hn_tx_data_dtag);
3624 device_printf(dev, "failed to create data dmatag\n");
3628 for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
3629 struct hn_txdesc *txd = &txr->hn_txdesc[i];
3632 txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
3633 STAILQ_INIT(&txd->agg_list);
3636 * Allocate and load RNDIS packet message.
3638 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
3639 (void **)&txd->rndis_pkt,
3640 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
3641 &txd->rndis_pkt_dmap);
3644 "failed to allocate rndis_packet_msg, %d\n", i);
3648 error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
3649 txd->rndis_pkt_dmap,
3650 txd->rndis_pkt, HN_RNDIS_PKT_LEN,
3651 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
3655 "failed to load rndis_packet_msg, %d\n", i);
3656 bus_dmamem_free(txr->hn_tx_rndis_dtag,
3657 txd->rndis_pkt, txd->rndis_pkt_dmap);
3661 /* DMA map for TX data. */
3662 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
3666 "failed to allocate tx data dmamap\n");
3667 bus_dmamap_unload(txr->hn_tx_rndis_dtag,
3668 txd->rndis_pkt_dmap);
3669 bus_dmamem_free(txr->hn_tx_rndis_dtag,
3670 txd->rndis_pkt, txd->rndis_pkt_dmap);
3674 /* All set, put it to list */
3675 txd->flags |= HN_TXD_FLAG_ONLIST;
3676 #ifndef HN_USE_TXDESC_BUFRING
3677 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
3679 buf_ring_enqueue(txr->hn_txdesc_br, txd);
3682 txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
3684 if (sc->hn_tx_sysctl_tree != NULL) {
3685 struct sysctl_oid_list *child;
3686 struct sysctl_ctx_list *ctx;
3690 * Create per TX ring sysctl tree:
3691 * dev.hn.UNIT.tx.RINGID
3693 ctx = device_get_sysctl_ctx(dev);
3694 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
3696 snprintf(name, sizeof(name), "%d", id);
3697 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
3698 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3700 if (txr->hn_tx_sysctl_tree != NULL) {
3701 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
3704 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
3705 CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
3706 "# of available TX descs");
3708 #ifdef HN_IFSTART_SUPPORT
3709 if (!hn_use_if_start)
3712 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
3713 CTLFLAG_RD, &txr->hn_oactive, 0,
3716 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
3717 CTLFLAG_RW, &txr->hn_pkts,
3718 "# of packets transmitted");
3719 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends",
3720 CTLFLAG_RW, &txr->hn_sends, "# of sends");
3728 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
3730 struct hn_tx_ring *txr = txd->txr;
3732 KASSERT(txd->m == NULL, ("still has mbuf installed"));
3733 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
3735 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
3736 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt,
3737 txd->rndis_pkt_dmap);
3738 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
3742 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd)
3745 KASSERT(txd->refs == 0 || txd->refs == 1,
3746 ("invalid txd refs %d", txd->refs));
3748 /* Aggregated txds will be freed by their aggregating txd. */
3749 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) {
3752 freed = hn_txdesc_put(txr, txd);
3753 KASSERT(freed, ("can't free txdesc"));
3758 hn_tx_ring_destroy(struct hn_tx_ring *txr)
3762 if (txr->hn_txdesc == NULL)
3767 * Because the freeing of aggregated txds will be deferred
3768 * to the aggregating txd, two passes are used here:
3769 * - The first pass GCes any pending txds. This GC is necessary,
3770 * since if the channels are revoked, the hypervisor will not
3771 * deliver send-done for all pending txds.
3772 * - The second pass frees the busdma resources, i.e. after all txds were freed.
3775 for (i = 0; i < txr->hn_txdesc_cnt; ++i)
3776 hn_txdesc_gc(txr, &txr->hn_txdesc[i]);
3777 for (i = 0; i < txr->hn_txdesc_cnt; ++i)
3778 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]);
3780 if (txr->hn_tx_data_dtag != NULL)
3781 bus_dma_tag_destroy(txr->hn_tx_data_dtag);
3782 if (txr->hn_tx_rndis_dtag != NULL)
3783 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
3785 #ifdef HN_USE_TXDESC_BUFRING
3786 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF);
3789 free(txr->hn_txdesc, M_DEVBUF);
3790 txr->hn_txdesc = NULL;
3792 if (txr->hn_mbuf_br != NULL)
3793 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF);
3795 #ifndef HN_USE_TXDESC_BUFRING
3796 mtx_destroy(&txr->hn_txlist_spin);
3798 mtx_destroy(&txr->hn_tx_lock);
3802 hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
3804 struct sysctl_oid_list *child;
3805 struct sysctl_ctx_list *ctx;
3809 * Create TXBUF for chimney sending.
3811 * NOTE: It is shared by all channels.
3813 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev),
3814 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma,
3815 BUS_DMA_WAITOK | BUS_DMA_ZERO);
3816 if (sc->hn_chim == NULL) {
3817 device_printf(sc->hn_dev, "allocate txbuf failed\n");
3821 sc->hn_tx_ring_cnt = ring_cnt;
3822 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
3824 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
3825 M_DEVBUF, M_WAITOK | M_ZERO);
3827 ctx = device_get_sysctl_ctx(sc->hn_dev);
3828 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
3830 /* Create dev.hn.UNIT.tx sysctl tree */
3831 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
3832 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3834 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3837 error = hn_tx_ring_create(sc, i);
3842 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
3843 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3844 __offsetof(struct hn_tx_ring, hn_no_txdescs),
3845 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
3846 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
3847 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3848 __offsetof(struct hn_tx_ring, hn_send_failed),
3849 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failures");
3850 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
3851 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3852 __offsetof(struct hn_tx_ring, hn_txdma_failed),
3853 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failures");
3854 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed",
3855 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3856 __offsetof(struct hn_tx_ring, hn_flush_failed),
3857 hn_tx_stat_ulong_sysctl, "LU",
3858 "# of packet transmission aggregation flush failure");
3859 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
3860 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3861 __offsetof(struct hn_tx_ring, hn_tx_collapsed),
3862 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
3863 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
3864 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3865 __offsetof(struct hn_tx_ring, hn_tx_chimney),
3866 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
3867 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
3868 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3869 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
3870 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
3871 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
3872 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
3873 "# of total TX descs");
3874 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
3875 CTLFLAG_RD, &sc->hn_chim_szmax, 0,
3876 "Chimney send packet size upper boundary");
3877 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
3878 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3879 hn_chim_size_sysctl, "I", "Chimney send packet size limit");
3880 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
3881 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3882 __offsetof(struct hn_tx_ring, hn_direct_tx_size),
3883 hn_tx_conf_int_sysctl, "I",
3884 "Size of the packet for direct transmission");
3885 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
3886 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3887 __offsetof(struct hn_tx_ring, hn_sched_tx),
3888 hn_tx_conf_int_sysctl, "I",
3889 "Always schedule transmission "
3890 "instead of doing direct transmission");
3891 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
3892 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
3893 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
3894 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
3895 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax",
3896 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0,
3897 "Applied packet transmission aggregation size");
3898 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax",
3899 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
3900 hn_txagg_pktmax_sysctl, "I",
3901 "Applied packet transmission aggregation packets");
3902 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align",
3903 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
3904 hn_txagg_align_sysctl, "I",
3905 "Applied packet transmission aggregation alignment");
3911 hn_set_chim_size(struct hn_softc *sc, int chim_size)
3915 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3916 sc->hn_tx_ring[i].hn_chim_size = chim_size;
3920 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
3922 struct ifnet *ifp = sc->hn_ifp;
3925 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
3928 KASSERT(sc->hn_ndis_tso_sgmin >= 2,
3929 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin));
3930 tso_minlen = sc->hn_ndis_tso_sgmin * mtu;
3932 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen &&
3933 sc->hn_ndis_tso_szmax <= IP_MAXPACKET,
3934 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax));
3936 if (tso_maxlen < tso_minlen)
3937 tso_maxlen = tso_minlen;
3938 else if (tso_maxlen > IP_MAXPACKET)
3939 tso_maxlen = IP_MAXPACKET;
3940 if (tso_maxlen > sc->hn_ndis_tso_szmax)
3941 tso_maxlen = sc->hn_ndis_tso_szmax;
3942 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3944 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax);
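
/*
 * Worked example for the clamping above, with hypothetical values:
 * hn_ndis_tso_sgmin = 2 and mtu = 1500 give tso_minlen = 3000; a
 * requested tso_maxlen of 262144 is first clamped to IP_MAXPACKET
 * (65535), then to hn_ndis_tso_szmax if that is smaller, and finally
 * 18 bytes (Ethernet + VLAN header) are subtracted: 65535 - 18 = 65517.
 */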
3948 hn_fixup_tx_data(struct hn_softc *sc)
3950 uint64_t csum_assist;
3953 hn_set_chim_size(sc, sc->hn_chim_szmax);
3954 if (hn_tx_chimney_size > 0 &&
3955 hn_tx_chimney_size < sc->hn_chim_szmax)
3956 hn_set_chim_size(sc, hn_tx_chimney_size);
3959 if (sc->hn_caps & HN_CAP_IPCS)
3960 csum_assist |= CSUM_IP;
3961 if (sc->hn_caps & HN_CAP_TCP4CS)
3962 csum_assist |= CSUM_IP_TCP;
3963 if (sc->hn_caps & HN_CAP_UDP4CS)
3964 csum_assist |= CSUM_IP_UDP;
3965 if (sc->hn_caps & HN_CAP_TCP6CS)
3966 csum_assist |= CSUM_IP6_TCP;
3967 if (sc->hn_caps & HN_CAP_UDP6CS)
3968 csum_assist |= CSUM_IP6_UDP;
3969 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3970 sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
3972 if (sc->hn_caps & HN_CAP_HASHVAL) {
3974 * Support HASHVAL pktinfo on TX path.
3977 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
3978 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3979 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
3984 hn_destroy_tx_data(struct hn_softc *sc)
3988 if (sc->hn_chim != NULL) {
3989 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) {
3990 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
3992 device_printf(sc->hn_dev,
3993 "chimney sending buffer is referenced");
3998 if (sc->hn_tx_ring_cnt == 0)
4001 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
4002 hn_tx_ring_destroy(&sc->hn_tx_ring[i]);
4004 free(sc->hn_tx_ring, M_DEVBUF);
4005 sc->hn_tx_ring = NULL;
4007 sc->hn_tx_ring_cnt = 0;
4008 sc->hn_tx_ring_inuse = 0;
4011 #ifdef HN_IFSTART_SUPPORT
4014 hn_start_taskfunc(void *xtxr, int pending __unused)
4016 struct hn_tx_ring *txr = xtxr;
4018 mtx_lock(&txr->hn_tx_lock);
4019 hn_start_locked(txr, 0);
4020 mtx_unlock(&txr->hn_tx_lock);
4024 hn_start_locked(struct hn_tx_ring *txr, int len)
4026 struct hn_softc *sc = txr->hn_sc;
4027 struct ifnet *ifp = sc->hn_ifp;
4030 KASSERT(hn_use_if_start,
4031 ("hn_start_locked is called, when if_start is disabled"));
4032 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
4033 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
4034 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
4036 if (__predict_false(txr->hn_suspended))
4039 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4043 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
4044 struct hn_txdesc *txd;
4045 struct mbuf *m_head;
4048 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
4052 if (len > 0 && m_head->m_pkthdr.len > len) {
4054 * This sending could be time consuming; let callers
4055 * dispatch this packet sending (and sending of any
4056 * follow-up packets) to the tx taskqueue.
4058 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4063 #if defined(INET6) || defined(INET)
4064 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
4065 m_head = hn_tso_fixup(m_head);
4066 if (__predict_false(m_head == NULL)) {
4067 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
4073 txd = hn_txdesc_get(txr);
4075 txr->hn_no_txdescs++;
4076 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4077 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4081 error = hn_encap(ifp, txr, txd, &m_head);
4083 /* Both txd and m_head are freed */
4084 KASSERT(txr->hn_agg_txd == NULL,
4085 ("encap failed w/ pending aggregating txdesc"));
4089 if (txr->hn_agg_pktleft == 0) {
4090 if (txr->hn_agg_txd != NULL) {
4091 KASSERT(m_head == NULL,
4092 ("pending mbuf for aggregating txdesc"));
4093 error = hn_flush_txagg(ifp, txr);
4094 if (__predict_false(error)) {
4095 atomic_set_int(&ifp->if_drv_flags,
4100 KASSERT(m_head != NULL, ("mbuf was freed"));
4101 error = hn_txpkt(ifp, txr, txd);
4102 if (__predict_false(error)) {
4103 /* txd is freed, but m_head is not */
4104 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4105 atomic_set_int(&ifp->if_drv_flags,
4113 KASSERT(txr->hn_agg_txd != NULL,
4114 ("no aggregating txdesc"));
4115 KASSERT(m_head == NULL,
4116 ("pending mbuf for aggregating txdesc"));
4121 /* Flush pending aggregated transmission. */
4122 if (txr->hn_agg_txd != NULL)
4123 hn_flush_txagg(ifp, txr);
4128 hn_start(struct ifnet *ifp)
4130 struct hn_softc *sc = ifp->if_softc;
4131 struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
4133 if (txr->hn_sched_tx)
4136 if (mtx_trylock(&txr->hn_tx_lock)) {
4139 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
4140 mtx_unlock(&txr->hn_tx_lock);
4145 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
4149 hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
4151 struct hn_tx_ring *txr = xtxr;
4153 mtx_lock(&txr->hn_tx_lock);
4154 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
4155 hn_start_locked(txr, 0);
4156 mtx_unlock(&txr->hn_tx_lock);
4160 hn_start_txeof(struct hn_tx_ring *txr)
4162 struct hn_softc *sc = txr->hn_sc;
4163 struct ifnet *ifp = sc->hn_ifp;
4165 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
4167 if (txr->hn_sched_tx)
4170 if (mtx_trylock(&txr->hn_tx_lock)) {
4173 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4174 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
4175 mtx_unlock(&txr->hn_tx_lock);
4177 taskqueue_enqueue(txr->hn_tx_taskq,
4183 * Release OACTIVE earlier, in the hope that
4184 * others can catch up. The task will clear the
4185 * flag again with the hn_tx_lock to avoid possible races.
4188 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4189 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
4193 #endif /* HN_IFSTART_SUPPORT */
4196 hn_xmit(struct hn_tx_ring *txr, int len)
4198 struct hn_softc *sc = txr->hn_sc;
4199 struct ifnet *ifp = sc->hn_ifp;
4200 struct mbuf *m_head;
4203 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
4204 #ifdef HN_IFSTART_SUPPORT
4205 KASSERT(hn_use_if_start == 0,
4206 ("hn_xmit is called, when if_start is enabled"));
4208 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
4210 if (__predict_false(txr->hn_suspended))
4213 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
4216 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
4217 struct hn_txdesc *txd;
4220 if (len > 0 && m_head->m_pkthdr.len > len) {
4222 * This sending could be time consuming; let callers
4223 * dispatch this packet sending (and sending of any
4224 * follow-up packets) to the tx taskqueue.
4226 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
4231 txd = hn_txdesc_get(txr);
4233 txr->hn_no_txdescs++;
4234 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
4235 txr->hn_oactive = 1;
4239 error = hn_encap(ifp, txr, txd, &m_head);
4241 /* Both txd and m_head are freed; discard */
4242 KASSERT(txr->hn_agg_txd == NULL,
4243 ("encap failed w/ pending aggregating txdesc"));
4244 drbr_advance(ifp, txr->hn_mbuf_br);
4248 if (txr->hn_agg_pktleft == 0) {
4249 if (txr->hn_agg_txd != NULL) {
4250 KASSERT(m_head == NULL,
4251 ("pending mbuf for aggregating txdesc"));
4252 error = hn_flush_txagg(ifp, txr);
4253 if (__predict_false(error)) {
4254 txr->hn_oactive = 1;
4258 KASSERT(m_head != NULL, ("mbuf was freed"));
4259 error = hn_txpkt(ifp, txr, txd);
4260 if (__predict_false(error)) {
4261 /* txd is freed, but m_head is not */
4262 drbr_putback(ifp, txr->hn_mbuf_br,
4264 txr->hn_oactive = 1;
4271 KASSERT(txr->hn_agg_txd != NULL,
4272 ("no aggregating txdesc"));
4273 KASSERT(m_head == NULL,
4274 ("pending mbuf for aggregating txdesc"));
4279 drbr_advance(ifp, txr->hn_mbuf_br);
4282 /* Flush pending aggregated transmission. */
4283 if (txr->hn_agg_txd != NULL)
4284 hn_flush_txagg(ifp, txr);
4289 hn_transmit(struct ifnet *ifp, struct mbuf *m)
4291 struct hn_softc *sc = ifp->if_softc;
4292 struct hn_tx_ring *txr;
4295 #if defined(INET6) || defined(INET)
4297 * Perform TSO packet header fixup now, since the TSO
4298 * packet header should be cache-hot.
4300 if (m->m_pkthdr.csum_flags & CSUM_TSO) {
4301 m = hn_tso_fixup(m);
4302 if (__predict_false(m == NULL)) {
4303 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
4310 * Select the TX ring based on flowid
4312 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
4313 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
4314 txr = &sc->hn_tx_ring[idx];
4316 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
4318 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
4322 if (txr->hn_oactive)
4325 if (txr->hn_sched_tx)
4328 if (mtx_trylock(&txr->hn_tx_lock)) {
4331 sched = hn_xmit(txr, txr->hn_direct_tx_size);
4332 mtx_unlock(&txr->hn_tx_lock);
4337 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
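
/*
 * Minimal sketch, not part of the driver, of the flowid-based ring
 * selection in hn_transmit() above: packets that carry an RSS hash are
 * spread over the in-use TX rings by modulo, so every packet of a flow
 * lands on the same ring and stays ordered.  Compiled out via #if 0.
 */
#if 0
static struct hn_tx_ring *
example_select_txr(struct hn_softc *sc, const struct mbuf *m)
{
	int idx = 0;

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
	return (&sc->hn_tx_ring[idx]);
}
#endif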
4342 hn_tx_ring_qflush(struct hn_tx_ring *txr)
4346 mtx_lock(&txr->hn_tx_lock);
4347 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
4349 mtx_unlock(&txr->hn_tx_lock);
4353 hn_xmit_qflush(struct ifnet *ifp)
4355 struct hn_softc *sc = ifp->if_softc;
4358 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
4359 hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
4364 hn_xmit_txeof(struct hn_tx_ring *txr)
4367 if (txr->hn_sched_tx)
4370 if (mtx_trylock(&txr->hn_tx_lock)) {
4373 txr->hn_oactive = 0;
4374 sched = hn_xmit(txr, txr->hn_direct_tx_size);
4375 mtx_unlock(&txr->hn_tx_lock);
4377 taskqueue_enqueue(txr->hn_tx_taskq,
4383 * Release oactive earlier, in the hope that
4384 * others can catch up. The task will clear
4385 * oactive again with the hn_tx_lock to avoid possible races.
4388 txr->hn_oactive = 0;
4389 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
4394 hn_xmit_taskfunc(void *xtxr, int pending __unused)
4396 struct hn_tx_ring *txr = xtxr;
4398 mtx_lock(&txr->hn_tx_lock);
4400 mtx_unlock(&txr->hn_tx_lock);
4404 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
4406 struct hn_tx_ring *txr = xtxr;
4408 mtx_lock(&txr->hn_tx_lock);
4409 txr->hn_oactive = 0;
4411 mtx_unlock(&txr->hn_tx_lock);
4415 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
4417 struct vmbus_chan_br cbr;
4418 struct hn_rx_ring *rxr;
4419 struct hn_tx_ring *txr = NULL;
4422 idx = vmbus_chan_subidx(chan);
4425 * Link this channel to RX/TX ring.
4427 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
4428 ("invalid channel index %d, should > 0 && < %d",
4429 idx, sc->hn_rx_ring_inuse));
4430 rxr = &sc->hn_rx_ring[idx];
4431 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
4432 ("RX ring %d already attached", idx));
4433 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
4434 rxr->hn_chan = chan;
4437 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
4438 idx, vmbus_chan_id(chan));
4441 if (idx < sc->hn_tx_ring_inuse) {
4442 txr = &sc->hn_tx_ring[idx];
4443 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
4444 ("TX ring %d already attached", idx));
4445 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
4447 txr->hn_chan = chan;
4449 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
4450 idx, vmbus_chan_id(chan));
4454 /* Bind this channel to a proper CPU. */
4455 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));
4460 cbr.cbr = rxr->hn_br;
4461 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
4462 cbr.cbr_txsz = HN_TXBR_SIZE;
4463 cbr.cbr_rxsz = HN_RXBR_SIZE;
4464 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
4466 if (error == EISCONN) {
4467 if_printf(sc->hn_ifp, "bufring is connected after "
4468 "chan%u open failure\n", vmbus_chan_id(chan));
4469 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
4471 if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
4472 vmbus_chan_id(chan), error);
4479 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
4481 struct hn_rx_ring *rxr;
4484 idx = vmbus_chan_subidx(chan);
4487 * Link this channel to RX/TX ring.
4489 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
4490 ("invalid channel index %d, should > 0 && < %d",
4491 idx, sc->hn_rx_ring_inuse));
4492 rxr = &sc->hn_rx_ring[idx];
4493 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
4494 ("RX ring %d is not attached", idx));
4495 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
4497 if (idx < sc->hn_tx_ring_inuse) {
4498 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
4500 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
4501 ("TX ring %d is not attached attached", idx));
4502 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
4506 * Close this channel.
4509 * Channel closing does _not_ destroy the target channel.
4511 error = vmbus_chan_close_direct(chan);
4512 if (error == EISCONN) {
4513 if_printf(sc->hn_ifp, "chan%u bufring is connected "
4514 "after being closed\n", vmbus_chan_id(chan));
4515 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
4517 if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
4518 vmbus_chan_id(chan), error);
4523 hn_attach_subchans(struct hn_softc *sc)
4525 struct vmbus_channel **subchans;
4526 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
4529 KASSERT(subchan_cnt > 0, ("no sub-channels"));
4531 /* Attach the sub-channels. */
4532 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
4533 for (i = 0; i < subchan_cnt; ++i) {
4536 error1 = hn_chan_attach(sc, subchans[i]);
4539 /* Move on; all channels will be detached later. */
4542 vmbus_subchan_rel(subchans, subchan_cnt);
4545 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
4548 if_printf(sc->hn_ifp, "%d sub-channels attached\n",
4556 hn_detach_allchans(struct hn_softc *sc)
4558 struct vmbus_channel **subchans;
4559 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
4562 if (subchan_cnt == 0)
4565 /* Detach the sub-channels. */
4566 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
4567 for (i = 0; i < subchan_cnt; ++i)
4568 hn_chan_detach(sc, subchans[i]);
4569 vmbus_subchan_rel(subchans, subchan_cnt);
4573 * Detach the primary channel, _after_ all sub-channels are detached.
4576 hn_chan_detach(sc, sc->hn_prichan);
4578 /* Wait for sub-channels to be destroyed, if any. */
4579 vmbus_subchan_drain(sc->hn_prichan);
4582 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
4583 KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
4584 HN_RX_FLAG_ATTACHED) == 0,
4585 ("%dth RX ring is still attached", i));
4587 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
4588 KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
4589 HN_TX_FLAG_ATTACHED) == 0,
4590 ("%dth TX ring is still attached", i));
4596 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
4598 struct vmbus_channel **subchans;
4599 int nchan, rxr_cnt, error;
4601 nchan = *nsubch + 1;
4604 * Multiple RX/TX rings are not requested.
4611 * Query RSS capabilities, e.g. # of RX rings, and # of indirect table entries.
4614 error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
4616 /* No RSS; this is benign. */
4621 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
4625 if (nchan > rxr_cnt)
4628 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
4634 * Allocate sub-channels from NVS.
4636 *nsubch = nchan - 1;
4637 error = hn_nvs_alloc_subchans(sc, nsubch);
4638 if (error || *nsubch == 0) {
4639 /* Failed to allocate sub-channels. */
4645 * Wait for all sub-channels to become ready before moving on.
4647 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
4648 vmbus_subchan_rel(subchans, *nsubch);
4653 hn_synth_attachable(const struct hn_softc *sc)
4657 if (sc->hn_flags & HN_FLAG_ERRORS)
4660 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
4661 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
4663 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
static int
hn_synth_attach(struct hn_softc *sc, int mtu)
{
#define ATTACHED_NVS		0x0002
#define ATTACHED_RNDIS		0x0004

	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int error, nsubch, nchan, i;
	uint32_t old_caps, attached = 0;

	KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
	    ("synthetic parts were attached"));

	if (!hn_synth_attachable(sc))
		return (ENXIO);

	/* Save capabilities for later verification. */
	old_caps = sc->hn_caps;
	sc->hn_caps = 0;

	/* Clear RSS stuffs. */
	sc->hn_rss_ind_size = 0;
	sc->hn_rss_hash = 0;

	/*
	 * Attach the primary channel _before_ attaching NVS and RNDIS.
	 */
	error = hn_chan_attach(sc, sc->hn_prichan);
	if (error)
		goto failed;

	/* Attach NVS. */
	error = hn_nvs_attach(sc, mtu);
	if (error)
		goto failed;
	attached |= ATTACHED_NVS;

	/*
	 * Attach RNDIS _after_ NVS is attached.
	 */
	error = hn_rndis_attach(sc, mtu);
	if (error)
		goto failed;
	attached |= ATTACHED_RNDIS;

	/*
	 * Make sure capabilities are not changed.
	 */
	if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
		if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
		    old_caps, sc->hn_caps);
		error = ENXIO;
		goto failed;
	}

	/*
	 * Allocate sub-channels for multi-TX/RX rings.
	 *
	 * NOTE:
	 * The # of RX rings that can be used is equivalent to the # of
	 * channels to be requested.
	 */
	nsubch = sc->hn_rx_ring_cnt - 1;
	error = hn_synth_alloc_subchans(sc, &nsubch);
	if (error)
		goto failed;
	/* NOTE: _Full_ synthetic parts detach is required now. */
	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;

	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	nchan = nsubch + 1;
	hn_set_ring_inuse(sc, nchan);
	if (nchan == 1) {
		/* Only the primary channel can be used; done */
		goto back;
	}

	/*
	 * Attach the sub-channels.
	 *
	 * NOTE: hn_set_ring_inuse() _must_ have been called.
	 */
	error = hn_attach_subchans(sc);
	if (error)
		goto failed;

	/*
	 * Configure RSS key and indirect table _after_ all sub-channels
	 * are attached.
	 */
	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
		/*
		 * RSS key is not set yet; set it to the default RSS key.
		 */
		if (bootverbose)
			if_printf(sc->hn_ifp, "setup default RSS key\n");
		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
	}

	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
		/*
		 * RSS indirect table is not set yet; set it up in round-
		 * robin fashion.
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "setup default RSS indirect "
			    "table\n");
		}
		for (i = 0; i < NDIS_HASH_INDCNT; ++i)
			rss->rss_ind[i] = i % nchan;
		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
	} else {
		/*
		 * # of usable channels may be changed, so we have to
		 * make sure that all entries in RSS indirect table
		 * are valid.
		 *
		 * NOTE: hn_set_ring_inuse() _must_ have been called.
		 */
		hn_rss_ind_fixup(sc);
	}

	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error)
		goto failed;
back:
	/*
	 * Fixup transmission aggregation setup.
	 */
	hn_set_txagg(sc);
	return (0);

failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
		hn_synth_detach(sc);
	} else {
		if (attached & ATTACHED_RNDIS)
			hn_rndis_detach(sc);
		if (attached & ATTACHED_NVS)
			hn_nvs_detach(sc);
		hn_chan_detach(sc, sc->hn_prichan);
		/* Restore old capabilities. */
		sc->hn_caps = old_caps;
	}
	return (error);

#undef ATTACHED_RNDIS
#undef ATTACHED_NVS
}

/*
 * NOTE:
 * The interface must have been suspended through hn_suspend(), before
 * this function gets called.
 */
static void
hn_synth_detach(struct hn_softc *sc)
{

	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
	    ("synthetic parts were not attached"));

	/* Detach the RNDIS first. */
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}

static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{

	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}

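/*
 * Wait, with 1 tick pauses, until both directions of the channel
 * bufring are empty, then drain the channel's interrupt taskqueue.
 */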
static void
hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{

	/*
	 * NOTE:
	 * The TX bufring will not be drained by the hypervisor,
	 * if the primary channel is revoked.
	 */
	while (!vmbus_chan_rx_empty(chan) ||
	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
	     !vmbus_chan_tx_empty(chan)))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}

static void
hn_suspend_data(struct hn_softc *sc)
{
	struct vmbus_channel **subch = NULL;
	struct hn_tx_ring *txr;
	int i, nsubch;

	HN_LOCK_ASSERT(sc);

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one is able to send more packets now. */

		/*
		 * Wait for all pending sends to finish.
		 *
		 * NOTE:
		 * We will _not_ receive all pending send-done, if the
		 * primary channel is revoked.
		 */
		while (hn_tx_ring_pending(txr) &&
		    !vmbus_chan_is_revoked(sc->hn_prichan))
			pause("hnwtx", 1 /* 1 tick */);
	}

	/*
	 * Disable RX by clearing RX filter.
	 */
	hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	nsubch = sc->hn_rx_ring_inuse - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);

	if (subch != NULL) {
		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(sc, subch[i]);
	}
	hn_chan_drain(sc, sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);

	/*
	 * Drain any pending TX tasks.
	 *
	 * NOTE:
	 * The above hn_chan_drain() can dispatch TX tasks, so the TX
	 * tasks will have to be drained _after_ the above hn_chan_drain()
	 * calls.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

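/*
 * Executed through vmbus_chan_run_task() on the primary channel, so
 * that clearing hn_mgmt_taskq is serialized with the channel callback.
 */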
static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{

	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}

static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	HN_LOCK_ASSERT(sc);

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
	 */
	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
	taskqueue_drain_all(sc->hn_mgmt_taskq0);
}

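/*
 * Suspend order matters: polling is disabled first, then the data
 * paths are quiesced, and the management taskqueue is stopped last.
 */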
static void
hn_suspend(struct hn_softc *sc)
{

	/* Disable polling. */
	hn_polling(sc, 0);

	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_VF))
		hn_suspend_data(sc);
	hn_suspend_mgmt(sc);
}

static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
	int i;

	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
	    ("invalid TX ring count %d", tx_ring_cnt));

	for (i = 0; i < tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 0;
		mtx_unlock(&txr->hn_tx_lock);
	}
}

static void
hn_resume_data(struct hn_softc *sc)
{
	int i;

	HN_LOCK_ASSERT(sc);

	/* Re-enable RX. */
	hn_rxfilter_config(sc);

	/*
	 * Make sure to clear suspend status on "all" TX rings,
	 * since hn_tx_ring_inuse can be changed after
	 * hn_suspend_data().
	 */
	hn_resume_tx(sc, sc->hn_tx_ring_cnt);

#ifdef HN_IFSTART_SUPPORT
	if (!hn_use_if_start)
#endif
	{
		/*
		 * Flush unused drbrs, since hn_tx_ring_inuse may be
		 * reduced.
		 */
		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	}

	/* Kick start TX. */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		/*
		 * Use txeof task, so that any pending oactive can be
		 * cleared properly.
		 */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}

static void
hn_resume_mgmt(struct hn_softc *sc)
{

	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;

	/*
	 * Kick off network change detection, if it was pending.
	 * If no network change was pending, start link status
	 * checks, which is more lightweight than network change
	 * detection.
	 */
	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		hn_change_network(sc);
	else
		hn_update_link_status(sc);
}

static void
hn_resume(struct hn_softc *sc)
{

	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_VF))
		hn_resume_data(sc);

	/*
	 * When the VF is activated, the synthetic interface is changed
	 * to DOWN in hn_set_vf().  Here, if the VF is still active, we
	 * don't call hn_resume_mgmt() until the VF is deactivated in
	 * hn_set_vf().
	 */
	if (!(sc->hn_flags & HN_FLAG_VF))
		hn_resume_mgmt(sc);

	/*
	 * Re-enable polling if this interface is running and
	 * the polling is requested.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
		hn_polling(sc, sc->hn_pollhz);
}

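/*
 * Handle RNDIS status indications: media connect/disconnect triggers
 * a link status update, while a network change additionally kicks off
 * network change detection, e.g. to prompt address renewal.
 */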
static void
hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
{
	const struct rndis_status_msg *msg;
	int ofs;

	if (dlen < sizeof(*msg)) {
		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
		return;
	}
	msg = data;

	switch (msg->rm_status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		hn_update_link_status(sc);
		break;

	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
		/* Not really useful; ignore. */
		break;

	case RNDIS_STATUS_NETWORK_CHANGE:
		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
		if (dlen < ofs + msg->rm_stbuflen ||
		    msg->rm_stbuflen < sizeof(uint32_t)) {
			if_printf(sc->hn_ifp, "network changed\n");
		} else {
			uint32_t change;

			memcpy(&change, ((const uint8_t *)msg) + ofs,
			    sizeof(change));
			if_printf(sc->hn_ifp, "network changed, change %u\n",
			    change);
		}
		hn_change_network(sc);
		break;

	default:
		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
		    msg->rm_status);
		break;
	}
}

static int
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (__predict_false(info_dlen < sizeof(*pi)))
			return (EINVAL);
		if (__predict_false(info_dlen < pi->rm_size))
			return (EINVAL);
		info_dlen -= pi->rm_size;

		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return (EINVAL);
		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
			return (EINVAL);
		dlen = pi->rm_size - pi->rm_pktinfooffset;
		data = pi->rm_data;

		switch (pi->rm_type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
				return (EINVAL);
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
				return (EINVAL);
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHVAL:
			if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
				return (EINVAL);
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHINF:
			if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
				return (EINVAL);
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL) {
			/* All found; done */
			break;
		}
next:
		pi = (const struct rndis_pktinfo *)
		    ((const uint8_t *)pi + pi->rm_size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if ((mask & HN_RXINFO_HASHVAL) == 0)
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return (0);
}

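/*
 * Returns true if [off, off + len) overlaps
 * [check_off, check_off + check_len).
 */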
static __inline bool
hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
{

	if (off < check_off) {
		if (__predict_true(off + len <= check_off))
			return (false);
	} else if (off > check_off) {
		if (__predict_true(check_off + check_len <= off))
			return (false);
	}
	return (true);
}

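/*
 * Validate an RNDIS data message before handing its payload to the
 * network stack: the total length, the data, OOB and per-packet-info
 * offsets/lengths, and the mutual overlap of those three regions are
 * all checked, since the message content is not assumed to be
 * well-formed.
 */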
static void
hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_packet_msg *pkt;
	struct hn_rxinfo info;
	int data_off, pktinfo_off, data_len, pktinfo_len;

	/*
	 * Check length.
	 */
	if (__predict_false(dlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
		return;
	}
	pkt = data;

	if (__predict_false(dlen < pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
		return;
	}
	if (__predict_false(pkt->rm_len <
	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
		    "msglen %u, data %u, oob %u, pktinfo %u\n",
		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
		    pkt->rm_pktinfolen);
		return;
	}
	if (__predict_false(pkt->rm_datalen == 0)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
		return;
	}

	/*
	 * Check offsets.
	 */
#define IS_OFFSET_INVALID(ofs)			\
	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))

	/* XXX Hyper-V does not meet data offset alignment requirement */
	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data offset %u\n", pkt->rm_dataoffset);
		return;
	}
	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "oob offset %u\n", pkt->rm_oobdataoffset);
		return;
	}
	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
		return;
	}

#undef IS_OFFSET_INVALID

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
	data_len = pkt->rm_datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
	pktinfo_len = pkt->rm_pktinfolen;

	/*
	 * Check OOB coverage.
	 */
	if (__predict_false(pkt->rm_oobdatalen != 0)) {
		int oob_off, oob_len;

		if_printf(rxr->hn_ifp, "got oobdata\n");
		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
		oob_len = pkt->rm_oobdatalen;

		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overflow, msglen %u, oob abs %d len %d\n",
			    pkt->rm_len, oob_off, oob_len);
			return;
		}

		/*
		 * Check against data.
		 */
		if (hn_rndis_check_overlap(oob_off, oob_len,
		    data_off, data_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps data, oob abs %d len %d, "
			    "data abs %d len %d\n",
			    oob_off, oob_len, data_off, data_len);
			return;
		}

		/*
		 * Check against pktinfo.
		 */
		if (pktinfo_len != 0 &&
		    hn_rndis_check_overlap(oob_off, oob_len,
		    pktinfo_off, pktinfo_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps pktinfo, oob abs %d len %d, "
			    "pktinfo abs %d len %d\n",
			    oob_off, oob_len, pktinfo_off, pktinfo_len);
			return;
		}
	}

	/*
	 * Check per-packet-info coverage and find useful per-packet-info.
	 */
	info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
	info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
	info.hash_info = HN_NDIS_HASH_INFO_INVALID;
	if (__predict_true(pktinfo_len != 0)) {
		bool overlap;
		int error;

		if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overflow, msglen %u, "
			    "pktinfo abs %d len %d\n",
			    pkt->rm_len, pktinfo_off, pktinfo_len);
			return;
		}

		/*
		 * Check packet info coverage.
		 */
		overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
		    data_off, data_len);
		if (__predict_false(overlap)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overlap data, pktinfo abs %d len %d, "
			    "data abs %d len %d\n",
			    pktinfo_off, pktinfo_len, data_off, data_len);
			return;
		}

		/*
		 * Find useful per-packet-info.
		 */
		error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
		    pktinfo_len, &info);
		if (__predict_false(error)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
			    "pktinfo\n");
			return;
		}
	}

	if (__predict_false(data_off + data_len > pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data overflow, msglen %u, data abs %d len %d\n",
		    pkt->rm_len, data_off, data_len);
		return;
	}
	hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
}

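/*
 * Demultiplex an incoming RNDIS message: data messages take the hot
 * path, status indications and control completions the slow paths.
 */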
static __inline void
hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_msghdr *hdr;

	if (__predict_false(dlen < sizeof(*hdr))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
		return;
	}
	hdr = data;

	if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
		/* Hot data path. */
		hn_rndis_rx_data(rxr, data, dlen);
		/* Done! */
		return;
	}

	if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
		hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
	else
		hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
}

static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
	const struct hn_nvs_hdr *hdr;

	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
		if_printf(sc->hn_ifp, "invalid nvs notify\n");
		return;
	}
	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);

	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
		/* Useless; ignore */
		return;
	}
	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}

static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkt)
{
	struct hn_nvs_sendctx *sndc;

	sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
	    VMBUS_CHANPKT_DATALEN(pkt));
	/*
	 * NOTE:
	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
	 * its callback.
	 */
}

static void
hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkthdr)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr;
	int count, i, hlen;

	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
		return;
	}
	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);

	/* Make sure that this is a RNDIS message. */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}
		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
	}

	/*
	 * Ack the consumed RXBUF associated w/ this channel packet,
	 * so that this RXBUF can be recycled by the hypervisor.
	 */
	hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
}

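/*
 * Ack an RXBUF with an HN_NVS_TYPE_RNDIS_ACK completion, so the
 * hypervisor can recycle it.  EAGAIN (TX bufring full) is retried a
 * bounded number of times; if the ack still cannot be sent, the RXBUF
 * is leaked.
 */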
static void
hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    uint64_t tid)
{
	struct hn_nvs_rndis_ack ack;
	int retries, error;

	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
	ack.nvs_status = HN_NVS_STATUS_OK;

	retries = 0;
again:
	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
	if (__predict_false(error == EAGAIN)) {
		/*
		 * NOTE:
		 * This should _not_ happen in real world, since the
		 * consumption of the TX bufring from the TX path is
		 * controlled.
		 */
		if (rxr->hn_ack_failed == 0)
			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
		rxr->hn_ack_failed++;
		retries++;
		if (retries < 10) {
			DELAY(100);
			goto again;
		}
		/* RXBUF leaks! */
		if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
	}
}

static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;

	for (;;) {
		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
		int error, pktlen;

		pktlen = rxr->hn_pktbuf_len;
		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
		if (__predict_false(error == ENOBUFS)) {
			void *nbuf;
			int nlen;

			/*
			 * Expand channel packet buffer.
			 *
			 * XXX
			 * Use M_WAITOK here, since allocation failure
			 * is fatal.
			 */
			nlen = rxr->hn_pktbuf_len * 2;
			while (nlen < pktlen)
				nlen *= 2;
			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);

			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
			    rxr->hn_pktbuf_len, nlen);

			free(rxr->hn_pktbuf, M_DEVBUF);
			rxr->hn_pktbuf = nbuf;
			rxr->hn_pktbuf_len = nlen;
			/* Retry! */
			continue;
		} else if (__predict_false(error == EAGAIN)) {
			/* No more channel packets; done! */
			break;
		}
		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			hn_nvs_handle_comp(sc, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(rxr, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(sc, pkt);
			break;

		default:
			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
			    pkt->cph_type);
			break;
		}
	}
	hn_chan_rollup(rxr, rxr->hn_txr);
}

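/*
 * Sanitize the TX taskqueue tunables and, in the global mode, create
 * the shared TX taskqueues during SYSINIT.
 */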
static void
hn_tx_taskq_create(void *arg __unused)
{
	int i;

	/*
	 * Fix the # of TX taskqueues.
	 */
	if (hn_tx_taskq_cnt <= 0)
		hn_tx_taskq_cnt = 1;
	else if (hn_tx_taskq_cnt > mp_ncpus)
		hn_tx_taskq_cnt = mp_ncpus;

	/*
	 * Fix the TX taskqueue mode.
	 */
	switch (hn_tx_taskq_mode) {
	case HN_TX_TASKQ_M_INDEP:
	case HN_TX_TASKQ_M_GLOBAL:
	case HN_TX_TASKQ_M_EVTTQ:
		break;
	default:
		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
		break;
	}

	if (vm_guest != VM_GUEST_HV)
		return;

	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
		return;

	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
		    "hn tx%d", i);
	}
}
SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_create, NULL);

static void
hn_tx_taskq_destroy(void *arg __unused)
{

	if (hn_tx_taskque != NULL) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(hn_tx_taskque[i]);
		free(hn_tx_taskque, M_DEVBUF);
	}
}
SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_destroy, NULL);