/*-
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet6.h"
#include "opt_inet.h"
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/sbuf.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/buf_ring.h>
#include <sys/eventhandler.h>
#include <machine/atomic.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_vlan_var.h>
#include <net/rndis.h>
#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>

#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>

#include "vmbus_if.h"
#define HN_IFSTART_SUPPORT

#define HN_RING_CNT_DEF_MAX		8

/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT			512
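
/*
 * HN_RNDIS_PKT_LEN below is the worst-case RNDIS packet message size
 * this driver generates: the message header plus room for the four
 * per-packet-info records it may append in hn_encap() (hash value,
 * VLAN, LSOv2 and TX checksum).
 */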
#define HN_RNDIS_PKT_LEN \
    (sizeof(struct rndis_packet_msg) + \
     HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \
     HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \
     HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \
     HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
#define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE

#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)

#define HN_DIRECT_TX_SIZE_DEF		128

#define HN_EARLY_TXEOF_THRESH		8

#define HN_PKTBUF_LEN_DEF		(16 * 1024)

#define HN_LROENT_CNT_DEF		128

#define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp)		(2 * (ifp)->if_mtu)

#define HN_LRO_ACKCNT_DEF		1

#define HN_LOCK_INIT(sc) \
    sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc)		sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc)		sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc) \
do { \
    while (sx_try_xlock(&(sc)->hn_lock) == 0) \
        DELAY(1000); \
} while (0)
#define HN_UNLOCK(sc)			sx_xunlock(&(sc)->hn_lock)

#define HN_CSUM_IP_MASK			(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK		(CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc) \
    ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc) \
    ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)

#define HN_PKTSIZE_MIN(align) \
    roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
        HN_RNDIS_PKT_LEN, (align))
#define HN_PKTSIZE(m, align) \
    roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))
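
/*
 * Illustrative example (not from the original source): with an
 * aggregation alignment of 8 bytes, HN_PKTSIZE() of a 1514-byte frame
 * reserves 1514 + HN_RNDIS_PKT_LEN bytes, rounded up to the next
 * multiple of 8.
 */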
#define HN_RING_IDX2CPU(sc, idx)	(((sc)->hn_cpu + (idx)) % mp_ncpus)

struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
    SLIST_ENTRY(hn_txdesc)	link;
#endif
    STAILQ_ENTRY(hn_txdesc)	agg_link;

    /* Aggregated txdescs, in sending order. */
    STAILQ_HEAD(, hn_txdesc)	agg_list;

    /* The oldest packet, if transmission aggregation happens. */
    struct mbuf			*m;
    struct hn_tx_ring		*txr;
    int				refs;
    uint32_t			flags;		/* HN_TXD_FLAG_ */
    struct hn_nvs_sendctx	send_ctx;
    uint32_t			chim_index;
    int				chim_size;

    bus_dmamap_t		data_dmap;

    bus_addr_t			rndis_pkt_paddr;
    struct rndis_packet_msg	*rndis_pkt;
    bus_dmamap_t		rndis_pkt_dmap;
};

#define HN_TXD_FLAG_ONLIST		0x0001
#define HN_TXD_FLAG_DMAMAP		0x0002
#define HN_TXD_FLAG_ONAGG		0x0004
struct hn_update_vf {
    struct hn_rx_ring	*rxr;
    struct ifnet	*vf;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL \
    (HN_RXINFO_VLAN | \
     HN_RXINFO_CSUM | \
     HN_RXINFO_HASHINF | \
     HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0
static int hn_probe(device_t);
static int hn_attach(device_t);
static int hn_detach(device_t);
static int hn_shutdown(device_t);
static void hn_chan_callback(struct vmbus_channel *,
    void *);

static void hn_init(void *);
static int hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void hn_start(struct ifnet *);
#endif
static int hn_transmit(struct ifnet *, struct mbuf *);
static void hn_xmit_qflush(struct ifnet *);
static int hn_ifmedia_upd(struct ifnet *);
static void hn_ifmedia_sts(struct ifnet *,
    struct ifmediareq *);

static int hn_rndis_rxinfo(const void *, int,
    struct hn_rxinfo *);
static void hn_rndis_rx_data(struct hn_rx_ring *,
    const void *, int);
static void hn_rndis_rx_status(struct hn_softc *,
    const void *, int);

static void hn_nvs_handle_notify(struct hn_softc *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_handle_comp(struct hn_softc *,
    struct vmbus_channel *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_handle_rxbuf(struct hn_rx_ring *,
    struct vmbus_channel *,
    const struct vmbus_chanpkt_hdr *);
static void hn_nvs_ack_rxbuf(struct hn_rx_ring *,
    struct vmbus_channel *, uint64_t);

#if __FreeBSD_version >= 1100099
static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
#else
static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
static int hn_vf_sysctl(SYSCTL_HANDLER_ARGS);

static void hn_stop(struct hn_softc *, bool);
static void hn_init_locked(struct hn_softc *);
static int hn_chan_attach(struct hn_softc *,
    struct vmbus_channel *);
static void hn_chan_detach(struct hn_softc *,
    struct vmbus_channel *);
static int hn_attach_subchans(struct hn_softc *);
static void hn_detach_allchans(struct hn_softc *);
static void hn_chan_rollup(struct hn_rx_ring *,
    struct hn_tx_ring *);
static void hn_set_ring_inuse(struct hn_softc *, int);
static int hn_synth_attach(struct hn_softc *, int);
static void hn_synth_detach(struct hn_softc *);
static int hn_synth_alloc_subchans(struct hn_softc *,
    int *);
static bool hn_synth_attachable(const struct hn_softc *);
static void hn_suspend(struct hn_softc *);
static void hn_suspend_data(struct hn_softc *);
static void hn_suspend_mgmt(struct hn_softc *);
static void hn_resume(struct hn_softc *);
static void hn_resume_data(struct hn_softc *);
static void hn_resume_mgmt(struct hn_softc *);
static void hn_suspend_mgmt_taskfunc(void *, int);
static void hn_chan_drain(struct hn_softc *,
    struct vmbus_channel *);
static void hn_polling(struct hn_softc *, u_int);
static void hn_chan_polling(struct vmbus_channel *, u_int);

static void hn_update_link_status(struct hn_softc *);
static void hn_change_network(struct hn_softc *);
static void hn_link_taskfunc(void *, int);
static void hn_netchg_init_taskfunc(void *, int);
static void hn_netchg_status_taskfunc(void *, int);
static void hn_link_status(struct hn_softc *);

static int hn_create_rx_data(struct hn_softc *, int);
static void hn_destroy_rx_data(struct hn_softc *);
static int hn_check_iplen(const struct mbuf *, int);
static int hn_set_rxfilter(struct hn_softc *, uint32_t);
static int hn_rxfilter_config(struct hn_softc *);
static int hn_rss_reconfig(struct hn_softc *);
static void hn_rss_ind_fixup(struct hn_softc *);
static int hn_rxpkt(struct hn_rx_ring *, const void *,
    int, const struct hn_rxinfo *);

static int hn_tx_ring_create(struct hn_softc *, int);
static void hn_tx_ring_destroy(struct hn_tx_ring *);
static int hn_create_tx_data(struct hn_softc *, int);
static void hn_fixup_tx_data(struct hn_softc *);
static void hn_destroy_tx_data(struct hn_softc *);
static void hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static void hn_txdesc_gc(struct hn_tx_ring *,
    struct hn_txdesc *);
static int hn_encap(struct ifnet *, struct hn_tx_ring *,
    struct hn_txdesc *, struct mbuf **);
static int hn_txpkt(struct ifnet *, struct hn_tx_ring *,
    struct hn_txdesc *);
static void hn_set_chim_size(struct hn_softc *, int);
static void hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool hn_tx_ring_pending(struct hn_tx_ring *);
static void hn_tx_ring_qflush(struct hn_tx_ring *);
static void hn_resume_tx(struct hn_softc *, int);
static void hn_set_txagg(struct hn_softc *);
static void *hn_try_txagg(struct ifnet *,
    struct hn_tx_ring *, struct hn_txdesc *,
    int);
static int hn_get_txswq_depth(const struct hn_tx_ring *);
static void hn_txpkt_done(struct hn_nvs_sendctx *,
    struct hn_softc *, struct vmbus_channel *,
    const void *, int);
static int hn_txpkt_sglist(struct hn_tx_ring *,
    struct hn_txdesc *);
static int hn_txpkt_chim(struct hn_tx_ring *,
    struct hn_txdesc *);
static int hn_xmit(struct hn_tx_ring *, int);
static void hn_xmit_taskfunc(void *, int);
static void hn_xmit_txeof(struct hn_tx_ring *);
static void hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int hn_start_locked(struct hn_tx_ring *, int);
static void hn_start_taskfunc(void *, int);
static void hn_start_txeof(struct hn_tx_ring *);
static void hn_start_txeof_taskfunc(void *, int);
#endif
SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust TCP segment verification on host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust TCP segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust UDP datagram verification on host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust UDP datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust IP packet verification on host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust IP packet verification on host side, "
    "when csum info is missing (global setting)");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");

/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP	0
#define HN_TX_TASKQ_M_GLOBAL	1
#define HN_TX_TASKQ_M_EVTTQ	2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif

/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");

static u_int hn_cpu_index;		/* next CPU for channel */
static struct taskqueue **hn_tx_taskque; /* shared TX taskqueues */
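
/*
 * Default Toeplitz RSS hash key.  This is the widely used 40-byte
 * sample key found in most RSS-capable drivers.
 */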
static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
    0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
    0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
    0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
    0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
    0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
static device_method_t hn_methods[] = {
    /* Device interface */
    DEVMETHOD(device_probe,	hn_probe),
    DEVMETHOD(device_attach,	hn_attach),
    DEVMETHOD(device_detach,	hn_detach),
    DEVMETHOD(device_shutdown,	hn_shutdown),
    DEVMETHOD_END
};

static driver_t hn_driver = {
    "hn",
    hn_methods,
    sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);
#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
    int i;

    for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
        sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif

static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
        txd->chim_size == 0, ("invalid rndis sglist txd"));
    return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
        &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}

static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
    struct hn_nvs_rndis rndis;

    KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
        txd->chim_size > 0, ("invalid rndis chim txd"));

    rndis.nvs_type = HN_NVS_TYPE_RNDIS;
    rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
    rndis.nvs_chim_idx = txd->chim_index;
    rndis.nvs_chim_sz = txd->chim_size;

    return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
        &rndis, sizeof(rndis), &txd->send_ctx));
}
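
/*
 * Allocate a chimney sending buffer slot.  Free slots are tracked in a
 * bitmap; each word is scanned for a clear bit, which is then claimed
 * with an atomic test-and-set, so no lock is needed.  Returns
 * HN_NVS_CHIM_IDX_INVALID when all slots are in use.
 */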
static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
    int i, bmap_cnt = sc->hn_chim_bmap_cnt;
    u_long *bmap = sc->hn_chim_bmap;
    uint32_t ret = HN_NVS_CHIM_IDX_INVALID;

    for (i = 0; i < bmap_cnt; ++i) {
        int idx;

        idx = ffsl(~bmap[i]);
        if (idx == 0)
            continue;

        --idx; /* ffsl is 1-based */
        KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
            ("invalid i %d and idx %d", i, idx));

        if (atomic_testandset_long(&bmap[i], idx))
            continue;

        ret = i * LONG_BIT + idx;
        break;
    }
    return (ret);
}
static void
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
{
    u_long mask;
    uint32_t idx;

    idx = chim_idx / LONG_BIT;
    KASSERT(idx < sc->hn_chim_bmap_cnt,
        ("invalid chimney index 0x%x", chim_idx));

    mask = 1UL << (chim_idx % LONG_BIT);
    KASSERT(sc->hn_chim_bmap[idx] & mask,
        ("index bitmap 0x%lx, chimney index %u, "
         "bitmap idx %d, bitmask 0x%lx",
         sc->hn_chim_bmap[idx], chim_idx, idx, mask));

    atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
}
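
/*
 * For TSO the host expects the TCP checksum field to be primed with
 * the pseudo-header checksum (excluding the length), which is what
 * hn_tso_fixup() below computes via in_pseudo()/in6_cksum_pseudo().
 */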
#if defined(INET6) || defined(INET)
/*
 * NOTE: If this function failed, the m_head would be freed.
 */
static __inline struct mbuf *
hn_tso_fixup(struct mbuf *m_head)
{
    struct ether_vlan_header *evl;
    struct tcphdr *th;
    int ehlen;

    KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));

#define PULLUP_HDR(m, len) \
do { \
    if (__predict_false((m)->m_len < (len))) { \
        (m) = m_pullup((m), (len)); \
        if ((m) == NULL) \
            return (NULL); \
    } \
} while (0)

    PULLUP_HDR(m_head, sizeof(*evl));
    evl = mtod(m_head, struct ether_vlan_header *);
    if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
        ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    else
        ehlen = ETHER_HDR_LEN;

#ifdef INET
    if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
        struct ip *ip;
        int iphlen;

        PULLUP_HDR(m_head, ehlen + sizeof(*ip));
        ip = mtodo(m_head, ehlen);
        iphlen = ip->ip_hl << 2;

        PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
        th = mtodo(m_head, ehlen + iphlen);

        ip->ip_len = 0;
        ip->ip_sum = 0;
        th->th_sum = in_pseudo(ip->ip_src.s_addr,
            ip->ip_dst.s_addr, htons(IPPROTO_TCP));
    }
#endif
#if defined(INET6) && defined(INET)
    else
#endif
#ifdef INET6
    {
        struct ip6_hdr *ip6;

        PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
        ip6 = mtodo(m_head, ehlen);
        if (ip6->ip6_nxt != IPPROTO_TCP) {
            m_freem(m_head);
            return (NULL);
        }

        PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
        th = mtodo(m_head, ehlen + sizeof(*ip6));

        ip6->ip6_plen = 0;
        th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
    }
#endif
    return (m_head);

#undef PULLUP_HDR
}
#endif	/* INET6 || INET */
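
/*
 * Program the host-side RX filter.  The filter is only pushed down to
 * RNDIS when it actually changes; hn_rxfilter_config() derives it from
 * the interface flags (promiscuous, broadcast, multicast) and the VF
 * state.
 */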
static int
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
    int error = 0;

    HN_LOCK_ASSERT(sc);

    if (sc->hn_rx_filter != filter) {
        error = hn_rndis_set_rxfilter(sc, filter);
        if (!error)
            sc->hn_rx_filter = filter;
    }
    return (error);
}

static int
hn_rxfilter_config(struct hn_softc *sc)
{
    struct ifnet *ifp = sc->hn_ifp;
    uint32_t filter;

    HN_LOCK_ASSERT(sc);

    if ((ifp->if_flags & IFF_PROMISC) ||
        (sc->hn_flags & HN_FLAG_VF)) {
        filter = NDIS_PACKET_TYPE_PROMISCUOUS;
    } else {
        filter = NDIS_PACKET_TYPE_DIRECTED;
        if (ifp->if_flags & IFF_BROADCAST)
            filter |= NDIS_PACKET_TYPE_BROADCAST;
        /* TODO: support multicast list */
        if ((ifp->if_flags & IFF_ALLMULTI) ||
            !TAILQ_EMPTY(&ifp->if_multiaddrs))
            filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
    }
    return (hn_set_rxfilter(sc, filter));
}
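
/*
 * Compute the effective TX aggregation limits: the size limit is the
 * minimum of the tunable, what RNDIS offered, and the chimney buffer
 * size; the packet count limit is the minimum of the tunable and the
 * RNDIS offer.  The results are propagated to every TX ring under its
 * lock.
 */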
static void
hn_set_txagg(struct hn_softc *sc)
{
    uint32_t size, pkts;
    int i;

    /*
     * Setup aggregation size.
     */
    if (sc->hn_agg_size < 0)
        size = UINT32_MAX;
    else
        size = sc->hn_agg_size;

    if (sc->hn_rndis_agg_size < size)
        size = sc->hn_rndis_agg_size;

    /* NOTE: We only aggregate packets using chimney sending buffers. */
    if (size > (uint32_t)sc->hn_chim_szmax)
        size = sc->hn_chim_szmax;

    if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
        /* Disable */
        size = 0;
        pkts = 0;
        goto done;
    }

    /* NOTE: Type of the per TX ring setting is 'int'. */
    if (size > INT_MAX)
        size = INT_MAX;

    /*
     * Setup aggregation packet count.
     */
    if (sc->hn_agg_pkts < 0)
        pkts = UINT32_MAX;
    else
        pkts = sc->hn_agg_pkts;

    if (sc->hn_rndis_agg_pkts < pkts)
        pkts = sc->hn_rndis_agg_pkts;

    if (pkts <= 1) {
        /* Disable */
        size = 0;
        pkts = 0;
        goto done;
    }

    /* NOTE: Type of the per TX ring setting is 'short'. */
    if (pkts > SHRT_MAX)
        pkts = SHRT_MAX;

done:
    /* NOTE: Type of the per TX ring setting is 'short'. */
    if (sc->hn_rndis_agg_align > SHRT_MAX) {
        /* Disable */
        size = 0;
        pkts = 0;
    }

    if (bootverbose) {
        if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
            size, pkts, sc->hn_rndis_agg_align);
    }

    for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
        struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

        mtx_lock(&txr->hn_tx_lock);
        txr->hn_agg_szmax = size;
        txr->hn_agg_pktmax = pkts;
        txr->hn_agg_align = sc->hn_rndis_agg_align;
        mtx_unlock(&txr->hn_tx_lock);
    }
}
static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{

    KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
    if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
        return (txr->hn_txdesc_cnt);
    return (hn_tx_swq_depth);
}
static int
hn_rss_reconfig(struct hn_softc *sc)
{
    int error;

    HN_LOCK_ASSERT(sc);

    if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
        return (ENXIO);

    /*
     * Disable RSS first.
     *
     * NOTE:
     * Direct reconfiguration by setting the UNCHG flags does
     * _not_ work properly.
     */
    if (bootverbose)
        if_printf(sc->hn_ifp, "disable RSS\n");
    error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
    if (error) {
        if_printf(sc->hn_ifp, "RSS disable failed\n");
        return (error);
    }

    /*
     * Reenable the RSS w/ the updated RSS key or indirect
     * table.
     */
    if (bootverbose)
        if_printf(sc->hn_ifp, "reconfig RSS\n");
    error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
    if (error) {
        if_printf(sc->hn_ifp, "RSS reconfig failed\n");
        return (error);
    }
    return (0);
}
static void
hn_rss_ind_fixup(struct hn_softc *sc)
{
    struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
    int i, nchan;

    nchan = sc->hn_rx_ring_inuse;
    KASSERT(nchan > 1, ("invalid # of channels %d", nchan));

    /*
     * Check indirect table to make sure that all channels in it
     * can be used.
     */
    for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
        if (rss->rss_ind[i] >= nchan) {
            if_printf(sc->hn_ifp,
                "RSS indirect table %d fixup: %u -> %d\n",
                i, rss->rss_ind[i], nchan - 1);
            rss->rss_ind[i] = nchan - 1;
        }
    }
}
static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{

    return (EOPNOTSUPP);
}

static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
    struct hn_softc *sc = ifp->if_softc;

    ifmr->ifm_status = IFM_AVALID;
    ifmr->ifm_active = IFM_ETHER;

    if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
        ifmr->ifm_active |= IFM_NONE;
        return;
    }
    ifmr->ifm_status |= IFM_ACTIVE;
    ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}
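
/*
 * Each RX ring's hn_vf pointer is read by the channel callback, so the
 * update is pushed through vmbus_chan_run_task() to serialize it with
 * the RX path running on that channel.
 */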
static void
hn_update_vf_task(void *arg, int pending __unused)
{
    struct hn_update_vf *uv = arg;

    uv->rxr->hn_vf = uv->vf;
}

static void
hn_update_vf(struct hn_softc *sc, struct ifnet *vf)
{
    struct hn_rx_ring *rxr;
    struct hn_update_vf uv;
    struct task task;
    int i;

    HN_LOCK_ASSERT(sc);

    TASK_INIT(&task, 0, hn_update_vf_task, &uv);

    for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
        rxr = &sc->hn_rx_ring[i];

        if (i < sc->hn_rx_ring_inuse) {
            uv.rxr = rxr;
            uv.vf = vf;
            vmbus_chan_run_task(rxr->hn_chan, &task);
        } else {
            rxr->hn_vf = vf;
        }
    }
}
static void
hn_set_vf(struct hn_softc *sc, struct ifnet *ifp, bool vf)
{
    struct ifnet *hn_ifp;

    HN_LOCK(sc);

    if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
        goto out;

    hn_ifp = sc->hn_ifp;

    if (ifp == hn_ifp)
        goto out;

    if (ifp->if_alloctype != IFT_ETHER)
        goto out;

    /* Ignore lagg/vlan interfaces */
    if (strcmp(ifp->if_dname, "lagg") == 0 ||
        strcmp(ifp->if_dname, "vlan") == 0)
        goto out;

    if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)
        goto out;

    /* Now we're sure 'ifp' is a real VF device. */
    if (vf) {
        if (sc->hn_flags & HN_FLAG_VF)
            goto out;

        sc->hn_flags |= HN_FLAG_VF;
        hn_rxfilter_config(sc);
    } else {
        if (!(sc->hn_flags & HN_FLAG_VF))
            goto out;

        sc->hn_flags &= ~HN_FLAG_VF;
        if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
            hn_rxfilter_config(sc);
        else
            hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
    }

    hn_nvs_set_datapath(sc,
        vf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTHETIC);

    hn_update_vf(sc, vf ? ifp : NULL);

    if (vf) {
        hn_suspend_mgmt(sc);
        sc->hn_link_flags &=
            ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
        if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
    } else {
        hn_resume_mgmt(sc);
    }

    devctl_notify("HYPERV_NIC_VF", if_name(hn_ifp),
        vf ? "VF_UP" : "VF_DOWN", NULL);

    if (bootverbose) {
        if_printf(hn_ifp, "Data path is switched %s %s\n",
            vf ? "to" : "from", if_name(ifp));
    }
out:
    HN_UNLOCK(sc);
}
static void
hn_ifnet_event(void *arg, struct ifnet *ifp, int event)
{

    if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN)
        return;
    hn_set_vf(arg, ifp, event == IFNET_EVENT_UP);
}

static void
hn_ifaddr_event(void *arg, struct ifnet *ifp)
{

    hn_set_vf(arg, ifp, ifp->if_flags & IFF_UP);
}
/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
static const struct hyperv_guid g_net_vsc_device_type = {
    .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
        0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
};

static int
hn_probe(device_t dev)
{

    if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
        &g_net_vsc_device_type) == 0) {
        device_set_desc(dev, "Hyper-V Network Interface");
        return (BUS_PROBE_DEFAULT);
    }
    return (ENXIO);
}
static int
hn_attach(device_t dev)
{
    struct hn_softc *sc = device_get_softc(dev);
    struct sysctl_oid_list *child;
    struct sysctl_ctx_list *ctx;
    uint8_t eaddr[ETHER_ADDR_LEN];
    struct ifnet *ifp = NULL;
    int error, ring_cnt, tx_ring_cnt;

    sc->hn_dev = dev;
    sc->hn_prichan = vmbus_get_channel(dev);
    HN_LOCK_INIT(sc);

    /*
     * Initialize these tunables once.
     */
    sc->hn_agg_size = hn_tx_agg_size;
    sc->hn_agg_pkts = hn_tx_agg_pkts;

    /*
     * Setup taskqueue for transmission.
     */
    if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
        int i;

        sc->hn_tx_taskqs =
            malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
            M_DEVBUF, M_WAITOK);
        for (i = 0; i < hn_tx_taskq_cnt; ++i) {
            sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
                M_WAITOK, taskqueue_thread_enqueue,
                &sc->hn_tx_taskqs[i]);
            taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
                "%s tx%d", device_get_nameunit(dev), i);
        }
    } else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
        sc->hn_tx_taskqs = hn_tx_taskque;
    }

    /*
     * Setup taskqueue for management tasks, e.g. link status.
     */
    sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
        taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
    taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
        device_get_nameunit(dev));
    TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
    TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
    TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
        hn_netchg_status_taskfunc, sc);

    /*
     * Allocate ifnet and setup its name earlier, so that if_printf
     * can be used by functions, which will be called after
     * ether_ifattach().
     */
    ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
    ifp->if_softc = sc;
    if_initname(ifp, device_get_name(dev), device_get_unit(dev));

    /*
     * Initialize ifmedia earlier so that it can be unconditionally
     * destroyed, if error happened later on.
     */
    ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
    /*
     * Figure out the # of RX rings (ring_cnt) and the # of TX rings
     * to use (tx_ring_cnt).
     *
     * NOTE:
     * The # of RX rings to use is same as the # of channels to use.
     */
    ring_cnt = hn_chan_cnt;
    if (ring_cnt <= 0) {
        /* Default */
        ring_cnt = mp_ncpus;
        if (ring_cnt > HN_RING_CNT_DEF_MAX)
            ring_cnt = HN_RING_CNT_DEF_MAX;
    } else if (ring_cnt > mp_ncpus) {
        ring_cnt = mp_ncpus;
    }

    tx_ring_cnt = hn_tx_ring_cnt;
    if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
        tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
    if (hn_use_if_start) {
        /* ifnet.if_start only needs one TX ring. */
        tx_ring_cnt = 1;
    }
#endif

    /*
     * Set the leader CPU for channels.
     */
    sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;

    /*
     * Create enough TX/RX rings, even if only limited number of
     * channels can be allocated.
     */
    error = hn_create_tx_data(sc, tx_ring_cnt);
    if (error)
        goto failed;
    error = hn_create_rx_data(sc, ring_cnt);
    if (error)
        goto failed;

    /*
     * Create transaction context for NVS and RNDIS transactions.
     */
    sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
        HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
    if (sc->hn_xact == NULL) {
        error = ENXIO;
        goto failed;
    }

    /*
     * Install orphan handler for the revocation of this device's
     * primary channel.
     *
     * NOTE:
     * The processing order is critical here:
     * Install the orphan handler, _before_ testing whether this
     * device's primary channel has been revoked or not.
     */
    vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
    if (vmbus_chan_is_revoked(sc->hn_prichan)) {
        error = ENXIO;
        goto failed;
    }

    /*
     * Attach the synthetic parts, i.e. NVS and RNDIS.
     */
    error = hn_synth_attach(sc, ETHERMTU);
    if (error)
        goto failed;

    error = hn_rndis_get_eaddr(sc, eaddr);
    if (error)
        goto failed;

#if __FreeBSD_version >= 1100099
    if (sc->hn_rx_ring_inuse > 1) {
        /*
         * Reduce TCP segment aggregation limit for multiple
         * RX rings to increase ACK timeliness.
         */
        hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
    }
#endif

    /*
     * Fixup TX stuffs after synthetic parts are attached.
     */
    hn_fixup_tx_data(sc);
    ctx = device_get_sysctl_ctx(dev);
    child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
        &sc->hn_nvs_ver, 0, "NVS version");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_ndis_version_sysctl, "A", "NDIS version");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_caps_sysctl, "A", "capabilities");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_hwassist_sysctl, "A", "hwassist");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_rxfilter_sysctl, "A", "rxfilter");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_rss_hash_sysctl, "A", "RSS hash");
    SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
        CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
        CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_rss_key_sysctl, "IU", "RSS key");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
        CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_rss_ind_sysctl, "IU", "RSS indirect table");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
        CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
        "RNDIS offered packet transmission aggregation size limit");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
        CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
        "RNDIS offered packet transmission aggregation count limit");
    SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
        CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
        "RNDIS packet transmission aggregation alignment");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_txagg_size_sysctl, "I",
        "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_txagg_pkts_sysctl, "I",
        "Packet transmission aggregation packets, "
        "0 -- disable, -1 -- auto");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
        CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
        hn_polling_sysctl, "I",
        "Polling frequency: [100,1000000], 0 disable polling");
    SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
        CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
        hn_vf_sysctl, "A", "Virtual Function's name");
    /*
     * Setup the ifmedia, which has been initialized earlier.
     */
    ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
    ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
    /* XXX ifmedia_set really should do this for us */
    sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;

    /*
     * Setup the ifnet for this interface.
     */
#ifdef __LP64__
    ifp->if_baudrate = IF_Gbps(10);
#else
    /* if_baudrate is 32bits on 32bit system. */
    ifp->if_baudrate = IF_Gbps(1);
#endif
    ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
    ifp->if_ioctl = hn_ioctl;
    ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT
    if (hn_use_if_start) {
        int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);

        ifp->if_start = hn_start;
        IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
        ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
        IFQ_SET_READY(&ifp->if_snd);
    } else
#endif
    {
        ifp->if_transmit = hn_transmit;
        ifp->if_qflush = hn_xmit_qflush;
    }

    ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
#ifdef foo
    /* We can't diff IPv6 packets from IPv4 packets on RX path. */
    ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif
    if (sc->hn_caps & HN_CAP_VLAN) {
        /* XXX not sure about VLAN_MTU. */
        ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
    }

    ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
    if (ifp->if_hwassist & HN_CSUM_IP_MASK)
        ifp->if_capabilities |= IFCAP_TXCSUM;
    if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
        ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
    if (sc->hn_caps & HN_CAP_TSO4) {
        ifp->if_capabilities |= IFCAP_TSO4;
        ifp->if_hwassist |= CSUM_IP_TSO;
    }
    if (sc->hn_caps & HN_CAP_TSO6) {
        ifp->if_capabilities |= IFCAP_TSO6;
        ifp->if_hwassist |= CSUM_IP6_TSO;
    }

    /* Enable all available capabilities by default. */
    ifp->if_capenable = ifp->if_capabilities;

    /*
     * Disable IPv6 TSO and TXCSUM by default, they still can
     * be enabled through SIOCSIFCAP.
     */
    ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
    ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);

    if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
        hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
        ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
        ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
    }

    ether_ifattach(ifp, eaddr);
    if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
        if_printf(ifp, "TSO segcnt %u segsz %u\n",
            ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
    }

    /* Inform the upper layer about the long frame support. */
    ifp->if_hdrlen = sizeof(struct ether_vlan_header);

    /*
     * Kick off link status check.
     */
    sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
    hn_update_link_status(sc);

    sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
        hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
    sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
        hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);

    return (0);
failed:
    if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
        hn_synth_detach(sc);
    hn_detach(dev);
    return (error);
}
static int
hn_detach(device_t dev)
{
    struct hn_softc *sc = device_get_softc(dev);
    struct ifnet *ifp = sc->hn_ifp;

    if (sc->hn_ifaddr_evthand != NULL)
        EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
    if (sc->hn_ifnet_evthand != NULL)
        EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);

    if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
        /*
         * In case that the vmbus missed the orphan handler
         * installation.
         */
        vmbus_xact_ctx_orphan(sc->hn_xact);
    }

    if (device_is_attached(dev)) {
        HN_LOCK(sc);
        if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
            if (ifp->if_drv_flags & IFF_DRV_RUNNING)
                hn_stop(sc, true);
            /*
             * NOTE:
             * hn_stop() only suspends data, so management
             * stuffs have to be suspended manually here.
             */
            hn_suspend_mgmt(sc);
            hn_synth_detach(sc);
        }
        HN_UNLOCK(sc);
        ether_ifdetach(ifp);
    }

    ifmedia_removeall(&sc->hn_media);
    hn_destroy_rx_data(sc);
    hn_destroy_tx_data(sc);

    if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
        int i;

        for (i = 0; i < hn_tx_taskq_cnt; ++i)
            taskqueue_free(sc->hn_tx_taskqs[i]);
        free(sc->hn_tx_taskqs, M_DEVBUF);
    }
    taskqueue_free(sc->hn_mgmt_taskq0);

    if (sc->hn_xact != NULL) {
        /*
         * Uninstall the orphan handler _before_ the xact is
         * destructed.
         */
        vmbus_chan_unset_orphan(sc->hn_prichan);
        vmbus_xact_ctx_destroy(sc->hn_xact);
    }

    if_free(ifp);

    HN_LOCK_DESTROY(sc);
    return (0);
}
static int
hn_shutdown(device_t dev)
{

    return (0);
}
static void
hn_link_status(struct hn_softc *sc)
{
    uint32_t link_status;
    int error;

    error = hn_rndis_get_linkstatus(sc, &link_status);
    if (error) {
        /* XXX what to do? */
        return;
    }

    if (link_status == NDIS_MEDIA_STATE_CONNECTED)
        sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
    else
        sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
    if_link_state_change(sc->hn_ifp,
        (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
        LINK_STATE_UP : LINK_STATE_DOWN);
}
static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
    struct hn_softc *sc = xsc;

    if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
        return;
    hn_link_status(sc);
}

static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
    struct hn_softc *sc = xsc;

    /* Prevent any link status checks from running. */
    sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;

    /*
     * Fake up a [link down --> link up] state change; 5 seconds
     * delay is used, which closely simulates miibus reaction
     * upon link down event.
     */
    sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
    if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
    taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
        &sc->hn_netchg_status, 5 * hz);
}

static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
    struct hn_softc *sc = xsc;

    /* Re-allow link status checks. */
    sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
    hn_link_status(sc);
}

static void
hn_update_link_status(struct hn_softc *sc)
{

    if (sc->hn_mgmt_taskq != NULL)
        taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}

static void
hn_change_network(struct hn_softc *sc)
{

    if (sc->hn_mgmt_taskq != NULL)
        taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}
static int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
    struct mbuf *m = *m_head;
    int error;

    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

    error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
        m, segs, nsegs, BUS_DMA_NOWAIT);
    if (error == EFBIG) {
        struct mbuf *m_new;

        m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
        if (m_new == NULL)
            return (ENOBUFS);
        *m_head = m = m_new;
        txr->hn_tx_collapsed++;

        error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
            txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
    }
    if (!error) {
        bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
            BUS_DMASYNC_PREWRITE);
        txd->flags |= HN_TXD_FLAG_DMAMAP;
    }
    return (error);
}
static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

    KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
        ("put an onlist txd %#x", txd->flags));
    KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
        ("put an onagg txd %#x", txd->flags));

    KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
    if (atomic_fetchadd_int(&txd->refs, -1) != 1)
        return 0;

    if (!STAILQ_EMPTY(&txd->agg_list)) {
        struct hn_txdesc *tmp_txd;

        while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
            int freed;

            KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
                ("recursive aggregation on aggregated txdesc"));
            KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
                ("not aggregated txdesc"));
            KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
                ("aggregated txdesc uses dmamap"));
            KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
                ("aggregated txdesc consumes "
                 "chimney sending buffer"));
            KASSERT(tmp_txd->chim_size == 0,
                ("aggregated txdesc has non-zero "
                 "chimney sending size"));

            STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
            tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
            freed = hn_txdesc_put(txr, tmp_txd);
            KASSERT(freed, ("failed to free aggregated txdesc"));
        }
    }

    if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
        KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
            ("chim txd uses dmamap"));
        hn_chim_free(txr->hn_sc, txd->chim_index);
        txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
        txd->chim_size = 0;
    } else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
        bus_dmamap_sync(txr->hn_tx_data_dtag,
            txd->data_dmap, BUS_DMASYNC_POSTWRITE);
        bus_dmamap_unload(txr->hn_tx_data_dtag,
            txd->data_dmap);
        txd->flags &= ~HN_TXD_FLAG_DMAMAP;
    }

    if (txd->m != NULL) {
        m_freem(txd->m);
        txd->m = NULL;
    }

    txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    KASSERT(txr->hn_txdesc_avail >= 0 &&
        txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
        ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
    txr->hn_txdesc_avail++;
    SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
    mtx_unlock_spin(&txr->hn_txlist_spin);
#else	/* HN_USE_TXDESC_BUFRING */
#ifdef HN_DEBUG
    atomic_add_int(&txr->hn_txdesc_avail, 1);
#endif
    buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif	/* !HN_USE_TXDESC_BUFRING */

    return 1;
}
static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
{
    struct hn_txdesc *txd;

#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    txd = SLIST_FIRST(&txr->hn_txlist);
    if (txd != NULL) {
        KASSERT(txr->hn_txdesc_avail > 0,
            ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
        txr->hn_txdesc_avail--;
        SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
    }
    mtx_unlock_spin(&txr->hn_txlist_spin);
#else
    txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
#endif

    if (txd != NULL) {
#ifdef HN_USE_TXDESC_BUFRING
#ifdef HN_DEBUG
        atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif
#endif	/* HN_USE_TXDESC_BUFRING */
        KASSERT(txd->m == NULL && txd->refs == 0 &&
            STAILQ_EMPTY(&txd->agg_list) &&
            txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
            txd->chim_size == 0 &&
            (txd->flags & HN_TXD_FLAG_ONLIST) &&
            (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
            (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
        txd->flags &= ~HN_TXD_FLAG_ONLIST;
        txd->refs = 1;
    }
    return txd;
}
static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
{

    /* 0->1 transition will never work */
    KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
    atomic_add_int(&txd->refs, 1);
}

static __inline void
hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
{

    KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
        ("recursive aggregation on aggregating txdesc"));

    KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
        ("already aggregated"));
    KASSERT(STAILQ_EMPTY(&txd->agg_list),
        ("recursive aggregation on to-be-aggregated txdesc"));

    txd->flags |= HN_TXD_FLAG_ONAGG;
    STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
}
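
/*
 * Returns true if any TX descriptor of the ring is still outstanding,
 * i.e. the free list (or buf_ring) does not hold all of them.
 */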
static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
    bool pending = false;

#ifndef HN_USE_TXDESC_BUFRING
    mtx_lock_spin(&txr->hn_txlist_spin);
    if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
        pending = true;
    mtx_unlock_spin(&txr->hn_txlist_spin);
#else
    if (!buf_ring_full(txr->hn_txdesc_br))
        pending = true;
#endif
    return (pending);
}

static __inline void
hn_txeof(struct hn_tx_ring *txr)
{
    txr->hn_has_txeof = 0;
    txr->hn_txeof(txr);
}
static void
hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
    struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
{
    struct hn_txdesc *txd = sndc->hn_cbarg;
    struct hn_tx_ring *txr;

    txr = txd->txr;
    KASSERT(txr->hn_chan == chan,
        ("channel mismatch, on chan%u, should be chan%u",
         vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));

    txr->hn_has_txeof = 1;
    hn_txdesc_put(txr, txd);

    ++txr->hn_txdone_cnt;
    if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
        txr->hn_txdone_cnt = 0;
        if (txr->hn_oactive)
            hn_txeof(txr);
    }
}
static void
hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
{
#if defined(INET) || defined(INET6)
    struct lro_ctrl *lro = &rxr->hn_lro;
    struct lro_entry *queued;

    while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
        SLIST_REMOVE_HEAD(&lro->lro_active, next);
        tcp_lro_flush(lro, queued);
    }
#endif

    /*
     * NOTE:
     * 'txr' could be NULL, if multiple channels and
     * ifnet.if_start method are enabled.
     */
    if (txr == NULL || !txr->hn_has_txeof)
        return;

    txr->hn_txdone_cnt = 0;
    hn_txeof(txr);
}
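
/*
 * RNDIS packet message offsets are counted from the rm_dataoffset
 * field rather than from the start of the message, so convert an
 * offset computed from the message start by subtracting the field's
 * own offset.
 */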
static __inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{

    KASSERT(ofs >= sizeof(struct rndis_packet_msg),
        ("invalid RNDIS packet msg offset %u", ofs));
    return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
}
static __inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
    size_t pi_dlen, uint32_t pi_type)
{
    const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
    struct rndis_pktinfo *pi;

    KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
        ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));

    /*
     * Per-packet-info does not move; it only grows.
     *
     * NOTE:
     * rm_pktinfooffset in this phase counts from the beginning
     * of rndis_packet_msg.
     */
    KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
        ("%u pktinfo overflows RNDIS packet msg", pi_type));
    pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
        pkt->rm_pktinfolen);
    pkt->rm_pktinfolen += pi_size;

    pi->rm_size = pi_size;
    pi->rm_type = pi_type;
    pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;

    /* Data immediately follow per-packet-info. */
    pkt->rm_dataoffset += pi_size;

    /* Update RNDIS packet msg length */
    pkt->rm_len += pi_size;

    return (pi->rm_data);
}
static __inline int
hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr)
{
    struct hn_txdesc *txd;
    struct mbuf *m;
    int error, pkts;

    txd = txr->hn_agg_txd;
    KASSERT(txd != NULL, ("no aggregate txdesc"));

    /*
     * Since hn_txpkt() will reset this temporary stat, save
     * it now, so that oerrors can be updated properly, if
     * hn_txpkt() ever fails.
     */
    pkts = txr->hn_stat_pkts;

    /*
     * Since txd's mbuf will _not_ be freed upon hn_txpkt()
     * failure, save it for later freeing, if hn_txpkt() ever
     * fails.
     */
    m = txd->m;
    error = hn_txpkt(ifp, txr, txd);
    if (__predict_false(error)) {
        /* txd is freed, but m is not. */
        m_freem(m);

        txr->hn_flush_failed++;
        if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts);
    }

    /* Reset all aggregation states. */
    txr->hn_agg_txd = NULL;
    txr->hn_agg_szleft = 0;
    txr->hn_agg_pktleft = 0;
    txr->hn_agg_prevpkt = NULL;

    return (error);
}
static void *
hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
    int pktsize)
{
    void *chim;

    if (txr->hn_agg_txd != NULL) {
        if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) {
            struct hn_txdesc *agg_txd = txr->hn_agg_txd;
            struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt;
            int olen;

            /*
             * Update the previous RNDIS packet's total length,
             * it can be increased due to the mandatory alignment
             * padding for this RNDIS packet.  And update the
             * aggregating txdesc's chimney sending buffer size
             * accordingly.
             *
             * XXX
             * Zero-out the padding, as required by the RNDIS spec.
             */
            olen = pkt->rm_len;
            pkt->rm_len = roundup2(olen, txr->hn_agg_align);
            agg_txd->chim_size += pkt->rm_len - olen;

            /* Link this txdesc to the parent. */
            hn_txdesc_agg(agg_txd, txd);

            chim = (uint8_t *)pkt + pkt->rm_len;
            /* Save the current packet for later fixup. */
            txr->hn_agg_prevpkt = chim;

            txr->hn_agg_pktleft--;
            txr->hn_agg_szleft -= pktsize;
            if (txr->hn_agg_szleft <=
                HN_PKTSIZE_MIN(txr->hn_agg_align)) {
                /*
                 * Probably can't aggregate more packets,
                 * flush this aggregating txdesc proactively.
                 */
                txr->hn_agg_pktleft = 0;
            }
            /* Done! */
            return (chim);
        }
        hn_flush_txagg(ifp, txr);
    }
    KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));

    txr->hn_tx_chimney_tried++;
    txd->chim_index = hn_chim_alloc(txr->hn_sc);
    if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID)
        return (NULL);
    txr->hn_tx_chimney++;

    chim = txr->hn_sc->hn_chim +
        (txd->chim_index * txr->hn_sc->hn_chim_szmax);

    if (txr->hn_agg_pktmax > 1 &&
        txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) {
        txr->hn_agg_txd = txd;
        txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1;
        txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize;
        txr->hn_agg_prevpkt = chim;
    }
    return (chim);
}
/*
 * NOTE:
 * If this function fails, then both txd and m_head0 will be freed.
 */
static int
hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head0)
{
    bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
    int error, nsegs, i;
    struct mbuf *m_head = *m_head0;
    struct rndis_packet_msg *pkt;
    uint32_t *pi_data;
    void *chim = NULL;
    int pkt_hlen, pkt_size;

    pkt = txd->rndis_pkt;
    pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align);
    if (pkt_size < txr->hn_chim_size) {
        chim = hn_try_txagg(ifp, txr, txd, pkt_size);
        if (chim != NULL)
            pkt = chim;
    } else {
        if (txr->hn_agg_txd != NULL)
            hn_flush_txagg(ifp, txr);
    }

    pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
    pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len;
    pkt->rm_dataoffset = sizeof(*pkt);
    pkt->rm_datalen = m_head->m_pkthdr.len;
    pkt->rm_oobdataoffset = 0;
    pkt->rm_oobdatalen = 0;
    pkt->rm_oobdataelements = 0;
    pkt->rm_pktinfooffset = sizeof(*pkt);
    pkt->rm_pktinfolen = 0;
    pkt->rm_vchandle = 0;
    pkt->rm_reserved = 0;

    if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
        /*
         * Set the hash value for this packet, so that the host could
         * dispatch the TX done event for this packet back to this TX
         * ring's channel.
         */
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
        *pi_data = txr->hn_tx_idx;
    }

    if (m_head->m_flags & M_VLANTAG) {
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
        *pi_data = NDIS_VLAN_INFO_MAKE(
            EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
            EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
            EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
    }

    if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
#if defined(INET6) || defined(INET)
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
#ifdef INET
        if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
            *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0,
                m_head->m_pkthdr.tso_segsz);
        }
#endif
#if defined(INET6) && defined(INET)
        else
#endif
#ifdef INET6
        {
            *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0,
                m_head->m_pkthdr.tso_segsz);
        }
#endif
#endif	/* INET6 || INET */
    } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
        pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
            NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
        if (m_head->m_pkthdr.csum_flags &
            (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
            *pi_data = NDIS_TXCSUM_INFO_IPV6;
        } else {
            *pi_data = NDIS_TXCSUM_INFO_IPV4;
            if (m_head->m_pkthdr.csum_flags & CSUM_IP)
                *pi_data |= NDIS_TXCSUM_INFO_IPCS;
        }

        if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
            *pi_data |= NDIS_TXCSUM_INFO_TCPCS;
        else if (m_head->m_pkthdr.csum_flags &
            (CSUM_IP_UDP | CSUM_IP6_UDP))
            *pi_data |= NDIS_TXCSUM_INFO_UDPCS;
    }

    pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
    /* Convert RNDIS packet message offsets */
    pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset);
    pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);

    /*
     * Fast path: Chimney sending.
     */
    if (chim != NULL) {
        struct hn_txdesc *tgt_txd = txd;

        if (txr->hn_agg_txd != NULL) {
            tgt_txd = txr->hn_agg_txd;
        }

        KASSERT(pkt == chim,
            ("RNDIS pkt not in chimney sending buffer"));
        KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
            ("chimney sending buffer is not used"));
        tgt_txd->chim_size += pkt->rm_len;

        m_copydata(m_head, 0, m_head->m_pkthdr.len,
            ((uint8_t *)chim) + pkt_hlen);

        txr->hn_gpa_cnt = 0;
        txr->hn_sendpkt = hn_txpkt_chim;
        goto done;
    }

    KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc"));
    KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
        ("chimney buffer is used"));
    KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));

    error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
    if (__predict_false(error)) {
        int freed;

        /*
         * This mbuf is not linked w/ the txd yet, so free it now.
         */
        m_freem(m_head);
        *m_head0 = NULL;

        freed = hn_txdesc_put(txr, txd);
        KASSERT(freed != 0,
            ("fail to free txd upon txdma error"));

        txr->hn_txdma_failed++;
        if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
        return (error);
    }
    *m_head0 = m_head;

    /* +1 RNDIS packet message */
    txr->hn_gpa_cnt = nsegs + 1;

    /* send packet with page buffer */
    txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
    txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
    txr->hn_gpa[0].gpa_len = pkt_hlen;

    /*
     * Fill the page buffers with mbuf info after the page
     * buffer for RNDIS packet message.
     */
    for (i = 0; i < nsegs; ++i) {
        struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];

        gpa->gpa_page = atop(segs[i].ds_addr);
        gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
        gpa->gpa_len = segs[i].ds_len;
    }

    txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
    txd->chim_size = 0;
    txr->hn_sendpkt = hn_txpkt_sglist;
done:
    txd->m = m_head;

    /* Set the completion routine */
    hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);

    /* Update temporary stats for later use. */
    txr->hn_stat_pkts++;
    txr->hn_stat_size += m_head->m_pkthdr.len;
    if (m_head->m_flags & M_MCAST)
        txr->hn_stat_mcasts++;

    return 0;
}
/*
 * NOTE:
 * If this function fails, then txd will be freed, but the mbuf
 * associated w/ the txd will _not_ be freed.
 */
static int
hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
    int error, send_failed = 0, has_bpf;

again:
    has_bpf = bpf_peers_present(ifp->if_bpf);
    if (has_bpf) {
        /*
         * Make sure that this txd and any aggregated txds are not
         * freed before ETHER_BPF_MTAP.
         */
        hn_txdesc_hold(txd);
    }
    error = txr->hn_sendpkt(txr, txd);
    if (!error) {
        if (has_bpf) {
            const struct hn_txdesc *tmp_txd;

            ETHER_BPF_MTAP(ifp, txd->m);
            STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
                ETHER_BPF_MTAP(ifp, tmp_txd->m);
        }

        if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts);
#ifdef HN_IFSTART_SUPPORT
        if (!hn_use_if_start)
#endif
        {
            if_inc_counter(ifp, IFCOUNTER_OBYTES,
                txr->hn_stat_size);
            if (txr->hn_stat_mcasts != 0) {
                if_inc_counter(ifp, IFCOUNTER_OMCASTS,
                    txr->hn_stat_mcasts);
            }
        }
        txr->hn_pkts += txr->hn_stat_pkts;
    }
    if (has_bpf)
        hn_txdesc_put(txr, txd);

    if (__predict_false(error)) {
        int freed;

        /*
         * This should "really rarely" happen.
         *
         * XXX Too many RX to be acked or too many sideband
         * commands to run?  Ask netvsc_channel_rollup()
         * to kick start later.
         */
        txr->hn_has_txeof = 1;
        if (!send_failed) {
            txr->hn_send_failed++;
            send_failed = 1;
            /*
             * Try sending again after set hn_has_txeof;
             * in case that we missed the last
             * netvsc_channel_rollup().
             */
            goto again;
        }
        if_printf(ifp, "send failed\n");

        /*
         * Caller will perform further processing on the
         * associated mbuf, so don't free it in hn_txdesc_put();
         * only unload it from the DMA map in hn_txdesc_put(),
         * if it was loaded.
         */
        txd->m = NULL;
        freed = hn_txdesc_put(txr, txd);
        KASSERT(freed != 0,
            ("fail to free txd upon send error"));

        txr->hn_send_failed++;
    }

    /* Reset temporary stats, after this sending is done. */
    txr->hn_stat_size = 0;
    txr->hn_stat_pkts = 0;
    txr->hn_stat_mcasts = 0;

    return (error);
}
2181 * Append the specified data to the indicated mbuf chain,
2182 * Extend the mbuf chain if the new data does not fit in
2185 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
2186 * There should be an equivalent in the kernel mbuf code,
2187 * but there does not appear to be one yet.
2189 * Differs from m_append() in that additional mbufs are
2190 * allocated with cluster size MJUMPAGESIZE, and filled
2193 * Return 1 if able to complete the job; otherwise 0.
2196 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
2199 int remainder, space;
2201 for (m = m0; m->m_next != NULL; m = m->m_next)
2204 space = M_TRAILINGSPACE(m);
2207 * Copy into available space.
2209 if (space > remainder)
2211 bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
2216 while (remainder > 0) {
2218 * Allocate a new mbuf; could check space
2219 * and allocate a cluster instead.
2221 n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
2224 n->m_len = min(MJUMPAGESIZE, remainder);
2225 bcopy(cp, mtod(n, caddr_t), n->m_len);
2227 remainder -= n->m_len;
2231 if (m0->m_flags & M_PKTHDR)
2232 m0->m_pkthdr.len += len - remainder;
2234 return (remainder == 0);
2237 #if defined(INET) || defined(INET6)
2239 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
2241 #if __FreeBSD_version >= 1100095
2242 if (hn_lro_mbufq_depth) {
2243 tcp_lro_queue_mbuf(lc, m);
2247 return tcp_lro_rx(lc, m, 0);
2252 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
2253 const struct hn_rxinfo *info)
2257 int size, do_lro = 0, do_csum = 1;
2258 int hash_type = M_HASHTYPE_OPAQUE;
2260 /* If the VF is active, inject the packet through the VF */
2261 ifp = rxr->hn_vf ? rxr->hn_vf : rxr->hn_ifp;
2263 if (dlen <= MHLEN) {
2264 m_new = m_gethdr(M_NOWAIT, MT_DATA);
2265 if (m_new == NULL) {
2266 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
2269 memcpy(mtod(m_new, void *), data, dlen);
2270 m_new->m_pkthdr.len = m_new->m_len = dlen;
2271 rxr->hn_small_pkts++;
2274 * Get an mbuf with a cluster. For packets 2K or less,
2275 * get a standard 2K cluster. For anything larger, get a
2276 * 4K cluster. Any buffers larger than 4K can cause problems
2277 * if looped around to the Hyper-V TX channel, so avoid them.
2280 if (dlen > MCLBYTES) {
2282 size = MJUMPAGESIZE;
2285 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
2286 if (m_new == NULL) {
2287 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
2291 hv_m_append(m_new, dlen, data);
2293 m_new->m_pkthdr.rcvif = ifp;
2295 if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
2298 /* receive side checksum offload */
2299 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
2300 /* IP csum offload */
2301 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
2302 m_new->m_pkthdr.csum_flags |=
2303 (CSUM_IP_CHECKED | CSUM_IP_VALID);
2307 /* TCP/UDP csum offload */
2308 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
2309 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
2310 m_new->m_pkthdr.csum_flags |=
2311 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2312 m_new->m_pkthdr.csum_data = 0xffff;
2313 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
2321 * As of this write (Oct 28th, 2016), host side will turn
2322 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so
2323 * the do_lro setting here is actually _not_ accurate. We
2324 * depend on the RSS hash type check to reset do_lro.
2326 if ((info->csum_info &
2327 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
2328 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
2331 const struct ether_header *eh;
2336 if (m_new->m_len < hoff)
2338 eh = mtod(m_new, struct ether_header *);
2339 etype = ntohs(eh->ether_type);
2340 if (etype == ETHERTYPE_VLAN) {
2341 const struct ether_vlan_header *evl;
2343 hoff = sizeof(*evl);
2344 if (m_new->m_len < hoff)
2346 evl = mtod(m_new, struct ether_vlan_header *);
2347 etype = ntohs(evl->evl_proto);
2350 if (etype == ETHERTYPE_IP) {
2353 pr = hn_check_iplen(m_new, hoff);
2354 if (pr == IPPROTO_TCP) {
2356 (rxr->hn_trust_hcsum &
2357 HN_TRUST_HCSUM_TCP)) {
2358 rxr->hn_csum_trusted++;
2359 m_new->m_pkthdr.csum_flags |=
2360 (CSUM_IP_CHECKED | CSUM_IP_VALID |
2361 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2362 m_new->m_pkthdr.csum_data = 0xffff;
2365 } else if (pr == IPPROTO_UDP) {
2367 (rxr->hn_trust_hcsum &
2368 HN_TRUST_HCSUM_UDP)) {
2369 rxr->hn_csum_trusted++;
2370 m_new->m_pkthdr.csum_flags |=
2371 (CSUM_IP_CHECKED | CSUM_IP_VALID |
2372 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2373 m_new->m_pkthdr.csum_data = 0xffff;
2375 } else if (pr != IPPROTO_DONE && do_csum &&
2376 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
2377 rxr->hn_csum_trusted++;
2378 m_new->m_pkthdr.csum_flags |=
2379 (CSUM_IP_CHECKED | CSUM_IP_VALID);
2384 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
2385 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
2386 NDIS_VLAN_INFO_ID(info->vlan_info),
2387 NDIS_VLAN_INFO_PRI(info->vlan_info),
2388 NDIS_VLAN_INFO_CFI(info->vlan_info));
2389 m_new->m_flags |= M_VLANTAG;
2392 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
2394 m_new->m_pkthdr.flowid = info->hash_value;
2395 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
2396 NDIS_HASH_FUNCTION_TOEPLITZ) {
2397 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK);
2401 * do_lro is resetted, if the hash types are not TCP
2402 * related. See the comment in the above csum_flags
2406 case NDIS_HASH_IPV4:
2407 hash_type = M_HASHTYPE_RSS_IPV4;
2411 case NDIS_HASH_TCP_IPV4:
2412 hash_type = M_HASHTYPE_RSS_TCP_IPV4;
2415 case NDIS_HASH_IPV6:
2416 hash_type = M_HASHTYPE_RSS_IPV6;
2420 case NDIS_HASH_IPV6_EX:
2421 hash_type = M_HASHTYPE_RSS_IPV6_EX;
2425 case NDIS_HASH_TCP_IPV6:
2426 hash_type = M_HASHTYPE_RSS_TCP_IPV6;
2429 case NDIS_HASH_TCP_IPV6_EX:
2430 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
2435 m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
2437 M_HASHTYPE_SET(m_new, hash_type);
2440 * Note: Moved RX completion back to hv_nv_on_receive() so all
2441 * messages (not just data messages) will trigger a response.
2447 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
2448 #if defined(INET) || defined(INET6)
2449 struct lro_ctrl *lro = &rxr->hn_lro;
2452 rxr->hn_lro_tried++;
2453 if (hn_lro_rx(lro, m_new) == 0) {
2461 /* We're not holding the lock here, so don't release it */
2462 (*ifp->if_input)(ifp, m_new);
2468 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2470 struct hn_softc *sc = ifp->if_softc;
2471 struct ifreq *ifr = (struct ifreq *)data;
2472 int mask, error = 0;
2476 if (ifr->ifr_mtu > HN_MTU_MAX) {
2483 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2488 if ((sc->hn_caps & HN_CAP_MTU) == 0) {
2489 /* Can't change MTU */
2495 if (ifp->if_mtu == ifr->ifr_mtu) {
2501 * Suspend this interface before the synthetic parts
2507 * Detach the synthetics parts, i.e. NVS and RNDIS.
2509 hn_synth_detach(sc);
2512 * Reattach the synthetic parts, i.e. NVS and RNDIS,
2513 * with the new MTU setting.
2515 error = hn_synth_attach(sc, ifr->ifr_mtu);
2522 * Commit the requested MTU, after the synthetic parts
2523 * have been successfully attached.
2525 ifp->if_mtu = ifr->ifr_mtu;
2528 * Make sure that various parameters based on MTU are
2529 * still valid, after the MTU change.
2531 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
2532 hn_set_chim_size(sc, sc->hn_chim_szmax);
2533 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
2534 #if __FreeBSD_version >= 1100099
2535 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
2536 HN_LRO_LENLIM_MIN(ifp))
2537 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
2541 * All done! Resume the interface now.
2551 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2556 if (ifp->if_flags & IFF_UP) {
2557 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2559 * Caller meight hold mutex, e.g.
2560 * bpf; use busy-wait for the RNDIS
2564 hn_rxfilter_config(sc);
2570 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2573 sc->hn_if_flags = ifp->if_flags;
2580 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2582 if (mask & IFCAP_TXCSUM) {
2583 ifp->if_capenable ^= IFCAP_TXCSUM;
2584 if (ifp->if_capenable & IFCAP_TXCSUM)
2585 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
2587 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
2589 if (mask & IFCAP_TXCSUM_IPV6) {
2590 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2591 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2592 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
2594 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
2597 /* TODO: flip RNDIS offload parameters for RXCSUM. */
2598 if (mask & IFCAP_RXCSUM)
2599 ifp->if_capenable ^= IFCAP_RXCSUM;
2601 /* We can't diff IPv6 packets from IPv4 packets on RX path. */
2602 if (mask & IFCAP_RXCSUM_IPV6)
2603 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2606 if (mask & IFCAP_LRO)
2607 ifp->if_capenable ^= IFCAP_LRO;
2609 if (mask & IFCAP_TSO4) {
2610 ifp->if_capenable ^= IFCAP_TSO4;
2611 if (ifp->if_capenable & IFCAP_TSO4)
2612 ifp->if_hwassist |= CSUM_IP_TSO;
2614 ifp->if_hwassist &= ~CSUM_IP_TSO;
2616 if (mask & IFCAP_TSO6) {
2617 ifp->if_capenable ^= IFCAP_TSO6;
2618 if (ifp->if_capenable & IFCAP_TSO6)
2619 ifp->if_hwassist |= CSUM_IP6_TSO;
2621 ifp->if_hwassist &= ~CSUM_IP6_TSO;
2631 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2635 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2637 * Multicast uses mutex; use busy-wait for
2641 hn_rxfilter_config(sc);
2650 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
2654 error = ether_ioctl(ifp, cmd, data);
2661 hn_stop(struct hn_softc *sc, bool detaching)
2663 struct ifnet *ifp = sc->hn_ifp;
2668 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
2669 ("synthetic parts were not attached"));
2671 /* Disable polling. */
2674 /* Clear RUNNING bit _before_ hn_suspend_data() */
2675 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
2676 hn_suspend_data(sc);
2678 /* Clear OACTIVE bit. */
2679 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
2680 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
2681 sc->hn_tx_ring[i].hn_oactive = 0;
2684 * If the VF is active, make sure the filter is not 0, even if
2685 * the synthetic NIC is down.
2687 if (!detaching && (sc->hn_flags & HN_FLAG_VF))
2688 hn_rxfilter_config(sc);
2692 hn_init_locked(struct hn_softc *sc)
2694 struct ifnet *ifp = sc->hn_ifp;
2699 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
2702 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2705 /* Configure RX filter */
2706 hn_rxfilter_config(sc);
2708 /* Clear OACTIVE bit. */
2709 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
2710 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
2711 sc->hn_tx_ring[i].hn_oactive = 0;
2713 /* Clear TX 'suspended' bit. */
2714 hn_resume_tx(sc, sc->hn_tx_ring_inuse);
2716 /* Everything is ready; unleash! */
2717 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
2719 /* Re-enable polling if requested. */
2720 if (sc->hn_pollhz > 0)
2721 hn_polling(sc, sc->hn_pollhz);
2727 struct hn_softc *sc = xsc;
2734 #if __FreeBSD_version >= 1100099
2737 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
2739 struct hn_softc *sc = arg1;
2740 unsigned int lenlim;
2743 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
2744 error = sysctl_handle_int(oidp, &lenlim, 0, req);
2745 if (error || req->newptr == NULL)
2749 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
2750 lenlim > TCP_LRO_LENGTH_MAX) {
2754 hn_set_lro_lenlim(sc, lenlim);
2761 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
2763 struct hn_softc *sc = arg1;
2764 int ackcnt, error, i;
2767 * lro_ackcnt_lim is append count limit,
2768 * +1 to turn it into aggregation limit.
2770 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
2771 error = sysctl_handle_int(oidp, &ackcnt, 0, req);
2772 if (error || req->newptr == NULL)
2775 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
2779 * Convert aggregation limit back to append
2784 for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
2785 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
2793 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
2795 struct hn_softc *sc = arg1;
2800 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
2803 error = sysctl_handle_int(oidp, &on, 0, req);
2804 if (error || req->newptr == NULL)
2808 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2809 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
2812 rxr->hn_trust_hcsum |= hcsum;
2814 rxr->hn_trust_hcsum &= ~hcsum;
2821 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
2823 struct hn_softc *sc = arg1;
2824 int chim_size, error;
2826 chim_size = sc->hn_tx_ring[0].hn_chim_size;
2827 error = sysctl_handle_int(oidp, &chim_size, 0, req);
2828 if (error || req->newptr == NULL)
2831 if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
2835 hn_set_chim_size(sc, chim_size);
2840 #if __FreeBSD_version < 1100095
2842 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
2844 struct hn_softc *sc = arg1;
2845 int ofs = arg2, i, error;
2846 struct hn_rx_ring *rxr;
2850 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2851 rxr = &sc->hn_rx_ring[i];
2852 stat += *((int *)((uint8_t *)rxr + ofs));
2855 error = sysctl_handle_64(oidp, &stat, 0, req);
2856 if (error || req->newptr == NULL)
2859 /* Zero out this stat. */
2860 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2861 rxr = &sc->hn_rx_ring[i];
2862 *((int *)((uint8_t *)rxr + ofs)) = 0;
2868 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
2870 struct hn_softc *sc = arg1;
2871 int ofs = arg2, i, error;
2872 struct hn_rx_ring *rxr;
2876 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2877 rxr = &sc->hn_rx_ring[i];
2878 stat += *((uint64_t *)((uint8_t *)rxr + ofs));
2881 error = sysctl_handle_64(oidp, &stat, 0, req);
2882 if (error || req->newptr == NULL)
2885 /* Zero out this stat. */
2886 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2887 rxr = &sc->hn_rx_ring[i];
2888 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
2896 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2898 struct hn_softc *sc = arg1;
2899 int ofs = arg2, i, error;
2900 struct hn_rx_ring *rxr;
2904 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2905 rxr = &sc->hn_rx_ring[i];
2906 stat += *((u_long *)((uint8_t *)rxr + ofs));
2909 error = sysctl_handle_long(oidp, &stat, 0, req);
2910 if (error || req->newptr == NULL)
2913 /* Zero out this stat. */
2914 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2915 rxr = &sc->hn_rx_ring[i];
2916 *((u_long *)((uint8_t *)rxr + ofs)) = 0;
2922 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2924 struct hn_softc *sc = arg1;
2925 int ofs = arg2, i, error;
2926 struct hn_tx_ring *txr;
2930 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
2931 txr = &sc->hn_tx_ring[i];
2932 stat += *((u_long *)((uint8_t *)txr + ofs));
2935 error = sysctl_handle_long(oidp, &stat, 0, req);
2936 if (error || req->newptr == NULL)
2939 /* Zero out this stat. */
2940 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
2941 txr = &sc->hn_tx_ring[i];
2942 *((u_long *)((uint8_t *)txr + ofs)) = 0;
2948 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
2950 struct hn_softc *sc = arg1;
2951 int ofs = arg2, i, error, conf;
2952 struct hn_tx_ring *txr;
2954 txr = &sc->hn_tx_ring[0];
2955 conf = *((int *)((uint8_t *)txr + ofs));
2957 error = sysctl_handle_int(oidp, &conf, 0, req);
2958 if (error || req->newptr == NULL)
2962 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
2963 txr = &sc->hn_tx_ring[i];
2964 *((int *)((uint8_t *)txr + ofs)) = conf;
2972 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS)
2974 struct hn_softc *sc = arg1;
2977 size = sc->hn_agg_size;
2978 error = sysctl_handle_int(oidp, &size, 0, req);
2979 if (error || req->newptr == NULL)
2983 sc->hn_agg_size = size;
2991 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS)
2993 struct hn_softc *sc = arg1;
2996 pkts = sc->hn_agg_pkts;
2997 error = sysctl_handle_int(oidp, &pkts, 0, req);
2998 if (error || req->newptr == NULL)
3002 sc->hn_agg_pkts = pkts;
3010 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS)
3012 struct hn_softc *sc = arg1;
3015 pkts = sc->hn_tx_ring[0].hn_agg_pktmax;
3016 return (sysctl_handle_int(oidp, &pkts, 0, req));
3020 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS)
3022 struct hn_softc *sc = arg1;
3025 align = sc->hn_tx_ring[0].hn_agg_align;
3026 return (sysctl_handle_int(oidp, &align, 0, req));
3030 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz)
3033 vmbus_chan_poll_disable(chan);
3035 vmbus_chan_poll_enable(chan, pollhz);
3039 hn_polling(struct hn_softc *sc, u_int pollhz)
3041 int nsubch = sc->hn_rx_ring_inuse - 1;
3046 struct vmbus_channel **subch;
3049 subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
3050 for (i = 0; i < nsubch; ++i)
3051 hn_chan_polling(subch[i], pollhz);
3052 vmbus_subchan_rel(subch, nsubch);
3054 hn_chan_polling(sc->hn_prichan, pollhz);
3058 hn_polling_sysctl(SYSCTL_HANDLER_ARGS)
3060 struct hn_softc *sc = arg1;
3063 pollhz = sc->hn_pollhz;
3064 error = sysctl_handle_int(oidp, &pollhz, 0, req);
3065 if (error || req->newptr == NULL)
3069 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX))
3073 if (sc->hn_pollhz != pollhz) {
3074 sc->hn_pollhz = pollhz;
3075 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) &&
3076 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
3077 hn_polling(sc, sc->hn_pollhz);
3085 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
3087 struct hn_softc *sc = arg1;
3090 snprintf(verstr, sizeof(verstr), "%u.%u",
3091 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
3092 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
3093 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
3097 hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
3099 struct hn_softc *sc = arg1;
3106 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS);
3107 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req);
3111 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
3113 struct hn_softc *sc = arg1;
3114 char assist_str[128];
3118 hwassist = sc->hn_ifp->if_hwassist;
3120 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS);
3121 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req);
3125 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS)
3127 struct hn_softc *sc = arg1;
3128 char filter_str[128];
3132 filter = sc->hn_rx_filter;
3134 snprintf(filter_str, sizeof(filter_str), "%b", filter,
3136 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req);
3140 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
3142 struct hn_softc *sc = arg1;
3147 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
3148 if (error || req->newptr == NULL)
3151 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
3154 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
3156 if (sc->hn_rx_ring_inuse > 1) {
3157 error = hn_rss_reconfig(sc);
3159 /* Not RSS capable, at least for now; just save the RSS key. */
3168 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
3170 struct hn_softc *sc = arg1;
3175 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
3176 if (error || req->newptr == NULL)
3180 * Don't allow RSS indirect table change, if this interface is not
3181 * RSS capable currently.
3183 if (sc->hn_rx_ring_inuse == 1) {
3188 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
3191 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
3193 hn_rss_ind_fixup(sc);
3194 error = hn_rss_reconfig(sc);
3201 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS)
3203 struct hn_softc *sc = arg1;
3208 hash = sc->hn_rss_hash;
3210 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
3211 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
3215 hn_vf_sysctl(SYSCTL_HANDLER_ARGS)
3217 struct hn_softc *sc = arg1;
3223 vf = sc->hn_rx_ring[0].hn_vf;
3225 snprintf(vf_name, sizeof(vf_name), "%s", if_name(vf));
3227 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req);
3231 hn_check_iplen(const struct mbuf *m, int hoff)
3233 const struct ip *ip;
3234 int len, iphlen, iplen;
3235 const struct tcphdr *th;
3236 int thoff; /* TCP data offset */
3238 len = hoff + sizeof(struct ip);
3240 /* The packet must be at least the size of an IP header. */
3241 if (m->m_pkthdr.len < len)
3242 return IPPROTO_DONE;
3244 /* The fixed IP header must reside completely in the first mbuf. */
3246 return IPPROTO_DONE;
3248 ip = mtodo(m, hoff);
3250 /* Bound check the packet's stated IP header length. */
3251 iphlen = ip->ip_hl << 2;
3252 if (iphlen < sizeof(struct ip)) /* minimum header length */
3253 return IPPROTO_DONE;
3255 /* The full IP header must reside completely in the one mbuf. */
3256 if (m->m_len < hoff + iphlen)
3257 return IPPROTO_DONE;
3259 iplen = ntohs(ip->ip_len);
3262 * Check that the amount of data in the buffers is as
3263 * at least much as the IP header would have us expect.
3265 if (m->m_pkthdr.len < hoff + iplen)
3266 return IPPROTO_DONE;
3269 * Ignore IP fragments.
3271 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
3272 return IPPROTO_DONE;
3275 * The TCP/IP or UDP/IP header must be entirely contained within
3276 * the first fragment of a packet.
3280 if (iplen < iphlen + sizeof(struct tcphdr))
3281 return IPPROTO_DONE;
3282 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
3283 return IPPROTO_DONE;
3284 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
3285 thoff = th->th_off << 2;
3286 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
3287 return IPPROTO_DONE;
3288 if (m->m_len < hoff + iphlen + thoff)
3289 return IPPROTO_DONE;
3292 if (iplen < iphlen + sizeof(struct udphdr))
3293 return IPPROTO_DONE;
3294 if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
3295 return IPPROTO_DONE;
3299 return IPPROTO_DONE;
3306 hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
3308 struct sysctl_oid_list *child;
3309 struct sysctl_ctx_list *ctx;
3310 device_t dev = sc->hn_dev;
3311 #if defined(INET) || defined(INET6)
3312 #if __FreeBSD_version >= 1100095
3319 * Create RXBUF for reception.
3322 * - It is shared by all channels.
3323 * - A large enough buffer is allocated, certain version of NVSes
3324 * may further limit the usable space.
3326 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
3327 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma,
3328 BUS_DMA_WAITOK | BUS_DMA_ZERO);
3329 if (sc->hn_rxbuf == NULL) {
3330 device_printf(sc->hn_dev, "allocate rxbuf failed\n");
3334 sc->hn_rx_ring_cnt = ring_cnt;
3335 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
3337 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
3338 M_DEVBUF, M_WAITOK | M_ZERO);
3340 #if defined(INET) || defined(INET6)
3341 #if __FreeBSD_version >= 1100095
3342 lroent_cnt = hn_lro_entry_count;
3343 if (lroent_cnt < TCP_LRO_ENTRIES)
3344 lroent_cnt = TCP_LRO_ENTRIES;
3346 device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
3348 #endif /* INET || INET6 */
3350 ctx = device_get_sysctl_ctx(dev);
3351 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
3353 /* Create dev.hn.UNIT.rx sysctl tree */
3354 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
3355 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3357 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
3358 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
3360 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
3361 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE,
3362 &rxr->hn_br_dma, BUS_DMA_WAITOK);
3363 if (rxr->hn_br == NULL) {
3364 device_printf(dev, "allocate bufring failed\n");
3368 if (hn_trust_hosttcp)
3369 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
3370 if (hn_trust_hostudp)
3371 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
3372 if (hn_trust_hostip)
3373 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
3374 rxr->hn_ifp = sc->hn_ifp;
3375 if (i < sc->hn_tx_ring_cnt)
3376 rxr->hn_txr = &sc->hn_tx_ring[i];
3377 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF;
3378 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK);
3380 rxr->hn_rxbuf = sc->hn_rxbuf;
3385 #if defined(INET) || defined(INET6)
3386 #if __FreeBSD_version >= 1100095
3387 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
3388 hn_lro_mbufq_depth);
3390 tcp_lro_init(&rxr->hn_lro);
3391 rxr->hn_lro.ifp = sc->hn_ifp;
3393 #if __FreeBSD_version >= 1100099
3394 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
3395 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
3397 #endif /* INET || INET6 */
3399 if (sc->hn_rx_sysctl_tree != NULL) {
3403 * Create per RX ring sysctl tree:
3404 * dev.hn.UNIT.rx.RINGID
3406 snprintf(name, sizeof(name), "%d", i);
3407 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
3408 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
3409 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3411 if (rxr->hn_rx_sysctl_tree != NULL) {
3412 SYSCTL_ADD_ULONG(ctx,
3413 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3414 OID_AUTO, "packets", CTLFLAG_RW,
3415 &rxr->hn_pkts, "# of packets received");
3416 SYSCTL_ADD_ULONG(ctx,
3417 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3418 OID_AUTO, "rss_pkts", CTLFLAG_RW,
3420 "# of packets w/ RSS info received");
3422 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3423 OID_AUTO, "pktbuf_len", CTLFLAG_RD,
3424 &rxr->hn_pktbuf_len, 0,
3425 "Temporary channel packet buffer length");
3430 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
3431 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3432 __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
3433 #if __FreeBSD_version < 1100095
3434 hn_rx_stat_int_sysctl,
3436 hn_rx_stat_u64_sysctl,
3438 "LU", "LRO queued");
3439 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
3440 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3441 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
3442 #if __FreeBSD_version < 1100095
3443 hn_rx_stat_int_sysctl,
3445 hn_rx_stat_u64_sysctl,
3447 "LU", "LRO flushed");
3448 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
3449 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3450 __offsetof(struct hn_rx_ring, hn_lro_tried),
3451 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
3452 #if __FreeBSD_version >= 1100099
3453 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
3454 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3455 hn_lro_lenlim_sysctl, "IU",
3456 "Max # of data bytes to be aggregated by LRO");
3457 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
3458 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3459 hn_lro_ackcnt_sysctl, "I",
3460 "Max # of ACKs to be aggregated by LRO");
3462 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
3463 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
3464 hn_trust_hcsum_sysctl, "I",
3465 "Trust tcp segement verification on host side, "
3466 "when csum info is missing");
3467 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
3468 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
3469 hn_trust_hcsum_sysctl, "I",
3470 "Trust udp datagram verification on host side, "
3471 "when csum info is missing");
3472 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
3473 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
3474 hn_trust_hcsum_sysctl, "I",
3475 "Trust ip packet verification on host side, "
3476 "when csum info is missing");
3477 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
3478 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3479 __offsetof(struct hn_rx_ring, hn_csum_ip),
3480 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
3481 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
3482 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3483 __offsetof(struct hn_rx_ring, hn_csum_tcp),
3484 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
3485 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
3486 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3487 __offsetof(struct hn_rx_ring, hn_csum_udp),
3488 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
3489 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
3490 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3491 __offsetof(struct hn_rx_ring, hn_csum_trusted),
3492 hn_rx_stat_ulong_sysctl, "LU",
3493 "# of packets that we trust host's csum verification");
3494 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
3495 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3496 __offsetof(struct hn_rx_ring, hn_small_pkts),
3497 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
3498 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed",
3499 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3500 __offsetof(struct hn_rx_ring, hn_ack_failed),
3501 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures");
3502 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
3503 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
3504 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
3505 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
3511 hn_destroy_rx_data(struct hn_softc *sc)
3515 if (sc->hn_rxbuf != NULL) {
3516 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0)
3517 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
3519 device_printf(sc->hn_dev, "RXBUF is referenced\n");
3520 sc->hn_rxbuf = NULL;
3523 if (sc->hn_rx_ring_cnt == 0)
3526 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
3527 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
3529 if (rxr->hn_br == NULL)
3531 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) {
3532 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
3534 device_printf(sc->hn_dev,
3535 "%dth channel bufring is referenced", i);
3539 #if defined(INET) || defined(INET6)
3540 tcp_lro_free(&rxr->hn_lro);
3542 free(rxr->hn_pktbuf, M_DEVBUF);
3544 free(sc->hn_rx_ring, M_DEVBUF);
3545 sc->hn_rx_ring = NULL;
3547 sc->hn_rx_ring_cnt = 0;
3548 sc->hn_rx_ring_inuse = 0;
3552 hn_tx_ring_create(struct hn_softc *sc, int id)
3554 struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
3555 device_t dev = sc->hn_dev;
3556 bus_dma_tag_t parent_dtag;
3560 txr->hn_tx_idx = id;
3562 #ifndef HN_USE_TXDESC_BUFRING
3563 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
3565 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
3567 txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
3568 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
3569 M_DEVBUF, M_WAITOK | M_ZERO);
3570 #ifndef HN_USE_TXDESC_BUFRING
3571 SLIST_INIT(&txr->hn_txlist);
3573 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF,
3574 M_WAITOK, &txr->hn_tx_lock);
3577 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) {
3578 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ(
3579 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id));
3581 txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt];
3584 #ifdef HN_IFSTART_SUPPORT
3585 if (hn_use_if_start) {
3586 txr->hn_txeof = hn_start_txeof;
3587 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
3588 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
3594 txr->hn_txeof = hn_xmit_txeof;
3595 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
3596 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
3598 br_depth = hn_get_txswq_depth(txr);
3599 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF,
3600 M_WAITOK, &txr->hn_tx_lock);
3603 txr->hn_direct_tx_size = hn_direct_tx_size;
3606 * Always schedule transmission instead of trying to do direct
3607 * transmission. This one gives the best performance so far.
3609 txr->hn_sched_tx = 1;
3611 parent_dtag = bus_get_dma_tag(dev);
3613 /* DMA tag for RNDIS packet messages. */
3614 error = bus_dma_tag_create(parent_dtag, /* parent */
3615 HN_RNDIS_PKT_ALIGN, /* alignment */
3616 HN_RNDIS_PKT_BOUNDARY, /* boundary */
3617 BUS_SPACE_MAXADDR, /* lowaddr */
3618 BUS_SPACE_MAXADDR, /* highaddr */
3619 NULL, NULL, /* filter, filterarg */
3620 HN_RNDIS_PKT_LEN, /* maxsize */
3622 HN_RNDIS_PKT_LEN, /* maxsegsize */
3624 NULL, /* lockfunc */
3625 NULL, /* lockfuncarg */
3626 &txr->hn_tx_rndis_dtag);
3628 device_printf(dev, "failed to create rndis dmatag\n");
3632 /* DMA tag for data. */
3633 error = bus_dma_tag_create(parent_dtag, /* parent */
3635 HN_TX_DATA_BOUNDARY, /* boundary */
3636 BUS_SPACE_MAXADDR, /* lowaddr */
3637 BUS_SPACE_MAXADDR, /* highaddr */
3638 NULL, NULL, /* filter, filterarg */
3639 HN_TX_DATA_MAXSIZE, /* maxsize */
3640 HN_TX_DATA_SEGCNT_MAX, /* nsegments */
3641 HN_TX_DATA_SEGSIZE, /* maxsegsize */
3643 NULL, /* lockfunc */
3644 NULL, /* lockfuncarg */
3645 &txr->hn_tx_data_dtag);
3647 device_printf(dev, "failed to create data dmatag\n");
3651 for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
3652 struct hn_txdesc *txd = &txr->hn_txdesc[i];
3655 txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
3656 STAILQ_INIT(&txd->agg_list);
3659 * Allocate and load RNDIS packet message.
3661 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
3662 (void **)&txd->rndis_pkt,
3663 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
3664 &txd->rndis_pkt_dmap);
3667 "failed to allocate rndis_packet_msg, %d\n", i);
3671 error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
3672 txd->rndis_pkt_dmap,
3673 txd->rndis_pkt, HN_RNDIS_PKT_LEN,
3674 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
3678 "failed to load rndis_packet_msg, %d\n", i);
3679 bus_dmamem_free(txr->hn_tx_rndis_dtag,
3680 txd->rndis_pkt, txd->rndis_pkt_dmap);
3684 /* DMA map for TX data. */
3685 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
3689 "failed to allocate tx data dmamap\n");
3690 bus_dmamap_unload(txr->hn_tx_rndis_dtag,
3691 txd->rndis_pkt_dmap);
3692 bus_dmamem_free(txr->hn_tx_rndis_dtag,
3693 txd->rndis_pkt, txd->rndis_pkt_dmap);
3697 /* All set, put it to list */
3698 txd->flags |= HN_TXD_FLAG_ONLIST;
3699 #ifndef HN_USE_TXDESC_BUFRING
3700 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
3702 buf_ring_enqueue(txr->hn_txdesc_br, txd);
3705 txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
3707 if (sc->hn_tx_sysctl_tree != NULL) {
3708 struct sysctl_oid_list *child;
3709 struct sysctl_ctx_list *ctx;
3713 * Create per TX ring sysctl tree:
3714 * dev.hn.UNIT.tx.RINGID
3716 ctx = device_get_sysctl_ctx(dev);
3717 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
3719 snprintf(name, sizeof(name), "%d", id);
3720 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
3721 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3723 if (txr->hn_tx_sysctl_tree != NULL) {
3724 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
3727 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
3728 CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
3729 "# of available TX descs");
3731 #ifdef HN_IFSTART_SUPPORT
3732 if (!hn_use_if_start)
3735 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
3736 CTLFLAG_RD, &txr->hn_oactive, 0,
3739 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
3740 CTLFLAG_RW, &txr->hn_pkts,
3741 "# of packets transmitted");
3742 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends",
3743 CTLFLAG_RW, &txr->hn_sends, "# of sends");
3751 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
3753 struct hn_tx_ring *txr = txd->txr;
3755 KASSERT(txd->m == NULL, ("still has mbuf installed"));
3756 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
3758 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
3759 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt,
3760 txd->rndis_pkt_dmap);
3761 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
3765 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd)
3768 KASSERT(txd->refs == 0 || txd->refs == 1,
3769 ("invalid txd refs %d", txd->refs));
3771 /* Aggregated txds will be freed by their aggregating txd. */
3772 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) {
3775 freed = hn_txdesc_put(txr, txd);
3776 KASSERT(freed, ("can't free txdesc"));
3781 hn_tx_ring_destroy(struct hn_tx_ring *txr)
3785 if (txr->hn_txdesc == NULL)
3790 * Because the freeing of aggregated txds will be deferred
3791 * to the aggregating txd, two passes are used here:
3792 * - The first pass GCes any pending txds. This GC is necessary,
3793 * since if the channels are revoked, hypervisor will not
3794 * deliver send-done for all pending txds.
3795 * - The second pass frees the busdma stuffs, i.e. after all txds
3798 for (i = 0; i < txr->hn_txdesc_cnt; ++i)
3799 hn_txdesc_gc(txr, &txr->hn_txdesc[i]);
3800 for (i = 0; i < txr->hn_txdesc_cnt; ++i)
3801 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]);
3803 if (txr->hn_tx_data_dtag != NULL)
3804 bus_dma_tag_destroy(txr->hn_tx_data_dtag);
3805 if (txr->hn_tx_rndis_dtag != NULL)
3806 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
3808 #ifdef HN_USE_TXDESC_BUFRING
3809 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF);
3812 free(txr->hn_txdesc, M_DEVBUF);
3813 txr->hn_txdesc = NULL;
3815 if (txr->hn_mbuf_br != NULL)
3816 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF);
3818 #ifndef HN_USE_TXDESC_BUFRING
3819 mtx_destroy(&txr->hn_txlist_spin);
3821 mtx_destroy(&txr->hn_tx_lock);
3825 hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
3827 struct sysctl_oid_list *child;
3828 struct sysctl_ctx_list *ctx;
3832 * Create TXBUF for chimney sending.
3834 * NOTE: It is shared by all channels.
3836 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev),
3837 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma,
3838 BUS_DMA_WAITOK | BUS_DMA_ZERO);
3839 if (sc->hn_chim == NULL) {
3840 device_printf(sc->hn_dev, "allocate txbuf failed\n");
3844 sc->hn_tx_ring_cnt = ring_cnt;
3845 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
3847 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
3848 M_DEVBUF, M_WAITOK | M_ZERO);
3850 ctx = device_get_sysctl_ctx(sc->hn_dev);
3851 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
3853 /* Create dev.hn.UNIT.tx sysctl tree */
3854 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
3855 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3857 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3860 error = hn_tx_ring_create(sc, i);
3865 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
3866 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3867 __offsetof(struct hn_tx_ring, hn_no_txdescs),
3868 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
3869 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
3870 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3871 __offsetof(struct hn_tx_ring, hn_send_failed),
3872 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure");
3873 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
3874 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3875 __offsetof(struct hn_tx_ring, hn_txdma_failed),
3876 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
3877 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed",
3878 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3879 __offsetof(struct hn_tx_ring, hn_flush_failed),
3880 hn_tx_stat_ulong_sysctl, "LU",
3881 "# of packet transmission aggregation flush failure");
3882 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
3883 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3884 __offsetof(struct hn_tx_ring, hn_tx_collapsed),
3885 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
3886 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
3887 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3888 __offsetof(struct hn_tx_ring, hn_tx_chimney),
3889 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
3890 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
3891 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3892 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
3893 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
3894 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
3895 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
3896 "# of total TX descs");
3897 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
3898 CTLFLAG_RD, &sc->hn_chim_szmax, 0,
3899 "Chimney send packet size upper boundary");
3900 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
3901 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3902 hn_chim_size_sysctl, "I", "Chimney send packet size limit");
3903 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
3904 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3905 __offsetof(struct hn_tx_ring, hn_direct_tx_size),
3906 hn_tx_conf_int_sysctl, "I",
3907 "Size of the packet for direct transmission");
3908 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
3909 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3910 __offsetof(struct hn_tx_ring, hn_sched_tx),
3911 hn_tx_conf_int_sysctl, "I",
3912 "Always schedule transmission "
3913 "instead of doing direct transmission");
3914 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
3915 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
3916 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
3917 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
3918 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax",
3919 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0,
3920 "Applied packet transmission aggregation size");
3921 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax",
3922 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
3923 hn_txagg_pktmax_sysctl, "I",
3924 "Applied packet transmission aggregation packets");
3925 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align",
3926 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
3927 hn_txagg_align_sysctl, "I",
3928 "Applied packet transmission aggregation alignment");
3934 hn_set_chim_size(struct hn_softc *sc, int chim_size)
3938 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3939 sc->hn_tx_ring[i].hn_chim_size = chim_size;
3943 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
3945 struct ifnet *ifp = sc->hn_ifp;
3948 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
3951 KASSERT(sc->hn_ndis_tso_sgmin >= 2,
3952 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin));
3953 tso_minlen = sc->hn_ndis_tso_sgmin * mtu;
3955 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen &&
3956 sc->hn_ndis_tso_szmax <= IP_MAXPACKET,
3957 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax));
3959 if (tso_maxlen < tso_minlen)
3960 tso_maxlen = tso_minlen;
3961 else if (tso_maxlen > IP_MAXPACKET)
3962 tso_maxlen = IP_MAXPACKET;
3963 if (tso_maxlen > sc->hn_ndis_tso_szmax)
3964 tso_maxlen = sc->hn_ndis_tso_szmax;
3965 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3967 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax);
3971 hn_fixup_tx_data(struct hn_softc *sc)
3973 uint64_t csum_assist;
3976 hn_set_chim_size(sc, sc->hn_chim_szmax);
3977 if (hn_tx_chimney_size > 0 &&
3978 hn_tx_chimney_size < sc->hn_chim_szmax)
3979 hn_set_chim_size(sc, hn_tx_chimney_size);
3982 if (sc->hn_caps & HN_CAP_IPCS)
3983 csum_assist |= CSUM_IP;
3984 if (sc->hn_caps & HN_CAP_TCP4CS)
3985 csum_assist |= CSUM_IP_TCP;
3986 if (sc->hn_caps & HN_CAP_UDP4CS)
3987 csum_assist |= CSUM_IP_UDP;
3988 if (sc->hn_caps & HN_CAP_TCP6CS)
3989 csum_assist |= CSUM_IP6_TCP;
3990 if (sc->hn_caps & HN_CAP_UDP6CS)
3991 csum_assist |= CSUM_IP6_UDP;
3992 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3993 sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
3995 if (sc->hn_caps & HN_CAP_HASHVAL) {
3997 * Support HASHVAL pktinfo on TX path.
4000 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
4001 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
4002 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
4007 hn_destroy_tx_data(struct hn_softc *sc)
4011 if (sc->hn_chim != NULL) {
4012 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) {
4013 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
4015 device_printf(sc->hn_dev,
4016 "chimney sending buffer is referenced");
4021 if (sc->hn_tx_ring_cnt == 0)
4024 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
4025 hn_tx_ring_destroy(&sc->hn_tx_ring[i]);
4027 free(sc->hn_tx_ring, M_DEVBUF);
4028 sc->hn_tx_ring = NULL;
4030 sc->hn_tx_ring_cnt = 0;
4031 sc->hn_tx_ring_inuse = 0;
4034 #ifdef HN_IFSTART_SUPPORT
4037 hn_start_taskfunc(void *xtxr, int pending __unused)
4039 struct hn_tx_ring *txr = xtxr;
4041 mtx_lock(&txr->hn_tx_lock);
4042 hn_start_locked(txr, 0);
4043 mtx_unlock(&txr->hn_tx_lock);
4047 hn_start_locked(struct hn_tx_ring *txr, int len)
4049 struct hn_softc *sc = txr->hn_sc;
4050 struct ifnet *ifp = sc->hn_ifp;
4053 KASSERT(hn_use_if_start,
4054 ("hn_start_locked is called, when if_start is disabled"));
4055 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
4056 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
4057 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
4059 if (__predict_false(txr->hn_suspended))
4062 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4066 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
4067 struct hn_txdesc *txd;
4068 struct mbuf *m_head;
4071 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
4075 if (len > 0 && m_head->m_pkthdr.len > len) {
4077 * This sending could be time consuming; let callers
4078 * dispatch this packet sending (and sending of any
4079 * following up packets) to tx taskqueue.
4081 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4086 #if defined(INET6) || defined(INET)
4087 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
4088 m_head = hn_tso_fixup(m_head);
4089 if (__predict_false(m_head == NULL)) {
4090 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
4096 txd = hn_txdesc_get(txr);
4098 txr->hn_no_txdescs++;
4099 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4100 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4104 error = hn_encap(ifp, txr, txd, &m_head);
4106 /* Both txd and m_head are freed */
4107 KASSERT(txr->hn_agg_txd == NULL,
4108 ("encap failed w/ pending aggregating txdesc"));
4112 if (txr->hn_agg_pktleft == 0) {
4113 if (txr->hn_agg_txd != NULL) {
4114 KASSERT(m_head == NULL,
4115 ("pending mbuf for aggregating txdesc"));
4116 error = hn_flush_txagg(ifp, txr);
4117 if (__predict_false(error)) {
4118 atomic_set_int(&ifp->if_drv_flags,
4123 KASSERT(m_head != NULL, ("mbuf was freed"));
4124 error = hn_txpkt(ifp, txr, txd);
4125 if (__predict_false(error)) {
4126 /* txd is freed, but m_head is not */
4127 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4128 atomic_set_int(&ifp->if_drv_flags,
4136 KASSERT(txr->hn_agg_txd != NULL,
4137 ("no aggregating txdesc"));
4138 KASSERT(m_head == NULL,
4139 ("pending mbuf for aggregating txdesc"));
4144 /* Flush pending aggerated transmission. */
4145 if (txr->hn_agg_txd != NULL)
4146 hn_flush_txagg(ifp, txr);
4151 hn_start(struct ifnet *ifp)
4153 struct hn_softc *sc = ifp->if_softc;
4154 struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
4156 if (txr->hn_sched_tx)
4159 if (mtx_trylock(&txr->hn_tx_lock)) {
4162 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
4163 mtx_unlock(&txr->hn_tx_lock);
4168 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
4172 hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
4174 struct hn_tx_ring *txr = xtxr;
4176 mtx_lock(&txr->hn_tx_lock);
4177 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
4178 hn_start_locked(txr, 0);
4179 mtx_unlock(&txr->hn_tx_lock);
4183 hn_start_txeof(struct hn_tx_ring *txr)
4185 struct hn_softc *sc = txr->hn_sc;
4186 struct ifnet *ifp = sc->hn_ifp;
4188 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
4190 if (txr->hn_sched_tx)
4193 if (mtx_trylock(&txr->hn_tx_lock)) {
4196 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4197 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
4198 mtx_unlock(&txr->hn_tx_lock);
4200 taskqueue_enqueue(txr->hn_tx_taskq,
4206 * Release the OACTIVE earlier, with the hope, that
4207 * others could catch up. The task will clear the
4208 * flag again with the hn_tx_lock to avoid possible
4211 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4212 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
4216 #endif /* HN_IFSTART_SUPPORT */
4219 hn_xmit(struct hn_tx_ring *txr, int len)
4221 struct hn_softc *sc = txr->hn_sc;
4222 struct ifnet *ifp = sc->hn_ifp;
4223 struct mbuf *m_head;
4226 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
4227 #ifdef HN_IFSTART_SUPPORT
4228 KASSERT(hn_use_if_start == 0,
4229 ("hn_xmit is called, when if_start is enabled"));
4231 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
4233 if (__predict_false(txr->hn_suspended))
4236 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
4239 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
4240 struct hn_txdesc *txd;
4243 if (len > 0 && m_head->m_pkthdr.len > len) {
4245 * This sending could be time consuming; let callers
4246 * dispatch this packet sending (and sending of any
4247 * following up packets) to tx taskqueue.
4249 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
4254 txd = hn_txdesc_get(txr);
4256 txr->hn_no_txdescs++;
4257 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
4258 txr->hn_oactive = 1;
4262 error = hn_encap(ifp, txr, txd, &m_head);
4264 /* Both txd and m_head are freed; discard */
4265 KASSERT(txr->hn_agg_txd == NULL,
4266 ("encap failed w/ pending aggregating txdesc"));
4267 drbr_advance(ifp, txr->hn_mbuf_br);
4271 if (txr->hn_agg_pktleft == 0) {
4272 if (txr->hn_agg_txd != NULL) {
4273 KASSERT(m_head == NULL,
4274 ("pending mbuf for aggregating txdesc"));
4275 error = hn_flush_txagg(ifp, txr);
4276 if (__predict_false(error)) {
4277 txr->hn_oactive = 1;
4281 KASSERT(m_head != NULL, ("mbuf was freed"));
4282 error = hn_txpkt(ifp, txr, txd);
4283 if (__predict_false(error)) {
4284 /* txd is freed, but m_head is not */
4285 drbr_putback(ifp, txr->hn_mbuf_br,
4287 txr->hn_oactive = 1;
4294 KASSERT(txr->hn_agg_txd != NULL,
4295 ("no aggregating txdesc"));
4296 KASSERT(m_head == NULL,
4297 ("pending mbuf for aggregating txdesc"));
4302 drbr_advance(ifp, txr->hn_mbuf_br);
4305 /* Flush pending aggerated transmission. */
4306 if (txr->hn_agg_txd != NULL)
4307 hn_flush_txagg(ifp, txr);
4312 hn_transmit(struct ifnet *ifp, struct mbuf *m)
4314 struct hn_softc *sc = ifp->if_softc;
4315 struct hn_tx_ring *txr;
4318 #if defined(INET6) || defined(INET)
4320 * Perform TSO packet header fixup now, since the TSO
4321 * packet header should be cache-hot.
4323 if (m->m_pkthdr.csum_flags & CSUM_TSO) {
4324 m = hn_tso_fixup(m);
4325 if (__predict_false(m == NULL)) {
4326 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
4333 * Select the TX ring based on flowid
4335 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
4336 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
4337 txr = &sc->hn_tx_ring[idx];
4339 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
4341 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
4345 if (txr->hn_oactive)
4348 if (txr->hn_sched_tx)
4351 if (mtx_trylock(&txr->hn_tx_lock)) {
4354 sched = hn_xmit(txr, txr->hn_direct_tx_size);
4355 mtx_unlock(&txr->hn_tx_lock);
4360 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
4365 hn_tx_ring_qflush(struct hn_tx_ring *txr)
4369 mtx_lock(&txr->hn_tx_lock);
4370 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
4372 mtx_unlock(&txr->hn_tx_lock);
4376 hn_xmit_qflush(struct ifnet *ifp)
4378 struct hn_softc *sc = ifp->if_softc;
4381 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
4382 hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
4387 hn_xmit_txeof(struct hn_tx_ring *txr)
4390 if (txr->hn_sched_tx)
4393 if (mtx_trylock(&txr->hn_tx_lock)) {
4396 txr->hn_oactive = 0;
4397 sched = hn_xmit(txr, txr->hn_direct_tx_size);
4398 mtx_unlock(&txr->hn_tx_lock);
4400 taskqueue_enqueue(txr->hn_tx_taskq,
4406 * Release the oactive earlier, with the hope, that
4407 * others could catch up. The task will clear the
4408 * oactive again with the hn_tx_lock to avoid possible
4411 txr->hn_oactive = 0;
4412 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
4417 hn_xmit_taskfunc(void *xtxr, int pending __unused)
4419 struct hn_tx_ring *txr = xtxr;
4421 mtx_lock(&txr->hn_tx_lock);
4423 mtx_unlock(&txr->hn_tx_lock);
4427 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
4429 struct hn_tx_ring *txr = xtxr;
4431 mtx_lock(&txr->hn_tx_lock);
4432 txr->hn_oactive = 0;
4434 mtx_unlock(&txr->hn_tx_lock);
4438 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
4440 struct vmbus_chan_br cbr;
4441 struct hn_rx_ring *rxr;
4442 struct hn_tx_ring *txr = NULL;
4445 idx = vmbus_chan_subidx(chan);
4448 * Link this channel to RX/TX ring.
4450 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
4451 ("invalid channel index %d, should > 0 && < %d",
4452 idx, sc->hn_rx_ring_inuse));
4453 rxr = &sc->hn_rx_ring[idx];
4454 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
4455 ("RX ring %d already attached", idx));
4456 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
4457 rxr->hn_chan = chan;
4460 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
4461 idx, vmbus_chan_id(chan));
4464 if (idx < sc->hn_tx_ring_inuse) {
4465 txr = &sc->hn_tx_ring[idx];
4466 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
4467 ("TX ring %d already attached", idx));
4468 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
4470 txr->hn_chan = chan;
4472 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
4473 idx, vmbus_chan_id(chan));
4477 /* Bind this channel to a proper CPU. */
4478 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));
4483 cbr.cbr = rxr->hn_br;
4484 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
4485 cbr.cbr_txsz = HN_TXBR_SIZE;
4486 cbr.cbr_rxsz = HN_RXBR_SIZE;
4487 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
4489 if (error == EISCONN) {
4490 if_printf(sc->hn_ifp, "bufring is connected after "
4491 "chan%u open failure\n", vmbus_chan_id(chan));
4492 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
4494 if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
4495 vmbus_chan_id(chan), error);
4502 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
4504 struct hn_rx_ring *rxr;
4507 idx = vmbus_chan_subidx(chan);
4510 * Link this channel to RX/TX ring.
4512 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
4513 ("invalid channel index %d, should > 0 && < %d",
4514 idx, sc->hn_rx_ring_inuse));
4515 rxr = &sc->hn_rx_ring[idx];
4516 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
4517 ("RX ring %d is not attached", idx));
4518 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
4520 if (idx < sc->hn_tx_ring_inuse) {
4521 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
4523 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
4524 ("TX ring %d is not attached attached", idx));
4525 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
4529 * Close this channel.
4532 * Channel closing does _not_ destroy the target channel.
4534 error = vmbus_chan_close_direct(chan);
4535 if (error == EISCONN) {
4536 if_printf(sc->hn_ifp, "chan%u bufring is connected "
4537 "after being closed\n", vmbus_chan_id(chan));
4538 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
4540 if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
4541 vmbus_chan_id(chan), error);
4546 hn_attach_subchans(struct hn_softc *sc)
4548 struct vmbus_channel **subchans;
4549 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
4552 KASSERT(subchan_cnt > 0, ("no sub-channels"));
4554 /* Attach the sub-channels. */
4555 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
4556 for (i = 0; i < subchan_cnt; ++i) {
4559 error1 = hn_chan_attach(sc, subchans[i]);
4562 /* Move on; all channels will be detached later. */
4565 vmbus_subchan_rel(subchans, subchan_cnt);
4568 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
4571 if_printf(sc->hn_ifp, "%d sub-channels attached\n",
4579 hn_detach_allchans(struct hn_softc *sc)
4581 struct vmbus_channel **subchans;
4582 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
4585 if (subchan_cnt == 0)
4588 /* Detach the sub-channels. */
4589 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
4590 for (i = 0; i < subchan_cnt; ++i)
4591 hn_chan_detach(sc, subchans[i]);
4592 vmbus_subchan_rel(subchans, subchan_cnt);
4596 * Detach the primary channel, _after_ all sub-channels
4599 hn_chan_detach(sc, sc->hn_prichan);
4601 /* Wait for sub-channels to be destroyed, if any. */
4602 vmbus_subchan_drain(sc->hn_prichan);
4605 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
4606 KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
4607 HN_RX_FLAG_ATTACHED) == 0,
4608 ("%dth RX ring is still attached", i));
4610 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
4611 KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
4612 HN_TX_FLAG_ATTACHED) == 0,
4613 ("%dth TX ring is still attached", i));
4619 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
4621 struct vmbus_channel **subchans;
4622 int nchan, rxr_cnt, error;
4624 nchan = *nsubch + 1;
4627 * Multiple RX/TX rings are not requested.
4634 * Query RSS capabilities, e.g. # of RX rings, and # of indirect
4637 error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
4639 /* No RSS; this is benign. */
4644 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
4648 if (nchan > rxr_cnt)
4651 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
4657 * Allocate sub-channels from NVS.
4659 *nsubch = nchan - 1;
4660 error = hn_nvs_alloc_subchans(sc, nsubch);
4661 if (error || *nsubch == 0) {
4662 /* Failed to allocate sub-channels. */
4668 * Wait for all sub-channels to become ready before moving on.
4670 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
4671 vmbus_subchan_rel(subchans, *nsubch);
4676 hn_synth_attachable(const struct hn_softc *sc)
4680 if (sc->hn_flags & HN_FLAG_ERRORS)
4683 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
4684 const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
4686 if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
static int
hn_synth_attach(struct hn_softc *sc, int mtu)
{
#define ATTACHED_NVS		0x0002
#define ATTACHED_RNDIS		0x0004

	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int error, nsubch, nchan, i;
	uint32_t old_caps, attached = 0;

	KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
	    ("synthetic parts were attached"));

	if (!hn_synth_attachable(sc))
		return (ENXIO);

	/* Save capabilities for later verification. */
	old_caps = sc->hn_caps;
	sc->hn_caps = 0;

	/* Clear RSS state. */
	sc->hn_rss_ind_size = 0;
	sc->hn_rss_hash = 0;

	/*
	 * Attach the primary channel _before_ attaching NVS and RNDIS.
	 */
	error = hn_chan_attach(sc, sc->hn_prichan);
	if (error)
		goto failed;

	/* Attach NVS. */
	error = hn_nvs_attach(sc, mtu);
	if (error)
		goto failed;
	attached |= ATTACHED_NVS;

	/*
	 * Attach RNDIS _after_ NVS is attached.
	 */
	error = hn_rndis_attach(sc, mtu);
	if (error)
		goto failed;
	attached |= ATTACHED_RNDIS;

	/*
	 * Make sure capabilities are not changed.
	 */
	if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
		if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
		    old_caps, sc->hn_caps);
		error = ENXIO;
		goto failed;
	}

	/*
	 * Allocate sub-channels for multi-TX/RX rings.
	 *
	 * NOTE:
	 * The # of RX rings that can be used is equivalent to the # of
	 * channels to be requested.
	 */
	nsubch = sc->hn_rx_ring_cnt - 1;
	error = hn_synth_alloc_subchans(sc, &nsubch);
	if (error)
		goto failed;
	/* NOTE: _Full_ synthetic parts detach is required now. */
	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;

	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	nchan = nsubch + 1;
	hn_set_ring_inuse(sc, nchan);
	if (nchan == 1) {
		/* Only the primary channel can be used; done */
		goto back;
	}

	/*
	 * Attach the sub-channels.
	 *
	 * NOTE: hn_set_ring_inuse() _must_ have been called.
	 */
	error = hn_attach_subchans(sc);
	if (error)
		goto failed;

	/*
	 * Configure RSS key and indirect table _after_ all sub-channels
	 * are attached.
	 */
	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
		/*
		 * RSS key is not set yet; set it to the default RSS key.
		 */
		if (bootverbose)
			if_printf(sc->hn_ifp, "setup default RSS key\n");
		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
	}
	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
		/*
		 * RSS indirect table is not set yet; set it up in
		 * round-robin fashion.
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "setup default RSS indirect "
			    "table\n");
		}
		for (i = 0; i < NDIS_HASH_INDCNT; ++i)
			rss->rss_ind[i] = i % nchan;
		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
	} else {
		/*
		 * # of usable channels may be changed, so we have to
		 * make sure that all entries in RSS indirect table
		 * are valid.
		 *
		 * NOTE: hn_set_ring_inuse() _must_ have been called.
		 */
		hn_rss_ind_fixup(sc);
	}

	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error)
		goto failed;
back:
	/*
	 * Fixup transmission aggregation setup.
	 */
	hn_set_txagg(sc);
	return (0);

failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
		hn_synth_detach(sc);
	} else {
		if (attached & ATTACHED_RNDIS)
			hn_rndis_detach(sc);
		if (attached & ATTACHED_NVS)
			hn_nvs_detach(sc);
		hn_chan_detach(sc, sc->hn_prichan);
		/* Restore old capabilities. */
		sc->hn_caps = old_caps;
	}
	return (error);

#undef ATTACHED_RNDIS
#undef ATTACHED_NVS
}
/*
 * NOTE:
 * The interface must have been suspended through hn_suspend(),
 * before this function gets called.
 */
static void
hn_synth_detach(struct hn_softc *sc)
{

	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
	    ("synthetic parts were not attached"));

	/* Detach the RNDIS first. */
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}
static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{

	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}
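
/*
 * Spin until the channel's RX bufring is empty and, unless the
 * primary channel has been revoked, until its TX bufring is empty
 * as well; then drain the channel's in-flight interrupts.
 */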
static void
hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{

	/*
	 * NOTE:
	 * The TX bufring will not be drained by the hypervisor,
	 * if the primary channel is revoked.
	 */
	while (!vmbus_chan_rx_empty(chan) ||
	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
	     !vmbus_chan_tx_empty(chan)))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}
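
/*
 * Quiesce the data path: suspend the TX rings and wait for pending
 * sends, clear the RX filter, drain all channel bufrings, and
 * finally drain the TX tasks that the channel drain itself may
 * have dispatched.
 */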
static void
hn_suspend_data(struct hn_softc *sc)
{
	struct vmbus_channel **subch = NULL;
	struct hn_tx_ring *txr;
	int i, nsubch;

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one is able to send more packets now. */

		/*
		 * Wait for all pending sends to finish.
		 *
		 * NOTE:
		 * We will _not_ receive all pending send-done, if the
		 * primary channel is revoked.
		 */
		while (hn_tx_ring_pending(txr) &&
		    !vmbus_chan_is_revoked(sc->hn_prichan))
			pause("hnwtx", 1 /* 1 tick */);
	}

	/*
	 * Disable RX by clearing RX filter.
	 */
	hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	nsubch = sc->hn_rx_ring_inuse - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);

	if (subch != NULL) {
		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(sc, subch[i]);
	}
	hn_chan_drain(sc, sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);

	/*
	 * Drain any pending TX tasks.
	 *
	 * NOTE:
	 * The above hn_chan_drain() can dispatch TX tasks, so the TX
	 * tasks will have to be drained _after_ the above hn_chan_drain()
	 * calls.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}
static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{

	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}
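
/*
 * hn_mgmt_taskq is cleared from a task run on the primary channel,
 * so once vmbus_chan_run_task() returns, code executing on that
 * channel can no longer pick up the management taskqueue; the
 * already-queued management tasks are then drained.
 */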
static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
	 */
	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
	taskqueue_drain_all(sc->hn_mgmt_taskq0);
}
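
/*
 * Full suspend: polling is disabled first, then the data path is
 * quiesced (only if the interface is running or a VF is attached),
 * and finally the management path is suspended.
 */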
static void
hn_suspend(struct hn_softc *sc)
{

	/* Disable polling. */
	hn_polling(sc, 0);

	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_VF))
		hn_suspend_data(sc);
	hn_suspend_mgmt(sc);
}
static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
	int i;

	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
	    ("invalid TX ring count %d", tx_ring_cnt));

	for (i = 0; i < tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 0;
		mtx_unlock(&txr->hn_tx_lock);
	}
}
static void
hn_resume_data(struct hn_softc *sc)
{
	int i;

	/* Re-enable RX. */
	hn_rxfilter_config(sc);

	/*
	 * Make sure to clear suspend status on "all" TX rings,
	 * since hn_tx_ring_inuse can be changed after
	 * hn_suspend_data().
	 */
	hn_resume_tx(sc, sc->hn_tx_ring_cnt);

#ifdef HN_IFSTART_SUPPORT
	if (!hn_use_if_start)
#endif
	{
		/*
		 * Flush unused drbrs, since hn_tx_ring_inuse may be
		 * changed.
		 */
		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	}

	/* Kick start TX. */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		/*
		 * Use txeof task, so that any pending oactive can be
		 * cleared properly.
		 */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}
static void
hn_resume_mgmt(struct hn_softc *sc)
{

	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;

	/*
	 * Kick off network change detection, if it was pending.
	 * If no network change was pending, start link status
	 * checks, which is more lightweight than network change
	 * detection.
	 */
	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		hn_change_network(sc);
	else
		hn_update_link_status(sc);
}
static void
hn_resume(struct hn_softc *sc)
{

	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_VF))
		hn_resume_data(sc);

	/*
	 * When the VF is activated, the synthetic interface is changed
	 * to DOWN in hn_set_vf().  Here, if the VF is still active, we
	 * don't call hn_resume_mgmt() until the VF is deactivated in
	 * hn_set_vf().
	 */
	if (!(sc->hn_flags & HN_FLAG_VF))
		hn_resume_mgmt(sc);

	/*
	 * Re-enable polling if this interface is running and
	 * the polling is requested.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
		hn_polling(sc, sc->hn_pollhz);
}
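
/*
 * Handle unsolicited RNDIS status indications: media connect and
 * disconnect feed the link status update, while a network change
 * indication additionally kicks off network change detection.
 */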
static void
hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
{
	const struct rndis_status_msg *msg;
	int ofs;

	if (dlen < sizeof(*msg)) {
		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
		return;
	}
	msg = data;

	switch (msg->rm_status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		hn_update_link_status(sc);
		break;
	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
		/* Not really useful; ignore. */
		break;
	case RNDIS_STATUS_NETWORK_CHANGE:
		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
		if (dlen < ofs + msg->rm_stbuflen ||
		    msg->rm_stbuflen < sizeof(uint32_t)) {
			if_printf(sc->hn_ifp, "network changed\n");
		} else {
			uint32_t change;

			memcpy(&change, ((const uint8_t *)msg) + ofs,
			    sizeof(change));
			if_printf(sc->hn_ifp, "network changed, change %u\n",
			    change);
		}
		hn_change_network(sc);
		break;
	default:
		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
		    msg->rm_status);
		break;
	}
}
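
/*
 * Walk the per-packet-info elements attached to an RNDIS data
 * message.  Each element, as consumed here, is laid out as:
 *
 *	+--------------------------------+ <- pi
 *	| rm_size, rm_type,              |
 *	| rm_pktinfooffset               |
 *	+--------------------------------+ <- pi + rm_pktinfooffset
 *	| data, (rm_size -               |
 *	|  rm_pktinfooffset) bytes       |
 *	+--------------------------------+ <- pi + rm_size
 *
 * VLAN, checksum and hash elements are collected into 'info'; the
 * walk stops early once all of them have been seen.
 */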
static int
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (__predict_false(info_dlen < sizeof(*pi)))
			return (EINVAL);
		if (__predict_false(info_dlen < pi->rm_size))
			return (EINVAL);
		info_dlen -= pi->rm_size;

		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return (EINVAL);
		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
			return (EINVAL);
		dlen = pi->rm_size - pi->rm_pktinfooffset;
		data = pi->rm_data;

		switch (pi->rm_type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
				return (EINVAL);
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;
		case NDIS_PKTINFO_TYPE_CSUM:
			if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
				return (EINVAL);
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;
		case HN_NDIS_PKTINFO_TYPE_HASHVAL:
			if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
				return (EINVAL);
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;
		case HN_NDIS_PKTINFO_TYPE_HASHINF:
			if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
				return (EINVAL);
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;
		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL) {
			/* All found; done */
			break;
		}
next:
		pi = (const struct rndis_pktinfo *)
		    ((const uint8_t *)pi + pi->rm_size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if ((mask & HN_RXINFO_HASHVAL) == 0)
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return (0);
}
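
/*
 * Returns false only if [off, off + len) and [check_off,
 * check_off + check_len) are disjoint, e.g.:
 *
 *	hn_rndis_check_overlap(0, 16, 16, 8)	-> false
 *	hn_rndis_check_overlap(0, 17, 16, 8)	-> true
 */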
static __inline bool
hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
{

	if (off < check_off) {
		if (__predict_true(off + len <= check_off))
			return (false);
	} else if (off > check_off) {
		if (__predict_true(check_off + check_len <= off))
			return (false);
	}
	return (true);
}
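
/*
 * An RNDIS packet message, as validated below, carries up to three
 * regions after the fixed header, each given as (offset, length):
 *
 *	rm_dataoffset/rm_datalen	the Ethernet frame (mandatory)
 *	rm_oobdataoffset/rm_oobdatalen	out-of-band data (unexpected)
 *	rm_pktinfooffset/rm_pktinfolen	per-packet-info elements
 *
 * Every region must lie within rm_len and must not overlap the
 * others; all of this is checked before the frame is handed to
 * hn_rxpkt().
 */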
static void
hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_packet_msg *pkt;
	struct hn_rxinfo info;
	int data_off, pktinfo_off, data_len, pktinfo_len;

	/*
	 * Check length.
	 */
	if (__predict_false(dlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
		return;
	}
	pkt = data;

	if (__predict_false(dlen < pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
		return;
	}
	if (__predict_false(pkt->rm_len <
	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
		    "msglen %u, data %u, oob %u, pktinfo %u\n",
		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
		    pkt->rm_pktinfolen);
		return;
	}
	if (__predict_false(pkt->rm_datalen == 0)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
		return;
	}

	/*
	 * Check offsets.
	 */
#define IS_OFFSET_INVALID(ofs)			\
	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))

	/* XXX Hyper-V does not meet data offset alignment requirement */
	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data offset %u\n", pkt->rm_dataoffset);
		return;
	}
	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "oob offset %u\n", pkt->rm_oobdataoffset);
		return;
	}
	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
		return;
	}

#undef IS_OFFSET_INVALID

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
	data_len = pkt->rm_datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
	pktinfo_len = pkt->rm_pktinfolen;

	/*
	 * Check OOB coverage.
	 */
	if (__predict_false(pkt->rm_oobdatalen != 0)) {
		int oob_off, oob_len;

		if_printf(rxr->hn_ifp, "got oobdata\n");
		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
		oob_len = pkt->rm_oobdatalen;

		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overflow, msglen %u, oob abs %d len %d\n",
			    pkt->rm_len, oob_off, oob_len);
			return;
		}

		/*
		 * Check against data.
		 */
		if (hn_rndis_check_overlap(oob_off, oob_len,
		    data_off, data_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps data, oob abs %d len %d, "
			    "data abs %d len %d\n",
			    oob_off, oob_len, data_off, data_len);
			return;
		}

		/*
		 * Check against pktinfo.
		 */
		if (pktinfo_len != 0 &&
		    hn_rndis_check_overlap(oob_off, oob_len,
		    pktinfo_off, pktinfo_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps pktinfo, oob abs %d len %d, "
			    "pktinfo abs %d len %d\n",
			    oob_off, oob_len, pktinfo_off, pktinfo_len);
			return;
		}
	}

	/*
	 * Check per-packet-info coverage and find useful per-packet-info.
	 */
	info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
	info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
	info.hash_info = HN_NDIS_HASH_INFO_INVALID;
	if (__predict_true(pktinfo_len != 0)) {
		bool overlap;
		int error;

		if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overflow, msglen %u, "
			    "pktinfo abs %d len %d\n",
			    pkt->rm_len, pktinfo_off, pktinfo_len);
			return;
		}

		/*
		 * Check packet info coverage.
		 */
		overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
		    data_off, data_len);
		if (__predict_false(overlap)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overlap data, pktinfo abs %d len %d, "
			    "data abs %d len %d\n",
			    pktinfo_off, pktinfo_len, data_off, data_len);
			return;
		}

		/*
		 * Find useful per-packet-info.
		 */
		error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
		    pktinfo_len, &info);
		if (__predict_false(error)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
			    "pktinfo\n");
			return;
		}
	}

	if (__predict_false(data_off + data_len > pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data overflow, msglen %u, data abs %d len %d\n",
		    pkt->rm_len, data_off, data_len);
		return;
	}
	hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
}
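
/*
 * Dispatch an inbound RNDIS message by rm_type: data messages take
 * the hot path, status indications update link state, and anything
 * else is handed to the RNDIS control path.
 */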
static __inline void
hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_msghdr *hdr;

	if (__predict_false(dlen < sizeof(*hdr))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
		return;
	}
	hdr = data;

	if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
		/* Hot data path. */
		hn_rndis_rx_data(rxr, data, dlen);
		/* Done! */
		return;
	}

	if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
		hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
	else
		hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
}
static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
	const struct hn_nvs_hdr *hdr;

	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
		if_printf(sc->hn_ifp, "invalid nvs notify\n");
		return;
	}
	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);

	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
		/* Useless; ignore */
		return;
	}
	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}
static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkt)
{
	struct hn_nvs_sendctx *sndc;

	sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
	    VMBUS_CHANPKT_DATALEN(pkt));
	/*
	 * NOTE:
	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
	 * its callback.
	 */
}
static void
hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkthdr)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr;
	int count, i, hlen;

	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
		return;
	}
	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);

	/* Make sure that this is a RNDIS message. */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}
		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
	}

	/*
	 * Ack the consumed RXBUF associated w/ this channel packet,
	 * so that this RXBUF can be recycled by the hypervisor.
	 */
	hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
}
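
/*
 * Ack an RXBUF channel packet by sending a completion carrying the
 * original transaction id, so that the host may recycle the RXBUF.
 * EAGAIN from vmbus_chan_send() means the TX bufring is full; the
 * send is retried a bounded number of times before the RXBUF is
 * deemed leaked.
 */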
static void
hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    uint64_t tid)
{
	struct hn_nvs_rndis_ack ack;
	int retries, error;

	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
	ack.nvs_status = HN_NVS_STATUS_OK;

	retries = 0;
again:
	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
	if (__predict_false(error == EAGAIN)) {
		/*
		 * NOTE:
		 * This should _not_ happen in real world, since the
		 * consumption of the TX bufring from the TX path is
		 * controlled.
		 */
		if (rxr->hn_ack_failed == 0)
			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
		rxr->hn_ack_failed++;
		retries++;
		if (retries < 10) {
			DELAY(100);
			goto again;
		}
		/* RXBUF leaks! */
		if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
	}
}
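
/*
 * Per-channel interrupt handler: keep receiving channel packets
 * until vmbus_chan_recv_pkt() returns EAGAIN (ring empty).  ENOBUFS
 * means the pre-allocated packet buffer is too small; it is doubled
 * until the pending packet fits and the receive is retried.
 */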
static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;

	for (;;) {
		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
		int error, pktlen;

		pktlen = rxr->hn_pktbuf_len;
		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
		if (__predict_false(error == ENOBUFS)) {
			void *nbuf;
			int nlen;

			/*
			 * Expand channel packet buffer.
			 *
			 * XXX
			 * Use M_WAITOK here, since allocation failure
			 * is fatal.
			 */
			nlen = rxr->hn_pktbuf_len * 2;
			while (nlen < pktlen)
				nlen *= 2;
			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);

			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
			    rxr->hn_pktbuf_len, nlen);

			free(rxr->hn_pktbuf, M_DEVBUF);
			rxr->hn_pktbuf = nbuf;
			rxr->hn_pktbuf_len = nlen;
			/* Retry! */
			continue;
		} else if (__predict_false(error == EAGAIN)) {
			/* No more channel packets; done! */
			break;
		}
		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			hn_nvs_handle_comp(sc, chan, pkt);
			break;
		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(rxr, chan, pkt);
			break;
		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(sc, pkt);
			break;
		default:
			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
			    pkt->cph_type);
			break;
		}
	}
	hn_chan_rollup(rxr, rxr->hn_txr);
}
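
/*
 * Sanitize the TX taskqueue tunables at boot and, when the global
 * taskqueue mode is selected on a Hyper-V guest, create the shared
 * TX taskqueues up front.
 */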
static void
hn_tx_taskq_create(void *arg __unused)
{
	int i;

	/*
	 * Fix the # of TX taskqueues.
	 */
	if (hn_tx_taskq_cnt <= 0)
		hn_tx_taskq_cnt = 1;
	else if (hn_tx_taskq_cnt > mp_ncpus)
		hn_tx_taskq_cnt = mp_ncpus;

	/*
	 * Fix the TX taskqueue mode.
	 */
	switch (hn_tx_taskq_mode) {
	case HN_TX_TASKQ_M_INDEP:
	case HN_TX_TASKQ_M_GLOBAL:
	case HN_TX_TASKQ_M_EVTTQ:
		break;
	default:
		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
		break;
	}

	if (vm_guest != VM_GUEST_HV)
		return;

	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
		return;

	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
		    "hn tx%d", i);
	}
}
SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_create, NULL);
static void
hn_tx_taskq_destroy(void *arg __unused)
{

	if (hn_tx_taskque != NULL) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(hn_tx_taskque[i]);
		free(hn_tx_taskque, M_DEVBUF);
	}
}
SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_destroy, NULL);