/*-
 * Copyright (c) 2010-2012 Citrix Inc.
 * Copyright (c) 2009-2012,2016 Microsoft Corp.
 * Copyright (c) 2012 NetApp Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*-
 * Copyright (c) 2004-2006 Kip Macy
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/queue.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/sx.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/taskqueue.h>
#include <sys/buf_ring.h>
#include <sys/eventhandler.h>

#include <machine/atomic.h>
#include <machine/in_cksum.h>

#include <net/bpf.h>
#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/if_vlan_var.h>
#include <net/rndis.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>
#include <netinet/udp.h>

#include <dev/hyperv/include/hyperv.h>
#include <dev/hyperv/include/hyperv_busdma.h>
#include <dev/hyperv/include/vmbus.h>
#include <dev/hyperv/include/vmbus_xact.h>

#include <dev/hyperv/netvsc/ndis.h>
#include <dev/hyperv/netvsc/if_hnreg.h>
#include <dev/hyperv/netvsc/if_hnvar.h>
#include <dev/hyperv/netvsc/hn_nvs.h>
#include <dev/hyperv/netvsc/hn_rndis.h>

#include "vmbus_if.h"
#define HN_IFSTART_SUPPORT

#define HN_RING_CNT_DEF_MAX		8

/* YYY should get it from the underlying channel */
#define HN_TX_DESC_CNT			512

#define HN_RNDIS_PKT_LEN					\
	(sizeof(struct rndis_packet_msg) +			\
	 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) +	\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) +		\
	 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
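/*
 * Editor's note (sketch): HN_RNDIS_PKT_LEN is the worst-case header
 * budget reserved in front of every packet: one rndis_packet_msg plus
 * one per-packet-info record for each kind of metadata this driver may
 * append (hash value, VLAN, LSOv2, TX checksum).  Reserving the sum up
 * front means hn_rndis_pktinfo_append() below can never overflow the
 * scratch area, no matter which subset of the four records a given
 * packet actually carries.
 */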
#define HN_RNDIS_PKT_BOUNDARY		PAGE_SIZE
#define HN_RNDIS_PKT_ALIGN		CACHE_LINE_SIZE

#define HN_TX_DATA_BOUNDARY		PAGE_SIZE
#define HN_TX_DATA_MAXSIZE		IP_MAXPACKET
#define HN_TX_DATA_SEGSIZE		PAGE_SIZE
/* -1 for RNDIS packet message */
#define HN_TX_DATA_SEGCNT_MAX		(HN_GPACNT_MAX - 1)

#define HN_DIRECT_TX_SIZE_DEF		128

#define HN_EARLY_TXEOF_THRESH		8

#define HN_PKTBUF_LEN_DEF		(16 * 1024)

#define HN_LROENT_CNT_DEF		128

#define HN_LRO_LENLIM_MULTIRX_DEF	(12 * ETHERMTU)
#define HN_LRO_LENLIM_DEF		(25 * ETHERMTU)
/* YYY 2*MTU is a bit rough, but should be good enough. */
#define HN_LRO_LENLIM_MIN(ifp)		(2 * (ifp)->if_mtu)

#define HN_LRO_ACKCNT_DEF		1

#define HN_LOCK_INIT(sc)					\
	sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
#define HN_LOCK_DESTROY(sc)		sx_destroy(&(sc)->hn_lock)
#define HN_LOCK_ASSERT(sc)		sx_assert(&(sc)->hn_lock, SA_XLOCKED)
#define HN_LOCK(sc)					\
do {							\
	while (sx_try_xlock(&(sc)->hn_lock) == 0)	\
		DELAY(1000);				\
} while (0)
#define HN_UNLOCK(sc)			sx_xunlock(&(sc)->hn_lock)

#define HN_CSUM_IP_MASK			(CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
#define HN_CSUM_IP6_MASK		(CSUM_IP6_TCP | CSUM_IP6_UDP)
#define HN_CSUM_IP_HWASSIST(sc)		\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
#define HN_CSUM_IP6_HWASSIST(sc)	\
	((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)

#define HN_PKTSIZE_MIN(align)		\
	roundup2(ETHER_MIN_LEN + ETHER_VLAN_ENCAP_LEN - ETHER_CRC_LEN + \
	    HN_RNDIS_PKT_LEN, (align))
#define HN_PKTSIZE(m, align)		\
	roundup2((m)->m_pkthdr.len + HN_RNDIS_PKT_LEN, (align))

#define HN_RING_IDX2CPU(sc, idx)	(((sc)->hn_cpu + (idx)) % mp_ncpus)
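/*
 * Editor's note (sketch): HN_PKTSIZE() is the chimney-buffer footprint
 * of an mbuf chain: payload length plus the reserved RNDIS header,
 * rounded up to the per-ring aggregation alignment.  For example, with
 * an (illustrative) 128-byte HN_RNDIS_PKT_LEN and 64-byte alignment, a
 * 1514-byte frame occupies roundup2(1514 + 128, 64) = 1664 bytes.
 * HN_RING_IDX2CPU() simply spreads rings over CPUs round-robin,
 * starting at the device's leader CPU sc->hn_cpu and wrapping at
 * mp_ncpus.
 */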
struct hn_txdesc {
#ifndef HN_USE_TXDESC_BUFRING
	SLIST_ENTRY(hn_txdesc)		link;
#endif
	STAILQ_ENTRY(hn_txdesc)		agg_link;

	/* Aggregated txdescs, in sending order. */
	STAILQ_HEAD(, hn_txdesc)	agg_list;

	/* The oldest packet, if transmission aggregation happens. */
	struct mbuf			*m;
	struct hn_tx_ring		*txr;
	int				refs;
	uint32_t			flags;	/* HN_TXD_FLAG_ */
	struct hn_nvs_sendctx		send_ctx;
	uint32_t			chim_index;
	int				chim_size;

	bus_dmamap_t			data_dmap;

	bus_addr_t			rndis_pkt_paddr;
	struct rndis_packet_msg		*rndis_pkt;
	bus_dmamap_t			rndis_pkt_dmap;
};

#define HN_TXD_FLAG_ONLIST		0x0001
#define HN_TXD_FLAG_DMAMAP		0x0002
#define HN_TXD_FLAG_ONAGG		0x0004
struct hn_update_vf {
	struct hn_rx_ring	*rxr;
	struct ifnet		*vf;
};

#define HN_RXINFO_VLAN			0x0001
#define HN_RXINFO_CSUM			0x0002
#define HN_RXINFO_HASHINF		0x0004
#define HN_RXINFO_HASHVAL		0x0008
#define HN_RXINFO_ALL			\
	(HN_RXINFO_VLAN |		\
	 HN_RXINFO_CSUM |		\
	 HN_RXINFO_HASHINF |		\
	 HN_RXINFO_HASHVAL)

#define HN_NDIS_VLAN_INFO_INVALID	0xffffffff
#define HN_NDIS_RXCSUM_INFO_INVALID	0
#define HN_NDIS_HASH_INFO_INVALID	0
static int			hn_probe(device_t);
static int			hn_attach(device_t);
static int			hn_detach(device_t);
static int			hn_shutdown(device_t);
static void			hn_chan_callback(struct vmbus_channel *,
				    void *);

static void			hn_init(void *);
static int			hn_ioctl(struct ifnet *, u_long, caddr_t);
#ifdef HN_IFSTART_SUPPORT
static void			hn_start(struct ifnet *);
#endif
static int			hn_transmit(struct ifnet *, struct mbuf *);
static void			hn_xmit_qflush(struct ifnet *);
static int			hn_ifmedia_upd(struct ifnet *);
static void			hn_ifmedia_sts(struct ifnet *,
				    struct ifmediareq *);

static int			hn_rndis_rxinfo(const void *, int,
				    struct hn_rxinfo *);
static void			hn_rndis_rx_data(struct hn_rx_ring *,
				    const void *, int);
static void			hn_rndis_rx_status(struct hn_softc *,
				    const void *, int);
static void			hn_rndis_init_fixat(struct hn_softc *, int);

static void			hn_nvs_handle_notify(struct hn_softc *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_handle_comp(struct hn_softc *,
				    struct vmbus_channel *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_handle_rxbuf(struct hn_rx_ring *,
				    struct vmbus_channel *,
				    const struct vmbus_chanpkt_hdr *);
static void			hn_nvs_ack_rxbuf(struct hn_rx_ring *,
				    struct vmbus_channel *, uint64_t);

#if __FreeBSD_version >= 1100099
static int			hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
#if __FreeBSD_version < 1100095
static int			hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
#else
static int			hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
#endif
static int			hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_polling_sysctl(SYSCTL_HANDLER_ARGS);
static int			hn_vf_sysctl(SYSCTL_HANDLER_ARGS);

static void			hn_stop(struct hn_softc *, bool);
static void			hn_init_locked(struct hn_softc *);
static int			hn_chan_attach(struct hn_softc *,
				    struct vmbus_channel *);
static void			hn_chan_detach(struct hn_softc *,
				    struct vmbus_channel *);
static int			hn_attach_subchans(struct hn_softc *);
static void			hn_detach_allchans(struct hn_softc *);
static void			hn_chan_rollup(struct hn_rx_ring *,
				    struct hn_tx_ring *);
static void			hn_set_ring_inuse(struct hn_softc *, int);
static int			hn_synth_attach(struct hn_softc *, int);
static void			hn_synth_detach(struct hn_softc *);
static int			hn_synth_alloc_subchans(struct hn_softc *,
				    int *);
static bool			hn_synth_attachable(const struct hn_softc *);
static void			hn_suspend(struct hn_softc *);
static void			hn_suspend_data(struct hn_softc *);
static void			hn_suspend_mgmt(struct hn_softc *);
static void			hn_resume(struct hn_softc *);
static void			hn_resume_data(struct hn_softc *);
static void			hn_resume_mgmt(struct hn_softc *);
static void			hn_suspend_mgmt_taskfunc(void *, int);
static void			hn_chan_drain(struct hn_softc *,
				    struct vmbus_channel *);
static void			hn_disable_rx(struct hn_softc *);
static void			hn_drain_rxtx(struct hn_softc *, int);
static void			hn_polling(struct hn_softc *, u_int);
static void			hn_chan_polling(struct vmbus_channel *, u_int);

static void			hn_update_link_status(struct hn_softc *);
static void			hn_change_network(struct hn_softc *);
static void			hn_link_taskfunc(void *, int);
static void			hn_netchg_init_taskfunc(void *, int);
static void			hn_netchg_status_taskfunc(void *, int);
static void			hn_link_status(struct hn_softc *);

static int			hn_create_rx_data(struct hn_softc *, int);
static void			hn_destroy_rx_data(struct hn_softc *);
static int			hn_check_iplen(const struct mbuf *, int);
static int			hn_set_rxfilter(struct hn_softc *, uint32_t);
static int			hn_rxfilter_config(struct hn_softc *);
static int			hn_rss_reconfig(struct hn_softc *);
static void			hn_rss_ind_fixup(struct hn_softc *);
static int			hn_rxpkt(struct hn_rx_ring *, const void *,
				    int, const struct hn_rxinfo *);

static int			hn_tx_ring_create(struct hn_softc *, int);
static void			hn_tx_ring_destroy(struct hn_tx_ring *);
static int			hn_create_tx_data(struct hn_softc *, int);
static void			hn_fixup_tx_data(struct hn_softc *);
static void			hn_destroy_tx_data(struct hn_softc *);
static void			hn_txdesc_dmamap_destroy(struct hn_txdesc *);
static void			hn_txdesc_gc(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_encap(struct ifnet *, struct hn_tx_ring *,
				    struct hn_txdesc *, struct mbuf **);
static int			hn_txpkt(struct ifnet *, struct hn_tx_ring *,
				    struct hn_txdesc *);
static void			hn_set_chim_size(struct hn_softc *, int);
static void			hn_set_tso_maxsize(struct hn_softc *, int, int);
static bool			hn_tx_ring_pending(struct hn_tx_ring *);
static void			hn_tx_ring_qflush(struct hn_tx_ring *);
static void			hn_resume_tx(struct hn_softc *, int);
static void			hn_set_txagg(struct hn_softc *);
static void			*hn_try_txagg(struct ifnet *,
				    struct hn_tx_ring *, struct hn_txdesc *,
				    int);
static int			hn_get_txswq_depth(const struct hn_tx_ring *);
static void			hn_txpkt_done(struct hn_nvs_sendctx *,
				    struct hn_softc *, struct vmbus_channel *,
				    const void *, int);
static int			hn_txpkt_sglist(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_txpkt_chim(struct hn_tx_ring *,
				    struct hn_txdesc *);
static int			hn_xmit(struct hn_tx_ring *, int);
static void			hn_xmit_taskfunc(void *, int);
static void			hn_xmit_txeof(struct hn_tx_ring *);
static void			hn_xmit_txeof_taskfunc(void *, int);
#ifdef HN_IFSTART_SUPPORT
static int			hn_start_locked(struct hn_tx_ring *, int);
static void			hn_start_taskfunc(void *, int);
static void			hn_start_txeof(struct hn_tx_ring *);
static void			hn_start_txeof_taskfunc(void *, int);
#endif
SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hyper-V network interface");

/* Trust TCP segment verification on the host side. */
static int hn_trust_hosttcp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
    &hn_trust_hosttcp, 0,
    "Trust TCP segment verification on host side, "
    "when csum info is missing (global setting)");

/* Trust UDP datagram verification on the host side. */
static int hn_trust_hostudp = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
    &hn_trust_hostudp, 0,
    "Trust UDP datagram verification on host side, "
    "when csum info is missing (global setting)");

/* Trust IP packet verification on the host side. */
static int hn_trust_hostip = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
    &hn_trust_hostip, 0,
    "Trust IP packet verification on host side, "
    "when csum info is missing (global setting)");

/* Limit TSO burst size */
static int hn_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &hn_tso_maxlen, 0, "TSO burst limit");

/* Limit chimney send size */
static int hn_tx_chimney_size = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
    &hn_tx_chimney_size, 0, "Chimney send packet size limit");
/* Limit the size of packet for direct transmission */
static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
    &hn_direct_tx_size, 0, "Size of the packet for direct transmission");

/* # of LRO entries per RX ring */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &hn_lro_entry_count, 0, "LRO entry count");
#endif
#endif

static int hn_tx_taskq_cnt = 1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_cnt, CTLFLAG_RDTUN,
    &hn_tx_taskq_cnt, 0, "# of TX taskqueues");

#define HN_TX_TASKQ_M_INDEP	0
#define HN_TX_TASKQ_M_GLOBAL	1
#define HN_TX_TASKQ_M_EVTTQ	2

static int hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_taskq_mode, CTLFLAG_RDTUN,
    &hn_tx_taskq_mode, 0, "TX taskqueue modes: "
    "0 - independent, 1 - share global tx taskqs, 2 - share event taskqs");

#ifndef HN_USE_TXDESC_BUFRING
static int hn_use_txdesc_bufring = 0;
#else
static int hn_use_txdesc_bufring = 1;
#endif
SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
    &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");

#ifdef HN_IFSTART_SUPPORT
/* Use ifnet.if_start instead of ifnet.if_transmit */
static int hn_use_if_start = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
    &hn_use_if_start, 0, "Use if_start TX method");
#endif
/* # of channels to use */
static int hn_chan_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
    &hn_chan_cnt, 0,
    "# of channels to use; each channel has one RX ring and one TX ring");

/* # of transmit rings to use */
static int hn_tx_ring_cnt = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
    &hn_tx_ring_cnt, 0, "# of TX rings to use");

/* Software TX ring depth */
static int hn_tx_swq_depth = 0;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
    &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");

/* Enable sorted LRO, and the depth of the per-channel mbuf queue */
#if __FreeBSD_version >= 1100095
static u_int hn_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
#endif

/* Packet transmission aggregation size limit */
static int hn_tx_agg_size = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_size, CTLFLAG_RDTUN,
    &hn_tx_agg_size, 0, "Packet transmission aggregation size limit");

/* Packet transmission aggregation count limit */
static int hn_tx_agg_pkts = -1;
SYSCTL_INT(_hw_hn, OID_AUTO, tx_agg_pkts, CTLFLAG_RDTUN,
    &hn_tx_agg_pkts, 0, "Packet transmission aggregation packet limit");

static u_int hn_cpu_index;	/* next CPU for channel */
static struct taskqueue	**hn_tx_taskque;/* shared TX taskqueues */

static const uint8_t
hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
	0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
	0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
	0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
	0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
	0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
};
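/*
 * Editor's note: this appears to be the canonical 40-byte Toeplitz
 * sample key from Microsoft's RSS documentation; the same default key
 * ships in several other NIC drivers, which keeps RSS hash results
 * consistent across implementations that use it.
 */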
static device_method_t hn_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		hn_probe),
	DEVMETHOD(device_attach,	hn_attach),
	DEVMETHOD(device_detach,	hn_detach),
	DEVMETHOD(device_shutdown,	hn_shutdown),
	DEVMETHOD_END
};

static driver_t hn_driver = {
	"hn",
	hn_methods,
	sizeof(struct hn_softc)
};

static devclass_t hn_devclass;

DRIVER_MODULE(hn, vmbus, hn_driver, hn_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);
#if __FreeBSD_version >= 1100099
static void
hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
{
	int i;

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
		sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
}
#endif

static int
hn_txpkt_sglist(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size == 0, ("invalid rndis sglist txd"));
	return (hn_nvs_send_rndis_sglist(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
	    &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt));
}
static int
hn_txpkt_chim(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{
	struct hn_nvs_rndis rndis;

	KASSERT(txd->chim_index != HN_NVS_CHIM_IDX_INVALID &&
	    txd->chim_size > 0, ("invalid rndis chim txd"));

	rndis.nvs_type = HN_NVS_TYPE_RNDIS;
	rndis.nvs_rndis_mtype = HN_NVS_RNDIS_MTYPE_DATA;
	rndis.nvs_chim_idx = txd->chim_index;
	rndis.nvs_chim_sz = txd->chim_size;

	return (hn_nvs_send(txr->hn_chan, VMBUS_CHANPKT_FLAG_RC,
	    &rndis, sizeof(rndis), &txd->send_ctx));
}
static __inline uint32_t
hn_chim_alloc(struct hn_softc *sc)
{
	int i, bmap_cnt = sc->hn_chim_bmap_cnt;
	u_long *bmap = sc->hn_chim_bmap;
	uint32_t ret = HN_NVS_CHIM_IDX_INVALID;

	for (i = 0; i < bmap_cnt; ++i) {
		int idx;

		idx = ffsl(~bmap[i]);
		if (idx == 0)
			continue;

		--idx; /* ffsl is 1-based */
		KASSERT(i * LONG_BIT + idx < sc->hn_chim_cnt,
		    ("invalid i %d and idx %d", i, idx));

		if (atomic_testandset_long(&bmap[i], idx))
			continue;

		ret = i * LONG_BIT + idx;
		break;
	}
	return (ret);
}

static void
hn_chim_free(struct hn_softc *sc, uint32_t chim_idx)
{
	u_long mask;
	uint32_t idx;

	idx = chim_idx / LONG_BIT;
	KASSERT(idx < sc->hn_chim_bmap_cnt,
	    ("invalid chimney index 0x%x", chim_idx));

	mask = 1UL << (chim_idx % LONG_BIT);
	KASSERT(sc->hn_chim_bmap[idx] & mask,
	    ("index bitmap 0x%lx, chimney index %u, "
	     "bitmap idx %d, bitmask 0x%lx",
	     sc->hn_chim_bmap[idx], chim_idx, idx, mask));

	atomic_clear_long(&sc->hn_chim_bmap[idx], mask);
}
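/*
 * Editor's note (usage sketch): hn_chim_alloc()/hn_chim_free() hand
 * out fixed-size slots of the pre-posted chimney sending buffer via a
 * lock-free bitmap; ffsl() finds a candidate free bit and the atomic
 * test-and-set arbitrates races.  A hypothetical caller:
 *
 *	uint32_t idx = hn_chim_alloc(sc);
 *
 *	if (idx != HN_NVS_CHIM_IDX_INVALID) {
 *		memcpy(sc->hn_chim + idx * sc->hn_chim_szmax, pkt, len);
 *		... send; hn_chim_free(sc, idx) runs on TX completion ...
 *	}
 */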
#if defined(INET6) || defined(INET)
/*
 * NOTE: If this function failed, the m_head would be freed.
 */
static __inline struct mbuf *
hn_tso_fixup(struct mbuf *m_head)
{
	struct ether_vlan_header *evl;
	struct tcphdr *th;
	int ehlen;

	KASSERT(M_WRITABLE(m_head), ("TSO mbuf not writable"));

#define PULLUP_HDR(m, len)				\
do {							\
	if (__predict_false((m)->m_len < (len))) {	\
		(m) = m_pullup((m), (len));		\
		if ((m) == NULL)			\
			return (NULL);			\
	}						\
} while (0)

	PULLUP_HDR(m_head, sizeof(*evl));
	evl = mtod(m_head, struct ether_vlan_header *);
	if (evl->evl_encap_proto == ntohs(ETHERTYPE_VLAN))
		ehlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	else
		ehlen = ETHER_HDR_LEN;

#ifdef INET
	if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
		struct ip *ip;
		int iphlen;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip));
		ip = mtodo(m_head, ehlen);
		iphlen = ip->ip_hl << 2;

		PULLUP_HDR(m_head, ehlen + iphlen + sizeof(*th));
		th = mtodo(m_head, ehlen + iphlen);

		ip->ip_len = 0;
		ip->ip_sum = 0;
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
	}
#endif
#if defined(INET6) && defined(INET)
	else
#endif
#ifdef INET6
	{
		struct ip6_hdr *ip6;

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6));
		ip6 = mtodo(m_head, ehlen);
		if (ip6->ip6_nxt != IPPROTO_TCP) {
			m_freem(m_head);
			return (NULL);
		}

		PULLUP_HDR(m_head, ehlen + sizeof(*ip6) + sizeof(*th));
		th = mtodo(m_head, ehlen + sizeof(*ip6));

		ip6->ip6_plen = 0;
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
	}
#endif

#undef PULLUP_HDR

	return (m_head);
}
#endif	/* INET6 || INET */
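/*
 * Editor's note: the fixup above seeds th_sum with the pseudo-header
 * checksum only (no TCP length), which is what the host's LSOv2
 * offload expects before it segments the run into MSS-sized frames;
 * the transmit path is expected to apply this to every CSUM_TSO mbuf
 * before hn_encap() builds the matching NDIS_PKTINFO_TYPE_LSO record.
 */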
static int
hn_set_rxfilter(struct hn_softc *sc, uint32_t filter)
{
	int error = 0;

	HN_LOCK_ASSERT(sc);

	if (sc->hn_rx_filter != filter) {
		error = hn_rndis_set_rxfilter(sc, filter);
		if (!error)
			sc->hn_rx_filter = filter;
	}
	return (error);
}

static int
hn_rxfilter_config(struct hn_softc *sc)
{
	struct ifnet *ifp = sc->hn_ifp;
	uint32_t filter;

	HN_LOCK_ASSERT(sc);

	if ((ifp->if_flags & IFF_PROMISC) ||
	    (sc->hn_flags & HN_FLAG_VF)) {
		filter = NDIS_PACKET_TYPE_PROMISCUOUS;
	} else {
		filter = NDIS_PACKET_TYPE_DIRECTED;
		if (ifp->if_flags & IFF_BROADCAST)
			filter |= NDIS_PACKET_TYPE_BROADCAST;
		/* TODO: support multicast list */
		if ((ifp->if_flags & IFF_ALLMULTI) ||
		    !TAILQ_EMPTY(&ifp->if_multiaddrs))
			filter |= NDIS_PACKET_TYPE_ALL_MULTICAST;
	}
	return (hn_set_rxfilter(sc, filter));
}
static void
hn_set_txagg(struct hn_softc *sc)
{
	uint32_t size, pkts;
	int i;

	/*
	 * Setup aggregation size.
	 */
	if (sc->hn_agg_size < 0)
		size = UINT32_MAX;
	else
		size = sc->hn_agg_size;

	if (sc->hn_rndis_agg_size < size)
		size = sc->hn_rndis_agg_size;

	/* NOTE: We only aggregate packets using chimney sending buffers. */
	if (size > (uint32_t)sc->hn_chim_szmax)
		size = sc->hn_chim_szmax;

	if (size <= 2 * HN_PKTSIZE_MIN(sc->hn_rndis_agg_align)) {
		/* Disable */
		size = 0;
		pkts = 0;
		goto done;
	}

	/* NOTE: Type of the per TX ring setting is 'int'. */
	if (size > INT_MAX)
		size = INT_MAX;

	/*
	 * Setup aggregation packet count.
	 */
	if (sc->hn_agg_pkts < 0)
		pkts = UINT32_MAX;
	else
		pkts = sc->hn_agg_pkts;

	if (sc->hn_rndis_agg_pkts < pkts)
		pkts = sc->hn_rndis_agg_pkts;

	if (pkts <= 1) {
		/* Disable */
		size = 0;
		pkts = 0;
		goto done;
	}

	/* NOTE: Type of the per TX ring setting is 'short'. */
	if (pkts > SHRT_MAX)
		pkts = SHRT_MAX;
done:
	/* NOTE: Type of the per TX ring setting is 'short'. */
	if (sc->hn_rndis_agg_align > SHRT_MAX) {
		/* Disable */
		size = 0;
		pkts = 0;
	}

	if (bootverbose) {
		if_printf(sc->hn_ifp, "TX agg size %u, pkts %u, align %u\n",
		    size, pkts, sc->hn_rndis_agg_align);
	}

	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_agg_szmax = size;
		txr->hn_agg_pktmax = pkts;
		txr->hn_agg_align = sc->hn_rndis_agg_align;
		mtx_unlock(&txr->hn_tx_lock);
	}
}
static int
hn_get_txswq_depth(const struct hn_tx_ring *txr)
{

	KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
	if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
		return txr->hn_txdesc_cnt;
	return hn_tx_swq_depth;
}

static int
hn_rss_reconfig(struct hn_softc *sc)
{
	int error;

	HN_LOCK_ASSERT(sc);

	if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
		return (ENXIO);

	/*
	 * Disable RSS first.
	 *
	 * NOTE:
	 * Direct reconfiguration by setting the UNCHG flags does
	 * _not_ work properly.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "disable RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS disable failed\n");
		return (error);
	}

	/*
	 * Reenable the RSS w/ the updated RSS key or indirect
	 * table.
	 */
	if (bootverbose)
		if_printf(sc->hn_ifp, "reconfig RSS\n");
	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error) {
		if_printf(sc->hn_ifp, "RSS reconfig failed\n");
		return (error);
	}
	return (0);
}
static void
hn_rss_ind_fixup(struct hn_softc *sc)
{
	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int i, nchan;

	nchan = sc->hn_rx_ring_inuse;
	KASSERT(nchan > 1, ("invalid # of channels %d", nchan));

	/*
	 * Check indirect table to make sure that all channels in it
	 * can be used.
	 */
	for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
		if (rss->rss_ind[i] >= nchan) {
			if_printf(sc->hn_ifp,
			    "RSS indirect table %d fixup: %u -> %d\n",
			    i, rss->rss_ind[i], nchan - 1);
			rss->rss_ind[i] = nchan - 1;
		}
	}
}
static int
hn_ifmedia_upd(struct ifnet *ifp __unused)
{

	return EOPNOTSUPP;
}

static void
hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
{
	struct hn_softc *sc = ifp->if_softc;

	ifmr->ifm_status = IFM_AVALID;
	ifmr->ifm_active = IFM_ETHER;

	if ((sc->hn_link_flags & HN_LINK_FLAG_LINKUP) == 0) {
		ifmr->ifm_active |= IFM_NONE;
		return;
	}
	ifmr->ifm_status |= IFM_ACTIVE;
	ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
}
static void
hn_update_vf_task(void *arg, int pending __unused)
{
	struct hn_update_vf *uv = arg;

	uv->rxr->hn_vf = uv->vf;
}

static void
hn_update_vf(struct hn_softc *sc, struct ifnet *vf)
{
	struct hn_rx_ring *rxr;
	struct hn_update_vf uv;
	struct task task;
	int i;

	HN_LOCK_ASSERT(sc);

	TASK_INIT(&task, 0, hn_update_vf_task, &uv);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		rxr = &sc->hn_rx_ring[i];

		if (i < sc->hn_rx_ring_inuse) {
			uv.rxr = rxr;
			uv.vf = vf;
			vmbus_chan_run_task(rxr->hn_chan, &task);
		} else {
			rxr->hn_vf = vf;
		}
	}
}
static void
hn_set_vf(struct hn_softc *sc, struct ifnet *ifp, bool vf)
{
	struct ifnet *hn_ifp;

	HN_LOCK(sc);

	if (!(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
		goto out;

	hn_ifp = sc->hn_ifp;
	if (ifp == hn_ifp)
		goto out;

	if (ifp->if_alloctype != IFT_ETHER)
		goto out;

	/* Ignore lagg/vlan interfaces */
	if (strcmp(ifp->if_dname, "lagg") == 0 ||
	    strcmp(ifp->if_dname, "vlan") == 0)
		goto out;

	if (bcmp(IF_LLADDR(ifp), IF_LLADDR(hn_ifp), ETHER_ADDR_LEN) != 0)
		goto out;

	/* Now we're sure 'ifp' is a real VF device. */
	if (vf) {
		if (sc->hn_flags & HN_FLAG_VF)
			goto out;

		sc->hn_flags |= HN_FLAG_VF;
		hn_rxfilter_config(sc);
	} else {
		if (!(sc->hn_flags & HN_FLAG_VF))
			goto out;

		sc->hn_flags &= ~HN_FLAG_VF;
		if (sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING)
			hn_rxfilter_config(sc);
		else
			hn_set_rxfilter(sc, NDIS_PACKET_TYPE_NONE);
	}

	hn_nvs_set_datapath(sc,
	    vf ? HN_NVS_DATAPATH_VF : HN_NVS_DATAPATH_SYNTHETIC);

	hn_update_vf(sc, vf ? ifp : NULL);

	if (vf) {
		hn_suspend_mgmt(sc);
		sc->hn_link_flags &=
		    ~(HN_LINK_FLAG_LINKUP | HN_LINK_FLAG_NETCHG);
		if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
	} else {
		hn_resume_mgmt(sc);
	}

	devctl_notify("HYPERV_NIC_VF", if_name(hn_ifp),
	    vf ? "VF_UP" : "VF_DOWN", NULL);

	if (bootverbose) {
		if_printf(hn_ifp, "Data path is switched %s %s\n",
		    vf ? "to" : "from", if_name(ifp));
	}
out:
	HN_UNLOCK(sc);
}
static void
hn_ifnet_event(void *arg, struct ifnet *ifp, int event)
{

	if (event != IFNET_EVENT_UP && event != IFNET_EVENT_DOWN)
		return;

	hn_set_vf(arg, ifp, event == IFNET_EVENT_UP);
}

static void
hn_ifaddr_event(void *arg, struct ifnet *ifp)
{

	hn_set_vf(arg, ifp, ifp->if_flags & IFF_UP);
}

/* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
static const struct hyperv_guid g_net_vsc_device_type = {
	.hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
		0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
};

static int
hn_probe(device_t dev)
{

	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
	    &g_net_vsc_device_type) == 0) {
		device_set_desc(dev, "Hyper-V Network Interface");
		return BUS_PROBE_DEFAULT;
	}
	return ENXIO;
}
static int
hn_attach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	uint8_t eaddr[ETHER_ADDR_LEN];
	struct ifnet *ifp = NULL;
	int error, ring_cnt, tx_ring_cnt;

	sc->hn_dev = dev;
	sc->hn_prichan = vmbus_get_channel(dev);
	HN_LOCK_INIT(sc);

	/*
	 * Initialize these tunables once.
	 */
	sc->hn_agg_size = hn_tx_agg_size;
	sc->hn_agg_pkts = hn_tx_agg_pkts;

	/*
	 * Setup taskqueue for transmission.
	 */
	if (hn_tx_taskq_mode == HN_TX_TASKQ_M_INDEP) {
		int i;

		sc->hn_tx_taskqs =
		    malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
		    M_DEVBUF, M_WAITOK);
		for (i = 0; i < hn_tx_taskq_cnt; ++i) {
			sc->hn_tx_taskqs[i] = taskqueue_create("hn_tx",
			    M_WAITOK, taskqueue_thread_enqueue,
			    &sc->hn_tx_taskqs[i]);
			taskqueue_start_threads(&sc->hn_tx_taskqs[i], 1, PI_NET,
			    "%s tx%d", device_get_nameunit(dev), i);
		}
	} else if (hn_tx_taskq_mode == HN_TX_TASKQ_M_GLOBAL) {
		sc->hn_tx_taskqs = hn_tx_taskque;
	}

	/*
	 * Setup taskqueue for management tasks, e.g. link status.
	 */
	sc->hn_mgmt_taskq0 = taskqueue_create("hn_mgmt", M_WAITOK,
	    taskqueue_thread_enqueue, &sc->hn_mgmt_taskq0);
	taskqueue_start_threads(&sc->hn_mgmt_taskq0, 1, PI_NET, "%s mgmt",
	    device_get_nameunit(dev));
	TASK_INIT(&sc->hn_link_task, 0, hn_link_taskfunc, sc);
	TASK_INIT(&sc->hn_netchg_init, 0, hn_netchg_init_taskfunc, sc);
	TIMEOUT_TASK_INIT(sc->hn_mgmt_taskq0, &sc->hn_netchg_status, 0,
	    hn_netchg_status_taskfunc, sc);

	/*
	 * Allocate ifnet and setup its name earlier, so that if_printf
	 * can be used by functions, which will be called after
	 * ether_ifattach().
	 */
	ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	/*
	 * Initialize ifmedia earlier so that it can be unconditionally
	 * destroyed, if error happened later on.
	 */
	ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);

	/*
	 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
	 * to use (tx_ring_cnt).
	 *
	 * NOTE:
	 * The # of RX rings to use is same as the # of channels to use.
	 */
	ring_cnt = hn_chan_cnt;
	if (ring_cnt <= 0) {
		/* Default */
		ring_cnt = mp_ncpus;
		if (ring_cnt > HN_RING_CNT_DEF_MAX)
			ring_cnt = HN_RING_CNT_DEF_MAX;
	} else if (ring_cnt > mp_ncpus) {
		ring_cnt = mp_ncpus;
	}

	tx_ring_cnt = hn_tx_ring_cnt;
	if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
		tx_ring_cnt = ring_cnt;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		/* ifnet.if_start only needs one TX ring. */
		tx_ring_cnt = 1;
	}
#endif

	/*
	 * Set the leader CPU for channels.
	 */
	sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;

	/*
	 * Create enough TX/RX rings, even if only limited number of
	 * channels can be allocated.
	 */
	error = hn_create_tx_data(sc, tx_ring_cnt);
	if (error)
		goto failed;
	error = hn_create_rx_data(sc, ring_cnt);
	if (error)
		goto failed;

	/*
	 * Create transaction context for NVS and RNDIS transactions.
	 */
	sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
	    HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
	if (sc->hn_xact == NULL) {
		error = ENXIO;
		goto failed;
	}

	/*
	 * Install orphan handler for the revocation of this device's
	 * primary channel.
	 *
	 * NOTE:
	 * The processing order is critical here:
	 * Install the orphan handler, _before_ testing whether this
	 * device's primary channel has been revoked or not.
	 */
	vmbus_chan_set_orphan(sc->hn_prichan, sc->hn_xact);
	if (vmbus_chan_is_revoked(sc->hn_prichan)) {
		error = ENXIO;
		goto failed;
	}

	/*
	 * Attach the synthetic parts, i.e. NVS and RNDIS.
	 */
	error = hn_synth_attach(sc, ETHERMTU);
	if (error)
		goto failed;

	error = hn_rndis_get_eaddr(sc, eaddr);
	if (error)
		goto failed;

#if __FreeBSD_version >= 1100099
	if (sc->hn_rx_ring_inuse > 1) {
		/*
		 * Reduce TCP segment aggregation limit for multiple
		 * RX rings to increase ACK timeliness.
		 */
		hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
	}
#endif

	/*
	 * Fix up TX settings after the synthetic parts are attached.
	 */
	hn_fixup_tx_data(sc);
	ctx = device_get_sysctl_ctx(dev);
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
	    &sc->hn_nvs_ver, 0, "NVS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_ndis_version_sysctl, "A", "NDIS version");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_caps_sysctl, "A", "capabilities");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_hwassist_sysctl, "A", "hwassist");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rxfilter",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rxfilter_sysctl, "A", "rxfilter");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_hash",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_hash_sysctl, "A", "RSS hash");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rss_ind_size",
	    CTLFLAG_RD, &sc->hn_rss_ind_size, 0, "RSS indirect entry count");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_key_sysctl, "IU", "RSS key");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
	    CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_rss_ind_sysctl, "IU", "RSS indirect table");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_size",
	    CTLFLAG_RD, &sc->hn_rndis_agg_size, 0,
	    "RNDIS offered packet transmission aggregation size limit");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_pkts",
	    CTLFLAG_RD, &sc->hn_rndis_agg_pkts, 0,
	    "RNDIS offered packet transmission aggregation count limit");
	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "rndis_agg_align",
	    CTLFLAG_RD, &sc->hn_rndis_agg_align, 0,
	    "RNDIS packet transmission aggregation alignment");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_size",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_txagg_size_sysctl, "I",
	    "Packet transmission aggregation size, 0 -- disable, -1 -- auto");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pkts",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_txagg_pkts_sysctl, "I",
	    "Packet transmission aggregation packets, "
	    "0 -- disable, -1 -- auto");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "polling",
	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_polling_sysctl, "I",
	    "Polling frequency: [100,1000000], 0 disable polling");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "vf",
	    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
	    hn_vf_sysctl, "A", "Virtual Function's name");
	/*
	 * Setup the ifmedia, which has been initialized earlier.
	 */
	ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
	/* XXX ifmedia_set really should do this for us */
	sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;

	/*
	 * Setup the ifnet for this interface.
	 */

#ifdef __LP64__
	ifp->if_baudrate = IF_Gbps(10);
#else
	/* if_baudrate is 32bits on 32bit system. */
	ifp->if_baudrate = IF_Gbps(1);
#endif
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_ioctl = hn_ioctl;
	ifp->if_init = hn_init;
#ifdef HN_IFSTART_SUPPORT
	if (hn_use_if_start) {
		int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);

		ifp->if_start = hn_start;
		IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
		ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
		IFQ_SET_READY(&ifp->if_snd);
	} else
#endif
	{
		ifp->if_transmit = hn_transmit;
		ifp->if_qflush = hn_xmit_qflush;
	}

	ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
#ifdef foo
	/* We can't diff IPv6 packets from IPv4 packets on RX path. */
	ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif
	if (sc->hn_caps & HN_CAP_VLAN) {
		/* XXX not sure about VLAN_MTU. */
		ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
	}

	ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
	if (ifp->if_hwassist & HN_CSUM_IP_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM;
	if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
		ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
	if (sc->hn_caps & HN_CAP_TSO4) {
		ifp->if_capabilities |= IFCAP_TSO4;
		ifp->if_hwassist |= CSUM_IP_TSO;
	}
	if (sc->hn_caps & HN_CAP_TSO6) {
		ifp->if_capabilities |= IFCAP_TSO6;
		ifp->if_hwassist |= CSUM_IP6_TSO;
	}

	/* Enable all available capabilities by default. */
	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Disable IPv6 TSO and TXCSUM by default, they still can
	 * be enabled through SIOCSIFCAP.
	 */
	ifp->if_capenable &= ~(IFCAP_TXCSUM_IPV6 | IFCAP_TSO6);
	ifp->if_hwassist &= ~(HN_CSUM_IP6_MASK | CSUM_IP6_TSO);

	if (ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) {
		hn_set_tso_maxsize(sc, hn_tso_maxlen, ETHERMTU);
		ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
		ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
	}

	ether_ifattach(ifp, eaddr);

	if ((ifp->if_capabilities & (IFCAP_TSO6 | IFCAP_TSO4)) && bootverbose) {
		if_printf(ifp, "TSO segcnt %u segsz %u\n",
		    ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
	}

	/* Inform the upper layer about the long frame support. */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);

	/*
	 * Kick off link status check.
	 */
	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;
	hn_update_link_status(sc);

	sc->hn_ifnet_evthand = EVENTHANDLER_REGISTER(ifnet_event,
	    hn_ifnet_event, sc, EVENTHANDLER_PRI_ANY);
	sc->hn_ifaddr_evthand = EVENTHANDLER_REGISTER(ifaddr_event,
	    hn_ifaddr_event, sc, EVENTHANDLER_PRI_ANY);

	return (0);
failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED)
		hn_synth_detach(sc);
	hn_detach(dev);
	return (error);
}
static int
hn_detach(device_t dev)
{
	struct hn_softc *sc = device_get_softc(dev);
	struct ifnet *ifp = sc->hn_ifp;

	if (sc->hn_ifaddr_evthand != NULL)
		EVENTHANDLER_DEREGISTER(ifaddr_event, sc->hn_ifaddr_evthand);
	if (sc->hn_ifnet_evthand != NULL)
		EVENTHANDLER_DEREGISTER(ifnet_event, sc->hn_ifnet_evthand);

	if (sc->hn_xact != NULL && vmbus_chan_is_revoked(sc->hn_prichan)) {
		/*
		 * In case that the vmbus missed the orphan handler
		 * installation.
		 */
		vmbus_xact_ctx_orphan(sc->hn_xact);
	}

	if (device_is_attached(dev)) {
		HN_LOCK(sc);
		if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				hn_stop(sc, true);
			/*
			 * NOTE:
			 * hn_stop() only suspends data, so management
			 * tasks have to be suspended manually here.
			 */
			hn_suspend_mgmt(sc);
			hn_synth_detach(sc);
		}
		HN_UNLOCK(sc);
		ether_ifdetach(ifp);
	}

	ifmedia_removeall(&sc->hn_media);
	hn_destroy_rx_data(sc);
	hn_destroy_tx_data(sc);

	if (sc->hn_tx_taskqs != NULL && sc->hn_tx_taskqs != hn_tx_taskque) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(sc->hn_tx_taskqs[i]);
		free(sc->hn_tx_taskqs, M_DEVBUF);
	}
	taskqueue_free(sc->hn_mgmt_taskq0);

	if (sc->hn_xact != NULL) {
		/*
		 * Uninstall the orphan handler _before_ the xact is
		 * destructed.
		 */
		vmbus_chan_unset_orphan(sc->hn_prichan);
		vmbus_xact_ctx_destroy(sc->hn_xact);
	}

	if_free(ifp);

	HN_LOCK_DESTROY(sc);
	return (0);
}
static int
hn_shutdown(device_t dev)
{

	return (0);
}

static void
hn_link_status(struct hn_softc *sc)
{
	uint32_t link_status;
	int error;

	error = hn_rndis_get_linkstatus(sc, &link_status);
	if (error) {
		/* XXX what to do? */
		return;
	}

	if (link_status == NDIS_MEDIA_STATE_CONNECTED)
		sc->hn_link_flags |= HN_LINK_FLAG_LINKUP;
	else
		sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp,
	    (sc->hn_link_flags & HN_LINK_FLAG_LINKUP) ?
	    LINK_STATE_UP : LINK_STATE_DOWN);
}
static void
hn_link_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		return;
	hn_link_status(sc);
}

static void
hn_netchg_init_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Prevent any link status checks from running. */
	sc->hn_link_flags |= HN_LINK_FLAG_NETCHG;

	/*
	 * Fake up a [link down --> link up] state change; 5 seconds
	 * delay is used, which closely simulates miibus reaction
	 * upon link down event.
	 */
	sc->hn_link_flags &= ~HN_LINK_FLAG_LINKUP;
	if_link_state_change(sc->hn_ifp, LINK_STATE_DOWN);
	taskqueue_enqueue_timeout(sc->hn_mgmt_taskq0,
	    &sc->hn_netchg_status, 5 * hz);
}

static void
hn_netchg_status_taskfunc(void *xsc, int pending __unused)
{
	struct hn_softc *sc = xsc;

	/* Re-allow link status checks. */
	sc->hn_link_flags &= ~HN_LINK_FLAG_NETCHG;
	hn_link_status(sc);
}

static void
hn_update_link_status(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_link_task);
}

static void
hn_change_network(struct hn_softc *sc)
{

	if (sc->hn_mgmt_taskq != NULL)
		taskqueue_enqueue(sc->hn_mgmt_taskq, &sc->hn_netchg_init);
}
static __inline int
hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
{
	struct mbuf *m = *m_head;
	int error;

	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID, ("txd uses chim"));

	error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
	    m, segs, nsegs, BUS_DMA_NOWAIT);
	if (error == EFBIG) {
		struct mbuf *m_new;

		m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
		if (m_new == NULL)
			return ENOBUFS;
		else
			*m_head = m = m_new;
		txr->hn_tx_collapsed++;

		error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
		    txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
	}
	if (!error) {
		bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
		    BUS_DMASYNC_PREWRITE);
		txd->flags |= HN_TXD_FLAG_DMAMAP;
	}
	return error;
}
static __inline int
hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
{

	KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
	    ("put an onlist txd %#x", txd->flags));
	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("put an onagg txd %#x", txd->flags));

	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	if (atomic_fetchadd_int(&txd->refs, -1) != 1)
		return 0;

	if (!STAILQ_EMPTY(&txd->agg_list)) {
		struct hn_txdesc *tmp_txd;

		while ((tmp_txd = STAILQ_FIRST(&txd->agg_list)) != NULL) {
			int freed;

			KASSERT(STAILQ_EMPTY(&tmp_txd->agg_list),
			    ("recursive aggregation on aggregated txdesc"));
			KASSERT((tmp_txd->flags & HN_TXD_FLAG_ONAGG),
			    ("not aggregated txdesc"));
			KASSERT((tmp_txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
			    ("aggregated txdesc uses dmamap"));
			KASSERT(tmp_txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
			    ("aggregated txdesc consumes "
			     "chimney sending buffer"));
			KASSERT(tmp_txd->chim_size == 0,
			    ("aggregated txdesc has non-zero "
			     "chimney sending size"));

			STAILQ_REMOVE_HEAD(&txd->agg_list, agg_link);
			tmp_txd->flags &= ~HN_TXD_FLAG_ONAGG;
			freed = hn_txdesc_put(txr, tmp_txd);
			KASSERT(freed, ("failed to free aggregated txdesc"));
		}
	}

	if (txd->chim_index != HN_NVS_CHIM_IDX_INVALID) {
		KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0,
		    ("chim txd uses dmamap"));
		hn_chim_free(txr->hn_sc, txd->chim_index);
		txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
		txd->chim_size = 0;
	} else if (txd->flags & HN_TXD_FLAG_DMAMAP) {
		bus_dmamap_sync(txr->hn_tx_data_dtag,
		    txd->data_dmap, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(txr->hn_tx_data_dtag,
		    txd->data_dmap);
		txd->flags &= ~HN_TXD_FLAG_DMAMAP;
	}

	if (txd->m != NULL) {
		m_freem(txd->m);
		txd->m = NULL;
	}

	txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	KASSERT(txr->hn_txdesc_avail >= 0 &&
	    txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
	    ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
	txr->hn_txdesc_avail++;
	SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else	/* HN_USE_TXDESC_BUFRING */
#ifdef HN_DEBUG
	atomic_add_int(&txr->hn_txdesc_avail, 1);
#endif
	buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif	/* !HN_USE_TXDESC_BUFRING */

	return 1;
}
static __inline struct hn_txdesc *
hn_txdesc_get(struct hn_tx_ring *txr)
{
	struct hn_txdesc *txd;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	txd = SLIST_FIRST(&txr->hn_txlist);
	if (txd != NULL) {
		KASSERT(txr->hn_txdesc_avail > 0,
		    ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
		txr->hn_txdesc_avail--;
		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
	}
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
#endif

	if (txd != NULL) {
#ifdef HN_USE_TXDESC_BUFRING
#ifdef HN_DEBUG
		atomic_subtract_int(&txr->hn_txdesc_avail, 1);
#endif
#endif	/* HN_USE_TXDESC_BUFRING */
		KASSERT(txd->m == NULL && txd->refs == 0 &&
		    STAILQ_EMPTY(&txd->agg_list) &&
		    txd->chim_index == HN_NVS_CHIM_IDX_INVALID &&
		    txd->chim_size == 0 &&
		    (txd->flags & HN_TXD_FLAG_ONLIST) &&
		    (txd->flags & HN_TXD_FLAG_ONAGG) == 0 &&
		    (txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("invalid txd"));
		txd->flags &= ~HN_TXD_FLAG_ONLIST;
		txd->refs = 1;
	}
	return txd;
}
static __inline void
hn_txdesc_hold(struct hn_txdesc *txd)
{

	/* 0->1 transition will never work */
	KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
	atomic_add_int(&txd->refs, 1);
}

static __inline void
hn_txdesc_agg(struct hn_txdesc *agg_txd, struct hn_txdesc *txd)
{

	KASSERT((agg_txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("recursive aggregation on aggregating txdesc"));

	KASSERT((txd->flags & HN_TXD_FLAG_ONAGG) == 0,
	    ("already aggregated"));
	KASSERT(STAILQ_EMPTY(&txd->agg_list),
	    ("recursive aggregation on to-be-aggregated txdesc"));

	txd->flags |= HN_TXD_FLAG_ONAGG;
	STAILQ_INSERT_TAIL(&agg_txd->agg_list, txd, agg_link);
}
static bool
hn_tx_ring_pending(struct hn_tx_ring *txr)
{
	bool pending = false;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_lock_spin(&txr->hn_txlist_spin);
	if (txr->hn_txdesc_avail != txr->hn_txdesc_cnt)
		pending = true;
	mtx_unlock_spin(&txr->hn_txlist_spin);
#else
	if (!buf_ring_full(txr->hn_txdesc_br))
		pending = true;
#endif
	return (pending);
}

static __inline void
hn_txeof(struct hn_tx_ring *txr)
{
	txr->hn_has_txeof = 0;
	txr->hn_txeof(txr);
}
static void
hn_txpkt_done(struct hn_nvs_sendctx *sndc, struct hn_softc *sc,
    struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
{
	struct hn_txdesc *txd = sndc->hn_cbarg;
	struct hn_tx_ring *txr;

	txr = txd->txr;
	KASSERT(txr->hn_chan == chan,
	    ("channel mismatch, on chan%u, should be chan%u",
	     vmbus_chan_id(chan), vmbus_chan_id(txr->hn_chan)));

	txr->hn_has_txeof = 1;
	hn_txdesc_put(txr, txd);

	++txr->hn_txdone_cnt;
	if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
		txr->hn_txdone_cnt = 0;
		if (txr->hn_oactive)
			hn_txeof(txr);
	}
}
static void
hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
{
#if defined(INET) || defined(INET6)
	struct lro_ctrl *lro = &rxr->hn_lro;
	struct lro_entry *queued;

	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}
#endif

	/*
	 * NOTE:
	 * 'txr' could be NULL, if multiple channels and the
	 * ifnet.if_start method are enabled.
	 */
	if (txr == NULL || !txr->hn_has_txeof)
		return;

	txr->hn_txdone_cnt = 0;
	hn_txeof(txr);
}
static __inline uint32_t
hn_rndis_pktmsg_offset(uint32_t ofs)
{

	KASSERT(ofs >= sizeof(struct rndis_packet_msg),
	    ("invalid RNDIS packet msg offset %u", ofs));
	return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
}

static __inline void *
hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, size_t pktsize,
    size_t pi_dlen, uint32_t pi_type)
{
	const size_t pi_size = HN_RNDIS_PKTINFO_SIZE(pi_dlen);
	struct rndis_pktinfo *pi;

	KASSERT((pi_size & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK) == 0,
	    ("unaligned pktinfo size %zu, pktinfo dlen %zu", pi_size, pi_dlen));

	/*
	 * Per-packet-info does not move; it only grows.
	 *
	 * NOTE:
	 * rm_pktinfooffset in this phase counts from the beginning
	 * of rndis_packet_msg.
	 */
	KASSERT(pkt->rm_pktinfooffset + pkt->rm_pktinfolen + pi_size <= pktsize,
	    ("%u pktinfo overflows RNDIS packet msg", pi_type));
	pi = (struct rndis_pktinfo *)((uint8_t *)pkt + pkt->rm_pktinfooffset +
	    pkt->rm_pktinfolen);
	pkt->rm_pktinfolen += pi_size;

	pi->rm_size = pi_size;
	pi->rm_type = pi_type;
	pi->rm_pktinfooffset = RNDIS_PKTINFO_OFFSET;

	return (pi->rm_data);
}
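/*
 * Editor's note (usage sketch): each metadata record is appended by
 * reserving HN_RNDIS_PKTINFO_SIZE(pi_dlen) bytes behind the current
 * pktinfo area and returning a pointer to its payload, e.g. a
 * hypothetical caller:
 *
 *	uint32_t *pi_data;
 *
 *	pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
 *	    HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
 *	*pi_data = hash_value;
 *
 * hn_encap() below uses exactly this pattern for hash, VLAN, LSO and
 * checksum info.
 */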
static void
hn_flush_txagg(struct ifnet *ifp, struct hn_tx_ring *txr)
{
	struct hn_txdesc *txd;
	struct mbuf *m;
	int error, pkts;

	txd = txr->hn_agg_txd;
	KASSERT(txd != NULL, ("no aggregate txdesc"));

	/*
	 * Since hn_txpkt() will reset this temporary stat, save
	 * it now, so that oerrors can be updated properly, if
	 * hn_txpkt() ever fails.
	 */
	pkts = txr->hn_stat_pkts;

	/*
	 * Since txd's mbuf will _not_ be freed upon hn_txpkt()
	 * failure, save it for later freeing, if hn_txpkt() ever
	 * fails.
	 */
	m = txd->m;
	error = hn_txpkt(ifp, txr, txd);
	if (__predict_false(error)) {
		/* txd is freed, but m is not. */
		m_freem(m);

		txr->hn_flush_failed++;
		if_inc_counter(ifp, IFCOUNTER_OERRORS, pkts);
	}

	/* Reset all aggregation states. */
	txr->hn_agg_txd = NULL;
	txr->hn_agg_szleft = 0;
	txr->hn_agg_pktleft = 0;
	txr->hn_agg_prevpkt = NULL;
}
static void *
hn_try_txagg(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
    int pktsize)
{
	void *chim;

	if (txr->hn_agg_txd != NULL) {
		if (txr->hn_agg_pktleft >= 1 && txr->hn_agg_szleft > pktsize) {
			struct hn_txdesc *agg_txd = txr->hn_agg_txd;
			struct rndis_packet_msg *pkt = txr->hn_agg_prevpkt;
			int olen;

			/*
			 * Update the previous RNDIS packet's total length,
			 * it can be increased due to the mandatory alignment
			 * padding for this RNDIS packet.  And update the
			 * aggregating txdesc's chimney sending buffer size
			 * accordingly.
			 *
			 * NOTE:
			 * Zero-out the padding, as required by the RNDIS spec.
			 */
			olen = pkt->rm_len;
			pkt->rm_len = roundup2(olen, txr->hn_agg_align);
			agg_txd->chim_size += pkt->rm_len - olen;

			/* Link this txdesc to the parent. */
			hn_txdesc_agg(agg_txd, txd);

			chim = (uint8_t *)pkt + pkt->rm_len;
			/* Save the current packet for later fixup. */
			txr->hn_agg_prevpkt = chim;

			txr->hn_agg_pktleft--;
			txr->hn_agg_szleft -= pktsize;
			if (txr->hn_agg_szleft <=
			    HN_PKTSIZE_MIN(txr->hn_agg_align)) {
				/*
				 * Probably can't aggregate more packets,
				 * flush this aggregating txdesc proactively.
				 */
				txr->hn_agg_pktleft = 0;
			}
			return (chim);
		}
		hn_flush_txagg(ifp, txr);
	}
	KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));

	txr->hn_tx_chimney_tried++;
	txd->chim_index = hn_chim_alloc(txr->hn_sc);
	if (txd->chim_index == HN_NVS_CHIM_IDX_INVALID)
		return (NULL);
	txr->hn_tx_chimney++;

	chim = txr->hn_sc->hn_chim +
	    (txd->chim_index * txr->hn_sc->hn_chim_szmax);

	if (txr->hn_agg_pktmax > 1 &&
	    txr->hn_agg_szmax > pktsize + HN_PKTSIZE_MIN(txr->hn_agg_align)) {
		txr->hn_agg_txd = txd;
		txr->hn_agg_pktleft = txr->hn_agg_pktmax - 1;
		txr->hn_agg_szleft = txr->hn_agg_szmax - pktsize;
		txr->hn_agg_prevpkt = chim;
	}
	return (chim);
}
/*
 * NOTE:
 * If this function fails, then both txd and m_head0 will be freed.
 */
static int
hn_encap(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd,
    struct mbuf **m_head0)
{
	bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
	int error, nsegs, i;
	struct mbuf *m_head = *m_head0;
	struct rndis_packet_msg *pkt;
	uint32_t *pi_data;
	void *chim = NULL;
	int pkt_hlen, pkt_size;

	pkt = txd->rndis_pkt;
	pkt_size = HN_PKTSIZE(m_head, txr->hn_agg_align);
	if (pkt_size < txr->hn_chim_size) {
		chim = hn_try_txagg(ifp, txr, txd, pkt_size);
		if (chim != NULL)
			pkt = chim;
	} else {
		if (txr->hn_agg_txd != NULL)
			hn_flush_txagg(ifp, txr);
	}

	pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
	pkt->rm_len = m_head->m_pkthdr.len;
	pkt->rm_dataoffset = 0;
	pkt->rm_datalen = m_head->m_pkthdr.len;
	pkt->rm_oobdataoffset = 0;
	pkt->rm_oobdatalen = 0;
	pkt->rm_oobdataelements = 0;
	pkt->rm_pktinfooffset = sizeof(*pkt);
	pkt->rm_pktinfolen = 0;
	pkt->rm_vchandle = 0;
	pkt->rm_reserved = 0;

	if (txr->hn_tx_flags & HN_TX_FLAG_HASHVAL) {
		/*
		 * Set the hash value for this packet, so that the host could
		 * dispatch the TX done event for this packet back to this TX
		 * ring's channel.
		 */
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
		*pi_data = txr->hn_tx_idx;
	}

	if (m_head->m_flags & M_VLANTAG) {
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
		*pi_data = NDIS_VLAN_INFO_MAKE(
		    EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
		    EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
		    EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
	}

	if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
#if defined(INET6) || defined(INET)
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
#ifdef INET
		if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
			*pi_data = NDIS_LSO2_INFO_MAKEIPV4(0,
			    m_head->m_pkthdr.tso_segsz);
		}
#endif
#if defined(INET6) && defined(INET)
		else
#endif
#ifdef INET6
		{
			*pi_data = NDIS_LSO2_INFO_MAKEIPV6(0,
			    m_head->m_pkthdr.tso_segsz);
		}
#endif
#endif	/* INET6 || INET */
	} else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
		pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
		    NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
		if (m_head->m_pkthdr.csum_flags &
		    (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
			*pi_data = NDIS_TXCSUM_INFO_IPV6;
		} else {
			*pi_data = NDIS_TXCSUM_INFO_IPV4;
			if (m_head->m_pkthdr.csum_flags & CSUM_IP)
				*pi_data |= NDIS_TXCSUM_INFO_IPCS;
		}

		if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
			*pi_data |= NDIS_TXCSUM_INFO_TCPCS;
		else if (m_head->m_pkthdr.csum_flags &
		    (CSUM_IP_UDP | CSUM_IP6_UDP))
			*pi_data |= NDIS_TXCSUM_INFO_UDPCS;
	}

	pkt_hlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
	/* Fixup RNDIS packet message total length */
	pkt->rm_len += pkt_hlen;
	/* Convert RNDIS packet message offsets */
	pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt_hlen);
	pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);
	/*
	 * Fast path: Chimney sending.
	 */
	if (chim != NULL) {
		struct hn_txdesc *tgt_txd = txd;

		if (txr->hn_agg_txd != NULL) {
			tgt_txd = txr->hn_agg_txd;
		}

		KASSERT(pkt == chim,
		    ("RNDIS pkt not in chimney sending buffer"));
		KASSERT(tgt_txd->chim_index != HN_NVS_CHIM_IDX_INVALID,
		    ("chimney sending buffer is not used"));
		tgt_txd->chim_size += pkt->rm_len;

		m_copydata(m_head, 0, m_head->m_pkthdr.len,
		    ((uint8_t *)chim) + pkt_hlen);

		txr->hn_gpa_cnt = 0;
		txr->hn_sendpkt = hn_txpkt_chim;
		goto done;
	}

	KASSERT(txr->hn_agg_txd == NULL, ("aggregating sglist txdesc"));
	KASSERT(txd->chim_index == HN_NVS_CHIM_IDX_INVALID,
	    ("chimney buffer is used"));
	KASSERT(pkt == txd->rndis_pkt, ("RNDIS pkt not in txdesc"));

	error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
	if (__predict_false(error)) {
		int freed;

		/*
		 * This mbuf is not linked w/ the txd yet, so free it now.
		 */
		m_freem(m_head);
		*m_head0 = NULL;

		freed = hn_txdesc_put(txr, txd);
		KASSERT(freed != 0,
		    ("fail to free txd upon txdma error"));

		txr->hn_txdma_failed++;
		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
		return error;
	}
	*m_head0 = m_head;

	/* +1 RNDIS packet message */
	txr->hn_gpa_cnt = nsegs + 1;

	/* send packet with page buffer */
	txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
	txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
	txr->hn_gpa[0].gpa_len = pkt_hlen;

	/*
	 * Fill the page buffers with mbuf info after the page
	 * buffer for RNDIS packet message.
	 */
	for (i = 0; i < nsegs; ++i) {
		struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];

		gpa->gpa_page = atop(segs[i].ds_addr);
		gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
		gpa->gpa_len = segs[i].ds_len;
	}

	txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
	txd->chim_size = 0;
	txr->hn_sendpkt = hn_txpkt_sglist;
done:
	txd->m = m_head;

	/* Set the completion routine */
	hn_nvs_sendctx_init(&txd->send_ctx, hn_txpkt_done, txd);

	/* Update temporary stats for later use. */
	txr->hn_stat_pkts++;
	txr->hn_stat_size += m_head->m_pkthdr.len;
	if (m_head->m_flags & M_MCAST)
		txr->hn_stat_mcasts++;

	return 0;
}
2089 * If this function fails, then txd will be freed, but the mbuf
2090 * associated w/ the txd will _not_ be freed.
2093 hn_txpkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
2095 int error, send_failed = 0, has_bpf;
2098 has_bpf = bpf_peers_present(ifp->if_bpf);
2101 * Make sure that this txd and any aggregated txds are not
2102 * freed before ETHER_BPF_MTAP.
2104 hn_txdesc_hold(txd);
2106 error = txr->hn_sendpkt(txr, txd);
2109 const struct hn_txdesc *tmp_txd;
2111 ETHER_BPF_MTAP(ifp, txd->m);
2112 STAILQ_FOREACH(tmp_txd, &txd->agg_list, agg_link)
2113 ETHER_BPF_MTAP(ifp, tmp_txd->m);
2116 if_inc_counter(ifp, IFCOUNTER_OPACKETS, txr->hn_stat_pkts);
2117 #ifdef HN_IFSTART_SUPPORT
2118 if (!hn_use_if_start)
2121 if_inc_counter(ifp, IFCOUNTER_OBYTES,
2123 if (txr->hn_stat_mcasts != 0) {
2124 if_inc_counter(ifp, IFCOUNTER_OMCASTS,
2125 txr->hn_stat_mcasts);
2128 txr->hn_pkts += txr->hn_stat_pkts;
2132 hn_txdesc_put(txr, txd);
2134 if (__predict_false(error)) {
2138 * This should "really rarely" happen.
2140 * XXX Too many RX to be acked or too many sideband
2141 * commands to run? Ask netvsc_channel_rollup()
2142 * to kick start later.
2144 txr->hn_has_txeof = 1;
2146 txr->hn_send_failed++;
2149 * Try sending again after set hn_has_txeof;
2150 * in case that we missed the last
2151 * netvsc_channel_rollup().
2155 if_printf(ifp, "send failed\n");
2158 * Caller will perform further processing on the
2159 * associated mbuf, so don't free it in hn_txdesc_put();
2160 * only unload it from the DMA map in hn_txdesc_put(),
2164 freed = hn_txdesc_put(txr, txd);
2166 ("fail to free txd upon send error"));
2168 txr->hn_send_failed++;
2171 /* Reset temporary stats after this sending is done. */
2172 txr->hn_stat_size = 0;
2173 txr->hn_stat_pkts = 0;
2174 txr->hn_stat_mcasts = 0;
2180 * Append the specified data to the indicated mbuf chain.
2181 * Extend the mbuf chain if the new data does not fit in
2184 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
2185 * There should be an equivalent in the kernel mbuf code,
2186 * but there does not appear to be one yet.
2188 * Differs from m_append() in that additional mbufs are
2189 * allocated with cluster size MJUMPAGESIZE, and filled
2192 * Return 1 if able to complete the job; otherwise 0.
2195 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
2198 int remainder, space;
2200 for (m = m0; m->m_next != NULL; m = m->m_next)
2203 space = M_TRAILINGSPACE(m);
2206 * Copy into available space.
2208 if (space > remainder)
2210 bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
2215 while (remainder > 0) {
2217 * Allocate a new mbuf; could check space
2218 * and allocate a cluster instead.
2220 n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
2223 n->m_len = min(MJUMPAGESIZE, remainder);
2224 bcopy(cp, mtod(n, caddr_t), n->m_len);
2226 remainder -= n->m_len;
2230 if (m0->m_flags & M_PKTHDR)
2231 m0->m_pkthdr.len += len - remainder;
2233 return (remainder == 0);
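/*
 * Usage sketch (illustrative only; hv_m_example() is a
 * hypothetical helper, not part of this driver): copy a received
 * buffer into a fresh packet-header mbuf with hv_m_append(),
 * which grows the chain with MJUMPAGESIZE clusters as needed.
 */
#if 0
static struct mbuf *
hv_m_example(c_caddr_t data, int dlen)
{
	struct mbuf *m;

	m = m_gethdr(M_NOWAIT, MT_DATA);
	if (m == NULL)
		return (NULL);
	/* m_len starts at 0; hv_m_append() fills from the tail. */
	if (!hv_m_append(m, dlen, data)) {
		/* Cluster allocation failed; free the partial chain. */
		m_freem(m);
		return (NULL);
	}
	/* On success, m_pkthdr.len has been advanced by dlen. */
	return (m);
}
#endif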
2236 #if defined(INET) || defined(INET6)
2238 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
2240 #if __FreeBSD_version >= 1100095
2241 if (hn_lro_mbufq_depth) {
2242 tcp_lro_queue_mbuf(lc, m);
2246 return tcp_lro_rx(lc, m, 0);
2251 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
2252 const struct hn_rxinfo *info)
2256 int size, do_lro = 0, do_csum = 1;
2257 int hash_type = M_HASHTYPE_OPAQUE;
2259 /* If the VF is active, inject the packet through the VF */
2260 ifp = rxr->hn_vf ? rxr->hn_vf : rxr->hn_ifp;
2262 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
2265 * See the NOTE of hn_rndis_init_fixat(). This
2266 * function can be reached immediately after the
2267 * RNDIS is initialized but before the ifnet is
2268 * set up on the hn_attach() path; drop the unexpected
2274 if (dlen <= MHLEN) {
2275 m_new = m_gethdr(M_NOWAIT, MT_DATA);
2276 if (m_new == NULL) {
2277 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
2280 memcpy(mtod(m_new, void *), data, dlen);
2281 m_new->m_pkthdr.len = m_new->m_len = dlen;
2282 rxr->hn_small_pkts++;
2285 * Get an mbuf with a cluster. For packets 2K or less,
2286 * get a standard 2K cluster. For anything larger, get a
2287 * 4K cluster. Any buffers larger than 4K can cause problems
2288 * if looped around to the Hyper-V TX channel, so avoid them.
2291 if (dlen > MCLBYTES) {
2293 size = MJUMPAGESIZE;
2296 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
2297 if (m_new == NULL) {
2298 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
2302 hv_m_append(m_new, dlen, data);
2304 m_new->m_pkthdr.rcvif = ifp;
2306 if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
2309 /* receive side checksum offload */
2310 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
2311 /* IP csum offload */
2312 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
2313 m_new->m_pkthdr.csum_flags |=
2314 (CSUM_IP_CHECKED | CSUM_IP_VALID);
2318 /* TCP/UDP csum offload */
2319 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
2320 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
2321 m_new->m_pkthdr.csum_flags |=
2322 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2323 m_new->m_pkthdr.csum_data = 0xffff;
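/*
 * NOTE: CSUM_DATA_VALID | CSUM_PSEUDO_HDR with csum_data 0xffff
 * tells the stack that the TCP/UDP checksum, including the
 * pseudo header, has already been verified, so the stack skips
 * its own verification.
 */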
2324 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
2332 * As of this writing (Oct 28th, 2016), the host side will turn
2333 * on only TCPCS_OK and IPCS_OK even for UDP datagrams, so
2334 * the do_lro setting here is actually _not_ accurate. We
2335 * depend on the RSS hash type check to reset do_lro.
2337 if ((info->csum_info &
2338 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
2339 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
2342 const struct ether_header *eh;
2347 if (m_new->m_len < hoff)
2349 eh = mtod(m_new, struct ether_header *);
2350 etype = ntohs(eh->ether_type);
2351 if (etype == ETHERTYPE_VLAN) {
2352 const struct ether_vlan_header *evl;
2354 hoff = sizeof(*evl);
2355 if (m_new->m_len < hoff)
2357 evl = mtod(m_new, struct ether_vlan_header *);
2358 etype = ntohs(evl->evl_proto);
2361 if (etype == ETHERTYPE_IP) {
2364 pr = hn_check_iplen(m_new, hoff);
2365 if (pr == IPPROTO_TCP) {
2367 (rxr->hn_trust_hcsum &
2368 HN_TRUST_HCSUM_TCP)) {
2369 rxr->hn_csum_trusted++;
2370 m_new->m_pkthdr.csum_flags |=
2371 (CSUM_IP_CHECKED | CSUM_IP_VALID |
2372 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2373 m_new->m_pkthdr.csum_data = 0xffff;
2376 } else if (pr == IPPROTO_UDP) {
2378 (rxr->hn_trust_hcsum &
2379 HN_TRUST_HCSUM_UDP)) {
2380 rxr->hn_csum_trusted++;
2381 m_new->m_pkthdr.csum_flags |=
2382 (CSUM_IP_CHECKED | CSUM_IP_VALID |
2383 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2384 m_new->m_pkthdr.csum_data = 0xffff;
2386 } else if (pr != IPPROTO_DONE && do_csum &&
2387 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
2388 rxr->hn_csum_trusted++;
2389 m_new->m_pkthdr.csum_flags |=
2390 (CSUM_IP_CHECKED | CSUM_IP_VALID);
2395 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
2396 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
2397 NDIS_VLAN_INFO_ID(info->vlan_info),
2398 NDIS_VLAN_INFO_PRI(info->vlan_info),
2399 NDIS_VLAN_INFO_CFI(info->vlan_info));
2400 m_new->m_flags |= M_VLANTAG;
2403 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
2405 m_new->m_pkthdr.flowid = info->hash_value;
2406 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
2407 NDIS_HASH_FUNCTION_TOEPLITZ) {
2408 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK);
2412 * do_lro is reset if the hash types are not TCP
2413 * related. See the comment in the above csum_flags
2417 case NDIS_HASH_IPV4:
2418 hash_type = M_HASHTYPE_RSS_IPV4;
2422 case NDIS_HASH_TCP_IPV4:
2423 hash_type = M_HASHTYPE_RSS_TCP_IPV4;
2426 case NDIS_HASH_IPV6:
2427 hash_type = M_HASHTYPE_RSS_IPV6;
2431 case NDIS_HASH_IPV6_EX:
2432 hash_type = M_HASHTYPE_RSS_IPV6_EX;
2436 case NDIS_HASH_TCP_IPV6:
2437 hash_type = M_HASHTYPE_RSS_TCP_IPV6;
2440 case NDIS_HASH_TCP_IPV6_EX:
2441 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
2446 m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
2448 M_HASHTYPE_SET(m_new, hash_type);
2451 * Note: Moved RX completion back to hv_nv_on_receive() so all
2452 * messages (not just data messages) will trigger a response.
2458 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
2459 #if defined(INET) || defined(INET6)
2460 struct lro_ctrl *lro = &rxr->hn_lro;
2463 rxr->hn_lro_tried++;
2464 if (hn_lro_rx(lro, m_new) == 0) {
2472 /* We're not holding the lock here, so don't release it */
2473 (*ifp->if_input)(ifp, m_new);
2479 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
2481 struct hn_softc *sc = ifp->if_softc;
2482 struct ifreq *ifr = (struct ifreq *)data;
2483 int mask, error = 0;
2487 if (ifr->ifr_mtu > HN_MTU_MAX) {
2494 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2499 if ((sc->hn_caps & HN_CAP_MTU) == 0) {
2500 /* Can't change MTU */
2506 if (ifp->if_mtu == ifr->ifr_mtu) {
2512 * Suspend this interface before the synthetic parts
2518 * Detach the synthetic parts, i.e. NVS and RNDIS.
2520 hn_synth_detach(sc);
2523 * Reattach the synthetic parts, i.e. NVS and RNDIS,
2524 * with the new MTU setting.
2526 error = hn_synth_attach(sc, ifr->ifr_mtu);
2533 * Commit the requested MTU, after the synthetic parts
2534 * have been successfully attached.
2536 ifp->if_mtu = ifr->ifr_mtu;
2539 * Make sure that various parameters based on MTU are
2540 * still valid, after the MTU change.
2542 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
2543 hn_set_chim_size(sc, sc->hn_chim_szmax);
2544 hn_set_tso_maxsize(sc, hn_tso_maxlen, ifp->if_mtu);
2545 #if __FreeBSD_version >= 1100099
2546 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
2547 HN_LRO_LENLIM_MIN(ifp))
2548 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
2552 * All done! Resume the interface now.
2562 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2567 if (ifp->if_flags & IFF_UP) {
2568 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2570 * Caller might hold a mutex, e.g.
2571 * bpf; use busy-wait for the RNDIS
2575 hn_rxfilter_config(sc);
2581 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2584 sc->hn_if_flags = ifp->if_flags;
2591 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
2593 if (mask & IFCAP_TXCSUM) {
2594 ifp->if_capenable ^= IFCAP_TXCSUM;
2595 if (ifp->if_capenable & IFCAP_TXCSUM)
2596 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
2598 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
2600 if (mask & IFCAP_TXCSUM_IPV6) {
2601 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
2602 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
2603 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
2605 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
2608 /* TODO: flip RNDIS offload parameters for RXCSUM. */
2609 if (mask & IFCAP_RXCSUM)
2610 ifp->if_capenable ^= IFCAP_RXCSUM;
2612 /* We can't distinguish IPv6 packets from IPv4 packets on the RX path. */
2613 if (mask & IFCAP_RXCSUM_IPV6)
2614 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
2617 if (mask & IFCAP_LRO)
2618 ifp->if_capenable ^= IFCAP_LRO;
2620 if (mask & IFCAP_TSO4) {
2621 ifp->if_capenable ^= IFCAP_TSO4;
2622 if (ifp->if_capenable & IFCAP_TSO4)
2623 ifp->if_hwassist |= CSUM_IP_TSO;
2625 ifp->if_hwassist &= ~CSUM_IP_TSO;
2627 if (mask & IFCAP_TSO6) {
2628 ifp->if_capenable ^= IFCAP_TSO6;
2629 if (ifp->if_capenable & IFCAP_TSO6)
2630 ifp->if_hwassist |= CSUM_IP6_TSO;
2632 ifp->if_hwassist &= ~CSUM_IP6_TSO;
2642 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0) {
2646 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2648 * Multicast uses mutex; use busy-wait for
2652 hn_rxfilter_config(sc);
2661 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
2665 error = ether_ioctl(ifp, cmd, data);
2672 hn_stop(struct hn_softc *sc, bool detaching)
2674 struct ifnet *ifp = sc->hn_ifp;
2679 KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
2680 ("synthetic parts were not attached"));
2682 /* Disable polling. */
2685 /* Clear RUNNING bit _before_ hn_suspend_data() */
2686 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
2687 hn_suspend_data(sc);
2689 /* Clear OACTIVE bit. */
2690 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
2691 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
2692 sc->hn_tx_ring[i].hn_oactive = 0;
2695 * If the VF is active, make sure the filter is not 0, even if
2696 * the synthetic NIC is down.
2698 if (!detaching && (sc->hn_flags & HN_FLAG_VF))
2699 hn_rxfilter_config(sc);
2703 hn_init_locked(struct hn_softc *sc)
2705 struct ifnet *ifp = sc->hn_ifp;
2710 if ((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0)
2713 if (ifp->if_drv_flags & IFF_DRV_RUNNING)
2716 /* Configure RX filter */
2717 hn_rxfilter_config(sc);
2719 /* Clear OACTIVE bit. */
2720 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
2721 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
2722 sc->hn_tx_ring[i].hn_oactive = 0;
2724 /* Clear TX 'suspended' bit. */
2725 hn_resume_tx(sc, sc->hn_tx_ring_inuse);
2727 /* Everything is ready; unleash! */
2728 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
2730 /* Re-enable polling if requested. */
2731 if (sc->hn_pollhz > 0)
2732 hn_polling(sc, sc->hn_pollhz);
2738 struct hn_softc *sc = xsc;
2745 #if __FreeBSD_version >= 1100099
2748 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
2750 struct hn_softc *sc = arg1;
2751 unsigned int lenlim;
2754 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
2755 error = sysctl_handle_int(oidp, &lenlim, 0, req);
2756 if (error || req->newptr == NULL)
2760 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
2761 lenlim > TCP_LRO_LENGTH_MAX) {
2765 hn_set_lro_lenlim(sc, lenlim);
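/*
 * NOTE: This and the following sysctl handlers share one idiom:
 * sysctl_handle_*() copies the current value out to userland and,
 * on a write, copies the new value in; req->newptr == NULL means
 * a read-only query, so the handler returns before validating or
 * applying anything.
 */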
2772 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
2774 struct hn_softc *sc = arg1;
2775 int ackcnt, error, i;
2778 * lro_ackcnt_lim is the append count limit;
2779 * +1 turns it into the aggregation limit.
2781 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
2782 error = sysctl_handle_int(oidp, &ackcnt, 0, req);
2783 if (error || req->newptr == NULL)
2786 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
2790 * Convert aggregation limit back to append
2795 for (i = 0; i < sc->hn_rx_ring_cnt; ++i)
2796 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
2804 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
2806 struct hn_softc *sc = arg1;
2811 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
2814 error = sysctl_handle_int(oidp, &on, 0, req);
2815 if (error || req->newptr == NULL)
2819 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2820 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
2823 rxr->hn_trust_hcsum |= hcsum;
2825 rxr->hn_trust_hcsum &= ~hcsum;
2832 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
2834 struct hn_softc *sc = arg1;
2835 int chim_size, error;
2837 chim_size = sc->hn_tx_ring[0].hn_chim_size;
2838 error = sysctl_handle_int(oidp, &chim_size, 0, req);
2839 if (error || req->newptr == NULL)
2842 if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
2846 hn_set_chim_size(sc, chim_size);
2851 #if __FreeBSD_version < 1100095
2853 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
2855 struct hn_softc *sc = arg1;
2856 int ofs = arg2, i, error;
2857 struct hn_rx_ring *rxr;
2861 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2862 rxr = &sc->hn_rx_ring[i];
2863 stat += *((int *)((uint8_t *)rxr + ofs));
2866 error = sysctl_handle_64(oidp, &stat, 0, req);
2867 if (error || req->newptr == NULL)
2870 /* Zero out this stat. */
2871 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2872 rxr = &sc->hn_rx_ring[i];
2873 *((int *)((uint8_t *)rxr + ofs)) = 0;
2879 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
2881 struct hn_softc *sc = arg1;
2882 int ofs = arg2, i, error;
2883 struct hn_rx_ring *rxr;
2887 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2888 rxr = &sc->hn_rx_ring[i];
2889 stat += *((uint64_t *)((uint8_t *)rxr + ofs));
2892 error = sysctl_handle_64(oidp, &stat, 0, req);
2893 if (error || req->newptr == NULL)
2896 /* Zero out this stat. */
2897 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2898 rxr = &sc->hn_rx_ring[i];
2899 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
2907 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2909 struct hn_softc *sc = arg1;
2910 int ofs = arg2, i, error;
2911 struct hn_rx_ring *rxr;
2915 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2916 rxr = &sc->hn_rx_ring[i];
2917 stat += *((u_long *)((uint8_t *)rxr + ofs));
2920 error = sysctl_handle_long(oidp, &stat, 0, req);
2921 if (error || req->newptr == NULL)
2924 /* Zero out this stat. */
2925 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2926 rxr = &sc->hn_rx_ring[i];
2927 *((u_long *)((uint8_t *)rxr + ofs)) = 0;
2933 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2935 struct hn_softc *sc = arg1;
2936 int ofs = arg2, i, error;
2937 struct hn_tx_ring *txr;
2941 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
2942 txr = &sc->hn_tx_ring[i];
2943 stat += *((u_long *)((uint8_t *)txr + ofs));
2946 error = sysctl_handle_long(oidp, &stat, 0, req);
2947 if (error || req->newptr == NULL)
2950 /* Zero out this stat. */
2951 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
2952 txr = &sc->hn_tx_ring[i];
2953 *((u_long *)((uint8_t *)txr + ofs)) = 0;
2959 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
2961 struct hn_softc *sc = arg1;
2962 int ofs = arg2, i, error, conf;
2963 struct hn_tx_ring *txr;
2965 txr = &sc->hn_tx_ring[0];
2966 conf = *((int *)((uint8_t *)txr + ofs));
2968 error = sysctl_handle_int(oidp, &conf, 0, req);
2969 if (error || req->newptr == NULL)
2973 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
2974 txr = &sc->hn_tx_ring[i];
2975 *((int *)((uint8_t *)txr + ofs)) = conf;
2983 hn_txagg_size_sysctl(SYSCTL_HANDLER_ARGS)
2985 struct hn_softc *sc = arg1;
2988 size = sc->hn_agg_size;
2989 error = sysctl_handle_int(oidp, &size, 0, req);
2990 if (error || req->newptr == NULL)
2994 sc->hn_agg_size = size;
3002 hn_txagg_pkts_sysctl(SYSCTL_HANDLER_ARGS)
3004 struct hn_softc *sc = arg1;
3007 pkts = sc->hn_agg_pkts;
3008 error = sysctl_handle_int(oidp, &pkts, 0, req);
3009 if (error || req->newptr == NULL)
3013 sc->hn_agg_pkts = pkts;
3021 hn_txagg_pktmax_sysctl(SYSCTL_HANDLER_ARGS)
3023 struct hn_softc *sc = arg1;
3026 pkts = sc->hn_tx_ring[0].hn_agg_pktmax;
3027 return (sysctl_handle_int(oidp, &pkts, 0, req));
3031 hn_txagg_align_sysctl(SYSCTL_HANDLER_ARGS)
3033 struct hn_softc *sc = arg1;
3036 align = sc->hn_tx_ring[0].hn_agg_align;
3037 return (sysctl_handle_int(oidp, &align, 0, req));
3041 hn_chan_polling(struct vmbus_channel *chan, u_int pollhz)
3044 vmbus_chan_poll_disable(chan);
3046 vmbus_chan_poll_enable(chan, pollhz);
3050 hn_polling(struct hn_softc *sc, u_int pollhz)
3052 int nsubch = sc->hn_rx_ring_inuse - 1;
3057 struct vmbus_channel **subch;
3060 subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
3061 for (i = 0; i < nsubch; ++i)
3062 hn_chan_polling(subch[i], pollhz);
3063 vmbus_subchan_rel(subch, nsubch);
3065 hn_chan_polling(sc->hn_prichan, pollhz);
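/*
 * NOTE: The polling mode is applied to the sub-channels first
 * and to the primary channel last; the primary channel also
 * carries non-data messages, so it is switched only after all
 * sub-channels have been.
 */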
3069 hn_polling_sysctl(SYSCTL_HANDLER_ARGS)
3071 struct hn_softc *sc = arg1;
3074 pollhz = sc->hn_pollhz;
3075 error = sysctl_handle_int(oidp, &pollhz, 0, req);
3076 if (error || req->newptr == NULL)
3080 (pollhz < VMBUS_CHAN_POLLHZ_MIN || pollhz > VMBUS_CHAN_POLLHZ_MAX))
3084 if (sc->hn_pollhz != pollhz) {
3085 sc->hn_pollhz = pollhz;
3086 if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) &&
3087 (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED))
3088 hn_polling(sc, sc->hn_pollhz);
3096 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
3098 struct hn_softc *sc = arg1;
3101 snprintf(verstr, sizeof(verstr), "%u.%u",
3102 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
3103 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
3104 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
3108 hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
3110 struct hn_softc *sc = arg1;
3117 snprintf(caps_str, sizeof(caps_str), "%b", caps, HN_CAP_BITS);
3118 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req);
3122 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
3124 struct hn_softc *sc = arg1;
3125 char assist_str[128];
3129 hwassist = sc->hn_ifp->if_hwassist;
3131 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS);
3132 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req);
3136 hn_rxfilter_sysctl(SYSCTL_HANDLER_ARGS)
3138 struct hn_softc *sc = arg1;
3139 char filter_str[128];
3143 filter = sc->hn_rx_filter;
3145 snprintf(filter_str, sizeof(filter_str), "%b", filter,
3147 return sysctl_handle_string(oidp, filter_str, sizeof(filter_str), req);
3151 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
3153 struct hn_softc *sc = arg1;
3158 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
3159 if (error || req->newptr == NULL)
3162 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
3165 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
3167 if (sc->hn_rx_ring_inuse > 1) {
3168 error = hn_rss_reconfig(sc);
3170 /* Not RSS capable, at least for now; just save the RSS key. */
3179 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
3181 struct hn_softc *sc = arg1;
3186 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
3187 if (error || req->newptr == NULL)
3191 * Don't allow RSS indirect table changes if this interface is
3192 * not currently RSS capable.
3194 if (sc->hn_rx_ring_inuse == 1) {
3199 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
3202 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
3204 hn_rss_ind_fixup(sc);
3205 error = hn_rss_reconfig(sc);
3212 hn_rss_hash_sysctl(SYSCTL_HANDLER_ARGS)
3214 struct hn_softc *sc = arg1;
3219 hash = sc->hn_rss_hash;
3221 snprintf(hash_str, sizeof(hash_str), "%b", hash, NDIS_HASH_BITS);
3222 return sysctl_handle_string(oidp, hash_str, sizeof(hash_str), req);
3226 hn_vf_sysctl(SYSCTL_HANDLER_ARGS)
3228 struct hn_softc *sc = arg1;
3234 vf = sc->hn_rx_ring[0].hn_vf;
3236 snprintf(vf_name, sizeof(vf_name), "%s", if_name(vf));
3238 return sysctl_handle_string(oidp, vf_name, sizeof(vf_name), req);
3242 hn_check_iplen(const struct mbuf *m, int hoff)
3244 const struct ip *ip;
3245 int len, iphlen, iplen;
3246 const struct tcphdr *th;
3247 int thoff; /* TCP data offset */
3249 len = hoff + sizeof(struct ip);
3251 /* The packet must be at least the size of an IP header. */
3252 if (m->m_pkthdr.len < len)
3253 return IPPROTO_DONE;
3255 /* The fixed IP header must reside completely in the first mbuf. */
3257 return IPPROTO_DONE;
3259 ip = mtodo(m, hoff);
3261 /* Bound check the packet's stated IP header length. */
3262 iphlen = ip->ip_hl << 2;
3263 if (iphlen < sizeof(struct ip)) /* minimum header length */
3264 return IPPROTO_DONE;
3266 /* The full IP header must reside completely in the one mbuf. */
3267 if (m->m_len < hoff + iphlen)
3268 return IPPROTO_DONE;
3270 iplen = ntohs(ip->ip_len);
3273 * Check that the amount of data in the buffers is at
3274 * least as much as the IP header would have us expect.
3276 if (m->m_pkthdr.len < hoff + iplen)
3277 return IPPROTO_DONE;
3280 * Ignore IP fragments.
3282 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
3283 return IPPROTO_DONE;
3286 * The TCP/IP or UDP/IP header must be entirely contained within
3287 * the first fragment of a packet.
3291 if (iplen < iphlen + sizeof(struct tcphdr))
3292 return IPPROTO_DONE;
3293 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
3294 return IPPROTO_DONE;
3295 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
3296 thoff = th->th_off << 2;
3297 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
3298 return IPPROTO_DONE;
3299 if (m->m_len < hoff + iphlen + thoff)
3300 return IPPROTO_DONE;
3303 if (iplen < iphlen + sizeof(struct udphdr))
3304 return IPPROTO_DONE;
3305 if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
3306 return IPPROTO_DONE;
3310 return IPPROTO_DONE;
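/*
 * Worked example (illustrative): for a TCP segment on plain
 * Ethernet, hoff is 14 (sizeof(struct ether_header)) and a
 * minimal IP header gives iphlen 20 (ip_hl 5 << 2), so the first
 * mbuf must hold at least hoff + iphlen + thoff = 14 + 20 + 20 =
 * 54 bytes for a minimal TCP header before the packet is
 * considered for host-checksum trust.
 */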
3317 hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
3319 struct sysctl_oid_list *child;
3320 struct sysctl_ctx_list *ctx;
3321 device_t dev = sc->hn_dev;
3322 #if defined(INET) || defined(INET6)
3323 #if __FreeBSD_version >= 1100095
3330 * Create RXBUF for reception.
3333 * - It is shared by all channels.
3334 * - A large enough buffer is allocated; certain versions of NVS
3335 * may further limit the usable space.
3337 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
3338 PAGE_SIZE, 0, HN_RXBUF_SIZE, &sc->hn_rxbuf_dma,
3339 BUS_DMA_WAITOK | BUS_DMA_ZERO);
3340 if (sc->hn_rxbuf == NULL) {
3341 device_printf(sc->hn_dev, "allocate rxbuf failed\n");
3345 sc->hn_rx_ring_cnt = ring_cnt;
3346 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
3348 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
3349 M_DEVBUF, M_WAITOK | M_ZERO);
3351 #if defined(INET) || defined(INET6)
3352 #if __FreeBSD_version >= 1100095
3353 lroent_cnt = hn_lro_entry_count;
3354 if (lroent_cnt < TCP_LRO_ENTRIES)
3355 lroent_cnt = TCP_LRO_ENTRIES;
3357 device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
3359 #endif /* INET || INET6 */
3361 ctx = device_get_sysctl_ctx(dev);
3362 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
3364 /* Create dev.hn.UNIT.rx sysctl tree */
3365 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
3366 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3368 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
3369 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
3371 rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
3372 PAGE_SIZE, 0, HN_TXBR_SIZE + HN_RXBR_SIZE,
3373 &rxr->hn_br_dma, BUS_DMA_WAITOK);
3374 if (rxr->hn_br == NULL) {
3375 device_printf(dev, "allocate bufring failed\n");
3379 if (hn_trust_hosttcp)
3380 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
3381 if (hn_trust_hostudp)
3382 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
3383 if (hn_trust_hostip)
3384 rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
3385 rxr->hn_ifp = sc->hn_ifp;
3386 if (i < sc->hn_tx_ring_cnt)
3387 rxr->hn_txr = &sc->hn_tx_ring[i];
3388 rxr->hn_pktbuf_len = HN_PKTBUF_LEN_DEF;
3389 rxr->hn_pktbuf = malloc(rxr->hn_pktbuf_len, M_DEVBUF, M_WAITOK);
3391 rxr->hn_rxbuf = sc->hn_rxbuf;
3396 #if defined(INET) || defined(INET6)
3397 #if __FreeBSD_version >= 1100095
3398 tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
3399 hn_lro_mbufq_depth);
3401 tcp_lro_init(&rxr->hn_lro);
3402 rxr->hn_lro.ifp = sc->hn_ifp;
3404 #if __FreeBSD_version >= 1100099
3405 rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
3406 rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
3408 #endif /* INET || INET6 */
3410 if (sc->hn_rx_sysctl_tree != NULL) {
3414 * Create per RX ring sysctl tree:
3415 * dev.hn.UNIT.rx.RINGID
3417 snprintf(name, sizeof(name), "%d", i);
3418 rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
3419 SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
3420 OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3422 if (rxr->hn_rx_sysctl_tree != NULL) {
3423 SYSCTL_ADD_ULONG(ctx,
3424 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3425 OID_AUTO, "packets", CTLFLAG_RW,
3426 &rxr->hn_pkts, "# of packets received");
3427 SYSCTL_ADD_ULONG(ctx,
3428 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3429 OID_AUTO, "rss_pkts", CTLFLAG_RW,
3431 "# of packets w/ RSS info received");
3433 SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
3434 OID_AUTO, "pktbuf_len", CTLFLAG_RD,
3435 &rxr->hn_pktbuf_len, 0,
3436 "Temporary channel packet buffer length");
3441 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
3442 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3443 __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
3444 #if __FreeBSD_version < 1100095
3445 hn_rx_stat_int_sysctl,
3447 hn_rx_stat_u64_sysctl,
3449 "LU", "LRO queued");
3450 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
3451 CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3452 __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
3453 #if __FreeBSD_version < 1100095
3454 hn_rx_stat_int_sysctl,
3456 hn_rx_stat_u64_sysctl,
3458 "LU", "LRO flushed");
3459 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
3460 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3461 __offsetof(struct hn_rx_ring, hn_lro_tried),
3462 hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
3463 #if __FreeBSD_version >= 1100099
3464 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
3465 CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3466 hn_lro_lenlim_sysctl, "IU",
3467 "Max # of data bytes to be aggregated by LRO");
3468 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
3469 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3470 hn_lro_ackcnt_sysctl, "I",
3471 "Max # of ACKs to be aggregated by LRO");
3473 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
3474 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
3475 hn_trust_hcsum_sysctl, "I",
3476 "Trust tcp segement verification on host side, "
3477 "when csum info is missing");
3478 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
3479 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
3480 hn_trust_hcsum_sysctl, "I",
3481 "Trust udp datagram verification on host side, "
3482 "when csum info is missing");
3483 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
3484 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
3485 hn_trust_hcsum_sysctl, "I",
3486 "Trust ip packet verification on host side, "
3487 "when csum info is missing");
3488 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
3489 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3490 __offsetof(struct hn_rx_ring, hn_csum_ip),
3491 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
3492 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
3493 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3494 __offsetof(struct hn_rx_ring, hn_csum_tcp),
3495 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
3496 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
3497 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3498 __offsetof(struct hn_rx_ring, hn_csum_udp),
3499 hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
3500 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
3501 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3502 __offsetof(struct hn_rx_ring, hn_csum_trusted),
3503 hn_rx_stat_ulong_sysctl, "LU",
3504 "# of packets that we trust host's csum verification");
3505 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
3506 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3507 __offsetof(struct hn_rx_ring, hn_small_pkts),
3508 hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
3509 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_ack_failed",
3510 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3511 __offsetof(struct hn_rx_ring, hn_ack_failed),
3512 hn_rx_stat_ulong_sysctl, "LU", "# of RXBUF ack failures");
3513 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
3514 CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
3515 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
3516 CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");
3522 hn_destroy_rx_data(struct hn_softc *sc)
3526 if (sc->hn_rxbuf != NULL) {
3527 if ((sc->hn_flags & HN_FLAG_RXBUF_REF) == 0)
3528 hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
3530 device_printf(sc->hn_dev, "RXBUF is referenced\n");
3531 sc->hn_rxbuf = NULL;
3534 if (sc->hn_rx_ring_cnt == 0)
3537 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
3538 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
3540 if (rxr->hn_br == NULL)
3542 if ((rxr->hn_rx_flags & HN_RX_FLAG_BR_REF) == 0) {
3543 hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);
3545 device_printf(sc->hn_dev,
3546 "%dth channel bufring is referenced", i);
3550 #if defined(INET) || defined(INET6)
3551 tcp_lro_free(&rxr->hn_lro);
3553 free(rxr->hn_pktbuf, M_DEVBUF);
3555 free(sc->hn_rx_ring, M_DEVBUF);
3556 sc->hn_rx_ring = NULL;
3558 sc->hn_rx_ring_cnt = 0;
3559 sc->hn_rx_ring_inuse = 0;
3563 hn_tx_ring_create(struct hn_softc *sc, int id)
3565 struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
3566 device_t dev = sc->hn_dev;
3567 bus_dma_tag_t parent_dtag;
3571 txr->hn_tx_idx = id;
3573 #ifndef HN_USE_TXDESC_BUFRING
3574 mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
3576 mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);
3578 txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
3579 txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
3580 M_DEVBUF, M_WAITOK | M_ZERO);
3581 #ifndef HN_USE_TXDESC_BUFRING
3582 SLIST_INIT(&txr->hn_txlist);
3584 txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_DEVBUF,
3585 M_WAITOK, &txr->hn_tx_lock);
3588 if (hn_tx_taskq_mode == HN_TX_TASKQ_M_EVTTQ) {
3589 txr->hn_tx_taskq = VMBUS_GET_EVENT_TASKQ(
3590 device_get_parent(dev), dev, HN_RING_IDX2CPU(sc, id));
3592 txr->hn_tx_taskq = sc->hn_tx_taskqs[id % hn_tx_taskq_cnt];
3595 #ifdef HN_IFSTART_SUPPORT
3596 if (hn_use_if_start) {
3597 txr->hn_txeof = hn_start_txeof;
3598 TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
3599 TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
3605 txr->hn_txeof = hn_xmit_txeof;
3606 TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
3607 TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);
3609 br_depth = hn_get_txswq_depth(txr);
3610 txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_DEVBUF,
3611 M_WAITOK, &txr->hn_tx_lock);
3614 txr->hn_direct_tx_size = hn_direct_tx_size;
3617 * Always schedule transmission instead of trying to do direct
3618 * transmission. This gives the best performance so far.
3620 txr->hn_sched_tx = 1;
3622 parent_dtag = bus_get_dma_tag(dev);
3624 /* DMA tag for RNDIS packet messages. */
3625 error = bus_dma_tag_create(parent_dtag, /* parent */
3626 HN_RNDIS_PKT_ALIGN, /* alignment */
3627 HN_RNDIS_PKT_BOUNDARY, /* boundary */
3628 BUS_SPACE_MAXADDR, /* lowaddr */
3629 BUS_SPACE_MAXADDR, /* highaddr */
3630 NULL, NULL, /* filter, filterarg */
3631 HN_RNDIS_PKT_LEN, /* maxsize */
3633 HN_RNDIS_PKT_LEN, /* maxsegsize */
3635 NULL, /* lockfunc */
3636 NULL, /* lockfuncarg */
3637 &txr->hn_tx_rndis_dtag);
3639 device_printf(dev, "failed to create rndis dmatag\n");
3643 /* DMA tag for data. */
3644 error = bus_dma_tag_create(parent_dtag, /* parent */
3646 HN_TX_DATA_BOUNDARY, /* boundary */
3647 BUS_SPACE_MAXADDR, /* lowaddr */
3648 BUS_SPACE_MAXADDR, /* highaddr */
3649 NULL, NULL, /* filter, filterarg */
3650 HN_TX_DATA_MAXSIZE, /* maxsize */
3651 HN_TX_DATA_SEGCNT_MAX, /* nsegments */
3652 HN_TX_DATA_SEGSIZE, /* maxsegsize */
3654 NULL, /* lockfunc */
3655 NULL, /* lockfuncarg */
3656 &txr->hn_tx_data_dtag);
3658 device_printf(dev, "failed to create data dmatag\n");
3662 for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
3663 struct hn_txdesc *txd = &txr->hn_txdesc[i];
3666 txd->chim_index = HN_NVS_CHIM_IDX_INVALID;
3667 STAILQ_INIT(&txd->agg_list);
3670 * Allocate and load RNDIS packet message.
3672 error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
3673 (void **)&txd->rndis_pkt,
3674 BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
3675 &txd->rndis_pkt_dmap);
3678 "failed to allocate rndis_packet_msg, %d\n", i);
3682 error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
3683 txd->rndis_pkt_dmap,
3684 txd->rndis_pkt, HN_RNDIS_PKT_LEN,
3685 hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
3689 "failed to load rndis_packet_msg, %d\n", i);
3690 bus_dmamem_free(txr->hn_tx_rndis_dtag,
3691 txd->rndis_pkt, txd->rndis_pkt_dmap);
3695 /* DMA map for TX data. */
3696 error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
3700 "failed to allocate tx data dmamap\n");
3701 bus_dmamap_unload(txr->hn_tx_rndis_dtag,
3702 txd->rndis_pkt_dmap);
3703 bus_dmamem_free(txr->hn_tx_rndis_dtag,
3704 txd->rndis_pkt, txd->rndis_pkt_dmap);
3708 /* All set, put it to list */
3709 txd->flags |= HN_TXD_FLAG_ONLIST;
3710 #ifndef HN_USE_TXDESC_BUFRING
3711 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
3713 buf_ring_enqueue(txr->hn_txdesc_br, txd);
3716 txr->hn_txdesc_avail = txr->hn_txdesc_cnt;
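/*
 * NOTE: Each txdesc keeps its RNDIS packet message buffer
 * allocated and DMA-loaded for the descriptor's entire lifetime
 * (freed in hn_txdesc_dmamap_destroy()), so the transmit hot
 * path never allocates or maps the message header.
 */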
3718 if (sc->hn_tx_sysctl_tree != NULL) {
3719 struct sysctl_oid_list *child;
3720 struct sysctl_ctx_list *ctx;
3724 * Create per TX ring sysctl tree:
3725 * dev.hn.UNIT.tx.RINGID
3727 ctx = device_get_sysctl_ctx(dev);
3728 child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);
3730 snprintf(name, sizeof(name), "%d", id);
3731 txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
3732 name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3734 if (txr->hn_tx_sysctl_tree != NULL) {
3735 child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);
3738 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
3739 CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
3740 "# of available TX descs");
3742 #ifdef HN_IFSTART_SUPPORT
3743 if (!hn_use_if_start)
3746 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
3747 CTLFLAG_RD, &txr->hn_oactive, 0,
3750 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
3751 CTLFLAG_RW, &txr->hn_pkts,
3752 "# of packets transmitted");
3753 SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "sends",
3754 CTLFLAG_RW, &txr->hn_sends, "# of sends");
3762 hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
3764 struct hn_tx_ring *txr = txd->txr;
3766 KASSERT(txd->m == NULL, ("still has mbuf installed"));
3767 KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));
3769 bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
3770 bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt,
3771 txd->rndis_pkt_dmap);
3772 bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
3776 hn_txdesc_gc(struct hn_tx_ring *txr, struct hn_txdesc *txd)
3779 KASSERT(txd->refs == 0 || txd->refs == 1,
3780 ("invalid txd refs %d", txd->refs));
3782 /* Aggregated txds will be freed by their aggregating txd. */
3783 if (txd->refs > 0 && (txd->flags & HN_TXD_FLAG_ONAGG) == 0) {
3786 freed = hn_txdesc_put(txr, txd);
3787 KASSERT(freed, ("can't free txdesc"));
3792 hn_tx_ring_destroy(struct hn_tx_ring *txr)
3796 if (txr->hn_txdesc == NULL)
3801 * Because the freeing of aggregated txds will be deferred
3802 * to the aggregating txd, two passes are used here:
3803 * - The first pass GCes any pending txds. This GC is necessary,
3804 * since if the channels are revoked, the hypervisor will not
3805 * deliver send-done for all pending txds.
3806 * - The second pass frees the busdma resources, i.e. after all txds
3809 for (i = 0; i < txr->hn_txdesc_cnt; ++i)
3810 hn_txdesc_gc(txr, &txr->hn_txdesc[i]);
3811 for (i = 0; i < txr->hn_txdesc_cnt; ++i)
3812 hn_txdesc_dmamap_destroy(&txr->hn_txdesc[i]);
3814 if (txr->hn_tx_data_dtag != NULL)
3815 bus_dma_tag_destroy(txr->hn_tx_data_dtag);
3816 if (txr->hn_tx_rndis_dtag != NULL)
3817 bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);
3819 #ifdef HN_USE_TXDESC_BUFRING
3820 buf_ring_free(txr->hn_txdesc_br, M_DEVBUF);
3823 free(txr->hn_txdesc, M_DEVBUF);
3824 txr->hn_txdesc = NULL;
3826 if (txr->hn_mbuf_br != NULL)
3827 buf_ring_free(txr->hn_mbuf_br, M_DEVBUF);
3829 #ifndef HN_USE_TXDESC_BUFRING
3830 mtx_destroy(&txr->hn_txlist_spin);
3832 mtx_destroy(&txr->hn_tx_lock);
3836 hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
3838 struct sysctl_oid_list *child;
3839 struct sysctl_ctx_list *ctx;
3843 * Create TXBUF for chimney sending.
3845 * NOTE: It is shared by all channels.
3847 sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev),
3848 PAGE_SIZE, 0, HN_CHIM_SIZE, &sc->hn_chim_dma,
3849 BUS_DMA_WAITOK | BUS_DMA_ZERO);
3850 if (sc->hn_chim == NULL) {
3851 device_printf(sc->hn_dev, "allocate txbuf failed\n");
3855 sc->hn_tx_ring_cnt = ring_cnt;
3856 sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
3858 sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
3859 M_DEVBUF, M_WAITOK | M_ZERO);
3861 ctx = device_get_sysctl_ctx(sc->hn_dev);
3862 child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));
3864 /* Create dev.hn.UNIT.tx sysctl tree */
3865 sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
3866 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
3868 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
3871 error = hn_tx_ring_create(sc, i);
3876 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
3877 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3878 __offsetof(struct hn_tx_ring, hn_no_txdescs),
3879 hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
3880 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
3881 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3882 __offsetof(struct hn_tx_ring, hn_send_failed),
3883 hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failure");
3884 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
3885 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3886 __offsetof(struct hn_tx_ring, hn_txdma_failed),
3887 hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failure");
3888 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_flush_failed",
3889 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3890 __offsetof(struct hn_tx_ring, hn_flush_failed),
3891 hn_tx_stat_ulong_sysctl, "LU",
3892 "# of packet transmission aggregation flush failure");
3893 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
3894 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3895 __offsetof(struct hn_tx_ring, hn_tx_collapsed),
3896 hn_tx_stat_ulong_sysctl, "LU", "# of TX mbuf collapsed");
3897 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
3898 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3899 __offsetof(struct hn_tx_ring, hn_tx_chimney),
3900 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send");
3901 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
3902 CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3903 __offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
3904 hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
3905 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
3906 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
3907 "# of total TX descs");
3908 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
3909 CTLFLAG_RD, &sc->hn_chim_szmax, 0,
3910 "Chimney send packet size upper boundary");
3911 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
3912 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
3913 hn_chim_size_sysctl, "I", "Chimney send packet size limit");
3914 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
3915 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3916 __offsetof(struct hn_tx_ring, hn_direct_tx_size),
3917 hn_tx_conf_int_sysctl, "I",
3918 "Size of the packet for direct transmission");
3919 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
3920 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
3921 __offsetof(struct hn_tx_ring, hn_sched_tx),
3922 hn_tx_conf_int_sysctl, "I",
3923 "Always schedule transmission "
3924 "instead of doing direct transmission");
3925 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
3926 CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
3927 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
3928 CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");
3929 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "agg_szmax",
3930 CTLFLAG_RD, &sc->hn_tx_ring[0].hn_agg_szmax, 0,
3931 "Applied packet transmission aggregation size");
3932 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_pktmax",
3933 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
3934 hn_txagg_pktmax_sysctl, "I",
3935 "Applied packet transmission aggregation packets");
3936 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "agg_align",
3937 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
3938 hn_txagg_align_sysctl, "I",
3939 "Applied packet transmission aggregation alignment");
3945 hn_set_chim_size(struct hn_softc *sc, int chim_size)
3949 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
3950 sc->hn_tx_ring[i].hn_chim_size = chim_size;
3954 hn_set_tso_maxsize(struct hn_softc *sc, int tso_maxlen, int mtu)
3956 struct ifnet *ifp = sc->hn_ifp;
3959 if ((ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) == 0)
3962 KASSERT(sc->hn_ndis_tso_sgmin >= 2,
3963 ("invalid NDIS tso sgmin %d", sc->hn_ndis_tso_sgmin));
3964 tso_minlen = sc->hn_ndis_tso_sgmin * mtu;
3966 KASSERT(sc->hn_ndis_tso_szmax >= tso_minlen &&
3967 sc->hn_ndis_tso_szmax <= IP_MAXPACKET,
3968 ("invalid NDIS tso szmax %d", sc->hn_ndis_tso_szmax));
3970 if (tso_maxlen < tso_minlen)
3971 tso_maxlen = tso_minlen;
3972 else if (tso_maxlen > IP_MAXPACKET)
3973 tso_maxlen = IP_MAXPACKET;
3974 if (tso_maxlen > sc->hn_ndis_tso_szmax)
3975 tso_maxlen = sc->hn_ndis_tso_szmax;
3976 ifp->if_hw_tsomax = tso_maxlen - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
3978 if_printf(ifp, "TSO size max %u\n", ifp->if_hw_tsomax);
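/*
 * Worked example (illustrative): with mtu 1500 and the minimum
 * hn_ndis_tso_sgmin of 2, tso_minlen is 3000; tso_maxlen is then
 * clamped into [tso_minlen, min(IP_MAXPACKET, hn_ndis_tso_szmax)]
 * and ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN (14 + 4 bytes) is
 * subtracted to leave room for the Ethernet and VLAN headers in
 * if_hw_tsomax.
 */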
3982 hn_fixup_tx_data(struct hn_softc *sc)
3984 uint64_t csum_assist;
3987 hn_set_chim_size(sc, sc->hn_chim_szmax);
3988 if (hn_tx_chimney_size > 0 &&
3989 hn_tx_chimney_size < sc->hn_chim_szmax)
3990 hn_set_chim_size(sc, hn_tx_chimney_size);
3993 if (sc->hn_caps & HN_CAP_IPCS)
3994 csum_assist |= CSUM_IP;
3995 if (sc->hn_caps & HN_CAP_TCP4CS)
3996 csum_assist |= CSUM_IP_TCP;
3997 if (sc->hn_caps & HN_CAP_UDP4CS)
3998 csum_assist |= CSUM_IP_UDP;
3999 if (sc->hn_caps & HN_CAP_TCP6CS)
4000 csum_assist |= CSUM_IP6_TCP;
4001 if (sc->hn_caps & HN_CAP_UDP6CS)
4002 csum_assist |= CSUM_IP6_UDP;
4003 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
4004 sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
4006 if (sc->hn_caps & HN_CAP_HASHVAL) {
4008 * Support HASHVAL pktinfo on TX path.
4011 if_printf(sc->hn_ifp, "support HASHVAL pktinfo\n");
4012 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
4013 sc->hn_tx_ring[i].hn_tx_flags |= HN_TX_FLAG_HASHVAL;
4018 hn_destroy_tx_data(struct hn_softc *sc)
4022 if (sc->hn_chim != NULL) {
4023 if ((sc->hn_flags & HN_FLAG_CHIM_REF) == 0) {
4024 hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
4026 device_printf(sc->hn_dev,
4027 "chimney sending buffer is referenced");
4032 if (sc->hn_tx_ring_cnt == 0)
4035 for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
4036 hn_tx_ring_destroy(&sc->hn_tx_ring[i]);
4038 free(sc->hn_tx_ring, M_DEVBUF);
4039 sc->hn_tx_ring = NULL;
4041 sc->hn_tx_ring_cnt = 0;
4042 sc->hn_tx_ring_inuse = 0;
4045 #ifdef HN_IFSTART_SUPPORT
4048 hn_start_taskfunc(void *xtxr, int pending __unused)
4050 struct hn_tx_ring *txr = xtxr;
4052 mtx_lock(&txr->hn_tx_lock);
4053 hn_start_locked(txr, 0);
4054 mtx_unlock(&txr->hn_tx_lock);
4058 hn_start_locked(struct hn_tx_ring *txr, int len)
4060 struct hn_softc *sc = txr->hn_sc;
4061 struct ifnet *ifp = sc->hn_ifp;
4064 KASSERT(hn_use_if_start,
4065 ("hn_start_locked is called, when if_start is disabled"));
4066 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
4067 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
4068 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
4070 if (__predict_false(txr->hn_suspended))
4073 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4077 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
4078 struct hn_txdesc *txd;
4079 struct mbuf *m_head;
4082 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
4086 if (len > 0 && m_head->m_pkthdr.len > len) {
4088 * This sending could be time consuming; let callers
4089 * dispatch this packet sending (and the sending of any
4090 * follow-up packets) to the tx taskqueue.
4092 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4097 #if defined(INET6) || defined(INET)
4098 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
4099 m_head = hn_tso_fixup(m_head);
4100 if (__predict_false(m_head == NULL)) {
4101 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
4107 txd = hn_txdesc_get(txr);
4109 txr->hn_no_txdescs++;
4110 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4111 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4115 error = hn_encap(ifp, txr, txd, &m_head);
4117 /* Both txd and m_head are freed */
4118 KASSERT(txr->hn_agg_txd == NULL,
4119 ("encap failed w/ pending aggregating txdesc"));
4123 if (txr->hn_agg_pktleft == 0) {
4124 if (txr->hn_agg_txd != NULL) {
4125 KASSERT(m_head == NULL,
4126 ("pending mbuf for aggregating txdesc"));
4127 error = hn_flush_txagg(ifp, txr);
4128 if (__predict_false(error)) {
4129 atomic_set_int(&ifp->if_drv_flags,
4134 KASSERT(m_head != NULL, ("mbuf was freed"));
4135 error = hn_txpkt(ifp, txr, txd);
4136 if (__predict_false(error)) {
4137 /* txd is freed, but m_head is not */
4138 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
4139 atomic_set_int(&ifp->if_drv_flags,
4147 KASSERT(txr->hn_agg_txd != NULL,
4148 ("no aggregating txdesc"));
4149 KASSERT(m_head == NULL,
4150 ("pending mbuf for aggregating txdesc"));
4155 /* Flush pending aggregated transmission. */
4156 if (txr->hn_agg_txd != NULL)
4157 hn_flush_txagg(ifp, txr);
4162 hn_start(struct ifnet *ifp)
4164 struct hn_softc *sc = ifp->if_softc;
4165 struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
4167 if (txr->hn_sched_tx)
4170 if (mtx_trylock(&txr->hn_tx_lock)) {
4173 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
4174 mtx_unlock(&txr->hn_tx_lock);
4179 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
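/*
 * NOTE: hn_start() and hn_start_txeof() below only _try_ the TX
 * lock: if the lock is contended, or if hn_start_locked() reports
 * a packet larger than hn_direct_tx_size still pending, the work
 * is deferred to the per-ring taskqueue instead of being done in
 * the caller's context.
 */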
4183 hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
4185 struct hn_tx_ring *txr = xtxr;
4187 mtx_lock(&txr->hn_tx_lock);
4188 atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
4189 hn_start_locked(txr, 0);
4190 mtx_unlock(&txr->hn_tx_lock);
4194 hn_start_txeof(struct hn_tx_ring *txr)
4196 struct hn_softc *sc = txr->hn_sc;
4197 struct ifnet *ifp = sc->hn_ifp;
4199 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
4201 if (txr->hn_sched_tx)
4204 if (mtx_trylock(&txr->hn_tx_lock)) {
4207 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4208 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
4209 mtx_unlock(&txr->hn_tx_lock);
4211 taskqueue_enqueue(txr->hn_tx_taskq,
4217 * Release the OACTIVE earlier, in the hope that
4218 * others can catch up. The task will clear the
4219 * flag again with the hn_tx_lock to avoid possible
4222 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
4223 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
4227 #endif /* HN_IFSTART_SUPPORT */
4230 hn_xmit(struct hn_tx_ring *txr, int len)
4232 struct hn_softc *sc = txr->hn_sc;
4233 struct ifnet *ifp = sc->hn_ifp;
4234 struct mbuf *m_head;
4237 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
4238 #ifdef HN_IFSTART_SUPPORT
4239 KASSERT(hn_use_if_start == 0,
4240 ("hn_xmit is called, when if_start is enabled"));
4242 KASSERT(txr->hn_agg_txd == NULL, ("lingering aggregating txdesc"));
4244 if (__predict_false(txr->hn_suspended))
4247 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
4250 while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
4251 struct hn_txdesc *txd;
4254 if (len > 0 && m_head->m_pkthdr.len > len) {
4256 * This sending could be time consuming; let callers
4257 * dispatch this packet sending (and the sending of any
4258 * follow-up packets) to the tx taskqueue.
4260 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
4265 txd = hn_txdesc_get(txr);
4267 txr->hn_no_txdescs++;
4268 drbr_putback(ifp, txr->hn_mbuf_br, m_head);
4269 txr->hn_oactive = 1;
4273 error = hn_encap(ifp, txr, txd, &m_head);
4275 /* Both txd and m_head are freed; discard */
4276 KASSERT(txr->hn_agg_txd == NULL,
4277 ("encap failed w/ pending aggregating txdesc"));
4278 drbr_advance(ifp, txr->hn_mbuf_br);
4282 if (txr->hn_agg_pktleft == 0) {
4283 if (txr->hn_agg_txd != NULL) {
4284 KASSERT(m_head == NULL,
4285 ("pending mbuf for aggregating txdesc"));
4286 error = hn_flush_txagg(ifp, txr);
4287 if (__predict_false(error)) {
4288 txr->hn_oactive = 1;
4292 KASSERT(m_head != NULL, ("mbuf was freed"));
4293 error = hn_txpkt(ifp, txr, txd);
4294 if (__predict_false(error)) {
4295 /* txd is freed, but m_head is not */
4296 drbr_putback(ifp, txr->hn_mbuf_br,
4298 txr->hn_oactive = 1;
4305 KASSERT(txr->hn_agg_txd != NULL,
4306 ("no aggregating txdesc"));
4307 KASSERT(m_head == NULL,
4308 ("pending mbuf for aggregating txdesc"));
4313 drbr_advance(ifp, txr->hn_mbuf_br);
4316 /* Flush pending aggregated transmission. */
4317 if (txr->hn_agg_txd != NULL)
4318 hn_flush_txagg(ifp, txr);
4323 hn_transmit(struct ifnet *ifp, struct mbuf *m)
4325 struct hn_softc *sc = ifp->if_softc;
4326 struct hn_tx_ring *txr;
4329 #if defined(INET6) || defined(INET)
4331 * Perform TSO packet header fixup now, since the TSO
4332 * packet header should be cache-hot.
4334 if (m->m_pkthdr.csum_flags & CSUM_TSO) {
4335 m = hn_tso_fixup(m);
4336 if (__predict_false(m == NULL)) {
4337 if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
4344 * Select the TX ring based on flowid
4346 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
4347 idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
4348 txr = &sc->hn_tx_ring[idx];
4350 error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
4352 if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
4356 if (txr->hn_oactive)
4359 if (txr->hn_sched_tx)
4362 if (mtx_trylock(&txr->hn_tx_lock)) {
4365 sched = hn_xmit(txr, txr->hn_direct_tx_size);
4366 mtx_unlock(&txr->hn_tx_lock);
4371 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
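/*
 * Illustrative example: with 4 TX rings in use, an mbuf carrying
 * flowid 7 is queued to hn_tx_ring[7 % 4], i.e. ring 3, so all
 * packets of one flow stay on the same ring.
 */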
4376 hn_tx_ring_qflush(struct hn_tx_ring *txr)
4380 mtx_lock(&txr->hn_tx_lock);
4381 while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
4383 mtx_unlock(&txr->hn_tx_lock);
4387 hn_xmit_qflush(struct ifnet *ifp)
4389 struct hn_softc *sc = ifp->if_softc;
4392 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
4393 hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
4398 hn_xmit_txeof(struct hn_tx_ring *txr)
4401 if (txr->hn_sched_tx)
4404 if (mtx_trylock(&txr->hn_tx_lock)) {
4407 txr->hn_oactive = 0;
4408 sched = hn_xmit(txr, txr->hn_direct_tx_size);
4409 mtx_unlock(&txr->hn_tx_lock);
4411 taskqueue_enqueue(txr->hn_tx_taskq,
4417 * Release the oactive earlier, in the hope that
4418 * others can catch up. The task will clear the
4419 * oactive again with the hn_tx_lock to avoid possible
4422 txr->hn_oactive = 0;
4423 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
4428 hn_xmit_taskfunc(void *xtxr, int pending __unused)
4430 struct hn_tx_ring *txr = xtxr;
4432 mtx_lock(&txr->hn_tx_lock);
4434 mtx_unlock(&txr->hn_tx_lock);
4438 hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
4440 struct hn_tx_ring *txr = xtxr;
4442 mtx_lock(&txr->hn_tx_lock);
4443 txr->hn_oactive = 0;
4445 mtx_unlock(&txr->hn_tx_lock);
4449 hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
4451 struct vmbus_chan_br cbr;
4452 struct hn_rx_ring *rxr;
4453 struct hn_tx_ring *txr = NULL;
4456 idx = vmbus_chan_subidx(chan);
4459 * Link this channel to RX/TX ring.
4461 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
4462 ("invalid channel index %d, should > 0 && < %d",
4463 idx, sc->hn_rx_ring_inuse));
4464 rxr = &sc->hn_rx_ring[idx];
4465 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
4466 ("RX ring %d already attached", idx));
4467 rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;
4468 rxr->hn_chan = chan;
4471 if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
4472 idx, vmbus_chan_id(chan));
4475 if (idx < sc->hn_tx_ring_inuse) {
4476 txr = &sc->hn_tx_ring[idx];
4477 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
4478 ("TX ring %d already attached", idx));
4479 txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;
4481 txr->hn_chan = chan;
4483 if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
4484 idx, vmbus_chan_id(chan));
4488 /* Bind this channel to a proper CPU. */
4489 vmbus_chan_cpu_set(chan, HN_RING_IDX2CPU(sc, idx));
4494 cbr.cbr = rxr->hn_br;
4495 cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
4496 cbr.cbr_txsz = HN_TXBR_SIZE;
4497 cbr.cbr_rxsz = HN_RXBR_SIZE;
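/*
 * NOTE: rxr->hn_br was allocated as a single HN_TXBR_SIZE +
 * HN_RXBR_SIZE block in hn_create_rx_data(); cbr_txsz and
 * cbr_rxsz tell vmbus how to split it into the TX and RX parts
 * of this channel's ring buffer.
 */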
4498 error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
4500 if (error == EISCONN) {
4501 if_printf(sc->hn_ifp, "bufring is connected after "
4502 "chan%u open failure\n", vmbus_chan_id(chan));
4503 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
4505 if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
4506 vmbus_chan_id(chan), error);
4513 hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
4515 struct hn_rx_ring *rxr;
4518 idx = vmbus_chan_subidx(chan);
4521 * Link this channel to RX/TX ring.
4523 KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
4524 ("invalid channel index %d, should > 0 && < %d",
4525 idx, sc->hn_rx_ring_inuse));
4526 rxr = &sc->hn_rx_ring[idx];
4527 KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
4528 ("RX ring %d is not attached", idx));
4529 rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
4531 if (idx < sc->hn_tx_ring_inuse) {
4532 struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];
4534 KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
4535 ("TX ring %d is not attached attached", idx));
4536 txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
4540 * Close this channel.
4543 * Channel closing does _not_ destroy the target channel.
4545 error = vmbus_chan_close_direct(chan);
4546 if (error == EISCONN) {
4547 if_printf(sc->hn_ifp, "chan%u bufring is connected "
4548 "after being closed\n", vmbus_chan_id(chan));
4549 rxr->hn_rx_flags |= HN_RX_FLAG_BR_REF;
4551 if_printf(sc->hn_ifp, "chan%u close failed: %d\n",
4552 vmbus_chan_id(chan), error);
4557 hn_attach_subchans(struct hn_softc *sc)
4559 struct vmbus_channel **subchans;
4560 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
4563 KASSERT(subchan_cnt > 0, ("no sub-channels"));
4565 /* Attach the sub-channels. */
4566 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
4567 for (i = 0; i < subchan_cnt; ++i) {
4570 error1 = hn_chan_attach(sc, subchans[i]);
4573 /* Move on; all channels will be detached later. */
4576 vmbus_subchan_rel(subchans, subchan_cnt);
4579 if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
4582 if_printf(sc->hn_ifp, "%d sub-channels attached\n",
4590 hn_detach_allchans(struct hn_softc *sc)
4592 struct vmbus_channel **subchans;
4593 int subchan_cnt = sc->hn_rx_ring_inuse - 1;
4596 if (subchan_cnt == 0)
4599 /* Detach the sub-channels. */
4600 subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
4601 for (i = 0; i < subchan_cnt; ++i)
4602 hn_chan_detach(sc, subchans[i]);
4603 vmbus_subchan_rel(subchans, subchan_cnt);
4607 * Detach the primary channel, _after_ all sub-channels
4610 hn_chan_detach(sc, sc->hn_prichan);
4612 /* Wait for sub-channels to be destroyed, if any. */
4613 vmbus_subchan_drain(sc->hn_prichan);
4616 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
4617 KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
4618 HN_RX_FLAG_ATTACHED) == 0,
4619 ("%dth RX ring is still attached", i));
4621 for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
4622 KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
4623 HN_TX_FLAG_ATTACHED) == 0,
4624 ("%dth TX ring is still attached", i));
4630 hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
4632 struct vmbus_channel **subchans;
4633 int nchan, rxr_cnt, error;
4635 nchan = *nsubch + 1;
4638 * Multiple RX/TX rings are not requested.
4645 * Query RSS capabilities, e.g. # of RX rings, and # of indirect
4648 error = hn_rndis_query_rsscaps(sc, &rxr_cnt);
4650 /* No RSS; this is benign. */
4655 if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
4659 if (nchan > rxr_cnt)
4662 if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
4668 * Allocate sub-channels from NVS.
4670 *nsubch = nchan - 1;
4671 error = hn_nvs_alloc_subchans(sc, nsubch);
4672 if (error || *nsubch == 0) {
4673 /* Failed to allocate sub-channels. */
4679 * Wait for all sub-channels to become ready before moving on.
4681 subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
4682 vmbus_subchan_rel(subchans, *nsubch);
static bool
hn_synth_attachable(const struct hn_softc *sc)
{
	int i;

	if (sc->hn_flags & HN_FLAG_ERRORS)
		return (false);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		const struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];

		if (rxr->hn_rx_flags & HN_RX_FLAG_BR_REF)
			return (false);
	}
	return (true);
}
/*
 * Make sure that the RX filter is zero after the successful
 * RNDIS initialization.
 *
 * NOTE:
 * Under certain conditions on certain versions of Hyper-V,
 * the RNDIS rxfilter is _not_ zero on the hypervisor side
 * after the successful RNDIS initialization, which breaks
 * the assumption of any following code (well, it breaks the
 * RNDIS API contract actually).  Clear the RNDIS rxfilter
 * explicitly, drain packets sneaking through, and drain the
 * interrupt taskqueues scheduled due to the stealth packets.
 */
static void
hn_rndis_init_fixat(struct hn_softc *sc, int nchan)
{
	hn_disable_rx(sc);
	hn_drain_rxtx(sc, nchan);
}
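
/*
 * Bring up the synthetic parts in order: primary channel, NVS, RNDIS,
 * then the sub-channels, and finally the RSS key and indirect table.
 * On failure, everything attached so far is torn down again.
 */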
static int
hn_synth_attach(struct hn_softc *sc, int mtu)
{
#define ATTACHED_NVS		0x0002
#define ATTACHED_RNDIS		0x0004

	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int error, nsubch, nchan = 1, i, rndis_inited;
	uint32_t old_caps, attached = 0;

	KASSERT((sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) == 0,
	    ("synthetic parts were attached"));

	if (!hn_synth_attachable(sc))
		return (ENXIO);

	/* Save capabilities for later verification. */
	old_caps = sc->hn_caps;
	sc->hn_caps = 0;

	/* Clear RSS stuffs. */
	sc->hn_rss_ind_size = 0;
	sc->hn_rss_hash = 0;

	/*
	 * Attach the primary channel _before_ attaching NVS and RNDIS.
	 */
	error = hn_chan_attach(sc, sc->hn_prichan);
	if (error)
		goto failed;

	/*
	 * Attach NVS.
	 */
	error = hn_nvs_attach(sc, mtu);
	if (error)
		goto failed;
	attached |= ATTACHED_NVS;

	/*
	 * Attach RNDIS _after_ NVS is attached.
	 */
	error = hn_rndis_attach(sc, mtu, &rndis_inited);
	if (rndis_inited)
		attached |= ATTACHED_RNDIS;
	if (error)
		goto failed;

	/*
	 * Make sure capabilities are not changed.
	 */
	if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
		if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
		    old_caps, sc->hn_caps);
		error = ENXIO;
		goto failed;
	}

	/*
	 * Allocate sub-channels for multi-TX/RX rings.
	 *
	 * NOTE:
	 * The # of RX rings that can be used is equivalent to the # of
	 * channels to be requested.
	 */
	nsubch = sc->hn_rx_ring_cnt - 1;
	error = hn_synth_alloc_subchans(sc, &nsubch);
	if (error)
		goto failed;
	/* NOTE: _Full_ synthetic parts detach is required now. */
	sc->hn_flags |= HN_FLAG_SYNTH_ATTACHED;

	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	nchan = nsubch + 1;
	hn_set_ring_inuse(sc, nchan);
	if (nchan == 1) {
		/* Only the primary channel can be used; done */
		goto back;
	}

	/*
	 * Attach the sub-channels.
	 *
	 * NOTE: hn_set_ring_inuse() _must_ have been called.
	 */
	error = hn_attach_subchans(sc);
	if (error)
		goto failed;

	/*
	 * Configure RSS key and indirect table _after_ all sub-channels
	 * are attached.
	 */
	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
		/*
		 * RSS key is not set yet; set it to the default RSS key.
		 */
		if (bootverbose)
			if_printf(sc->hn_ifp, "setup default RSS key\n");
		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
	}
	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
		/*
		 * RSS indirect table is not set yet; set it up in round-
		 * robin fashion.
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "setup default RSS indirect "
			    "table\n");
		}
		for (i = 0; i < NDIS_HASH_INDCNT; ++i)
			rss->rss_ind[i] = i % nchan;
		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
	} else {
		/*
		 * # of usable channels may be changed, so we have to
		 * make sure that all entries in RSS indirect table
		 * are valid.
		 *
		 * NOTE: hn_set_ring_inuse() _must_ have been called.
		 */
		hn_rss_ind_fixup(sc);
	}

	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error)
		goto failed;
back:
	/*
	 * Fixup transmission aggregation setup.
	 */
	hn_set_txagg(sc);
	hn_rndis_init_fixat(sc, nchan);
	return (0);

failed:
	if (sc->hn_flags & HN_FLAG_SYNTH_ATTACHED) {
		hn_rndis_init_fixat(sc, nchan);
		hn_synth_detach(sc);
	} else {
		if (attached & ATTACHED_RNDIS) {
			hn_rndis_init_fixat(sc, nchan);
			hn_rndis_detach(sc);
		}
		if (attached & ATTACHED_NVS)
			hn_nvs_detach(sc);
		hn_chan_detach(sc, sc->hn_prichan);
		/* Restore old capabilities. */
		sc->hn_caps = old_caps;
	}
	return (error);

#undef ATTACHED_RNDIS
#undef ATTACHED_NVS
}
/*
 * NOTE:
 * The interface must have been suspended through hn_suspend(), before
 * this function gets called.
 */
static void
hn_synth_detach(struct hn_softc *sc)
{
	KASSERT(sc->hn_flags & HN_FLAG_SYNTH_ATTACHED,
	    ("synthetic parts were not attached"));

	/* Detach the RNDIS first. */
	hn_rndis_detach(sc);

	/* Detach NVS. */
	hn_nvs_detach(sc);

	/* Detach all of the channels. */
	hn_detach_allchans(sc);

	sc->hn_flags &= ~HN_FLAG_SYNTH_ATTACHED;
}
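
/*
 * RX rings map 1:1 to channels, while the # of TX rings in use is
 * clamped to the channel count.
 */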
static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{
	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}
static void
hn_chan_drain(struct hn_softc *sc, struct vmbus_channel *chan)
{
	/*
	 * NOTE:
	 * The TX bufring will not be drained by the hypervisor,
	 * if the primary channel is revoked.
	 */
	while (!vmbus_chan_rx_empty(chan) ||
	    (!vmbus_chan_is_revoked(sc->hn_prichan) &&
	     !vmbus_chan_tx_empty(chan)))
		pause("waitch", 1);
	vmbus_chan_intr_drain(chan);
}
static void
hn_disable_rx(struct hn_softc *sc)
{
	/*
	 * Disable RX by clearing RX filter forcefully.
	 */
	sc->hn_rx_filter = NDIS_PACKET_TYPE_NONE;
	hn_rndis_set_rxfilter(sc, sc->hn_rx_filter); /* ignore error */

	/*
	 * Give RNDIS enough time to flush all pending data packets.
	 */
	pause("waitrx", (200 * hz) / 1000);
}
/*
 * NOTE:
 * RX/TX _must_ have been suspended/disabled, before this function
 * is called.
 */
static void
hn_drain_rxtx(struct hn_softc *sc, int nchan)
{
	struct vmbus_channel **subch = NULL;
	int nsubch, i;

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	nsubch = nchan - 1;
	if (nsubch > 0)
		subch = vmbus_subchan_get(sc->hn_prichan, nsubch);
	if (subch != NULL) {
		for (i = 0; i < nsubch; ++i)
			hn_chan_drain(sc, subch[i]);
	}
	hn_chan_drain(sc, sc->hn_prichan);

	if (subch != NULL)
		vmbus_subchan_rel(subch, nsubch);
}
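
/*
 * Quiesce the data path: mark all TX rings suspended, wait for the
 * pending sends to drain, disable RX, then drain the bufrings and
 * any TX tasks that the draining itself may have scheduled.
 */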
static void
hn_suspend_data(struct hn_softc *sc)
{
	struct hn_tx_ring *txr;
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Suspend TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 1;
		mtx_unlock(&txr->hn_tx_lock);
		/* No one is able to send more packets now. */

		/*
		 * Wait for all pending sends to finish.
		 *
		 * NOTE:
		 * We will _not_ receive all pending send-done, if the
		 * primary channel is revoked.
		 */
		while (hn_tx_ring_pending(txr) &&
		    !vmbus_chan_is_revoked(sc->hn_prichan))
			pause("hnwtx", 1 /* 1 tick */);
	}

	/*
	 * Disable RX.
	 */
	hn_disable_rx(sc);

	/*
	 * Drain RX/TX bufrings and interrupts.
	 */
	hn_drain_rxtx(sc, sc->hn_rx_ring_inuse);

	/*
	 * Drain any pending TX tasks.
	 *
	 * NOTE:
	 * The above hn_drain_rxtx() can dispatch TX tasks, so the TX
	 * tasks will have to be drained _after_ the above hn_drain_rxtx().
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		txr = &sc->hn_tx_ring[i];

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}
static void
hn_suspend_mgmt_taskfunc(void *xsc, int pending __unused)
{
	((struct hn_softc *)xsc)->hn_mgmt_taskq = NULL;
}
static void
hn_suspend_mgmt(struct hn_softc *sc)
{
	struct task task;

	HN_LOCK_ASSERT(sc);

	/*
	 * Make sure that hn_mgmt_taskq0 can no longer be accessed
	 * through hn_mgmt_taskq.
	 */
	TASK_INIT(&task, 0, hn_suspend_mgmt_taskfunc, sc);
	vmbus_chan_run_task(sc->hn_prichan, &task);

	/*
	 * Make sure that all pending management tasks are completed.
	 */
	taskqueue_drain(sc->hn_mgmt_taskq0, &sc->hn_netchg_init);
	taskqueue_drain_timeout(sc->hn_mgmt_taskq0, &sc->hn_netchg_status);
	taskqueue_drain_all(sc->hn_mgmt_taskq0);
}
static void
hn_suspend(struct hn_softc *sc)
{
	/* Disable polling. */
	hn_polling(sc, 0);

	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_VF))
		hn_suspend_data(sc);
	hn_suspend_mgmt(sc);
}
static void
hn_resume_tx(struct hn_softc *sc, int tx_ring_cnt)
{
	int i;

	KASSERT(tx_ring_cnt <= sc->hn_tx_ring_cnt,
	    ("invalid TX ring count %d", tx_ring_cnt));

	for (i = 0; i < tx_ring_cnt; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		mtx_lock(&txr->hn_tx_lock);
		txr->hn_suspended = 0;
		mtx_unlock(&txr->hn_tx_lock);
	}
}
static void
hn_resume_data(struct hn_softc *sc)
{
	int i;

	HN_LOCK_ASSERT(sc);

	/*
	 * Re-enable RX.
	 */
	hn_rxfilter_config(sc);

	/*
	 * Make sure to clear suspend status on "all" TX rings,
	 * since hn_tx_ring_inuse can be changed after
	 * hn_suspend_data().
	 */
	hn_resume_tx(sc, sc->hn_tx_ring_cnt);

#ifdef HN_IFSTART_SUPPORT
	if (!hn_use_if_start)
#endif
	{
		/*
		 * Flush unused drbrs, since hn_tx_ring_inuse may be
		 * reduced.
		 */
		for (i = sc->hn_tx_ring_inuse; i < sc->hn_tx_ring_cnt; ++i)
			hn_tx_ring_qflush(&sc->hn_tx_ring[i]);
	}

	/*
	 * Kick start TX.
	 */
	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		/*
		 * Use txeof task, so that any pending oactive can be
		 * cleared properly.
		 */
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}
static void
hn_resume_mgmt(struct hn_softc *sc)
{
	sc->hn_mgmt_taskq = sc->hn_mgmt_taskq0;

	/*
	 * Kick off network change detection, if it was pending.
	 * If no network change was pending, start link status
	 * checks, which is more lightweight than network change
	 * detection.
	 */
	if (sc->hn_link_flags & HN_LINK_FLAG_NETCHG)
		hn_change_network(sc);
	else
		hn_update_link_status(sc);
}
static void
hn_resume(struct hn_softc *sc)
{
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) ||
	    (sc->hn_flags & HN_FLAG_VF))
		hn_resume_data(sc);

	/*
	 * When the VF is activated, the synthetic interface is changed
	 * to DOWN in hn_set_vf().  Here, if the VF is still active, we
	 * don't call hn_resume_mgmt() until the VF is deactivated in
	 * hn_set_vf().
	 */
	if (!(sc->hn_flags & HN_FLAG_VF))
		hn_resume_mgmt(sc);

	/*
	 * Re-enable polling if this interface is running and
	 * the polling is requested.
	 */
	if ((sc->hn_ifp->if_drv_flags & IFF_DRV_RUNNING) && sc->hn_pollhz > 0)
		hn_polling(sc, sc->hn_pollhz);
}
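
/*
 * Handle an RNDIS status indication: media connect/disconnect and
 * network change notifications from the host.
 */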
static void
hn_rndis_rx_status(struct hn_softc *sc, const void *data, int dlen)
{
	const struct rndis_status_msg *msg;
	int ofs;

	if (dlen < sizeof(*msg)) {
		if_printf(sc->hn_ifp, "invalid RNDIS status\n");
		return;
	}
	msg = data;

	switch (msg->rm_status) {
	case RNDIS_STATUS_MEDIA_CONNECT:
	case RNDIS_STATUS_MEDIA_DISCONNECT:
		hn_update_link_status(sc);
		break;

	case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG:
		/* Not really useful; ignore. */
		break;

	case RNDIS_STATUS_NETWORK_CHANGE:
		ofs = RNDIS_STBUFOFFSET_ABS(msg->rm_stbufoffset);
		if (dlen < ofs + msg->rm_stbuflen ||
		    msg->rm_stbuflen < sizeof(uint32_t)) {
			if_printf(sc->hn_ifp, "network changed\n");
		} else {
			uint32_t change;

			memcpy(&change, ((const uint8_t *)msg) + ofs,
			    sizeof(change));
			if_printf(sc->hn_ifp, "network changed, change %u\n",
			    change);
		}
		hn_change_network(sc);
		break;

	default:
		if_printf(sc->hn_ifp, "unknown RNDIS status 0x%08x\n",
		    msg->rm_status);
		break;
	}
}
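
/*
 * Walk the per-packet-info records attached to an RNDIS data message.
 * Each record is rm_size bytes, with its payload at rm_pktinfooffset;
 * extract the VLAN, checksum and hash information, if present.
 */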
static int
hn_rndis_rxinfo(const void *info_data, int info_dlen, struct hn_rxinfo *info)
{
	const struct rndis_pktinfo *pi = info_data;
	uint32_t mask = 0;

	while (info_dlen != 0) {
		const void *data;
		uint32_t dlen;

		if (__predict_false(info_dlen < sizeof(*pi)))
			return (EINVAL);
		if (__predict_false(info_dlen < pi->rm_size))
			return (EINVAL);
		info_dlen -= pi->rm_size;

		if (__predict_false(pi->rm_size & RNDIS_PKTINFO_SIZE_ALIGNMASK))
			return (EINVAL);
		if (__predict_false(pi->rm_size < pi->rm_pktinfooffset))
			return (EINVAL);
		dlen = pi->rm_size - pi->rm_pktinfooffset;
		data = pi->rm_data;

		switch (pi->rm_type) {
		case NDIS_PKTINFO_TYPE_VLAN:
			if (__predict_false(dlen < NDIS_VLAN_INFO_SIZE))
				return (EINVAL);
			info->vlan_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_VLAN;
			break;

		case NDIS_PKTINFO_TYPE_CSUM:
			if (__predict_false(dlen < NDIS_RXCSUM_INFO_SIZE))
				return (EINVAL);
			info->csum_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_CSUM;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHVAL:
			if (__predict_false(dlen < HN_NDIS_HASH_VALUE_SIZE))
				return (EINVAL);
			info->hash_value = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHVAL;
			break;

		case HN_NDIS_PKTINFO_TYPE_HASHINF:
			if (__predict_false(dlen < HN_NDIS_HASH_INFO_SIZE))
				return (EINVAL);
			info->hash_info = *((const uint32_t *)data);
			mask |= HN_RXINFO_HASHINF;
			break;

		default:
			goto next;
		}

		if (mask == HN_RXINFO_ALL) {
			/* All found; done */
			break;
		}
next:
		pi = (const struct rndis_pktinfo *)
		    ((const uint8_t *)pi + pi->rm_size);
	}

	/*
	 * Final fixup.
	 * - If there is no hash value, invalidate the hash info.
	 */
	if ((mask & HN_RXINFO_HASHVAL) == 0)
		info->hash_info = HN_NDIS_HASH_INFO_INVALID;
	return (0);
}
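
/*
 * Return true if [off, off + len) overlaps
 * [check_off, check_off + check_len).
 */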
static __inline bool
hn_rndis_check_overlap(int off, int len, int check_off, int check_len)
{

	if (off < check_off) {
		if (__predict_true(off + len <= check_off))
			return (false);
	} else if (off > check_off) {
		if (__predict_true(check_off + check_len <= off))
			return (false);
	}
	return (true);
}
static __inline void
hn_rndis_rx_data(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_packet_msg *pkt;
	struct hn_rxinfo info;
	int data_off, pktinfo_off, data_len, pktinfo_len;

	/*
	 * Check length.
	 */
	if (__predict_false(dlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg\n");
		return;
	}
	pkt = data;

	if (__predict_false(dlen < pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "truncated RNDIS packet msg, "
		    "dlen %d, msglen %u\n", dlen, pkt->rm_len);
		return;
	}
	if (__predict_false(pkt->rm_len <
	    pkt->rm_datalen + pkt->rm_oobdatalen + pkt->rm_pktinfolen)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msglen, "
		    "msglen %u, data %u, oob %u, pktinfo %u\n",
		    pkt->rm_len, pkt->rm_datalen, pkt->rm_oobdatalen,
		    pkt->rm_pktinfolen);
		return;
	}
	if (__predict_false(pkt->rm_datalen == 0)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, no data\n");
		return;
	}

	/*
	 * Check offsets.
	 */
#define IS_OFFSET_INVALID(ofs)			\
	((ofs) < RNDIS_PACKET_MSG_OFFSET_MIN ||	\
	 ((ofs) & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))

	/* XXX Hyper-V does not meet data offset alignment requirement */
	if (__predict_false(pkt->rm_dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data offset %u\n", pkt->rm_dataoffset);
		return;
	}
	if (__predict_false(pkt->rm_oobdataoffset > 0 &&
	    IS_OFFSET_INVALID(pkt->rm_oobdataoffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "oob offset %u\n", pkt->rm_oobdataoffset);
		return;
	}
	if (__predict_true(pkt->rm_pktinfooffset > 0) &&
	    __predict_false(IS_OFFSET_INVALID(pkt->rm_pktinfooffset))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "pktinfo offset %u\n", pkt->rm_pktinfooffset);
		return;
	}

#undef IS_OFFSET_INVALID

	data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_dataoffset);
	data_len = pkt->rm_datalen;
	pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_pktinfooffset);
	pktinfo_len = pkt->rm_pktinfolen;

	/*
	 * Check OOB coverage.
	 */
	if (__predict_false(pkt->rm_oobdatalen != 0)) {
		int oob_off, oob_len;

		if_printf(rxr->hn_ifp, "got oobdata\n");
		oob_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->rm_oobdataoffset);
		oob_len = pkt->rm_oobdatalen;

		if (__predict_false(oob_off + oob_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overflow, msglen %u, oob abs %d len %d\n",
			    pkt->rm_len, oob_off, oob_len);
			return;
		}

		/*
		 * Check against data.
		 */
		if (hn_rndis_check_overlap(oob_off, oob_len,
		    data_off, data_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps data, oob abs %d len %d, "
			    "data abs %d len %d\n",
			    oob_off, oob_len, data_off, data_len);
			return;
		}

		/*
		 * Check against pktinfo.
		 */
		if (pktinfo_len != 0 &&
		    hn_rndis_check_overlap(oob_off, oob_len,
		    pktinfo_off, pktinfo_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "oob overlaps pktinfo, oob abs %d len %d, "
			    "pktinfo abs %d len %d\n",
			    oob_off, oob_len, pktinfo_off, pktinfo_len);
			return;
		}
	}

	/*
	 * Check per-packet-info coverage and find useful per-packet-info.
	 */
	info.vlan_info = HN_NDIS_VLAN_INFO_INVALID;
	info.csum_info = HN_NDIS_RXCSUM_INFO_INVALID;
	info.hash_info = HN_NDIS_HASH_INFO_INVALID;
	if (__predict_true(pktinfo_len != 0)) {
		bool overlap;
		int error;

		if (__predict_false(pktinfo_off + pktinfo_len > pkt->rm_len)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overflow, msglen %u, "
			    "pktinfo abs %d len %d\n",
			    pkt->rm_len, pktinfo_off, pktinfo_len);
			return;
		}

		/*
		 * Check packet info coverage.
		 */
		overlap = hn_rndis_check_overlap(pktinfo_off, pktinfo_len,
		    data_off, data_len);
		if (__predict_false(overlap)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
			    "pktinfo overlap data, pktinfo abs %d len %d, "
			    "data abs %d len %d\n",
			    pktinfo_off, pktinfo_len, data_off, data_len);
			return;
		}

		/*
		 * Find useful per-packet-info.
		 */
		error = hn_rndis_rxinfo(((const uint8_t *)pkt) + pktinfo_off,
		    pktinfo_len, &info);
		if (__predict_false(error)) {
			if_printf(rxr->hn_ifp, "invalid RNDIS packet msg "
			    "pktinfo\n");
			return;
		}
	}

	if (__predict_false(data_off + data_len > pkt->rm_len)) {
		if_printf(rxr->hn_ifp, "invalid RNDIS packet msg, "
		    "data overflow, msglen %u, data abs %d len %d\n",
		    pkt->rm_len, data_off, data_len);
		return;
	}
	hn_rxpkt(rxr, ((const uint8_t *)pkt) + data_off, data_len, &info);
}
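
/*
 * Dispatch a received RNDIS message: data messages take the hot path,
 * while status indications and control completions take the slow path.
 */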
static __inline void
hn_rndis_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen)
{
	const struct rndis_msghdr *hdr;

	if (__predict_false(dlen < sizeof(*hdr))) {
		if_printf(rxr->hn_ifp, "invalid RNDIS msg\n");
		return;
	}
	hdr = data;

	if (__predict_true(hdr->rm_type == REMOTE_NDIS_PACKET_MSG)) {
		/* Hot data path. */
		hn_rndis_rx_data(rxr, data, dlen);
		/* Done! */
		return;
	}

	if (hdr->rm_type == REMOTE_NDIS_INDICATE_STATUS_MSG)
		hn_rndis_rx_status(rxr->hn_ifp->if_softc, data, dlen);
	else
		hn_rndis_rx_ctrl(rxr->hn_ifp->if_softc, data, dlen);
}
static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
	const struct hn_nvs_hdr *hdr;

	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
		if_printf(sc->hn_ifp, "invalid nvs notify\n");
		return;
	}
	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);

	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
		/* Useless; ignore */
		return;
	}
	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}
static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkt)
{
	struct hn_nvs_sendctx *sndc;

	sndc = (struct hn_nvs_sendctx *)(uintptr_t)pkt->cph_xactid;
	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
	    VMBUS_CHANPKT_DATALEN(pkt));
	/*
	 * NOTE:
	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
	 * its callback.
	 */
}
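
/*
 * An RXBUF channel packet carries one or more ranges within the shared
 * RX buffer; each range holds one RNDIS message.  Process all of them,
 * then ack the RXBUF so that the hypervisor can recycle it.
 */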
static void
hn_nvs_handle_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkthdr)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr;
	int count, i, hlen;

	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
		return;
	}
	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);

	/* Make sure that this is a RNDIS message. */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > HN_RXBUF_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}
		hn_rndis_rxpkt(rxr, rxr->hn_rxbuf + ofs, len);
	}

	/*
	 * Ack the consumed RXBUF associated w/ this channel packet,
	 * so that this RXBUF can be recycled by the hypervisor.
	 */
	hn_nvs_ack_rxbuf(rxr, chan, pkt->cp_hdr.cph_xactid);
}
static void
hn_nvs_ack_rxbuf(struct hn_rx_ring *rxr, struct vmbus_channel *chan,
    uint64_t tid)
{
	struct hn_nvs_rndis_ack ack;
	int retries, error;

	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
	ack.nvs_status = HN_NVS_STATUS_OK;

	retries = 0;
again:
	error = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
	if (__predict_false(error == EAGAIN)) {
		/*
		 * NOTE:
		 * This should _not_ happen in real world, since the
		 * consumption of the TX bufring from the TX path is
		 * controlled.
		 */
		if (rxr->hn_ack_failed == 0)
			if_printf(rxr->hn_ifp, "RXBUF ack retry\n");
		rxr->hn_ack_failed++;
		retries++;
		if (retries < 10) {
			DELAY(100);
			goto again;
		}
		/* RXBUF leaks! */
		if_printf(rxr->hn_ifp, "RXBUF ack failed\n");
	}
}
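
/*
 * Per-channel callback: pull channel packets out of the bufring,
 * dispatch them by type, and grow the packet buffer on ENOBUFS.
 */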
static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;

	for (;;) {
		struct vmbus_chanpkt_hdr *pkt = rxr->hn_pktbuf;
		int error, pktlen;

		pktlen = rxr->hn_pktbuf_len;
		error = vmbus_chan_recv_pkt(chan, pkt, &pktlen);
		if (__predict_false(error == ENOBUFS)) {
			void *nbuf;
			int nlen;

			/*
			 * Expand channel packet buffer.
			 *
			 * XXX
			 * Use M_WAITOK here, since allocation failure
			 * is fatal.
			 */
			nlen = rxr->hn_pktbuf_len * 2;
			while (nlen < pktlen)
				nlen *= 2;
			nbuf = malloc(nlen, M_DEVBUF, M_WAITOK);

			if_printf(rxr->hn_ifp, "expand pktbuf %d -> %d\n",
			    rxr->hn_pktbuf_len, nlen);

			free(rxr->hn_pktbuf, M_DEVBUF);
			rxr->hn_pktbuf = nbuf;
			rxr->hn_pktbuf_len = nlen;
			/* Retry! */
			continue;
		} else if (__predict_false(error == EAGAIN)) {
			/* No more channel packets; done! */
			break;
		}
		KASSERT(!error, ("vmbus_chan_recv_pkt failed: %d", error));

		switch (pkt->cph_type) {
		case VMBUS_CHANPKT_TYPE_COMP:
			hn_nvs_handle_comp(sc, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_RXBUF:
			hn_nvs_handle_rxbuf(rxr, chan, pkt);
			break;

		case VMBUS_CHANPKT_TYPE_INBAND:
			hn_nvs_handle_notify(sc, pkt);
			break;

		default:
			if_printf(rxr->hn_ifp, "unknown chan pkt %u\n",
			    pkt->cph_type);
			break;
		}
	}
	hn_chan_rollup(rxr, rxr->hn_txr);
}
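
/*
 * Sanitize the TX taskqueue count and mode at boot; the global TX
 * taskqueues themselves are only created in the global mode.
 */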
static void
hn_tx_taskq_create(void *arg __unused)
{
	int i;

	/*
	 * Fix the # of TX taskqueues.
	 */
	if (hn_tx_taskq_cnt <= 0)
		hn_tx_taskq_cnt = 1;
	else if (hn_tx_taskq_cnt > mp_ncpus)
		hn_tx_taskq_cnt = mp_ncpus;

	/*
	 * Fix the TX taskqueue mode.
	 */
	switch (hn_tx_taskq_mode) {
	case HN_TX_TASKQ_M_INDEP:
	case HN_TX_TASKQ_M_GLOBAL:
	case HN_TX_TASKQ_M_EVTTQ:
		break;
	default:
		hn_tx_taskq_mode = HN_TX_TASKQ_M_INDEP;
		break;
	}

	if (vm_guest != VM_GUEST_HV)
		return;

	if (hn_tx_taskq_mode != HN_TX_TASKQ_M_GLOBAL)
		return;

	hn_tx_taskque = malloc(hn_tx_taskq_cnt * sizeof(struct taskqueue *),
	    M_DEVBUF, M_WAITOK);
	for (i = 0; i < hn_tx_taskq_cnt; ++i) {
		hn_tx_taskque[i] = taskqueue_create("hn_tx", M_WAITOK,
		    taskqueue_thread_enqueue, &hn_tx_taskque[i]);
		taskqueue_start_threads(&hn_tx_taskque[i], 1, PI_NET,
		    "hn tx%d", i);
	}
}
SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_create, NULL);
static void
hn_tx_taskq_destroy(void *arg __unused)
{

	if (hn_tx_taskque != NULL) {
		int i;

		for (i = 0; i < hn_tx_taskq_cnt; ++i)
			taskqueue_free(hn_tx_taskque[i]);
		free(hn_tx_taskque, M_DEVBUF);
	}
}
SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_SECOND,
    hn_tx_taskq_destroy, NULL);