2 * Copyright (c) 2010-2012 Citrix Inc.
3 * Copyright (c) 2009-2012,2016 Microsoft Corp.
4 * Copyright (c) 2012 NetApp Inc.
5 * All rights reserved.
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 * Copyright (c) 2004-2006 Kip Macy
31 * All rights reserved.
33 * Redistribution and use in source and binary forms, with or without
34 * modification, are permitted provided that the following conditions
35 * are met:
36 * 1. Redistributions of source code must retain the above copyright
37 * notice, this list of conditions and the following disclaimer.
38 * 2. Redistributions in binary form must reproduce the above copyright
39 * notice, this list of conditions and the following disclaimer in the
40 * documentation and/or other materials provided with the distribution.
42 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
43 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
44 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
45 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
46 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
47 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
48 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
49 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
50 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
51 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
52 * SUCH DAMAGE.
55 #include <sys/cdefs.h>
56 __FBSDID("$FreeBSD$");
58 #include "opt_inet6.h"
61 #include <sys/param.h>
62 #include <sys/systm.h>
63 #include <sys/sockio.h>
65 #include <sys/malloc.h>
66 #include <sys/module.h>
67 #include <sys/kernel.h>
68 #include <sys/socket.h>
70 #include <sys/queue.h>
74 #include <sys/sysctl.h>
75 #include <sys/buf_ring.h>
78 #include <net/if_arp.h>
79 #include <net/ethernet.h>
80 #include <net/if_dl.h>
81 #include <net/if_media.h>
82 #include <net/rndis.h>
85 #include <net/if_types.h>
86 #include <net/if_vlan_var.h>
89 #include <netinet/in_systm.h>
90 #include <netinet/in.h>
91 #include <netinet/ip.h>
92 #include <netinet/if_ether.h>
93 #include <netinet/tcp.h>
94 #include <netinet/udp.h>
95 #include <netinet/ip6.h>
98 #include <vm/vm_param.h>
99 #include <vm/vm_kern.h>
102 #include <machine/bus.h>
103 #include <machine/resource.h>
104 #include <machine/frame.h>
105 #include <machine/vmparam.h>
108 #include <sys/rman.h>
109 #include <sys/mutex.h>
110 #include <sys/errno.h>
111 #include <sys/types.h>
112 #include <machine/atomic.h>
114 #include <machine/intr_machdep.h>
116 #include <machine/in_cksum.h>
118 #include <dev/hyperv/include/hyperv.h>
119 #include <dev/hyperv/include/hyperv_busdma.h>
120 #include <dev/hyperv/include/vmbus_xact.h>
122 #include <dev/hyperv/netvsc/hv_net_vsc.h>
123 #include <dev/hyperv/netvsc/hv_rndis_filter.h>
124 #include <dev/hyperv/netvsc/ndis.h>
126 #include "vmbus_if.h"
128 /* Short for Hyper-V network interface */
129 #define NETVSC_DEVNAME "hn"
131 /*
132 * It looks like offset 0 of buf is reserved to hold the softc pointer.
133 * The sc pointer is evidently not needed, and is not presently populated.
134 * The packet offset is where the netvsc_packet starts in the buffer.
135 */
136 #define HV_NV_SC_PTR_OFFSET_IN_BUF 0
137 #define HV_NV_PACKET_OFFSET_IN_BUF 16
139 /* YYY should get it from the underlying channel */
140 #define HN_TX_DESC_CNT 512
142 #define HN_LROENT_CNT_DEF 128
144 #define HN_RING_CNT_DEF_MAX 8
146 #define HN_RNDIS_PKT_LEN \
147 (sizeof(struct rndis_packet_msg) + \
148 HN_RNDIS_PKTINFO_SIZE(HN_NDIS_HASH_VALUE_SIZE) + \
149 HN_RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \
150 HN_RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \
151 HN_RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE))
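/*
 * HN_RNDIS_PKT_LEN is the worst-case RNDIS packet message size: the
 * fixed message header plus one per-packet-info record for each piece
 * of metadata this driver may attach (hash value, VLAN tag, LSOv2 and
 * TX checksum).  It is the space limit passed to
 * hn_rndis_pktinfo_append() in hn_encap() below.
 */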
152 #define HN_RNDIS_PKT_BOUNDARY PAGE_SIZE
153 #define HN_RNDIS_PKT_ALIGN CACHE_LINE_SIZE
155 #define HN_TX_DATA_BOUNDARY PAGE_SIZE
156 #define HN_TX_DATA_MAXSIZE IP_MAXPACKET
157 #define HN_TX_DATA_SEGSIZE PAGE_SIZE
158 /* -1 for RNDIS packet message */
159 #define HN_TX_DATA_SEGCNT_MAX (NETVSC_PACKET_MAXPAGE - 1)
161 #define HN_DIRECT_TX_SIZE_DEF 128
163 #define HN_EARLY_TXEOF_THRESH 8
166 #ifndef HN_USE_TXDESC_BUFRING
167 SLIST_ENTRY(hn_txdesc) link;
170 struct hn_tx_ring *txr;
172 uint32_t flags; /* HN_TXD_FLAG_ */
173 struct hn_send_ctx send_ctx;
175 bus_dmamap_t data_dmap;
177 bus_addr_t rndis_pkt_paddr;
178 struct rndis_packet_msg *rndis_pkt;
179 bus_dmamap_t rndis_pkt_dmap;
182 #define HN_TXD_FLAG_ONLIST 0x1
183 #define HN_TXD_FLAG_DMAMAP 0x2
185 #define HN_LRO_LENLIM_MULTIRX_DEF (12 * ETHERMTU)
186 #define HN_LRO_LENLIM_DEF (25 * ETHERMTU)
187 /* YYY 2*MTU is a bit rough, but should be good enough. */
188 #define HN_LRO_LENLIM_MIN(ifp) (2 * (ifp)->if_mtu)
190 #define HN_LRO_ACKCNT_DEF 1
192 #define HN_LOCK_INIT(sc) \
193 sx_init(&(sc)->hn_lock, device_get_nameunit((sc)->hn_dev))
194 #define HN_LOCK_ASSERT(sc) sx_assert(&(sc)->hn_lock, SA_XLOCKED)
195 #define HN_LOCK_DESTROY(sc) sx_destroy(&(sc)->hn_lock)
196 #define HN_LOCK(sc) sx_xlock(&(sc)->hn_lock)
197 #define HN_UNLOCK(sc) sx_xunlock(&(sc)->hn_lock)
199 #define HN_CSUM_IP_MASK (CSUM_IP | CSUM_IP_TCP | CSUM_IP_UDP)
200 #define HN_CSUM_IP6_MASK (CSUM_IP6_TCP | CSUM_IP6_UDP)
201 #define HN_CSUM_IP_HWASSIST(sc) \
202 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP_MASK)
203 #define HN_CSUM_IP6_HWASSIST(sc) \
204 ((sc)->hn_tx_ring[0].hn_csum_assist & HN_CSUM_IP6_MASK)
210 int hv_promisc_mode = 0; /* normal mode by default */
212 SYSCTL_NODE(_hw, OID_AUTO, hn, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
213 "Hyper-V network interface");
215 /* Trust TCP segment verification on the host side. */
216 static int hn_trust_hosttcp = 1;
217 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hosttcp, CTLFLAG_RDTUN,
218 &hn_trust_hosttcp, 0,
219 "Trust tcp segement verification on host side, "
220 "when csum info is missing (global setting)");
222 /* Trust UDP datagram verification on the host side. */
223 static int hn_trust_hostudp = 1;
224 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostudp, CTLFLAG_RDTUN,
225 &hn_trust_hostudp, 0,
226 "Trust udp datagram verification on host side, "
227 "when csum info is missing (global setting)");
229 /* Trust IP packet verification on the host side. */
230 static int hn_trust_hostip = 1;
231 SYSCTL_INT(_hw_hn, OID_AUTO, trust_hostip, CTLFLAG_RDTUN,
232 &hn_trust_hostip, 0,
233 "Trust ip packet verification on host side, "
234 "when csum info is missing (global setting)");
236 /* Limit TSO burst size */
237 static int hn_tso_maxlen = 0;
238 SYSCTL_INT(_hw_hn, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
239 &hn_tso_maxlen, 0, "TSO burst limit");
241 /* Limit chimney send size */
242 static int hn_tx_chimney_size = 0;
243 SYSCTL_INT(_hw_hn, OID_AUTO, tx_chimney_size, CTLFLAG_RDTUN,
244 &hn_tx_chimney_size, 0, "Chimney send packet size limit");
246 /* Limit the size of packet for direct transmission */
247 static int hn_direct_tx_size = HN_DIRECT_TX_SIZE_DEF;
248 SYSCTL_INT(_hw_hn, OID_AUTO, direct_tx_size, CTLFLAG_RDTUN,
249 &hn_direct_tx_size, 0, "Size of the packet for direct transmission");
251 #if defined(INET) || defined(INET6)
252 #if __FreeBSD_version >= 1100095
253 static int hn_lro_entry_count = HN_LROENT_CNT_DEF;
254 SYSCTL_INT(_hw_hn, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
255 &hn_lro_entry_count, 0, "LRO entry count");
259 static int hn_share_tx_taskq = 0;
260 SYSCTL_INT(_hw_hn, OID_AUTO, share_tx_taskq, CTLFLAG_RDTUN,
261 &hn_share_tx_taskq, 0, "Enable shared TX taskqueue");
263 static struct taskqueue *hn_tx_taskq;
265 #ifndef HN_USE_TXDESC_BUFRING
266 static int hn_use_txdesc_bufring = 0;
268 static int hn_use_txdesc_bufring = 1;
270 SYSCTL_INT(_hw_hn, OID_AUTO, use_txdesc_bufring, CTLFLAG_RD,
271 &hn_use_txdesc_bufring, 0, "Use buf_ring for TX descriptors");
273 static int hn_bind_tx_taskq = -1;
274 SYSCTL_INT(_hw_hn, OID_AUTO, bind_tx_taskq, CTLFLAG_RDTUN,
275 &hn_bind_tx_taskq, 0, "Bind TX taskqueue to the specified cpu");
277 static int hn_use_if_start = 0;
278 SYSCTL_INT(_hw_hn, OID_AUTO, use_if_start, CTLFLAG_RDTUN,
279 &hn_use_if_start, 0, "Use if_start TX method");
281 static int hn_chan_cnt = 0;
282 SYSCTL_INT(_hw_hn, OID_AUTO, chan_cnt, CTLFLAG_RDTUN,
283 &hn_chan_cnt, 0,
284 "# of channels to use; each channel has one RX ring and one TX ring");
286 static int hn_tx_ring_cnt = 0;
287 SYSCTL_INT(_hw_hn, OID_AUTO, tx_ring_cnt, CTLFLAG_RDTUN,
288 &hn_tx_ring_cnt, 0, "# of TX rings to use");
290 static int hn_tx_swq_depth = 0;
291 SYSCTL_INT(_hw_hn, OID_AUTO, tx_swq_depth, CTLFLAG_RDTUN,
292 &hn_tx_swq_depth, 0, "Depth of IFQ or BUFRING");
294 #if __FreeBSD_version >= 1100095
295 static u_int hn_lro_mbufq_depth = 0;
296 SYSCTL_UINT(_hw_hn, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
297 &hn_lro_mbufq_depth, 0, "Depth of LRO mbuf queue");
300 static u_int hn_cpu_index;
303 * Forward declarations
305 static void hn_stop(struct hn_softc *sc);
306 static void hn_init_locked(struct hn_softc *sc);
307 static void hn_init(void *xsc);
308 static int hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data);
309 static int hn_start_locked(struct hn_tx_ring *txr, int len);
310 static void hn_start(struct ifnet *ifp);
311 static void hn_start_txeof(struct hn_tx_ring *);
312 static int hn_ifmedia_upd(struct ifnet *ifp);
313 static void hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr);
314 #if __FreeBSD_version >= 1100099
315 static int hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS);
316 static int hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS);
318 static int hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS);
319 static int hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS);
320 #if __FreeBSD_version < 1100095
321 static int hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS);
323 static int hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS);
325 static int hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
326 static int hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS);
327 static int hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS);
328 static int hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS);
329 static int hn_caps_sysctl(SYSCTL_HANDLER_ARGS);
330 static int hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS);
331 static int hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS);
332 static int hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS);
333 static int hn_check_iplen(const struct mbuf *, int);
334 static int hn_create_tx_ring(struct hn_softc *, int);
335 static void hn_destroy_tx_ring(struct hn_tx_ring *);
336 static int hn_create_tx_data(struct hn_softc *, int);
337 static void hn_fixup_tx_data(struct hn_softc *);
338 static void hn_destroy_tx_data(struct hn_softc *);
339 static void hn_start_taskfunc(void *, int);
340 static void hn_start_txeof_taskfunc(void *, int);
341 static void hn_stop_tx_tasks(struct hn_softc *);
342 static int hn_encap(struct hn_tx_ring *, struct hn_txdesc *, struct mbuf **);
343 static int hn_create_rx_data(struct hn_softc *sc, int);
344 static void hn_destroy_rx_data(struct hn_softc *sc);
345 static void hn_set_chim_size(struct hn_softc *, int);
346 static int hn_chan_attach(struct hn_softc *, struct vmbus_channel *);
347 static void hn_chan_detach(struct hn_softc *, struct vmbus_channel *);
348 static int hn_attach_subchans(struct hn_softc *);
349 static void hn_detach_allchans(struct hn_softc *);
350 static void hn_chan_callback(struct vmbus_channel *chan, void *xrxr);
351 static void hn_set_ring_inuse(struct hn_softc *, int);
352 static int hn_synth_attach(struct hn_softc *, int);
354 static void hn_nvs_handle_notify(struct hn_softc *sc,
355 const struct vmbus_chanpkt_hdr *pkt);
356 static void hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
357 const struct vmbus_chanpkt_hdr *pkt);
358 static void hn_nvs_handle_rxbuf(struct hn_softc *sc, struct hn_rx_ring *rxr,
359 struct vmbus_channel *chan,
360 const struct vmbus_chanpkt_hdr *pkthdr);
361 static void hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid);
363 static int hn_transmit(struct ifnet *, struct mbuf *);
364 static void hn_xmit_qflush(struct ifnet *);
365 static int hn_xmit(struct hn_tx_ring *, int);
366 static void hn_xmit_txeof(struct hn_tx_ring *);
367 static void hn_xmit_taskfunc(void *, int);
368 static void hn_xmit_txeof_taskfunc(void *, int);
370 static const uint8_t hn_rss_key_default[NDIS_HASH_KEYSIZE_TOEPLITZ] = {
371 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
372 0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
373 0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
374 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
375 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa
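/*
 * This appears to be the well-known 40-byte Toeplitz sample key from
 * Microsoft's RSS documentation, which many NIC drivers also use as
 * their default.
 */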
378 #if __FreeBSD_version >= 1100099
380 hn_set_lro_lenlim(struct hn_softc *sc, int lenlim)
384 for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
385 sc->hn_rx_ring[i].hn_lro.lro_length_lim = lenlim;
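/*
 * The software TX queue (IFQ or buf_ring) is sized to at least the
 * number of TX descriptors on the ring; the hw.hn.tx_swq_depth tunable
 * only takes effect when it asks for a deeper queue.
 */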
390 hn_get_txswq_depth(const struct hn_tx_ring *txr)
393 KASSERT(txr->hn_txdesc_cnt > 0, ("tx ring is not setup yet"));
394 if (hn_tx_swq_depth < txr->hn_txdesc_cnt)
395 return txr->hn_txdesc_cnt;
396 return hn_tx_swq_depth;
400 hn_rss_reconfig(struct hn_softc *sc)
410 * Direct reconfiguration by setting the UNCHG flags does
411 * _not_ work properly.
414 if_printf(sc->hn_ifp, "disable RSS\n");
415 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_DISABLE);
417 if_printf(sc->hn_ifp, "RSS disable failed\n");
422 * Reenable the RSS w/ the updated RSS key or indirect
426 if_printf(sc->hn_ifp, "reconfig RSS\n");
427 error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
429 if_printf(sc->hn_ifp, "RSS reconfig failed\n");
436 hn_rss_ind_fixup(struct hn_softc *sc, int nchan)
438 struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
441 KASSERT(nchan > 1, ("invalid # of channels %d", nchan));
443 /*
444 * Check indirect table to make sure that all channels in it
445 * can be used.
446 */
447 for (i = 0; i < NDIS_HASH_INDCNT; ++i) {
448 if (rss->rss_ind[i] >= nchan) {
449 if_printf(sc->hn_ifp,
450 "RSS indirect table %d fixup: %u -> %d\n",
451 i, rss->rss_ind[i], nchan - 1);
452 rss->rss_ind[i] = nchan - 1;
458 hn_ifmedia_upd(struct ifnet *ifp __unused)
465 hn_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr)
467 struct hn_softc *sc = ifp->if_softc;
469 ifmr->ifm_status = IFM_AVALID;
470 ifmr->ifm_active = IFM_ETHER;
472 if (!sc->hn_carrier) {
473 ifmr->ifm_active |= IFM_NONE;
474 return;
475 }
476 ifmr->ifm_status |= IFM_ACTIVE;
477 ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
480 /* {F8615163-DF3E-46c5-913F-F2D2F965ED0E} */
481 static const struct hyperv_guid g_net_vsc_device_type = {
482 .hv_guid = {0x63, 0x51, 0x61, 0xF8, 0x3E, 0xDF, 0xc5, 0x46,
483 0x91, 0x3F, 0xF2, 0xD2, 0xF9, 0x65, 0xED, 0x0E}
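/*
 * Note the byte order above: the first three GUID fields are stored
 * little-endian, so 0xF8615163 becomes 63 51 61 F8.
 */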
487 * Standard probe entry point.
491 netvsc_probe(device_t dev)
493 if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
494 &g_net_vsc_device_type) == 0) {
495 device_set_desc(dev, "Hyper-V Network Interface");
496 return BUS_PROBE_DEFAULT;
502 hn_cpuset_setthread_task(void *xmask, int pending __unused)
504 cpuset_t *mask = xmask;
507 error = cpuset_setthread(curthread->td_tid, mask);
509 panic("curthread=%ju: can't pin; error=%d",
510 (uintmax_t)curthread->td_tid, error);
515 * Standard attach entry point.
517 * Called when the driver is loaded. It allocates needed resources,
518 * and initializes the "hardware" and software.
521 netvsc_attach(device_t dev)
523 struct hn_softc *sc = device_get_softc(dev);
524 struct sysctl_oid_list *child;
525 struct sysctl_ctx_list *ctx;
526 uint8_t eaddr[ETHER_ADDR_LEN];
527 uint32_t link_status;
528 struct ifnet *ifp = NULL;
529 int error, ring_cnt, tx_ring_cnt;
533 sc->hn_prichan = vmbus_get_channel(dev);
537 * Setup taskqueue for transmission.
539 if (hn_tx_taskq == NULL) {
540 sc->hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
541 taskqueue_thread_enqueue, &sc->hn_tx_taskq);
542 taskqueue_start_threads(&sc->hn_tx_taskq, 1, PI_NET, "%s tx",
543 device_get_nameunit(dev));
544 if (hn_bind_tx_taskq >= 0) {
545 int cpu = hn_bind_tx_taskq;
546 struct task cpuset_task;
549 if (cpu > mp_ncpus - 1)
550 cpu = mp_ncpus - 1;
551 CPU_SETOF(cpu, &cpu_set);
552 TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task,
554 taskqueue_enqueue(sc->hn_tx_taskq, &cpuset_task);
555 taskqueue_drain(sc->hn_tx_taskq, &cpuset_task);
558 sc->hn_tx_taskq = hn_tx_taskq;
562 * Allocate ifnet and setup its name earlier, so that if_printf
563 * can be used by functions, which will be called after
566 ifp = sc->hn_ifp = sc->arpcom.ac_ifp = if_alloc(IFT_ETHER);
568 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
571 * Initialize ifmedia earlier so that it can be unconditionally
572 * destroyed, if error happened later on.
574 ifmedia_init(&sc->hn_media, 0, hn_ifmedia_upd, hn_ifmedia_sts);
577 * Figure out the # of RX rings (ring_cnt) and the # of TX rings
578 * to use (tx_ring_cnt).
581 * The # of RX rings to use is same as the # of channels to use.
583 ring_cnt = hn_chan_cnt;
587 if (ring_cnt > HN_RING_CNT_DEF_MAX)
588 ring_cnt = HN_RING_CNT_DEF_MAX;
589 } else if (ring_cnt > mp_ncpus) {
593 tx_ring_cnt = hn_tx_ring_cnt;
594 if (tx_ring_cnt <= 0 || tx_ring_cnt > ring_cnt)
595 tx_ring_cnt = ring_cnt;
596 if (hn_use_if_start) {
597 /* ifnet.if_start only needs one TX ring. */
598 tx_ring_cnt = 1;
599 }
601 /*
602 * Set the leader CPU for channels.
603 */
604 sc->hn_cpu = atomic_fetchadd_int(&hn_cpu_index, ring_cnt) % mp_ncpus;
607 * Create enough TX/RX rings, even if only limited number of
608 * channels can be allocated.
610 error = hn_create_tx_data(sc, tx_ring_cnt);
613 error = hn_create_rx_data(sc, ring_cnt);
618 * Create transaction context for NVS and RNDIS transactions.
620 sc->hn_xact = vmbus_xact_ctx_create(bus_get_dma_tag(dev),
621 HN_XACT_REQ_SIZE, HN_XACT_RESP_SIZE, 0);
622 if (sc->hn_xact == NULL)
626 * Attach the synthetic parts, i.e. NVS and RNDIS.
628 error = hn_synth_attach(sc, ETHERMTU);
632 error = hn_rndis_get_linkstatus(sc, &link_status);
635 if (link_status == NDIS_MEDIA_STATE_CONNECTED)
638 error = hn_rndis_get_eaddr(sc, eaddr);
642 #if __FreeBSD_version >= 1100099
643 if (sc->hn_rx_ring_inuse > 1) {
645 * Reduce TCP segment aggregation limit for multiple
646 * RX rings to increase ACK timeliness.
648 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MULTIRX_DEF);
653 * Fixup TX stuffs after synthetic parts are attached.
655 hn_fixup_tx_data(sc);
657 ctx = device_get_sysctl_ctx(dev);
658 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
659 SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "nvs_version", CTLFLAG_RD,
660 &sc->hn_nvs_ver, 0, "NVS version");
661 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "ndis_version",
662 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
663 hn_ndis_version_sysctl, "A", "NDIS version");
664 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "caps",
665 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
666 hn_caps_sysctl, "A", "capabilities");
667 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "hwassist",
668 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
669 hn_hwassist_sysctl, "A", "hwassist");
670 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_key",
671 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
672 hn_rss_key_sysctl, "IU", "RSS key");
673 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rss_ind",
674 CTLTYPE_OPAQUE | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
675 hn_rss_ind_sysctl, "IU", "RSS indirect table");
678 * Setup the ifmedia, which has been initialized earlier.
680 ifmedia_add(&sc->hn_media, IFM_ETHER | IFM_AUTO, 0, NULL);
681 ifmedia_set(&sc->hn_media, IFM_ETHER | IFM_AUTO);
682 /* XXX ifmedia_set really should do this for us */
683 sc->hn_media.ifm_media = sc->hn_media.ifm_cur->ifm_media;
686 * Setup the ifnet for this interface.
689 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
690 ifp->if_ioctl = hn_ioctl;
691 ifp->if_init = hn_init;
692 if (hn_use_if_start) {
693 int qdepth = hn_get_txswq_depth(&sc->hn_tx_ring[0]);
695 ifp->if_start = hn_start;
696 IFQ_SET_MAXLEN(&ifp->if_snd, qdepth);
697 ifp->if_snd.ifq_drv_maxlen = qdepth - 1;
698 IFQ_SET_READY(&ifp->if_snd);
700 ifp->if_transmit = hn_transmit;
701 ifp->if_qflush = hn_xmit_qflush;
704 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_LRO;
706 /* We can't distinguish IPv6 packets from IPv4 packets on the RX path. */
707 ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
709 if (sc->hn_caps & HN_CAP_VLAN) {
710 /* XXX not sure about VLAN_MTU. */
711 ifp->if_capabilities |= IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU;
714 ifp->if_hwassist = sc->hn_tx_ring[0].hn_csum_assist;
715 if (ifp->if_hwassist & HN_CSUM_IP_MASK)
716 ifp->if_capabilities |= IFCAP_TXCSUM;
717 if (ifp->if_hwassist & HN_CSUM_IP6_MASK)
718 ifp->if_capabilities |= IFCAP_TXCSUM_IPV6;
719 if (sc->hn_caps & HN_CAP_TSO4) {
720 ifp->if_capabilities |= IFCAP_TSO4;
721 ifp->if_hwassist |= CSUM_IP_TSO;
723 if (sc->hn_caps & HN_CAP_TSO6) {
724 ifp->if_capabilities |= IFCAP_TSO6;
725 ifp->if_hwassist |= CSUM_IP6_TSO;
728 /* Enable all available capabilities by default. */
729 ifp->if_capenable = ifp->if_capabilities;
731 tso_maxlen = hn_tso_maxlen;
732 if (tso_maxlen <= 0 || tso_maxlen > IP_MAXPACKET)
733 tso_maxlen = IP_MAXPACKET;
734 ifp->if_hw_tsomaxsegcount = HN_TX_DATA_SEGCNT_MAX;
735 ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
736 ifp->if_hw_tsomax = tso_maxlen -
737 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
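/*
 * For example, with the default tso_maxlen of IP_MAXPACKET (65535),
 * if_hw_tsomax ends up as 65535 - (14 + 4) = 65517 bytes per TSO burst.
 */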
739 ether_ifattach(ifp, eaddr);
742 if_printf(ifp, "TSO: %u/%u/%u\n", ifp->if_hw_tsomax,
743 ifp->if_hw_tsomaxsegcount, ifp->if_hw_tsomaxsegsize);
746 /* Inform the upper layer about the long frame support. */
747 ifp->if_hdrlen = sizeof(struct ether_vlan_header);
751 /* TODO: reuse netvsc_detach() */
752 hn_destroy_tx_data(sc);
759 * Standard detach entry point
762 netvsc_detach(device_t dev)
764 struct hn_softc *sc = device_get_softc(dev);
767 printf("netvsc_detach\n");
770 * XXXKYS: Need to clean up all our
771 * driver state; this is the driver
776 * XXXKYS: Need to stop outgoing traffic and unregister
780 hv_rf_on_device_remove(sc);
781 hn_detach_allchans(sc);
783 hn_stop_tx_tasks(sc);
785 ifmedia_removeall(&sc->hn_media);
786 hn_destroy_rx_data(sc);
787 hn_destroy_tx_data(sc);
789 if (sc->hn_tx_taskq != hn_tx_taskq)
790 taskqueue_free(sc->hn_tx_taskq);
792 vmbus_xact_ctx_destroy(sc->hn_xact);
798 * Standard shutdown entry point
801 netvsc_shutdown(device_t dev)
807 hn_txdesc_dmamap_load(struct hn_tx_ring *txr, struct hn_txdesc *txd,
808 struct mbuf **m_head, bus_dma_segment_t *segs, int *nsegs)
810 struct mbuf *m = *m_head;
813 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag, txd->data_dmap,
814 m, segs, nsegs, BUS_DMA_NOWAIT);
815 if (error == EFBIG) {
818 m_new = m_collapse(m, M_NOWAIT, HN_TX_DATA_SEGCNT_MAX);
823 txr->hn_tx_collapsed++;
825 error = bus_dmamap_load_mbuf_sg(txr->hn_tx_data_dtag,
826 txd->data_dmap, m, segs, nsegs, BUS_DMA_NOWAIT);
829 bus_dmamap_sync(txr->hn_tx_data_dtag, txd->data_dmap,
830 BUS_DMASYNC_PREWRITE);
831 txd->flags |= HN_TXD_FLAG_DMAMAP;
837 hn_txdesc_dmamap_unload(struct hn_tx_ring *txr, struct hn_txdesc *txd)
840 if (txd->flags & HN_TXD_FLAG_DMAMAP) {
841 bus_dmamap_sync(txr->hn_tx_data_dtag,
842 txd->data_dmap, BUS_DMASYNC_POSTWRITE);
843 bus_dmamap_unload(txr->hn_tx_data_dtag,
845 txd->flags &= ~HN_TXD_FLAG_DMAMAP;
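/*
 * TX descriptor pool: with HN_USE_TXDESC_BUFRING the free descriptors
 * live on a lockless buf_ring; otherwise they are kept on an SLIST
 * protected by a spin mutex, as the #ifdef blocks below show.
 */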
850 hn_txdesc_put(struct hn_tx_ring *txr, struct hn_txdesc *txd)
853 KASSERT((txd->flags & HN_TXD_FLAG_ONLIST) == 0,
854 ("put an onlist txd %#x", txd->flags));
856 KASSERT(txd->refs > 0, ("invalid txd refs %d", txd->refs));
857 if (atomic_fetchadd_int(&txd->refs, -1) != 1)
858 return 0;
860 hn_txdesc_dmamap_unload(txr, txd);
861 if (txd->m != NULL) {
866 txd->flags |= HN_TXD_FLAG_ONLIST;
868 #ifndef HN_USE_TXDESC_BUFRING
869 mtx_lock_spin(&txr->hn_txlist_spin);
870 KASSERT(txr->hn_txdesc_avail >= 0 &&
871 txr->hn_txdesc_avail < txr->hn_txdesc_cnt,
872 ("txdesc_put: invalid txd avail %d", txr->hn_txdesc_avail));
873 txr->hn_txdesc_avail++;
874 SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
875 mtx_unlock_spin(&txr->hn_txlist_spin);
877 atomic_add_int(&txr->hn_txdesc_avail, 1);
878 buf_ring_enqueue(txr->hn_txdesc_br, txd);
884 static __inline struct hn_txdesc *
885 hn_txdesc_get(struct hn_tx_ring *txr)
887 struct hn_txdesc *txd;
889 #ifndef HN_USE_TXDESC_BUFRING
890 mtx_lock_spin(&txr->hn_txlist_spin);
891 txd = SLIST_FIRST(&txr->hn_txlist);
893 KASSERT(txr->hn_txdesc_avail > 0,
894 ("txdesc_get: invalid txd avail %d", txr->hn_txdesc_avail));
895 txr->hn_txdesc_avail--;
896 SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
898 mtx_unlock_spin(&txr->hn_txlist_spin);
900 txd = buf_ring_dequeue_sc(txr->hn_txdesc_br);
904 #ifdef HN_USE_TXDESC_BUFRING
905 atomic_subtract_int(&txr->hn_txdesc_avail, 1);
907 KASSERT(txd->m == NULL && txd->refs == 0 &&
908 (txd->flags & HN_TXD_FLAG_ONLIST), ("invalid txd"));
909 txd->flags &= ~HN_TXD_FLAG_ONLIST;
916 hn_txdesc_hold(struct hn_txdesc *txd)
919 /* 0->1 transition will never work */
920 KASSERT(txd->refs > 0, ("invalid refs %d", txd->refs));
921 atomic_add_int(&txd->refs, 1);
925 hn_txeof(struct hn_tx_ring *txr)
927 txr->hn_has_txeof = 0;
932 hn_tx_done(struct hn_send_ctx *sndc, struct hn_softc *sc,
933 struct vmbus_channel *chan, const void *data __unused, int dlen __unused)
935 struct hn_txdesc *txd = sndc->hn_cbarg;
936 struct hn_tx_ring *txr;
938 if (sndc->hn_chim_idx != HN_NVS_CHIM_IDX_INVALID)
939 hn_chim_free(sc, sndc->hn_chim_idx);
942 KASSERT(txr->hn_chan == chan,
943 ("channel mismatch, on chan%u, should be chan%u",
944 vmbus_chan_subidx(chan), vmbus_chan_subidx(txr->hn_chan)));
946 txr->hn_has_txeof = 1;
947 hn_txdesc_put(txr, txd);
949 ++txr->hn_txdone_cnt;
950 if (txr->hn_txdone_cnt >= HN_EARLY_TXEOF_THRESH) {
951 txr->hn_txdone_cnt = 0;
958 hn_chan_rollup(struct hn_rx_ring *rxr, struct hn_tx_ring *txr)
960 #if defined(INET) || defined(INET6)
961 struct lro_ctrl *lro = &rxr->hn_lro;
962 struct lro_entry *queued;
964 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
965 SLIST_REMOVE_HEAD(&lro->lro_active, next);
966 tcp_lro_flush(lro, queued);
971 /*
972 * 'txr' can be NULL, if multiple channels and the
973 * ifnet.if_start method are enabled.
974 */
975 if (txr == NULL || !txr->hn_has_txeof)
978 txr->hn_txdone_cnt = 0;
982 static __inline uint32_t
983 hn_rndis_pktmsg_offset(uint32_t ofs)
986 KASSERT(ofs >= sizeof(struct rndis_packet_msg),
987 ("invalid RNDIS packet msg offset %u", ofs));
988 return (ofs - __offsetof(struct rndis_packet_msg, rm_dataoffset));
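/*
 * By convention RNDIS wants rm_dataoffset/rm_pktinfooffset to be
 * relative to the rm_dataoffset field itself (i.e. past rm_type and
 * rm_len), while the driver builds them relative to the start of the
 * message; the subtraction above converts between the two.
 */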
993 * If this function fails, then both txd and m_head0 will be freed.
996 hn_encap(struct hn_tx_ring *txr, struct hn_txdesc *txd, struct mbuf **m_head0)
998 bus_dma_segment_t segs[HN_TX_DATA_SEGCNT_MAX];
1000 struct mbuf *m_head = *m_head0;
1001 struct rndis_packet_msg *pkt;
1002 uint32_t send_buf_section_idx;
1003 int send_buf_section_size, pktlen;
1007 * extension points to the area reserved for the
1008 * rndis_filter_packet, which is placed just after
1009 * the netvsc_packet (and rppi struct, if present;
1010 * length is updated later).
1012 pkt = txd->rndis_pkt;
1013 pkt->rm_type = REMOTE_NDIS_PACKET_MSG;
1014 pkt->rm_len = sizeof(*pkt) + m_head->m_pkthdr.len;
1015 pkt->rm_dataoffset = sizeof(*pkt);
1016 pkt->rm_datalen = m_head->m_pkthdr.len;
1017 pkt->rm_pktinfooffset = sizeof(*pkt);
1018 pkt->rm_pktinfolen = 0;
1020 /*
1021 * Set the hash value for this packet, so that the host can
1022 * dispatch the TX done event for this packet back to this TX
1023 * ring's channel.
1024 */
1025 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
1026 HN_NDIS_HASH_VALUE_SIZE, HN_NDIS_PKTINFO_TYPE_HASHVAL);
1027 *pi_data = txr->hn_tx_idx;
1029 if (m_head->m_flags & M_VLANTAG) {
1030 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
1031 NDIS_VLAN_INFO_SIZE, NDIS_PKTINFO_TYPE_VLAN);
1032 *pi_data = NDIS_VLAN_INFO_MAKE(
1033 EVL_VLANOFTAG(m_head->m_pkthdr.ether_vtag),
1034 EVL_PRIOFTAG(m_head->m_pkthdr.ether_vtag),
1035 EVL_CFIOFTAG(m_head->m_pkthdr.ether_vtag));
1038 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
1039 #if defined(INET6) || defined(INET)
1040 struct ether_vlan_header *eh;
1044 * XXX need m_pullup and use mtodo
1046 eh = mtod(m_head, struct ether_vlan_header*);
1047 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN))
1048 ether_len = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1050 ether_len = ETHER_HDR_LEN;
1052 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
1053 NDIS_LSO2_INFO_SIZE, NDIS_PKTINFO_TYPE_LSO);
1055 if (m_head->m_pkthdr.csum_flags & CSUM_IP_TSO) {
1057 (struct ip *)(m_head->m_data + ether_len);
1058 unsigned long iph_len = ip->ip_hl << 2;
1060 (struct tcphdr *)((caddr_t)ip + iph_len);
1064 th->th_sum = in_pseudo(ip->ip_src.s_addr,
1065 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
1066 *pi_data = NDIS_LSO2_INFO_MAKEIPV4(0,
1067 m_head->m_pkthdr.tso_segsz);
1070 #if defined(INET6) && defined(INET)
1075 struct ip6_hdr *ip6 = (struct ip6_hdr *)
1076 (m_head->m_data + ether_len);
1077 struct tcphdr *th = (struct tcphdr *)(ip6 + 1);
1080 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
1081 *pi_data = NDIS_LSO2_INFO_MAKEIPV6(0,
1082 m_head->m_pkthdr.tso_segsz);
1085 #endif /* INET6 || INET */
1086 } else if (m_head->m_pkthdr.csum_flags & txr->hn_csum_assist) {
1087 pi_data = hn_rndis_pktinfo_append(pkt, HN_RNDIS_PKT_LEN,
1088 NDIS_TXCSUM_INFO_SIZE, NDIS_PKTINFO_TYPE_CSUM);
1089 if (m_head->m_pkthdr.csum_flags &
1090 (CSUM_IP6_TCP | CSUM_IP6_UDP)) {
1091 *pi_data = NDIS_TXCSUM_INFO_IPV6;
1093 *pi_data = NDIS_TXCSUM_INFO_IPV4;
1094 if (m_head->m_pkthdr.csum_flags & CSUM_IP)
1095 *pi_data |= NDIS_TXCSUM_INFO_IPCS;
1098 if (m_head->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
1099 *pi_data |= NDIS_TXCSUM_INFO_TCPCS;
1100 else if (m_head->m_pkthdr.csum_flags &
1101 (CSUM_IP_UDP | CSUM_IP6_UDP))
1102 *pi_data |= NDIS_TXCSUM_INFO_UDPCS;
1105 pktlen = pkt->rm_pktinfooffset + pkt->rm_pktinfolen;
1106 /* Convert RNDIS packet message offsets */
1107 pkt->rm_dataoffset = hn_rndis_pktmsg_offset(pkt->rm_dataoffset);
1108 pkt->rm_pktinfooffset = hn_rndis_pktmsg_offset(pkt->rm_pktinfooffset);
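/*
 * A note on the path below: a "chimney" send copies both the RNDIS
 * message and the mbuf data into a pre-posted host send-buffer
 * section, avoiding the guest-physical-address (GPA) list used for
 * larger packets.
 */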
1111 * Chimney send, if the packet could fit into one chimney buffer.
1113 if (pkt->rm_len < txr->hn_chim_size) {
1114 txr->hn_tx_chimney_tried++;
1115 send_buf_section_idx = hn_chim_alloc(txr->hn_sc);
1116 if (send_buf_section_idx != HN_NVS_CHIM_IDX_INVALID) {
1117 uint8_t *dest = txr->hn_sc->hn_chim +
1118 (send_buf_section_idx * txr->hn_sc->hn_chim_szmax);
1120 memcpy(dest, pkt, pktlen);
1122 m_copydata(m_head, 0, m_head->m_pkthdr.len, dest);
1124 send_buf_section_size = pkt->rm_len;
1125 txr->hn_gpa_cnt = 0;
1126 txr->hn_tx_chimney++;
1131 error = hn_txdesc_dmamap_load(txr, txd, &m_head, segs, &nsegs);
1136 * This mbuf is not linked w/ the txd yet, so free it now.
1141 freed = hn_txdesc_put(txr, txd);
1143 ("fail to free txd upon txdma error"));
1145 txr->hn_txdma_failed++;
1146 if_inc_counter(txr->hn_sc->hn_ifp, IFCOUNTER_OERRORS, 1);
1151 /* +1 RNDIS packet message */
1152 txr->hn_gpa_cnt = nsegs + 1;
1154 /* send packet with page buffer */
1155 txr->hn_gpa[0].gpa_page = atop(txd->rndis_pkt_paddr);
1156 txr->hn_gpa[0].gpa_ofs = txd->rndis_pkt_paddr & PAGE_MASK;
1157 txr->hn_gpa[0].gpa_len = pktlen;
1160 * Fill the page buffers with mbuf info after the page
1161 * buffer for RNDIS packet message.
1163 for (i = 0; i < nsegs; ++i) {
1164 struct vmbus_gpa *gpa = &txr->hn_gpa[i + 1];
1166 gpa->gpa_page = atop(segs[i].ds_addr);
1167 gpa->gpa_ofs = segs[i].ds_addr & PAGE_MASK;
1168 gpa->gpa_len = segs[i].ds_len;
1171 send_buf_section_idx = HN_NVS_CHIM_IDX_INVALID;
1172 send_buf_section_size = 0;
1176 /* Set the completion routine */
1177 hn_send_ctx_init(&txd->send_ctx, hn_tx_done, txd,
1178 send_buf_section_idx, send_buf_section_size);
1185 * If this function fails, then txd will be freed, but the mbuf
1186 * associated w/ the txd will _not_ be freed.
1189 hn_send_pkt(struct ifnet *ifp, struct hn_tx_ring *txr, struct hn_txdesc *txd)
1191 int error, send_failed = 0;
1195 * Make sure that txd is not freed before ETHER_BPF_MTAP.
1197 hn_txdesc_hold(txd);
1198 error = hv_nv_on_send(txr->hn_chan, HN_NVS_RNDIS_MTYPE_DATA,
1199 &txd->send_ctx, txr->hn_gpa, txr->hn_gpa_cnt);
1201 ETHER_BPF_MTAP(ifp, txd->m);
1202 if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
1203 if (!hn_use_if_start) {
1204 if_inc_counter(ifp, IFCOUNTER_OBYTES,
1205 txd->m->m_pkthdr.len);
1206 if (txd->m->m_flags & M_MCAST)
1207 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
1211 hn_txdesc_put(txr, txd);
1213 if (__predict_false(error)) {
1217 * This should "really rarely" happen.
1219 * XXX Too many RX to be acked or too many sideband
1220 * commands to run? Ask netvsc_channel_rollup()
1221 * to kick start later.
1223 txr->hn_has_txeof = 1;
1225 txr->hn_send_failed++;
1227 /*
1228 * Try sending again after setting hn_has_txeof,
1229 * in case we missed the last
1230 * netvsc_channel_rollup().
1231 */
1234 if_printf(ifp, "send failed\n");
1237 * Caller will perform further processing on the
1238 * associated mbuf, so don't free it in hn_txdesc_put();
1239 * only unload it from the DMA map in hn_txdesc_put(),
1243 freed = hn_txdesc_put(txr, txd);
1245 ("fail to free txd upon send error"));
1247 txr->hn_send_failed++;
1253 * Start a transmit of one or more packets
1256 hn_start_locked(struct hn_tx_ring *txr, int len)
1258 struct hn_softc *sc = txr->hn_sc;
1259 struct ifnet *ifp = sc->hn_ifp;
1261 KASSERT(hn_use_if_start,
1262 ("hn_start_locked is called, when if_start is disabled"));
1263 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
1264 mtx_assert(&txr->hn_tx_lock, MA_OWNED);
1266 if ((ifp->if_drv_flags & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
1270 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
1271 struct hn_txdesc *txd;
1272 struct mbuf *m_head;
1275 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
1279 if (len > 0 && m_head->m_pkthdr.len > len) {
1280 /*
1281 * This sending could be time-consuming; let callers
1282 * dispatch this packet sending (and sending of any
1283 * follow-up packets) to the tx taskqueue.
1284 */
1285 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1289 txd = hn_txdesc_get(txr);
1291 txr->hn_no_txdescs++;
1292 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1293 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
1297 error = hn_encap(txr, txd, &m_head);
1299 /* Both txd and m_head are freed */
1303 error = hn_send_pkt(ifp, txr, txd);
1304 if (__predict_false(error)) {
1305 /* txd is freed, but m_head is not */
1306 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
1307 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
1315 * Link up/down notification
1318 netvsc_linkstatus_callback(struct hn_softc *sc, uint32_t status)
1327 /*
1328 * Append the specified data to the indicated mbuf chain.
1329 * Extend the mbuf chain if the new data does not fit in
1330 * existing space.
1332 * This is a minor rewrite of m_append() from sys/kern/uipc_mbuf.c.
1333 * There should be an equivalent in the kernel mbuf code,
1334 * but there does not appear to be one yet.
1336 * Differs from m_append() in that additional mbufs are
1337 * allocated with cluster size MJUMPAGESIZE, and filled
1338 * accordingly.
1340 * Return 1 if able to complete the job; otherwise 0.
1343 hv_m_append(struct mbuf *m0, int len, c_caddr_t cp)
1346 int remainder, space;
1348 for (m = m0; m->m_next != NULL; m = m->m_next)
1351 space = M_TRAILINGSPACE(m);
1354 * Copy into available space.
1356 if (space > remainder)
1358 bcopy(cp, mtod(m, caddr_t) + m->m_len, space);
1363 while (remainder > 0) {
1365 * Allocate a new mbuf; could check space
1366 * and allocate a cluster instead.
1368 n = m_getjcl(M_DONTWAIT, m->m_type, 0, MJUMPAGESIZE);
1371 n->m_len = min(MJUMPAGESIZE, remainder);
1372 bcopy(cp, mtod(n, caddr_t), n->m_len);
1374 remainder -= n->m_len;
1378 if (m0->m_flags & M_PKTHDR)
1379 m0->m_pkthdr.len += len - remainder;
1381 return (remainder == 0);
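/*
 * For example, once trailing space is exhausted, appending 9000 more
 * bytes allocates three MJUMPAGESIZE (4K) mbufs holding 4096, 4096 and
 * 808 bytes.
 */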
1384 #if defined(INET) || defined(INET6)
1386 hn_lro_rx(struct lro_ctrl *lc, struct mbuf *m)
1388 #if __FreeBSD_version >= 1100095
1389 if (hn_lro_mbufq_depth) {
1390 tcp_lro_queue_mbuf(lc, m);
1394 return tcp_lro_rx(lc, m, 0);
1399 * Called when we receive a data packet from the "wire" on the
1402 * Note: This is no longer used as a callback
1405 hn_rxpkt(struct hn_rx_ring *rxr, const void *data, int dlen,
1406 const struct hn_recvinfo *info)
1408 struct ifnet *ifp = rxr->hn_ifp;
1410 int size, do_lro = 0, do_csum = 1;
1411 int hash_type = M_HASHTYPE_OPAQUE;
1413 if (!(ifp->if_drv_flags & IFF_DRV_RUNNING))
1417 * Bail out if packet contains more data than configured MTU.
1419 if (dlen > (ifp->if_mtu + ETHER_HDR_LEN)) {
1421 } else if (dlen <= MHLEN) {
1422 m_new = m_gethdr(M_NOWAIT, MT_DATA);
1423 if (m_new == NULL) {
1424 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
1427 memcpy(mtod(m_new, void *), data, dlen);
1428 m_new->m_pkthdr.len = m_new->m_len = dlen;
1429 rxr->hn_small_pkts++;
1432 * Get an mbuf with a cluster. For packets 2K or less,
1433 * get a standard 2K cluster. For anything larger, get a
1434 * 4K cluster. Any buffers larger than 4K can cause problems
1435 * if looped around to the Hyper-V TX channel, so avoid them.
1438 if (dlen > MCLBYTES) {
1440 size = MJUMPAGESIZE;
1443 m_new = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, size);
1444 if (m_new == NULL) {
1445 if_inc_counter(ifp, IFCOUNTER_IQDROPS, 1);
1449 hv_m_append(m_new, dlen, data);
1451 m_new->m_pkthdr.rcvif = ifp;
1453 if (__predict_false((ifp->if_capenable & IFCAP_RXCSUM) == 0))
1456 /* receive side checksum offload */
1457 if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) {
1458 /* IP csum offload */
1459 if ((info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) && do_csum) {
1460 m_new->m_pkthdr.csum_flags |=
1461 (CSUM_IP_CHECKED | CSUM_IP_VALID);
1465 /* TCP/UDP csum offload */
1466 if ((info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK |
1467 NDIS_RXCSUM_INFO_TCPCS_OK)) && do_csum) {
1468 m_new->m_pkthdr.csum_flags |=
1469 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1470 m_new->m_pkthdr.csum_data = 0xffff;
1471 if (info->csum_info & NDIS_RXCSUM_INFO_TCPCS_OK)
1477 if ((info->csum_info &
1478 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK)) ==
1479 (NDIS_RXCSUM_INFO_TCPCS_OK | NDIS_RXCSUM_INFO_IPCS_OK))
1482 const struct ether_header *eh;
1487 if (m_new->m_len < hoff)
1489 eh = mtod(m_new, struct ether_header *);
1490 etype = ntohs(eh->ether_type);
1491 if (etype == ETHERTYPE_VLAN) {
1492 const struct ether_vlan_header *evl;
1494 hoff = sizeof(*evl);
1495 if (m_new->m_len < hoff)
1497 evl = mtod(m_new, struct ether_vlan_header *);
1498 etype = ntohs(evl->evl_proto);
1501 if (etype == ETHERTYPE_IP) {
1504 pr = hn_check_iplen(m_new, hoff);
1505 if (pr == IPPROTO_TCP) {
1507 (rxr->hn_trust_hcsum &
1508 HN_TRUST_HCSUM_TCP)) {
1509 rxr->hn_csum_trusted++;
1510 m_new->m_pkthdr.csum_flags |=
1511 (CSUM_IP_CHECKED | CSUM_IP_VALID |
1512 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1513 m_new->m_pkthdr.csum_data = 0xffff;
1516 } else if (pr == IPPROTO_UDP) {
1518 (rxr->hn_trust_hcsum &
1519 HN_TRUST_HCSUM_UDP)) {
1520 rxr->hn_csum_trusted++;
1521 m_new->m_pkthdr.csum_flags |=
1522 (CSUM_IP_CHECKED | CSUM_IP_VALID |
1523 CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1524 m_new->m_pkthdr.csum_data = 0xffff;
1526 } else if (pr != IPPROTO_DONE && do_csum &&
1527 (rxr->hn_trust_hcsum & HN_TRUST_HCSUM_IP)) {
1528 rxr->hn_csum_trusted++;
1529 m_new->m_pkthdr.csum_flags |=
1530 (CSUM_IP_CHECKED | CSUM_IP_VALID);
1535 if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) {
1536 m_new->m_pkthdr.ether_vtag = EVL_MAKETAG(
1537 NDIS_VLAN_INFO_ID(info->vlan_info),
1538 NDIS_VLAN_INFO_PRI(info->vlan_info),
1539 NDIS_VLAN_INFO_CFI(info->vlan_info));
1540 m_new->m_flags |= M_VLANTAG;
1543 if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) {
1545 m_new->m_pkthdr.flowid = info->hash_value;
1546 if ((info->hash_info & NDIS_HASH_FUNCTION_MASK) ==
1547 NDIS_HASH_FUNCTION_TOEPLITZ) {
1548 uint32_t type = (info->hash_info & NDIS_HASH_TYPE_MASK);
1551 case NDIS_HASH_IPV4:
1552 hash_type = M_HASHTYPE_RSS_IPV4;
1555 case NDIS_HASH_TCP_IPV4:
1556 hash_type = M_HASHTYPE_RSS_TCP_IPV4;
1559 case NDIS_HASH_IPV6:
1560 hash_type = M_HASHTYPE_RSS_IPV6;
1563 case NDIS_HASH_IPV6_EX:
1564 hash_type = M_HASHTYPE_RSS_IPV6_EX;
1567 case NDIS_HASH_TCP_IPV6:
1568 hash_type = M_HASHTYPE_RSS_TCP_IPV6;
1571 case NDIS_HASH_TCP_IPV6_EX:
1572 hash_type = M_HASHTYPE_RSS_TCP_IPV6_EX;
1577 m_new->m_pkthdr.flowid = rxr->hn_rx_idx;
1579 M_HASHTYPE_SET(m_new, hash_type);
1582 * Note: Moved RX completion back to hv_nv_on_receive() so all
1583 * messages (not just data messages) will trigger a response.
1589 if ((ifp->if_capenable & IFCAP_LRO) && do_lro) {
1590 #if defined(INET) || defined(INET6)
1591 struct lro_ctrl *lro = &rxr->hn_lro;
1594 rxr->hn_lro_tried++;
1595 if (hn_lro_rx(lro, m_new) == 0) {
1603 /* We're not holding the lock here, so don't release it */
1604 (*ifp->if_input)(ifp, m_new);
1610 hn_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
1612 struct hn_softc *sc = ifp->if_softc;
1613 struct ifreq *ifr = (struct ifreq *)data;
1614 int mask, error = 0;
1618 if (ifr->ifr_mtu > NETVSC_MAX_CONFIGURABLE_MTU) {
1625 if ((sc->hn_caps & HN_CAP_MTU) == 0) {
1626 /* Can't change MTU */
1632 if (ifp->if_mtu == ifr->ifr_mtu) {
1637 /* Obtain and record requested MTU */
1638 ifp->if_mtu = ifr->ifr_mtu;
1640 #if __FreeBSD_version >= 1100099
1642 * Make sure that LRO aggregation length limit is still
1643 * valid, after the MTU change.
1645 if (sc->hn_rx_ring[0].hn_lro.lro_length_lim <
1646 HN_LRO_LENLIM_MIN(ifp))
1647 hn_set_lro_lenlim(sc, HN_LRO_LENLIM_MIN(ifp));
1650 /* We must remove and add back the device to cause the new
1651 * MTU to take effect. This includes tearing down, but not
1652 * deleting the channel, then bringing it back up.
1654 error = hv_rf_on_device_remove(sc);
1661 * Detach all of the channels.
1663 hn_detach_allchans(sc);
1666 * Attach the synthetic parts, i.e. NVS and RNDIS.
1669 hn_synth_attach(sc, ifr->ifr_mtu);
1671 if (sc->hn_tx_ring[0].hn_chim_size > sc->hn_chim_szmax)
1672 hn_set_chim_size(sc, sc->hn_chim_szmax);
1682 if (ifp->if_flags & IFF_UP) {
1683 /*
1684 * If only the state of the PROMISC flag changed,
1685 * then just use the 'set promisc mode' command
1686 * instead of reinitializing the entire NIC. Doing
1687 * a full re-init means reloading the firmware and
1688 * waiting for it to start up, which may take a
1689 * second or so.
1690 */
1692 /* Fixme: Promiscuous mode? */
1693 if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1694 ifp->if_flags & IFF_PROMISC &&
1695 !(sc->hn_if_flags & IFF_PROMISC)) {
1696 /* do something here for Hyper-V */
1697 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING &&
1698 !(ifp->if_flags & IFF_PROMISC) &&
1699 sc->hn_if_flags & IFF_PROMISC) {
1700 /* do something here for Hyper-V */
1705 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1709 sc->hn_if_flags = ifp->if_flags;
1716 mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1718 if (mask & IFCAP_TXCSUM) {
1719 ifp->if_capenable ^= IFCAP_TXCSUM;
1720 if (ifp->if_capenable & IFCAP_TXCSUM)
1721 ifp->if_hwassist |= HN_CSUM_IP_HWASSIST(sc);
1723 ifp->if_hwassist &= ~HN_CSUM_IP_HWASSIST(sc);
1725 if (mask & IFCAP_TXCSUM_IPV6) {
1726 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1727 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6)
1728 ifp->if_hwassist |= HN_CSUM_IP6_HWASSIST(sc);
1730 ifp->if_hwassist &= ~HN_CSUM_IP6_HWASSIST(sc);
1733 /* TODO: flip RNDIS offload parameters for RXCSUM. */
1734 if (mask & IFCAP_RXCSUM)
1735 ifp->if_capenable ^= IFCAP_RXCSUM;
1737 /* We can't distinguish IPv6 packets from IPv4 packets on the RX path. */
1738 if (mask & IFCAP_RXCSUM_IPV6)
1739 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1742 if (mask & IFCAP_LRO)
1743 ifp->if_capenable ^= IFCAP_LRO;
1745 if (mask & IFCAP_TSO4) {
1746 ifp->if_capenable ^= IFCAP_TSO4;
1747 if (ifp->if_capenable & IFCAP_TSO4)
1748 ifp->if_hwassist |= CSUM_IP_TSO;
1750 ifp->if_hwassist &= ~CSUM_IP_TSO;
1752 if (mask & IFCAP_TSO6) {
1753 ifp->if_capenable ^= IFCAP_TSO6;
1754 if (ifp->if_capenable & IFCAP_TSO6)
1755 ifp->if_hwassist |= CSUM_IP6_TSO;
1757 ifp->if_hwassist &= ~CSUM_IP6_TSO;
1765 /* Always all-multi */
1768 * Enable/disable all-multi according to the emptiness of
1769 * the mcast address list.
1775 error = ifmedia_ioctl(ifp, ifr, &sc->hn_media, cmd);
1779 error = ether_ioctl(ifp, cmd, data);
1786 hn_stop(struct hn_softc *sc)
1796 printf(" Closing Device ...\n");
1798 atomic_clear_int(&ifp->if_drv_flags,
1799 (IFF_DRV_RUNNING | IFF_DRV_OACTIVE));
1800 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
1801 sc->hn_tx_ring[i].hn_oactive = 0;
1803 if_link_state_change(ifp, LINK_STATE_DOWN);
1805 ret = hv_rf_on_close(sc);
1809 * FreeBSD transmit entry point
1812 hn_start(struct ifnet *ifp)
1814 struct hn_softc *sc = ifp->if_softc;
1815 struct hn_tx_ring *txr = &sc->hn_tx_ring[0];
1817 if (txr->hn_sched_tx)
1820 if (mtx_trylock(&txr->hn_tx_lock)) {
1823 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
1824 mtx_unlock(&txr->hn_tx_lock);
1829 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
1833 hn_start_txeof(struct hn_tx_ring *txr)
1835 struct hn_softc *sc = txr->hn_sc;
1836 struct ifnet *ifp = sc->hn_ifp;
1838 KASSERT(txr == &sc->hn_tx_ring[0], ("not the first TX ring"));
1840 if (txr->hn_sched_tx)
1843 if (mtx_trylock(&txr->hn_tx_lock)) {
1846 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
1847 sched = hn_start_locked(txr, txr->hn_direct_tx_size);
1848 mtx_unlock(&txr->hn_tx_lock);
1850 taskqueue_enqueue(txr->hn_tx_taskq,
1856 * Release the OACTIVE earlier, with the hope, that
1857 * others could catch up. The task will clear the
1858 * flag again with the hn_tx_lock to avoid possible
1861 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
1862 taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
1867 hn_init_locked(struct hn_softc *sc)
1876 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1880 hv_promisc_mode = 1;
1882 ret = hv_rf_on_open(sc);
1886 atomic_clear_int(&ifp->if_drv_flags, IFF_DRV_OACTIVE);
1887 for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
1888 sc->hn_tx_ring[i].hn_oactive = 0;
1890 atomic_set_int(&ifp->if_drv_flags, IFF_DRV_RUNNING);
1891 if_link_state_change(ifp, LINK_STATE_UP);
1897 struct hn_softc *sc = xsc;
1909 hn_watchdog(struct ifnet *ifp)
1912 if_printf(ifp, "watchdog timeout -- resetting\n");
1913 hn_init(ifp->if_softc); /* XXX */
1918 #if __FreeBSD_version >= 1100099
1921 hn_lro_lenlim_sysctl(SYSCTL_HANDLER_ARGS)
1923 struct hn_softc *sc = arg1;
1924 unsigned int lenlim;
1927 lenlim = sc->hn_rx_ring[0].hn_lro.lro_length_lim;
1928 error = sysctl_handle_int(oidp, &lenlim, 0, req);
1929 if (error || req->newptr == NULL)
1933 if (lenlim < HN_LRO_LENLIM_MIN(sc->hn_ifp) ||
1934 lenlim > TCP_LRO_LENGTH_MAX) {
1938 hn_set_lro_lenlim(sc, lenlim);
1945 hn_lro_ackcnt_sysctl(SYSCTL_HANDLER_ARGS)
1947 struct hn_softc *sc = arg1;
1948 int ackcnt, error, i;
1950 /*
1951 * lro_ackcnt_lim is the append count limit;
1952 * +1 turns it into the aggregation limit.
1953 */
1954 ackcnt = sc->hn_rx_ring[0].hn_lro.lro_ackcnt_lim + 1;
1955 error = sysctl_handle_int(oidp, &ackcnt, 0, req);
1956 if (error || req->newptr == NULL)
1959 if (ackcnt < 2 || ackcnt > (TCP_LRO_ACKCNT_MAX + 1))
1962 /*
1963 * Convert aggregation limit back to append
1964 * count limit.
1965 */
1966 --ackcnt;
1968 for (i = 0; i < sc->hn_rx_ring_inuse; ++i)
1969 sc->hn_rx_ring[i].hn_lro.lro_ackcnt_lim = ackcnt;
1977 hn_trust_hcsum_sysctl(SYSCTL_HANDLER_ARGS)
1979 struct hn_softc *sc = arg1;
1984 if (sc->hn_rx_ring[0].hn_trust_hcsum & hcsum)
1987 error = sysctl_handle_int(oidp, &on, 0, req);
1988 if (error || req->newptr == NULL)
1992 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
1993 struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];
1996 rxr->hn_trust_hcsum |= hcsum;
1998 rxr->hn_trust_hcsum &= ~hcsum;
2005 hn_chim_size_sysctl(SYSCTL_HANDLER_ARGS)
2007 struct hn_softc *sc = arg1;
2008 int chim_size, error;
2010 chim_size = sc->hn_tx_ring[0].hn_chim_size;
2011 error = sysctl_handle_int(oidp, &chim_size, 0, req);
2012 if (error || req->newptr == NULL)
2015 if (chim_size > sc->hn_chim_szmax || chim_size <= 0)
2019 hn_set_chim_size(sc, chim_size);
2024 #if __FreeBSD_version < 1100095
2026 hn_rx_stat_int_sysctl(SYSCTL_HANDLER_ARGS)
2028 struct hn_softc *sc = arg1;
2029 int ofs = arg2, i, error;
2030 struct hn_rx_ring *rxr;
2034 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2035 rxr = &sc->hn_rx_ring[i];
2036 stat += *((int *)((uint8_t *)rxr + ofs));
2039 error = sysctl_handle_64(oidp, &stat, 0, req);
2040 if (error || req->newptr == NULL)
2043 /* Zero out this stat. */
2044 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2045 rxr = &sc->hn_rx_ring[i];
2046 *((int *)((uint8_t *)rxr + ofs)) = 0;
2052 hn_rx_stat_u64_sysctl(SYSCTL_HANDLER_ARGS)
2054 struct hn_softc *sc = arg1;
2055 int ofs = arg2, i, error;
2056 struct hn_rx_ring *rxr;
2060 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2061 rxr = &sc->hn_rx_ring[i];
2062 stat += *((uint64_t *)((uint8_t *)rxr + ofs));
2065 error = sysctl_handle_64(oidp, &stat, 0, req);
2066 if (error || req->newptr == NULL)
2069 /* Zero out this stat. */
2070 for (i = 0; i < sc->hn_rx_ring_inuse; ++i) {
2071 rxr = &sc->hn_rx_ring[i];
2072 *((uint64_t *)((uint8_t *)rxr + ofs)) = 0;
2080 hn_rx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2082 struct hn_softc *sc = arg1;
2083 int ofs = arg2, i, error;
2084 struct hn_rx_ring *rxr;
2088 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2089 rxr = &sc->hn_rx_ring[i];
2090 stat += *((u_long *)((uint8_t *)rxr + ofs));
2093 error = sysctl_handle_long(oidp, &stat, 0, req);
2094 if (error || req->newptr == NULL)
2097 /* Zero out this stat. */
2098 for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
2099 rxr = &sc->hn_rx_ring[i];
2100 *((u_long *)((uint8_t *)rxr + ofs)) = 0;
2106 hn_tx_stat_ulong_sysctl(SYSCTL_HANDLER_ARGS)
2108 struct hn_softc *sc = arg1;
2109 int ofs = arg2, i, error;
2110 struct hn_tx_ring *txr;
2114 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
2115 txr = &sc->hn_tx_ring[i];
2116 stat += *((u_long *)((uint8_t *)txr + ofs));
2119 error = sysctl_handle_long(oidp, &stat, 0, req);
2120 if (error || req->newptr == NULL)
2123 /* Zero out this stat. */
2124 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
2125 txr = &sc->hn_tx_ring[i];
2126 *((u_long *)((uint8_t *)txr + ofs)) = 0;
2132 hn_tx_conf_int_sysctl(SYSCTL_HANDLER_ARGS)
2134 struct hn_softc *sc = arg1;
2135 int ofs = arg2, i, error, conf;
2136 struct hn_tx_ring *txr;
2138 txr = &sc->hn_tx_ring[0];
2139 conf = *((int *)((uint8_t *)txr + ofs));
2141 error = sysctl_handle_int(oidp, &conf, 0, req);
2142 if (error || req->newptr == NULL)
2146 for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
2147 txr = &sc->hn_tx_ring[i];
2148 *((int *)((uint8_t *)txr + ofs)) = conf;
2156 hn_ndis_version_sysctl(SYSCTL_HANDLER_ARGS)
2158 struct hn_softc *sc = arg1;
2161 snprintf(verstr, sizeof(verstr), "%u.%u",
2162 HN_NDIS_VERSION_MAJOR(sc->hn_ndis_ver),
2163 HN_NDIS_VERSION_MINOR(sc->hn_ndis_ver));
2164 return sysctl_handle_string(oidp, verstr, sizeof(verstr), req);
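/*
 * Example: a host offering NDIS 6.30 reads back as the string "6.30"
 * from dev.hn.UNIT.ndis_version.
 */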
2168 hn_caps_sysctl(SYSCTL_HANDLER_ARGS)
2170 struct hn_softc *sc = arg1;
2177 snprintf(caps_str, sizeof(caps_str), "%b", caps,
2188 return sysctl_handle_string(oidp, caps_str, sizeof(caps_str), req);
2192 hn_hwassist_sysctl(SYSCTL_HANDLER_ARGS)
2194 struct hn_softc *sc = arg1;
2195 char assist_str[128];
2199 hwassist = sc->hn_ifp->if_hwassist;
2201 snprintf(assist_str, sizeof(assist_str), "%b", hwassist, CSUM_BITS);
2202 return sysctl_handle_string(oidp, assist_str, sizeof(assist_str), req);
2206 hn_rss_key_sysctl(SYSCTL_HANDLER_ARGS)
2208 struct hn_softc *sc = arg1;
2213 error = SYSCTL_OUT(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
2214 if (error || req->newptr == NULL)
2217 error = SYSCTL_IN(req, sc->hn_rss.rss_key, sizeof(sc->hn_rss.rss_key));
2220 sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
2222 if (sc->hn_rx_ring_inuse > 1) {
2223 error = hn_rss_reconfig(sc);
2225 /* Not RSS capable, at least for now; just save the RSS key. */
2234 hn_rss_ind_sysctl(SYSCTL_HANDLER_ARGS)
2236 struct hn_softc *sc = arg1;
2241 error = SYSCTL_OUT(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
2242 if (error || req->newptr == NULL)
2245 /*
2246 * Don't allow RSS indirect table changes if this interface is not
2247 * currently RSS capable.
2248 */
2249 if (sc->hn_rx_ring_inuse == 1) {
2254 error = SYSCTL_IN(req, sc->hn_rss.rss_ind, sizeof(sc->hn_rss.rss_ind));
2257 sc->hn_flags |= HN_FLAG_HAS_RSSIND;
2259 hn_rss_ind_fixup(sc, sc->hn_rx_ring_inuse);
2260 error = hn_rss_reconfig(sc);
2267 hn_check_iplen(const struct mbuf *m, int hoff)
2269 const struct ip *ip;
2270 int len, iphlen, iplen;
2271 const struct tcphdr *th;
2272 int thoff; /* TCP data offset */
2274 len = hoff + sizeof(struct ip);
2276 /* The packet must be at least the size of an IP header. */
2277 if (m->m_pkthdr.len < len)
2278 return IPPROTO_DONE;
2280 /* The fixed IP header must reside completely in the first mbuf. */
2282 return IPPROTO_DONE;
2284 ip = mtodo(m, hoff);
2286 /* Bound check the packet's stated IP header length. */
2287 iphlen = ip->ip_hl << 2;
2288 if (iphlen < sizeof(struct ip)) /* minimum header length */
2289 return IPPROTO_DONE;
2291 /* The full IP header must reside completely in the one mbuf. */
2292 if (m->m_len < hoff + iphlen)
2293 return IPPROTO_DONE;
2295 iplen = ntohs(ip->ip_len);
2297 /*
2298 * Check that the amount of data in the buffers is at
2299 * least as much as the IP header would have us expect.
2300 */
2301 if (m->m_pkthdr.len < hoff + iplen)
2302 return IPPROTO_DONE;
2305 * Ignore IP fragments.
2307 if (ntohs(ip->ip_off) & (IP_OFFMASK | IP_MF))
2308 return IPPROTO_DONE;
2311 * The TCP/IP or UDP/IP header must be entirely contained within
2312 * the first fragment of a packet.
2316 if (iplen < iphlen + sizeof(struct tcphdr))
2317 return IPPROTO_DONE;
2318 if (m->m_len < hoff + iphlen + sizeof(struct tcphdr))
2319 return IPPROTO_DONE;
2320 th = (const struct tcphdr *)((const uint8_t *)ip + iphlen);
2321 thoff = th->th_off << 2;
2322 if (thoff < sizeof(struct tcphdr) || thoff + iphlen > iplen)
2323 return IPPROTO_DONE;
2324 if (m->m_len < hoff + iphlen + thoff)
2325 return IPPROTO_DONE;
2328 if (iplen < iphlen + sizeof(struct udphdr))
2329 return IPPROTO_DONE;
2330 if (m->m_len < hoff + iphlen + sizeof(struct udphdr))
2331 return IPPROTO_DONE;
2335 return IPPROTO_DONE;
2342 hn_create_rx_data(struct hn_softc *sc, int ring_cnt)
2344 struct sysctl_oid_list *child;
2345 struct sysctl_ctx_list *ctx;
2346 device_t dev = sc->hn_dev;
2347 #if defined(INET) || defined(INET6)
2348 #if __FreeBSD_version >= 1100095
2355 * Create RXBUF for reception.
2358 * - It is shared by all channels.
2359 * - A large enough buffer is allocated; certain versions of NVS
2360 * may further limit the usable space.
2362 sc->hn_rxbuf = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
2363 PAGE_SIZE, 0, NETVSC_RECEIVE_BUFFER_SIZE, &sc->hn_rxbuf_dma,
2364 BUS_DMA_WAITOK | BUS_DMA_ZERO);
2365 if (sc->hn_rxbuf == NULL) {
2366 device_printf(sc->hn_dev, "allocate rxbuf failed\n");
2370 sc->hn_rx_ring_cnt = ring_cnt;
2371 sc->hn_rx_ring_inuse = sc->hn_rx_ring_cnt;
2373 sc->hn_rx_ring = malloc(sizeof(struct hn_rx_ring) * sc->hn_rx_ring_cnt,
2374 M_NETVSC, M_WAITOK | M_ZERO);
2376 #if defined(INET) || defined(INET6)
2377 #if __FreeBSD_version >= 1100095
2378 lroent_cnt = hn_lro_entry_count;
2379 if (lroent_cnt < TCP_LRO_ENTRIES)
2380 lroent_cnt = TCP_LRO_ENTRIES;
2382 device_printf(dev, "LRO: entry count %d\n", lroent_cnt);
2384 #endif /* INET || INET6 */
2386 ctx = device_get_sysctl_ctx(dev);
2387 child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev));
2389 /* Create dev.hn.UNIT.rx sysctl tree */
2390 sc->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "rx",
2391 CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];

		rxr->hn_br = hyperv_dmamem_alloc(bus_get_dma_tag(dev),
		    PAGE_SIZE, 0,
		    NETVSC_DEVICE_RING_BUFFER_SIZE +
		    NETVSC_DEVICE_RING_BUFFER_SIZE,
		    &rxr->hn_br_dma, BUS_DMA_WAITOK);
		if (rxr->hn_br == NULL) {
			device_printf(dev, "allocate bufring failed\n");
			return (ENOMEM);
		}

		if (hn_trust_hosttcp)
			rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_TCP;
		if (hn_trust_hostudp)
			rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_UDP;
		if (hn_trust_hostip)
			rxr->hn_trust_hcsum |= HN_TRUST_HCSUM_IP;
		rxr->hn_ifp = sc->hn_ifp;
		if (i < sc->hn_tx_ring_cnt)
			rxr->hn_txr = &sc->hn_tx_ring[i];
		rxr->hn_rdbuf = malloc(NETVSC_PACKET_SIZE, M_NETVSC, M_WAITOK);
		rxr->hn_rxbuf = sc->hn_rxbuf;

		/*
		 * Initialize LRO.
		 */
#if defined(INET) || defined(INET6)
#if __FreeBSD_version >= 1100095
		tcp_lro_init_args(&rxr->hn_lro, sc->hn_ifp, lroent_cnt,
		    hn_lro_mbufq_depth);
#else
		tcp_lro_init(&rxr->hn_lro);
		rxr->hn_lro.ifp = sc->hn_ifp;
#endif
#if __FreeBSD_version >= 1100099
		rxr->hn_lro.lro_length_lim = HN_LRO_LENLIM_DEF;
		rxr->hn_lro.lro_ackcnt_lim = HN_LRO_ACKCNT_DEF;
#endif
#endif	/* INET || INET6 */

		if (sc->hn_rx_sysctl_tree != NULL) {
			char name[16];

			/*
			 * Create per RX ring sysctl tree:
			 * dev.hn.UNIT.rx.RINGID
			 */
			snprintf(name, sizeof(name), "%d", i);
			rxr->hn_rx_sysctl_tree = SYSCTL_ADD_NODE(ctx,
			    SYSCTL_CHILDREN(sc->hn_rx_sysctl_tree),
			    OID_AUTO, name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

			if (rxr->hn_rx_sysctl_tree != NULL) {
				SYSCTL_ADD_ULONG(ctx,
				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
				    OID_AUTO, "packets", CTLFLAG_RW,
				    &rxr->hn_pkts, "# of packets received");
				SYSCTL_ADD_ULONG(ctx,
				    SYSCTL_CHILDREN(rxr->hn_rx_sysctl_tree),
				    OID_AUTO, "rss_pkts", CTLFLAG_RW,
				    &rxr->hn_rss_pkts,
				    "# of packets w/ RSS info received");
			}
		}
	}
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_queued",
	    CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_lro.lro_queued),
#if __FreeBSD_version < 1100095
	    hn_rx_stat_int_sysctl,
#else
	    hn_rx_stat_u64_sysctl,
#endif
	    "LU", "LRO queued");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_flushed",
	    CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_lro.lro_flushed),
#if __FreeBSD_version < 1100095
	    hn_rx_stat_int_sysctl,
#else
	    hn_rx_stat_u64_sysctl,
#endif
	    "LU", "LRO flushed");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_tried",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_lro_tried),
	    hn_rx_stat_ulong_sysctl, "LU", "# of LRO tries");
#if __FreeBSD_version >= 1100099
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_length_lim",
	    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_lro_lenlim_sysctl, "IU",
	    "Max # of data bytes to be aggregated by LRO");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "lro_ackcnt_lim",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_lro_ackcnt_sysctl, "I",
	    "Max # of ACKs to be aggregated by LRO");
#endif
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hosttcp",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_TCP,
	    hn_trust_hcsum_sysctl, "I",
	    "Trust TCP segment verification on host side, "
	    "when csum info is missing");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostudp",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_UDP,
	    hn_trust_hcsum_sysctl, "I",
	    "Trust UDP datagram verification on host side, "
	    "when csum info is missing");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "trust_hostip",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, HN_TRUST_HCSUM_IP,
	    hn_trust_hcsum_sysctl, "I",
	    "Trust IP packet verification on host side, "
	    "when csum info is missing");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_ip",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_csum_ip),
	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM IP");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_tcp",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_csum_tcp),
	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM TCP");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_udp",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_csum_udp),
	    hn_rx_stat_ulong_sysctl, "LU", "RXCSUM UDP");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "csum_trusted",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_csum_trusted),
	    hn_rx_stat_ulong_sysctl, "LU",
	    "# of packets whose host csum verification we trusted");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "small_pkts",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_rx_ring, hn_small_pkts),
	    hn_rx_stat_ulong_sysctl, "LU", "# of small packets received");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_cnt",
	    CTLFLAG_RD, &sc->hn_rx_ring_cnt, 0, "# created RX rings");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "rx_ring_inuse",
	    CTLFLAG_RD, &sc->hn_rx_ring_inuse, 0, "# used RX rings");

	return (0);
}
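/*
 * The nodes created above appear under dev.hn.UNIT.  As an
 * illustration (the exact set depends on kernel options and
 * __FreeBSD_version), a system with hn0 and two RX rings would
 * expose, among others:
 *
 *	dev.hn.0.rx.0.packets
 *	dev.hn.0.rx.1.packets
 *	dev.hn.0.lro_queued
 *	dev.hn.0.trust_hosttcp
 */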
static void
hn_destroy_rx_data(struct hn_softc *sc)
{
	int i;

	if (sc->hn_rxbuf != NULL) {
		hyperv_dmamem_free(&sc->hn_rxbuf_dma, sc->hn_rxbuf);
		sc->hn_rxbuf = NULL;
	}

	if (sc->hn_rx_ring_cnt == 0)
		return;

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		struct hn_rx_ring *rxr = &sc->hn_rx_ring[i];

		if (rxr->hn_br == NULL)
			continue;
		hyperv_dmamem_free(&rxr->hn_br_dma, rxr->hn_br);

#if defined(INET) || defined(INET6)
		tcp_lro_free(&rxr->hn_lro);
#endif
		free(rxr->hn_rdbuf, M_NETVSC);
	}
	free(sc->hn_rx_ring, M_NETVSC);
	sc->hn_rx_ring = NULL;

	sc->hn_rx_ring_cnt = 0;
	sc->hn_rx_ring_inuse = 0;
}
static int
hn_create_tx_ring(struct hn_softc *sc, int id)
{
	struct hn_tx_ring *txr = &sc->hn_tx_ring[id];
	device_t dev = sc->hn_dev;
	bus_dma_tag_t parent_dtag;
	int error, i;

	txr->hn_sc = sc;
	txr->hn_tx_idx = id;

#ifndef HN_USE_TXDESC_BUFRING
	mtx_init(&txr->hn_txlist_spin, "hn txlist", NULL, MTX_SPIN);
#endif
	mtx_init(&txr->hn_tx_lock, "hn tx", NULL, MTX_DEF);

	txr->hn_txdesc_cnt = HN_TX_DESC_CNT;
	txr->hn_txdesc = malloc(sizeof(struct hn_txdesc) * txr->hn_txdesc_cnt,
	    M_NETVSC, M_WAITOK | M_ZERO);
#ifndef HN_USE_TXDESC_BUFRING
	SLIST_INIT(&txr->hn_txlist);
#else
	txr->hn_txdesc_br = buf_ring_alloc(txr->hn_txdesc_cnt, M_NETVSC,
	    M_WAITOK, &txr->hn_tx_lock);
#endif

	txr->hn_tx_taskq = sc->hn_tx_taskq;

	if (hn_use_if_start) {
		txr->hn_txeof = hn_start_txeof;
		TASK_INIT(&txr->hn_tx_task, 0, hn_start_taskfunc, txr);
		TASK_INIT(&txr->hn_txeof_task, 0, hn_start_txeof_taskfunc, txr);
	} else {
		int br_depth;

		txr->hn_txeof = hn_xmit_txeof;
		TASK_INIT(&txr->hn_tx_task, 0, hn_xmit_taskfunc, txr);
		TASK_INIT(&txr->hn_txeof_task, 0, hn_xmit_txeof_taskfunc, txr);

		br_depth = hn_get_txswq_depth(txr);
		txr->hn_mbuf_br = buf_ring_alloc(br_depth, M_NETVSC,
		    M_WAITOK, &txr->hn_tx_lock);
	}

	txr->hn_direct_tx_size = hn_direct_tx_size;

	/*
	 * Always schedule transmission instead of trying to do direct
	 * transmission.  This one gives the best performance so far.
	 */
	txr->hn_sched_tx = 1;

	parent_dtag = bus_get_dma_tag(dev);

	/* DMA tag for RNDIS packet messages. */
	error = bus_dma_tag_create(parent_dtag, /* parent */
	    HN_RNDIS_PKT_ALIGN,		/* alignment */
	    HN_RNDIS_PKT_BOUNDARY,	/* boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    HN_RNDIS_PKT_LEN,		/* maxsize */
	    1,				/* nsegments */
	    HN_RNDIS_PKT_LEN,		/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->hn_tx_rndis_dtag);
	if (error) {
		device_printf(dev, "failed to create rndis dmatag\n");
		return error;
	}

	/* DMA tag for data. */
	error = bus_dma_tag_create(parent_dtag, /* parent */
	    1,				/* alignment */
	    HN_TX_DATA_BOUNDARY,	/* boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    HN_TX_DATA_MAXSIZE,		/* maxsize */
	    HN_TX_DATA_SEGCNT_MAX,	/* nsegments */
	    HN_TX_DATA_SEGSIZE,		/* maxsegsize */
	    0,				/* flags */
	    NULL,			/* lockfunc */
	    NULL,			/* lockfuncarg */
	    &txr->hn_tx_data_dtag);
	if (error) {
		device_printf(dev, "failed to create data dmatag\n");
		return error;
	}

	for (i = 0; i < txr->hn_txdesc_cnt; ++i) {
		struct hn_txdesc *txd = &txr->hn_txdesc[i];

		txd->txr = txr;

		/*
		 * Allocate and load RNDIS packet message.
		 */
		error = bus_dmamem_alloc(txr->hn_tx_rndis_dtag,
		    (void **)&txd->rndis_pkt,
		    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO,
		    &txd->rndis_pkt_dmap);
		if (error) {
			device_printf(dev,
			    "failed to allocate rndis_packet_msg, %d\n", i);
			return error;
		}

		error = bus_dmamap_load(txr->hn_tx_rndis_dtag,
		    txd->rndis_pkt_dmap,
		    txd->rndis_pkt, HN_RNDIS_PKT_LEN,
		    hyperv_dma_map_paddr, &txd->rndis_pkt_paddr,
		    BUS_DMA_NOWAIT);
		if (error) {
			device_printf(dev,
			    "failed to load rndis_packet_msg, %d\n", i);
			bus_dmamem_free(txr->hn_tx_rndis_dtag,
			    txd->rndis_pkt, txd->rndis_pkt_dmap);
			return error;
		}

		/* DMA map for TX data. */
		error = bus_dmamap_create(txr->hn_tx_data_dtag, 0,
		    &txd->data_dmap);
		if (error) {
			device_printf(dev,
			    "failed to allocate tx data dmamap\n");
			bus_dmamap_unload(txr->hn_tx_rndis_dtag,
			    txd->rndis_pkt_dmap);
			bus_dmamem_free(txr->hn_tx_rndis_dtag,
			    txd->rndis_pkt, txd->rndis_pkt_dmap);
			return error;
		}

		/* All set; put it onto the free list. */
		txd->flags |= HN_TXD_FLAG_ONLIST;
#ifndef HN_USE_TXDESC_BUFRING
		SLIST_INSERT_HEAD(&txr->hn_txlist, txd, link);
#else
		buf_ring_enqueue(txr->hn_txdesc_br, txd);
#endif
	}
	txr->hn_txdesc_avail = txr->hn_txdesc_cnt;

	if (sc->hn_tx_sysctl_tree != NULL) {
		struct sysctl_oid_list *child;
		struct sysctl_ctx_list *ctx;
		char name[16];

		/*
		 * Create per TX ring sysctl tree:
		 * dev.hn.UNIT.tx.RINGID
		 */
		ctx = device_get_sysctl_ctx(dev);
		child = SYSCTL_CHILDREN(sc->hn_tx_sysctl_tree);

		snprintf(name, sizeof(name), "%d", id);
		txr->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO,
		    name, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

		if (txr->hn_tx_sysctl_tree != NULL) {
			child = SYSCTL_CHILDREN(txr->hn_tx_sysctl_tree);

			SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_avail",
			    CTLFLAG_RD, &txr->hn_txdesc_avail, 0,
			    "# of available TX descs");
			if (!hn_use_if_start) {
				SYSCTL_ADD_INT(ctx, child, OID_AUTO, "oactive",
				    CTLFLAG_RD, &txr->hn_oactive, 0,
				    "over active");
			}
			SYSCTL_ADD_ULONG(ctx, child, OID_AUTO, "packets",
			    CTLFLAG_RW, &txr->hn_pkts,
			    "# of packets transmitted");
		}
	}

	return 0;
}
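/*
 * Design note: every TX descriptor created above carries a
 * preallocated, DMA-loaded RNDIS packet message, so the hot TX path
 * never allocates or loads DMA memory; it only pulls a descriptor,
 * encapsulates, and sends.  A sketch of the intended pairing (using
 * hn_txdesc_get()/hn_encap()/hn_send_pkt(), which are defined
 * elsewhere in this file):
 *
 *	txd = hn_txdesc_get(txr);
 *	error = hn_encap(txr, txd, &m_head);
 *	if (!error)
 *		error = hn_send_pkt(ifp, txr, txd);
 */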
static void
hn_txdesc_dmamap_destroy(struct hn_txdesc *txd)
{
	struct hn_tx_ring *txr = txd->txr;

	KASSERT(txd->m == NULL, ("still has mbuf installed"));
	KASSERT((txd->flags & HN_TXD_FLAG_DMAMAP) == 0, ("still dma mapped"));

	bus_dmamap_unload(txr->hn_tx_rndis_dtag, txd->rndis_pkt_dmap);
	bus_dmamem_free(txr->hn_tx_rndis_dtag, txd->rndis_pkt,
	    txd->rndis_pkt_dmap);
	bus_dmamap_destroy(txr->hn_tx_data_dtag, txd->data_dmap);
}
static void
hn_destroy_tx_ring(struct hn_tx_ring *txr)
{
	struct hn_txdesc *txd;

	if (txr->hn_txdesc == NULL)
		return;

#ifndef HN_USE_TXDESC_BUFRING
	while ((txd = SLIST_FIRST(&txr->hn_txlist)) != NULL) {
		SLIST_REMOVE_HEAD(&txr->hn_txlist, link);
		hn_txdesc_dmamap_destroy(txd);
	}
#else
	mtx_lock(&txr->hn_tx_lock);
	while ((txd = buf_ring_dequeue_sc(txr->hn_txdesc_br)) != NULL)
		hn_txdesc_dmamap_destroy(txd);
	mtx_unlock(&txr->hn_tx_lock);
#endif

	if (txr->hn_tx_data_dtag != NULL)
		bus_dma_tag_destroy(txr->hn_tx_data_dtag);
	if (txr->hn_tx_rndis_dtag != NULL)
		bus_dma_tag_destroy(txr->hn_tx_rndis_dtag);

#ifdef HN_USE_TXDESC_BUFRING
	buf_ring_free(txr->hn_txdesc_br, M_NETVSC);
#endif

	free(txr->hn_txdesc, M_NETVSC);
	txr->hn_txdesc = NULL;

	if (txr->hn_mbuf_br != NULL)
		buf_ring_free(txr->hn_mbuf_br, M_NETVSC);

#ifndef HN_USE_TXDESC_BUFRING
	mtx_destroy(&txr->hn_txlist_spin);
#endif
	mtx_destroy(&txr->hn_tx_lock);
}
static int
hn_create_tx_data(struct hn_softc *sc, int ring_cnt)
{
	struct sysctl_oid_list *child;
	struct sysctl_ctx_list *ctx;
	int i;

	/*
	 * Create TXBUF for chimney sending.
	 *
	 * NOTE: It is shared by all channels.
	 */
	sc->hn_chim = hyperv_dmamem_alloc(bus_get_dma_tag(sc->hn_dev),
	    PAGE_SIZE, 0, NETVSC_SEND_BUFFER_SIZE, &sc->hn_chim_dma,
	    BUS_DMA_WAITOK | BUS_DMA_ZERO);
	if (sc->hn_chim == NULL) {
		device_printf(sc->hn_dev, "allocate txbuf failed\n");
		return (ENOMEM);
	}

	sc->hn_tx_ring_cnt = ring_cnt;
	sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;

	sc->hn_tx_ring = malloc(sizeof(struct hn_tx_ring) * sc->hn_tx_ring_cnt,
	    M_NETVSC, M_WAITOK | M_ZERO);

	ctx = device_get_sysctl_ctx(sc->hn_dev);
	child = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->hn_dev));

	/* Create dev.hn.UNIT.tx sysctl tree */
	sc->hn_tx_sysctl_tree = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, "tx",
	    CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");

	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		int error;

		error = hn_create_tx_ring(sc, i);
		if (error)
			return error;
	}

	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "no_txdescs",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_tx_ring, hn_no_txdescs),
	    hn_tx_stat_ulong_sysctl, "LU", "# of times short of TX descs");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "send_failed",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_tx_ring, hn_send_failed),
	    hn_tx_stat_ulong_sysctl, "LU", "# of hyper-v sending failures");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "txdma_failed",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_tx_ring, hn_txdma_failed),
	    hn_tx_stat_ulong_sysctl, "LU", "# of TX DMA failures");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_collapsed",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_tx_ring, hn_tx_collapsed),
	    hn_tx_stat_ulong_sysctl, "LU", "# of TX mbufs collapsed");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_tx_ring, hn_tx_chimney),
	    hn_tx_stat_ulong_sysctl, "LU", "# of chimney sends");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_tried",
	    CTLTYPE_ULONG | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_tx_ring, hn_tx_chimney_tried),
	    hn_tx_stat_ulong_sysctl, "LU", "# of chimney send tries");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "txdesc_cnt",
	    CTLFLAG_RD, &sc->hn_tx_ring[0].hn_txdesc_cnt, 0,
	    "# of total TX descs");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_chimney_max",
	    CTLFLAG_RD, &sc->hn_chim_szmax, 0,
	    "Chimney send packet size upper boundary");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_chimney_size",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc, 0,
	    hn_chim_size_sysctl, "I", "Chimney send packet size limit");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "direct_tx_size",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_tx_ring, hn_direct_tx_size),
	    hn_tx_conf_int_sysctl, "I",
	    "Size of the packet for direct transmission");
	SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "sched_tx",
	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, sc,
	    __offsetof(struct hn_tx_ring, hn_sched_tx),
	    hn_tx_conf_int_sysctl, "I",
	    "Always schedule transmission "
	    "instead of doing direct transmission");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_cnt",
	    CTLFLAG_RD, &sc->hn_tx_ring_cnt, 0, "# created TX rings");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "tx_ring_inuse",
	    CTLFLAG_RD, &sc->hn_tx_ring_inuse, 0, "# used TX rings");

	return (0);
}
static void
hn_set_chim_size(struct hn_softc *sc, int chim_size)
{
	int i;

	for (i = 0; i < sc->hn_tx_ring_inuse; ++i)
		sc->hn_tx_ring[i].hn_chim_size = chim_size;
}

static void
hn_fixup_tx_data(struct hn_softc *sc)
{
	uint64_t csum_assist;
	int i;

	hn_set_chim_size(sc, sc->hn_chim_szmax);
	if (hn_tx_chimney_size > 0 &&
	    hn_tx_chimney_size < sc->hn_chim_szmax)
		hn_set_chim_size(sc, hn_tx_chimney_size);

	csum_assist = 0;
	if (sc->hn_caps & HN_CAP_IPCS)
		csum_assist |= CSUM_IP;
	if (sc->hn_caps & HN_CAP_TCP4CS)
		csum_assist |= CSUM_IP_TCP;
	if (sc->hn_caps & HN_CAP_UDP4CS)
		csum_assist |= CSUM_IP_UDP;
#ifdef INET6
	if (sc->hn_caps & HN_CAP_TCP6CS)
		csum_assist |= CSUM_IP6_TCP;
	if (sc->hn_caps & HN_CAP_UDP6CS)
		csum_assist |= CSUM_IP6_UDP;
#endif

	for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
		sc->hn_tx_ring[i].hn_csum_assist = csum_assist;
}

static void
hn_destroy_tx_data(struct hn_softc *sc)
{
	int i;

	if (sc->hn_chim != NULL) {
		hyperv_dmamem_free(&sc->hn_chim_dma, sc->hn_chim);
		sc->hn_chim = NULL;
	}

	if (sc->hn_tx_ring_cnt == 0)
		return;

	for (i = 0; i < sc->hn_tx_ring_cnt; ++i)
		hn_destroy_tx_ring(&sc->hn_tx_ring[i]);

	free(sc->hn_tx_ring, M_NETVSC);
	sc->hn_tx_ring = NULL;

	sc->hn_tx_ring_cnt = 0;
	sc->hn_tx_ring_inuse = 0;
}
static void
hn_start_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	hn_start_locked(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}

static void
hn_start_txeof_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	atomic_clear_int(&txr->hn_sc->hn_ifp->if_drv_flags, IFF_DRV_OACTIVE);
	hn_start_locked(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}

static void
hn_stop_tx_tasks(struct hn_softc *sc)
{
	int i;

	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];

		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_tx_task);
		taskqueue_drain(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}
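/*
 * NOTE:
 * taskqueue_drain(9) only waits for a queued or running task to
 * complete; it does not prevent the task from being enqueued again.
 * Callers are therefore expected to have already stopped the sources
 * of new TX work (e.g. by clearing IFF_DRV_RUNNING) before calling
 * hn_stop_tx_tasks().
 */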
static int
hn_xmit(struct hn_tx_ring *txr, int len)
{
	struct hn_softc *sc = txr->hn_sc;
	struct ifnet *ifp = sc->hn_ifp;
	struct mbuf *m_head;

	mtx_assert(&txr->hn_tx_lock, MA_OWNED);
	KASSERT(hn_use_if_start == 0,
	    ("hn_xmit is called when if_start is enabled"));

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || txr->hn_oactive)
		return 0;

	while ((m_head = drbr_peek(ifp, txr->hn_mbuf_br)) != NULL) {
		struct hn_txdesc *txd;
		int error;

		if (len > 0 && m_head->m_pkthdr.len > len) {
			/*
			 * This send could be time consuming; let the callers
			 * dispatch this packet (and any follow-up packets)
			 * to the TX taskqueue.
			 */
			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
			return 1;
		}

		txd = hn_txdesc_get(txr);
		if (txd == NULL) {
			txr->hn_no_txdescs++;
			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
			txr->hn_oactive = 1;
			break;
		}

		error = hn_encap(txr, txd, &m_head);
		if (error) {
			/* Both txd and m_head are freed; discard */
			drbr_advance(ifp, txr->hn_mbuf_br);
			continue;
		}

		error = hn_send_pkt(ifp, txr, txd);
		if (__predict_false(error)) {
			/* txd is freed, but m_head is not */
			drbr_putback(ifp, txr->hn_mbuf_br, m_head);
			txr->hn_oactive = 1;
			break;
		}

		/* Sent */
		drbr_advance(ifp, txr->hn_mbuf_br);
	}
	return 0;
}
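/*
 * Note on backpressure in hn_xmit(): whenever TX descriptors run out
 * or the host send fails, the mbuf is put back onto the drbr and
 * hn_oactive is set, so nothing is reordered and the ring is retried
 * only after hn_xmit_txeof() clears hn_oactive.  A nonzero return
 * value tells the caller to reschedule through the TX taskqueue
 * instead of spinning on the lock.
 */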
static int
hn_transmit(struct ifnet *ifp, struct mbuf *m)
{
	struct hn_softc *sc = ifp->if_softc;
	struct hn_tx_ring *txr;
	int error, idx = 0;

	/*
	 * Select the TX ring based on flowid
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		idx = m->m_pkthdr.flowid % sc->hn_tx_ring_inuse;
	txr = &sc->hn_tx_ring[idx];

	error = drbr_enqueue(ifp, txr->hn_mbuf_br, m);
	if (error) {
		if_inc_counter(ifp, IFCOUNTER_OQDROPS, 1);
		return error;
	}

	if (txr->hn_oactive)
		return 0;

	if (txr->hn_sched_tx)
		goto do_sched;

	if (mtx_trylock(&txr->hn_tx_lock)) {
		int sched;

		sched = hn_xmit(txr, txr->hn_direct_tx_size);
		mtx_unlock(&txr->hn_tx_lock);
		if (!sched)
			return 0;
	}
do_sched:
	taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_tx_task);
	return 0;
}
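/*
 * A worked example of the flowid mapping above: with
 * hn_tx_ring_inuse == 4, a packet whose m_pkthdr.flowid is 47 maps to
 * TX ring 47 % 4 == 3, so every packet of a given flow lands on the
 * same ring (and thus the same VMBus channel), while packets without
 * a hash type fall back to ring 0.
 */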
static void
hn_xmit_qflush(struct ifnet *ifp)
{
	struct hn_softc *sc = ifp->if_softc;
	int i;

	for (i = 0; i < sc->hn_tx_ring_inuse; ++i) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[i];
		struct mbuf *m;

		mtx_lock(&txr->hn_tx_lock);
		while ((m = buf_ring_dequeue_sc(txr->hn_mbuf_br)) != NULL)
			m_freem(m);
		mtx_unlock(&txr->hn_tx_lock);
	}
	if_qflush(ifp);
}
static void
hn_xmit_txeof(struct hn_tx_ring *txr)
{

	if (txr->hn_sched_tx)
		goto do_sched;

	if (mtx_trylock(&txr->hn_tx_lock)) {
		int sched;

		txr->hn_oactive = 0;
		sched = hn_xmit(txr, txr->hn_direct_tx_size);
		mtx_unlock(&txr->hn_tx_lock);
		if (sched) {
			taskqueue_enqueue(txr->hn_tx_taskq,
			    &txr->hn_tx_task);
		}
	} else {
do_sched:
		/*
		 * Release oactive earlier, in the hope that others
		 * could catch up.  The task will clear oactive again,
		 * under the hn_tx_lock, to avoid possible races.
		 */
		txr->hn_oactive = 0;
		taskqueue_enqueue(txr->hn_tx_taskq, &txr->hn_txeof_task);
	}
}
static void
hn_xmit_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	hn_xmit(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}

static void
hn_xmit_txeof_taskfunc(void *xtxr, int pending __unused)
{
	struct hn_tx_ring *txr = xtxr;

	mtx_lock(&txr->hn_tx_lock);
	txr->hn_oactive = 0;
	hn_xmit(txr, 0);
	mtx_unlock(&txr->hn_tx_lock);
}
static int
hn_chan_attach(struct hn_softc *sc, struct vmbus_channel *chan)
{
	struct vmbus_chan_br cbr;
	struct hn_rx_ring *rxr;
	struct hn_tx_ring *txr = NULL;
	int idx, error;

	idx = vmbus_chan_subidx(chan);

	/*
	 * Link this channel to RX/TX ring.
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 && < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED) == 0,
	    ("RX ring %d already attached", idx));
	rxr->hn_rx_flags |= HN_RX_FLAG_ATTACHED;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "link RX ring %d to chan%u\n",
		    idx, vmbus_chan_id(chan));
	}

	if (idx < sc->hn_tx_ring_inuse) {
		txr = &sc->hn_tx_ring[idx];
		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED) == 0,
		    ("TX ring %d already attached", idx));
		txr->hn_tx_flags |= HN_TX_FLAG_ATTACHED;

		txr->hn_chan = chan;
		if (bootverbose) {
			if_printf(sc->hn_ifp, "link TX ring %d to chan%u\n",
			    idx, vmbus_chan_id(chan));
		}
	}

	/* Bind this channel to a proper CPU. */
	vmbus_chan_cpu_set(chan, (sc->hn_cpu + idx) % mp_ncpus);

	/*
	 * Open this channel
	 */
	cbr.cbr = rxr->hn_br;
	cbr.cbr_paddr = rxr->hn_br_dma.hv_paddr;
	cbr.cbr_txsz = NETVSC_DEVICE_RING_BUFFER_SIZE;
	cbr.cbr_rxsz = NETVSC_DEVICE_RING_BUFFER_SIZE;
	error = vmbus_chan_open_br(chan, &cbr, NULL, 0, hn_chan_callback, rxr);
	if (error) {
		if_printf(sc->hn_ifp, "open chan%u failed: %d\n",
		    vmbus_chan_id(chan), error);
		rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;
		if (txr != NULL)
			txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
	}
	return (error);
}
static void
hn_chan_detach(struct hn_softc *sc, struct vmbus_channel *chan)
{
	struct hn_rx_ring *rxr;
	int idx;

	idx = vmbus_chan_subidx(chan);

	/*
	 * Unlink this channel from the RX/TX ring.
	 */
	KASSERT(idx >= 0 && idx < sc->hn_rx_ring_inuse,
	    ("invalid channel index %d, should be >= 0 && < %d",
	     idx, sc->hn_rx_ring_inuse));
	rxr = &sc->hn_rx_ring[idx];
	KASSERT((rxr->hn_rx_flags & HN_RX_FLAG_ATTACHED),
	    ("RX ring %d is not attached", idx));
	rxr->hn_rx_flags &= ~HN_RX_FLAG_ATTACHED;

	if (idx < sc->hn_tx_ring_inuse) {
		struct hn_tx_ring *txr = &sc->hn_tx_ring[idx];

		KASSERT((txr->hn_tx_flags & HN_TX_FLAG_ATTACHED),
		    ("TX ring %d is not attached", idx));
		txr->hn_tx_flags &= ~HN_TX_FLAG_ATTACHED;
	}

	/*
	 * Close this channel.
	 *
	 * NOTE:
	 * Channel closing does _not_ destroy the target channel.
	 */
	vmbus_chan_close(chan);
}
static int
hn_attach_subchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i, error = 0;

	if (subchan_cnt == 0)
		return (0);

	/* Attach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i) {
		error = hn_chan_attach(sc, subchans[i]);
		if (error)
			break;
	}
	vmbus_subchan_rel(subchans, subchan_cnt);

	if (error) {
		if_printf(sc->hn_ifp, "sub-channels attach failed: %d\n", error);
		return (error);
	}
	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d sub-channels attached\n",
		    subchan_cnt);
	}
	return (0);
}

static void
hn_detach_allchans(struct hn_softc *sc)
{
	struct vmbus_channel **subchans;
	int subchan_cnt = sc->hn_rx_ring_inuse - 1;
	int i;

	if (subchan_cnt == 0)
		goto back;

	/* Detach the sub-channels. */
	subchans = vmbus_subchan_get(sc->hn_prichan, subchan_cnt);
	for (i = 0; i < subchan_cnt; ++i)
		hn_chan_detach(sc, subchans[i]);
	vmbus_subchan_rel(subchans, subchan_cnt);

back:
	/*
	 * Detach the primary channel, _after_ all sub-channels
	 * are detached.
	 */
	hn_chan_detach(sc, sc->hn_prichan);

	/* Wait for sub-channels to be destroyed, if any. */
	vmbus_subchan_drain(sc->hn_prichan);

	for (i = 0; i < sc->hn_rx_ring_cnt; ++i) {
		KASSERT((sc->hn_rx_ring[i].hn_rx_flags &
		    HN_RX_FLAG_ATTACHED) == 0,
		    ("%dth RX ring is still attached", i));
	}
	for (i = 0; i < sc->hn_tx_ring_cnt; ++i) {
		KASSERT((sc->hn_tx_ring[i].hn_tx_flags &
		    HN_TX_FLAG_ATTACHED) == 0,
		    ("%dth TX ring is still attached", i));
	}
}
static int
hn_synth_alloc_subchans(struct hn_softc *sc, int *nsubch)
{
	struct vmbus_channel **subchans;
	int nchan, rxr_cnt, error;

	nchan = *nsubch + 1;
	if (sc->hn_ndis_ver < HN_NDIS_VERSION_6_30 || nchan == 1) {
		/*
		 * Either RSS is not supported, or multiple RX/TX rings
		 * are not requested.
		 */
		*nsubch = 0;
		return (0);
	}

	/*
	 * Get RSS capabilities, e.g. # of RX rings, and # of indirect
	 * table entries.
	 */
	error = hn_rndis_get_rsscaps(sc, &rxr_cnt);
	if (error) {
		/* No RSS; this is benign. */
		*nsubch = 0;
		return (0);
	}
	if (bootverbose) {
		if_printf(sc->hn_ifp, "RX rings offered %u, requested %d\n",
		    rxr_cnt, nchan);
	}

	if (nchan > rxr_cnt)
		nchan = rxr_cnt;
	if (nchan == 1) {
		if_printf(sc->hn_ifp, "only 1 channel is supported, no vRSS\n");
		*nsubch = 0;
		return (0);
	}

	/*
	 * Allocate sub-channels from NVS.
	 */
	*nsubch = nchan - 1;
	error = hn_nvs_alloc_subchans(sc, nsubch);
	if (error || *nsubch == 0) {
		/* Failed to allocate sub-channels. */
		*nsubch = 0;
		return (0);
	}

	/*
	 * Wait for all sub-channels to become ready before moving on.
	 */
	subchans = vmbus_subchan_get(sc->hn_prichan, *nsubch);
	vmbus_subchan_rel(subchans, *nsubch);
	return (0);
}
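/*
 * In short: on input *nsubch is the number of sub-channels the caller
 * would like; on return it is the number actually usable (0 when RSS
 * or sub-channels are unavailable).  For example, requesting 7
 * sub-channels (8 rings) from a host that offers only 4 RX rings
 * results in at most *nsubch == 3; NVS may grant even fewer.
 */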
static int
hn_synth_attach(struct hn_softc *sc, int mtu)
{
	struct ndis_rssprm_toeplitz *rss = &sc->hn_rss;
	int error, nsubch, nchan, i;
	uint32_t old_caps;

	/* Save capabilities for later verification. */
	old_caps = sc->hn_caps;
	sc->hn_caps = 0;

	/*
	 * Attach the primary channel _before_ attaching NVS and RNDIS.
	 */
	error = hn_chan_attach(sc, sc->hn_prichan);
	if (error)
		return (error);

	/*
	 * Attach NVS.
	 */
	error = hn_nvs_attach(sc, mtu);
	if (error)
		return (error);

	/*
	 * Attach RNDIS _after_ NVS is attached.
	 */
	error = hn_rndis_attach(sc);
	if (error)
		return (error);

	/*
	 * Make sure capabilities are not changed.
	 */
	if (device_is_attached(sc->hn_dev) && old_caps != sc->hn_caps) {
		if_printf(sc->hn_ifp, "caps mismatch old 0x%08x, new 0x%08x\n",
		    old_caps, sc->hn_caps);
		/* Restore old capabilities and abort. */
		sc->hn_caps = old_caps;
		return (ENXIO);
	}

	/*
	 * Allocate sub-channels for multi-TX/RX rings.
	 *
	 * NOTE:
	 * The # of RX rings that can be used is equivalent to the # of
	 * channels to be requested.
	 */
	nsubch = sc->hn_rx_ring_cnt - 1;
	error = hn_synth_alloc_subchans(sc, &nsubch);
	if (error)
		return (error);

	nchan = nsubch + 1;
	if (nchan == 1) {
		/* Only the primary channel can be used; done */
		goto back;
	}

	/*
	 * Configure RSS key and indirect table _after_ all sub-channels
	 * are allocated.
	 */
	if ((sc->hn_flags & HN_FLAG_HAS_RSSKEY) == 0) {
		/*
		 * RSS key is not set yet; set it to the default RSS key.
		 */
		if (bootverbose)
			if_printf(sc->hn_ifp, "setup default RSS key\n");
		memcpy(rss->rss_key, hn_rss_key_default, sizeof(rss->rss_key));
		sc->hn_flags |= HN_FLAG_HAS_RSSKEY;
	}
	if ((sc->hn_flags & HN_FLAG_HAS_RSSIND) == 0) {
		/*
		 * RSS indirect table is not set yet; set it up in
		 * round-robin fashion.
		 */
		if (bootverbose) {
			if_printf(sc->hn_ifp, "setup default RSS indirect "
			    "table\n");
		}
		/* TODO: Take ndis_rss_caps.ndis_nind into account. */
		for (i = 0; i < NDIS_HASH_INDCNT; ++i)
			rss->rss_ind[i] = i % nchan;
		sc->hn_flags |= HN_FLAG_HAS_RSSIND;
	} else {
		/*
		 * # of usable channels may have changed, so we have to
		 * make sure that all entries in the RSS indirect table
		 * are valid.
		 */
		hn_rss_ind_fixup(sc, nchan);
	}

	error = hn_rndis_conf_rss(sc, NDIS_RSS_FLAG_NONE);
	if (error) {
		/*
		 * Failed to configure RSS key or indirect table; only
		 * the primary channel can be used.
		 */
		nchan = 1;
	}
back:
	/*
	 * Set the # of TX/RX rings that could be used according to
	 * the # of channels that NVS offered.
	 */
	hn_set_ring_inuse(sc, nchan);

	/*
	 * Attach the sub-channels, if any.
	 */
	error = hn_attach_subchans(sc);
	if (error)
		return (error);
	return (0);
}
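/*
 * A worked example of the round-robin setup above: with nchan == 4,
 * rss_ind[] becomes 0,1,2,3,0,1,2,3,... for all NDIS_HASH_INDCNT
 * entries, so hashed RX flows are spread evenly across the four
 * channels.
 */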
static void
hn_set_ring_inuse(struct hn_softc *sc, int ring_cnt)
{
	KASSERT(ring_cnt > 0 && ring_cnt <= sc->hn_rx_ring_cnt,
	    ("invalid ring count %d", ring_cnt));

	if (sc->hn_tx_ring_cnt > ring_cnt)
		sc->hn_tx_ring_inuse = ring_cnt;
	else
		sc->hn_tx_ring_inuse = sc->hn_tx_ring_cnt;
	sc->hn_rx_ring_inuse = ring_cnt;

	if (bootverbose) {
		if_printf(sc->hn_ifp, "%d TX ring, %d RX ring\n",
		    sc->hn_tx_ring_inuse, sc->hn_rx_ring_inuse);
	}
}
static void
hn_nvs_handle_notify(struct hn_softc *sc, const struct vmbus_chanpkt_hdr *pkt)
{
	const struct hn_nvs_hdr *hdr;

	if (VMBUS_CHANPKT_DATALEN(pkt) < sizeof(*hdr)) {
		if_printf(sc->hn_ifp, "invalid nvs notify\n");
		return;
	}
	hdr = VMBUS_CHANPKT_CONST_DATA(pkt);

	if (hdr->nvs_type == HN_NVS_TYPE_TXTBL_NOTE) {
		/* Useless; ignore */
		return;
	}
	if_printf(sc->hn_ifp, "got notify, nvs type %u\n", hdr->nvs_type);
}

static void
hn_nvs_handle_comp(struct hn_softc *sc, struct vmbus_channel *chan,
    const struct vmbus_chanpkt_hdr *pkt)
{
	struct hn_send_ctx *sndc;

	sndc = (struct hn_send_ctx *)(uintptr_t)pkt->cph_xactid;
	sndc->hn_cb(sndc, sc, chan, VMBUS_CHANPKT_CONST_DATA(pkt),
	    VMBUS_CHANPKT_DATALEN(pkt));
	/*
	 * NOTE:
	 * 'sndc' CAN NOT be accessed anymore, since it can be freed by
	 * its callback.
	 */
}
static void
hn_nvs_handle_rxbuf(struct hn_softc *sc, struct hn_rx_ring *rxr,
    struct vmbus_channel *chan, const struct vmbus_chanpkt_hdr *pkthdr)
{
	const struct vmbus_chanpkt_rxbuf *pkt;
	const struct hn_nvs_hdr *nvs_hdr;
	int count, i, hlen;

	if (__predict_false(VMBUS_CHANPKT_DATALEN(pkthdr) < sizeof(*nvs_hdr))) {
		if_printf(rxr->hn_ifp, "invalid nvs RNDIS\n");
		return;
	}
	nvs_hdr = VMBUS_CHANPKT_CONST_DATA(pkthdr);

	/* Make sure that this is a RNDIS message. */
	if (__predict_false(nvs_hdr->nvs_type != HN_NVS_TYPE_RNDIS)) {
		if_printf(rxr->hn_ifp, "nvs type %u, not RNDIS\n",
		    nvs_hdr->nvs_type);
		return;
	}

	hlen = VMBUS_CHANPKT_GETLEN(pkthdr->cph_hlen);
	if (__predict_false(hlen < sizeof(*pkt))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf chanpkt\n");
		return;
	}
	pkt = (const struct vmbus_chanpkt_rxbuf *)pkthdr;

	if (__predict_false(pkt->cp_rxbuf_id != HN_NVS_RXBUF_SIG)) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_id 0x%08x\n",
		    pkt->cp_rxbuf_id);
		return;
	}

	count = pkt->cp_rxbuf_cnt;
	if (__predict_false(hlen <
	    __offsetof(struct vmbus_chanpkt_rxbuf, cp_rxbuf[count]))) {
		if_printf(rxr->hn_ifp, "invalid rxbuf_cnt %d\n", count);
		return;
	}

	/* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */
	for (i = 0; i < count; ++i) {
		int ofs, len;

		ofs = pkt->cp_rxbuf[i].rb_ofs;
		len = pkt->cp_rxbuf[i].rb_len;
		if (__predict_false(ofs + len > NETVSC_RECEIVE_BUFFER_SIZE)) {
			if_printf(rxr->hn_ifp, "%dth RNDIS msg overflow rxbuf, "
			    "ofs %d, len %d\n", i, ofs, len);
			continue;
		}
		hv_rf_on_receive(sc, rxr, rxr->hn_rxbuf + ofs, len);
	}

	/*
	 * Moved completion call back here so that all received
	 * messages (not just data messages) will trigger a response
	 * message back to the host.
	 */
	hn_nvs_ack_rxbuf(chan, pkt->cp_hdr.cph_xactid);
}
/*
 * Net VSC on receive completion
 *
 * Send a receive completion packet to the RNDIS device (i.e., NetVSP).
 */
static void
hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid)
{
	struct hn_nvs_rndis_ack ack;
	int retries = 0;
	int ret = 0;

	ack.nvs_type = HN_NVS_TYPE_RNDIS_ACK;
	ack.nvs_status = HN_NVS_STATUS_OK;

retry_send_cmplt:
	/* Send the completion */
	ret = vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP,
	    VMBUS_CHANPKT_FLAG_NONE, &ack, sizeof(ack), tid);
	if (ret == 0) {
		/* success */
	} else if (ret == EAGAIN) {
		/* no more room... wait a bit and attempt to retry 3 times */
		retries++;
		if (retries < 4) {
			DELAY(100);
			goto retry_send_cmplt;
		}
	}
}
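/*
 * The EAGAIN path above retries the completion up to 3 more times,
 * pausing briefly between attempts, and then gives up; other errors
 * are not retried.  The ack itself is fire-and-forget: no state is
 * kept once vmbus_chan_send() accepts it.
 */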
static void
hn_chan_callback(struct vmbus_channel *chan, void *xrxr)
{
	struct hn_rx_ring *rxr = xrxr;
	struct hn_softc *sc = rxr->hn_ifp->if_softc;
	void *buffer;
	int bufferlen = NETVSC_PACKET_SIZE;

	buffer = rxr->hn_rdbuf;
	do {
		struct vmbus_chanpkt_hdr *pkt = buffer;
		uint32_t bytes_rxed;
		int ret;

		bytes_rxed = bufferlen;
		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);
		if (ret == 0) {
			switch (pkt->cph_type) {
			case VMBUS_CHANPKT_TYPE_COMP:
				hn_nvs_handle_comp(sc, chan, pkt);
				break;
			case VMBUS_CHANPKT_TYPE_RXBUF:
				hn_nvs_handle_rxbuf(sc, rxr, chan, pkt);
				break;
			case VMBUS_CHANPKT_TYPE_INBAND:
				hn_nvs_handle_notify(sc, pkt);
				break;
			default:
				if_printf(rxr->hn_ifp,
				    "unknown chan pkt %u\n",
				    pkt->cph_type);
				break;
			}
		} else if (ret == ENOBUFS) {
			/* Handle large packet */
			if (bufferlen > NETVSC_PACKET_SIZE) {
				free(buffer, M_NETVSC);
				buffer = NULL;
			}

			/* alloc new buffer */
			buffer = malloc(bytes_rxed, M_NETVSC, M_NOWAIT);
			if (buffer == NULL) {
				if_printf(rxr->hn_ifp,
				    "hv_cb malloc buffer failed, len=%u\n",
				    bytes_rxed);
				bufferlen = 0;
				break;
			}
			bufferlen = bytes_rxed;
		} else {
			/* No more packets */
			break;
		}
	} while (1);

	if (bufferlen > NETVSC_PACKET_SIZE)
		free(buffer, M_NETVSC);

	hv_rf_channel_rollup(rxr, rxr->hn_txr);
}
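/*
 * Buffer-sizing strategy in hn_chan_callback(): the loop starts with
 * the preallocated NETVSC_PACKET_SIZE buffer (rxr->hn_rdbuf) and, on
 * ENOBUFS, retries with a temporary malloc'ed buffer sized to the
 * reported packet length; any temporary buffer is freed once the
 * channel is drained, so the steady-state receive path performs no
 * allocations.
 */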
static void
hn_tx_taskq_create(void *arg __unused)
{
	if (!hn_share_tx_taskq)
		return;

	hn_tx_taskq = taskqueue_create("hn_tx", M_WAITOK,
	    taskqueue_thread_enqueue, &hn_tx_taskq);
	taskqueue_start_threads(&hn_tx_taskq, 1, PI_NET, "hn tx");
	if (hn_bind_tx_taskq >= 0) {
		int cpu = hn_bind_tx_taskq;
		struct task cpuset_task;
		cpuset_t cpu_set;

		if (cpu > mp_ncpus - 1)
			cpu = mp_ncpus - 1;
		CPU_SETOF(cpu, &cpu_set);
		TASK_INIT(&cpuset_task, 0, hn_cpuset_setthread_task, &cpu_set);
		taskqueue_enqueue(hn_tx_taskq, &cpuset_task);
		taskqueue_drain(hn_tx_taskq, &cpuset_task);
	}
}
SYSINIT(hn_txtq_create, SI_SUB_DRIVERS, SI_ORDER_FIRST,
    hn_tx_taskq_create, NULL);

static void
hn_tx_taskq_destroy(void *arg __unused)
{
	if (hn_tx_taskq != NULL)
		taskqueue_free(hn_tx_taskq);
}
SYSUNINIT(hn_txtq_destroy, SI_SUB_DRIVERS, SI_ORDER_FIRST,
    hn_tx_taskq_destroy, NULL);
static device_method_t netvsc_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		netvsc_probe),
	DEVMETHOD(device_attach,	netvsc_attach),
	DEVMETHOD(device_detach,	netvsc_detach),
	DEVMETHOD(device_shutdown,	netvsc_shutdown),

	DEVMETHOD_END
};

static driver_t netvsc_driver = {
	NETVSC_DEVNAME,
	netvsc_methods,
	sizeof(struct hn_softc)
};

static devclass_t netvsc_devclass;

DRIVER_MODULE(hn, vmbus, netvsc_driver, netvsc_devclass, 0, 0);
MODULE_VERSION(hn, 1);
MODULE_DEPEND(hn, vmbus, 1, 1, 1);