/******************************************************************************

  Copyright (c) 2013-2017, Intel Corporation

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

  1. Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.

  2. Redistributions in binary form must reproduce the above copyright
     notice, this list of conditions and the following disclaimer in the
     documentation and/or other materials provided with the distribution.

  3. Neither the name of the Intel Corporation nor the names of its
     contributors may be used to endorse or promote products derived from
     this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
/*
** IXL driver TX/RX Routines:
**  This was separated to allow usage by
**  both the PF and VF drivers.
*/
#ifndef IXL_STANDALONE_BUILD
#include "opt_inet6.h"
#endif

#include "ixl.h"

#ifdef RSS
#include <net/rss_config.h>
#endif
/* Local Prototypes */
static void ixl_rx_checksum(struct mbuf *, u32, u32, u8);
static void ixl_refresh_mbufs(struct ixl_queue *, int);
static int  ixl_xmit(struct ixl_queue *, struct mbuf **);
static int  ixl_tx_setup_offload(struct ixl_queue *,
                struct mbuf *, u32 *, u32 *);
static bool ixl_tso_setup(struct ixl_queue *, struct mbuf *);
static void ixl_queue_sw_irq(struct ixl_vsi *, int);

static inline void ixl_rx_discard(struct rx_ring *, int);
static inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
                struct mbuf *, u8);

static inline bool ixl_tso_detect_sparse(struct mbuf *mp);
static inline u32 ixl_get_tx_head(struct ixl_queue *que);
#include <dev/netmap/if_ixl_netmap.h>
#if __FreeBSD_version >= 1200000
int ixl_rx_miss, ixl_rx_miss_bufs, ixl_crcstrip = 1;
#endif /* DEV_NETMAP */
/*
 * @key: buffer the default RSS key is copied into
 */
ixl_get_default_rss_key(u32 *key)
    u32 rss_seed[IXL_RSS_KEY_SIZE_REG] = {0x41b01687,
        0x183cfd8c, 0xce880440, 0x580cbc3c,
        0x35897377, 0x328b25e1, 0x4fa98922,
        0xb7d90c14, 0xd5bad70d, 0xcd15a2c1,

    bcopy(rss_seed, key, IXL_RSS_KEY_SIZE);
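    /*
     * Illustrative use (hypothetical caller, not part of this file): an
     * init path could fetch the default key before programming the HW:
     *
     *	u32 rss_key[IXL_RSS_KEY_SIZE_REG];
     *
     *	ixl_get_default_rss_key(rss_key);
     *	// ...write rss_key[] into the device's RSS key registers...
     */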
/**
 * i40e_vc_stat_str - convert virtchnl status err code to a string
 * @hw: pointer to the HW structure
 * @stat_err: the status error code to convert
 */
i40e_vc_stat_str(struct i40e_hw *hw, enum virtchnl_status_code stat_err)
    switch (stat_err) {
    case VIRTCHNL_STATUS_SUCCESS:
        return "OK";
    case VIRTCHNL_ERR_PARAM:
        return "VIRTCHNL_ERR_PARAM";
    case VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH:
        return "VIRTCHNL_STATUS_ERR_OPCODE_MISMATCH";
    case VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR:
        return "VIRTCHNL_STATUS_ERR_CQP_COMPL_ERROR";
    case VIRTCHNL_STATUS_ERR_INVALID_VF_ID:
        return "VIRTCHNL_STATUS_ERR_INVALID_VF_ID";
    case VIRTCHNL_STATUS_NOT_SUPPORTED:
        return "VIRTCHNL_STATUS_NOT_SUPPORTED";
    default:
        snprintf(hw->err_str, sizeof(hw->err_str), "%d", stat_err);
        return hw->err_str;
    }
/*
 * PCI BUSMASTER needs to be set for proper operation.
 */
ixl_set_busmaster(device_t dev)
    u16 pci_cmd_word;

    pci_cmd_word = pci_read_config(dev, PCIR_COMMAND, 2);
    pci_cmd_word |= PCIM_CMD_BUSMASTEREN;
    pci_write_config(dev, PCIR_COMMAND, pci_cmd_word, 2);
/*
 * Rewrite the ENABLE bit in the MSIX control register
 */
ixl_set_msix_enable(device_t dev)
    int rid;
    u16 msix_ctrl;

    pci_find_cap(dev, PCIY_MSIX, &rid);
    rid += PCIR_MSIX_CTRL;
    msix_ctrl = pci_read_config(dev, rid, 2);
    msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
    pci_write_config(dev, rid, msix_ctrl, 2);
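    /*
     * Sketch of the expected attach-time ordering of the two helpers above
     * (hypothetical caller, error handling elided): bus mastering must be
     * on before any DMA can work, and the MSIX enable bit is rewritten in
     * case a previous driver instance or reset left it cleared:
     *
     *	ixl_set_busmaster(dev);
     *	ixl_set_msix_enable(dev);
     */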
/*
** Multiqueue Transmit driver
*/
ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
    struct ixl_vsi *vsi = ifp->if_softc;
    struct ixl_queue *que;

    /*
     * Which queue to use:
     * When doing RSS, map it to the same outbound
     * queue as the incoming flow would be mapped to.
     * If everything is set up correctly, it should be
     * the same bucket that the current CPU we're on is.
     */
    if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
        if (rss_hash2bucket(m->m_pkthdr.flowid,
            M_HASHTYPE_GET(m), &bucket_id) == 0) {
            i = bucket_id % vsi->num_queues;
        } else
            i = m->m_pkthdr.flowid % vsi->num_queues;
    } else
        i = curcpu % vsi->num_queues;

    que = &vsi->queues[i];
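    /*
     * Worked example (illustrative): with num_queues = 8, a flow whose
     * RSS hash falls in bucket 11 transmits on queue 11 % 8 = 3, the
     * same queue the HW RSS table steers that flow's inbound packets to.
     */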
    txr = &que->txr;

    err = drbr_enqueue(ifp, txr->br, m);
    if (err)
        return (err);
    if (IXL_TX_TRYLOCK(txr)) {
        ixl_mq_start_locked(ifp, txr);
        IXL_TX_UNLOCK(txr);
    } else
        taskqueue_enqueue(que->tq, &que->tx_task);

    return (0);
ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
    struct ixl_queue *que = txr->que;
    struct ixl_vsi *vsi = que->vsi;

    if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
        vsi->link_active == 0)
        return (ENETDOWN);

    /* Process the transmit queue */
    while ((next = drbr_peek(ifp, txr->br)) != NULL) {
        if ((err = ixl_xmit(que, &next)) != 0) {
            if (next == NULL)
                drbr_advance(ifp, txr->br);
            else
                drbr_putback(ifp, txr->br, next);
            break;
        }
        drbr_advance(ifp, txr->br);
        /* Send a copy of the frame to the BPF listener */
        ETHER_BPF_MTAP(ifp, next);
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
            break;
    }

    if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
        ixl_txeof(que);
/*
 * Called from a taskqueue to drain queued transmit packets.
 */
ixl_deferred_mq_start(void *arg, int pending)
    struct ixl_queue *que = arg;
    struct tx_ring *txr = &que->txr;
    struct ixl_vsi *vsi = que->vsi;
    struct ifnet *ifp = vsi->ifp;

    IXL_TX_LOCK(txr);
    if (!drbr_empty(ifp, txr->br))
        ixl_mq_start_locked(ifp, txr);
    IXL_TX_UNLOCK(txr);
/*
** Flush all queue ring buffers
*/
ixl_qflush(struct ifnet *ifp)
    struct ixl_vsi *vsi = ifp->if_softc;

    for (int i = 0; i < vsi->num_queues; i++) {
        struct ixl_queue *que = &vsi->queues[i];
        struct tx_ring *txr = &que->txr;
        struct mbuf *m;

        IXL_TX_LOCK(txr);
        while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
            m_freem(m);
        IXL_TX_UNLOCK(txr);
    }
    if_qflush(ifp);
ixl_tso_detect_sparse(struct mbuf *mp)
    struct mbuf *m;
    int num = 0, mss;

    mss = mp->m_pkthdr.tso_segsz;

    /* Exclude first mbuf; assume it contains all headers */
    for (m = mp->m_next; m != NULL; m = m->m_next) {

        mss -= m->m_len % mp->m_pkthdr.tso_segsz;
        if (mss < 1) {
            if (num > IXL_SPARSE_CHAIN)
                return (TRUE);
            num = (mss == 0) ? 0 : 1;
            mss += mp->m_pkthdr.tso_segsz;
        } else
            num++;
    }

    return (FALSE);
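/*
 * Note on the heuristic above: "num" counts mbufs contributing to the
 * current MSS-sized window of payload; if one MSS worth of data spans
 * more than IXL_SPARSE_CHAIN mbufs, the chain is declared sparse and the
 * caller m_defrag()s it, since the HW limits how many data descriptors
 * may carry a single segment. E.g., a long run of small (say 150-byte)
 * mbufs against a 1448-byte MSS quickly accumulates num past the limit.
 */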
/*********************************************************************
 *
 * This routine maps the mbufs to tx descriptors, allowing the
 * TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/

#define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
    struct ixl_vsi *vsi = que->vsi;
    struct i40e_hw *hw = vsi->hw;
    struct tx_ring *txr = &que->txr;
    struct ixl_tx_buf *buf;
    struct i40e_tx_desc *txd = NULL;
    struct mbuf *m_head, *m;
    int i, j, error, nsegs;
    bus_dma_segment_t segs[IXL_MAX_TSO_SEGS];

    m_head = *m_headp;

    /*
     * Important to capture the first descriptor
     * used because it will contain the index of
     * the one we tell the hardware to report back
     */
    first = txr->next_avail;
    buf = &txr->buffers[first];
    map = buf->map;
    tag = txr->tx_tag;

    if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
        /* Use larger mapping for TSO */
        tag = txr->tso_tag;
        if (ixl_tso_detect_sparse(m_head)) {
            m = m_defrag(m_head, M_NOWAIT);
            if (m == NULL) {
                m_freem(*m_headp);
                *m_headp = NULL;
                return (ENOBUFS);
            }
            *m_headp = m;
        }
    }

    /*
     * Map the packet for DMA.
     */
    error = bus_dmamap_load_mbuf_sg(tag, map,
        *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

    if (error == EFBIG) {
        /* Try it again - defrag once */
        m = m_defrag(*m_headp, M_NOWAIT);
        if (m == NULL) {
            que->mbuf_defrag_failed++;
            m_freem(*m_headp);
            *m_headp = NULL;
            return (ENOBUFS);
        }
        *m_headp = m;

        error = bus_dmamap_load_mbuf_sg(tag, map,
            *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
        if (error != 0) {
            que->tx_dmamap_failed++;
            m_freem(*m_headp);
            *m_headp = NULL;
            return (error);
        }
    } else if (error != 0) {
        que->tx_dmamap_failed++;
        m_freem(*m_headp);
        *m_headp = NULL;
        return (error);
    }

    /* Make certain there are enough descriptors */
    if (nsegs > txr->avail - 2) {
        error = ENOBUFS;
        goto xmit_fail;
    }

    /* Set up the TSO/CSUM offload */
    if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
        error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
        if (error)
            goto xmit_fail;
    }

    cmd |= I40E_TX_DESC_CMD_ICRC;
    /* Grab the VLAN tag */
    if (m_head->m_flags & M_VLANTAG) {
        cmd |= I40E_TX_DESC_CMD_IL2TAG1;
        vtag = htole16(m_head->m_pkthdr.ether_vtag);
    }

    i = txr->next_avail;
    for (j = 0; j < nsegs; j++) {
        int seglen;

        buf = &txr->buffers[i];
        buf->tag = tag; /* Keep track of the type tag */
        txd = &txr->base[i];
        seglen = segs[j].ds_len;

        txd->buffer_addr = htole64(segs[j].ds_addr);
        txd->cmd_type_offset_bsz =
            htole64(I40E_TX_DESC_DTYPE_DATA
            | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
            | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
            | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
            | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));

        last = i; /* descriptor that will get completion IRQ */

        if (++i == que->num_tx_desc)
            i = 0;
    }
    /* Set the last descriptor for report */
    txd->cmd_type_offset_bsz |=
        htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
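    /*
     * For reference: each data descriptor's qword1, built above, packs
     * (from low to high bits) DTYPE, CMD, OFFSET (MACLEN/IPLEN/L4LEN),
     * the buffer size, and L2TAG1, per the I40E_TXD_QW1_*_SHIFT values;
     * OR-ing in IXL_TXD_CMD (EOP | RS) on the final descriptor makes the
     * HW report completion for the whole packet.
     */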
    buf->m_head = m_head;
    /*
     * Swap the dma map between the first and last descriptor.
     * The descriptor that gets checked on completion will now
     * have the real map from the first descriptor.
     */
    txr->buffers[first].map = buf->map;
    buf->map = map;
    bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);

    /* Set the index of the descriptor that will be marked done */
    buf = &txr->buffers[first];
    buf->eop_index = last;

    bus_dmamap_sync(txr->dma.tag, txr->dma.map,
        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
    /*
     * Advance the Transmit Descriptor Tail (Tdt), this tells the
     * hardware that this frame is available to transmit.
     */
    ++txr->total_packets;
    wr32(hw, txr->tail, i);

    /* Mark outstanding work */
    atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);

    return (0);

xmit_fail:
    bus_dmamap_unload(tag, buf->map);
    return (error);
/*********************************************************************
 *
 * Allocate memory for tx_buffer structures. The tx_buffer stores all
 * the information needed to transmit a packet on the wire. This is
 * called only once at attach, setup is done every reset.
 *
 **********************************************************************/
ixl_allocate_tx_data(struct ixl_queue *que)
    struct tx_ring *txr = &que->txr;
    struct ixl_vsi *vsi = que->vsi;
    device_t dev = vsi->dev;
    struct ixl_tx_buf *buf;

    /*
     * Setup DMA descriptor areas.
     */
    if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
        1, 0,                   /* alignment, bounds */
        BUS_SPACE_MAXADDR,      /* lowaddr */
        BUS_SPACE_MAXADDR,      /* highaddr */
        NULL, NULL,             /* filter, filterarg */
        IXL_TSO_SIZE,           /* maxsize */
        IXL_MAX_TX_SEGS,        /* nsegments */
        IXL_MAX_DMA_SEG_SIZE,   /* maxsegsize */
        NULL,                   /* lockfuncarg */
        &txr->tx_tag))) {
        device_printf(dev, "Unable to allocate TX DMA tag\n");
        return (error);
    }

    /* Make a special tag for TSO */
    if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
        1, 0,                   /* alignment, bounds */
        BUS_SPACE_MAXADDR,      /* lowaddr */
        BUS_SPACE_MAXADDR,      /* highaddr */
        NULL, NULL,             /* filter, filterarg */
        IXL_TSO_SIZE,           /* maxsize */
        IXL_MAX_TSO_SEGS,       /* nsegments */
        IXL_MAX_DMA_SEG_SIZE,   /* maxsegsize */
        NULL,                   /* lockfuncarg */
        &txr->tso_tag))) {
        device_printf(dev, "Unable to allocate TX TSO DMA tag\n");
        goto free_tx_dma;
    }

    if (!(txr->buffers =
        (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
        que->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
        device_printf(dev, "Unable to allocate tx_buffer memory\n");
        error = ENOMEM;
        goto free_tx_tso_dma;
    }

    /* Create the descriptor buffer default dma maps */
    buf = txr->buffers;
    for (i = 0; i < que->num_tx_desc; i++, buf++) {
        buf->tag = txr->tx_tag;
        error = bus_dmamap_create(buf->tag, 0, &buf->map);
        if (error != 0) {
            device_printf(dev, "Unable to create TX DMA map\n");

    bus_dmamap_destroy(buf->tag, buf->map);

    free(txr->buffers, M_DEVBUF);

free_tx_tso_dma:
    bus_dma_tag_destroy(txr->tso_tag);

free_tx_dma:
    bus_dma_tag_destroy(txr->tx_tag);

    return (error);
/*********************************************************************
 *
 * (Re)Initialize a queue transmit ring.
 *	- called by init, it clears the descriptor ring,
 *	  and frees any stale mbufs
 *
 **********************************************************************/
ixl_init_tx_ring(struct ixl_queue *que)
#ifdef DEV_NETMAP
    struct netmap_adapter *na = NA(que->vsi->ifp);
    struct netmap_slot *slot;
#endif /* DEV_NETMAP */
    struct tx_ring *txr = &que->txr;
    struct ixl_tx_buf *buf;

    /* Clear the old ring contents */
#ifdef DEV_NETMAP
    /*
     * (under lock): if in netmap mode, do some consistency
     * checks and set slot to entry 0 of the netmap ring.
     */
    slot = netmap_reset(na, NR_TX, que->me, 0);
#endif /* DEV_NETMAP */
    bzero((void *)txr->base,
        (sizeof(struct i40e_tx_desc)) * que->num_tx_desc);

    txr->next_avail = 0;
    txr->next_to_clean = 0;

    /* Reset watchdog status */
    txr->watchdog_timer = 0;

    /* Free any existing tx mbufs. */
    buf = txr->buffers;
    for (int i = 0; i < que->num_tx_desc; i++, buf++) {
        if (buf->m_head != NULL) {
            bus_dmamap_sync(buf->tag, buf->map,
                BUS_DMASYNC_POSTWRITE);
            bus_dmamap_unload(buf->tag, buf->map);
            m_freem(buf->m_head);
            buf->m_head = NULL;
        }
#ifdef DEV_NETMAP
        /*
         * In netmap mode, set the map for the packet buffer.
         * NOTE: Some drivers (not this one) also need to set
         * the physical buffer address in the NIC ring.
         * netmap_idx_n2k() maps a nic index, i, into the corresponding
         * netmap slot index, si
         */
        if (slot) {
            int si = netmap_idx_n2k(na->tx_rings[que->me], i);
            netmap_load_map(na, buf->tag, buf->map, NMB(na, slot + si));
        }
#endif /* DEV_NETMAP */
        /* Clear the EOP index */
        buf->eop_index = -1;
    }

    /* Set number of descriptors available */
    txr->avail = que->num_tx_desc;

    bus_dmamap_sync(txr->dma.tag, txr->dma.map,
        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/*********************************************************************
 *
 * Free transmit ring related data structures.
 *
 **********************************************************************/
ixl_free_que_tx(struct ixl_queue *que)
    struct tx_ring *txr = &que->txr;
    struct ixl_tx_buf *buf;

    INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);

    for (int i = 0; i < que->num_tx_desc; i++) {
        buf = &txr->buffers[i];
        if (buf->m_head != NULL) {
            bus_dmamap_sync(buf->tag, buf->map,
                BUS_DMASYNC_POSTWRITE);
            m_freem(buf->m_head);
            buf->m_head = NULL;
        }
        bus_dmamap_unload(buf->tag, buf->map);
        bus_dmamap_destroy(buf->tag, buf->map);
    }
    if (txr->buffers != NULL) {
        free(txr->buffers, M_DEVBUF);
        txr->buffers = NULL;
    }
    if (txr->tx_tag != NULL) {
        bus_dma_tag_destroy(txr->tx_tag);
        txr->tx_tag = NULL;
    }
    if (txr->tso_tag != NULL) {
        bus_dma_tag_destroy(txr->tso_tag);
        txr->tso_tag = NULL;
    }

    INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
/*********************************************************************
 *
 * Setup descriptor for hw offloads
 *
 **********************************************************************/
ixl_tx_setup_offload(struct ixl_queue *que,
    struct mbuf *mp, u32 *cmd, u32 *off)
    struct ether_vlan_header *eh;
    struct ip *ip = NULL;
    struct ip6_hdr *ip6;
    struct tcphdr *th = NULL;
    int elen, ip_hlen = 0, tcp_hlen;

    /* Set up the TSO context descriptor if required */
    if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
        tso = ixl_tso_setup(que, mp);
        if (tso)
            ++que->tso;
        else
            return (ENXIO);
    }

    /*
     * Determine where frame payload starts.
     * Jump over vlan headers if already present,
     * helpful for QinQ too.
     */
    eh = mtod(mp, struct ether_vlan_header *);
    if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
        etype = ntohs(eh->evl_proto);
        elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
    } else {
        etype = ntohs(eh->evl_encap_proto);
        elen = ETHER_HDR_LEN;
    }

    switch (etype) {
    case ETHERTYPE_IP:
        ip = (struct ip *)(mp->m_data + elen);
        ip_hlen = ip->ip_hl << 2;
        ipproto = ip->ip_p;
        th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
        /* The IP checksum must be recalculated with TSO */
        if (mp->m_pkthdr.csum_flags & CSUM_TSO)
            *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
        else
            *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
        break;
    case ETHERTYPE_IPV6:
        ip6 = (struct ip6_hdr *)(mp->m_data + elen);
        ip_hlen = sizeof(struct ip6_hdr);
        ipproto = ip6->ip6_nxt;
        th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
        *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
        break;
    default:
        break;
    }

    *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
    *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;

    switch (ipproto) {
    case IPPROTO_TCP:
        tcp_hlen = th->th_off << 2;
        if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
            *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
            *off |= (tcp_hlen >> 2) <<
                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
        }
        break;
    case IPPROTO_UDP:
        if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
            *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
            *off |= (sizeof(struct udphdr) >> 2) <<
                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
        }
        break;
    case IPPROTO_SCTP:
        if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
            *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
            *off |= (sizeof(struct sctphdr) >> 2) <<
                I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
        }
        break;
    }
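    /*
     * Worked example (typical untagged IPv4/TCP frame, no options):
     * elen = 14, so MACLEN = 14 >> 1 = 7 words; ip_hlen = 20, so
     * IPLEN = 20 >> 2 = 5 dwords; tcp_hlen = 20, so L4LEN = 5 dwords.
     * All three land in *off at the shift positions used above.
     */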
/**********************************************************************
 *
 * Setup context for hardware segmentation offload (TSO)
 *
 **********************************************************************/
ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
    struct tx_ring *txr = &que->txr;
    struct i40e_tx_context_desc *TXD;
    struct ixl_tx_buf *buf;
    u32 cmd, mss, type, tsolen;
    int idx, elen, ip_hlen, tcp_hlen;
    struct ether_vlan_header *eh;

#if defined(INET6) || defined(INET)
    struct tcphdr *th;
#endif
    u64 type_cmd_tso_mss;

    /*
     * Determine where frame payload starts.
     * Jump over vlan headers if already present
     */
    eh = mtod(mp, struct ether_vlan_header *);
    if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
        elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
        etype = eh->evl_proto;
    } else {
        elen = ETHER_HDR_LEN;
        etype = eh->evl_encap_proto;
    }

    switch (ntohs(etype)) {
    case ETHERTYPE_IPV6:
        ip6 = (struct ip6_hdr *)(mp->m_data + elen);
        if (ip6->ip6_nxt != IPPROTO_TCP)
            return (FALSE);
        ip_hlen = sizeof(struct ip6_hdr);
        th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
        th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
        tcp_hlen = th->th_off << 2;
        /*
         * The corresponding flag is set by the stack in the IPv4
         * TSO case, but not in IPv6 (at least in FreeBSD 10.2).
         * So, set it here because the rest of the flow requires it.
         */
        mp->m_pkthdr.csum_flags |= CSUM_TCP_IPV6;
        break;
    case ETHERTYPE_IP:
        ip = (struct ip *)(mp->m_data + elen);
        if (ip->ip_p != IPPROTO_TCP)
            return (FALSE);
        ip->ip_sum = 0;
        ip_hlen = ip->ip_hl << 2;
        th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
        th->th_sum = in_pseudo(ip->ip_src.s_addr,
            ip->ip_dst.s_addr, htons(IPPROTO_TCP));
        tcp_hlen = th->th_off << 2;
        break;
    default:
        printf("%s: CSUM_TSO but no supported IP version (0x%04x)",
            __func__, ntohs(etype));
        return (FALSE);
    }

    /* Ensure we have at least the IP+TCP header in the first mbuf. */
    if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
        return (FALSE);

    idx = txr->next_avail;
    buf = &txr->buffers[idx];
    TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
    tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);

    type = I40E_TX_DESC_DTYPE_CONTEXT;
    cmd = I40E_TX_CTX_DESC_TSO;
    /* TSO MSS must not be less than 64 */
    if (mp->m_pkthdr.tso_segsz < IXL_MIN_TSO_MSS) {
        que->mss_too_small++;
        mp->m_pkthdr.tso_segsz = IXL_MIN_TSO_MSS;
    }
    mss = mp->m_pkthdr.tso_segsz;

    type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
        ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
        ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
        ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
    TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);

    TXD->tunneling_params = htole32(0);
    buf->m_head = NULL;
    buf->eop_index = -1;

    if (++idx == que->num_tx_desc)
        idx = 0;

    txr->next_avail = idx;
    return (TRUE);
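    /*
     * Worked example (illustrative): a 9014-byte TSO frame with 14-byte
     * MAC, 20-byte IP and 20-byte TCP headers gives tsolen =
     * 9014 - 54 = 8960; with an MSS of 1448 the HW emits
     * howmany(8960, 1448) = 7 wire segments from this one context.
     */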
/*
 * ixl_get_tx_head - Retrieve the value from the
 * location where the HW records its HEAD index
 */
ixl_get_tx_head(struct ixl_queue *que)
    struct tx_ring *txr = &que->txr;
    void *head = &txr->base[que->num_tx_desc];

    return LE32_TO_CPU(*(volatile __le32 *)head);
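/*
 * Note: the write-back location read above is the qword just past the
 * last real descriptor (index num_tx_desc), so when head write-back is
 * enabled the ring's DMA area must be sized with that extra slot.
 */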
/**********************************************************************
 *
 * Get index of last used descriptor/buffer from hardware, and clean
 * the descriptors/buffers up to that index.
 *
 **********************************************************************/
ixl_txeof_hwb(struct ixl_queue *que)
    struct tx_ring *txr = &que->txr;
    u32 first, last, head, done;
    struct ixl_tx_buf *buf;
    struct i40e_tx_desc *tx_desc, *eop_desc;

    mtx_assert(&txr->mtx, MA_OWNED);

#ifdef DEV_NETMAP
    // XXX todo: implement moderation
    if (netmap_tx_irq(que->vsi->ifp, que->me))
        return (FALSE);
#endif /* DEV_NETMAP */

    /* These are not the descriptors you seek, move along :) */
    if (txr->avail == que->num_tx_desc) {
        atomic_store_rel_32(&txr->watchdog_timer, 0);
        return (FALSE);
    }

    first = txr->next_to_clean;
    buf = &txr->buffers[first];
    tx_desc = (struct i40e_tx_desc *)&txr->base[first];
    last = buf->eop_index;

    eop_desc = (struct i40e_tx_desc *)&txr->base[last];

    /* Sync DMA before reading head index from ring */
    bus_dmamap_sync(txr->dma.tag, txr->dma.map,
        BUS_DMASYNC_POSTREAD);

    /* Get the Head WB value */
    head = ixl_get_tx_head(que);

    /*
     * Get the index of the first descriptor
     * BEYOND the EOP and call that 'done'.
     * I do this so the comparison in the
     * inner while loop below can be simple
     */
    if (++last == que->num_tx_desc) last = 0;
    done = last;

    /*
     * The HEAD index of the ring is written in a
     * defined location; this, rather than a done bit,
     * is what is used to keep track of what must be
     * cleaned.
     */
    while (first != head) {
        /* We clean the range of the packet */
        while (first != done) {
            ++txr->avail;
            if (buf->m_head) {
                txr->bytes += /* for ITR adjustment */
                    buf->m_head->m_pkthdr.len;
                txr->tx_bytes += /* for TX stats */
                    buf->m_head->m_pkthdr.len;
                bus_dmamap_sync(buf->tag,
                    buf->map,
                    BUS_DMASYNC_POSTWRITE);
                bus_dmamap_unload(buf->tag,
                    buf->map);
                m_freem(buf->m_head);
                buf->m_head = NULL;
            }
            if (++first == que->num_tx_desc)
                first = 0;

            buf = &txr->buffers[first];
            tx_desc = &txr->base[first];
        }
        /* If a packet was successfully cleaned, reset the watchdog timer */
        atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);
        /* See if there is more work now */
        last = buf->eop_index;
        if (last != -1) {
            eop_desc = &txr->base[last];
            /* Get next done point */
            if (++last == que->num_tx_desc) last = 0;
            done = last;
        } else
            break;
    }

    bus_dmamap_sync(txr->dma.tag, txr->dma.map,
        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

    txr->next_to_clean = first;

    /*
     * If there are no pending descriptors, clear the timeout.
     */
    if (txr->avail == que->num_tx_desc) {
        atomic_store_rel_32(&txr->watchdog_timer, 0);
        return (FALSE);
    }

    return (TRUE);
/**********************************************************************
 *
 * Use index kept by driver and the flag on each descriptor to find used
 * descriptor/buffers and clean them up for re-use.
 *
 * This method of reclaiming descriptors is currently incompatible with
 * head write-back mode.
 *
 * Returns TRUE if there are more descriptors to be cleaned after this
 * call.
 *
 **********************************************************************/
ixl_txeof_dwb(struct ixl_queue *que)
    struct tx_ring *txr = &que->txr;
    u32 first, last, done;
    u32 limit = 256;
    struct ixl_tx_buf *buf;
    struct i40e_tx_desc *tx_desc, *eop_desc;

    mtx_assert(&txr->mtx, MA_OWNED);

    /* There are no descriptors to clean */
    if (txr->avail == que->num_tx_desc) {
        atomic_store_rel_32(&txr->watchdog_timer, 0);
        return (FALSE);
    }

    /* Set starting index/descriptor/buffer */
    first = txr->next_to_clean;
    buf = &txr->buffers[first];
    tx_desc = &txr->base[first];

    /*
     * This function operates per-packet -- identifies the start of the
     * packet and gets the index of the last descriptor of the packet from
     * it, from eop_index.
     *
     * If the last descriptor is marked "done" by the hardware, then all
     * of the descriptors for the packet are cleaned.
     */
    last = buf->eop_index;
    if (last == -1)
        return (FALSE);
    eop_desc = &txr->base[last];

    /* Sync DMA before reading from ring */
    bus_dmamap_sync(txr->dma.tag, txr->dma.map, BUS_DMASYNC_POSTREAD);

    /*
     * Get the index of the first descriptor beyond the EOP and call that
     * 'done'. Simplifies the comparison for the inner loop below.
     */
    if (++last == que->num_tx_desc)
        last = 0;
    done = last;

    /*
     * We find the last completed descriptor by examining each
     * descriptor's status bits to see if it's done.
     */
    do {
        /* Break if last descriptor in packet isn't marked done */
        if ((eop_desc->cmd_type_offset_bsz & I40E_TXD_QW1_DTYPE_MASK)
            != I40E_TX_DESC_DTYPE_DESC_DONE)
            break;

        /* Clean the descriptors that make up the processed packet */
        while (first != done) {
            /*
             * If there was a buffer attached to this descriptor,
             * prevent the adapter from accessing it, and add its
             * length to the queue's TX stats.
             */
            if (buf->m_head) {
                txr->bytes += buf->m_head->m_pkthdr.len;
                txr->tx_bytes += buf->m_head->m_pkthdr.len;
                bus_dmamap_sync(buf->tag, buf->map,
                    BUS_DMASYNC_POSTWRITE);
                bus_dmamap_unload(buf->tag, buf->map);
                m_freem(buf->m_head);
                buf->m_head = NULL;
            }
            buf->eop_index = -1;
            ++txr->avail;

            if (++first == que->num_tx_desc)
                first = 0;
            buf = &txr->buffers[first];
            tx_desc = &txr->base[first];
        }
        /* If a packet was successfully cleaned, reset the watchdog timer */
        atomic_store_rel_32(&txr->watchdog_timer, IXL_WATCHDOG);

        /*
         * Since buf is the first buffer after the one that was just
         * cleaned, check if the packet it starts is done, too.
         */
        last = buf->eop_index;
        if (last != -1) {
            eop_desc = &txr->base[last];
            /* Get next done point */
            if (++last == que->num_tx_desc) last = 0;
            done = last;
        } else
            break;
    } while (--limit);

    bus_dmamap_sync(txr->dma.tag, txr->dma.map,
        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

    txr->next_to_clean = first;

    /*
     * If there are no pending descriptors, clear the watchdog timer.
     */
    if (txr->avail == que->num_tx_desc) {
        atomic_store_rel_32(&txr->watchdog_timer, 0);
        return (FALSE);
    }

    return (TRUE);
ixl_txeof(struct ixl_queue *que)
    struct ixl_vsi *vsi = que->vsi;

    return (vsi->enable_head_writeback) ? ixl_txeof_hwb(que)
        : ixl_txeof_dwb(que);
/*********************************************************************
 *
 * Refresh mbuf buffers for RX descriptor rings
 *	- now keeps its own state, so discards due to resource
 *	  exhaustion are unnecessary; if an mbuf cannot be obtained
 *	  it just returns, keeping its placeholder, thus it can simply
 *	  be called again to retry.
 *
 **********************************************************************/
ixl_refresh_mbufs(struct ixl_queue *que, int limit)
    struct ixl_vsi *vsi = que->vsi;
    struct rx_ring *rxr = &que->rxr;
    bus_dma_segment_t hseg[1];
    bus_dma_segment_t pseg[1];
    struct ixl_rx_buf *buf;
    struct mbuf *mh, *mp;
    int i, j, nsegs, error;
    bool refreshed = FALSE;

    i = j = rxr->next_refresh;
    /* Control the loop with one beyond */
    if (++j == que->num_rx_desc)
        j = 0;

    while (j != limit) {
        buf = &rxr->buffers[i];
        if (rxr->hdr_split == FALSE)
            goto no_split;

        if (buf->m_head == NULL) {
            mh = m_gethdr(M_NOWAIT, MT_DATA);
            if (mh == NULL)
                goto update;
        } else
            mh = buf->m_head;

        mh->m_pkthdr.len = mh->m_len = MHLEN;
        mh->m_flags |= M_PKTHDR;
        /* Get the memory mapping */
        error = bus_dmamap_load_mbuf_sg(rxr->htag,
            buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
        if (error != 0) {
            printf("Refresh mbufs: hdr dmamap load"
                " failure - %d\n", error);
            m_free(mh);
            buf->m_head = NULL;
            goto update;
        }
        buf->m_head = mh;
        bus_dmamap_sync(rxr->htag, buf->hmap,
            BUS_DMASYNC_PREREAD);
        rxr->base[i].read.hdr_addr =
            htole64(hseg[0].ds_addr);

no_split:
        if (buf->m_pack == NULL) {
            mp = m_getjcl(M_NOWAIT, MT_DATA,
                M_PKTHDR, rxr->mbuf_sz);
            if (mp == NULL)
                goto update;
        } else
            mp = buf->m_pack;

        mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
        /* Get the memory mapping */
        error = bus_dmamap_load_mbuf_sg(rxr->ptag,
            buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
        if (error != 0) {
            printf("Refresh mbufs: payload dmamap load"
                " failure - %d\n", error);
            m_free(mp);
            buf->m_pack = NULL;
            goto update;
        }
        buf->m_pack = mp;
        bus_dmamap_sync(rxr->ptag, buf->pmap,
            BUS_DMASYNC_PREREAD);
        rxr->base[i].read.pkt_addr =
            htole64(pseg[0].ds_addr);
        /* Used only when doing header split */
        rxr->base[i].read.hdr_addr = 0;

        refreshed = TRUE;
        /* Next is precalculated */
        i = j;
        rxr->next_refresh = i;
        if (++j == que->num_rx_desc)
            j = 0;
    }

update:
    if (refreshed) /* Update hardware tail index */
        wr32(vsi->hw, rxr->tail, rxr->next_refresh);
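    /*
     * A single tail write hands all of the just-refilled descriptors
     * back to the HW: the device owns the ring from its head up to (but
     * not including) the tail index written here.
     */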
/*********************************************************************
 *
 * Allocate memory for rx_buffer structures. Since we use one
 * rx_buffer per descriptor, the maximum number of rx_buffer's
 * that we'll need is equal to the number of receive descriptors
 * that we've defined.
 *
 **********************************************************************/
ixl_allocate_rx_data(struct ixl_queue *que)
    struct rx_ring *rxr = &que->rxr;
    struct ixl_vsi *vsi = que->vsi;
    device_t dev = vsi->dev;
    struct ixl_rx_buf *buf;
    int i, bsize, error;

    if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
        1, 0,               /* alignment, bounds */
        BUS_SPACE_MAXADDR,  /* lowaddr */
        BUS_SPACE_MAXADDR,  /* highaddr */
        NULL, NULL,         /* filter, filterarg */
        MSIZE,              /* maxsize */
        1,                  /* nsegments */
        MSIZE,              /* maxsegsize */
        0,                  /* flags */
        NULL,               /* lockfunc */
        NULL,               /* lockfuncarg */
        &rxr->htag))) {
        device_printf(dev, "Unable to create RX DMA htag\n");
        return (error);
    }

    if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
        1, 0,               /* alignment, bounds */
        BUS_SPACE_MAXADDR,  /* lowaddr */
        BUS_SPACE_MAXADDR,  /* highaddr */
        NULL, NULL,         /* filter, filterarg */
        MJUM16BYTES,        /* maxsize */
        1,                  /* nsegments */
        MJUM16BYTES,        /* maxsegsize */
        0,                  /* flags */
        NULL,               /* lockfunc */
        NULL,               /* lockfuncarg */
        &rxr->ptag))) {
        device_printf(dev, "Unable to create RX DMA ptag\n");
        goto free_rx_htag;
    }

    bsize = sizeof(struct ixl_rx_buf) * que->num_rx_desc;
    if (!(rxr->buffers =
        (struct ixl_rx_buf *) malloc(bsize,
        M_DEVBUF, M_NOWAIT | M_ZERO))) {
        device_printf(dev, "Unable to allocate rx_buffer memory\n");
        error = ENOMEM;
        goto free_rx_ptag;
    }

    for (i = 0; i < que->num_rx_desc; i++) {
        buf = &rxr->buffers[i];
        error = bus_dmamap_create(rxr->htag,
            BUS_DMA_NOWAIT, &buf->hmap);
        if (error) {
            device_printf(dev, "Unable to create RX head map\n");
            goto free_maps;
        }
        error = bus_dmamap_create(rxr->ptag,
            BUS_DMA_NOWAIT, &buf->pmap);
        if (error) {
            bus_dmamap_destroy(rxr->htag, buf->hmap);
            device_printf(dev, "Unable to create RX pkt map\n");
            goto free_maps;
        }
    }

    return (0);

free_maps:
    while (i--) {
        buf = &rxr->buffers[i];
        bus_dmamap_destroy(rxr->ptag, buf->pmap);
        bus_dmamap_destroy(rxr->htag, buf->hmap);
    }
    free(rxr->buffers, M_DEVBUF);
    rxr->buffers = NULL;
free_rx_ptag:
    bus_dma_tag_destroy(rxr->ptag);
free_rx_htag:
    bus_dma_tag_destroy(rxr->htag);

    return (error);
/*********************************************************************
 *
 * (Re)Initialize the queue receive ring and its buffers.
 *
 **********************************************************************/
ixl_init_rx_ring(struct ixl_queue *que)
    struct rx_ring *rxr = &que->rxr;
    struct ixl_vsi *vsi = que->vsi;
#if defined(INET6) || defined(INET)
    struct ifnet *ifp = vsi->ifp;
    struct lro_ctrl *lro = &rxr->lro;
#endif
    struct ixl_rx_buf *buf;
    bus_dma_segment_t pseg[1], hseg[1];
    int rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
    struct netmap_adapter *na = NA(que->vsi->ifp);
    struct netmap_slot *slot;
#endif /* DEV_NETMAP */

#ifdef DEV_NETMAP
    /* same as in ixl_init_tx_ring() */
    slot = netmap_reset(na, NR_RX, que->me, 0);
#endif /* DEV_NETMAP */
    /* Clear the ring contents */
    rsize = roundup2(que->num_rx_desc *
        sizeof(union i40e_rx_desc), DBA_ALIGN);
    bzero((void *)rxr->base, rsize);
    /* Cleanup any existing buffers */
    for (int i = 0; i < que->num_rx_desc; i++) {
        buf = &rxr->buffers[i];
        if (buf->m_head != NULL) {
            bus_dmamap_sync(rxr->htag, buf->hmap,
                BUS_DMASYNC_POSTREAD);
            bus_dmamap_unload(rxr->htag, buf->hmap);
            buf->m_head->m_flags |= M_PKTHDR;
            m_freem(buf->m_head);
        }
        if (buf->m_pack != NULL) {
            bus_dmamap_sync(rxr->ptag, buf->pmap,
                BUS_DMASYNC_POSTREAD);
            bus_dmamap_unload(rxr->ptag, buf->pmap);
            buf->m_pack->m_flags |= M_PKTHDR;
            m_freem(buf->m_pack);
        }
    }

    /* header split is off */
    rxr->hdr_split = FALSE;

    /* Now replenish the mbufs */
    for (int j = 0; j != que->num_rx_desc; ++j) {
        struct mbuf *mh, *mp;

        buf = &rxr->buffers[j];
#ifdef DEV_NETMAP
        /*
         * In netmap mode, fill the map and set the buffer
         * address in the NIC ring, considering the offset
         * between the netmap and NIC rings (see comment in
         * ixgbe_setup_transmit_ring() ). No need to allocate
         * an mbuf, so end the block with a continue;
         */
        if (slot) {
            int sj = netmap_idx_n2k(na->rx_rings[que->me], j);
            uint64_t paddr;
            void *addr;

            addr = PNMB(na, slot + sj, &paddr);
            netmap_load_map(na, rxr->dma.tag, buf->pmap, addr);
            /* Update descriptor and the cached value */
            rxr->base[j].read.pkt_addr = htole64(paddr);
            rxr->base[j].read.hdr_addr = 0;
            continue;
        }
#endif /* DEV_NETMAP */
        /*
         * Don't allocate mbufs if not
         * doing header split, it's wasteful
         */
        if (rxr->hdr_split == FALSE)
            goto skip_head;

        /* First the header */
        buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
        if (buf->m_head == NULL) {
            error = ENOBUFS;
            goto fail;
        }
        m_adj(buf->m_head, ETHER_ALIGN);
        mh = buf->m_head;
        mh->m_len = mh->m_pkthdr.len = MHLEN;
        mh->m_flags |= M_PKTHDR;
        /* Get the memory mapping */
        error = bus_dmamap_load_mbuf_sg(rxr->htag,
            buf->hmap, buf->m_head, hseg,
            &nsegs, BUS_DMA_NOWAIT);
        if (error != 0) /* Nothing elegant to do here */
            goto fail;
        bus_dmamap_sync(rxr->htag,
            buf->hmap, BUS_DMASYNC_PREREAD);
        /* Update descriptor */
        rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);

skip_head:
        /* Now the payload cluster */
        buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
            M_PKTHDR, rxr->mbuf_sz);
        if (buf->m_pack == NULL) {
            error = ENOBUFS;
            goto fail;
        }
        mp = buf->m_pack;
        mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
        /* Get the memory mapping */
        error = bus_dmamap_load_mbuf_sg(rxr->ptag,
            buf->pmap, mp, pseg,
            &nsegs, BUS_DMA_NOWAIT);
        if (error != 0)
            goto fail;
        bus_dmamap_sync(rxr->ptag,
            buf->pmap, BUS_DMASYNC_PREREAD);
        /* Update descriptor */
        rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
        rxr->base[j].read.hdr_addr = 0;
    }

    /* Setup our descriptor indices */
    rxr->next_check = 0;
    rxr->next_refresh = 0;
    rxr->lro_enabled = FALSE;

    rxr->discard = FALSE;

    wr32(vsi->hw, rxr->tail, que->num_rx_desc - 1);

#if defined(INET6) || defined(INET)
    /*
     * Now set up the LRO interface:
     */
    if (ifp->if_capenable & IFCAP_LRO) {
        int err = tcp_lro_init(lro);
        if (err) {
            if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
            goto fail;
        }
        INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
        rxr->lro_enabled = TRUE;
        lro->ifp = vsi->ifp;
    }
#endif

    bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
        BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

    return (0);
fail:
    return (error);
/*********************************************************************
 *
 * Free station receive ring data structures
 *
 **********************************************************************/
ixl_free_que_rx(struct ixl_queue *que)
    struct rx_ring *rxr = &que->rxr;
    struct ixl_rx_buf *buf;

    /* Cleanup any existing buffers */
    if (rxr->buffers != NULL) {
        for (int i = 0; i < que->num_rx_desc; i++) {
            buf = &rxr->buffers[i];

            /* Free buffers and unload dma maps */
            ixl_rx_discard(rxr, i);

            bus_dmamap_destroy(rxr->htag, buf->hmap);
            bus_dmamap_destroy(rxr->ptag, buf->pmap);
        }
        free(rxr->buffers, M_DEVBUF);
        rxr->buffers = NULL;
    }

    if (rxr->htag != NULL) {
        bus_dma_tag_destroy(rxr->htag);
        rxr->htag = NULL;
    }
    if (rxr->ptag != NULL) {
        bus_dma_tag_destroy(rxr->ptag);
        rxr->ptag = NULL;
    }
ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
#if defined(INET6) || defined(INET)
    /*
     * At the moment, LRO is only for IPv4/TCP packets, and the TCP
     * checksum of the packet should be computed by hardware. Also it
     * should not have a VLAN tag in the ethernet header.
     */
    if (rxr->lro_enabled &&
        (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
        (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
        (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
        /*
         * Send to the stack if:
         *  - LRO not enabled, or
         *  - no LRO resources, or
         *  - lro enqueue fails
         */
        if (rxr->lro.lro_cnt != 0)
            if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
                return;
    }
#endif
    (*ifp->if_input)(ifp, m);
ixl_rx_discard(struct rx_ring *rxr, int i)
    struct ixl_rx_buf *rbuf;

    KASSERT(rxr != NULL, ("Receive ring pointer cannot be null"));
    KASSERT(i < rxr->que->num_rx_desc, ("Descriptor index must be less than que->num_desc"));

    rbuf = &rxr->buffers[i];

    /* Free the mbufs in the current chain for the packet */
    if (rbuf->fmp != NULL) {
        bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
        m_freem(rbuf->fmp);
        rbuf->fmp = NULL;
    }

    /*
     * Free the mbufs for the current descriptor, and let
     * ixl_refresh_mbufs() assign new mbufs to these.
     */
    if (rbuf->m_head) {
        bus_dmamap_sync(rxr->htag, rbuf->hmap, BUS_DMASYNC_POSTREAD);
        bus_dmamap_unload(rxr->htag, rbuf->hmap);
        m_free(rbuf->m_head);
        rbuf->m_head = NULL;
    }

    if (rbuf->m_pack) {
        bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
        bus_dmamap_unload(rxr->ptag, rbuf->pmap);
        m_free(rbuf->m_pack);
        rbuf->m_pack = NULL;
    }
/*
** ixl_ptype_to_hash: parse the packet type
** to determine the appropriate hash.
*/
ixl_ptype_to_hash(u8 ptype)
    struct i40e_rx_ptype_decoded decoded;

    decoded = decode_rx_desc_ptype(ptype);

    if (!decoded.known)
        return M_HASHTYPE_OPAQUE_HASH;

    if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_L2)
        return M_HASHTYPE_OPAQUE_HASH;

    /* Note: anything that gets to this point is IP */
    if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6) {
        switch (decoded.inner_prot) {
        case I40E_RX_PTYPE_INNER_PROT_TCP:
            return M_HASHTYPE_RSS_TCP_IPV6;
        case I40E_RX_PTYPE_INNER_PROT_UDP:
            return M_HASHTYPE_RSS_UDP_IPV6;
        default:
            return M_HASHTYPE_RSS_IPV6;
        }
    }
    if (decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV4) {
        switch (decoded.inner_prot) {
        case I40E_RX_PTYPE_INNER_PROT_TCP:
            return M_HASHTYPE_RSS_TCP_IPV4;
        case I40E_RX_PTYPE_INNER_PROT_UDP:
            return M_HASHTYPE_RSS_UDP_IPV4;
        default:
            return M_HASHTYPE_RSS_IPV4;
        }
    }
    /* We should never get here!! */
    return M_HASHTYPE_OPAQUE_HASH;
/*********************************************************************
 *
 * This routine executes in interrupt context. It replenishes
 * the mbufs in the descriptor ring and sends data which has been
 * DMA'ed into host memory to the upper layer.
 *
 * We loop at most count times if count is > 0, or until done if
 * count < 0.
 *
 * Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
ixl_rxeof(struct ixl_queue *que, int count)
    struct ixl_vsi *vsi = que->vsi;
    struct rx_ring *rxr = &que->rxr;
    struct ifnet *ifp = vsi->ifp;
#if defined(INET6) || defined(INET)
    struct lro_ctrl *lro = &rxr->lro;
#endif
    int i, nextp, processed = 0;
    union i40e_rx_desc *cur;
    struct ixl_rx_buf *rbuf, *nbuf;

    IXL_RX_LOCK(rxr);

#ifdef DEV_NETMAP
    if (netmap_rx_irq(ifp, que->me, &count)) {
        IXL_RX_UNLOCK(rxr);
        return (FALSE);
    }
#endif /* DEV_NETMAP */

    for (i = rxr->next_check; count != 0;) {
        struct mbuf *sendmp, *mh, *mp;
        u32 status, error;
        u16 hlen, plen, vtag;
        u64 qword;
        u8 ptype;
        bool eop;

        /* Sync the ring. */
        bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
            BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

        cur = &rxr->base[i];
        qword = le64toh(cur->wb.qword1.status_error_len);
        status = (qword & I40E_RXD_QW1_STATUS_MASK)
            >> I40E_RXD_QW1_STATUS_SHIFT;
        error = (qword & I40E_RXD_QW1_ERROR_MASK)
            >> I40E_RXD_QW1_ERROR_SHIFT;
        plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
            >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
        hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
            >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
        ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
            >> I40E_RXD_QW1_PTYPE_SHIFT;

        if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
            ++rxr->not_done;
            break;
        }
        if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
            break;

        count--;
        sendmp = NULL;
        nbuf = NULL;
        cur->wb.qword1.status_error_len = 0;
        rbuf = &rxr->buffers[i];
        mh = rbuf->m_head;
        mp = rbuf->m_pack;
        eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
        if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
            vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
        else
            vtag = 0;

        /* Remove device access to the rx buffers. */
        if (rbuf->m_head != NULL) {
            bus_dmamap_sync(rxr->htag, rbuf->hmap,
                BUS_DMASYNC_POSTREAD);
            bus_dmamap_unload(rxr->htag, rbuf->hmap);
        }
        if (rbuf->m_pack != NULL) {
            bus_dmamap_sync(rxr->ptag, rbuf->pmap,
                BUS_DMASYNC_POSTREAD);
            bus_dmamap_unload(rxr->ptag, rbuf->pmap);
        }

        /*
         * Make sure bad packets are discarded,
         * note that only EOP descriptor has valid
         * error results.
         */
        if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
            rxr->desc_errs++;
            ixl_rx_discard(rxr, i);
            goto next_desc;
        }

        /* Prefetch the next buffer */
        if (!eop) {
            nextp = i + 1;
            if (nextp == que->num_rx_desc)
                nextp = 0;
            nbuf = &rxr->buffers[nextp];
            prefetch(nbuf);
        }

        /*
         * The header mbuf is ONLY used when header
         * split is enabled, otherwise we get normal
         * behavior, ie, both header and payload
         * are DMA'd into the payload buffer.
         *
         * Rather than using the fmp/lmp global pointers
         * we now keep the head of a packet chain in the
         * buffer struct and pass this along from one
         * descriptor to the next, until we get EOP.
         */
        if (rxr->hdr_split && (rbuf->fmp == NULL)) {
            if (hlen > IXL_RX_HDR)
                hlen = IXL_RX_HDR;
            mh->m_len = hlen;
            mh->m_flags |= M_PKTHDR;
            mh->m_next = NULL;
            mh->m_pkthdr.len = mh->m_len;
            /* Null buf pointer so it is refreshed */
            rbuf->m_head = NULL;
            /*
             * Check the payload length, this
             * could be zero if it's a small
             * packet.
             */
            if (plen > 0) {
                mp->m_len = plen;
                mp->m_next = NULL;
                mp->m_flags &= ~M_PKTHDR;
                mh->m_next = mp;
                mh->m_pkthdr.len += mp->m_len;
                /* Null buf pointer so it is refreshed */
                rbuf->m_pack = NULL;
            }
            /*
             * Now create the forward
             * chain so when complete
             * we won't have to.
             */
            if (eop == 0) {
                /* stash the chain head */
                nbuf->fmp = mh;
                /* Make forward chain */
                if (plen)
                    mp->m_next = nbuf->m_pack;
                else
                    mh->m_next = nbuf->m_pack;
            } else {
                /* Singlet, prepare to send */
                sendmp = mh;
                if (vtag) {
                    sendmp->m_pkthdr.ether_vtag = vtag;
                    sendmp->m_flags |= M_VLANTAG;
                }
            }
        } else {
            /*
             * Either no header split, or a
             * secondary piece of a fragmented
             * split packet.
             */
            mp->m_len = plen;
            /*
             * See if there is a stored head
             * that determines what we are
             */
            sendmp = rbuf->fmp;
            rbuf->m_pack = rbuf->fmp = NULL;

            if (sendmp != NULL) /* secondary frag */
                sendmp->m_pkthdr.len += mp->m_len;
            else {
                /* first desc of a non-ps chain */
                sendmp = mp;
                sendmp->m_flags |= M_PKTHDR;
                sendmp->m_pkthdr.len = mp->m_len;
            }
            /* Pass the head pointer on */
            if (eop == 0) {
                nbuf->fmp = sendmp;
                sendmp = NULL;
                mp->m_next = nbuf->m_pack;
            }
        }
        ++processed;
        /* Sending this frame? */
        if (eop) {
            sendmp->m_pkthdr.rcvif = ifp;
            rxr->rx_packets++;
            rxr->rx_bytes += sendmp->m_pkthdr.len;
            /* capture data for dynamic ITR adjustment */
            rxr->packets++;
            rxr->bytes += sendmp->m_pkthdr.len;
            /* Set VLAN tag (field only valid in eop desc) */
            if (vtag) {
                sendmp->m_pkthdr.ether_vtag = vtag;
                sendmp->m_flags |= M_VLANTAG;
            }
            if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
                ixl_rx_checksum(sendmp, status, error, ptype);
#ifdef RSS
            sendmp->m_pkthdr.flowid =
                le32toh(cur->wb.qword0.hi_dword.rss);
            M_HASHTYPE_SET(sendmp, ixl_ptype_to_hash(ptype));
#else
            sendmp->m_pkthdr.flowid = que->msix;
            M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#endif
        }
next_desc:
        bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
            BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

        /* Advance our pointers to the next descriptor. */
        if (++i == que->num_rx_desc)
            i = 0;

        /* Now send to the stack or do LRO */
        if (sendmp != NULL) {
            rxr->next_check = i;
            IXL_RX_UNLOCK(rxr);
            ixl_rx_input(rxr, ifp, sendmp, ptype);
            IXL_RX_LOCK(rxr);
            /*
             * Update index used in loop in case another
             * ixl_rxeof() call executes when lock is released
             */
            i = rxr->next_check;
        }

        /* Every 8 descriptors we go to refresh mbufs */
        if (processed == 8) {
            ixl_refresh_mbufs(que, i);
            processed = 0;
        }
    }

    /* Refresh any remaining buf structs */
    if (ixl_rx_unrefreshed(que))
        ixl_refresh_mbufs(que, i);

    rxr->next_check = i;

#if defined(INET6) || defined(INET)
    /*
     * Flush any outstanding LRO work
     */
#if __FreeBSD_version >= 1100105
    tcp_lro_flush_all(lro);
#else
    struct lro_entry *queued;
    while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
        SLIST_REMOVE_HEAD(&lro->lro_active, next);
        tcp_lro_flush(lro, queued);
    }
#endif
#endif /* defined(INET6) || defined(INET) */

    IXL_RX_UNLOCK(rxr);
    return (FALSE);
/*********************************************************************
 *
 * Verify that the hardware indicated that the checksum is valid.
 * Inform the stack about the status of checksum so that the stack
 * doesn't spend time verifying the checksum.
 *
 *********************************************************************/
ixl_rx_checksum(struct mbuf *mp, u32 status, u32 error, u8 ptype)
    struct i40e_rx_ptype_decoded decoded;

    decoded = decode_rx_desc_ptype(ptype);

    /* Errors? */
    if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
        (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
        mp->m_pkthdr.csum_flags = 0;
        return;
    }

    /* IPv6 with extension headers likely have bad csum */
    if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
        decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
        if (status &
            (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
            mp->m_pkthdr.csum_flags = 0;
            return;
        }

    /* IP Checksum Good */
    mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
    mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;

    if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
        mp->m_pkthdr.csum_flags |=
            (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
        mp->m_pkthdr.csum_data |= htons(0xffff);
    }
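    /*
     * Note: CSUM_DATA_VALID | CSUM_PSEUDO_HDR together with a csum_data
     * of 0xffff tells the stack the L4 checksum was already verified in
     * HW, so no software checksum pass is needed for this packet.
     */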
#if __FreeBSD_version >= 1100000
ixl_get_counter(if_t ifp, ift_counter cnt)
    struct ixl_vsi *vsi;

    vsi = if_getsoftc(ifp);

    switch (cnt) {
    case IFCOUNTER_IPACKETS:
        return (vsi->ipackets);
    case IFCOUNTER_IERRORS:
        return (vsi->ierrors);
    case IFCOUNTER_OPACKETS:
        return (vsi->opackets);
    case IFCOUNTER_OERRORS:
        return (vsi->oerrors);
    case IFCOUNTER_COLLISIONS:
        /* Collisions are impossible by standard in 40G/10G Ethernet */
        return (0);
    case IFCOUNTER_IBYTES:
        return (vsi->ibytes);
    case IFCOUNTER_OBYTES:
        return (vsi->obytes);
    case IFCOUNTER_IMCASTS:
        return (vsi->imcasts);
    case IFCOUNTER_OMCASTS:
        return (vsi->omcasts);
    case IFCOUNTER_IQDROPS:
        return (vsi->iqdrops);
    case IFCOUNTER_OQDROPS:
        return (vsi->oqdrops);
    case IFCOUNTER_NOPROTO:
        return (vsi->noproto);
    default:
        return (if_get_counter_default(ifp, cnt));
    }
/*
 * Set TX and RX ring sizes, adjusting the values to the supported range
 */
ixl_vsi_setup_rings_size(struct ixl_vsi *vsi, int tx_ring_size, int rx_ring_size)
    struct device *dev = vsi->dev;

    if (tx_ring_size < IXL_MIN_RING
         || tx_ring_size > IXL_MAX_RING
         || tx_ring_size % IXL_RING_INCREMENT != 0) {
        device_printf(dev, "Invalid tx_ring_size value of %d set!\n",
            tx_ring_size);
        device_printf(dev, "tx_ring_size must be between %d and %d, "
            "inclusive, and must be a multiple of %d\n",
            IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
        device_printf(dev, "Using default value of %d instead\n",
            IXL_DEFAULT_RING);
        vsi->num_tx_desc = IXL_DEFAULT_RING;
    } else
        vsi->num_tx_desc = tx_ring_size;

    if (rx_ring_size < IXL_MIN_RING
         || rx_ring_size > IXL_MAX_RING
         || rx_ring_size % IXL_RING_INCREMENT != 0) {
        device_printf(dev, "Invalid rx_ring_size value of %d set!\n",
            rx_ring_size);
        device_printf(dev, "rx_ring_size must be between %d and %d, "
            "inclusive, and must be a multiple of %d\n",
            IXL_MIN_RING, IXL_MAX_RING, IXL_RING_INCREMENT);
        device_printf(dev, "Using default value of %d instead\n",
            IXL_DEFAULT_RING);
        vsi->num_rx_desc = IXL_DEFAULT_RING;
    } else
        vsi->num_rx_desc = rx_ring_size;

    device_printf(dev, "using %d tx descriptors and %d rx descriptors\n",
        vsi->num_tx_desc, vsi->num_rx_desc);
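    /*
     * Example (illustrative): a tunable of 1000 that is not a multiple
     * of IXL_RING_INCREMENT is rejected and IXL_DEFAULT_RING is used
     * instead, while a request that passes all three checks (say 2048
     * with the stock constants) is taken as-is.
     */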
ixl_queue_sw_irq(struct ixl_vsi *vsi, int qidx)
    struct i40e_hw *hw = vsi->hw;
    u32 reg, mask;

    if ((vsi->flags & IXL_FLAGS_IS_VF) != 0) {
        mask = (I40E_VFINT_DYN_CTLN1_INTENA_MASK |
            I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
            I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK);

        reg = I40E_VFINT_DYN_CTLN1(qidx);
    } else {
        mask = (I40E_PFINT_DYN_CTLN_INTENA_MASK |
            I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK |
            I40E_PFINT_DYN_CTLN_ITR_INDX_MASK);

        reg = ((vsi->flags & IXL_FLAGS_USES_MSIX) != 0) ?
            I40E_PFINT_DYN_CTLN(qidx) : I40E_PFINT_DYN_CTL0;
    }

    wr32(hw, reg, mask);
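    /*
     * Writing SWINT_TRIG together with INTENA fires a software-triggered
     * interrupt on the queue's vector; the hang check below uses this to
     * kick a stalled TX cleanup without waiting for real traffic.
     */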
ixl_queue_hang_check(struct ixl_vsi *vsi)
    struct ixl_queue *que = vsi->queues;
    device_t dev = vsi->dev;
    struct tx_ring *txr;
    s32 timer, new_timer;

    for (int i = 0; i < vsi->num_queues; i++, que++) {
        txr = &que->txr;
        /*
         * If watchdog_timer is equal to the default value set by
         * ixl_txeof, just subtract hz and move on - the queue is most
         * probably running. Otherwise check the value.
         */
        if (atomic_cmpset_rel_32(&txr->watchdog_timer,
            IXL_WATCHDOG, (IXL_WATCHDOG) - hz) == 0) {
            timer = atomic_load_acq_32(&txr->watchdog_timer);
            /*
             * Again - if the timer was reset to the default value
             * then the queue is running. Otherwise check if the
             * watchdog expired and act accordingly.
             */
            if (timer > 0 && timer != IXL_WATCHDOG) {
                new_timer = timer - hz;
                if (new_timer <= 0) {
                    atomic_store_rel_32(&txr->watchdog_timer, -1);
                    device_printf(dev, "WARNING: queue %d "
                        "appears to be hung!\n", que->me);

                    /* Try to unblock the queue with SW IRQ */
                    ixl_queue_sw_irq(vsi, i);
                } else {
                    /*
                     * If this fails, that means something in the TX path
                     * has updated the watchdog, so it means the TX path
                     * is still working and the watchdog doesn't need
                     * to countdown.
                     */
                    atomic_cmpset_rel_32(&txr->watchdog_timer,
                        timer, new_timer);