1 /******************************************************************************
3 Copyright (c) 2013-2014, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
36 ** IXL driver TX/RX Routines:
37 ** This was separated to allow usage by
38 ** both the BASE and the VF drivers.
42 #include "opt_inet6.h"
45 /* Local Prototypes */
46 static void ixl_rx_checksum(struct mbuf *, u32, u32, u8);
47 static void ixl_refresh_mbufs(struct ixl_queue *, int);
48 static int ixl_xmit(struct ixl_queue *, struct mbuf **);
49 static int ixl_tx_setup_offload(struct ixl_queue *,
50 struct mbuf *, u32 *, u32 *);
51 static bool ixl_tso_setup(struct ixl_queue *, struct mbuf *);
53 static __inline void ixl_rx_discard(struct rx_ring *, int);
54 static __inline void ixl_rx_input(struct rx_ring *, struct ifnet *,
55 struct mbuf *, u8);
58 ** Multiqueue Transmit driver
62 ixl_mq_start(struct ifnet *ifp, struct mbuf *m)
64 struct ixl_vsi *vsi = ifp->if_softc;
65 struct ixl_queue *que;
69 /* Which queue to use */
70 if ((m->m_flags & M_FLOWID) != 0)
71 i = m->m_pkthdr.flowid % vsi->num_queues;
72 else
73 i = curcpu % vsi->num_queues;
75 /* Check for a hung queue and pick alternative */
76 if (((1 << i) & vsi->active_queues) == 0)
77 i = ffsl(vsi->active_queues);
79 que = &vsi->queues[i];
82 err = drbr_enqueue(ifp, txr->br, m);
85 if (IXL_TX_TRYLOCK(txr)) {
86 ixl_mq_start_locked(ifp, txr);
89 taskqueue_enqueue(que->tq, &que->tx_task);
95 ixl_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
97 struct ixl_queue *que = txr->que;
98 struct ixl_vsi *vsi = que->vsi;
103 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
104 vsi->link_active == 0)
107 /* Process the transmit queue */
108 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
109 if ((err = ixl_xmit(que, &next)) != 0) {
111 drbr_advance(ifp, txr->br);
113 drbr_putback(ifp, txr->br, next);
116 drbr_advance(ifp, txr->br);
117 /* Send a copy of the frame to the BPF listener */
118 ETHER_BPF_MTAP(ifp, next);
119 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
123 if (txr->avail < IXL_TX_CLEANUP_THRESHOLD)
130 * Called from a taskqueue to drain queued transmit packets.
133 ixl_deferred_mq_start(void *arg, int pending)
135 struct ixl_queue *que = arg;
136 struct tx_ring *txr = &que->txr;
137 struct ixl_vsi *vsi = que->vsi;
138 struct ifnet *ifp = vsi->ifp;
141 if (!drbr_empty(ifp, txr->br))
142 ixl_mq_start_locked(ifp, txr);
147 ** Flush all queue ring buffers
150 ixl_qflush(struct ifnet *ifp)
152 struct ixl_vsi *vsi = ifp->if_softc;
154 for (int i = 0; i < vsi->num_queues; i++) {
155 struct ixl_queue *que = &vsi->queues[i];
156 struct tx_ring *txr = &que->txr;
159 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
167 ** Find mbuf chains passed to the driver
168 ** that are 'sparse', using more than 8
169 ** mbufs to deliver an mss-size chunk of data
172 ixl_tso_detect_sparse(struct mbuf *mp)
178 mss = mp->m_pkthdr.tso_segsz;
179 for (m = mp->m_next; m != NULL; m = m->m_next) {
184 if (m->m_next == NULL)
187 if (num > IXL_SPARSE_CHAIN)
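/*
** Editor's note: an illustrative sketch (not part of the driver) restating
** the sparse-chain test above, fenced off from compilation. It assumes the
** check counts how many mbufs are consumed covering one MSS-sized window of
** payload; the helper name ixl_chain_is_sparse is hypothetical.
*/
#if 0
static bool
ixl_chain_is_sparse(struct mbuf *mp)
{
	struct mbuf	*m;
	int		mss, num = 0;

	mss = mp->m_pkthdr.tso_segsz;
	/* Walk the chain until one MSS of payload has been covered. */
	for (m = mp->m_next; m != NULL; m = m->m_next) {
		num++;
		mss -= m->m_len;
		if (mss < 1 || m->m_next == NULL)
			break;
	}
	/* Needing more than IXL_SPARSE_CHAIN mbufs per MSS => defrag first. */
	return (num > IXL_SPARSE_CHAIN);
}
#endif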
194 /*********************************************************************
196 * This routine maps the mbufs to tx descriptors, allowing the
197 * TX engine to transmit the packets.
198 * - return 0 on success, positive on failure
200 **********************************************************************/
201 #define IXL_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
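/*
** Editor's note: the data descriptor's cmd_type_offset_bsz quadword packs the
** descriptor type, command bits (including IXL_TXD_CMD above), header offsets,
** buffer length and L2 tag into one 64-bit value. The fenced-off helper below
** simply restates the packing that ixl_xmit() performs inline; the helper name
** is illustrative only, and the caller would still byte-swap with htole64().
*/
#if 0
static __inline u64
ixl_build_data_qword(u32 cmd, u32 off, u16 seglen, u16 vtag)
{
	return (I40E_TX_DESC_DTYPE_DATA
	    | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
	    | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
	    | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
	    | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));
}
#endif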
204 ixl_xmit(struct ixl_queue *que, struct mbuf **m_headp)
206 struct ixl_vsi *vsi = que->vsi;
207 struct i40e_hw *hw = vsi->hw;
208 struct tx_ring *txr = &que->txr;
209 struct ixl_tx_buf *buf;
210 struct i40e_tx_desc *txd = NULL;
211 struct mbuf *m_head, *m;
212 int i, j, error, nsegs, maxsegs;
218 bus_dma_segment_t segs[IXL_MAX_TSO_SEGS];
225 * Important to capture the first descriptor
226 * used because it will contain the index of
227 * the one we tell the hardware to report back
229 first = txr->next_avail;
230 buf = &txr->buffers[first];
233 maxsegs = IXL_MAX_TX_SEGS;
235 if (m_head->m_pkthdr.csum_flags & CSUM_TSO) {
236 /* Use larger mapping for TSO */
238 maxsegs = IXL_MAX_TSO_SEGS;
239 if (ixl_tso_detect_sparse(m_head)) {
240 m = m_defrag(m_head, M_NOWAIT);
251 * Map the packet for DMA.
253 error = bus_dmamap_load_mbuf_sg(tag, map,
254 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
256 if (error == EFBIG) {
259 m = m_collapse(*m_headp, M_NOWAIT, maxsegs);
261 que->mbuf_defrag_failed++;
269 error = bus_dmamap_load_mbuf_sg(tag, map,
270 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
272 if (error == ENOMEM) {
275 } else if (error != 0) {
281 } else if (error == ENOMEM) {
284 } else if (error != 0) {
291 /* Make certain there are enough descriptors */
292 if (nsegs > txr->avail - 2) {
299 /* Set up the TSO/CSUM offload */
300 if (m_head->m_pkthdr.csum_flags & CSUM_OFFLOAD) {
301 error = ixl_tx_setup_offload(que, m_head, &cmd, &off);
306 cmd |= I40E_TX_DESC_CMD_ICRC;
307 /* Grab the VLAN tag */
308 if (m_head->m_flags & M_VLANTAG) {
309 cmd |= I40E_TX_DESC_CMD_IL2TAG1;
310 vtag = htole16(m_head->m_pkthdr.ether_vtag);
314 for (j = 0; j < nsegs; j++) {
317 buf = &txr->buffers[i];
318 buf->tag = tag; /* Keep track of the type tag */
320 seglen = segs[j].ds_len;
322 txd->buffer_addr = htole64(segs[j].ds_addr);
323 txd->cmd_type_offset_bsz =
324 htole64(I40E_TX_DESC_DTYPE_DATA
325 | ((u64)cmd << I40E_TXD_QW1_CMD_SHIFT)
326 | ((u64)off << I40E_TXD_QW1_OFFSET_SHIFT)
327 | ((u64)seglen << I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
328 | ((u64)vtag << I40E_TXD_QW1_L2TAG1_SHIFT));
330 last = i; /* descriptor that will get completion IRQ */
332 if (++i == que->num_desc)
338 /* Set the last descriptor for report */
339 txd->cmd_type_offset_bsz |=
340 htole64(((u64)IXL_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT));
344 buf->m_head = m_head;
345 /* Swap the dma map between the first and last descriptor */
346 txr->buffers[first].map = buf->map;
348 bus_dmamap_sync(tag, map, BUS_DMASYNC_PREWRITE);
350 /* Set the index of the descriptor that will be marked done */
351 buf = &txr->buffers[first];
352 buf->eop_index = last;
354 bus_dmamap_sync(txr->dma.tag, txr->dma.map,
355 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
357 * Advance the Transmit Descriptor Tail (Tdt); this tells the
358 * hardware that this frame is available to transmit.
360 ++txr->total_packets;
361 wr32(hw, txr->tail, i);
364 /* Mark outstanding work */
370 bus_dmamap_unload(tag, buf->map);
375 /*********************************************************************
377 * Allocate memory for tx_buffer structures. The tx_buffer stores all
378 * the information needed to transmit a packet on the wire. This is
379 * called only once at attach; setup is done on every reset.
381 **********************************************************************/
383 ixl_allocate_tx_data(struct ixl_queue *que)
385 struct tx_ring *txr = &que->txr;
386 struct ixl_vsi *vsi = que->vsi;
387 device_t dev = vsi->dev;
388 struct ixl_tx_buf *buf;
392 * Setup DMA descriptor areas.
394 if ((error = bus_dma_tag_create(NULL, /* parent */
395 1, 0, /* alignment, bounds */
396 BUS_SPACE_MAXADDR, /* lowaddr */
397 BUS_SPACE_MAXADDR, /* highaddr */
398 NULL, NULL, /* filter, filterarg */
399 IXL_TSO_SIZE, /* maxsize */
400 IXL_MAX_TX_SEGS, /* nsegments */
401 PAGE_SIZE, /* maxsegsize */
404 NULL, /* lockfuncarg */
406 device_printf(dev,"Unable to allocate TX DMA tag\n");
410 /* Make a special tag for TSO */
411 if ((error = bus_dma_tag_create(NULL, /* parent */
412 1, 0, /* alignment, bounds */
413 BUS_SPACE_MAXADDR, /* lowaddr */
414 BUS_SPACE_MAXADDR, /* highaddr */
415 NULL, NULL, /* filter, filterarg */
416 IXL_TSO_SIZE, /* maxsize */
417 IXL_MAX_TSO_SEGS, /* nsegments */
418 PAGE_SIZE, /* maxsegsize */
421 NULL, /* lockfuncarg */
423 device_printf(dev,"Unable to allocate TX TSO DMA tag\n");
428 (struct ixl_tx_buf *) malloc(sizeof(struct ixl_tx_buf) *
429 que->num_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
430 device_printf(dev, "Unable to allocate tx_buffer memory\n");
435 /* Create the descriptor buffer default dma maps */
437 for (int i = 0; i < que->num_desc; i++, buf++) {
438 buf->tag = txr->tx_tag;
439 error = bus_dmamap_create(buf->tag, 0, &buf->map);
441 device_printf(dev, "Unable to create TX DMA map\n");
450 /*********************************************************************
452 * (Re)Initialize a queue transmit ring.
453 * - called by init, it clears the descriptor ring,
454 * and frees any stale mbufs
456 **********************************************************************/
458 ixl_init_tx_ring(struct ixl_queue *que)
460 struct tx_ring *txr = &que->txr;
461 struct ixl_tx_buf *buf;
463 /* Clear the old ring contents */
465 bzero((void *)txr->base,
466 (sizeof(struct i40e_tx_desc)) * que->num_desc);
470 txr->next_to_clean = 0;
473 /* Initialize flow director */
474 txr->atr_rate = ixl_atr_rate;
478 /* Free any existing tx mbufs. */
480 for (int i = 0; i < que->num_desc; i++, buf++) {
481 if (buf->m_head != NULL) {
482 bus_dmamap_sync(buf->tag, buf->map,
483 BUS_DMASYNC_POSTWRITE);
484 bus_dmamap_unload(buf->tag, buf->map);
485 m_freem(buf->m_head);
488 /* Clear the EOP index */
492 /* Set number of descriptors available */
493 txr->avail = que->num_desc;
495 bus_dmamap_sync(txr->dma.tag, txr->dma.map,
496 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
501 /*********************************************************************
503 * Free transmit ring related data structures.
505 **********************************************************************/
507 ixl_free_que_tx(struct ixl_queue *que)
509 struct tx_ring *txr = &que->txr;
510 struct ixl_tx_buf *buf;
512 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
514 for (int i = 0; i < que->num_desc; i++) {
515 buf = &txr->buffers[i];
516 if (buf->m_head != NULL) {
517 bus_dmamap_sync(buf->tag, buf->map,
518 BUS_DMASYNC_POSTWRITE);
519 bus_dmamap_unload(buf->tag,
521 m_freem(buf->m_head);
523 if (buf->map != NULL) {
524 bus_dmamap_destroy(buf->tag,
528 } else if (buf->map != NULL) {
529 bus_dmamap_unload(buf->tag,
531 bus_dmamap_destroy(buf->tag,
537 buf_ring_free(txr->br, M_DEVBUF);
538 if (txr->buffers != NULL) {
539 free(txr->buffers, M_DEVBUF);
542 if (txr->tx_tag != NULL) {
543 bus_dma_tag_destroy(txr->tx_tag);
546 if (txr->tso_tag != NULL) {
547 bus_dma_tag_destroy(txr->tso_tag);
551 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
555 /*********************************************************************
557 * Setup descriptor for hw offloads
559 **********************************************************************/
562 ixl_tx_setup_offload(struct ixl_queue *que,
563 struct mbuf *mp, u32 *cmd, u32 *off)
565 struct ether_vlan_header *eh;
567 struct ip *ip = NULL;
569 struct tcphdr *th = NULL;
573 int elen, ip_hlen = 0, tcp_hlen;
579 /* Set up the TSO context descriptor if required */
580 if (mp->m_pkthdr.csum_flags & CSUM_TSO) {
581 tso = ixl_tso_setup(que, mp);
589 * Determine where frame payload starts.
590 * Jump over vlan headers if already present,
591 * helpful for QinQ too.
593 eh = mtod(mp, struct ether_vlan_header *);
594 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
595 etype = ntohs(eh->evl_proto);
596 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
598 etype = ntohs(eh->evl_encap_proto);
599 elen = ETHER_HDR_LEN;
605 ip = (struct ip *)(mp->m_data + elen);
606 ip_hlen = ip->ip_hl << 2;
608 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
609 /* The IP checksum must be recalculated with TSO */
611 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
613 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
618 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
619 ip_hlen = sizeof(struct ip6_hdr);
620 ipproto = ip6->ip6_nxt;
621 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
622 *cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
629 *off |= (elen >> 1) << I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
630 *off |= (ip_hlen >> 2) << I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
634 tcp_hlen = th->th_off << 2;
635 if (mp->m_pkthdr.csum_flags & (CSUM_TCP|CSUM_TCP_IPV6)) {
636 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
637 *off |= (tcp_hlen >> 2) <<
638 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
641 ixl_atr(que, th, etype);
645 if (mp->m_pkthdr.csum_flags & (CSUM_UDP|CSUM_UDP_IPV6)) {
646 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
647 *off |= (sizeof(struct udphdr) >> 2) <<
648 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
653 if (mp->m_pkthdr.csum_flags & (CSUM_SCTP|CSUM_SCTP_IPV6)) {
654 *cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
655 *off |= (sizeof(struct sctphdr) >> 2) <<
656 I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
667 /**********************************************************************
669 * Setup context for hardware segmentation offload (TSO)
671 **********************************************************************/
673 ixl_tso_setup(struct ixl_queue *que, struct mbuf *mp)
675 struct tx_ring *txr = &que->txr;
676 struct i40e_tx_context_desc *TXD;
677 struct ixl_tx_buf *buf;
678 u32 cmd, mss, type, tsolen;
680 int idx, elen, ip_hlen, tcp_hlen;
681 struct ether_vlan_header *eh;
688 #if defined(INET6) || defined(INET)
691 u64 type_cmd_tso_mss;
694 * Determine where frame payload starts.
695 * Jump over vlan headers if already present
697 eh = mtod(mp, struct ether_vlan_header *);
698 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
699 elen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
700 etype = eh->evl_proto;
702 elen = ETHER_HDR_LEN;
703 etype = eh->evl_encap_proto;
706 switch (ntohs(etype)) {
709 ip6 = (struct ip6_hdr *)(mp->m_data + elen);
710 if (ip6->ip6_nxt != IPPROTO_TCP)
712 ip_hlen = sizeof(struct ip6_hdr);
713 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
714 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
715 tcp_hlen = th->th_off << 2;
720 ip = (struct ip *)(mp->m_data + elen);
721 if (ip->ip_p != IPPROTO_TCP)
724 ip_hlen = ip->ip_hl << 2;
725 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
726 th->th_sum = in_pseudo(ip->ip_src.s_addr,
727 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
728 tcp_hlen = th->th_off << 2;
732 printf("%s: CSUM_TSO but no supported IP version (0x%04x)\n",
733 __func__, ntohs(etype));
737 /* Ensure we have at least the IP+TCP header in the first mbuf. */
738 if (mp->m_len < elen + ip_hlen + sizeof(struct tcphdr))
741 idx = txr->next_avail;
742 buf = &txr->buffers[idx];
743 TXD = (struct i40e_tx_context_desc *) &txr->base[idx];
744 tsolen = mp->m_pkthdr.len - (elen + ip_hlen + tcp_hlen);
746 type = I40E_TX_DESC_DTYPE_CONTEXT;
747 cmd = I40E_TX_CTX_DESC_TSO;
748 mss = mp->m_pkthdr.tso_segsz;
750 type_cmd_tso_mss = ((u64)type << I40E_TXD_CTX_QW1_DTYPE_SHIFT) |
751 ((u64)cmd << I40E_TXD_CTX_QW1_CMD_SHIFT) |
752 ((u64)tsolen << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT) |
753 ((u64)mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
754 TXD->type_cmd_tso_mss = htole64(type_cmd_tso_mss);
756 TXD->tunneling_params = htole32(0);
760 if (++idx == que->num_desc)
764 txr->next_avail = idx;
770 ** ixl_get_tx_head - Retrieve the value from the
771 ** location where the HW records its HEAD index
774 ixl_get_tx_head(struct ixl_queue *que)
776 struct tx_ring *txr = &que->txr;
777 void *head = &txr->base[que->num_desc];
778 return LE32_TO_CPU(*(volatile __le32 *)head);
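/*
** Editor's note: with head writeback the hardware reports progress by writing
** the ring's HEAD index into host memory instead of setting a done bit in
** each descriptor. The address read above is the slot just past the last real
** descriptor, so the ring's DMA area is assumed to be sized with one extra
** 32-bit word; a fenced-off sketch of that assumed sizing follows (the
** allocation itself is not in this file).
*/
#if 0
	/* descriptors plus one u32 for the HEAD writeback location */
	size_t tsize = roundup2((que->num_desc * sizeof(struct i40e_tx_desc)) +
	    sizeof(u32), DBA_ALIGN);
#endif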
781 /**********************************************************************
783 * Examine each tx_buffer in the used queue. If the hardware is done
784 * processing the packet then free associated resources. The
785 * tx_buffer is put back on the free queue.
787 **********************************************************************/
789 ixl_txeof(struct ixl_queue *que)
791 struct tx_ring *txr = &que->txr;
792 u32 first, last, head, done, processed;
793 struct ixl_tx_buf *buf;
794 struct i40e_tx_desc *tx_desc, *eop_desc;
797 mtx_assert(&txr->mtx, MA_OWNED);
800 /* These are not the descriptors you seek, move along :) */
801 if (txr->avail == que->num_desc) {
807 first = txr->next_to_clean;
808 buf = &txr->buffers[first];
809 tx_desc = (struct i40e_tx_desc *)&txr->base[first];
810 last = buf->eop_index;
813 eop_desc = (struct i40e_tx_desc *)&txr->base[last];
815 /* Get the Head WB value */
816 head = ixl_get_tx_head(que);
819 ** Get the index of the first descriptor
820 ** BEYOND the EOP and call that 'done'.
821 ** I do this so the comparison in the
822 ** inner while loop below can be simple
824 if (++last == que->num_desc) last = 0;
827 bus_dmamap_sync(txr->dma.tag, txr->dma.map,
828 BUS_DMASYNC_POSTREAD);
830 ** The HEAD index of the ring is written in a
831 ** defined location; this, rather than a done bit,
832 ** is what is used to keep track of what must be
833 ** cleaned up.
835 while (first != head) {
836 /* We clean the range of the packet */
837 while (first != done) {
842 txr->bytes += /* for ITR adjustment */
843 buf->m_head->m_pkthdr.len;
844 txr->tx_bytes += /* for TX stats */
845 buf->m_head->m_pkthdr.len;
846 bus_dmamap_sync(buf->tag,
848 BUS_DMASYNC_POSTWRITE);
849 bus_dmamap_unload(buf->tag,
851 m_freem(buf->m_head);
857 if (++first == que->num_desc)
860 buf = &txr->buffers[first];
861 tx_desc = &txr->base[first];
864 /* See if there is more work now */
865 last = buf->eop_index;
867 eop_desc = &txr->base[last];
868 /* Get next done point */
869 if (++last == que->num_desc) last = 0;
874 bus_dmamap_sync(txr->dma.tag, txr->dma.map,
875 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
877 txr->next_to_clean = first;
881 ** Hang detection: we know there's work
882 ** outstanding or the first return would
883 ** have been taken, so indicate an
884 ** unsuccessful pass. In local_timer, if
885 ** the value grows too great the queue will
886 ** be considered hung. If anything has been
887 ** cleaned then reset the state.
889 if ((processed == 0) && (que->busy != IXL_QUEUE_HUNG))
893 que->busy = 1; /* Note this turns off HUNG */
896 * If there are no pending descriptors, clear the timeout.
898 if (txr->avail == que->num_desc) {
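/*
** Editor's note: que->busy, maintained above, is consumed by the driver's
** periodic timer in the base/VF code, which is not part of this file. A
** fenced-off sketch of how such a timer might interpret it; the threshold
** name IXL_MAX_TX_BUSY is an assumption, only IXL_QUEUE_HUNG appears here.
*/
#if 0
	/* hypothetical per-tick check in the local timer */
	if (que->busy == IXL_QUEUE_HUNG) {
		/* already hung: a queue/interface reinit would be scheduled here */
	} else if (que->busy > IXL_MAX_TX_BUSY) {
		/* too many cleanup passes with no progress: mark it hung */
		que->busy = IXL_QUEUE_HUNG;
	}
#endif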
906 /*********************************************************************
908 * Refresh mbuf buffers for RX descriptor rings
909 * - now keeps its own state so discards due to resource
910 * exhaustion are unnecessary; if an mbuf cannot be obtained
911 * it just returns, keeping its placeholder, so it can simply
912 * be recalled to try again.
914 **********************************************************************/
916 ixl_refresh_mbufs(struct ixl_queue *que, int limit)
918 struct ixl_vsi *vsi = que->vsi;
919 struct rx_ring *rxr = &que->rxr;
920 bus_dma_segment_t hseg[1];
921 bus_dma_segment_t pseg[1];
922 struct ixl_rx_buf *buf;
923 struct mbuf *mh, *mp;
924 int i, j, nsegs, error;
925 bool refreshed = FALSE;
927 i = j = rxr->next_refresh;
928 /* Control the loop with one beyond */
929 if (++j == que->num_desc)
933 buf = &rxr->buffers[i];
934 if (rxr->hdr_split == FALSE)
937 if (buf->m_head == NULL) {
938 mh = m_gethdr(M_NOWAIT, MT_DATA);
944 mh->m_pkthdr.len = mh->m_len = MHLEN;
946 mh->m_flags |= M_PKTHDR;
947 /* Get the memory mapping */
948 error = bus_dmamap_load_mbuf_sg(rxr->htag,
949 buf->hmap, mh, hseg, &nsegs, BUS_DMA_NOWAIT);
951 printf("Refresh mbufs: hdr dmamap load"
952 " failure - %d\n", error);
958 bus_dmamap_sync(rxr->htag, buf->hmap,
959 BUS_DMASYNC_PREREAD);
960 rxr->base[i].read.hdr_addr =
961 htole64(hseg[0].ds_addr);
964 if (buf->m_pack == NULL) {
965 mp = m_getjcl(M_NOWAIT, MT_DATA,
966 M_PKTHDR, rxr->mbuf_sz);
972 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
973 /* Get the memory mapping */
974 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
975 buf->pmap, mp, pseg, &nsegs, BUS_DMA_NOWAIT);
977 printf("Refresh mbufs: payload dmamap load"
978 " failure - %d\n", error);
984 bus_dmamap_sync(rxr->ptag, buf->pmap,
985 BUS_DMASYNC_PREREAD);
986 rxr->base[i].read.pkt_addr =
987 htole64(pseg[0].ds_addr);
988 /* Used only when doing header split */
989 rxr->base[i].read.hdr_addr = 0;
992 /* Next is precalculated */
994 rxr->next_refresh = i;
995 if (++j == que->num_desc)
999 if (refreshed) /* Update hardware tail index */
1000 wr32(vsi->hw, rxr->tail, rxr->next_refresh);
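/*
** Editor's note: ixl_rxeof() below also calls ixl_rx_unrefreshed() to decide
** whether a final refresh pass is needed; that helper lives outside this
** file. A fenced-off sketch of what it plausibly computes from the
** next_check / next_refresh state kept by this routine (the exact definition
** is assumed, not confirmed by this excerpt).
*/
#if 0
static __inline u16
ixl_rx_unrefreshed(struct ixl_queue *que)
{
	struct rx_ring	*rxr = &que->rxr;

	/* descriptors handed up to the stack but not yet given fresh mbufs */
	if (rxr->next_check > rxr->next_refresh)
		return (rxr->next_check - rxr->next_refresh - 1);
	else
		return ((que->num_desc + rxr->next_check) -
		    rxr->next_refresh - 1);
}
#endif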
1005 /*********************************************************************
1007 * Allocate memory for rx_buffer structures. Since we use one
1008 * rx_buffer per descriptor, the maximum number of rx_buffers
1009 * that we'll need is equal to the number of receive descriptors
1010 * that we've defined.
1012 **********************************************************************/
1014 ixl_allocate_rx_data(struct ixl_queue *que)
1016 struct rx_ring *rxr = &que->rxr;
1017 struct ixl_vsi *vsi = que->vsi;
1018 device_t dev = vsi->dev;
1019 struct ixl_rx_buf *buf;
1020 int i, bsize, error;
1022 bsize = sizeof(struct ixl_rx_buf) * que->num_desc;
1023 if (!(rxr->buffers =
1024 (struct ixl_rx_buf *) malloc(bsize,
1025 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1026 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1031 if ((error = bus_dma_tag_create(NULL, /* parent */
1032 1, 0, /* alignment, bounds */
1033 BUS_SPACE_MAXADDR, /* lowaddr */
1034 BUS_SPACE_MAXADDR, /* highaddr */
1035 NULL, NULL, /* filter, filterarg */
1036 MSIZE, /* maxsize */
1038 MSIZE, /* maxsegsize */
1040 NULL, /* lockfunc */
1041 NULL, /* lockfuncarg */
1043 device_printf(dev, "Unable to create RX DMA htag\n");
1047 if ((error = bus_dma_tag_create(NULL, /* parent */
1048 1, 0, /* alignment, bounds */
1049 BUS_SPACE_MAXADDR, /* lowaddr */
1050 BUS_SPACE_MAXADDR, /* highaddr */
1051 NULL, NULL, /* filter, filterarg */
1052 MJUM16BYTES, /* maxsize */
1054 MJUM16BYTES, /* maxsegsize */
1056 NULL, /* lockfunc */
1057 NULL, /* lockfuncarg */
1059 device_printf(dev, "Unable to create RX DMA ptag\n");
1063 for (i = 0; i < que->num_desc; i++) {
1064 buf = &rxr->buffers[i];
1065 error = bus_dmamap_create(rxr->htag,
1066 BUS_DMA_NOWAIT, &buf->hmap);
1068 device_printf(dev, "Unable to create RX head map\n");
1071 error = bus_dmamap_create(rxr->ptag,
1072 BUS_DMA_NOWAIT, &buf->pmap);
1074 device_printf(dev, "Unable to create RX pkt map\n");
1083 /*********************************************************************
1085 * (Re)Initialize the queue receive ring and its buffers.
1087 **********************************************************************/
1089 ixl_init_rx_ring(struct ixl_queue *que)
1091 struct rx_ring *rxr = &que->rxr;
1092 #if defined(INET6) || defined(INET)
1093 struct ixl_vsi *vsi = que->vsi;
1094 struct ifnet *ifp = vsi->ifp;
1095 struct lro_ctrl *lro = &rxr->lro;
1097 struct ixl_rx_buf *buf;
1098 bus_dma_segment_t pseg[1], hseg[1];
1099 int rsize, nsegs, error = 0;
1102 /* Clear the ring contents */
1103 rsize = roundup2(que->num_desc *
1104 sizeof(union i40e_rx_desc), DBA_ALIGN);
1105 bzero((void *)rxr->base, rsize);
1106 /* Cleanup any existing buffers */
1107 for (int i = 0; i < que->num_desc; i++) {
1108 buf = &rxr->buffers[i];
1109 if (buf->m_head != NULL) {
1110 bus_dmamap_sync(rxr->htag, buf->hmap,
1111 BUS_DMASYNC_POSTREAD);
1112 bus_dmamap_unload(rxr->htag, buf->hmap);
1113 buf->m_head->m_flags |= M_PKTHDR;
1114 m_freem(buf->m_head);
1116 if (buf->m_pack != NULL) {
1117 bus_dmamap_sync(rxr->ptag, buf->pmap,
1118 BUS_DMASYNC_POSTREAD);
1119 bus_dmamap_unload(rxr->ptag, buf->pmap);
1120 buf->m_pack->m_flags |= M_PKTHDR;
1121 m_freem(buf->m_pack);
1127 /* header split is off */
1128 rxr->hdr_split = FALSE;
1130 /* Now replenish the mbufs */
1131 for (int j = 0; j != que->num_desc; ++j) {
1132 struct mbuf *mh, *mp;
1134 buf = &rxr->buffers[j];
1136 ** Don't allocate mbufs if not
1137 ** doing header split; it's wasteful
1139 if (rxr->hdr_split == FALSE)
1142 /* First the header */
1143 buf->m_head = m_gethdr(M_NOWAIT, MT_DATA);
1144 if (buf->m_head == NULL) {
1148 m_adj(buf->m_head, ETHER_ALIGN);
1150 mh->m_len = mh->m_pkthdr.len = MHLEN;
1151 mh->m_flags |= M_PKTHDR;
1152 /* Get the memory mapping */
1153 error = bus_dmamap_load_mbuf_sg(rxr->htag,
1154 buf->hmap, buf->m_head, hseg,
1155 &nsegs, BUS_DMA_NOWAIT);
1156 if (error != 0) /* Nothing elegant to do here */
1158 bus_dmamap_sync(rxr->htag,
1159 buf->hmap, BUS_DMASYNC_PREREAD);
1160 /* Update descriptor */
1161 rxr->base[j].read.hdr_addr = htole64(hseg[0].ds_addr);
1164 /* Now the payload cluster */
1165 buf->m_pack = m_getjcl(M_NOWAIT, MT_DATA,
1166 M_PKTHDR, rxr->mbuf_sz);
1167 if (buf->m_pack == NULL) {
1172 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1173 /* Get the memory mapping */
1174 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1175 buf->pmap, mp, pseg,
1176 &nsegs, BUS_DMA_NOWAIT);
1179 bus_dmamap_sync(rxr->ptag,
1180 buf->pmap, BUS_DMASYNC_PREREAD);
1181 /* Update descriptor */
1182 rxr->base[j].read.pkt_addr = htole64(pseg[0].ds_addr);
1183 rxr->base[j].read.hdr_addr = 0;
1187 /* Setup our descriptor indices */
1188 rxr->next_check = 0;
1189 rxr->next_refresh = 0;
1190 rxr->lro_enabled = FALSE;
1193 rxr->discard = FALSE;
1195 wr32(vsi->hw, rxr->tail, que->num_desc - 1);
1198 #if defined(INET6) || defined(INET)
1200 ** Now set up the LRO interface:
1202 if (ifp->if_capenable & IFCAP_LRO) {
1203 int err = tcp_lro_init(lro);
1205 if_printf(ifp, "queue %d: LRO Initialization failed!\n", que->me);
1208 INIT_DBG_IF(ifp, "queue %d: RX Soft LRO Initialized", que->me);
1209 rxr->lro_enabled = TRUE;
1210 lro->ifp = vsi->ifp;
1214 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1215 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1223 /*********************************************************************
1225 * Free station receive ring data structures
1227 **********************************************************************/
1229 ixl_free_que_rx(struct ixl_queue *que)
1231 struct rx_ring *rxr = &que->rxr;
1232 struct ixl_rx_buf *buf;
1234 INIT_DBG_IF(que->vsi->ifp, "queue %d: begin", que->me);
1236 /* Cleanup any existing buffers */
1237 if (rxr->buffers != NULL) {
1238 for (int i = 0; i < que->num_desc; i++) {
1239 buf = &rxr->buffers[i];
1240 if (buf->m_head != NULL) {
1241 bus_dmamap_sync(rxr->htag, buf->hmap,
1242 BUS_DMASYNC_POSTREAD);
1243 bus_dmamap_unload(rxr->htag, buf->hmap);
1244 buf->m_head->m_flags |= M_PKTHDR;
1245 m_freem(buf->m_head);
1247 if (buf->m_pack != NULL) {
1248 bus_dmamap_sync(rxr->ptag, buf->pmap,
1249 BUS_DMASYNC_POSTREAD);
1250 bus_dmamap_unload(rxr->ptag, buf->pmap);
1251 buf->m_pack->m_flags |= M_PKTHDR;
1252 m_freem(buf->m_pack);
1256 if (buf->hmap != NULL) {
1257 bus_dmamap_destroy(rxr->htag, buf->hmap);
1260 if (buf->pmap != NULL) {
1261 bus_dmamap_destroy(rxr->ptag, buf->pmap);
1265 if (rxr->buffers != NULL) {
1266 free(rxr->buffers, M_DEVBUF);
1267 rxr->buffers = NULL;
1271 if (rxr->htag != NULL) {
1272 bus_dma_tag_destroy(rxr->htag);
1275 if (rxr->ptag != NULL) {
1276 bus_dma_tag_destroy(rxr->ptag);
1280 INIT_DBG_IF(que->vsi->ifp, "queue %d: end", que->me);
1284 static __inline void
1285 ixl_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u8 ptype)
1288 #if defined(INET6) || defined(INET)
1290 * At the moment, LRO is only for IPv4/TCP packets and the TCP checksum of
1291 * the packet should be computed by hardware. Also it should not have a
1292 * VLAN tag in the ethernet header.
1294 if (rxr->lro_enabled &&
1295 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1296 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1297 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1299 * Send to the stack if:
1300 ** - LRO not enabled, or
1301 ** - no LRO resources, or
1302 ** - lro enqueue fails
1304 if (rxr->lro.lro_cnt != 0)
1305 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1310 (*ifp->if_input)(ifp, m);
1315 static __inline void
1316 ixl_rx_discard(struct rx_ring *rxr, int i)
1318 struct ixl_rx_buf *rbuf;
1320 rbuf = &rxr->buffers[i];
1322 if (rbuf->fmp != NULL) {/* Partial chain ? */
1323 rbuf->fmp->m_flags |= M_PKTHDR;
1329 ** With advanced descriptors the writeback
1330 ** clobbers the buffer addrs, so it's easier
1331 ** to just free the existing mbufs and take
1332 ** the normal refresh path to get new buffers
1336 m_free(rbuf->m_head);
1337 rbuf->m_head = NULL;
1341 m_free(rbuf->m_pack);
1342 rbuf->m_pack = NULL;
1349 /*********************************************************************
1351 * This routine executes in interrupt context. It replenishes
1352 * the mbufs in the descriptor and sends data which has been
1353 * dma'ed into host memory to the upper layer.
1355 * We loop at most count times if count is > 0, or until done if
1356 * count < 0.
1358 * Return TRUE for more work, FALSE for all clean.
1359 *********************************************************************/
1361 ixl_rxeof(struct ixl_queue *que, int count)
1363 struct ixl_vsi *vsi = que->vsi;
1364 struct rx_ring *rxr = &que->rxr;
1365 struct ifnet *ifp = vsi->ifp;
1366 #if defined(INET6) || defined(INET)
1367 struct lro_ctrl *lro = &rxr->lro;
1368 struct lro_entry *queued;
1370 int i, nextp, processed = 0;
1371 union i40e_rx_desc *cur;
1372 struct ixl_rx_buf *rbuf, *nbuf;
1378 for (i = rxr->next_check; count != 0;) {
1379 struct mbuf *sendmp, *mh, *mp;
1380 u32 rsc, status, error;
1381 u16 hlen, plen, vtag;
1386 /* Sync the ring. */
1387 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1388 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1390 cur = &rxr->base[i];
1391 qword = le64toh(cur->wb.qword1.status_error_len);
1392 status = (qword & I40E_RXD_QW1_STATUS_MASK)
1393 >> I40E_RXD_QW1_STATUS_SHIFT;
1394 error = (qword & I40E_RXD_QW1_ERROR_MASK)
1395 >> I40E_RXD_QW1_ERROR_SHIFT;
1396 plen = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
1397 >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
1398 hlen = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
1399 >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
1400 ptype = (qword & I40E_RXD_QW1_PTYPE_MASK)
1401 >> I40E_RXD_QW1_PTYPE_SHIFT;
1403 if ((status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) == 0) {
1407 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1414 cur->wb.qword1.status_error_len = 0;
1415 rbuf = &rxr->buffers[i];
1418 eop = (status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT));
1419 if (status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT))
1420 vtag = le16toh(cur->wb.qword0.lo_dword.l2tag1);
1425 ** Make sure bad packets are discarded;
1426 ** note that only the EOP descriptor has valid
1427 ** error results.
1429 if (eop && (error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
1431 ixl_rx_discard(rxr, i);
1435 /* Prefetch the next buffer */
1438 if (nextp == que->num_desc)
1440 nbuf = &rxr->buffers[nextp];
1445 ** The header mbuf is ONLY used when header
1446 ** split is enabled, otherwise we get normal
1447 ** behavior, i.e., both header and payload
1448 ** are DMA'd into the payload buffer.
1450 ** Rather than using the fmp/lmp global pointers
1451 ** we now keep the head of a packet chain in the
1452 ** buffer struct and pass this along from one
1453 ** descriptor to the next, until we get EOP.
1455 if (rxr->hdr_split && (rbuf->fmp == NULL)) {
1456 if (hlen > IXL_RX_HDR)
1459 mh->m_flags |= M_PKTHDR;
1461 mh->m_pkthdr.len = mh->m_len;
1462 /* Null buf pointer so it is refreshed */
1463 rbuf->m_head = NULL;
1465 ** Check the payload length; this
1466 ** could be zero if it's a small
1467 ** packet.
1472 mp->m_flags &= ~M_PKTHDR;
1474 mh->m_pkthdr.len += mp->m_len;
1475 /* Null buf pointer so it is refreshed */
1476 rbuf->m_pack = NULL;
1480 ** Now create the forward
1481 ** chain so when complete
1485 /* stash the chain head */
1487 /* Make forward chain */
1489 mp->m_next = nbuf->m_pack;
1491 mh->m_next = nbuf->m_pack;
1493 /* Singlet, prepare to send */
1496 sendmp->m_pkthdr.ether_vtag = vtag;
1497 sendmp->m_flags |= M_VLANTAG;
1502 ** Either no header split, or a
1503 ** secondary piece of a fragmented
1504 ** packet.
1508 ** See if there is a stored head
1509 ** that determines what we are
1512 rbuf->m_pack = rbuf->fmp = NULL;
1514 if (sendmp != NULL) /* secondary frag */
1515 sendmp->m_pkthdr.len += mp->m_len;
1517 /* first desc of a non-ps chain */
1519 sendmp->m_flags |= M_PKTHDR;
1520 sendmp->m_pkthdr.len = mp->m_len;
1522 sendmp->m_pkthdr.ether_vtag = vtag;
1523 sendmp->m_flags |= M_VLANTAG;
1526 /* Pass the head pointer on */
1530 mp->m_next = nbuf->m_pack;
1534 /* Sending this frame? */
1536 sendmp->m_pkthdr.rcvif = ifp;
1539 rxr->rx_bytes += sendmp->m_pkthdr.len;
1540 /* capture data for dynamic ITR adjustment */
1542 rxr->bytes += sendmp->m_pkthdr.len;
1543 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1544 ixl_rx_checksum(sendmp, status, error, ptype);
1545 sendmp->m_pkthdr.flowid = que->msix;
1546 sendmp->m_flags |= M_FLOWID;
1549 bus_dmamap_sync(rxr->dma.tag, rxr->dma.map,
1550 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1552 /* Advance our pointers to the next descriptor. */
1553 if (++i == que->num_desc)
1556 /* Now send to the stack or do LRO */
1557 if (sendmp != NULL) {
1558 rxr->next_check = i;
1559 ixl_rx_input(rxr, ifp, sendmp, ptype);
1560 i = rxr->next_check;
1563 /* Every 8 descriptors we go to refresh mbufs */
1564 if (processed == 8) {
1565 ixl_refresh_mbufs(que, i);
1570 /* Refresh any remaining buf structs */
1571 if (ixl_rx_unrefreshed(que))
1572 ixl_refresh_mbufs(que, i);
1574 rxr->next_check = i;
1576 #if defined(INET6) || defined(INET)
1578 * Flush any outstanding LRO work
1580 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1581 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1582 tcp_lro_flush(lro, queued);
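/*
** Editor's note: the TRUE/FALSE return described in the header comment of
** ixl_rxeof() lets the interrupt or taskqueue code decide whether to keep
** polling. A fenced-off sketch of a hypothetical caller (the real handlers
** live in the base and VF drivers); the budget name rx_limit is assumed.
*/
#if 0
	/* e.g. from a queue MSI-X task: poll until the ring is clean */
	bool more;
	do {
		more = ixl_rxeof(que, rx_limit);
	} while (more);
#endif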
1591 /*********************************************************************
1593 * Verify that the hardware indicated that the checksum is valid.
1594 * Inform the stack about the status of the checksum so that the stack
1595 * doesn't spend time verifying the checksum.
1597 *********************************************************************/
1599 ixl_rx_checksum(struct mbuf * mp, u32 status, u32 error, u8 ptype)
1601 struct i40e_rx_ptype_decoded decoded;
1603 decoded = decode_rx_desc_ptype(ptype);
1606 if (error & ((1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
1607 (1 << I40E_RX_DESC_ERROR_L4E_SHIFT))) {
1608 mp->m_pkthdr.csum_flags = 0;
1612 /* IPv6 with extension headers likely have bad csum */
1613 if (decoded.outer_ip == I40E_RX_PTYPE_OUTER_IP &&
1614 decoded.outer_ip_ver == I40E_RX_PTYPE_OUTER_IPV6)
1616 (1 << I40E_RX_DESC_STATUS_IPV6EXADD_SHIFT)) {
1617 mp->m_pkthdr.csum_flags = 0;
1622 /* IP Checksum Good */
1623 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1624 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1626 if (status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)) {
1627 mp->m_pkthdr.csum_flags |=
1628 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1629 mp->m_pkthdr.csum_data |= htons(0xffff);
1634 #if __FreeBSD_version >= 1100000
1636 ixl_get_counter(if_t ifp, ift_counter cnt)
1638 struct ixl_vsi *vsi;
1640 vsi = if_getsoftc(ifp);
1643 case IFCOUNTER_IPACKETS:
1644 return (vsi->ipackets);
1645 case IFCOUNTER_IERRORS:
1646 return (vsi->ierrors);
1647 case IFCOUNTER_OPACKETS:
1648 return (vsi->opackets);
1649 case IFCOUNTER_OERRORS:
1650 return (vsi->oerrors);
1651 case IFCOUNTER_COLLISIONS:
1652 /* Collisions are, by the standard, impossible in 40G/10G Ethernet */
1654 case IFCOUNTER_IBYTES:
1655 return (vsi->ibytes);
1656 case IFCOUNTER_OBYTES:
1657 return (vsi->obytes);
1658 case IFCOUNTER_IMCASTS:
1659 return (vsi->imcasts);
1660 case IFCOUNTER_OMCASTS:
1661 return (vsi->omcasts);
1662 case IFCOUNTER_IQDROPS:
1663 return (vsi->iqdrops);
1664 case IFCOUNTER_OQDROPS:
1665 return (vsi->oqdrops);
1666 case IFCOUNTER_NOPROTO:
1667 return (vsi->noproto);
1669 return (if_get_counter_default(ifp, cnt));