1 /******************************************************************************
3 Copyright (c) 2001-2017, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
36 #ifndef IXGBE_STANDALONE_BUILD
38 #include "opt_inet6.h"
 * this feature only works with
 * IPv4, and only on 82599 and later.
 * It will also cause IP forwarding to
 * fail, and unlike LRO that cannot be
 * controlled by the stack. For all these
 * reasons it is best left off, with no
 * tunable interface; enabling it would
 * require recompiling the driver.
56 static bool ixgbe_rsc_enable = FALSE;
59 * For Flow Director: this is the
60 * number of TX packets we sample
 * for the filter pool; this means
62 * every 20th packet will be probed.
 * This feature can be disabled by
 * setting this to 0.
67 static int atr_sample_rate = 20;
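/*
 * Illustrative sketch (editor's addition, not driver code): how a simple
 * per-ring counter implements the sampling described above.  The counter
 * and rate parameter names here are hypothetical stand-ins for the
 * tx_ring's atr_count and the atr_sample_rate value.
 */
#if 0
static inline bool
example_atr_should_sample(unsigned int *counter, unsigned int rate)
{
	if (rate == 0)
		return (false);		/* sampling disabled */
	if (++(*counter) >= rate) {
		*counter = 0;		/* probe this packet, restart count */
		return (true);
	}
	return (false);			/* skip this packet */
}
#endif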
69 /************************************************************************
70 * Local Function prototypes
71 ************************************************************************/
72 static void ixgbe_setup_transmit_ring(struct tx_ring *);
73 static void ixgbe_free_transmit_buffers(struct tx_ring *);
74 static int ixgbe_setup_receive_ring(struct rx_ring *);
75 static void ixgbe_free_receive_buffers(struct rx_ring *);
76 static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
77 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
78 static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
79 static int ixgbe_tx_ctx_setup(struct tx_ring *,
80 struct mbuf *, u32 *, u32 *);
81 static int ixgbe_tso_setup(struct tx_ring *,
82 struct mbuf *, u32 *, u32 *);
83 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
84 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
86 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
87 struct ixgbe_dma_alloc *, int);
88 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
90 /************************************************************************
91 * ixgbe_legacy_start_locked - Transmit entry point
93 * Called by the stack to initiate a transmit.
94 * The driver will remain in this routine as long as there are
95 * packets to transmit and transmit resources are available.
96 * In case resources are not available, the stack is notified
97 * and the packet is requeued.
98 ************************************************************************/
100 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
103 struct adapter *adapter = txr->adapter;
105 IXGBE_TX_LOCK_ASSERT(txr);
107 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
109 if (!adapter->link_active)
112 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
113 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
116 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
120 if (ixgbe_xmit(txr, &m_head)) {
122 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
125 /* Send a copy of the frame to the BPF listener */
126 ETHER_BPF_MTAP(ifp, m_head);
129 return IXGBE_SUCCESS;
130 } /* ixgbe_legacy_start_locked */
/************************************************************************
 * ixgbe_legacy_start
135 * Called by the stack, this always uses the first tx ring,
136 * and should not be used with multiqueue tx enabled.
137 ************************************************************************/
139 ixgbe_legacy_start(struct ifnet *ifp)
141 struct adapter *adapter = ifp->if_softc;
142 struct tx_ring *txr = adapter->tx_rings;
144 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
146 ixgbe_legacy_start_locked(ifp, txr);
147 IXGBE_TX_UNLOCK(txr);
149 } /* ixgbe_legacy_start */
151 /************************************************************************
152 * ixgbe_mq_start - Multiqueue Transmit Entry Point
154 * (if_transmit function)
155 ************************************************************************/
157 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
159 struct adapter *adapter = ifp->if_softc;
160 struct ix_queue *que;
166 * When doing RSS, map it to the same outbound queue
167 * as the incoming flow would be mapped to.
 * If everything is set up correctly, it should be the
 * same bucket that the current CPU is mapped to.
172 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
173 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
174 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
176 i = bucket_id % adapter->num_queues;
178 if (bucket_id > adapter->num_queues)
180 "bucket_id (%d) > num_queues (%d)\n",
181 bucket_id, adapter->num_queues);
184 i = m->m_pkthdr.flowid % adapter->num_queues;
186 i = curcpu % adapter->num_queues;
188 /* Check for a hung queue and pick alternative */
189 if (((1 << i) & adapter->active_queues) == 0)
190 i = ffsl(adapter->active_queues);
192 txr = &adapter->tx_rings[i];
193 que = &adapter->queues[i];
195 err = drbr_enqueue(ifp, txr->br, m);
198 if (IXGBE_TX_TRYLOCK(txr)) {
199 ixgbe_mq_start_locked(ifp, txr);
200 IXGBE_TX_UNLOCK(txr);
202 taskqueue_enqueue(que->tq, &txr->txq_task);
205 } /* ixgbe_mq_start */
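/*
 * Illustrative sketch (editor's addition): the queue selection above
 * reduces an RSS bucket or flow id to a ring index with a simple modulo,
 * falling back to the current CPU when no hash is available.  A minimal
 * stand-alone version, assuming only the queue count, might look like:
 */
#if 0
static inline int
example_select_queue(uint32_t flowid, bool have_hash, int cpu, int num_queues)
{
	if (have_hash)
		return (flowid % num_queues);	/* same queue as the RX flow */
	return (cpu % num_queues);		/* no hash: spread by CPU */
}
#endif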
207 /************************************************************************
208 * ixgbe_mq_start_locked
209 ************************************************************************/
211 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
214 int enqueued = 0, err = 0;
216 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
218 if (txr->adapter->link_active == 0)
221 /* Process the queue */
222 #if __FreeBSD_version < 901504
223 next = drbr_dequeue(ifp, txr->br);
224 while (next != NULL) {
225 if ((err = ixgbe_xmit(txr, &next)) != 0) {
227 err = drbr_enqueue(ifp, txr->br, next);
229 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
230 err = ixgbe_xmit(txr, &next);
233 drbr_advance(ifp, txr->br);
235 drbr_putback(ifp, txr->br, next);
239 #if __FreeBSD_version >= 901504
240 drbr_advance(ifp, txr->br);
243 #if __FreeBSD_version >= 1100036
245 * Since we're looking at the tx ring, we can check
 * to see if we're a VF by examining our tail register
249 if ((txr->adapter->feat_en & IXGBE_FEATURE_VF) &&
250 (next->m_flags & M_MCAST))
251 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
253 /* Send a copy of the frame to the BPF listener */
254 ETHER_BPF_MTAP(ifp, next);
255 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
257 #if __FreeBSD_version < 901504
258 next = drbr_dequeue(ifp, txr->br);
262 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
266 } /* ixgbe_mq_start_locked */
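/*
 * Illustrative sketch (editor's addition): on newer FreeBSD (>= 901504) the
 * dequeue loop above uses the peek/advance/putback pattern so a packet is
 * only removed from the buf_ring once the hardware has accepted it.  In
 * outline, using the same drbr(9) calls as above:
 */
#if 0
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if (ixgbe_xmit(txr, &next) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);	  /* mbuf was consumed/freed */
			else
				drbr_putback(ifp, txr->br, next); /* leave it at the head */
			break;
		}
		drbr_advance(ifp, txr->br);	/* success: now really dequeue */
	}
#endif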
268 /************************************************************************
269 * ixgbe_deferred_mq_start
271 * Called from a taskqueue to drain queued transmit packets.
272 ************************************************************************/
274 ixgbe_deferred_mq_start(void *arg, int pending)
276 struct tx_ring *txr = arg;
277 struct adapter *adapter = txr->adapter;
278 struct ifnet *ifp = adapter->ifp;
281 if (!drbr_empty(ifp, txr->br))
282 ixgbe_mq_start_locked(ifp, txr);
283 IXGBE_TX_UNLOCK(txr);
284 } /* ixgbe_deferred_mq_start */
286 /************************************************************************
287 * ixgbe_qflush - Flush all ring buffers
288 ************************************************************************/
290 ixgbe_qflush(struct ifnet *ifp)
292 struct adapter *adapter = ifp->if_softc;
293 struct tx_ring *txr = adapter->tx_rings;
296 for (int i = 0; i < adapter->num_queues; i++, txr++) {
298 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
300 IXGBE_TX_UNLOCK(txr);
/************************************************************************
 * ixgbe_xmit
309 * Maps the mbufs to tx descriptors, allowing the
310 * TX engine to transmit the packets.
312 * Return 0 on success, positive on failure
313 ************************************************************************/
315 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
317 struct adapter *adapter = txr->adapter;
318 struct ixgbe_tx_buf *txbuf;
319 union ixgbe_adv_tx_desc *txd = NULL;
321 int i, j, error, nsegs;
323 u32 olinfo_status = 0, cmd_type_len;
325 bus_dma_segment_t segs[adapter->num_segs];
330 /* Basic descriptor defines */
331 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
332 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
334 if (m_head->m_flags & M_VLANTAG)
335 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
 * It is important to capture the first descriptor
339 * used because it will contain the index of
340 * the one we tell the hardware to report back
342 first = txr->next_avail_desc;
343 txbuf = &txr->tx_buffers[first];
347 * Map the packet for DMA.
350 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
351 &nsegs, BUS_DMA_NOWAIT);
353 if (__predict_false(error)) {
358 /* Try it again? - one try */
362 * XXX: m_defrag will choke on
363 * non-MCLBYTES-sized clusters
365 m = m_defrag(*m_headp, M_NOWAIT);
367 adapter->mbuf_defrag_failed++;
377 txr->no_tx_dma_setup++;
380 txr->no_tx_dma_setup++;
387 /* Make certain there are enough descriptors */
388 if (txr->tx_avail < (nsegs + 2)) {
389 txr->no_desc_avail++;
390 bus_dmamap_unload(txr->txtag, map);
396 * Set up the appropriate offload context
397 * this will consume the first descriptor
399 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
400 if (__predict_false(error)) {
401 if (error == ENOBUFS)
406 /* Do the flow director magic */
407 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
408 (txr->atr_sample) && (!adapter->fdir_reinit)) {
410 if (txr->atr_count >= atr_sample_rate) {
411 ixgbe_atr(txr, m_head);
416 olinfo_status |= IXGBE_ADVTXD_CC;
417 i = txr->next_avail_desc;
418 for (j = 0; j < nsegs; j++) {
422 txbuf = &txr->tx_buffers[i];
423 txd = &txr->tx_base[i];
424 seglen = segs[j].ds_len;
425 segaddr = htole64(segs[j].ds_addr);
427 txd->read.buffer_addr = segaddr;
428 txd->read.cmd_type_len = htole32(txr->txd_cmd |
429 cmd_type_len | seglen);
430 txd->read.olinfo_status = htole32(olinfo_status);
432 if (++i == txr->num_desc)
436 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
437 txr->tx_avail -= nsegs;
438 txr->next_avail_desc = i;
440 txbuf->m_head = m_head;
442 * Here we swap the map so the last descriptor,
 * which gets the completion interrupt, has the
444 * real map, and the first descriptor gets the
445 * unused map from this descriptor.
447 txr->tx_buffers[first].map = txbuf->map;
449 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
451 /* Set the EOP descriptor that will be marked done */
452 txbuf = &txr->tx_buffers[first];
455 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
456 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
458 * Advance the Transmit Descriptor Tail (Tdt), this tells the
459 * hardware that this frame is available to transmit.
461 ++txr->total_packets;
462 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
464 /* Mark queue as having work */
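/*
 * Illustrative sketch (editor's addition): the map swap commented above can
 * be pictured as exchanging the DMA maps of the first and last (EOP)
 * descriptors of the frame, so the buffer that sees the completion
 * interrupt owns the loaded map.  In outline, with hypothetical index
 * names ("first" and "last"):
 */
#if 0
	struct ixgbe_tx_buf *first_buf = &txr->tx_buffers[first]; /* mbuf loaded here */
	struct ixgbe_tx_buf *last_buf = &txr->tx_buffers[last];   /* marked EOP/RS */
	bus_dmamap_t tmp = first_buf->map;

	first_buf->map = last_buf->map;	/* first keeps an unused map */
	last_buf->map = tmp;		/* EOP buffer gets the real, loaded map */
#endif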
472 /************************************************************************
473 * ixgbe_allocate_transmit_buffers
475 * Allocate memory for tx_buffer structures. The tx_buffer stores all
476 * the information needed to transmit a packet on the wire. This is
 * called only once at attach; setup is done on every reset.
478 ************************************************************************/
480 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
482 struct adapter *adapter = txr->adapter;
483 device_t dev = adapter->dev;
484 struct ixgbe_tx_buf *txbuf;
488 * Setup DMA descriptor areas.
490 error = bus_dma_tag_create(
491 /* parent */ bus_get_dma_tag(adapter->dev),
494 /* lowaddr */ BUS_SPACE_MAXADDR,
495 /* highaddr */ BUS_SPACE_MAXADDR,
497 /* filterarg */ NULL,
498 /* maxsize */ IXGBE_TSO_SIZE,
499 /* nsegments */ adapter->num_segs,
500 /* maxsegsize */ PAGE_SIZE,
503 /* lockfuncarg */ NULL,
506 device_printf(dev, "Unable to allocate TX DMA tag\n");
511 (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
512 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO);
513 if (txr->tx_buffers == NULL) {
514 device_printf(dev, "Unable to allocate tx_buffer memory\n");
519 /* Create the descriptor buffer dma maps */
520 txbuf = txr->tx_buffers;
521 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
522 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
524 device_printf(dev, "Unable to create TX DMA map\n");
	/* We free everything; this handles the case where we are in the middle */
532 ixgbe_free_transmit_structures(adapter);
535 } /* ixgbe_allocate_transmit_buffers */
537 /************************************************************************
538 * ixgbe_setup_transmit_ring - Initialize a transmit ring.
539 ************************************************************************/
541 ixgbe_setup_transmit_ring(struct tx_ring *txr)
543 struct adapter *adapter = txr->adapter;
544 struct ixgbe_tx_buf *txbuf;
546 struct netmap_adapter *na = NA(adapter->ifp);
547 struct netmap_slot *slot;
548 #endif /* DEV_NETMAP */
550 /* Clear the old ring contents */
554 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
556 * (under lock): if in netmap mode, do some consistency
557 * checks and set slot to entry 0 of the netmap ring.
559 slot = netmap_reset(na, NR_TX, txr->me, 0);
561 #endif /* DEV_NETMAP */
563 bzero((void *)txr->tx_base,
564 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
566 txr->next_avail_desc = 0;
567 txr->next_to_clean = 0;
569 /* Free any existing tx buffers. */
570 txbuf = txr->tx_buffers;
571 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
572 if (txbuf->m_head != NULL) {
573 bus_dmamap_sync(txr->txtag, txbuf->map,
574 BUS_DMASYNC_POSTWRITE);
575 bus_dmamap_unload(txr->txtag, txbuf->map);
576 m_freem(txbuf->m_head);
577 txbuf->m_head = NULL;
582 * In netmap mode, set the map for the packet buffer.
583 * NOTE: Some drivers (not this one) also need to set
584 * the physical buffer address in the NIC ring.
585 * Slots in the netmap ring (indexed by "si") are
586 * kring->nkr_hwofs positions "ahead" wrt the
587 * corresponding slot in the NIC ring. In some drivers
588 * (not here) nkr_hwofs can be negative. Function
589 * netmap_idx_n2k() handles wraparounds properly.
591 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
592 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
593 netmap_load_map(na, txr->txtag,
594 txbuf->map, NMB(na, slot + si));
596 #endif /* DEV_NETMAP */
598 /* Clear the EOP descriptor pointer */
602 /* Set the rate at which we sample packets */
603 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
604 txr->atr_sample = atr_sample_rate;
606 /* Set number of descriptors available */
607 txr->tx_avail = adapter->num_tx_desc;
609 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
610 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
611 IXGBE_TX_UNLOCK(txr);
612 } /* ixgbe_setup_transmit_ring */
614 /************************************************************************
615 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
616 ************************************************************************/
618 ixgbe_setup_transmit_structures(struct adapter *adapter)
620 struct tx_ring *txr = adapter->tx_rings;
622 for (int i = 0; i < adapter->num_queues; i++, txr++)
623 ixgbe_setup_transmit_ring(txr);
626 } /* ixgbe_setup_transmit_structures */
628 /************************************************************************
629 * ixgbe_free_transmit_structures - Free all transmit rings.
630 ************************************************************************/
632 ixgbe_free_transmit_structures(struct adapter *adapter)
634 struct tx_ring *txr = adapter->tx_rings;
636 for (int i = 0; i < adapter->num_queues; i++, txr++) {
638 ixgbe_free_transmit_buffers(txr);
639 ixgbe_dma_free(adapter, &txr->txdma);
640 IXGBE_TX_UNLOCK(txr);
641 IXGBE_TX_LOCK_DESTROY(txr);
643 free(adapter->tx_rings, M_DEVBUF);
644 } /* ixgbe_free_transmit_structures */
646 /************************************************************************
647 * ixgbe_free_transmit_buffers
649 * Free transmit ring related data structures.
650 ************************************************************************/
652 ixgbe_free_transmit_buffers(struct tx_ring *txr)
654 struct adapter *adapter = txr->adapter;
655 struct ixgbe_tx_buf *tx_buffer;
658 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
660 if (txr->tx_buffers == NULL)
663 tx_buffer = txr->tx_buffers;
664 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
665 if (tx_buffer->m_head != NULL) {
666 bus_dmamap_sync(txr->txtag, tx_buffer->map,
667 BUS_DMASYNC_POSTWRITE);
668 bus_dmamap_unload(txr->txtag, tx_buffer->map);
669 m_freem(tx_buffer->m_head);
670 tx_buffer->m_head = NULL;
671 if (tx_buffer->map != NULL) {
672 bus_dmamap_destroy(txr->txtag, tx_buffer->map);
673 tx_buffer->map = NULL;
675 } else if (tx_buffer->map != NULL) {
676 bus_dmamap_unload(txr->txtag, tx_buffer->map);
677 bus_dmamap_destroy(txr->txtag, tx_buffer->map);
678 tx_buffer->map = NULL;
682 buf_ring_free(txr->br, M_DEVBUF);
683 if (txr->tx_buffers != NULL) {
684 free(txr->tx_buffers, M_DEVBUF);
685 txr->tx_buffers = NULL;
687 if (txr->txtag != NULL) {
688 bus_dma_tag_destroy(txr->txtag);
691 } /* ixgbe_free_transmit_buffers */
/************************************************************************
 * ixgbe_tx_ctx_setup
696 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
697 ************************************************************************/
699 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
700 u32 *cmd_type_len, u32 *olinfo_status)
702 struct ixgbe_adv_tx_context_desc *TXD;
703 struct ether_vlan_header *eh;
710 int ehdrlen, ip_hlen = 0;
712 int ctxd = txr->next_avail_desc;
713 u32 vlan_macip_lens = 0;
714 u32 type_tucmd_mlhl = 0;
721 /* First check if TSO is to be used */
722 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
723 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
725 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
728 /* Indicate the whole packet as payload when not doing TSO */
729 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
731 /* Now ready a context descriptor */
732 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
735 * In advanced descriptors the vlan tag must
736 * be placed into the context descriptor. Hence
737 * we need to make one even if not doing offloads.
739 if (mp->m_flags & M_VLANTAG) {
740 vtag = htole16(mp->m_pkthdr.ether_vtag);
741 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
742 } else if (!(txr->adapter->feat_en & IXGBE_FEATURE_NEEDS_CTXD) &&
747 * Determine where frame payload starts.
748 * Jump over vlan headers if already present,
749 * helpful for QinQ too.
751 eh = mtod(mp, struct ether_vlan_header *);
752 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
753 etype = ntohs(eh->evl_proto);
754 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
756 etype = ntohs(eh->evl_encap_proto);
757 ehdrlen = ETHER_HDR_LEN;
760 /* Set the ether header length */
761 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
763 if (offload == FALSE)
767 * If the first mbuf only includes the ethernet header,
768 * jump to the next one
769 * XXX: This assumes the stack splits mbufs containing headers
770 * on header boundaries
771 * XXX: And assumes the entire IP header is contained in one mbuf
773 if (mp->m_len == ehdrlen && mp->m_next)
774 l3d = mtod(mp->m_next, caddr_t);
776 l3d = mtod(mp, caddr_t) + ehdrlen;
781 ip = (struct ip *)(l3d);
782 ip_hlen = ip->ip_hl << 2;
784 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
785 /* Insert IPv4 checksum into data descriptors */
786 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
788 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
794 ip6 = (struct ip6_hdr *)(l3d);
795 ip_hlen = sizeof(struct ip6_hdr);
796 ipproto = ip6->ip6_nxt;
797 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
805 vlan_macip_lens |= ip_hlen;
807 /* No support for offloads for non-L4 next headers */
810 if (mp->m_pkthdr.csum_flags &
811 (CSUM_IP_TCP | CSUM_IP6_TCP))
812 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
817 if (mp->m_pkthdr.csum_flags &
818 (CSUM_IP_UDP | CSUM_IP6_UDP))
819 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
824 if (mp->m_pkthdr.csum_flags &
825 (CSUM_IP_SCTP | CSUM_IP6_SCTP))
826 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
835 if (offload) /* Insert L4 checksum into data descriptors */
836 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
839 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
841 /* Now copy bits into descriptor */
842 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
843 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
844 TXD->seqnum_seed = htole32(0);
845 TXD->mss_l4len_idx = htole32(0);
847 /* We've consumed the first desc, adjust counters */
848 if (++ctxd == txr->num_desc)
850 txr->next_avail_desc = ctxd;
854 } /* ixgbe_tx_ctx_setup */
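/*
 * Illustrative sketch (editor's addition): the context descriptor packs the
 * VLAN tag, MAC header length and IP header length into one 32-bit word.
 * Using the shift names from the code above, an untagged frame with a
 * 14-byte ethernet header and a 20-byte IPv4 header would be encoded
 * roughly as:
 */
#if 0
	vlan_macip_lens = 0;
	vlan_macip_lens |= (u32)vtag << IXGBE_ADVTXD_VLAN_SHIFT; /* 802.1Q tag */
	vlan_macip_lens |= 14 << IXGBE_ADVTXD_MACLEN_SHIFT;      /* ETHER_HDR_LEN */
	vlan_macip_lens |= 20;                                    /* ip->ip_hl << 2 */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
#endif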
/************************************************************************
 * ixgbe_tso_setup
859 * Setup work for hardware segmentation offload (TSO) on
860 * adapters using advanced tx descriptors
861 ************************************************************************/
863 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
866 struct ixgbe_adv_tx_context_desc *TXD;
867 struct ether_vlan_header *eh;
875 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
876 u32 vlan_macip_lens = 0;
877 u32 type_tucmd_mlhl = 0;
878 u32 mss_l4len_idx = 0, paylen;
879 u16 vtag = 0, eh_type;
882 * Determine where frame payload starts.
883 * Jump over vlan headers if already present
885 eh = mtod(mp, struct ether_vlan_header *);
886 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
887 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
888 eh_type = eh->evl_proto;
890 ehdrlen = ETHER_HDR_LEN;
891 eh_type = eh->evl_encap_proto;
894 switch (ntohs(eh_type)) {
897 ip = (struct ip *)(mp->m_data + ehdrlen);
898 if (ip->ip_p != IPPROTO_TCP)
901 ip_hlen = ip->ip_hl << 2;
902 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
903 th->th_sum = in_pseudo(ip->ip_src.s_addr,
904 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
905 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
906 /* Tell transmit desc to also do IPv4 checksum. */
907 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
912 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
913 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
914 if (ip6->ip6_nxt != IPPROTO_TCP)
916 ip_hlen = sizeof(struct ip6_hdr);
917 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
918 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
919 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
923 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
924 __func__, ntohs(eh_type));
928 ctxd = txr->next_avail_desc;
929 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
931 tcp_hlen = th->th_off << 2;
933 /* This is used in the transmit desc in encap */
934 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
936 /* VLAN MACLEN IPLEN */
937 if (mp->m_flags & M_VLANTAG) {
938 vtag = htole16(mp->m_pkthdr.ether_vtag);
939 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
942 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
943 vlan_macip_lens |= ip_hlen;
944 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
946 /* ADV DTYPE TUCMD */
947 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
948 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
949 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
952 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
953 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
954 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
956 TXD->seqnum_seed = htole32(0);
958 if (++ctxd == txr->num_desc)
962 txr->next_avail_desc = ctxd;
963 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
964 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
965 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
969 } /* ixgbe_tso_setup */
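/*
 * Illustrative sketch (editor's addition): a worked example of the TSO
 * lengths computed above, for TCP over IPv4 with no IP or TCP options and
 * a 1460-byte MSS:
 *
 *	ehdrlen  = 14				(untagged ethernet header)
 *	ip_hlen  = 20				(ip->ip_hl << 2)
 *	tcp_hlen = 20				(th->th_off << 2)
 *	paylen   = m_pkthdr.len - 14 - 20 - 20	(TCP payload bytes only)
 *
 * and the MSS/L4LEN word is packed just as in the code above:
 */
#if 0
	mss_l4len_idx  = 1460 << IXGBE_ADVTXD_MSS_SHIFT;	/* tso_segsz */
	mss_l4len_idx |= 20 << IXGBE_ADVTXD_L4LEN_SHIFT;	/* tcp_hlen */
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
#endif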
/************************************************************************
 * ixgbe_txeof
 * Examine each tx_buffer in the used queue. If the hardware is done
 * processing the packet, then free the associated resources. The
977 * tx_buffer is put back on the free queue.
978 ************************************************************************/
980 ixgbe_txeof(struct tx_ring *txr)
982 struct adapter *adapter = txr->adapter;
983 struct ixgbe_tx_buf *buf;
984 union ixgbe_adv_tx_desc *txd;
985 u32 work, processed = 0;
986 u32 limit = adapter->tx_process_limit;
988 mtx_assert(&txr->tx_mtx, MA_OWNED);
991 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
992 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
993 struct netmap_adapter *na = NA(adapter->ifp);
994 struct netmap_kring *kring = &na->tx_rings[txr->me];
996 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
997 BUS_DMASYNC_POSTREAD);
999 * In netmap mode, all the work is done in the context
1000 * of the client thread. Interrupt handlers only wake up
1001 * clients, which may be sleeping on individual rings
1002 * or on a global resource for all rings.
1003 * To implement tx interrupt mitigation, we wake up the client
1004 * thread roughly every half ring, even if the NIC interrupts
1005 * more frequently. This is implemented as follows:
1006 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1007 * the slot that should wake up the thread (nkr_num_slots
1008 * means the user thread should not be woken up);
1009 * - the driver ignores tx interrupts unless netmap_mitigate=0
1010 * or the slot has the DD bit set.
1012 if (!netmap_mitigate ||
1013 (kring->nr_kflags < kring->nkr_num_slots &&
1014 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1015 netmap_tx_irq(adapter->ifp, txr->me);
1019 #endif /* DEV_NETMAP */
1021 if (txr->tx_avail == txr->num_desc) {
1026 /* Get work starting point */
1027 work = txr->next_to_clean;
1028 buf = &txr->tx_buffers[work];
1029 txd = &txr->tx_base[work];
1030 work -= txr->num_desc; /* The distance to ring end */
1031 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1032 BUS_DMASYNC_POSTREAD);
1035 union ixgbe_adv_tx_desc *eop = buf->eop;
1036 if (eop == NULL) /* No work */
1039 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1040 break; /* I/O not complete */
1043 txr->bytes += buf->m_head->m_pkthdr.len;
1044 bus_dmamap_sync(txr->txtag, buf->map,
1045 BUS_DMASYNC_POSTWRITE);
1046 bus_dmamap_unload(txr->txtag, buf->map);
1047 m_freem(buf->m_head);
1053 /* We clean the range if multi segment */
1054 while (txd != eop) {
1058 /* wrap the ring? */
1059 if (__predict_false(!work)) {
1060 work -= txr->num_desc;
1061 buf = txr->tx_buffers;
1065 txr->bytes += buf->m_head->m_pkthdr.len;
1066 bus_dmamap_sync(txr->txtag, buf->map,
1067 BUS_DMASYNC_POSTWRITE);
1068 bus_dmamap_unload(txr->txtag, buf->map);
1069 m_freem(buf->m_head);
1079 /* Try the next packet */
1083 /* reset with a wrap */
1084 if (__predict_false(!work)) {
1085 work -= txr->num_desc;
1086 buf = txr->tx_buffers;
1090 } while (__predict_true(--limit));
1092 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1093 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1095 work += txr->num_desc;
1096 txr->next_to_clean = work;
 * Queue hang detection: we know there's
 * work outstanding or the first return
 * would have been taken, so increment busy
 * if nothing managed to get cleaned; then
 * in local_timer it will be checked and
 * marked as HUNG if it exceeds a MAX attempt.
1106 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
 * If anything gets cleaned we reset state to 1;
 * note this will turn off HUNG if it's set.
1115 if (txr->tx_avail == txr->num_desc)
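/*
 * Illustrative sketch (editor's addition): the hang detection described
 * above amounts to a saturating "busy" counter.  In outline (mirroring the
 * comments rather than the elided code):
 */
#if 0
	if (processed == 0) {
		if (txr->busy != IXGBE_QUEUE_HUNG)
			++txr->busy;	/* local_timer marks HUNG past a max */
	} else {
		txr->busy = 1;		/* progress made; this also clears HUNG */
	}
#endif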
/************************************************************************
 * ixgbe_rsc_count
1124 * Used to detect a descriptor that has been merged by Hardware RSC.
1125 ************************************************************************/
1127 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1129 return (le32toh(rx->wb.lower.lo_dword.data) &
1130 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1131 } /* ixgbe_rsc_count */
1133 /************************************************************************
1134 * ixgbe_setup_hw_rsc
1136 * Initialize Hardware RSC (LRO) feature on 82599
 * for an RX ring; this is toggled by the LRO capability
1138 * even though it is transparent to the stack.
1140 * NOTE: Since this HW feature only works with IPv4 and
1141 * testing has shown soft LRO to be as effective,
1142 * this feature will be disabled by default.
1143 ************************************************************************/
1145 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1147 struct adapter *adapter = rxr->adapter;
1148 struct ixgbe_hw *hw = &adapter->hw;
1149 u32 rscctrl, rdrxctl;
1151 /* If turning LRO/RSC off we need to disable it */
1152 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1153 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1154 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1158 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1159 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1161 /* Always strip CRC unless Netmap disabled it */
1162 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1163 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1165 #endif /* DEV_NETMAP */
1166 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1167 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1168 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1170 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1171 rscctrl |= IXGBE_RSCCTL_RSCEN;
1173 * Limit the total number of descriptors that
1174 * can be combined, so it does not exceed 64K
1176 if (rxr->mbuf_sz == MCLBYTES)
1177 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1178 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1179 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1180 else if (rxr->mbuf_sz == MJUM9BYTES)
1181 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1182 else /* Using 16K cluster */
1183 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
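	/*
	 * Editor's note (worked example): each MAXDESC choice above keeps the
	 * largest possible merge under the 64K limit, e.g. 16 x 2KB (MCLBYTES)
	 * = 32KB, 8 x 4KB (MJUMPAGESIZE on 4K pages) = 32KB, 4 x 9KB
	 * (MJUM9BYTES) = 36KB, and 1 x 16KB for the 16K cluster case.
	 */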
1185 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1187 /* Enable TCP header recognition */
1188 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1189 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1191 /* Disable RSC for ACK packets */
1192 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1193 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1196 } /* ixgbe_setup_hw_rsc */
1198 /************************************************************************
1199 * ixgbe_refresh_mbufs
1201 * Refresh mbuf buffers for RX descriptor rings
 * - now keeps its own state so discards due to resource
 *   exhaustion are unnecessary; if an mbuf cannot be obtained
 *   it just returns, keeping its placeholder, so it can simply
 *   be called again later to retry.
1206 ************************************************************************/
1208 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1210 struct adapter *adapter = rxr->adapter;
1211 struct ixgbe_rx_buf *rxbuf;
1213 bus_dma_segment_t seg[1];
1214 int i, j, nsegs, error;
1215 bool refreshed = FALSE;
1217 i = j = rxr->next_to_refresh;
	/* Control the loop with an index one beyond the refresh point */
1219 if (++j == rxr->num_desc)
1222 while (j != limit) {
1223 rxbuf = &rxr->rx_buffers[i];
1224 if (rxbuf->buf == NULL) {
1225 mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1229 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1230 m_adj(mp, ETHER_ALIGN);
1234 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1236 /* If we're dealing with an mbuf that was copied rather
1237 * than replaced, there's no need to go through busdma.
1239 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1240 /* Get the memory mapping */
1241 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1242 error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1243 mp, seg, &nsegs, BUS_DMA_NOWAIT);
1245 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1251 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1252 BUS_DMASYNC_PREREAD);
1253 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1254 htole64(seg[0].ds_addr);
1256 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1257 rxbuf->flags &= ~IXGBE_RX_COPY;
1261 /* Next is precalculated */
1263 rxr->next_to_refresh = i;
1264 if (++j == rxr->num_desc)
1269 if (refreshed) /* Update hardware tail index */
1270 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1273 } /* ixgbe_refresh_mbufs */
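/*
 * Illustrative sketch (editor's addition): the refresh loop above walks the
 * ring with two indices, i (the slot being refreshed) and j (one slot
 * beyond), so the loop can stop exactly at the caller's limit without a
 * separate counter.  Reduced to its skeleton:
 */
#if 0
	i = j = rxr->next_to_refresh;
	if (++j == rxr->num_desc)		/* j leads i by one, with wrap */
		j = 0;
	while (j != limit) {
		/* ... refresh slot i ... */
		i = j;				/* advance: i takes j's place */
		rxr->next_to_refresh = i;
		if (++j == rxr->num_desc)
			j = 0;
	}
#endif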
1275 /************************************************************************
1276 * ixgbe_allocate_receive_buffers
1278 * Allocate memory for rx_buffer structures. Since we use one
 * rx_buffer per received packet, the maximum number of rx_buffers
1280 * that we'll need is equal to the number of receive descriptors
1281 * that we've allocated.
1282 ************************************************************************/
1284 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1286 struct adapter *adapter = rxr->adapter;
1287 device_t dev = adapter->dev;
1288 struct ixgbe_rx_buf *rxbuf;
1291 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1292 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_DEVBUF,
1294 if (rxr->rx_buffers == NULL) {
1295 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1300 error = bus_dma_tag_create(
1301 /* parent */ bus_get_dma_tag(dev),
1304 /* lowaddr */ BUS_SPACE_MAXADDR,
1305 /* highaddr */ BUS_SPACE_MAXADDR,
1307 /* filterarg */ NULL,
1308 /* maxsize */ MJUM16BYTES,
1310 /* maxsegsize */ MJUM16BYTES,
1312 /* lockfunc */ NULL,
1313 /* lockfuncarg */ NULL,
1316 device_printf(dev, "Unable to create RX DMA tag\n");
1320 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1321 rxbuf = &rxr->rx_buffers[i];
1322 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1324 device_printf(dev, "Unable to create RX dma map\n");
1332 /* Frees all, but can handle partial completion */
1333 ixgbe_free_receive_structures(adapter);
1336 } /* ixgbe_allocate_receive_buffers */
1338 /************************************************************************
1339 * ixgbe_free_receive_ring
1340 ************************************************************************/
1342 ixgbe_free_receive_ring(struct rx_ring *rxr)
1344 for (int i = 0; i < rxr->num_desc; i++) {
1345 ixgbe_rx_discard(rxr, i);
1347 } /* ixgbe_free_receive_ring */
1349 /************************************************************************
1350 * ixgbe_setup_receive_ring
1352 * Initialize a receive ring and its buffers.
1353 ************************************************************************/
1355 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1357 struct adapter *adapter;
1360 struct ixgbe_rx_buf *rxbuf;
1361 struct lro_ctrl *lro = &rxr->lro;
1363 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1364 struct netmap_slot *slot;
1365 #endif /* DEV_NETMAP */
1366 bus_dma_segment_t seg[1];
1367 int rsize, nsegs, error = 0;
1369 adapter = rxr->adapter;
1373 /* Clear the ring contents */
1377 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1378 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1379 #endif /* DEV_NETMAP */
1381 rsize = roundup2(adapter->num_rx_desc *
1382 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1383 bzero((void *)rxr->rx_base, rsize);
1384 /* Cache the size */
1385 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1387 /* Free current RX buffer structs and their mbufs */
1388 ixgbe_free_receive_ring(rxr);
1390 /* Now replenish the mbufs */
1391 for (int j = 0; j != rxr->num_desc; ++j) {
1394 rxbuf = &rxr->rx_buffers[j];
1398 * In netmap mode, fill the map and set the buffer
1399 * address in the NIC ring, considering the offset
1400 * between the netmap and NIC rings (see comment in
1401 * ixgbe_setup_transmit_ring() ). No need to allocate
1402 * an mbuf, so end the block with a continue;
1404 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1405 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1409 addr = PNMB(na, slot + sj, &paddr);
1410 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1411 /* Update descriptor and the cached value */
1412 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1413 rxbuf->addr = htole64(paddr);
1416 #endif /* DEV_NETMAP */
1419 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1420 adapter->rx_mbuf_sz);
1421 if (rxbuf->buf == NULL) {
1426 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1427 /* Get the memory mapping */
1428 error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1429 &nsegs, BUS_DMA_NOWAIT);
1432 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1433 /* Update the descriptor and the cached value */
1434 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1435 rxbuf->addr = htole64(seg[0].ds_addr);
1439 /* Setup our descriptor indices */
1440 rxr->next_to_check = 0;
1441 rxr->next_to_refresh = 0;
1442 rxr->lro_enabled = FALSE;
1445 rxr->vtag_strip = FALSE;
1447 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1448 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1451 * Now set up the LRO interface
1453 if (ixgbe_rsc_enable)
1454 ixgbe_setup_hw_rsc(rxr);
1455 else if (ifp->if_capenable & IFCAP_LRO) {
1456 int err = tcp_lro_init(lro);
1458 device_printf(dev, "LRO Initialization failed!\n");
1461 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1462 rxr->lro_enabled = TRUE;
1463 lro->ifp = adapter->ifp;
1466 IXGBE_RX_UNLOCK(rxr);
1471 ixgbe_free_receive_ring(rxr);
1472 IXGBE_RX_UNLOCK(rxr);
1475 } /* ixgbe_setup_receive_ring */
1477 /************************************************************************
1478 * ixgbe_setup_receive_structures - Initialize all receive rings.
1479 ************************************************************************/
1481 ixgbe_setup_receive_structures(struct adapter *adapter)
1483 struct rx_ring *rxr = adapter->rx_rings;
1486 for (j = 0; j < adapter->num_queues; j++, rxr++)
1487 if (ixgbe_setup_receive_ring(rxr))
 * Free RX buffers allocated so far; we will only handle
 * the rings that completed, since the failing case will have
 * cleaned up for itself. 'j' failed, so it's the terminus.
1497 for (int i = 0; i < j; ++i) {
1498 rxr = &adapter->rx_rings[i];
1500 ixgbe_free_receive_ring(rxr);
1501 IXGBE_RX_UNLOCK(rxr);
1505 } /* ixgbe_setup_receive_structures */
1508 /************************************************************************
1509 * ixgbe_free_receive_structures - Free all receive rings.
1510 ************************************************************************/
1512 ixgbe_free_receive_structures(struct adapter *adapter)
1514 struct rx_ring *rxr = adapter->rx_rings;
1516 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1518 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1519 ixgbe_free_receive_buffers(rxr);
1520 /* Free LRO memory */
1521 tcp_lro_free(&rxr->lro);
1522 /* Free the ring memory as well */
1523 ixgbe_dma_free(adapter, &rxr->rxdma);
1526 free(adapter->rx_rings, M_DEVBUF);
1527 } /* ixgbe_free_receive_structures */
1530 /************************************************************************
1531 * ixgbe_free_receive_buffers - Free receive ring data structures
1532 ************************************************************************/
1534 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1536 struct adapter *adapter = rxr->adapter;
1537 struct ixgbe_rx_buf *rxbuf;
1539 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1541 /* Cleanup any existing buffers */
1542 if (rxr->rx_buffers != NULL) {
1543 for (int i = 0; i < adapter->num_rx_desc; i++) {
1544 rxbuf = &rxr->rx_buffers[i];
1545 ixgbe_rx_discard(rxr, i);
1546 if (rxbuf->pmap != NULL) {
1547 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1551 if (rxr->rx_buffers != NULL) {
1552 free(rxr->rx_buffers, M_DEVBUF);
1553 rxr->rx_buffers = NULL;
1557 if (rxr->ptag != NULL) {
1558 bus_dma_tag_destroy(rxr->ptag);
1563 } /* ixgbe_free_receive_buffers */
/************************************************************************
 * ixgbe_rx_input
1567 ************************************************************************/
1568 static __inline void
1569 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
 * At the moment LRO is only for IP/TCP packets and the TCP checksum of the
 * packet should be computed by hardware. Also it should not have a VLAN tag
 * in the ethernet header. In the case of IPv6 we do not yet support ext. hdrs.
1577 if (rxr->lro_enabled &&
1578 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1579 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1580 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1581 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1582 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1583 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1584 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1585 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1587 * Send to the stack if:
1588 * - LRO not enabled, or
1589 * - no LRO resources, or
1590 * - lro enqueue fails
1592 if (rxr->lro.lro_cnt != 0)
1593 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1596 IXGBE_RX_UNLOCK(rxr);
1597 (*ifp->if_input)(ifp, m);
1599 } /* ixgbe_rx_input */
/************************************************************************
 * ixgbe_rx_discard
1603 ************************************************************************/
1604 static __inline void
1605 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1607 struct ixgbe_rx_buf *rbuf;
1609 rbuf = &rxr->rx_buffers[i];
1612 * With advanced descriptors the writeback
 * clobbers the buffer addresses, so it's easier
1614 * to just free the existing mbufs and take
1615 * the normal refresh path to get new buffers
1619 if (rbuf->fmp != NULL) {/* Partial chain ? */
1620 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1623 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1624 } else if (rbuf->buf) {
1625 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1629 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1634 } /* ixgbe_rx_discard */
/************************************************************************
 * ixgbe_rxeof
1640 * Executes in interrupt context. It replenishes the
 * mbufs in the descriptors and sends data which has
 * been DMA'ed into host memory to the upper layer.
1644 * Return TRUE for more work, FALSE for all clean.
1645 ************************************************************************/
1647 ixgbe_rxeof(struct ix_queue *que)
1649 struct adapter *adapter = que->adapter;
1650 struct rx_ring *rxr = que->rxr;
1651 struct ifnet *ifp = adapter->ifp;
1652 struct lro_ctrl *lro = &rxr->lro;
1653 union ixgbe_adv_rx_desc *cur;
1654 struct ixgbe_rx_buf *rbuf, *nbuf;
1655 int i, nextp, processed = 0;
1657 u32 count = adapter->rx_process_limit;
1663 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1664 /* Same as the txeof routine: wakeup clients on intr. */
1665 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1666 IXGBE_RX_UNLOCK(rxr);
1670 #endif /* DEV_NETMAP */
1672 for (i = rxr->next_to_check; count != 0;) {
1673 struct mbuf *sendmp, *mp;
1679 /* Sync the ring. */
1680 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1681 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1683 cur = &rxr->rx_base[i];
1684 staterr = le32toh(cur->wb.upper.status_error);
1685 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1687 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1689 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1696 cur->wb.upper.status_error = 0;
1697 rbuf = &rxr->rx_buffers[i];
1700 len = le16toh(cur->wb.upper.length);
1701 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1702 IXGBE_RXDADV_PKTTYPE_MASK;
1703 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1705 /* Make sure bad packets are discarded */
1706 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1707 #if __FreeBSD_version >= 1100036
1708 if (adapter->feat_en & IXGBE_FEATURE_VF)
1709 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1711 rxr->rx_discarded++;
1712 ixgbe_rx_discard(rxr, i);
1716 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
 * On the 82599, which supports hardware
 * LRO (called HW RSC), packets need
 * not be fragmented across sequential
 * descriptors; rather, the next descriptor
 * is indicated in bits of the descriptor.
 * This also means that we might process
 * more than one packet at a time, something
 * that has never been true before; it
 * required eliminating global chain pointers
 * in favor of what we are doing here. -jfv
1732 * Figure out the next descriptor
1735 if (rxr->hw_rsc == TRUE) {
1736 rsc = ixgbe_rsc_count(cur);
1737 rxr->rsc_num += (rsc - 1);
1739 if (rsc) { /* Get hardware index */
1740 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1741 IXGBE_RXDADV_NEXTP_SHIFT);
1742 } else { /* Just sequential */
1744 if (nextp == adapter->num_rx_desc)
1747 nbuf = &rxr->rx_buffers[nextp];
1751 * Rather than using the fmp/lmp global pointers
1752 * we now keep the head of a packet chain in the
1753 * buffer struct and pass this along from one
1754 * descriptor to the next, until we get EOP.
1758 * See if there is a stored head
1759 * that determines what we are
1762 if (sendmp != NULL) { /* secondary frag */
1763 rbuf->buf = rbuf->fmp = NULL;
1764 mp->m_flags &= ~M_PKTHDR;
1765 sendmp->m_pkthdr.len += mp->m_len;
1768 * Optimize. This might be a small packet,
1769 * maybe just a TCP ACK. Do a fast copy that
1770 * is cache aligned into a new mbuf, and
1771 * leave the old mbuf+cluster for re-use.
1773 if (eop && len <= IXGBE_RX_COPY_LEN) {
1774 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1775 if (sendmp != NULL) {
1776 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1777 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1779 sendmp->m_len = len;
1781 rbuf->flags |= IXGBE_RX_COPY;
1784 if (sendmp == NULL) {
1785 rbuf->buf = rbuf->fmp = NULL;
1789 /* first desc of a non-ps chain */
1790 sendmp->m_flags |= M_PKTHDR;
1791 sendmp->m_pkthdr.len = mp->m_len;
1795 /* Pass the head pointer on */
1799 mp->m_next = nbuf->buf;
1800 } else { /* Sending this frame */
1801 sendmp->m_pkthdr.rcvif = ifp;
1803 /* capture data for AIM */
1804 rxr->bytes += sendmp->m_pkthdr.len;
1805 rxr->rx_bytes += sendmp->m_pkthdr.len;
1806 /* Process vlan info */
1807 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1808 vtag = le16toh(cur->wb.upper.vlan);
1810 sendmp->m_pkthdr.ether_vtag = vtag;
1811 sendmp->m_flags |= M_VLANTAG;
1813 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1814 ixgbe_rx_checksum(staterr, sendmp, ptype);
 * In the case of multiqueue, the RXCSUM.PCSD bit is set
 * and never cleared. This means we have an RSS hash
 * available to be used.
1821 if (adapter->num_queues > 1) {
1822 sendmp->m_pkthdr.flowid =
1823 le32toh(cur->wb.lower.hi_dword.rss);
1824 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1825 case IXGBE_RXDADV_RSSTYPE_IPV4:
1826 M_HASHTYPE_SET(sendmp,
1827 M_HASHTYPE_RSS_IPV4);
1829 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1830 M_HASHTYPE_SET(sendmp,
1831 M_HASHTYPE_RSS_TCP_IPV4);
1833 case IXGBE_RXDADV_RSSTYPE_IPV6:
1834 M_HASHTYPE_SET(sendmp,
1835 M_HASHTYPE_RSS_IPV6);
1837 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1838 M_HASHTYPE_SET(sendmp,
1839 M_HASHTYPE_RSS_TCP_IPV6);
1841 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1842 M_HASHTYPE_SET(sendmp,
1843 M_HASHTYPE_RSS_IPV6_EX);
1845 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1846 M_HASHTYPE_SET(sendmp,
1847 M_HASHTYPE_RSS_TCP_IPV6_EX);
1849 #if __FreeBSD_version > 1100000
1850 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1851 M_HASHTYPE_SET(sendmp,
1852 M_HASHTYPE_RSS_UDP_IPV4);
1854 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1855 M_HASHTYPE_SET(sendmp,
1856 M_HASHTYPE_RSS_UDP_IPV6);
1858 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1859 M_HASHTYPE_SET(sendmp,
1860 M_HASHTYPE_RSS_UDP_IPV6_EX);
1864 M_HASHTYPE_SET(sendmp,
1865 M_HASHTYPE_OPAQUE_HASH);
1868 sendmp->m_pkthdr.flowid = que->msix;
1869 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1873 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1874 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1876 /* Advance our pointers to the next descriptor. */
1877 if (++i == rxr->num_desc)
1880 /* Now send to the stack or do LRO */
1881 if (sendmp != NULL) {
1882 rxr->next_to_check = i;
1883 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1884 i = rxr->next_to_check;
1887 /* Every 8 descriptors we go to refresh mbufs */
1888 if (processed == 8) {
1889 ixgbe_refresh_mbufs(rxr, i);
1894 /* Refresh any remaining buf structs */
1895 if (ixgbe_rx_unrefreshed(rxr))
1896 ixgbe_refresh_mbufs(rxr, i);
1898 rxr->next_to_check = i;
1901 * Flush any outstanding LRO work
1903 tcp_lro_flush_all(lro);
1905 IXGBE_RX_UNLOCK(rxr);
1908 * Still have cleaning to do?
1910 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
/************************************************************************
 * ixgbe_rx_checksum
1920 * Verify that the hardware indicated that the checksum is valid.
 * Inform the stack about the status of the checksum so that the stack
 * doesn't spend time verifying it.
1923 ************************************************************************/
1925 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1927 u16 status = (u16)staterr;
1928 u8 errors = (u8)(staterr >> 24);
1931 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1932 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1936 if (status & IXGBE_RXD_STAT_IPCS) {
1937 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1938 /* IP Checksum Good */
1939 if (!(errors & IXGBE_RXD_ERR_IPE))
1940 mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1942 /* TCP/UDP/SCTP checksum */
1943 if (status & IXGBE_RXD_STAT_L4CS) {
1944 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1945 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1946 mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1948 mp->m_pkthdr.csum_data = htons(0xffff);
1951 } /* ixgbe_rx_checksum */
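/*
 * Illustrative sketch (editor's addition): how a consumer of the flags set
 * above might interpret them; in this driver's mapping, CSUM_L3_VALID and
 * CSUM_L4_VALID are only set when the corresponding *_CALC flag is set and
 * the hardware reported no error.
 */
#if 0
	if ((mp->m_pkthdr.csum_flags & CSUM_L4_VALID) != 0) {
		/* hardware verified the TCP/UDP/SCTP checksum */
	} else if ((mp->m_pkthdr.csum_flags & CSUM_L4_CALC) != 0) {
		/* hardware checked it and flagged an error */
	} else {
		/* hardware did not check; software must verify */
	}
#endif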
1953 /************************************************************************
1954 * ixgbe_dmamap_cb - Manage DMA'able memory.
1955 ************************************************************************/
1957 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1961 *(bus_addr_t *)arg = segs->ds_addr;
1964 } /* ixgbe_dmamap_cb */
/************************************************************************
 * ixgbe_dma_malloc
1968 ************************************************************************/
1970 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1971 struct ixgbe_dma_alloc *dma, int mapflags)
1973 device_t dev = adapter->dev;
1976 r = bus_dma_tag_create(
1977 /* parent */ bus_get_dma_tag(adapter->dev),
1978 /* alignment */ DBA_ALIGN,
1980 /* lowaddr */ BUS_SPACE_MAXADDR,
1981 /* highaddr */ BUS_SPACE_MAXADDR,
1983 /* filterarg */ NULL,
1986 /* maxsegsize */ size,
1987 /* flags */ BUS_DMA_ALLOCNOW,
1988 /* lockfunc */ NULL,
1989 /* lockfuncarg */ NULL,
1993 "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
1997 r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
1998 BUS_DMA_NOWAIT, &dma->dma_map);
2001 "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
2004 r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
2005 ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2008 "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2011 dma->dma_size = size;
2015 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2017 bus_dma_tag_destroy(dma->dma_tag);
2019 dma->dma_tag = NULL;
2022 } /* ixgbe_dma_malloc */
/************************************************************************
 * ixgbe_dma_free
2026 ************************************************************************/
2028 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2030 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2031 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2032 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2033 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2034 bus_dma_tag_destroy(dma->dma_tag);
2035 } /* ixgbe_dma_free */
2038 /************************************************************************
2039 * ixgbe_allocate_queues
2041 * Allocate memory for the transmit and receive rings, and then
 * the descriptors associated with each; called only once at attach.
2043 ************************************************************************/
2045 ixgbe_allocate_queues(struct adapter *adapter)
2047 device_t dev = adapter->dev;
2048 struct ix_queue *que;
2049 struct tx_ring *txr;
2050 struct rx_ring *rxr;
2051 int rsize, tsize, error = IXGBE_SUCCESS;
2052 int txconf = 0, rxconf = 0;
2054 /* First, allocate the top level queue structs */
2055 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2056 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2057 if (adapter->queues == NULL) {
2058 device_printf(dev, "Unable to allocate queue memory\n");
2063 /* Second, allocate the TX ring struct memory */
2064 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2065 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2066 if (adapter->tx_rings == NULL) {
2067 device_printf(dev, "Unable to allocate TX ring memory\n");
2072 /* Third, allocate the RX ring */
2073 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2074 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO);
2075 if (adapter->rx_rings == NULL) {
2076 device_printf(dev, "Unable to allocate RX ring memory\n");
2081 /* For the ring itself */
2082 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
 * Now set up the TX queues; txconf is needed to handle the
2087 * possibility that things fail midcourse and we need to
2088 * undo memory gracefully
2090 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2091 /* Set up some basics */
2092 txr = &adapter->tx_rings[i];
2093 txr->adapter = adapter;
2095 /* In case SR-IOV is enabled, align the index properly */
2096 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2098 txr->num_desc = adapter->num_tx_desc;
2100 /* Initialize the TX side lock */
2101 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2102 device_get_nameunit(dev), txr->me);
2103 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2105 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2108 "Unable to allocate TX Descriptor memory\n");
2112 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2113 bzero((void *)txr->tx_base, tsize);
2115 /* Now allocate transmit buffers for the ring */
2116 if (ixgbe_allocate_transmit_buffers(txr)) {
2118 "Critical Failure setting up transmit buffers\n");
2122 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2123 /* Allocate a buf ring */
2124 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2125 M_WAITOK, &txr->tx_mtx);
2126 if (txr->br == NULL) {
2128 "Critical Failure setting up buf ring\n");
2136 * Next the RX queues...
2138 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2140 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2141 rxr = &adapter->rx_rings[i];
2142 /* Set up some basics */
2143 rxr->adapter = adapter;
2144 /* In case SR-IOV is enabled, align the index properly */
2145 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2147 rxr->num_desc = adapter->num_rx_desc;
2149 /* Initialize the RX side lock */
2150 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2151 device_get_nameunit(dev), rxr->me);
2152 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2154 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
	    "Unable to allocate RX Descriptor memory\n");
2161 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2162 bzero((void *)rxr->rx_base, rsize);
2164 /* Allocate receive buffers for the ring */
2165 if (ixgbe_allocate_receive_buffers(rxr)) {
2167 "Critical Failure setting up receive buffers\n");
2174 * Finally set up the queue holding structs
2176 for (int i = 0; i < adapter->num_queues; i++) {
2177 que = &adapter->queues[i];
2178 que->adapter = adapter;
2180 que->txr = &adapter->tx_rings[i];
2181 que->rxr = &adapter->rx_rings[i];
2187 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2188 ixgbe_dma_free(adapter, &rxr->rxdma);
2190 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2191 ixgbe_dma_free(adapter, &txr->txdma);
2192 free(adapter->rx_rings, M_DEVBUF);
2194 free(adapter->tx_rings, M_DEVBUF);
2196 free(adapter->queues, M_DEVBUF);
2199 } /* ixgbe_allocate_queues */