/******************************************************************************

  Copyright (c) 2001-2017, Intel Corporation

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet6.h"

extern int ix_crcstrip;

/*
 * this feature only works with
 * IPv4, and only on 82599 and later.
 * Also this will cause IP forwarding to
 * fail and that can't be controlled by
 * the stack as LRO can. For all these
 * reasons I've deemed it best to leave
 * this off and not bother with a tunable
 * interface; this would need to be compiled
 */
static bool ixgbe_rsc_enable = FALSE;
/************************************************************************
 * Local Function prototypes
 ************************************************************************/
static void ixgbe_setup_transmit_ring(struct tx_ring *);
static void ixgbe_free_transmit_buffers(struct tx_ring *);
static int  ixgbe_setup_receive_ring(struct rx_ring *);
static void ixgbe_free_receive_buffers(struct rx_ring *);
static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void ixgbe_refresh_mbufs(struct rx_ring *, int);
static int  ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int  ixgbe_tx_ctx_setup(struct tx_ring *,
                struct mbuf *, u32 *, u32 *);
static int  ixgbe_tso_setup(struct tx_ring *,
                struct mbuf *, u32 *, u32 *);
static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
static int  ixgbe_dma_malloc(struct adapter *, bus_size_t,
                struct ixgbe_dma_alloc *, int);
static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);

MALLOC_DECLARE(M_IXV);
/************************************************************************
 * ixv_legacy_start_locked - Transmit entry point
 *
 *   Called by the stack to initiate a transmit.
 *   The driver will remain in this routine as long as there are
 *   packets to transmit and transmit resources are available.
 *   In case resources are not available, the stack is notified
 *   and the packet is requeued.
 ************************************************************************/
ixv_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
	struct adapter *adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
	if (!adapter->link_active)

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);

		if (ixgbe_xmit(txr, &m_head)) {
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

	return IXGBE_SUCCESS;
} /* ixv_legacy_start_locked */
/************************************************************************
 * ixv_legacy_start
 *
 *   Called by the stack, this always uses the first tx ring,
 *   and should not be used with multiqueue tx enabled.
 ************************************************************************/
ixv_legacy_start(struct ifnet *ifp)
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		ixv_legacy_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
} /* ixv_legacy_start */
/************************************************************************
 * ixv_mq_start - Multiqueue Transmit Entry Point
 *
 *   (if_transmit function)
 ************************************************************************/
ixv_mq_start(struct ifnet *ifp, struct mbuf *m)
	struct adapter *adapter = ifp->if_softc;
	struct ix_queue *que;

	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the one the current CPU is in.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
		if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
		    (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
			i = bucket_id % adapter->num_queues;

			if (bucket_id > adapter->num_queues)
				    "bucket_id (%d) > num_queues (%d)\n",
				    bucket_id, adapter->num_queues);
			i = m->m_pkthdr.flowid % adapter->num_queues;
		i = curcpu % adapter->num_queues;

	/* Check for a hung queue and pick alternative */
	if (((1 << i) & adapter->active_queues) == 0)
		i = ffsl(adapter->active_queues);

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);

	if (IXGBE_TX_TRYLOCK(txr)) {
		ixv_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
		taskqueue_enqueue(que->tq, &txr->txq_task);
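
/*
 * Editor's sketch (hypothetical, not part of the driver): stripped of
 * locking and feature flags, the queue selection above reduces to a
 * pure function of the mbuf and the queue count.  The helper name is
 * invented; rss_hash2bucket() returns 0 when it can map the hash.
 */
#if 0
static inline int
ixv_example_pick_queue(struct mbuf *m, int num_queues)
{
	u32 bucket_id;

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
		/* Prefer the RSS bucket computed for the inbound flow */
		if (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
		    &bucket_id) == 0)
			return (bucket_id % num_queues);
		/* No bucket mapping: fall back to the raw flow id */
		return (m->m_pkthdr.flowid % num_queues);
	}
	/* No hash at all: spread by the CPU we are running on */
	return (curcpu % num_queues);
}
#endif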
/************************************************************************
 * ixv_mq_start_locked
 ************************************************************************/
ixv_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
	int enqueued = 0, err = 0;

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
	if (!txr->adapter->link_active)

	/* Process the queue */
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		err = ixgbe_xmit(txr, &next);
			drbr_advance(ifp, txr->br);
			drbr_putback(ifp, txr->br, next);
		drbr_advance(ifp, txr->br);
#if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
		 */
		if (next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))

} /* ixv_mq_start_locked */
/************************************************************************
 * ixv_deferred_mq_start
 *
 *   Called from a taskqueue to drain queued transmit packets.
 ************************************************************************/
ixv_deferred_mq_start(void *arg, int pending)
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	if (!drbr_empty(ifp, txr->br))
		ixv_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);
} /* ixv_deferred_mq_start */

/************************************************************************
 * ixv_qflush - Flush all ring buffers
 ************************************************************************/
ixv_qflush(struct ifnet *ifp)
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
		IXGBE_TX_UNLOCK(txr);
/************************************************************************
 * ixgbe_xmit
 *
 *   This routine maps the mbufs to tx descriptors, allowing the
 *   TX engine to transmit the packets.
 *
 *   Return 0 on success, positive on failure
 ************************************************************************/
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
	union ixgbe_adv_tx_desc *txd = NULL;
	int i, j, error, nsegs;
	u32 olinfo_status = 0, cmd_type_len;
	bus_dma_segment_t segs[adapter->num_segs];

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail_desc;
	txbuf = &txr->tx_buffers[first];

	/*
	 * Map the packet for DMA.
	 */
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
	    &nsegs, BUS_DMA_NOWAIT);

	if (__predict_false(error)) {

		/* Try it again? - one try */
			/*
			 * XXX: m_defrag will choke on
			 * non-MCLBYTES-sized clusters
			 */
			m = m_defrag(*m_headp, M_NOWAIT);
				adapter->mbuf_defrag_failed++;
			txr->no_tx_dma_setup++;
			txr->no_tx_dma_setup++;

	/* Make certain there are enough descriptors */
	if (txr->tx_avail < (nsegs + 2)) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);

	/*
	 * Set up the appropriate offload context;
	 * this will consume the first descriptor
	 */
	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
	if (__predict_false(error)) {
		if (error == ENOBUFS)

	olinfo_status |= IXGBE_ADVTXD_CC;
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {

		txbuf = &txr->tx_buffers[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		if (++i == txr->num_desc)

	txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
	txr->tx_avail -= nsegs;
	txr->next_avail_desc = i;

	txbuf->m_head = m_head;
	/*
	 * Here we swap the map so the last descriptor,
	 * which gets the completion interrupt, has the
	 * real map, and the first descriptor gets the
	 * unused map from this descriptor.
	 */
	txr->tx_buffers[first].map = txbuf->map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/* Set the EOP descriptor that will be marked done */
	txbuf = &txr->tx_buffers[first];

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT); this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);

	/* Mark queue as having work */
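
/*
 * Editor's note (worked example, not driver code): with num_desc = 1024
 * and next_avail_desc = 1022, a 3-segment frame above fills slots 1022,
 * 1023 and 0, leaving i == 1; the "(nsegs + 2)" guard keeps slack for
 * the context descriptor plus one spare slot.  A minimal sketch of the
 * wrap test:
 */
#if 0
static inline int
ixv_example_next_slot(int i, int num_desc)
{
	if (++i == num_desc)	/* same wrap as in the loop above */
		i = 0;
	return (i);
}
#endif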
/************************************************************************
 * ixgbe_allocate_transmit_buffers
 *
 *   Allocate memory for tx_buffer structures. The tx_buffer stores all
 *   the information needed to transmit a packet on the wire. This is
 *   called only once at attach; setup is done on every reset.
 ************************************************************************/
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_tx_buf *txbuf;

	/*
	 * Setup DMA descriptor areas.
	 */
	error = bus_dma_tag_create(
	    /* parent */ bus_get_dma_tag(adapter->dev),
	    /* lowaddr */ BUS_SPACE_MAXADDR,
	    /* highaddr */ BUS_SPACE_MAXADDR,
	    /* filterarg */ NULL,
	    /* maxsize */ IXGBE_TSO_SIZE,
	    /* nsegments */ adapter->num_segs,
	    /* maxsegsize */ PAGE_SIZE,
	    /* lockfuncarg */ NULL,
		device_printf(dev, "Unable to allocate TX DMA tag\n");

	    (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_IXV, M_NOWAIT | M_ZERO);
	if (!txr->tx_buffers) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
			device_printf(dev, "Unable to create TX DMA map\n");

	/* We free all; it handles the case where we are in the middle */
	ixv_free_transmit_structures(adapter);
} /* ixgbe_allocate_transmit_buffers */
/************************************************************************
 * ixgbe_setup_transmit_ring
 *
 *   Initialize a transmit ring.
 ************************************************************************/
ixgbe_setup_transmit_ring(struct tx_ring *txr)
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
		/*
		 * (under lock): if in netmap mode, do some consistency
		 * checks and set slot to entry 0 of the netmap ring.
		 */
		slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */

	bzero((void *)txr->tx_base,
	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);

	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;

		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
		 */
		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(na, txr->txtag,
			    txbuf->map, NMB(na, slot + si));
#endif /* DEV_NETMAP */

		/* Clear the EOP descriptor pointer */

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
} /* ixgbe_setup_transmit_ring */
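
/*
 * Editor's sketch (assumption: non-negative nkr_hwofs) of the index
 * translation netmap_idx_n2k() performs for the netmap path above; the
 * real helper also copes with negative offsets.  E.g. with nkr_hwofs = 3
 * and 512 slots, NIC slot 510 maps to netmap slot 1.
 */
#if 0
static inline int
ixv_example_idx_n2k(int idx, int hwofs, int num_slots)
{
	idx += hwofs;		/* netmap ring leads the NIC ring */
	if (idx >= num_slots)	/* handle the wraparound */
		idx -= num_slots;
	return (idx);
}
#endif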
/************************************************************************
 * ixv_setup_transmit_structures - Initialize all transmit rings.
 ************************************************************************/
ixv_setup_transmit_structures(struct adapter *adapter)
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++)
		ixgbe_setup_transmit_ring(txr);

} /* ixv_setup_transmit_structures */

/************************************************************************
 * ixv_free_transmit_structures - Free all transmit rings.
 ************************************************************************/
ixv_free_transmit_structures(struct adapter *adapter)
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		ixgbe_free_transmit_buffers(txr);
		ixgbe_dma_free(adapter, &txr->txdma);
		IXGBE_TX_UNLOCK(txr);
		IXGBE_TX_LOCK_DESTROY(txr);
	free(adapter->tx_rings, M_IXV);
} /* ixv_free_transmit_structures */
/************************************************************************
 * ixgbe_free_transmit_buffers
 *
 *   Free transmit ring related data structures.
 ************************************************************************/
ixgbe_free_transmit_buffers(struct tx_ring *txr)
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *tx_buffer;

	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag, tx_buffer->map);
				tx_buffer->map = NULL;
		} else if (tx_buffer->map != NULL) {
			bus_dmamap_unload(txr->txtag, tx_buffer->map);
			bus_dmamap_destroy(txr->txtag, tx_buffer->map);
			tx_buffer->map = NULL;

	buf_ring_free(txr->br, M_IXV);
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_IXV);
		txr->tx_buffers = NULL;
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
} /* ixgbe_free_transmit_buffers */
/************************************************************************
 * ixgbe_tx_ctx_setup
 *
 *   Advanced Context Descriptor setup for VLAN, CSUM or TSO
 ************************************************************************/
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ether_vlan_header *eh;
	int ehdrlen, ip_hlen = 0;
	int ctxd = txr->next_avail_desc;
	u32 vlan_macip_lens = 0;
	u32 type_tucmd_mlhl = 0;

	/* First check if TSO is to be used */
	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* Now ready a context descriptor */
	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];

	/*
	 * In advanced descriptors the vlan tag must
	 * be placed into the context descriptor. Hence
	 * we need to make one even if not doing offloads.
	 */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	} else if (!IXGBE_IS_X550VF(txr->adapter) && (offload == FALSE))

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	if (offload == FALSE)

	/*
	 * If the first mbuf only includes the ethernet header,
	 * jump to the next one
	 * XXX: This assumes the stack splits mbufs containing headers
	 * on header boundaries
	 * XXX: And assumes the entire IP header is contained in one mbuf
	 */
	if (mp->m_len == ehdrlen && mp->m_next)
		l3d = mtod(mp->m_next, caddr_t);
		l3d = mtod(mp, caddr_t) + ehdrlen;

		ip = (struct ip *)(l3d);
		ip_hlen = ip->ip_hl << 2;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Insert IPv4 checksum into data descriptors */
		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
			*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		ip6 = (struct ip6_hdr *)(l3d);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;

	vlan_macip_lens |= ip_hlen;

	/* No support for offloads for non-L4 next headers */
		if (mp->m_pkthdr.csum_flags &
		    (CSUM_IP_TCP | CSUM_IP6_TCP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
		if (mp->m_pkthdr.csum_flags &
		    (CSUM_IP_UDP | CSUM_IP6_UDP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
		if (mp->m_pkthdr.csum_flags &
		    (CSUM_IP_SCTP | CSUM_IP6_SCTP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;

	if (offload) /* Insert L4 checksum into data descriptors */
		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_desc)
	txr->next_avail_desc = ctxd;

} /* ixgbe_tx_ctx_setup */
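
/*
 * Editor's note (illustrative only): vlan_macip_lens built above packs
 * three fields into one 32-bit word: IPLEN in the low bits, MACLEN at
 * IXGBE_ADVTXD_MACLEN_SHIFT, and the VLAN tag at IXGBE_ADVTXD_VLAN_SHIFT.
 * For a plain IPv4/TCP frame the values would be:
 */
#if 0
	/* 14-byte ethernet header, 20-byte IP header, no VLAN tag */
	u32 example_vlan_macip_lens =
	    (ETHER_HDR_LEN << IXGBE_ADVTXD_MACLEN_SHIFT) |	/* MACLEN = 14 */
	    20;							/* IPLEN */
#endif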
/************************************************************************
 * ixgbe_tso_setup
 *
 *   Setup work for hardware segmentation offload (TSO) on
 *   adapters using advanced tx descriptors
 ************************************************************************/
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ether_vlan_header *eh;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	u32 vlan_macip_lens = 0;
	u32 type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0, paylen;
	u16 vtag = 0, eh_type;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		eh_type = eh->evl_proto;
		ehdrlen = ETHER_HDR_LEN;
		eh_type = eh->evl_encap_proto;

	switch (ntohs(eh_type)) {
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
		if (ip6->ip6_nxt != IPPROTO_TCP)
		ip_hlen = sizeof(struct ip6_hdr);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(eh_type));

	ctxd = txr->next_avail_desc;
	TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];

	tcp_hlen = th->th_off << 2;

	/* This is used in the transmit desc in encap */
	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);

	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);

	if (++ctxd == txr->num_desc)

	txr->next_avail_desc = ctxd;
	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;

} /* ixgbe_tso_setup */
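
/*
 * Editor's note (worked example, not driver code): for a TSO send with
 * tso_segsz = 1448 and a 20-byte TCP header, the packing above yields
 * mss_l4len_idx = (1448 << IXGBE_ADVTXD_MSS_SHIFT) |
 * (20 << IXGBE_ADVTXD_L4LEN_SHIFT), and for a 64000-byte IPv4 packet
 * paylen = 64000 - 14 - 20 - 20 = 63946, which is what the hardware
 * uses to carve MSS-sized segments.
 */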
/************************************************************************
 * ixv_txeof
 *
 *   Examine each tx_buffer in the used queue. If the hardware is done
 *   processing the packet then free associated resources. The
 *   tx_buffer is put back on the free queue.
 ************************************************************************/
ixv_txeof(struct tx_ring *txr)
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *buf;
	union ixgbe_adv_tx_desc *txd;
	u32 work, processed = 0;
	u32 limit = adapter->tx_process_limit;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

	if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
	    (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
		struct netmap_adapter *na = NA(adapter->ifp);
		struct netmap_kring *kring = &na->tx_rings[txr->me];
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_POSTREAD);
		/*
		 * In netmap mode, all the work is done in the context
		 * of the client thread. Interrupt handlers only wake up
		 * clients, which may be sleeping on individual rings
		 * or on a global resource for all rings.
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		 */
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
			netmap_tx_irq(adapter->ifp, txr->me);
#endif /* DEV_NETMAP */

	if (txr->tx_avail == txr->num_desc) {

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* The distance to ring end */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

		union ixgbe_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */

		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
			break; /* I/O not complete */

			txr->bytes += buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, buf->map);
			m_freem(buf->m_head);

		/* We clean the range if multi segment */
		while (txd != eop) {

			/* wrap the ring? */
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;

				txr->bytes += buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag, buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag, buf->map);
				m_freem(buf->m_head);

		/* Try the next packet */

		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
	} while (__predict_true(--limit));

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	work += txr->num_desc;
	txr->next_to_clean = work;

	/*
	 * Queue hang detection: we know there's
	 * work outstanding or the first return
	 * would have been taken, so increment busy
	 * if nothing managed to get cleaned; then
	 * in local_timer it will be checked and
	 * marked as HUNG if it exceeds a MAX attempt.
	 */
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
	/*
	 * If anything gets cleaned we reset state to 1,
	 * note this will turn off HUNG if it's set.
	 */

	if (txr->tx_avail == txr->num_desc)
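
/*
 * Editor's sketch (hypothetical helper) of the negative-offset walk
 * used above: "work" is kept num_desc below the true index, so the
 * wrap test is a cheap compare against zero and one addition at the
 * end recovers the real next_to_clean.
 */
#if 0
static inline int
ixv_example_ring_walk(int start, int steps, int num_desc)
{
	int pos = start - num_desc;	/* always in [-num_desc, 0) */

	while (steps--) {
		if (++pos == 0)		/* stepped past the ring end */
			pos -= num_desc;
	}
	return (pos + num_desc);	/* back to a real index */
}
#endif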
/************************************************************************
 * ixgbe_rsc_count
 *
 *   Used to detect a descriptor that has been merged by Hardware RSC.
 ************************************************************************/
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
	return (le32toh(rx->wb.lower.lo_dword.data) &
	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
} /* ixgbe_rsc_count */
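
/*
 * Editor's note (illustrative): with a descriptor whose little-endian
 * lo_dword is 0x00060000 and the usual mask/shift (RSC count in bits
 * 20:17 per ixgbe_type.h), the helper above returns 3, meaning three
 * receive descriptors were coalesced into this one by hardware RSC.
 */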
/************************************************************************
 * ixgbe_setup_hw_rsc
 *
 *   Initialize Hardware RSC (LRO) feature on 82599
 *   for an RX ring; this is toggled by the LRO capability
 *   even though it is transparent to the stack.
 *
 *   NOTE: Since this HW feature only works with IPv4 and
 *         testing has shown soft LRO to be as effective,
 *         this feature will be disabled by default.
 ************************************************************************/
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
	struct adapter *adapter = rxr->adapter;
	struct ixgbe_hw *hw = &adapter->hw;
	u32 rscctrl, rdrxctl;

	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;

	/* Always strip CRC unless Netmap disabled it */
	if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
	    !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
#endif /* DEV_NETMAP */
		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	 * Limit the total number of descriptors that
	 * can be combined, so it does not exceed 64K
	 */
	if (rxr->mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (rxr->mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (rxr->mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

} /* ixgbe_setup_hw_rsc */
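
/*
 * Editor's note (worked arithmetic): the MAXDESC choices above keep a
 * coalesced receive under the 64KB RSC limit: 16 x 2KB (MCLBYTES) =
 * 32KB, 8 x 4KB (MJUMPAGESIZE) = 32KB, 4 x 9KB (MJUM9BYTES) = 36KB;
 * with 16KB clusters even 4 would reach 64KB, hence the cap of 1.
 */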
/************************************************************************
 * ixgbe_refresh_mbufs
 *
 *   Refresh mbuf buffers for RX descriptor rings
 *    - now keeps its own state so discards due to resource
 *      exhaustion are unnecessary; if an mbuf cannot be obtained
 *      it just returns, keeping its placeholder, thus it can simply
 *      be recalled to try again.
 ************************************************************************/
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
	struct adapter *adapter = rxr->adapter;
	struct ixgbe_rx_buf *rxbuf;
	bus_dma_segment_t seg[1];
	int i, j, nsegs, error;
	bool refreshed = FALSE;

	i = j = rxr->next_to_refresh;
	/* Control the loop with one beyond */
	if (++j == rxr->num_desc)

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;

		/*
		 * If we're dealing with an mbuf that was copied rather
		 * than replaced, there's no need to go through busdma.
		 */
		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
			/* Get the memory mapping */
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
			    mp, seg, &nsegs, BUS_DMA_NOWAIT);
				printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
			    htole64(seg[0].ds_addr);
			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
			rxbuf->flags &= ~IXGBE_RX_COPY;

		/* Next is precalculated */
		rxr->next_to_refresh = i;
		if (++j == rxr->num_desc)

	if (refreshed) /* Update hardware tail index */
		IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);

} /* ixgbe_refresh_mbufs */
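
/*
 * Editor's note (worked example): the "one beyond" control above lets
 * j lead i by one slot so the loop can stop exactly at "limit" (the
 * slot the hardware still owns) without a separate wrap flag.  E.g.
 * with num_desc = 512, next_to_refresh = 510 and limit = 2, the walk
 * refreshes slots 510, 511, 0 and 1, then stops when j reaches 2.
 */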
/************************************************************************
 * ixgbe_allocate_receive_buffers
 *
 *   Allocate memory for rx_buffer structures. Since we use one
 *   rx_buffer per received packet, the maximum number of rx_buffers
 *   that we'll need is equal to the number of receive descriptors
 *   that we've allocated.
 ************************************************************************/
ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
	struct adapter *adapter = rxr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_rx_buf *rxbuf;

	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
	rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_IXV,
	if (!rxr->rx_buffers) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");

	error = bus_dma_tag_create(
	    /* parent */ bus_get_dma_tag(dev),
	    /* lowaddr */ BUS_SPACE_MAXADDR,
	    /* highaddr */ BUS_SPACE_MAXADDR,
	    /* filterarg */ NULL,
	    /* maxsize */ MJUM16BYTES,
	    /* maxsegsize */ MJUM16BYTES,
	    /* lockfunc */ NULL,
	    /* lockfuncarg */ NULL,
		device_printf(dev, "Unable to create RX DMA tag\n");

	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
		rxbuf = &rxr->rx_buffers[i];
		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
			device_printf(dev, "Unable to create RX dma map\n");

	/* Frees all, but can handle partial completion */
	ixv_free_receive_structures(adapter);
} /* ixgbe_allocate_receive_buffers */
/************************************************************************
 * ixgbe_free_receive_ring
 ************************************************************************/
ixgbe_free_receive_ring(struct rx_ring *rxr)
	struct ixgbe_rx_buf *rxbuf;

	for (int i = 0; i < rxr->num_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf != NULL) {
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			rxbuf->buf->m_flags |= M_PKTHDR;
			m_freem(rxbuf->buf);
} /* ixgbe_free_receive_ring */

/************************************************************************
 * ixgbe_setup_receive_ring
 *
 *   Initialize a receive ring and its buffers.
 ************************************************************************/
ixgbe_setup_receive_ring(struct rx_ring *rxr)
	struct adapter *adapter;
	struct ixgbe_rx_buf *rxbuf;
	struct lro_ctrl *lro = &rxr->lro;
	struct netmap_adapter *na = NA(rxr->adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */
	bus_dma_segment_t seg[1];
	int rsize, nsegs, error = 0;

	adapter = rxr->adapter;

	/* Clear the ring contents */
	if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
		slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif /* DEV_NETMAP */

	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
	/* Cache the size */
	rxr->mbuf_sz = adapter->rx_mbuf_sz;

	/* Free current RX buffer structs and their mbufs */
	ixgbe_free_receive_ring(rxr);

	/* Now replenish the mbufs */
	for (int j = 0; j != rxr->num_desc; ++j) {

		rxbuf = &rxr->rx_buffers[j];

		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ). No need to allocate
		 * an mbuf, so end the block with a continue;
		 */
		if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
			rxbuf->addr = htole64(paddr);
#endif /* DEV_NETMAP */

		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
		    adapter->rx_mbuf_sz);
		if (rxbuf->buf == NULL) {

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
		/* Update the descriptor and the cached value */
		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
		rxbuf->addr = htole64(seg[0].ds_addr);

	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->vtag_strip = FALSE;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	 * Now set up the LRO interface
	 */
	if (ixgbe_rsc_enable)
		ixgbe_setup_hw_rsc(rxr);
	else if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
			device_printf(dev, "LRO Initialization failed!\n");
		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;

	IXGBE_RX_UNLOCK(rxr);

	ixgbe_free_receive_ring(rxr);
	IXGBE_RX_UNLOCK(rxr);

} /* ixgbe_setup_receive_ring */
/************************************************************************
 * ixv_setup_receive_structures - Initialize all receive rings.
 ************************************************************************/
ixv_setup_receive_structures(struct adapter *adapter)
	struct rx_ring *rxr = adapter->rx_rings;

	for (j = 0; j < adapter->num_queues; j++, rxr++)
		if (ixgbe_setup_receive_ring(rxr))

	/*
	 * Free RX buffers allocated so far; we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'j' failed, so it's the terminus.
	 */
	for (int i = 0; i < j; ++i) {
		rxr = &adapter->rx_rings[i];
		ixgbe_free_receive_ring(rxr);

} /* ixv_setup_receive_structures */
/************************************************************************
 * ixv_free_receive_structures - Free all receive rings.
 ************************************************************************/
ixv_free_receive_structures(struct adapter *adapter)
	struct rx_ring *rxr = adapter->rx_rings;
	struct lro_ctrl *lro;

	INIT_DEBUGOUT("ixv_free_receive_structures: begin");

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		ixgbe_free_receive_buffers(rxr);
		/* Free LRO memory */
		/* Free the ring memory as well */
		ixgbe_dma_free(adapter, &rxr->rxdma);

	free(adapter->rx_rings, M_IXV);
} /* ixv_free_receive_structures */

/************************************************************************
 * ixgbe_free_receive_buffers - Free receive ring data structures
 ************************************************************************/
ixgbe_free_receive_buffers(struct rx_ring *rxr)
	struct adapter *adapter = rxr->adapter;
	struct ixgbe_rx_buf *rxbuf;

	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (int i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			if (rxbuf->buf != NULL) {
				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
				rxbuf->buf->m_flags |= M_PKTHDR;
				m_freem(rxbuf->buf);
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_IXV);
			rxr->rx_buffers = NULL;

	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);

} /* ixgbe_free_receive_buffers */
/************************************************************************
 * ixgbe_rx_input
 ************************************************************************/
static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
	/*
	 * ATM LRO is only for IP/TCP packets and the TCP checksum of the
	 * packet should be computed by hardware. Also it should not have
	 * a VLAN tag in its ethernet header. In case of IPv6 we do not yet
	 * support ext. hdrs.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)

	IXGBE_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
} /* ixgbe_rx_input */
/************************************************************************
 * ixgbe_rx_discard
 ************************************************************************/
static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
	struct ixgbe_rx_buf *rbuf;

	rbuf = &rxr->rx_buffers[i];

	/*
	 * With advanced descriptors the writeback
	 * clobbers the buffer addrs, so it's easier
	 * to just free the existing mbufs and take
	 * the normal refresh path to get new buffers
	 */
	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		bus_dmamap_unload(rxr->ptag, rbuf->pmap);

} /* ixgbe_rx_discard */
/************************************************************************
 * ixv_rxeof
 *
 *   This routine executes in interrupt context. It replenishes
 *   the mbufs in the descriptor ring and sends data which has been
 *   DMA'ed into host memory to the upper layer.
 *
 *   Return TRUE for more work, FALSE for all clean.
 ************************************************************************/
ixv_rxeof(struct ix_queue *que)
	struct adapter *adapter = que->adapter;
	struct rx_ring *rxr = que->rxr;
	struct ifnet *ifp = adapter->ifp;
	struct lro_ctrl *lro = &rxr->lro;
#if __FreeBSD_version < 1100105
	struct lro_entry *queued;
	union ixgbe_adv_rx_desc *cur;
	struct ixgbe_rx_buf *rbuf, *nbuf;
	int i, nextp, processed = 0;
	u32 count = adapter->rx_process_limit;

	if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
		/* Same as the txeof routine: wakeup clients on intr. */
		if (netmap_rx_irq(ifp, rxr->me, &processed)) {
			IXGBE_RX_UNLOCK(rxr);
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf *sendmp, *mp;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)

		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
			if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);

		/*
		 * On 82599 which supports a hardware
		 * LRO (called HW RSC), packets need
		 * not be fragmented across sequential
		 * descriptors, rather the next descriptor
		 * is indicated in bits of the descriptor.
		 * This also means that we might process
		 * more than one packet at a time, something
		 * that has never been true before; it
		 * required eliminating global chain pointers
		 * in favor of what we are doing here. -jfv
		 */

		/*
		 * Figure out the next descriptor
		 */
		if (rxr->hw_rsc == TRUE) {
			rsc = ixgbe_rsc_count(cur);
			rxr->rsc_num += (rsc - 1);
		if (rsc) { /* Get hardware index */
			nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
			    IXGBE_RXDADV_NEXTP_SHIFT);
		} else { /* Just sequential */
			if (nextp == adapter->num_rx_desc)
		nbuf = &rxr->rx_buffers[nextp];

		/*
		 * Rather than using the fmp/lmp global pointers
		 * we now keep the head of a packet chain in the
		 * buffer struct and pass this along from one
		 * descriptor to the next, until we get EOP.
		 */

		/*
		 * See if there is a stored head
		 * that determines what we are
		 */
		if (sendmp != NULL) { /* secondary frag */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
			/*
			 * Optimize. This might be a small packet,
			 * maybe just a TCP ACK. Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			 */
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data += IXGBE_RX_COPY_ALIGN;
					ixv_bcopy(mp->m_data, sendmp->m_data,
					sendmp->m_len = len;
					rbuf->flags |= IXGBE_RX_COPY;

			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;

		/* Pass the head pointer on */

			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			sendmp->m_pkthdr.rcvif = ifp;

			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixgbe_rx_checksum(staterr, sendmp, ptype);

			/*
			 * In case of multiqueue, we have the RXCSUM.PCSD bit
			 * set and never cleared. This means we have the RSS
			 * hash available to be used.
			 */
			if (adapter->num_queues > 1) {
				sendmp->m_pkthdr.flowid =
				    le32toh(cur->wb.lower.hi_dword.rss);
				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
				case IXGBE_RXDADV_RSSTYPE_IPV4:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV4);
				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV4);
				case IXGBE_RXDADV_RSSTYPE_IPV6:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6);
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6);
				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6_EX);
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6_EX);
#if __FreeBSD_version > 1100000
				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV4);
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6);
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6_EX);
#if __FreeBSD_version < 1100116
					M_HASHTYPE_SET(sendmp,
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_OPAQUE_HASH);
				sendmp->m_pkthdr.flowid = que->msix;
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);

		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
#if __FreeBSD_version < 1100105
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	tcp_lro_flush_all(lro);

	IXGBE_RX_UNLOCK(rxr);

	/*
	 * Still have cleaning to do?
	 */
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
/************************************************************************
 * ixgbe_rx_checksum
 *
 *   Verify that the hardware indicated that the checksum is valid.
 *   Inform the stack about the status of the checksum so that it
 *   doesn't spend time verifying it.
 ************************************************************************/
ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
	u16 status = (u16)staterr;
	u8 errors = (u8)(staterr >> 24);

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)

	if (status & IXGBE_RXD_STAT_IPCS) {
		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
		/* IP Checksum Good */
		if (!(errors & IXGBE_RXD_ERR_IPE))
			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;

	/* TCP/UDP/SCTP checksum */
	if (status & IXGBE_RXD_STAT_L4CS) {
		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
			mp->m_pkthdr.csum_data = htons(0xffff);

} /* ixgbe_rx_checksum */
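
/*
 * Editor's note (illustrative values): the split above works because
 * the writeback word packs status bits in the low half and error bits
 * in the top byte, hence the (u16) and (staterr >> 24) casts.  For
 * example, staterr = STAT_IPCS | STAT_L4CS with a clear error byte
 * marks both L3 and L4 checksums calculated and valid and sets
 * csum_data to 0xffff; a set IXGBE_RXD_ERR_TCPE bit would leave
 * CSUM_L4_VALID off.
 */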
/************************************************************************
 * ixgbe_dmamap_cb - Manage DMA'able memory.
 ************************************************************************/
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
	*(bus_addr_t *)arg = segs->ds_addr;

} /* ixgbe_dmamap_cb */

/************************************************************************
 * ixgbe_dma_malloc
 ************************************************************************/
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct ixgbe_dma_alloc *dma, int mapflags)
	device_t dev = adapter->dev;

	r = bus_dma_tag_create(
	    /* parent */ bus_get_dma_tag(adapter->dev),
	    /* alignment */ DBA_ALIGN,
	    /* lowaddr */ BUS_SPACE_MAXADDR,
	    /* highaddr */ BUS_SPACE_MAXADDR,
	    /* filterarg */ NULL,
	    /* maxsegsize */ size,
	    /* flags */ BUS_DMA_ALLOCNOW,
	    /* lockfunc */ NULL,
	    /* lockfuncarg */ NULL,
		    "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",

	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
		    "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);

	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
	    ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
		    "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);

	dma->dma_size = size;

	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);

	bus_dma_tag_destroy(dma->dma_tag);

	dma->dma_tag = NULL;

} /* ixgbe_dma_malloc */
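
/*
 * Editor's sketch of a caller (mirrors the use in ixv_allocate_queues()
 * below; assumes the elided return paths yield 0 on success, as the
 * callers here test for non-zero failure):
 */
#if 0
	struct ixgbe_dma_alloc dma;
	int tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

	if (ixgbe_dma_malloc(adapter, tsize, &dma, BUS_DMA_NOWAIT) == 0) {
		/* The CPU writes descriptors via dma.dma_vaddr; the NIC
		 * is given dma.dma_paddr captured by ixgbe_dmamap_cb(). */
		union ixgbe_adv_tx_desc *ring =
		    (union ixgbe_adv_tx_desc *)dma.dma_vaddr;
		bzero((void *)ring, tsize);
		ixgbe_dma_free(adapter, &dma);
	}
#endif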
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
} /* ixgbe_dma_free */
/************************************************************************
 * ixv_allocate_queues
 *
 *   Allocate memory for the transmit and receive rings, and then
 *   the descriptors associated with each, called only once at attach.
 ************************************************************************/
ixv_allocate_queues(struct adapter *adapter)
	device_t dev = adapter->dev;
	struct ix_queue *que;
	struct tx_ring *txr;
	struct rx_ring *rxr;
	int rsize, tsize, error = IXGBE_SUCCESS;
	int txconf = 0, rxconf = 0;

	/* First, allocate the top level queue structs */
	adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
	if (!adapter->queues) {
		device_printf(dev, "Unable to allocate queue memory\n");

	/* Second, allocate the TX ring struct memory */
	adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
	if (!adapter->tx_rings) {
		device_printf(dev, "Unable to allocate TX ring memory\n");

	/* Third, allocate the RX ring */
	adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_IXV, M_NOWAIT | M_ZERO);
	if (!adapter->rx_rings) {
		device_printf(dev, "Unable to allocate RX ring memory\n");

	/* For the ring itself */
	tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),

	/*
	 * Now set up the TX queues; txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
			    "Unable to allocate TX Descriptor memory\n");

		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			    "Critical Failure setting up transmit buffers\n");

		if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
			/* Allocate a buf ring */
			txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_IXV,
			    M_WAITOK, &txr->tx_mtx);
			if (txr->br == NULL) {
				    "Critical Failure setting up buf ring\n");

	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
			    "Unable to allocate RX Descriptor memory\n");

		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (ixgbe_allocate_receive_buffers(rxr)) {
			    "Critical Failure setting up receive buffers\n");

	/*
	 * Finally set up the queue holding structs
	 */
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];

	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_IXV);
	free(adapter->tx_rings, M_IXV);
	free(adapter->queues, M_IXV);

} /* ixv_allocate_queues */
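
/*
 * Editor's sketch of the unwind idiom used above, in miniature: each
 * "conf" counter records how many items were fully set up, and the
 * error labels fall through in reverse order of allocation so exactly
 * those items are released.  Helper names are hypothetical.
 */
#if 0
static int
example_setup(void)
{
	if (alloc_a() != 0)		/* hypothetical stage 1 */
		goto fail_a;
	if (alloc_b() != 0)		/* hypothetical stage 2 */
		goto fail_b;
	return (0);
fail_b:
	free_a();			/* undo stage 1 */
fail_a:
	return (ENOMEM);
}
#endif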