1 /******************************************************************************
3 Copyright (c) 2001-2017, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
36 #ifndef IXGBE_STANDALONE_BUILD
38 #include "opt_inet6.h"
43 extern int ix_crcstrip;
47  * this feature only works with
48  * IPv4, and only on 82599 and later.
49  * It also causes IP forwarding to
50  * fail, and that can't be controlled by
51  * the stack as LRO can. For all these
52  * reasons I've deemed it best to leave
53  * it off and not bother with a tunable
54  * interface; this would need to be compiled
57 static bool ixgbe_rsc_enable = FALSE;
60  * For Flow Director: this is the
61  * number of TX packets we sample
62  * for the filter pool; this means
63  * every 20th packet will be probed.
65  * This feature can be disabled by
68 static int atr_sample_rate = 20;
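/*
 * Rough usage sketch (see ixgbe_xmit() below): each TX ring keeps an
 * atr_count that grows as packets are sent, and once it reaches
 * atr_sample_rate the frame is handed to ixgbe_atr() for filter
 * programming. The counter reset after the sample is assumed; that
 * line is not shown in this excerpt.
 */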
70 /************************************************************************
71 * Local Function prototypes
72 ************************************************************************/
73 static void ixgbe_setup_transmit_ring(struct tx_ring *);
74 static void ixgbe_free_transmit_buffers(struct tx_ring *);
75 static int ixgbe_setup_receive_ring(struct rx_ring *);
76 static void ixgbe_free_receive_buffers(struct rx_ring *);
77 static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
78 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
79 static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
80 static int ixgbe_tx_ctx_setup(struct tx_ring *,
81 struct mbuf *, u32 *, u32 *);
82 static int ixgbe_tso_setup(struct tx_ring *,
83 struct mbuf *, u32 *, u32 *);
84 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
85 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
87 static int ixgbe_dma_malloc(struct adapter *, bus_size_t,
88 struct ixgbe_dma_alloc *, int);
89 static void ixgbe_dma_free(struct adapter *, struct ixgbe_dma_alloc *);
91 MALLOC_DECLARE(M_IXGBE);
93 /************************************************************************
94 * ixgbe_legacy_start_locked - Transmit entry point
96 * Called by the stack to initiate a transmit.
97 * The driver will remain in this routine as long as there are
98 * packets to transmit and transmit resources are available.
99 * In case resources are not available, the stack is notified
100 * and the packet is requeued.
101 ************************************************************************/
103 ixgbe_legacy_start_locked(struct ifnet *ifp, struct tx_ring *txr)
106 struct adapter *adapter = txr->adapter;
108 IXGBE_TX_LOCK_ASSERT(txr);
110 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
112 if (!adapter->link_active)
115 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
116 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
119 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
123 if (ixgbe_xmit(txr, &m_head)) {
125 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
128 /* Send a copy of the frame to the BPF listener */
129 ETHER_BPF_MTAP(ifp, m_head);
132 return IXGBE_SUCCESS;
133 } /* ixgbe_legacy_start_locked */
135 /************************************************************************
138 * Called by the stack, this always uses the first tx ring,
139 * and should not be used with multiqueue tx enabled.
140 ************************************************************************/
142 ixgbe_legacy_start(struct ifnet *ifp)
144 struct adapter *adapter = ifp->if_softc;
145 struct tx_ring *txr = adapter->tx_rings;
147 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
149 ixgbe_legacy_start_locked(ifp, txr);
150 IXGBE_TX_UNLOCK(txr);
152 } /* ixgbe_legacy_start */
154 /************************************************************************
155 * ixgbe_mq_start - Multiqueue Transmit Entry Point
157 * (if_transmit function)
158 ************************************************************************/
160 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
162 struct adapter *adapter = ifp->if_softc;
163 struct ix_queue *que;
169 * When doing RSS, map the packet to the same outbound queue
170 * as the incoming flow would be mapped to.
172 * If everything is set up correctly, it should be the
173 * same bucket that the current CPU is in.
175 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
176 if ((adapter->feat_en & IXGBE_FEATURE_RSS) &&
177 (rss_hash2bucket(m->m_pkthdr.flowid, M_HASHTYPE_GET(m),
179 i = bucket_id % adapter->num_queues;
181 if (bucket_id > adapter->num_queues)
183 "bucket_id (%d) > num_queues (%d)\n",
184 bucket_id, adapter->num_queues);
187 i = m->m_pkthdr.flowid % adapter->num_queues;
189 i = curcpu % adapter->num_queues;
191 /* Check for a hung queue and pick alternative */
192 if (((1 << i) & adapter->active_queues) == 0)
193 i = ffsl(adapter->active_queues);
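/*
 * Queue selection order: RSS bucket when the hash maps to one,
 * otherwise the flowid modulo num_queues, otherwise the current CPU;
 * if the chosen queue is not in active_queues (possibly hung), fall
 * back to an active queue located with ffsl().
 */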
195 txr = &adapter->tx_rings[i];
196 que = &adapter->queues[i];
198 err = drbr_enqueue(ifp, txr->br, m);
201 if (IXGBE_TX_TRYLOCK(txr)) {
202 ixgbe_mq_start_locked(ifp, txr);
203 IXGBE_TX_UNLOCK(txr);
205 taskqueue_enqueue(que->tq, &txr->txq_task);
208 } /* ixgbe_mq_start */
210 /************************************************************************
211 * ixgbe_mq_start_locked
212 ************************************************************************/
214 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
217 int enqueued = 0, err = 0;
219 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
221 if (!txr->adapter->link_active)
224 /* Process the queue */
225 #if __FreeBSD_version < 901504
226 next = drbr_dequeue(ifp, txr->br);
227 while (next != NULL) {
228 err = ixgbe_xmit(txr, &next);
231 err = drbr_enqueue(ifp, txr->br, next);
233 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
234 err = ixgbe_xmit(txr, &next);
237 drbr_advance(ifp, txr->br);
239 drbr_putback(ifp, txr->br, next);
243 #if __FreeBSD_version >= 901504
244 drbr_advance(ifp, txr->br);
247 /* Send a copy of the frame to the BPF listener */
248 ETHER_BPF_MTAP(ifp, next);
249 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
251 #if __FreeBSD_version < 901504
252 next = drbr_dequeue(ifp, txr->br);
256 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD(txr->adapter))
260 } /* ixgbe_mq_start_locked */
262 /************************************************************************
263 * ixgbe_deferred_mq_start
265 * Called from a taskqueue to drain queued transmit packets.
266 ************************************************************************/
268 ixgbe_deferred_mq_start(void *arg, int pending)
270 struct tx_ring *txr = arg;
271 struct adapter *adapter = txr->adapter;
272 struct ifnet *ifp = adapter->ifp;
275 if (!drbr_empty(ifp, txr->br))
276 ixgbe_mq_start_locked(ifp, txr);
277 IXGBE_TX_UNLOCK(txr);
278 } /* ixgbe_deferred_mq_start */
280 /************************************************************************
281 * ixgbe_qflush - Flush all ring buffers
282 ************************************************************************/
284 ixgbe_qflush(struct ifnet *ifp)
286 struct adapter *adapter = ifp->if_softc;
287 struct tx_ring *txr = adapter->tx_rings;
290 for (int i = 0; i < adapter->num_queues; i++, txr++) {
292 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
294 IXGBE_TX_UNLOCK(txr);
300 /************************************************************************
303 * This routine maps the mbufs to tx descriptors, allowing the
304 * TX engine to transmit the packets.
306 * Return 0 on success, positive on failure
307 ************************************************************************/
309 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
311 struct adapter *adapter = txr->adapter;
312 struct ixgbe_tx_buf *txbuf;
313 union ixgbe_adv_tx_desc *txd = NULL;
315 int i, j, error, nsegs;
317 u32 olinfo_status = 0, cmd_type_len;
319 bus_dma_segment_t segs[adapter->num_segs];
324 /* Basic descriptor defines */
325 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
326 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
328 if (m_head->m_flags & M_VLANTAG)
329 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
332 * It is important to capture the first descriptor
333 * used, because it will contain the index of
334 * the one we tell the hardware to report back
336 first = txr->next_avail_desc;
337 txbuf = &txr->tx_buffers[first];
341 * Map the packet for DMA.
344 error = bus_dmamap_load_mbuf_sg(txr->txtag, map, *m_headp, segs,
345 &nsegs, BUS_DMA_NOWAIT);
347 if (__predict_false(error)) {
352 /* Try it again? - one try */
356 * XXX: m_defrag will choke on
357 * non-MCLBYTES-sized clusters
359 m = m_defrag(*m_headp, M_NOWAIT);
361 adapter->mbuf_defrag_failed++;
371 txr->no_tx_dma_setup++;
374 txr->no_tx_dma_setup++;
381 /* Make certain there are enough descriptors */
382 if (txr->tx_avail < (nsegs + 2)) {
383 txr->no_desc_avail++;
384 bus_dmamap_unload(txr->txtag, map);
390 * Set up the appropriate offload context
391 * this will consume the first descriptor
393 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
394 if (__predict_false(error)) {
395 if (error == ENOBUFS)
400 /* Do the flow director magic */
401 if ((adapter->feat_en & IXGBE_FEATURE_FDIR) &&
402 (txr->atr_sample) && (!adapter->fdir_reinit)) {
404 if (txr->atr_count >= atr_sample_rate) {
405 ixgbe_atr(txr, m_head);
410 olinfo_status |= IXGBE_ADVTXD_CC;
411 i = txr->next_avail_desc;
412 for (j = 0; j < nsegs; j++) {
416 txbuf = &txr->tx_buffers[i];
417 txd = &txr->tx_base[i];
418 seglen = segs[j].ds_len;
419 segaddr = htole64(segs[j].ds_addr);
421 txd->read.buffer_addr = segaddr;
422 txd->read.cmd_type_len = htole32(txr->txd_cmd |
423 cmd_type_len | seglen);
424 txd->read.olinfo_status = htole32(olinfo_status);
426 if (++i == txr->num_desc)
430 txd->read.cmd_type_len |= htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
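/*
 * EOP marks this as the last descriptor of the frame, and RS asks the
 * hardware to write back completion status for it (the DD bit that
 * ixgbe_txeof() checks when reclaiming buffers).
 */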
431 txr->tx_avail -= nsegs;
432 txr->next_avail_desc = i;
434 txbuf->m_head = m_head;
436 * Here we swap the map so the last descriptor,
437 * which gets the completion interrupt, has the
438 * real map, and the first descriptor gets the
439 * unused map from this descriptor.
441 txr->tx_buffers[first].map = txbuf->map;
443 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
445 /* Set the EOP descriptor that will be marked done */
446 txbuf = &txr->tx_buffers[first];
449 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
450 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
452 * Advance the Transmit Descriptor Tail (TDT); this tells the
453 * hardware that this frame is available to transmit.
455 ++txr->total_packets;
456 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
458 /* Mark queue as having work */
466 /************************************************************************
467 * ixgbe_allocate_transmit_buffers
469 * Allocate memory for tx_buffer structures. The tx_buffer stores all
470 * the information needed to transmit a packet on the wire. This is
471 * called only once at attach; setup is done on every reset.
472 ************************************************************************/
474 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
476 struct adapter *adapter = txr->adapter;
477 device_t dev = adapter->dev;
478 struct ixgbe_tx_buf *txbuf;
482 * Setup DMA descriptor areas.
484 error = bus_dma_tag_create(
485 /* parent */ bus_get_dma_tag(adapter->dev),
488 /* lowaddr */ BUS_SPACE_MAXADDR,
489 /* highaddr */ BUS_SPACE_MAXADDR,
491 /* filterarg */ NULL,
492 /* maxsize */ IXGBE_TSO_SIZE,
493 /* nsegments */ adapter->num_segs,
494 /* maxsegsize */ PAGE_SIZE,
497 /* lockfuncarg */ NULL,
500 device_printf(dev, "Unable to allocate TX DMA tag\n");
505 (struct ixgbe_tx_buf *)malloc(sizeof(struct ixgbe_tx_buf) *
506 adapter->num_tx_desc, M_IXGBE, M_NOWAIT | M_ZERO);
507 if (!txr->tx_buffers) {
508 device_printf(dev, "Unable to allocate tx_buffer memory\n");
513 /* Create the descriptor buffer dma maps */
514 txbuf = txr->tx_buffers;
515 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
516 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
518 device_printf(dev, "Unable to create TX DMA map\n");
525 /* We free all, it handles case where we are in the middle */
526 ixgbe_free_transmit_structures(adapter);
529 } /* ixgbe_allocate_transmit_buffers */
531 /************************************************************************
533 * Initialize a transmit ring.
535 ************************************************************************/
537 ixgbe_setup_transmit_ring(struct tx_ring *txr)
539 struct adapter *adapter = txr->adapter;
540 struct ixgbe_tx_buf *txbuf;
542 struct netmap_adapter *na = NA(adapter->ifp);
543 struct netmap_slot *slot;
544 #endif /* DEV_NETMAP */
546 /* Clear the old ring contents */
550 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
552 * (under lock): if in netmap mode, do some consistency
553 * checks and set slot to entry 0 of the netmap ring.
555 slot = netmap_reset(na, NR_TX, txr->me, 0);
557 #endif /* DEV_NETMAP */
559 bzero((void *)txr->tx_base,
560 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
562 txr->next_avail_desc = 0;
563 txr->next_to_clean = 0;
565 /* Free any existing tx buffers. */
566 txbuf = txr->tx_buffers;
567 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
568 if (txbuf->m_head != NULL) {
569 bus_dmamap_sync(txr->txtag, txbuf->map,
570 BUS_DMASYNC_POSTWRITE);
571 bus_dmamap_unload(txr->txtag, txbuf->map);
572 m_freem(txbuf->m_head);
573 txbuf->m_head = NULL;
578 * In netmap mode, set the map for the packet buffer.
579 * NOTE: Some drivers (not this one) also need to set
580 * the physical buffer address in the NIC ring.
581 * Slots in the netmap ring (indexed by "si") are
582 * kring->nkr_hwofs positions "ahead" wrt the
583 * corresponding slot in the NIC ring. In some drivers
584 * (not here) nkr_hwofs can be negative. Function
585 * netmap_idx_n2k() handles wraparounds properly.
587 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
588 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
589 netmap_load_map(na, txr->txtag,
590 txbuf->map, NMB(na, slot + si));
592 #endif /* DEV_NETMAP */
594 /* Clear the EOP descriptor pointer */
598 /* Set the rate at which we sample packets */
599 if (adapter->feat_en & IXGBE_FEATURE_FDIR)
600 txr->atr_sample = atr_sample_rate;
602 /* Set number of descriptors available */
603 txr->tx_avail = adapter->num_tx_desc;
605 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
606 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
607 IXGBE_TX_UNLOCK(txr);
608 } /* ixgbe_setup_transmit_ring */
610 /************************************************************************
611 * ixgbe_setup_transmit_structures - Initialize all transmit rings.
612 ************************************************************************/
614 ixgbe_setup_transmit_structures(struct adapter *adapter)
616 struct tx_ring *txr = adapter->tx_rings;
618 for (int i = 0; i < adapter->num_queues; i++, txr++)
619 ixgbe_setup_transmit_ring(txr);
622 } /* ixgbe_setup_transmit_structures */
624 /************************************************************************
625 * ixgbe_free_transmit_structures - Free all transmit rings.
626 ************************************************************************/
628 ixgbe_free_transmit_structures(struct adapter *adapter)
630 struct tx_ring *txr = adapter->tx_rings;
632 for (int i = 0; i < adapter->num_queues; i++, txr++) {
634 ixgbe_free_transmit_buffers(txr);
635 ixgbe_dma_free(adapter, &txr->txdma);
636 IXGBE_TX_UNLOCK(txr);
637 IXGBE_TX_LOCK_DESTROY(txr);
639 free(adapter->tx_rings, M_IXGBE);
640 } /* ixgbe_free_transmit_structures */
642 /************************************************************************
643 * ixgbe_free_transmit_buffers
645 * Free transmit ring related data structures.
646 ************************************************************************/
648 ixgbe_free_transmit_buffers(struct tx_ring *txr)
650 struct adapter *adapter = txr->adapter;
651 struct ixgbe_tx_buf *tx_buffer;
654 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
656 if (txr->tx_buffers == NULL)
659 tx_buffer = txr->tx_buffers;
660 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
661 if (tx_buffer->m_head != NULL) {
662 bus_dmamap_sync(txr->txtag, tx_buffer->map,
663 BUS_DMASYNC_POSTWRITE);
664 bus_dmamap_unload(txr->txtag, tx_buffer->map);
665 m_freem(tx_buffer->m_head);
666 tx_buffer->m_head = NULL;
667 if (tx_buffer->map != NULL) {
668 bus_dmamap_destroy(txr->txtag, tx_buffer->map);
669 tx_buffer->map = NULL;
671 } else if (tx_buffer->map != NULL) {
672 bus_dmamap_unload(txr->txtag, tx_buffer->map);
673 bus_dmamap_destroy(txr->txtag, tx_buffer->map);
674 tx_buffer->map = NULL;
678 buf_ring_free(txr->br, M_IXGBE);
679 if (txr->tx_buffers != NULL) {
680 free(txr->tx_buffers, M_IXGBE);
681 txr->tx_buffers = NULL;
683 if (txr->txtag != NULL) {
684 bus_dma_tag_destroy(txr->txtag);
687 } /* ixgbe_free_transmit_buffers */
689 /************************************************************************
692 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
693 ************************************************************************/
695 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
696 u32 *cmd_type_len, u32 *olinfo_status)
698 struct ixgbe_adv_tx_context_desc *TXD;
699 struct ether_vlan_header *eh;
706 int ehdrlen, ip_hlen = 0;
708 int ctxd = txr->next_avail_desc;
709 u32 vlan_macip_lens = 0;
710 u32 type_tucmd_mlhl = 0;
717 /* First check if TSO is to be used */
718 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO | CSUM_IP6_TSO))
719 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
721 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
724 /* Indicate the whole packet as payload when not doing TSO */
725 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
727 /* Now ready a context descriptor */
728 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
731 * In advanced descriptors the vlan tag must
732 * be placed into the context descriptor. Hence
733 * we need to make one even if not doing offloads.
735 if (mp->m_flags & M_VLANTAG) {
736 vtag = htole16(mp->m_pkthdr.ether_vtag);
737 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
738 } else if (!IXGBE_IS_X550VF(txr->adapter) && (offload == FALSE))
742 * Determine where frame payload starts.
743 * Jump over vlan headers if already present,
744 * helpful for QinQ too.
746 eh = mtod(mp, struct ether_vlan_header *);
747 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
748 etype = ntohs(eh->evl_proto);
749 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
751 etype = ntohs(eh->evl_encap_proto);
752 ehdrlen = ETHER_HDR_LEN;
755 /* Set the ether header length */
756 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
758 if (offload == FALSE)
762 * If the first mbuf only includes the ethernet header,
763 * jump to the next one
764 * XXX: This assumes the stack splits mbufs containing headers
765 * on header boundaries
766 * XXX: And assumes the entire IP header is contained in one mbuf
768 if (mp->m_len == ehdrlen && mp->m_next)
769 l3d = mtod(mp->m_next, caddr_t);
771 l3d = mtod(mp, caddr_t) + ehdrlen;
776 ip = (struct ip *)(l3d);
777 ip_hlen = ip->ip_hl << 2;
779 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
780 /* Insert IPv4 checksum into data descriptors */
781 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
783 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
789 ip6 = (struct ip6_hdr *)(l3d);
790 ip_hlen = sizeof(struct ip6_hdr);
791 ipproto = ip6->ip6_nxt;
792 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
800 vlan_macip_lens |= ip_hlen;
802 /* No support for offloads for non-L4 next headers */
805 if (mp->m_pkthdr.csum_flags &
806 (CSUM_IP_TCP | CSUM_IP6_TCP))
807 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
812 if (mp->m_pkthdr.csum_flags &
813 (CSUM_IP_UDP | CSUM_IP6_UDP))
814 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
819 if (mp->m_pkthdr.csum_flags &
820 (CSUM_IP_SCTP | CSUM_IP6_SCTP))
821 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
830 if (offload) /* Insert L4 checksum into data descriptors */
831 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
834 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
836 /* Now copy bits into descriptor */
837 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
838 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
839 TXD->seqnum_seed = htole32(0);
840 TXD->mss_l4len_idx = htole32(0);
842 /* We've consumed the first desc, adjust counters */
843 if (++ctxd == txr->num_desc)
845 txr->next_avail_desc = ctxd;
849 } /* ixgbe_tx_ctx_setup */
851 /************************************************************************
854 * Setup work for hardware segmentation offload (TSO) on
855 * adapters using advanced tx descriptors
856 ************************************************************************/
858 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp, u32 *cmd_type_len,
861 struct ixgbe_adv_tx_context_desc *TXD;
862 struct ether_vlan_header *eh;
870 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
871 u32 vlan_macip_lens = 0;
872 u32 type_tucmd_mlhl = 0;
873 u32 mss_l4len_idx = 0, paylen;
874 u16 vtag = 0, eh_type;
877 * Determine where frame payload starts.
878 * Jump over vlan headers if already present
880 eh = mtod(mp, struct ether_vlan_header *);
881 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
882 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
883 eh_type = eh->evl_proto;
885 ehdrlen = ETHER_HDR_LEN;
886 eh_type = eh->evl_encap_proto;
889 switch (ntohs(eh_type)) {
892 ip = (struct ip *)(mp->m_data + ehdrlen);
893 if (ip->ip_p != IPPROTO_TCP)
896 ip_hlen = ip->ip_hl << 2;
897 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
898 th->th_sum = in_pseudo(ip->ip_src.s_addr,
899 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
900 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
901 /* Tell transmit desc to also do IPv4 checksum. */
902 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
907 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
908 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
909 if (ip6->ip6_nxt != IPPROTO_TCP)
911 ip_hlen = sizeof(struct ip6_hdr);
912 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
913 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
914 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
918 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
919 __func__, ntohs(eh_type));
923 ctxd = txr->next_avail_desc;
924 TXD = (struct ixgbe_adv_tx_context_desc *)&txr->tx_base[ctxd];
926 tcp_hlen = th->th_off << 2;
928 /* This is used in the transmit desc in encap */
929 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
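/*
 * For example, a 1514-byte frame carrying a 14-byte Ethernet header,
 * a 20-byte IPv4 header and a 20-byte TCP header yields paylen = 1460.
 */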
931 /* VLAN MACLEN IPLEN */
932 if (mp->m_flags & M_VLANTAG) {
933 vtag = htole16(mp->m_pkthdr.ether_vtag);
934 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
937 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
938 vlan_macip_lens |= ip_hlen;
939 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
941 /* ADV DTYPE TUCMD */
942 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
943 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
944 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
947 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
948 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
949 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
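/*
 * The hardware uses MSS to size each emitted segment and L4LEN to know
 * how much TCP header to replicate in front of every segment.
 */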
951 TXD->seqnum_seed = htole32(0);
953 if (++ctxd == txr->num_desc)
957 txr->next_avail_desc = ctxd;
958 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
959 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
960 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
964 } /* ixgbe_tso_setup */
967 /************************************************************************
970 * Examine each tx_buffer in the used queue. If the hardware is done
971 * processing the packet, then free the associated resources. The
972 * tx_buffer is put back on the free queue.
973 ************************************************************************/
975 ixgbe_txeof(struct tx_ring *txr)
977 struct adapter *adapter = txr->adapter;
978 struct ixgbe_tx_buf *buf;
979 union ixgbe_adv_tx_desc *txd;
980 u32 work, processed = 0;
981 u32 limit = adapter->tx_process_limit;
983 mtx_assert(&txr->tx_mtx, MA_OWNED);
986 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) &&
987 (adapter->ifp->if_capenable & IFCAP_NETMAP)) {
988 struct netmap_adapter *na = NA(adapter->ifp);
989 struct netmap_kring *kring = &na->tx_rings[txr->me];
991 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
992 BUS_DMASYNC_POSTREAD);
994 * In netmap mode, all the work is done in the context
995 * of the client thread. Interrupt handlers only wake up
996 * clients, which may be sleeping on individual rings
997 * or on a global resource for all rings.
998 * To implement tx interrupt mitigation, we wake up the client
999 * thread roughly every half ring, even if the NIC interrupts
1000 * more frequently. This is implemented as follows:
1001 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1002 * the slot that should wake up the thread (nkr_num_slots
1003 * means the user thread should not be woken up);
1004 * - the driver ignores tx interrupts unless netmap_mitigate=0
1005 * or the slot has the DD bit set.
1007 if (!netmap_mitigate ||
1008 (kring->nr_kflags < kring->nkr_num_slots &&
1009 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1010 netmap_tx_irq(adapter->ifp, txr->me);
1014 #endif /* DEV_NETMAP */
1016 if (txr->tx_avail == txr->num_desc) {
1021 /* Get work starting point */
1022 work = txr->next_to_clean;
1023 buf = &txr->tx_buffers[work];
1024 txd = &txr->tx_base[work];
1025 work -= txr->num_desc; /* The distance to ring end */
1026 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1027 BUS_DMASYNC_POSTREAD);
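/*
 * Note: 'work' is kept as a negative offset from the end of the ring;
 * when it climbs back up to zero we have wrapped, so buf/txd are reset
 * to the start of the ring (see the wrap checks below).
 */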
1030 union ixgbe_adv_tx_desc *eop = buf->eop;
1031 if (eop == NULL) /* No work */
1034 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1035 break; /* I/O not complete */
1038 txr->bytes += buf->m_head->m_pkthdr.len;
1039 bus_dmamap_sync(txr->txtag, buf->map,
1040 BUS_DMASYNC_POSTWRITE);
1041 bus_dmamap_unload(txr->txtag, buf->map);
1042 m_freem(buf->m_head);
1048 /* We clean the range if multi segment */
1049 while (txd != eop) {
1053 /* wrap the ring? */
1054 if (__predict_false(!work)) {
1055 work -= txr->num_desc;
1056 buf = txr->tx_buffers;
1060 txr->bytes += buf->m_head->m_pkthdr.len;
1061 bus_dmamap_sync(txr->txtag, buf->map,
1062 BUS_DMASYNC_POSTWRITE);
1063 bus_dmamap_unload(txr->txtag, buf->map);
1064 m_freem(buf->m_head);
1074 /* Try the next packet */
1078 /* reset with a wrap */
1079 if (__predict_false(!work)) {
1080 work -= txr->num_desc;
1081 buf = txr->tx_buffers;
1085 } while (__predict_true(--limit));
1087 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1088 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1090 work += txr->num_desc;
1091 txr->next_to_clean = work;
1094 * Queue hang detection: we know there's
1095 * work outstanding or the first return
1096 * would have been taken, so increment busy
1097 * if nothing managed to get cleaned; then
1098 * in local_timer it will be checked and
1099 * marked as HUNG if it exceeds a MAX attempt.
1101 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1104 * If anything gets cleaned we reset the state to 1;
1105 * note this will turn off HUNG if it's set.
1110 if (txr->tx_avail == txr->num_desc)
1116 /************************************************************************
1119 * Used to detect a descriptor that has been merged by Hardware RSC.
1120 ************************************************************************/
1122 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1124 return (le32toh(rx->wb.lower.lo_dword.data) &
1125 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1126 } /* ixgbe_rsc_count */
1128 /************************************************************************
1129 * ixgbe_setup_hw_rsc
1131 * Initialize the Hardware RSC (LRO) feature on 82599
1132 * for an RX ring; this is toggled by the LRO capability
1133 * even though it is transparent to the stack.
1135 * NOTE: Since this HW feature only works with IPv4 and
1136 * testing has shown soft LRO to be as effective,
1137 * this feature will be disabled by default.
1138 ************************************************************************/
1140 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1142 struct adapter *adapter = rxr->adapter;
1143 struct ixgbe_hw *hw = &adapter->hw;
1144 u32 rscctrl, rdrxctl;
1146 /* If turning LRO/RSC off we need to disable it */
1147 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1148 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1149 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1153 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1154 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1156 /* Always strip CRC unless Netmap disabled it */
1157 if (!(adapter->feat_en & IXGBE_FEATURE_NETMAP) ||
1158 !(adapter->ifp->if_capenable & IFCAP_NETMAP) ||
1160 #endif /* DEV_NETMAP */
1161 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1162 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1163 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1165 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1166 rscctrl |= IXGBE_RSCCTL_RSCEN;
1168 * Limit the total number of descriptors that
1169 * can be combined, so it does not exceed 64K
1171 if (rxr->mbuf_sz == MCLBYTES)
1172 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1173 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1174 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1175 else if (rxr->mbuf_sz == MJUM9BYTES)
1176 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1177 else /* Using 16K cluster */
1178 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
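/*
 * The MAXDESC_* choice above scales inversely with the cluster size so
 * that the largest coalesced RSC frame stays within the limit noted in
 * the comment above.
 */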
1180 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1182 /* Enable TCP header recognition */
1183 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1184 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) | IXGBE_PSRTYPE_TCPHDR));
1186 /* Disable RSC for ACK packets */
1187 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1188 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1191 } /* ixgbe_setup_hw_rsc */
1193 /************************************************************************
1194 * ixgbe_refresh_mbufs
1196 * Refresh mbuf buffers for RX descriptor rings
1197 * - now keeps its own state so discards due to resource
1198 * exhaustion are unnecessary; if an mbuf cannot be obtained
1199 * it just returns, keeping its placeholder, so it can simply
1200 * be recalled to try again.
1201 ************************************************************************/
1203 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1205 struct adapter *adapter = rxr->adapter;
1206 struct ixgbe_rx_buf *rxbuf;
1208 bus_dma_segment_t seg[1];
1209 int i, j, nsegs, error;
1210 bool refreshed = FALSE;
1212 i = j = rxr->next_to_refresh;
1213 /* Control the loop with one beyond */
1214 if (++j == rxr->num_desc)
1217 while (j != limit) {
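/*
 * 'i' is the slot being refreshed while 'j' runs one slot ahead
 * ("one beyond"), so the loop stops before reaching 'limit'.
 */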
1218 rxbuf = &rxr->rx_buffers[i];
1219 if (rxbuf->buf == NULL) {
1220 mp = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1224 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1225 m_adj(mp, ETHER_ALIGN);
1229 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1231 /* If we're dealing with an mbuf that was copied rather
1232 * than replaced, there's no need to go through busdma.
1234 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1235 /* Get the memory mapping */
1236 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1237 error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap,
1238 mp, seg, &nsegs, BUS_DMA_NOWAIT);
1240 printf("Refresh mbufs: payload dmamap load failure - %d\n", error);
1246 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1247 BUS_DMASYNC_PREREAD);
1248 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1249 htole64(seg[0].ds_addr);
1251 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1252 rxbuf->flags &= ~IXGBE_RX_COPY;
1256 /* Next is precalculated */
1258 rxr->next_to_refresh = i;
1259 if (++j == rxr->num_desc)
1264 if (refreshed) /* Update hardware tail index */
1265 IXGBE_WRITE_REG(&adapter->hw, rxr->tail, rxr->next_to_refresh);
1268 } /* ixgbe_refresh_mbufs */
1270 /************************************************************************
1271 * ixgbe_allocate_receive_buffers
1273 * Allocate memory for rx_buffer structures. Since we use one
1274 * rx_buffer per received packet, the maximum number of rx_buffers
1275 * that we'll need is equal to the number of receive descriptors
1276 * that we've allocated.
1277 ************************************************************************/
1279 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1281 struct adapter *adapter = rxr->adapter;
1282 device_t dev = adapter->dev;
1283 struct ixgbe_rx_buf *rxbuf;
1286 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1287 rxr->rx_buffers = (struct ixgbe_rx_buf *)malloc(bsize, M_IXGBE,
1289 if (!rxr->rx_buffers) {
1290 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1295 error = bus_dma_tag_create(
1296 /* parent */ bus_get_dma_tag(dev),
1299 /* lowaddr */ BUS_SPACE_MAXADDR,
1300 /* highaddr */ BUS_SPACE_MAXADDR,
1302 /* filterarg */ NULL,
1303 /* maxsize */ MJUM16BYTES,
1305 /* maxsegsize */ MJUM16BYTES,
1307 /* lockfunc */ NULL,
1308 /* lockfuncarg */ NULL,
1311 device_printf(dev, "Unable to create RX DMA tag\n");
1315 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1316 rxbuf = &rxr->rx_buffers[i];
1317 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1319 device_printf(dev, "Unable to create RX dma map\n");
1327 /* Frees all, but can handle partial completion */
1328 ixgbe_free_receive_structures(adapter);
1331 } /* ixgbe_allocate_receive_buffers */
1333 /************************************************************************
1334 * ixgbe_free_receive_ring
1335 ************************************************************************/
1337 ixgbe_free_receive_ring(struct rx_ring *rxr)
1339 struct ixgbe_rx_buf *rxbuf;
1341 for (int i = 0; i < rxr->num_desc; i++) {
1342 rxbuf = &rxr->rx_buffers[i];
1343 if (rxbuf->buf != NULL) {
1344 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1345 BUS_DMASYNC_POSTREAD);
1346 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1347 rxbuf->buf->m_flags |= M_PKTHDR;
1348 m_freem(rxbuf->buf);
1353 } /* ixgbe_free_receive_ring */
1355 /************************************************************************
1356 * ixgbe_setup_receive_ring
1358 * Initialize a receive ring and its buffers.
1359 ************************************************************************/
1361 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1363 struct adapter *adapter;
1366 struct ixgbe_rx_buf *rxbuf;
1367 struct lro_ctrl *lro = &rxr->lro;
1369 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1370 struct netmap_slot *slot;
1371 #endif /* DEV_NETMAP */
1372 bus_dma_segment_t seg[1];
1373 int rsize, nsegs, error = 0;
1375 adapter = rxr->adapter;
1379 /* Clear the ring contents */
1383 if (adapter->feat_en & IXGBE_FEATURE_NETMAP)
1384 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1385 #endif /* DEV_NETMAP */
1387 rsize = roundup2(adapter->num_rx_desc *
1388 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1389 bzero((void *)rxr->rx_base, rsize);
1390 /* Cache the size */
1391 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1393 /* Free current RX buffer structs and their mbufs */
1394 ixgbe_free_receive_ring(rxr);
1396 /* Now replenish the mbufs */
1397 for (int j = 0; j != rxr->num_desc; ++j) {
1400 rxbuf = &rxr->rx_buffers[j];
1404 * In netmap mode, fill the map and set the buffer
1405 * address in the NIC ring, considering the offset
1406 * between the netmap and NIC rings (see comment in
1407 * ixgbe_setup_transmit_ring() ). No need to allocate
1408 * an mbuf, so end the block with a continue;
1410 if ((adapter->feat_en & IXGBE_FEATURE_NETMAP) && slot) {
1411 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1415 addr = PNMB(na, slot + sj, &paddr);
1416 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1417 /* Update descriptor and the cached value */
1418 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1419 rxbuf->addr = htole64(paddr);
1422 #endif /* DEV_NETMAP */
1425 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR,
1426 adapter->rx_mbuf_sz);
1427 if (rxbuf->buf == NULL) {
1432 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1433 /* Get the memory mapping */
1434 error = bus_dmamap_load_mbuf_sg(rxr->ptag, rxbuf->pmap, mp, seg,
1435 &nsegs, BUS_DMA_NOWAIT);
1438 bus_dmamap_sync(rxr->ptag, rxbuf->pmap, BUS_DMASYNC_PREREAD);
1439 /* Update the descriptor and the cached value */
1440 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1441 rxbuf->addr = htole64(seg[0].ds_addr);
1445 /* Setup our descriptor indices */
1446 rxr->next_to_check = 0;
1447 rxr->next_to_refresh = 0;
1448 rxr->lro_enabled = FALSE;
1451 rxr->vtag_strip = FALSE;
1453 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1454 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1457 * Now set up the LRO interface
1459 if (ixgbe_rsc_enable)
1460 ixgbe_setup_hw_rsc(rxr);
1461 else if (ifp->if_capenable & IFCAP_LRO) {
1462 int err = tcp_lro_init(lro);
1464 device_printf(dev, "LRO Initialization failed!\n");
1467 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1468 rxr->lro_enabled = TRUE;
1469 lro->ifp = adapter->ifp;
1472 IXGBE_RX_UNLOCK(rxr);
1477 ixgbe_free_receive_ring(rxr);
1478 IXGBE_RX_UNLOCK(rxr);
1481 } /* ixgbe_setup_receive_ring */
1483 /************************************************************************
1484 * ixgbe_setup_receive_structures - Initialize all receive rings.
1485 ************************************************************************/
1487 ixgbe_setup_receive_structures(struct adapter *adapter)
1489 struct rx_ring *rxr = adapter->rx_rings;
1492 for (j = 0; j < adapter->num_queues; j++, rxr++)
1493 if (ixgbe_setup_receive_ring(rxr))
1499 * Free RX buffers allocated so far; we will only handle
1500 * the rings that completed, and the failing case will have
1501 * cleaned up after itself. 'j' failed, so it's the terminus.
1503 for (int i = 0; i < j; ++i) {
1504 rxr = &adapter->rx_rings[i];
1505 ixgbe_free_receive_ring(rxr);
1509 } /* ixgbe_setup_receive_structures */
1512 /************************************************************************
1513 * ixgbe_free_receive_structures - Free all receive rings.
1514 ************************************************************************/
1516 ixgbe_free_receive_structures(struct adapter *adapter)
1518 struct rx_ring *rxr = adapter->rx_rings;
1519 struct lro_ctrl *lro;
1521 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1523 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1525 ixgbe_free_receive_buffers(rxr);
1526 /* Free LRO memory */
1528 /* Free the ring memory as well */
1529 ixgbe_dma_free(adapter, &rxr->rxdma);
1532 free(adapter->rx_rings, M_IXGBE);
1533 } /* ixgbe_free_receive_structures */
1536 /************************************************************************
1537 * ixgbe_free_receive_buffers - Free receive ring data structures
1538 ************************************************************************/
1540 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1542 struct adapter *adapter = rxr->adapter;
1543 struct ixgbe_rx_buf *rxbuf;
1545 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1547 /* Cleanup any existing buffers */
1548 if (rxr->rx_buffers != NULL) {
1549 for (int i = 0; i < adapter->num_rx_desc; i++) {
1550 rxbuf = &rxr->rx_buffers[i];
1551 if (rxbuf->buf != NULL) {
1552 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1553 BUS_DMASYNC_POSTREAD);
1554 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1555 rxbuf->buf->m_flags |= M_PKTHDR;
1556 m_freem(rxbuf->buf);
1559 if (rxbuf->pmap != NULL) {
1560 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1564 if (rxr->rx_buffers != NULL) {
1565 free(rxr->rx_buffers, M_IXGBE);
1566 rxr->rx_buffers = NULL;
1570 if (rxr->ptag != NULL) {
1571 bus_dma_tag_destroy(rxr->ptag);
1576 } /* ixgbe_free_receive_buffers */
1578 /************************************************************************
1580 ************************************************************************/
1581 static __inline void
1582 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m,
1586 * At the moment LRO is only for IP/TCP packets, and the packet's TCP
1587 * checksum should be computed by hardware. Also, it should not have a VLAN
1588 * tag in the Ethernet header. In the case of IPv6 we do not yet support ext. hdrs.
1590 if (rxr->lro_enabled &&
1591 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1592 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1593 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1594 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1595 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1596 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1597 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1598 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1600 * Send to the stack if:
1601 * - LRO not enabled, or
1602 * - no LRO resources, or
1603 * - lro enqueue fails
1605 if (rxr->lro.lro_cnt != 0)
1606 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1609 IXGBE_RX_UNLOCK(rxr);
1610 (*ifp->if_input)(ifp, m);
1612 } /* ixgbe_rx_input */
1614 /************************************************************************
1616 ************************************************************************/
1617 static __inline void
1618 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1620 struct ixgbe_rx_buf *rbuf;
1622 rbuf = &rxr->rx_buffers[i];
1625 * With advanced descriptors the writeback
1626 * clobbers the buffer addrs, so it's easier
1627 * to just free the existing mbufs and take
1628 * the normal refresh path to get new buffers.
1632 if (rbuf->fmp != NULL) {/* Partial chain ? */
1633 rbuf->fmp->m_flags |= M_PKTHDR;
1636 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1637 } else if (rbuf->buf) {
1641 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1646 } /* ixgbe_rx_discard */
1649 /************************************************************************
1652 * This routine executes in interrupt context. It replenishes
1653 * the mbufs in the descriptor ring and sends data which has been
1654 * DMA'ed into host memory to the upper layer.
1656 * Return TRUE for more work, FALSE for all clean.
1657 ************************************************************************/
1659 ixgbe_rxeof(struct ix_queue *que)
1661 struct adapter *adapter = que->adapter;
1662 struct rx_ring *rxr = que->rxr;
1663 struct ifnet *ifp = adapter->ifp;
1664 struct lro_ctrl *lro = &rxr->lro;
1665 #if __FreeBSD_version < 1100105
1666 struct lro_entry *queued;
1668 union ixgbe_adv_rx_desc *cur;
1669 struct ixgbe_rx_buf *rbuf, *nbuf;
1670 int i, nextp, processed = 0;
1672 u32 count = adapter->rx_process_limit;
1678 if (adapter->feat_en & IXGBE_FEATURE_NETMAP) {
1679 /* Same as the txeof routine: wakeup clients on intr. */
1680 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1681 IXGBE_RX_UNLOCK(rxr);
1685 #endif /* DEV_NETMAP */
1687 for (i = rxr->next_to_check; count != 0;) {
1688 struct mbuf *sendmp, *mp;
1694 /* Sync the ring. */
1695 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1696 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1698 cur = &rxr->rx_base[i];
1699 staterr = le32toh(cur->wb.upper.status_error);
1700 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1702 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1704 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1711 cur->wb.upper.status_error = 0;
1712 rbuf = &rxr->rx_buffers[i];
1715 len = le16toh(cur->wb.upper.length);
1716 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1717 IXGBE_RXDADV_PKTTYPE_MASK;
1718 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1720 /* Make sure bad packets are discarded */
1721 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1722 rxr->rx_discarded++;
1723 ixgbe_rx_discard(rxr, i);
1728 * On the 82599, which supports hardware
1729 * LRO (called HW RSC), packets need
1730 * not be fragmented across sequential
1731 * descriptors; rather, the next descriptor
1732 * is indicated in bits of the descriptor.
1733 * This also means that we might process
1734 * more than one packet at a time, something
1735 * that has never been true before; it
1736 * required eliminating global chain pointers
1737 * in favor of what we are doing here. -jfv
1741 * Figure out the next descriptor
1744 if (rxr->hw_rsc == TRUE) {
1745 rsc = ixgbe_rsc_count(cur);
1746 rxr->rsc_num += (rsc - 1);
1748 if (rsc) { /* Get hardware index */
1749 nextp = ((staterr & IXGBE_RXDADV_NEXTP_MASK) >>
1750 IXGBE_RXDADV_NEXTP_SHIFT);
1751 } else { /* Just sequential */
1753 if (nextp == adapter->num_rx_desc)
1756 nbuf = &rxr->rx_buffers[nextp];
1760 * Rather than using the fmp/lmp global pointers
1761 * we now keep the head of a packet chain in the
1762 * buffer struct and pass this along from one
1763 * descriptor to the next, until we get EOP.
1767 * See if there is a stored head
1768 * that determines what we are
1771 if (sendmp != NULL) { /* secondary frag */
1772 rbuf->buf = rbuf->fmp = NULL;
1773 mp->m_flags &= ~M_PKTHDR;
1774 sendmp->m_pkthdr.len += mp->m_len;
1777 * Optimize. This might be a small packet,
1778 * maybe just a TCP ACK. Do a fast copy that
1779 * is cache aligned into a new mbuf, and
1780 * leave the old mbuf+cluster for re-use.
1782 if (eop && len <= IXGBE_RX_COPY_LEN) {
1783 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1784 if (sendmp != NULL) {
1785 sendmp->m_data += IXGBE_RX_COPY_ALIGN;
1786 ixgbe_bcopy(mp->m_data, sendmp->m_data,
1788 sendmp->m_len = len;
1790 rbuf->flags |= IXGBE_RX_COPY;
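/*
 * IXGBE_RX_COPY tells ixgbe_refresh_mbufs() that the cluster was reused
 * in place, so the refresh path can skip the busdma reload and simply
 * rewrite the cached descriptor address.
 */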
1793 if (sendmp == NULL) {
1794 rbuf->buf = rbuf->fmp = NULL;
1798 /* first desc of a non-ps chain */
1799 sendmp->m_flags |= M_PKTHDR;
1800 sendmp->m_pkthdr.len = mp->m_len;
1804 /* Pass the head pointer on */
1808 mp->m_next = nbuf->buf;
1809 } else { /* Sending this frame */
1810 sendmp->m_pkthdr.rcvif = ifp;
1812 /* capture data for AIM */
1813 rxr->bytes += sendmp->m_pkthdr.len;
1814 rxr->rx_bytes += sendmp->m_pkthdr.len;
1815 /* Process vlan info */
1816 if ((rxr->vtag_strip) && (staterr & IXGBE_RXD_STAT_VP))
1817 vtag = le16toh(cur->wb.upper.vlan);
1819 sendmp->m_pkthdr.ether_vtag = vtag;
1820 sendmp->m_flags |= M_VLANTAG;
1822 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1823 ixgbe_rx_checksum(staterr, sendmp, ptype);
1826 * In the case of multiqueue, we have the RXCSUM.PCSD bit set
1827 * and never cleared. This means we have an RSS hash
1828 * available to be used.
1830 if (adapter->num_queues > 1) {
1831 sendmp->m_pkthdr.flowid =
1832 le32toh(cur->wb.lower.hi_dword.rss);
1833 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1834 case IXGBE_RXDADV_RSSTYPE_IPV4:
1835 M_HASHTYPE_SET(sendmp,
1836 M_HASHTYPE_RSS_IPV4);
1838 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1839 M_HASHTYPE_SET(sendmp,
1840 M_HASHTYPE_RSS_TCP_IPV4);
1842 case IXGBE_RXDADV_RSSTYPE_IPV6:
1843 M_HASHTYPE_SET(sendmp,
1844 M_HASHTYPE_RSS_IPV6);
1846 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1847 M_HASHTYPE_SET(sendmp,
1848 M_HASHTYPE_RSS_TCP_IPV6);
1850 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1851 M_HASHTYPE_SET(sendmp,
1852 M_HASHTYPE_RSS_IPV6_EX);
1854 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1855 M_HASHTYPE_SET(sendmp,
1856 M_HASHTYPE_RSS_TCP_IPV6_EX);
1858 #if __FreeBSD_version > 1100000
1859 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1860 M_HASHTYPE_SET(sendmp,
1861 M_HASHTYPE_RSS_UDP_IPV4);
1863 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1864 M_HASHTYPE_SET(sendmp,
1865 M_HASHTYPE_RSS_UDP_IPV6);
1867 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1868 M_HASHTYPE_SET(sendmp,
1869 M_HASHTYPE_RSS_UDP_IPV6_EX);
1873 #if __FreeBSD_version < 1100116
1874 M_HASHTYPE_SET(sendmp,
1877 M_HASHTYPE_SET(sendmp,
1878 M_HASHTYPE_OPAQUE_HASH);
1882 sendmp->m_pkthdr.flowid = que->msix;
1883 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1887 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1888 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1890 /* Advance our pointers to the next descriptor. */
1891 if (++i == rxr->num_desc)
1894 /* Now send to the stack or do LRO */
1895 if (sendmp != NULL) {
1896 rxr->next_to_check = i;
1897 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1898 i = rxr->next_to_check;
1901 /* Every 8 descriptors we go to refresh mbufs */
1902 if (processed == 8) {
1903 ixgbe_refresh_mbufs(rxr, i);
1908 /* Refresh any remaining buf structs */
1909 if (ixgbe_rx_unrefreshed(rxr))
1910 ixgbe_refresh_mbufs(rxr, i);
1912 rxr->next_to_check = i;
1915 * Flush any outstanding LRO work
1917 #if __FreeBSD_version < 1100105
1918 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1919 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1920 tcp_lro_flush(lro, queued);
1923 tcp_lro_flush_all(lro);
1926 IXGBE_RX_UNLOCK(rxr);
1929 * Still have cleaning to do?
1931 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1938 /************************************************************************
1941 * Verify that the hardware indicated that the checksum is valid.
1942 * Inform the stack about the checksum status so that the stack
1943 * doesn't spend time verifying it.
1944 ************************************************************************/
1946 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1948 u16 status = (u16)staterr;
1949 u8 errors = (u8)(staterr >> 24);
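/*
 * The low 16 bits of staterr carry the status bits (IPCS/L4CS);
 * the top byte carries the error bits (IPE/TCPE).
 */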
1952 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1953 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1957 if (status & IXGBE_RXD_STAT_IPCS) {
1958 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
1959 /* IP Checksum Good */
1960 if (!(errors & IXGBE_RXD_ERR_IPE))
1961 mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
1963 /* TCP/UDP/SCTP checksum */
1964 if (status & IXGBE_RXD_STAT_L4CS) {
1965 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
1966 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1967 mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
1969 mp->m_pkthdr.csum_data = htons(0xffff);
1972 } /* ixgbe_rx_checksum */
1974 /************************************************************************
1975 * ixgbe_dmamap_cb - Manage DMA'able memory.
1976 ************************************************************************/
1978 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
1982 *(bus_addr_t *)arg = segs->ds_addr;
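/*
 * Single-segment mapping: stash the bus address where the caller asked
 * for it (ixgbe_dma_malloc() passes &dma->dma_paddr as 'arg').
 */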
1985 } /* ixgbe_dmamap_cb */
1987 /************************************************************************
1989 ************************************************************************/
1991 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
1992 struct ixgbe_dma_alloc *dma, int mapflags)
1994 device_t dev = adapter->dev;
1997 r = bus_dma_tag_create(
1998 /* parent */ bus_get_dma_tag(adapter->dev),
1999 /* alignment */ DBA_ALIGN,
2001 /* lowaddr */ BUS_SPACE_MAXADDR,
2002 /* highaddr */ BUS_SPACE_MAXADDR,
2004 /* filterarg */ NULL,
2007 /* maxsegsize */ size,
2008 /* flags */ BUS_DMA_ALLOCNOW,
2009 /* lockfunc */ NULL,
2010 /* lockfuncarg */ NULL,
2014 "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n",
2018 r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2019 BUS_DMA_NOWAIT, &dma->dma_map);
2022 "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
2025 r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
2026 ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
2029 "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
2032 dma->dma_size = size;
2036 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2038 bus_dma_tag_destroy(dma->dma_tag);
2040 dma->dma_tag = NULL;
2043 } /* ixgbe_dma_malloc */
2046 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2048 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2049 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2050 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2051 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2052 bus_dma_tag_destroy(dma->dma_tag);
2053 } /* ixgbe_dma_free */
2056 /************************************************************************
2057 * ixgbe_allocate_queues
2059 * Allocate memory for the transmit and receive rings, and then
2060 * the descriptors associated with each; called only once at attach.
2061 ************************************************************************/
2063 ixgbe_allocate_queues(struct adapter *adapter)
2065 device_t dev = adapter->dev;
2066 struct ix_queue *que;
2067 struct tx_ring *txr;
2068 struct rx_ring *rxr;
2069 int rsize, tsize, error = IXGBE_SUCCESS;
2070 int txconf = 0, rxconf = 0;
2072 /* First, allocate the top level queue structs */
2073 adapter->queues = (struct ix_queue *)malloc(sizeof(struct ix_queue) *
2074 adapter->num_queues, M_IXGBE, M_NOWAIT | M_ZERO);
2075 if (!adapter->queues) {
2076 device_printf(dev, "Unable to allocate queue memory\n");
2081 /* Second, allocate the TX ring struct memory */
2082 adapter->tx_rings = (struct tx_ring *)malloc(sizeof(struct tx_ring) *
2083 adapter->num_queues, M_IXGBE, M_NOWAIT | M_ZERO);
2084 if (!adapter->tx_rings) {
2085 device_printf(dev, "Unable to allocate TX ring memory\n");
2090 /* Third, allocate the RX ring */
2091 adapter->rx_rings = (struct rx_ring *)malloc(sizeof(struct rx_ring) *
2092 adapter->num_queues, M_IXGBE, M_NOWAIT | M_ZERO);
2093 if (!adapter->rx_rings) {
2094 device_printf(dev, "Unable to allocate RX ring memory\n");
2099 /* For the ring itself */
2100 tsize = roundup2(adapter->num_tx_desc * sizeof(union ixgbe_adv_tx_desc),
2104 * Now set up the TX queues; txconf is needed to handle the
2105 * possibility that things fail midcourse and we need to
2106 * undo memory gracefully.
2108 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2109 /* Set up some basics */
2110 txr = &adapter->tx_rings[i];
2111 txr->adapter = adapter;
2113 /* In case SR-IOV is enabled, align the index properly */
2114 txr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2116 txr->num_desc = adapter->num_tx_desc;
2118 /* Initialize the TX side lock */
2119 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2120 device_get_nameunit(dev), txr->me);
2121 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2123 if (ixgbe_dma_malloc(adapter, tsize, &txr->txdma,
2126 "Unable to allocate TX Descriptor memory\n");
2130 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2131 bzero((void *)txr->tx_base, tsize);
2133 /* Now allocate transmit buffers for the ring */
2134 if (ixgbe_allocate_transmit_buffers(txr)) {
2136 "Critical Failure setting up transmit buffers\n");
2140 if (!(adapter->feat_en & IXGBE_FEATURE_LEGACY_TX)) {
2141 /* Allocate a buf ring */
2142 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_IXGBE,
2143 M_WAITOK, &txr->tx_mtx);
2144 if (txr->br == NULL) {
2146 "Critical Failure setting up buf ring\n");
2154 * Next the RX queues...
2156 rsize = roundup2(adapter->num_rx_desc * sizeof(union ixgbe_adv_rx_desc),
2158 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2159 rxr = &adapter->rx_rings[i];
2160 /* Set up some basics */
2161 rxr->adapter = adapter;
2162 /* In case SR-IOV is enabled, align the index properly */
2163 rxr->me = ixgbe_vf_que_index(adapter->iov_mode, adapter->pool,
2165 rxr->num_desc = adapter->num_rx_desc;
2167 /* Initialize the RX side lock */
2168 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2169 device_get_nameunit(dev), rxr->me);
2170 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2172 if (ixgbe_dma_malloc(adapter, rsize, &rxr->rxdma,
2175 "Unable to allocate RxDescriptor memory\n");
2179 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2180 bzero((void *)rxr->rx_base, rsize);
2182 /* Allocate receive buffers for the ring */
2183 if (ixgbe_allocate_receive_buffers(rxr)) {
2185 "Critical Failure setting up receive buffers\n");
2192 * Finally set up the queue holding structs
2194 for (int i = 0; i < adapter->num_queues; i++) {
2195 que = &adapter->queues[i];
2196 que->adapter = adapter;
2198 que->txr = &adapter->tx_rings[i];
2199 que->rxr = &adapter->rx_rings[i];
2205 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2206 ixgbe_dma_free(adapter, &rxr->rxdma);
2208 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2209 ixgbe_dma_free(adapter, &txr->txdma);
2210 free(adapter->rx_rings, M_IXGBE);
2212 free(adapter->tx_rings, M_IXGBE);
2214 free(adapter->queues, M_IXGBE);
2217 } /* ixgbe_allocate_queues */