1 /******************************************************************************
3 Copyright (c) 2001-2015, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
36 #ifndef IXGBE_STANDALONE_BUILD
38 #include "opt_inet6.h"
44 #include <net/netmap.h>
45 #include <sys/selinfo.h>
46 #include <dev/netmap/netmap_kern.h>
48 extern int ix_crcstrip;
** this feature only works with
** IPv4, and only on 82599 and later.
** Also, this will cause IP forwarding to
** fail, and that can't be controlled by
** the stack the way LRO can. For all these
** reasons I've deemed it best to leave
** this off and not bother with a tunable
** interface; this would need to be
** compiled in to enable it.
63 static bool ixgbe_rsc_enable = FALSE;
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
75 static int atr_sample_rate = 20;
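/*
** Illustrative sketch, not part of the driver: a per-ring counter can
** gate how often packets are handed to Flow Director, roughly as
** ixgbe_xmit()/ixgbe_atr() do below with txr->atr_count. The helper
** name and parameters here are hypothetical.
*/
#if 0
static inline int
atr_should_sample(unsigned int *counter, unsigned int sample_rate)
{
	/* Count every transmitted packet... */
	if (++(*counter) < sample_rate)
		return (0);
	/* ...and probe only every sample_rate'th packet. */
	*counter = 0;
	return (1);
}
#endif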
78 /* Shared PCI config read/write */
80 ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
84 value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
91 ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
93 pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
99 /*********************************************************************
100 * Local Function prototypes
101 *********************************************************************/
102 static void ixgbe_setup_transmit_ring(struct tx_ring *);
103 static void ixgbe_free_transmit_buffers(struct tx_ring *);
104 static int ixgbe_setup_receive_ring(struct rx_ring *);
105 static void ixgbe_free_receive_buffers(struct rx_ring *);
107 static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
108 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
109 static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
110 static int ixgbe_tx_ctx_setup(struct tx_ring *,
111 struct mbuf *, u32 *, u32 *);
112 static int ixgbe_tso_setup(struct tx_ring *,
113 struct mbuf *, u32 *, u32 *);
115 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
117 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
118 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
121 #ifdef IXGBE_LEGACY_TX
122 /*********************************************************************
123 * Transmit entry point
125 * ixgbe_start is called by the stack to initiate a transmit.
126 * The driver will remain in this routine as long as there are
127 * packets to transmit and transmit resources are available.
* In case resources are not available, the stack is notified
* and the packet is requeued.
130 **********************************************************************/
133 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
136 struct adapter *adapter = txr->adapter;
138 IXGBE_TX_LOCK_ASSERT(txr);
140 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
142 if (!adapter->link_active)
145 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
146 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
149 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
153 if (ixgbe_xmit(txr, &m_head)) {
155 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
158 /* Send a copy of the frame to the BPF listener */
159 ETHER_BPF_MTAP(ifp, m_head);
* Legacy TX start - called by the stack; this
* always uses the first tx ring and should
* not be used with multiqueue tx enabled.
170 ixgbe_start(struct ifnet *ifp)
172 struct adapter *adapter = ifp->if_softc;
173 struct tx_ring *txr = adapter->tx_rings;
175 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
177 ixgbe_start_locked(txr, ifp);
178 IXGBE_TX_UNLOCK(txr);
183 #else /* ! IXGBE_LEGACY_TX */
186 ** Multiqueue Transmit driver
190 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
192 struct adapter *adapter = ifp->if_softc;
193 struct ix_queue *que;
* When doing RSS, map it to the same outbound queue
* as the incoming flow would be mapped to.
*
* If everything is set up correctly, it should be the
* same bucket that the current CPU is in.
* (A worked example follows below.)
204 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
205 i = m->m_pkthdr.flowid % adapter->num_queues;
207 i = curcpu % adapter->num_queues;
209 /* Check for a hung queue and pick alternative */
210 if (((1 << i) & adapter->active_queues) == 0)
211 i = ffsl(adapter->active_queues);
213 txr = &adapter->tx_rings[i];
214 que = &adapter->queues[i];
216 err = drbr_enqueue(ifp, txr->br, m);
219 if (IXGBE_TX_TRYLOCK(txr)) {
220 ixgbe_mq_start_locked(ifp, txr);
221 IXGBE_TX_UNLOCK(txr);
223 taskqueue_enqueue(que->tq, &txr->txq_task);
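/*
** Illustrative note (not from the original source): with 8 queues, an
** mbuf whose RSS flowid is 0x1234ad is enqueued on TX queue
** 0x1234ad % 8 = 5, so its transmit work stays on the same queue pair
** that received the flow. If that bit were clear in active_queues,
** the ffsl() fallback above redirects the packet to an active queue
** instead.
*/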
229 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
231 struct adapter *adapter = txr->adapter;
233 int enqueued = 0, err = 0;
235 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
236 adapter->link_active == 0)
239 /* Process the queue */
240 #if __FreeBSD_version < 901504
241 next = drbr_dequeue(ifp, txr->br);
242 while (next != NULL) {
243 if ((err = ixgbe_xmit(txr, &next)) != 0) {
245 err = drbr_enqueue(ifp, txr->br, next);
247 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
248 if ((err = ixgbe_xmit(txr, &next)) != 0) {
250 drbr_advance(ifp, txr->br);
252 drbr_putback(ifp, txr->br, next);
257 #if __FreeBSD_version >= 901504
258 drbr_advance(ifp, txr->br);
261 #if 0 // this is VF-only
262 #if __FreeBSD_version >= 1100036
* Since we're looking at the tx ring, we can check
* to see if we're a VF by examining our tail register
268 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
269 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
272 /* Send a copy of the frame to the BPF listener */
273 ETHER_BPF_MTAP(ifp, next);
274 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
276 #if __FreeBSD_version < 901504
277 next = drbr_dequeue(ifp, txr->br);
281 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
288 * Called from a taskqueue to drain queued transmit packets.
291 ixgbe_deferred_mq_start(void *arg, int pending)
293 struct tx_ring *txr = arg;
294 struct adapter *adapter = txr->adapter;
295 struct ifnet *ifp = adapter->ifp;
298 if (!drbr_empty(ifp, txr->br))
299 ixgbe_mq_start_locked(ifp, txr);
300 IXGBE_TX_UNLOCK(txr);
304 * Flush all ring buffers
307 ixgbe_qflush(struct ifnet *ifp)
309 struct adapter *adapter = ifp->if_softc;
310 struct tx_ring *txr = adapter->tx_rings;
313 for (int i = 0; i < adapter->num_queues; i++, txr++) {
315 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
317 IXGBE_TX_UNLOCK(txr);
321 #endif /* IXGBE_LEGACY_TX */
324 /*********************************************************************
326 * This routine maps the mbufs to tx descriptors, allowing the
327 * TX engine to transmit the packets.
328 * - return 0 on success, positive on failure
330 **********************************************************************/
333 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
335 struct adapter *adapter = txr->adapter;
336 u32 olinfo_status = 0, cmd_type_len;
337 int i, j, error, nsegs;
341 bus_dma_segment_t segs[adapter->num_segs];
343 struct ixgbe_tx_buf *txbuf;
344 union ixgbe_adv_tx_desc *txd = NULL;
348 /* Basic descriptor defines */
349 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
350 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
352 if (m_head->m_flags & M_VLANTAG)
353 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
* It is important to capture the first descriptor
* used because it will contain the index of
* the one we tell the hardware to report back.
360 first = txr->next_avail_desc;
361 txbuf = &txr->tx_buffers[first];
365 * Map the packet for DMA.
368 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
369 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
371 if (__predict_false(error)) {
376 /* Try it again? - one try */
380 * XXX: m_defrag will choke on
381 * non-MCLBYTES-sized clusters
383 m = m_defrag(*m_headp, M_NOWAIT);
385 adapter->mbuf_defrag_failed++;
395 txr->no_tx_dma_setup++;
398 txr->no_tx_dma_setup++;
405 /* Make certain there are enough descriptors */
406 if (nsegs > txr->tx_avail - 2) {
407 txr->no_desc_avail++;
408 bus_dmamap_unload(txr->txtag, map);
* Set up the appropriate offload context;
* this will consume the first descriptor
417 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
418 if (__predict_false(error)) {
419 if (error == ENOBUFS)
425 /* Do the flow director magic */
426 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
428 if (txr->atr_count >= atr_sample_rate) {
429 ixgbe_atr(txr, m_head);
435 i = txr->next_avail_desc;
436 for (j = 0; j < nsegs; j++) {
440 txbuf = &txr->tx_buffers[i];
441 txd = &txr->tx_base[i];
442 seglen = segs[j].ds_len;
443 segaddr = htole64(segs[j].ds_addr);
445 txd->read.buffer_addr = segaddr;
446 txd->read.cmd_type_len = htole32(txr->txd_cmd |
cmd_type_len | seglen);
448 txd->read.olinfo_status = htole32(olinfo_status);
450 if (++i == txr->num_desc)
454 txd->read.cmd_type_len |=
455 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456 txr->tx_avail -= nsegs;
457 txr->next_avail_desc = i;
459 txbuf->m_head = m_head;
* Here we swap the map so the last descriptor,
* which gets the completion interrupt, has the
* real map, and the first descriptor gets the
* unused map from this descriptor.
466 txr->tx_buffers[first].map = txbuf->map;
468 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
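/*
** Illustrative note (not from the original source): after the swap the
** loaded DMA map travels with the buffer that holds m_head (the last
** segment), which is the map ixgbe_txeof() syncs and unloads when it
** frees that mbuf; the first buffer is left holding the spare, unused
** map and records the EOP descriptor that ixgbe_txeof() polls for the
** DD bit.
*/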
470 /* Set the EOP descriptor that will be marked done */
471 txbuf = &txr->tx_buffers[first];
474 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
* Advance the Transmit Descriptor Tail (TDT); this tells the
* hardware that this frame is available to transmit.
480 ++txr->total_packets;
481 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
483 /* Mark queue as having work */
491 /*********************************************************************
* Allocate memory for tx_buffer structures. The tx_buffer stores all
* the information needed to transmit a packet on the wire. This is
* called only once at attach; setup is done on every reset.
497 **********************************************************************/
499 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
501 struct adapter *adapter = txr->adapter;
502 device_t dev = adapter->dev;
503 struct ixgbe_tx_buf *txbuf;
507 * Setup DMA descriptor areas.
509 if ((error = bus_dma_tag_create(
510 bus_get_dma_tag(adapter->dev), /* parent */
511 1, 0, /* alignment, bounds */
512 BUS_SPACE_MAXADDR, /* lowaddr */
513 BUS_SPACE_MAXADDR, /* highaddr */
514 NULL, NULL, /* filter, filterarg */
515 IXGBE_TSO_SIZE, /* maxsize */
516 adapter->num_segs, /* nsegments */
517 PAGE_SIZE, /* maxsegsize */
520 NULL, /* lockfuncarg */
522 device_printf(dev,"Unable to allocate TX DMA tag\n");
526 if (!(txr->tx_buffers =
527 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529 device_printf(dev, "Unable to allocate tx_buffer memory\n");
534 /* Create the descriptor buffer dma maps */
535 txbuf = txr->tx_buffers;
536 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
539 device_printf(dev, "Unable to create TX DMA map\n");
546 /* We free all, it handles case where we are in the middle */
547 ixgbe_free_transmit_structures(adapter);
551 /*********************************************************************
553 * Initialize a transmit ring.
555 **********************************************************************/
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
559 struct adapter *adapter = txr->adapter;
560 struct ixgbe_tx_buf *txbuf;
563 struct netmap_adapter *na = NA(adapter->ifp);
564 struct netmap_slot *slot;
565 #endif /* DEV_NETMAP */
567 /* Clear the old ring contents */
571 * (under lock): if in netmap mode, do some consistency
572 * checks and set slot to entry 0 of the netmap ring.
574 slot = netmap_reset(na, NR_TX, txr->me, 0);
575 #endif /* DEV_NETMAP */
576 bzero((void *)txr->tx_base,
577 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
579 txr->next_avail_desc = 0;
580 txr->next_to_clean = 0;
582 /* Free any existing tx buffers. */
583 txbuf = txr->tx_buffers;
584 for (i = 0; i < txr->num_desc; i++, txbuf++) {
585 if (txbuf->m_head != NULL) {
586 bus_dmamap_sync(txr->txtag, txbuf->map,
587 BUS_DMASYNC_POSTWRITE);
588 bus_dmamap_unload(txr->txtag, txbuf->map);
589 m_freem(txbuf->m_head);
590 txbuf->m_head = NULL;
594 * In netmap mode, set the map for the packet buffer.
595 * NOTE: Some drivers (not this one) also need to set
596 * the physical buffer address in the NIC ring.
597 * Slots in the netmap ring (indexed by "si") are
598 * kring->nkr_hwofs positions "ahead" wrt the
599 * corresponding slot in the NIC ring. In some drivers
600 * (not here) nkr_hwofs can be negative. Function
601 * netmap_idx_n2k() handles wraparounds properly.
604 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
605 netmap_load_map(na, txr->txtag, txbuf->map, NMB(na, slot + si));
607 #endif /* DEV_NETMAP */
608 /* Clear the EOP descriptor pointer */
613 /* Set the rate at which we sample packets */
614 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615 txr->atr_sample = atr_sample_rate;
618 /* Set number of descriptors available */
619 txr->tx_avail = adapter->num_tx_desc;
621 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623 IXGBE_TX_UNLOCK(txr);
626 /*********************************************************************
628 * Initialize all transmit rings.
630 **********************************************************************/
632 ixgbe_setup_transmit_structures(struct adapter *adapter)
634 struct tx_ring *txr = adapter->tx_rings;
636 for (int i = 0; i < adapter->num_queues; i++, txr++)
637 ixgbe_setup_transmit_ring(txr);
642 /*********************************************************************
644 * Free all transmit rings.
646 **********************************************************************/
648 ixgbe_free_transmit_structures(struct adapter *adapter)
650 struct tx_ring *txr = adapter->tx_rings;
652 for (int i = 0; i < adapter->num_queues; i++, txr++) {
654 ixgbe_free_transmit_buffers(txr);
655 ixgbe_dma_free(adapter, &txr->txdma);
656 IXGBE_TX_UNLOCK(txr);
657 IXGBE_TX_LOCK_DESTROY(txr);
659 free(adapter->tx_rings, M_DEVBUF);
662 /*********************************************************************
664 * Free transmit ring related data structures.
666 **********************************************************************/
668 ixgbe_free_transmit_buffers(struct tx_ring *txr)
670 struct adapter *adapter = txr->adapter;
671 struct ixgbe_tx_buf *tx_buffer;
674 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
676 if (txr->tx_buffers == NULL)
679 tx_buffer = txr->tx_buffers;
680 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681 if (tx_buffer->m_head != NULL) {
682 bus_dmamap_sync(txr->txtag, tx_buffer->map,
683 BUS_DMASYNC_POSTWRITE);
684 bus_dmamap_unload(txr->txtag,
686 m_freem(tx_buffer->m_head);
687 tx_buffer->m_head = NULL;
688 if (tx_buffer->map != NULL) {
689 bus_dmamap_destroy(txr->txtag,
691 tx_buffer->map = NULL;
693 } else if (tx_buffer->map != NULL) {
694 bus_dmamap_unload(txr->txtag,
696 bus_dmamap_destroy(txr->txtag,
698 tx_buffer->map = NULL;
701 #ifdef IXGBE_LEGACY_TX
703 buf_ring_free(txr->br, M_DEVBUF);
705 if (txr->tx_buffers != NULL) {
706 free(txr->tx_buffers, M_DEVBUF);
707 txr->tx_buffers = NULL;
709 if (txr->txtag != NULL) {
710 bus_dma_tag_destroy(txr->txtag);
716 /*********************************************************************
718 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
720 **********************************************************************/
723 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724 u32 *cmd_type_len, u32 *olinfo_status)
726 struct adapter *adapter = txr->adapter;
727 struct ixgbe_adv_tx_context_desc *TXD;
728 struct ether_vlan_header *eh;
731 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
732 int ehdrlen, ip_hlen = 0;
736 int ctxd = txr->next_avail_desc;
739 /* First check if TSO is to be used */
740 if (mp->m_pkthdr.csum_flags & CSUM_TSO)
741 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
743 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
746 /* Indicate the whole packet as payload when not doing TSO */
747 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
749 /* Now ready a context descriptor */
750 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
753 ** In advanced descriptors the vlan tag must
754 ** be placed into the context descriptor. Hence
755 ** we need to make one even if not doing offloads.
757 if (mp->m_flags & M_VLANTAG) {
758 vtag = htole16(mp->m_pkthdr.ether_vtag);
759 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
761 else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
765 * Determine where frame payload starts.
766 * Jump over vlan headers if already present,
767 * helpful for QinQ too.
769 eh = mtod(mp, struct ether_vlan_header *);
770 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
771 etype = ntohs(eh->evl_proto);
772 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
774 etype = ntohs(eh->evl_encap_proto);
775 ehdrlen = ETHER_HDR_LEN;
778 /* Set the ether header length */
779 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
781 if (offload == FALSE)
786 ip = (struct ip *)(mp->m_data + ehdrlen);
787 ip_hlen = ip->ip_hl << 2;
789 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
792 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
793 ip_hlen = sizeof(struct ip6_hdr);
794 /* XXX-BZ this will go badly in case of ext hdrs. */
795 ipproto = ip6->ip6_nxt;
796 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
803 vlan_macip_lens |= ip_hlen;
807 if (mp->m_pkthdr.csum_flags & CSUM_TCP)
808 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
812 if (mp->m_pkthdr.csum_flags & CSUM_UDP)
813 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
816 #if __FreeBSD_version >= 800000
818 if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
819 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
827 if (offload) /* For the TX descriptor setup */
828 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
831 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
833 /* Now copy bits into descriptor */
834 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
835 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
836 TXD->seqnum_seed = htole32(0);
837 TXD->mss_l4len_idx = htole32(0);
839 /* We've consumed the first desc, adjust counters */
840 if (++ctxd == txr->num_desc)
842 txr->next_avail_desc = ctxd;
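/*
** Illustrative note (not from the original source): the lengths
** computed above are packed into a single 32-bit field, low to high:
**
**	vlan_macip_lens = ip_hlen
**	    | (ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT)
**	    | (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
**
** e.g. an untagged IPv4/TCP frame with no IP options packs
** ip_hlen = 20 and ehdrlen = 14.
*/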
848 /**********************************************************************
850 * Setup work for hardware segmentation offload (TSO) on
851 * adapters using advanced tx descriptors
853 **********************************************************************/
855 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
856 u32 *cmd_type_len, u32 *olinfo_status)
858 struct ixgbe_adv_tx_context_desc *TXD;
859 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
860 u32 mss_l4len_idx = 0, paylen;
861 u16 vtag = 0, eh_type;
862 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
863 struct ether_vlan_header *eh;
874 * Determine where frame payload starts.
875 * Jump over vlan headers if already present
877 eh = mtod(mp, struct ether_vlan_header *);
878 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
879 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
880 eh_type = eh->evl_proto;
882 ehdrlen = ETHER_HDR_LEN;
883 eh_type = eh->evl_encap_proto;
886 switch (ntohs(eh_type)) {
889 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
890 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
891 if (ip6->ip6_nxt != IPPROTO_TCP)
893 ip_hlen = sizeof(struct ip6_hdr);
894 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
895 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
896 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
897 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
902 ip = (struct ip *)(mp->m_data + ehdrlen);
903 if (ip->ip_p != IPPROTO_TCP)
906 ip_hlen = ip->ip_hl << 2;
907 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
908 th->th_sum = in_pseudo(ip->ip_src.s_addr,
909 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
910 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
911 /* Tell transmit desc to also do IPv4 checksum. */
912 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
916 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
917 __func__, ntohs(eh_type));
921 ctxd = txr->next_avail_desc;
922 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
924 tcp_hlen = th->th_off << 2;
926 /* This is used in the transmit desc in encap */
927 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
929 /* VLAN MACLEN IPLEN */
930 if (mp->m_flags & M_VLANTAG) {
931 vtag = htole16(mp->m_pkthdr.ether_vtag);
932 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
935 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
936 vlan_macip_lens |= ip_hlen;
937 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
939 /* ADV DTYPE TUCMD */
940 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
941 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
942 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
945 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
946 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
947 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
949 TXD->seqnum_seed = htole32(0);
951 if (++ctxd == txr->num_desc)
955 txr->next_avail_desc = ctxd;
956 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
957 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
958 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
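/*
** Illustrative note (not from the original source): for TSO the context
** descriptor also carries the segment size and TCP header length:
**
**	mss_l4len_idx = (tso_segsz << IXGBE_ADVTXD_MSS_SHIFT)
**	    | (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
**
** and paylen covers the TCP payload only: a 66000-byte packet with
** 14-byte Ethernet, 20-byte IP and 20-byte TCP headers yields
** paylen = 66000 - 14 - 20 - 20 = 65946.
*/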
964 /**********************************************************************
966 * Examine each tx_buffer in the used queue. If the hardware is done
967 * processing the packet then free associated resources. The
968 * tx_buffer is put back on the free queue.
970 **********************************************************************/
972 ixgbe_txeof(struct tx_ring *txr)
975 struct adapter *adapter = txr->adapter;
976 struct ifnet *ifp = adapter->ifp;
978 u32 work, processed = 0;
979 u16 limit = txr->process_limit;
980 struct ixgbe_tx_buf *buf;
981 union ixgbe_adv_tx_desc *txd;
983 mtx_assert(&txr->tx_mtx, MA_OWNED);
986 if (ifp->if_capenable & IFCAP_NETMAP) {
987 struct netmap_adapter *na = NA(ifp);
988 struct netmap_kring *kring = &na->tx_rings[txr->me];
990 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
991 BUS_DMASYNC_POSTREAD);
993 * In netmap mode, all the work is done in the context
994 * of the client thread. Interrupt handlers only wake up
995 * clients, which may be sleeping on individual rings
996 * or on a global resource for all rings.
997 * To implement tx interrupt mitigation, we wake up the client
998 * thread roughly every half ring, even if the NIC interrupts
999 * more frequently. This is implemented as follows:
1000 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1001 * the slot that should wake up the thread (nkr_num_slots
1002 * means the user thread should not be woken up);
1003 * - the driver ignores tx interrupts unless netmap_mitigate=0
1004 * or the slot has the DD bit set.
1006 if (!netmap_mitigate ||
1007 (kring->nr_kflags < kring->nkr_num_slots &&
1008 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1009 netmap_tx_irq(ifp, txr->me);
1013 #endif /* DEV_NETMAP */
1015 if (txr->tx_avail == txr->num_desc) {
1020 /* Get work starting point */
1021 work = txr->next_to_clean;
1022 buf = &txr->tx_buffers[work];
1023 txd = &txr->tx_base[work];
1024 work -= txr->num_desc; /* The distance to ring end */
1025 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1026 BUS_DMASYNC_POSTREAD);
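/*
** Illustrative note (not from the original source): "work" is kept as
** a negative offset from the end of the ring so the wrap test is just
** (!work). E.g. with num_desc = 1024 and next_to_clean = 1000, work
** starts at -24; when it reaches 0 the buffer and descriptor pointers
** are reset to the start of the ring and work becomes -1024 again.
** At the bottom, num_desc is added back to turn it into a ring index
** for next_to_clean.
*/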
union ixgbe_adv_tx_desc *eop = buf->eop;
1030 if (eop == NULL) /* No work */
1033 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1034 break; /* I/O not complete */
1038 buf->m_head->m_pkthdr.len;
1039 bus_dmamap_sync(txr->txtag,
1041 BUS_DMASYNC_POSTWRITE);
1042 bus_dmamap_unload(txr->txtag,
1044 m_freem(buf->m_head);
1050 /* We clean the range if multi segment */
1051 while (txd != eop) {
1055 /* wrap the ring? */
1056 if (__predict_false(!work)) {
1057 work -= txr->num_desc;
1058 buf = txr->tx_buffers;
1063 buf->m_head->m_pkthdr.len;
1064 bus_dmamap_sync(txr->txtag,
1066 BUS_DMASYNC_POSTWRITE);
1067 bus_dmamap_unload(txr->txtag,
1069 m_freem(buf->m_head);
1079 /* Try the next packet */
1083 /* reset with a wrap */
1084 if (__predict_false(!work)) {
1085 work -= txr->num_desc;
1086 buf = txr->tx_buffers;
1090 } while (__predict_true(--limit));
1092 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1093 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1095 work += txr->num_desc;
1096 txr->next_to_clean = work;
** Queue hang detection: we know there's
** work outstanding or the first return
** would have been taken, so increment busy
** if nothing managed to get cleaned; then
** in local_timer it will be checked and
** marked as HUNG if it exceeds a MAX attempt.
1106 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
** If anything gets cleaned we reset the state to 1;
** note this will turn off HUNG if it's set.
1115 if (txr->tx_avail == txr->num_desc)
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same cpu. This would be a performance
** hit, but we only do it at IXGBE_FDIR_RATE of
** the interrupt rate.
1132 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1134 struct adapter *adapter = txr->adapter;
1135 struct ix_queue *que;
1139 struct ether_vlan_header *eh;
1140 union ixgbe_atr_hash_dword input = {.dword = 0};
1141 union ixgbe_atr_hash_dword common = {.dword = 0};
1142 int ehdrlen, ip_hlen;
1145 eh = mtod(mp, struct ether_vlan_header *);
1146 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1147 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1148 etype = eh->evl_proto;
1150 ehdrlen = ETHER_HDR_LEN;
1151 etype = eh->evl_encap_proto;
1154 /* Only handling IPv4 */
1155 if (etype != htons(ETHERTYPE_IP))
1158 ip = (struct ip *)(mp->m_data + ehdrlen);
1159 ip_hlen = ip->ip_hl << 2;
1161 /* check if we're UDP or TCP */
1164 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1165 /* src and dst are inverted */
1166 common.port.dst ^= th->th_sport;
1167 common.port.src ^= th->th_dport;
1168 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1171 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1172 /* src and dst are inverted */
1173 common.port.dst ^= uh->uh_sport;
1174 common.port.src ^= uh->uh_dport;
1175 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1181 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1182 if (mp->m_pkthdr.ether_vtag)
1183 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1185 common.flex_bytes ^= etype;
1186 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1188 que = &adapter->queues[txr->me];
1190 ** This assumes the Rx queue and Tx
1191 ** queue are bound to the same CPU
1193 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1194 input, common, que->msix);
1196 #endif /* IXGBE_FDIR */
1199 ** Used to detect a descriptor that has
1200 ** been merged by Hardware RSC.
1203 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1205 return (le32toh(rx->wb.lower.lo_dword.data) &
1206 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1209 /*********************************************************************
* Initialize the Hardware RSC (LRO) feature on 82599
* for an RX ring; this is toggled by the LRO capability
* even though it is transparent to the stack.
*
* NOTE: since this HW feature only works with IPv4 and
* our testing has shown soft LRO to be as effective,
* I have decided to disable this by default.
1219 **********************************************************************/
1221 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1223 struct adapter *adapter = rxr->adapter;
1224 struct ixgbe_hw *hw = &adapter->hw;
1225 u32 rscctrl, rdrxctl;
1227 /* If turning LRO/RSC off we need to disable it */
1228 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1229 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1230 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1234 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1235 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1236 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1237 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1238 #endif /* DEV_NETMAP */
1239 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1240 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1241 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1243 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1244 rscctrl |= IXGBE_RSCCTL_RSCEN;
1246 ** Limit the total number of descriptors that
1247 ** can be combined, so it does not exceed 64K
1249 if (rxr->mbuf_sz == MCLBYTES)
1250 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1251 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1252 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1253 else if (rxr->mbuf_sz == MJUM9BYTES)
1254 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1255 else /* Using 16K cluster */
1256 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1258 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
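/*
** Illustrative note (not from the original source): the MAXDESC choice
** above bounds the largest coalesced frame per mbuf size, e.g.
** 16 x 2KB clusters = 32KB, 8 x 4KB = 32KB, 4 x 9KB = 36KB, or a
** single 16KB cluster, all comfortably below the 64K limit.
*/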
1260 /* Enable TCP header recognition */
1261 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1262 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1263 IXGBE_PSRTYPE_TCPHDR));
1265 /* Disable RSC for ACK packets */
1266 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1267 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1271 /*********************************************************************
* Refresh mbuf buffers for RX descriptor rings
* - now keeps its own state so discards due to resource
*   exhaustion are unnecessary; if an mbuf cannot be obtained
*   it just returns, keeping its placeholder, and can simply
*   be called again later to retry.
1279 **********************************************************************/
1281 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1283 struct adapter *adapter = rxr->adapter;
1284 bus_dma_segment_t seg[1];
1285 struct ixgbe_rx_buf *rxbuf;
1287 int i, j, nsegs, error;
1288 bool refreshed = FALSE;
1290 i = j = rxr->next_to_refresh;
1291 /* Control the loop with one beyond */
1292 if (++j == rxr->num_desc)
1295 while (j != limit) {
1296 rxbuf = &rxr->rx_buffers[i];
1297 if (rxbuf->buf == NULL) {
1298 mp = m_getjcl(M_NOWAIT, MT_DATA,
1299 M_PKTHDR, rxr->mbuf_sz);
1302 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1303 m_adj(mp, ETHER_ALIGN);
1307 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1309 /* If we're dealing with an mbuf that was copied rather
1310 * than replaced, there's no need to go through busdma.
1312 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1313 /* Get the memory mapping */
1314 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1315 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1316 rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1318 printf("Refresh mbufs: payload dmamap load"
1319 " failure - %d\n", error);
1325 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1326 BUS_DMASYNC_PREREAD);
1327 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1328 htole64(seg[0].ds_addr);
1330 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1331 rxbuf->flags &= ~IXGBE_RX_COPY;
1335 /* Next is precalculated */
1337 rxr->next_to_refresh = i;
1338 if (++j == rxr->num_desc)
1342 if (refreshed) /* Update hardware tail index */
1343 IXGBE_WRITE_REG(&adapter->hw,
1344 rxr->tail, rxr->next_to_refresh);
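/*
** Illustrative note (not from the original source): j is kept one slot
** ahead of i purely as loop control, which is why the walk starts "one
** beyond" next_to_refresh. When an mbuf cannot be obtained the routine
** simply stops, keeping its placeholder, and the tail register is only
** written if at least one descriptor actually received a fresh buffer.
*/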
1348 /*********************************************************************
* Allocate memory for rx_buffer structures. Since we use one
* rx_buffer per received packet, the maximum number of rx_buffers
* that we'll need is equal to the number of receive descriptors
* that we've allocated.
1355 **********************************************************************/
1357 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1359 struct adapter *adapter = rxr->adapter;
1360 device_t dev = adapter->dev;
1361 struct ixgbe_rx_buf *rxbuf;
1362 int i, bsize, error;
1364 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1365 if (!(rxr->rx_buffers =
1366 (struct ixgbe_rx_buf *) malloc(bsize,
1367 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1368 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1373 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1374 1, 0, /* alignment, bounds */
1375 BUS_SPACE_MAXADDR, /* lowaddr */
1376 BUS_SPACE_MAXADDR, /* highaddr */
1377 NULL, NULL, /* filter, filterarg */
1378 MJUM16BYTES, /* maxsize */
1380 MJUM16BYTES, /* maxsegsize */
1382 NULL, /* lockfunc */
1383 NULL, /* lockfuncarg */
1385 device_printf(dev, "Unable to create RX DMA tag\n");
1389 for (i = 0; i < rxr->num_desc; i++, rxbuf++) {
1390 rxbuf = &rxr->rx_buffers[i];
1391 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1393 device_printf(dev, "Unable to create RX dma map\n");
1401 /* Frees all, but can handle partial completion */
1402 ixgbe_free_receive_structures(adapter);
1408 ixgbe_free_receive_ring(struct rx_ring *rxr)
1410 struct ixgbe_rx_buf *rxbuf;
1413 for (i = 0; i < rxr->num_desc; i++) {
1414 rxbuf = &rxr->rx_buffers[i];
1415 if (rxbuf->buf != NULL) {
1416 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1417 BUS_DMASYNC_POSTREAD);
1418 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1419 rxbuf->buf->m_flags |= M_PKTHDR;
1420 m_freem(rxbuf->buf);
1428 /*********************************************************************
1430 * Initialize a receive ring and its buffers.
1432 **********************************************************************/
1434 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1436 struct adapter *adapter;
1439 struct ixgbe_rx_buf *rxbuf;
1440 bus_dma_segment_t seg[1];
1441 struct lro_ctrl *lro = &rxr->lro;
1442 int rsize, nsegs, error = 0;
1444 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1445 struct netmap_slot *slot;
1446 #endif /* DEV_NETMAP */
1448 adapter = rxr->adapter;
1452 /* Clear the ring contents */
1455 /* same as in ixgbe_setup_transmit_ring() */
1456 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1457 #endif /* DEV_NETMAP */
1458 rsize = roundup2(adapter->num_rx_desc *
1459 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1460 bzero((void *)rxr->rx_base, rsize);
1461 /* Cache the size */
1462 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1464 /* Free current RX buffer structs and their mbufs */
1465 ixgbe_free_receive_ring(rxr);
1467 /* Now replenish the mbufs */
1468 for (int j = 0; j != rxr->num_desc; ++j) {
1471 rxbuf = &rxr->rx_buffers[j];
1474 * In netmap mode, fill the map and set the buffer
1475 * address in the NIC ring, considering the offset
1476 * between the netmap and NIC rings (see comment in
1477 * ixgbe_setup_transmit_ring() ). No need to allocate
1478 * an mbuf, so end the block with a continue;
1481 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1485 addr = PNMB(na, slot + sj, &paddr);
1486 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1487 /* Update descriptor and the cached value */
1488 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1489 rxbuf->addr = htole64(paddr);
1492 #endif /* DEV_NETMAP */
1494 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1495 M_PKTHDR, adapter->rx_mbuf_sz);
1496 if (rxbuf->buf == NULL) {
1501 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1502 /* Get the memory mapping */
1503 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1504 rxbuf->pmap, mp, seg,
1505 &nsegs, BUS_DMA_NOWAIT);
1508 bus_dmamap_sync(rxr->ptag,
1509 rxbuf->pmap, BUS_DMASYNC_PREREAD);
1510 /* Update the descriptor and the cached value */
1511 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1512 rxbuf->addr = htole64(seg[0].ds_addr);
1516 /* Setup our descriptor indices */
1517 rxr->next_to_check = 0;
1518 rxr->next_to_refresh = 0;
1519 rxr->lro_enabled = FALSE;
1522 rxr->vtag_strip = FALSE;
1524 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1525 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1528 ** Now set up the LRO interface:
1530 if (ixgbe_rsc_enable)
1531 ixgbe_setup_hw_rsc(rxr);
1532 else if (ifp->if_capenable & IFCAP_LRO) {
1533 int err = tcp_lro_init(lro);
1535 device_printf(dev, "LRO Initialization failed!\n");
1538 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1539 rxr->lro_enabled = TRUE;
1540 lro->ifp = adapter->ifp;
1543 IXGBE_RX_UNLOCK(rxr);
1547 ixgbe_free_receive_ring(rxr);
1548 IXGBE_RX_UNLOCK(rxr);
1552 /*********************************************************************
1554 * Initialize all receive rings.
1556 **********************************************************************/
1558 ixgbe_setup_receive_structures(struct adapter *adapter)
1560 struct rx_ring *rxr = adapter->rx_rings;
1563 for (j = 0; j < adapter->num_queues; j++, rxr++)
1564 if (ixgbe_setup_receive_ring(rxr))
* Free RX buffers allocated so far; we will only handle
* the rings that completed, since the failing case will have
* cleaned up for itself. 'j' failed, so it's the terminus.
1574 for (int i = 0; i < j; ++i) {
1575 rxr = &adapter->rx_rings[i];
1576 ixgbe_free_receive_ring(rxr);
1583 /*********************************************************************
1585 * Free all receive rings.
1587 **********************************************************************/
1589 ixgbe_free_receive_structures(struct adapter *adapter)
1591 struct rx_ring *rxr = adapter->rx_rings;
1593 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1595 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1596 struct lro_ctrl *lro = &rxr->lro;
1597 ixgbe_free_receive_buffers(rxr);
1598 /* Free LRO memory */
1600 /* Free the ring memory as well */
1601 ixgbe_dma_free(adapter, &rxr->rxdma);
1604 free(adapter->rx_rings, M_DEVBUF);
1608 /*********************************************************************
1610 * Free receive ring data structures
1612 **********************************************************************/
1614 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1616 struct adapter *adapter = rxr->adapter;
1617 struct ixgbe_rx_buf *rxbuf;
1619 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1621 /* Cleanup any existing buffers */
1622 if (rxr->rx_buffers != NULL) {
1623 for (int i = 0; i < adapter->num_rx_desc; i++) {
1624 rxbuf = &rxr->rx_buffers[i];
1625 if (rxbuf->buf != NULL) {
1626 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1627 BUS_DMASYNC_POSTREAD);
1628 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1629 rxbuf->buf->m_flags |= M_PKTHDR;
1630 m_freem(rxbuf->buf);
1633 if (rxbuf->pmap != NULL) {
1634 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1638 if (rxr->rx_buffers != NULL) {
1639 free(rxr->rx_buffers, M_DEVBUF);
1640 rxr->rx_buffers = NULL;
1644 if (rxr->ptag != NULL) {
1645 bus_dma_tag_destroy(rxr->ptag);
1652 static __inline void
1653 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
* ATM, LRO is only for IP/TCP packets, and the TCP checksum of the packet
* should be computed by hardware. Also, it should not have a VLAN tag in
* the ethernet header. In the case of IPv6 we do not yet support ext. hdrs.
1661 if (rxr->lro_enabled &&
1662 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1663 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1664 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1665 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1666 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1667 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1668 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1669 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1671 * Send to the stack if:
1672 ** - LRO not enabled, or
1673 ** - no LRO resources, or
1674 ** - lro enqueue fails
1676 if (rxr->lro.lro_cnt != 0)
1677 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1680 IXGBE_RX_UNLOCK(rxr);
1681 (*ifp->if_input)(ifp, m);
1685 static __inline void
1686 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1688 struct ixgbe_rx_buf *rbuf;
1690 rbuf = &rxr->rx_buffers[i];
** With advanced descriptors the writeback
** clobbers the buffer addrs, so it's easier
** to just free the existing mbufs and take
** the normal refresh path to get new buffers.
1701 if (rbuf->fmp != NULL) {/* Partial chain ? */
1702 rbuf->fmp->m_flags |= M_PKTHDR;
1705 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1706 } else if (rbuf->buf) {
1710 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1718 /*********************************************************************
* This routine executes in interrupt context. It replenishes
* the mbufs in the descriptor ring and sends data which has been
* DMA'ed into host memory to the upper layer.
1724 * Return TRUE for more work, FALSE for all clean.
1725 *********************************************************************/
1727 ixgbe_rxeof(struct ix_queue *que)
1729 struct adapter *adapter = que->adapter;
1730 struct rx_ring *rxr = que->rxr;
1731 struct ifnet *ifp = adapter->ifp;
1732 struct lro_ctrl *lro = &rxr->lro;
1733 struct lro_entry *queued;
1734 int i, nextp, processed = 0;
1736 u16 count = rxr->process_limit;
1737 union ixgbe_adv_rx_desc *cur;
1738 struct ixgbe_rx_buf *rbuf, *nbuf;
1744 /* Same as the txeof routine: wakeup clients on intr. */
1745 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1746 IXGBE_RX_UNLOCK(rxr);
1749 #endif /* DEV_NETMAP */
1751 for (i = rxr->next_to_check; count != 0;) {
1752 struct mbuf *sendmp, *mp;
1758 /* Sync the ring. */
1759 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1760 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1762 cur = &rxr->rx_base[i];
1763 staterr = le32toh(cur->wb.upper.status_error);
1764 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1766 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1768 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1775 cur->wb.upper.status_error = 0;
1776 rbuf = &rxr->rx_buffers[i];
1779 len = le16toh(cur->wb.upper.length);
1780 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1781 IXGBE_RXDADV_PKTTYPE_MASK;
1782 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1784 /* Make sure bad packets are discarded */
1785 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1786 #if __FreeBSD_version >= 1100036
1787 if (IXGBE_IS_VF(adapter))
1788 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1790 rxr->rx_discarded++;
1791 ixgbe_rx_discard(rxr, i);
** On 82599, which supports a hardware
** LRO (called HW RSC), packets need
** not be fragmented across sequential
** descriptors; rather, the next descriptor
** is indicated in bits of the descriptor.
** This also means that we might process
** more than one packet at a time, something
** that has never been true before; it
** required eliminating global chain pointers
** in favor of what we are doing here. -jfv
1809 ** Figure out the next descriptor
1812 if (rxr->hw_rsc == TRUE) {
1813 rsc = ixgbe_rsc_count(cur);
1814 rxr->rsc_num += (rsc - 1);
1816 if (rsc) { /* Get hardware index */
1818 IXGBE_RXDADV_NEXTP_MASK) >>
1819 IXGBE_RXDADV_NEXTP_SHIFT);
1820 } else { /* Just sequential */
1822 if (nextp == adapter->num_rx_desc)
1825 nbuf = &rxr->rx_buffers[nextp];
1829 ** Rather than using the fmp/lmp global pointers
1830 ** we now keep the head of a packet chain in the
1831 ** buffer struct and pass this along from one
1832 ** descriptor to the next, until we get EOP.
1836 ** See if there is a stored head
1837 ** that determines what we are
1840 if (sendmp != NULL) { /* secondary frag */
1841 rbuf->buf = rbuf->fmp = NULL;
1842 mp->m_flags &= ~M_PKTHDR;
1843 sendmp->m_pkthdr.len += mp->m_len;
1846 * Optimize. This might be a small packet,
1847 * maybe just a TCP ACK. Do a fast copy that
1848 * is cache aligned into a new mbuf, and
1849 * leave the old mbuf+cluster for re-use.
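/*
** Illustrative note (not from the original source): a pure TCP ACK
** easily fits under IXGBE_RX_COPY_LEN, so it is copied into a fresh
** header mbuf while the original cluster stays loaded in the ring
** slot. The IXGBE_RX_COPY flag lets ixgbe_refresh_mbufs() reuse that
** slot without another dmamap load.
*/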
1851 if (eop && len <= IXGBE_RX_COPY_LEN) {
1852 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1853 if (sendmp != NULL) {
1855 IXGBE_RX_COPY_ALIGN;
1856 ixgbe_bcopy(mp->m_data,
1857 sendmp->m_data, len);
1858 sendmp->m_len = len;
1860 rbuf->flags |= IXGBE_RX_COPY;
1863 if (sendmp == NULL) {
1864 rbuf->buf = rbuf->fmp = NULL;
1868 /* first desc of a non-ps chain */
1869 sendmp->m_flags |= M_PKTHDR;
1870 sendmp->m_pkthdr.len = mp->m_len;
1874 /* Pass the head pointer on */
1878 mp->m_next = nbuf->buf;
1879 } else { /* Sending this frame */
1880 sendmp->m_pkthdr.rcvif = ifp;
1882 /* capture data for AIM */
1883 rxr->bytes += sendmp->m_pkthdr.len;
1884 rxr->rx_bytes += sendmp->m_pkthdr.len;
1885 /* Process vlan info */
1886 if ((rxr->vtag_strip) &&
1887 (staterr & IXGBE_RXD_STAT_VP))
1888 vtag = le16toh(cur->wb.upper.vlan);
1890 sendmp->m_pkthdr.ether_vtag = vtag;
1891 sendmp->m_flags |= M_VLANTAG;
1893 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1894 ixgbe_rx_checksum(staterr, sendmp, ptype);
1897 * In case of multiqueue, we have RXCSUM.PCSD bit set
1898 * and never cleared. This means we have RSS hash
1899 * available to be used.
1901 if (adapter->num_queues > 1) {
1902 sendmp->m_pkthdr.flowid =
1903 le32toh(cur->wb.lower.hi_dword.rss);
* Full RSS support is not available in
* FreeBSD 10, so set the hash type to OPAQUE.
1909 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1911 #if __FreeBSD_version >= 800000
1912 sendmp->m_pkthdr.flowid = que->msix;
1913 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1914 #endif /* FreeBSD_version */
1918 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1919 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1921 /* Advance our pointers to the next descriptor. */
1922 if (++i == rxr->num_desc)
1925 /* Now send to the stack or do LRO */
1926 if (sendmp != NULL) {
1927 rxr->next_to_check = i;
1928 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1929 i = rxr->next_to_check;
1932 /* Every 8 descriptors we go to refresh mbufs */
1933 if (processed == 8) {
1934 ixgbe_refresh_mbufs(rxr, i);
1939 /* Refresh any remaining buf structs */
1940 if (ixgbe_rx_unrefreshed(rxr))
1941 ixgbe_refresh_mbufs(rxr, i);
1943 rxr->next_to_check = i;
1946 * Flush any outstanding LRO work
1948 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
1949 SLIST_REMOVE_HEAD(&lro->lro_active, next);
1950 tcp_lro_flush(lro, queued);
1953 IXGBE_RX_UNLOCK(rxr);
1956 ** Still have cleaning to do?
1958 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
1965 /*********************************************************************
1967 * Verify that the hardware indicated that the checksum is valid.
* Inform the stack about the status of the checksum so that the
* stack doesn't spend time verifying the checksum.
1971 *********************************************************************/
1973 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
1975 u16 status = (u16) staterr;
1976 u8 errors = (u8) (staterr >> 24);
1979 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1980 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
1983 if (status & IXGBE_RXD_STAT_IPCS) {
1984 if (!(errors & IXGBE_RXD_ERR_IPE)) {
1985 /* IP Checksum Good */
1986 mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
1987 mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
1990 mp->m_pkthdr.csum_flags = 0;
1992 if (status & IXGBE_RXD_STAT_L4CS) {
1993 u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
1994 #if __FreeBSD_version >= 800000
1996 type = CSUM_SCTP_VALID;
1998 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
1999 mp->m_pkthdr.csum_flags |= type;
2001 mp->m_pkthdr.csum_data = htons(0xffff);
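/*
** Illustrative note (not from the original source): the mapping applied
** above is
**
**	IPCS set, IPE clear  -> CSUM_IP_CHECKED | CSUM_IP_VALID
**	L4CS set, TCPE clear -> CSUM_DATA_VALID | CSUM_PSEUDO_HDR
**	                        (CSUM_SCTP_VALID for SCTP) with
**	                        csum_data = 0xffff
**
** so the stack skips re-verifying checksums the MAC already checked.
*/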
2007 /********************************************************************
2008 * Manage DMA'able memory.
2009 *******************************************************************/
2011 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2015 *(bus_addr_t *) arg = segs->ds_addr;
2020 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2021 struct ixgbe_dma_alloc *dma, int mapflags)
2023 device_t dev = adapter->dev;
2026 r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2027 DBA_ALIGN, 0, /* alignment, bounds */
2028 BUS_SPACE_MAXADDR, /* lowaddr */
2029 BUS_SPACE_MAXADDR, /* highaddr */
2030 NULL, NULL, /* filter, filterarg */
2033 size, /* maxsegsize */
2034 BUS_DMA_ALLOCNOW, /* flags */
2035 NULL, /* lockfunc */
2036 NULL, /* lockfuncarg */
2039 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2043 r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2044 BUS_DMA_NOWAIT, &dma->dma_map);
2046 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2050 r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2054 mapflags | BUS_DMA_NOWAIT);
2056 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2060 dma->dma_size = size;
2063 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2065 bus_dma_tag_destroy(dma->dma_tag);
2067 dma->dma_tag = NULL;
2072 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2074 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2075 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2076 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2077 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2078 bus_dma_tag_destroy(dma->dma_tag);
2082 /*********************************************************************
* Allocate memory for the transmit and receive rings, and then
* the descriptors associated with each; called only once at attach.
2087 **********************************************************************/
2089 ixgbe_allocate_queues(struct adapter *adapter)
2091 device_t dev = adapter->dev;
2092 struct ix_queue *que;
2093 struct tx_ring *txr;
2094 struct rx_ring *rxr;
2095 int rsize, tsize, error = IXGBE_SUCCESS;
2096 int txconf = 0, rxconf = 0;
2098 /* First allocate the top level queue structs */
2099 if (!(adapter->queues =
2100 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2101 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2102 device_printf(dev, "Unable to allocate queue memory\n");
2107 /* First allocate the TX ring struct memory */
2108 if (!(adapter->tx_rings =
2109 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2110 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2111 device_printf(dev, "Unable to allocate TX ring memory\n");
2116 /* Next allocate the RX */
2117 if (!(adapter->rx_rings =
2118 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2119 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2120 device_printf(dev, "Unable to allocate RX ring memory\n");
2125 /* For the ring itself */
2126 tsize = roundup2(adapter->num_tx_desc *
2127 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
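/*
** Illustrative note (not from the original source): each advanced TX
** descriptor is 16 bytes, so e.g. 2048 descriptors need 32KB, which
** roundup2() then rounds to a multiple of DBA_ALIGN. The RX rings
** below size themselves the same way with union ixgbe_adv_rx_desc.
*/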
* Now set up the TX queues; txconf is needed to handle the
* possibility that things fail midcourse and we need to
* undo memory gracefully.
2134 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2135 /* Set up some basics */
2136 txr = &adapter->tx_rings[i];
2137 txr->adapter = adapter;
2139 txr->num_desc = adapter->num_tx_desc;
2141 /* Initialize the TX side lock */
2142 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2143 device_get_nameunit(dev), txr->me);
2144 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2146 if (ixgbe_dma_malloc(adapter, tsize,
2147 &txr->txdma, BUS_DMA_NOWAIT)) {
2149 "Unable to allocate TX Descriptor memory\n");
2153 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2154 bzero((void *)txr->tx_base, tsize);
2156 /* Now allocate transmit buffers for the ring */
2157 if (ixgbe_allocate_transmit_buffers(txr)) {
2159 "Critical Failure setting up transmit buffers\n");
2163 #ifndef IXGBE_LEGACY_TX
2164 /* Allocate a buf ring */
2165 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2166 M_WAITOK, &txr->tx_mtx);
2167 if (txr->br == NULL) {
2169 "Critical Failure setting up buf ring\n");
2177 * Next the RX queues...
2179 rsize = roundup2(adapter->num_rx_desc *
2180 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2181 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2182 rxr = &adapter->rx_rings[i];
2183 /* Set up some basics */
2184 rxr->adapter = adapter;
2186 rxr->num_desc = adapter->num_rx_desc;
2188 /* Initialize the RX side lock */
2189 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2190 device_get_nameunit(dev), rxr->me);
2191 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2193 if (ixgbe_dma_malloc(adapter, rsize,
2194 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2196 "Unable to allocate RxDescriptor memory\n");
2200 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2201 bzero((void *)rxr->rx_base, rsize);
/* Allocate receive buffers for the ring */
2204 if (ixgbe_allocate_receive_buffers(rxr)) {
2206 "Critical Failure setting up receive buffers\n");
2213 ** Finally set up the queue holding structs
2215 for (int i = 0; i < adapter->num_queues; i++) {
2216 que = &adapter->queues[i];
2217 que->adapter = adapter;
2219 que->txr = &adapter->tx_rings[i];
2220 que->rxr = &adapter->rx_rings[i];
2226 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2227 ixgbe_dma_free(adapter, &rxr->rxdma);
2229 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2230 ixgbe_dma_free(adapter, &txr->txdma);
2231 free(adapter->rx_rings, M_DEVBUF);
2233 free(adapter->tx_rings, M_DEVBUF);
2235 free(adapter->queues, M_DEVBUF);