/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet6.h"

#include <net/rss_config.h>
#include <netinet/in_rss.h>

#include <net/netmap.h>
#include <sys/selinfo.h>
#include <dev/netmap/netmap_kern.h>

extern int ix_crcstrip;
** this feature only works with
** IPv4, and only on 82599 and later.
** Also this will cause IP forwarding to
** fail and that can't be controlled by
** the stack as LRO can. For all these
** reasons I've deemed it best to leave
** this off and not bother with a tuneable
** interface, this would need to be compiled
** to enable.
static bool ixgbe_rsc_enable = FALSE;
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool, this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
static int atr_sample_rate = 20;
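/*
** Illustrative sketch only (not part of the upstream driver): this is
** roughly how the atr_sample_rate tunable gates Flow Director sampling
** in ixgbe_xmit() below -- a per-ring counter (txr->atr_count) is bumped
** for each transmitted packet and ixgbe_atr() is only called once the
** counter reaches atr_sample_rate, i.e. about 1 packet in 20 by default.
*/
#if 0	/* example only, not compiled */
static inline bool
ixgbe_atr_should_sample(struct tx_ring *txr)
{
	if (++txr->atr_count < atr_sample_rate)
		return (FALSE);
	txr->atr_count = 0;	/* restart the sampling window */
	return (TRUE);
}
#endif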
/* Shared PCI config read/write */
ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,

ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static void	ixgbe_setup_transmit_ring(struct tx_ring *);
static void	ixgbe_free_transmit_buffers(struct tx_ring *);
static int	ixgbe_setup_receive_ring(struct rx_ring *);
static void	ixgbe_free_receive_buffers(struct rx_ring *);
static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
static int	ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int	ixgbe_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	ixgbe_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available stack is notified and
 *  the packet is requeued.
 **********************************************************************/
ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
	struct adapter *adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
	if (!adapter->link_active)

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (ixgbe_xmit(txr, &m_head)) {
			IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);

 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
ixgbe_start(struct ifnet *ifp)
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		ixgbe_start_locked(txr, ifp);
		IXGBE_TX_UNLOCK(txr);
#else /* ! IXGBE_LEGACY_TX */

** Multiqueue Transmit driver
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;

	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is setup correctly, it should be the
	 * same bucket that the current CPU we're on is.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0)
			/* TODO: spit out something if bucket_id > num_queues? */
			i = bucket_id % adapter->num_queues;
			i = m->m_pkthdr.flowid % adapter->num_queues;
		i = curcpu % adapter->num_queues;

	/* Check for a hung queue and pick alternative */
	if (((1 << i) & adapter->active_queues) == 0)
		i = ffsl(adapter->active_queues);

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];
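	/*
	 * Worked example (illustrative, not from the original source):
	 * with adapter->num_queues = 8 and an RSS-hashed mbuf whose
	 * flowid is 1234, the modulo above picks i = 1234 % 8 = 2, so
	 * TX ring 2 and MSI-X queue 2 are used -- the same bucket the
	 * RX side hashed the flow to.  If bit 2 of active_queues is
	 * clear (queue marked hung), ffsl() redirects the packet to
	 * another queue that is still active.
	 */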
	err = drbr_enqueue(ifp, txr->br, m);

	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
		taskqueue_enqueue(que->tq, &txr->txq_task);
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
	struct adapter *adapter = txr->adapter;
	int enqueued = 0, err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0)

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			err = drbr_enqueue(ifp, txr->br, next);
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
				drbr_advance(ifp, txr->br);
				drbr_putback(ifp, txr->br, next);
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);

#if 0 // this is VF-only
#if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
		 */
		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);

		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
#if __FreeBSD_version < 901504
			next = drbr_dequeue(ifp, txr->br);

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
 * Called from a taskqueue to drain queued transmit packets.
ixgbe_deferred_mq_start(void *arg, int pending)
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	if (!drbr_empty(ifp, txr->br))
		ixgbe_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);

 * Flush all ring buffers
ixgbe_qflush(struct ifnet *ifp)
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
		IXGBE_TX_UNLOCK(txr);

#endif /* IXGBE_LEGACY_TX */


/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *  	- return 0 on success, positive on failure
 *
 **********************************************************************/
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
	struct adapter		*adapter = txr->adapter;
	u32			olinfo_status = 0, cmd_type_len;
	int			i, j, error, nsegs;
	bus_dma_segment_t	segs[adapter->num_segs];
	struct ixgbe_tx_buf	*txbuf;
	union ixgbe_adv_tx_desc *txd = NULL;

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	first = txr->next_avail_desc;
	txbuf = &txr->tx_buffers[first];

	 * Map the packet for DMA.
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (__predict_false(error)) {
		/* Try it again? - one try */
			 * XXX: m_defrag will choke on
			 * non-MCLBYTES-sized clusters
			m = m_defrag(*m_headp, M_NOWAIT);
				adapter->mbuf_defrag_failed++;
			txr->no_tx_dma_setup++;
			txr->no_tx_dma_setup++;

	/* Make certain there are enough descriptors */
	if (nsegs > txr->tx_avail - 2) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);

	 * Set up the appropriate offload context
	 * this will consume the first descriptor
	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
	if (__predict_false(error)) {
		if (error == ENOBUFS)

	/* Do the flow director magic */
	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
		if (txr->atr_count >= atr_sample_rate) {
			ixgbe_atr(txr, m_head);

	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		txbuf = &txr->tx_buffers[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		if (++i == txr->num_desc)

	txd->read.cmd_type_len |=
	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
	txr->tx_avail -= nsegs;
	txr->next_avail_desc = i;

	txbuf->m_head = m_head;
	 * Here we swap the map so the last descriptor,
	 * which gets the completion interrupt has the
	 * real map, and the first descriptor gets the
	 * unused map from this descriptor.
	txr->tx_buffers[first].map = txbuf->map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/* Set the EOP descriptor that will be marked done */
	txbuf = &txr->tx_buffers[first];

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	 * Advance the Transmit Descriptor Tail (Tdt), this tells the
	 * hardware that this frame is available to transmit.
	++txr->total_packets;
	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
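	/*
	 * Illustrative note (not in the original source): the value
	 * written to the tail register is the index one past the last
	 * descriptor just filled.  On a 2048-entry ring, if this frame
	 * used descriptors 2045..2047, 'i' has wrapped to 0 and the
	 * write hands 2045-2047 to the hardware while marking slot 0
	 * as the next producer position.
	 */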
	/* Mark queue as having work */


/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
	struct adapter		*adapter = txr->adapter;
	device_t		dev = adapter->dev;
	struct ixgbe_tx_buf	*txbuf;

	 * Setup DMA descriptor areas.
	if ((error = bus_dma_tag_create(
	               bus_get_dma_tag(adapter->dev),	/* parent */
	               1, 0,			/* alignment, bounds */
	               BUS_SPACE_MAXADDR,	/* lowaddr */
	               BUS_SPACE_MAXADDR,	/* highaddr */
	               NULL, NULL,		/* filter, filterarg */
	               IXGBE_TSO_SIZE,		/* maxsize */
	               adapter->num_segs,	/* nsegments */
	               PAGE_SIZE,		/* maxsegsize */
	               NULL,			/* lockfuncarg */
		device_printf(dev,"Unable to allocate TX DMA tag\n");

	if (!(txr->tx_buffers =
	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
			device_printf(dev, "Unable to create TX DMA map\n");

	/* We free all, it handles case where we are in the middle */
	ixgbe_free_transmit_structures(adapter);
/*********************************************************************
 *
 *  Initialize a transmit ring.
 *
 **********************************************************************/
ixgbe_setup_transmit_ring(struct tx_ring *txr)
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */
	bzero((void *)txr->tx_base,
	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);

	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(na, txr->txtag,
			    txbuf->map, NMB(na, slot + si));
#endif /* DEV_NETMAP */
		/* Clear the EOP descriptor pointer */

	/* Set the rate at which we sample packets */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
		txr->atr_sample = atr_sample_rate;

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
/*********************************************************************
 *
 *  Initialize all transmit rings.
 *
 **********************************************************************/
ixgbe_setup_transmit_structures(struct adapter *adapter)
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++)
		ixgbe_setup_transmit_ring(txr);


/*********************************************************************
 *
 *  Free all transmit rings.
 *
 **********************************************************************/
ixgbe_free_transmit_structures(struct adapter *adapter)
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		ixgbe_free_transmit_buffers(txr);
		ixgbe_dma_free(adapter, &txr->txdma);
		IXGBE_TX_UNLOCK(txr);
		IXGBE_TX_LOCK_DESTROY(txr);
	free(adapter->tx_rings, M_DEVBUF);
/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
ixgbe_free_transmit_buffers(struct tx_ring *txr)
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *tx_buffer;

	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				tx_buffer->map = NULL;
		} else if (tx_buffer->map != NULL) {
			bus_dmamap_unload(txr->txtag,
			bus_dmamap_destroy(txr->txtag,
			tx_buffer->map = NULL;
#ifdef IXGBE_LEGACY_TX
		buf_ring_free(txr->br, M_DEVBUF);
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
/*********************************************************************
 *
 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
 *
 **********************************************************************/
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
	struct adapter *adapter = txr->adapter;
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ether_vlan_header *eh;
	u32	vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	int	ehdrlen, ip_hlen = 0;
	int	ctxd = txr->next_avail_desc;

	/* First check if TSO is to be used */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* Now ready a context descriptor */
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	** In advanced descriptors the vlan tag must
	** be placed into the context descriptor. Hence
	** we need to make one even if not doing offloads.
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))

	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	if (offload == FALSE)

		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr);
		/* XXX-BZ this will go badly in case of ext hdrs. */
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;

	vlan_macip_lens |= ip_hlen;

			if (mp->m_pkthdr.csum_flags & CSUM_TCP)
				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
			if (mp->m_pkthdr.csum_flags & CSUM_UDP)
				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
#if __FreeBSD_version >= 800000
			if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
				type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;

	if (offload) /* For the TX descriptor setup */
		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_desc)
	txr->next_avail_desc = ctxd;
/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
	struct ixgbe_adv_tx_context_desc *TXD;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0, paylen;
	u16 vtag = 0, eh_type;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;

	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		eh_type = eh->evl_proto;
		ehdrlen = ETHER_HDR_LEN;
		eh_type = eh->evl_encap_proto;

	switch (ntohs(eh_type)) {
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
		if (ip6->ip6_nxt != IPPROTO_TCP)
		ip_hlen = sizeof(struct ip6_hdr);
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(eh_type));

	ctxd = txr->next_avail_desc;
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	tcp_hlen = th->th_off << 2;

	/* This is used in the transmit desc in encap */
	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
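	/*
	 * Worked example (illustrative, not from the original source):
	 * for a 9014-byte TSO mbuf chain with a plain Ethernet header
	 * (ehdrlen = 14), a 20-byte IPv4 header and a 20-byte TCP
	 * header, paylen = 9014 - 14 - 20 - 20 = 8960: only the TCP
	 * payload is reported; the hardware replicates the headers for
	 * every segment it cuts.
	 */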
	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);

	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
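	/*
	 * Illustrative packing example (not from the original source),
	 * assuming the usual shift values from ixgbe_type.h (MSS shift
	 * 16, L4LEN shift 8): with tso_segsz = 1448 and a 20-byte TCP
	 * header, mss_l4len_idx = (1448 << 16) | (20 << 8) = 0x05a81400
	 * before the htole32() conversion above.
	 */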
	TXD->seqnum_seed = htole32(0);

	if (++ctxd == txr->num_desc)

	txr->next_avail_desc = ctxd;
	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
ixgbe_txeof(struct tx_ring *txr)
	struct adapter		*adapter = txr->adapter;
	struct ifnet		*ifp = adapter->ifp;
	u32			work, processed = 0;
	u32			limit = adapter->tx_process_limit;
	struct ixgbe_tx_buf	*buf;
	union ixgbe_adv_tx_desc *txd;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

	if (ifp->if_capenable & IFCAP_NETMAP) {
		struct netmap_adapter *na = NA(ifp);
		struct netmap_kring *kring = &na->tx_rings[txr->me];
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_POSTREAD);
		 * In netmap mode, all the work is done in the context
		 * of the client thread. Interrupt handlers only wake up
		 * clients, which may be sleeping on individual rings
		 * or on a global resource for all rings.
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
			netmap_tx_irq(ifp, txr->me);
#endif /* DEV_NETMAP */

	if (txr->tx_avail == txr->num_desc) {

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* The distance to ring end */
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);
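	/*
	 * Illustrative note (not in the original source): 'work' is kept
	 * as a negative offset from the end of the ring so the wrap test
	 * in the loop below is a simple "!work".  E.g. with num_desc =
	 * 1024 and next_to_clean = 1000, work starts at 1000 - 1024 =
	 * -24; after 24 descriptors are cleaned it hits 0, the buffer
	 * pointer is reset to the start of tx_buffers and work is
	 * rewound by another num_desc.
	 */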
		union ixgbe_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */

		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
			break;	/* I/O not complete */

			    buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			m_freem(buf->m_head);

		/* We clean the range if multi segment */
		while (txd != eop) {

			/* wrap the ring? */
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;

				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				m_freem(buf->m_head);

		/* Try the next packet */

		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
	} while (__predict_true(--limit));

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	work += txr->num_desc;
	txr->next_to_clean = work;

	** Queue Hang detection, we know there's
	** work outstanding or the first return
	** would have been taken, so increment busy
	** if nothing managed to get cleaned, then
	** in local_timer it will be checked and
	** marked as HUNG if it exceeds a MAX attempt.
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
	** If anything gets cleaned we reset state to 1,
	** note this will turn off HUNG if it's set.
	if (txr->tx_avail == txr->num_desc)
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same cpu. This would be a performance
** hit, but we only do it at IXGBE_FDIR_RATE of
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
	struct adapter			*adapter = txr->adapter;
	struct ix_queue			*que;
	struct ether_vlan_header	*eh;
	union ixgbe_atr_hash_dword	input = {.dword = 0};
	union ixgbe_atr_hash_dword	common = {.dword = 0};
	int				ehdrlen, ip_hlen;

	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
		ehdrlen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;

	/* Only handling IPv4 */
	if (etype != htons(ETHERTYPE_IP))

	ip = (struct ip *)(mp->m_data + ehdrlen);
	ip_hlen = ip->ip_hl << 2;

	/* check if we're UDP or TCP */
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= th->th_sport;
		common.port.src ^= th->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= uh->uh_sport;
		common.port.src ^= uh->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;

	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
	if (mp->m_pkthdr.ether_vtag)
		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
		common.flex_bytes ^= etype;
	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

	que = &adapter->queues[txr->me];
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU
	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
	    input, common, que->msix);

#endif /* IXGBE_FDIR */
** Used to detect a descriptor that has
** been merged by Hardware RSC.
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
	return (le32toh(rx->wb.lower.lo_dword.data) &
	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;

/*********************************************************************
 *
 *  Initialize Hardware RSC (LRO) feature on 82599
 *  for an RX ring, this is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPV4 and
 *        our testing has shown soft LRO to be as effective,
 *        I have decided to disable this by default.
 *
 **********************************************************************/
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
	struct adapter	*adapter = rxr->adapter;
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		rscctrl, rdrxctl;

	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	** Limit the total number of descriptors that
	** can be combined, so it does not exceed 64K
	if (rxr->mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (rxr->mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (rxr->mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else  /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
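	/*
	 * Illustrative arithmetic (not in the original source): the
	 * MAXDESC choice keeps one coalesced RSC packet at or below the
	 * 64K limit mentioned above: 16 x 2KB (MCLBYTES) = 32KB,
	 * 8 x 4KB (MJUMPAGESIZE) = 32KB, 4 x 9KB (MJUM9BYTES) = 36KB,
	 * and a single 16KB cluster stands alone; something like
	 * 16 x 9KB = 144KB would overflow the limit.
	 */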
	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
	    IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary, if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
	struct adapter		*adapter = rxr->adapter;
	bus_dma_segment_t	seg[1];
	struct ixgbe_rx_buf	*rxbuf;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;
	i = j = rxr->next_to_refresh;
	/* Control the loop with one beyond */
	if (++j == rxr->num_desc)

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;

		/* If we're dealing with an mbuf that was copied rather
		 * than replaced, there's no need to go through busdma.
		 */
		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
			/* Get the memory mapping */
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
				printf("Refresh mbufs: payload dmamap load"
				    " failure - %d\n", error);
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
			    htole64(seg[0].ds_addr);
			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
			rxbuf->flags &= ~IXGBE_RX_COPY;

		/* Next is precalculated */
		rxr->next_to_refresh = i;
		if (++j == rxr->num_desc)

	if (refreshed) /* Update hardware tail index */
		IXGBE_WRITE_REG(&adapter->hw,
		    rxr->tail, rxr->next_to_refresh);
/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffers
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
 *
 **********************************************************************/
ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
	struct adapter		*adapter = rxr->adapter;
	device_t		dev = adapter->dev;
	struct ixgbe_rx_buf	*rxbuf;

	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
	if (!(rxr->rx_buffers =
	    (struct ixgbe_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	           1, 0,		/* alignment, bounds */
	           BUS_SPACE_MAXADDR,	/* lowaddr */
	           BUS_SPACE_MAXADDR,	/* highaddr */
	           NULL, NULL,		/* filter, filterarg */
	           MJUM16BYTES,		/* maxsize */
	           MJUM16BYTES,		/* maxsegsize */
	           NULL,		/* lockfunc */
	           NULL,		/* lockfuncarg */
		device_printf(dev, "Unable to create RX DMA tag\n");

	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
		rxbuf = &rxr->rx_buffers[i];
		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
			device_printf(dev, "Unable to create RX dma map\n");

	/* Frees all, but can handle partial completion */
	ixgbe_free_receive_structures(adapter);
ixgbe_free_receive_ring(struct rx_ring *rxr)
	struct ixgbe_rx_buf *rxbuf;

	for (int i = 0; i < rxr->num_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf != NULL) {
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			rxbuf->buf->m_flags |= M_PKTHDR;
			m_freem(rxbuf->buf);
/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 **********************************************************************/
ixgbe_setup_receive_ring(struct rx_ring *rxr)
	struct adapter		*adapter;
	struct ixgbe_rx_buf	*rxbuf;
	bus_dma_segment_t	seg[1];
	struct lro_ctrl		*lro = &rxr->lro;
	int			rsize, nsegs, error = 0;
	struct netmap_adapter *na = NA(rxr->adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	adapter = rxr->adapter;

	/* Clear the ring contents */
	/* same as in ixgbe_setup_transmit_ring() */
	slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif /* DEV_NETMAP */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
	/* Cache the size */
	rxr->mbuf_sz = adapter->rx_mbuf_sz;

	/* Free current RX buffer structs and their mbufs */
	ixgbe_free_receive_ring(rxr);

	/* Now replenish the mbufs */
	for (int j = 0; j != rxr->num_desc; ++j) {
		rxbuf = &rxr->rx_buffers[j];
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ). No need to allocate
		 * an mbuf, so end the block with a continue;
			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
			rxbuf->addr = htole64(paddr);
#endif /* DEV_NETMAP */
		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->buf == NULL) {
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    rxbuf->pmap, mp, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		bus_dmamap_sync(rxr->ptag,
		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
		/* Update the descriptor and the cached value */
		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
		rxbuf->addr = htole64(seg[0].ds_addr);

	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->vtag_strip = FALSE;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	** Now set up the LRO interface:
	if (ixgbe_rsc_enable)
		ixgbe_setup_hw_rsc(rxr);
	else if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
			device_printf(dev, "LRO Initialization failed!\n");
		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;

	IXGBE_RX_UNLOCK(rxr);

	ixgbe_free_receive_ring(rxr);
	IXGBE_RX_UNLOCK(rxr);
/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
ixgbe_setup_receive_structures(struct adapter *adapter)
	struct rx_ring *rxr = adapter->rx_rings;

	for (j = 0; j < adapter->num_queues; j++, rxr++)
		if (ixgbe_setup_receive_ring(rxr))

	 * Free RX buffers allocated so far, we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'j' failed, so it's the terminus.
	for (int i = 0; i < j; ++i) {
		rxr = &adapter->rx_rings[i];
		ixgbe_free_receive_ring(rxr);
/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
ixgbe_free_receive_structures(struct adapter *adapter)
	struct rx_ring *rxr = adapter->rx_rings;

	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl		*lro = &rxr->lro;
		ixgbe_free_receive_buffers(rxr);
		/* Free LRO memory */
		/* Free the ring memory as well */
		ixgbe_dma_free(adapter, &rxr->rxdma);

	free(adapter->rx_rings, M_DEVBUF);


/*********************************************************************
 *
 *  Free receive ring data structures
 *
 **********************************************************************/
ixgbe_free_receive_buffers(struct rx_ring *rxr)
	struct adapter		*adapter = rxr->adapter;
	struct ixgbe_rx_buf	*rxbuf;

	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (int i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			if (rxbuf->buf != NULL) {
				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
				rxbuf->buf->m_flags |= M_PKTHDR;
				m_freem(rxbuf->buf);
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_DEVBUF);
			rxr->rx_buffers = NULL;

	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
	 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
	 * should be computed by hardware. Also it should not have VLAN tag in
	 * ethernet header. In case of IPv6 we do not yet support ext. hdrs.
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		 * Send to the stack if:
		 **  - LRO not enabled, or
		 **  - no LRO resources, or
		 **  - lro enqueue fails
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)

	IXGBE_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
	struct ixgbe_rx_buf	*rbuf;

	rbuf = &rxr->rx_buffers[i];

	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	if (rbuf->fmp != NULL) { /* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		rbuf->buf = NULL;	/* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {

	bus_dmamap_unload(rxr->ptag, rbuf->pmap);
/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
ixgbe_rxeof(struct ix_queue *que)
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	int			i, nextp, processed = 0;
	u32			count = adapter->rx_process_limit;
	union ixgbe_adv_rx_desc	*cur;
	struct ixgbe_rx_buf	*rbuf, *nbuf;

	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf	*sendmp, *mp;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)

		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
			if (IXGBE_IS_VF(adapter))
				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);

		** On 82599 which supports a hardware
		** LRO (called HW RSC), packets need
		** not be fragmented across sequential
		** descriptors, rather the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before, it
		** required eliminating global chain pointers
		** in favor of what we are doing here. -jfv

		** Figure out the next descriptor
		if (rxr->hw_rsc == TRUE) {
			rsc = ixgbe_rsc_count(cur);
			rxr->rsc_num += (rsc - 1);
		if (rsc) { /* Get hardware index */
			    IXGBE_RXDADV_NEXTP_MASK) >>
			    IXGBE_RXDADV_NEXTP_SHIFT);
		} else { /* Just sequential */
			if (nextp == adapter->num_rx_desc)
		nbuf = &rxr->rx_buffers[nextp];

		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.

		** See if there is a stored head
		** that determines what we are
		if (sendmp != NULL) {  /* secondary frag */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
			 * Optimize. This might be a small packet,
			 * maybe just a TCP ACK. Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					    IXGBE_RX_COPY_ALIGN;
					ixgbe_bcopy(mp->m_data,
					    sendmp->m_data, len);
					sendmp->m_len = len;
					rbuf->flags |= IXGBE_RX_COPY;
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				/* first desc of a non-ps chain */
				sendmp->m_flags |= M_PKTHDR;
				sendmp->m_pkthdr.len = mp->m_len;

		/* Pass the head pointer on */
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			sendmp->m_pkthdr.rcvif = ifp;

			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) &&
			    (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixgbe_rx_checksum(staterr, sendmp, ptype);

			 * In case of multiqueue, we have RXCSUM.PCSD bit set
			 * and never cleared. This means we have RSS hash
			 * available to be used.
			if (adapter->num_queues > 1) {
				sendmp->m_pkthdr.flowid =
				    le32toh(cur->wb.lower.hi_dword.rss);
				switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
				case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV4);
				case IXGBE_RXDADV_RSSTYPE_IPV4:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV4);
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6);
				case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6_EX);
				case IXGBE_RXDADV_RSSTYPE_IPV6:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_IPV6);
				case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_TCP_IPV6_EX);
				case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV4);
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6);
				case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
					M_HASHTYPE_SET(sendmp,
					    M_HASHTYPE_RSS_UDP_IPV6_EX);
					M_HASHTYPE_SET(sendmp,
				sendmp->m_pkthdr.flowid = que->msix;
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	 * Flush any outstanding LRO work
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);

	IXGBE_RX_UNLOCK(rxr);

	** Still have cleaning to do?
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of checksum so that the stack
 *  doesn't spend time verifying the checksum.
 *
 *********************************************************************/
ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
	u16	status = (u16) staterr;
	u8	errors = (u8) (staterr >> 24);

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)

	if (status & IXGBE_RXD_STAT_IPCS) {
		if (!(errors & IXGBE_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
			mp->m_pkthdr.csum_flags = 0;
	if (status & IXGBE_RXD_STAT_L4CS) {
		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
			type = CSUM_SCTP_VALID;
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= type;
				mp->m_pkthdr.csum_data = htons(0xffff);
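	/*
	 * Illustrative note (not in the original source): for a good TCP
	 * or UDP checksum the stack ends up seeing CSUM_DATA_VALID |
	 * CSUM_PSEUDO_HDR in csum_flags together with csum_data = 0xffff,
	 * the FreeBSD convention meaning the L4 checksum (including the
	 * pseudo-header) was already verified by the NIC and needs no
	 * software recomputation.
	 */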
/********************************************************************
 * Manage DMA'able memory.
 *******************************************************************/
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
	*(bus_addr_t *) arg = segs->ds_addr;

ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct ixgbe_dma_alloc *dma, int mapflags)
	device_t dev = adapter->dev;

	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
	    DBA_ALIGN, 0,	/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    size,		/* maxsegsize */
	    BUS_DMA_ALLOCNOW,	/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "

	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "

	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    mapflags | BUS_DMA_NOWAIT);
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "

	dma->dma_size = size;

	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
	dma->dma_tag = NULL;

ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 **********************************************************************/
ixgbe_allocate_queues(struct adapter *adapter)
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int rsize, tsize, error = IXGBE_SUCCESS;
	int txconf = 0, rxconf = 0;
	enum ixgbe_iov_mode iov_mode;

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");

	/* First allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");

	/* Next allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");

	/* For the ring itself */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
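	/*
	 * Illustrative arithmetic (not in the original source): each
	 * advanced TX descriptor is 16 bytes, so a ring of 2048
	 * descriptors needs tsize = roundup2(2048 * 16, DBA_ALIGN)
	 * = 32768 bytes, already a multiple of the (typically 128-byte)
	 * descriptor base alignment.
	 */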
	iov_mode = ixgbe_get_iov_mode(adapter);
	adapter->pool = ixgbe_max_vfs(iov_mode);

	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
		txr->me = ixgbe_pf_que_index(iov_mode, i);
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, tsize,
		    &txr->txdma, BUS_DMA_NOWAIT)) {
			    "Unable to allocate TX Descriptor memory\n");
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			    "Critical Failure setting up transmit buffers\n");

#ifndef IXGBE_LEGACY_TX
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			    "Critical Failure setting up buf ring\n");

	 * Next the RX queues...
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
		rxr->me = ixgbe_pf_que_index(iov_mode, i);
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			    "Unable to allocate RxDescriptor memory\n");
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (ixgbe_allocate_receive_buffers(rxr)) {
			    "Critical Failure setting up receive buffers\n");

	** Finally set up the queue holding structs
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];

	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
	free(adapter->tx_rings, M_DEVBUF);
	free(adapter->queues, M_DEVBUF);