/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_rss.h"
#endif

#include "ixgbe.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef DEV_NETMAP
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <dev/netmap/netmap_kern.h>

extern int ix_crcstrip;
#endif /* DEV_NETMAP */
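
/*
 * Note (assumption, for the reader): ix_crcstrip is defined by the netmap
 * adaptation code rather than in this file; when netmap is active it
 * selects whether the NIC strips the Ethernet CRC. Its only use here is
 * in ixgbe_setup_hw_rsc() below.
 */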
/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  Also this will cause IP forwarding to
**  fail and that can't be controlled by
**  the stack as LRO can. For all these
**  reasons I've deemed it best to leave
**  this off and not bother with a tuneable
**  interface; it would need to be compiled
**  in by the user.
*/
static bool ixgbe_rsc_enable = FALSE;
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool; it means that
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
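
/*
 * How the sample rate is applied (see ixgbe_xmit() below): each TX ring
 * counts packets in txr->atr_count and calls ixgbe_atr() to seed a Flow
 * Director filter once the count reaches atr_sample_rate, then resets
 * the count. Roughly:
 *
 *	if (txr->atr_sample && !adapter->fdir_reinit) {
 *		++txr->atr_count;
 *		if (txr->atr_count >= atr_sample_rate) {
 *			ixgbe_atr(txr, m_head);
 *			txr->atr_count = 0;
 *		}
 *	}
 */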
/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static void	ixgbe_setup_transmit_ring(struct tx_ring *);
static void	ixgbe_free_transmit_buffers(struct tx_ring *);
static int	ixgbe_setup_receive_ring(struct rx_ring *);
static void	ixgbe_free_receive_buffers(struct rx_ring *);

static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
static int	ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int	ixgbe_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	ixgbe_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
#ifdef IXGBE_FDIR
static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
#endif
static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u32);
#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/
void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
{
	struct mbuf    *m_head;
	struct adapter *adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
			break;

		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;

		if (ixgbe_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);
	}
	return;
}

/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
void
ixgbe_start(struct ifnet *ifp)
{
	struct adapter *adapter = ifp->if_softc;
	struct tx_ring *txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IXGBE_TX_LOCK(txr);
		ixgbe_start_locked(txr, ifp);
		IXGBE_TX_UNLOCK(txr);
	}
	return;
}
#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit Entry Point
** (if_transmit function)
*/
int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	int		i, err = 0;
#ifdef RSS
	uint32_t	bucket_id;
#endif

	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is setup correctly, it should be the
	 * same bucket that the current CPU we're on is.
	 */
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0) {
			i = bucket_id % adapter->num_queues;
			if (bucket_id >= adapter->num_queues)
				if_printf(ifp, "bucket_id (%d) > num_queues "
				    "(%d)\n", bucket_id, adapter->num_queues);
		} else
#endif
			i = m->m_pkthdr.flowid % adapter->num_queues;
	} else
		i = curcpu % adapter->num_queues;

	/* Check for a hung queue and pick alternative */
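	/*
	 * Note: ffsl() returns a 1-based bit index (0 when no bit is
	 * set), while the active_queues test below uses 0-based queue
	 * numbers, hence the "- 1" when picking the fallback queue.
	 */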
	if (((1 << i) & adapter->active_queues) == 0)
		i = ffsl(adapter->active_queues) - 1;

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &txr->txq_task);

	return (0);
}
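
/*
 * A minimal sketch of how these entry points are expected to be wired
 * up at interface setup time (the actual hookup lives in the driver's
 * attach/interface-setup path, not in this file):
 *
 *	ifp->if_transmit = ixgbe_mq_start;
 *	ifp->if_qflush = ixgbe_qflush;
 */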
int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		enqueued = 0, err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
#else
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next == NULL) {
				drbr_advance(ifp, txr->br);
			} else {
				drbr_putback(ifp, txr->br, next);
			}
#endif
			break;
		}
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);
#endif
		enqueued++;
#if 0 // this is VF-only
#if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
		 * address.
		 */
		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
#endif
#endif
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
#if __FreeBSD_version < 901504
		next = drbr_dequeue(ifp, txr->br);
#endif
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
		ixgbe_txeof(txr);

	return (err);
}
/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixgbe_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring *txr = arg;
	struct adapter *adapter = txr->adapter;
	struct ifnet *ifp = adapter->ifp;

	IXGBE_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixgbe_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);
}

/*
 * Flush all ring buffers
 */
void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */

/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/
static int
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter	*adapter = txr->adapter;
	u32		olinfo_status = 0, cmd_type_len;
	int		i, j, error, nsegs;
	int		first;
	bool		remap = TRUE;
	struct mbuf	*m_head;
	bus_dma_segment_t segs[adapter->num_segs];
	bus_dmamap_t	map;
	struct ixgbe_tx_buf *txbuf;
	union ixgbe_adv_tx_desc *txd = NULL;

	m_head = *m_headp;

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail_desc;
	txbuf = &txr->tx_buffers[first];
	map = txbuf->map;

	/*
	 * Map the packet for DMA.
	 */
retry:
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (__predict_false(error)) {
		struct mbuf *m;

		switch (error) {
		case EFBIG:
			/* Try it again? - one try */
			if (remap == TRUE) {
				remap = FALSE;
				/*
				 * XXX: m_defrag will choke on
				 * non-MCLBYTES-sized clusters
				 */
				m = m_defrag(*m_headp, M_NOWAIT);
				if (m == NULL) {
					adapter->mbuf_defrag_failed++;
					m_freem(*m_headp);
					*m_headp = NULL;
					return (ENOBUFS);
				}
				*m_headp = m;
				goto retry;
			} else
				return (error);
		case ENOMEM:
			txr->no_tx_dma_setup++;
			return (error);
		default:
			txr->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->tx_avail - 2) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the appropriate offload context,
	 * this will consume the first descriptor
	 */
	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
	if (__predict_false(error)) {
		if (error == ENOBUFS)
			*m_headp = NULL;
		return (error);
	}

#ifdef IXGBE_FDIR
	/* Do the flow director magic */
	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
		++txr->atr_count;
		if (txr->atr_count >= atr_sample_rate) {
			ixgbe_atr(txr, m_head);
			txr->atr_count = 0;
		}
	}
#endif

	olinfo_status |= IXGBE_ADVTXD_CC;
	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		bus_addr_t segaddr;

		txbuf = &txr->tx_buffers[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		if (++i == txr->num_desc)
			i = 0;
	}

	txd->read.cmd_type_len |=
	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
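	/*
	 * RS on the final descriptor asks the hardware to write the DD
	 * (descriptor done) bit back once the frame is sent;
	 * ixgbe_txeof() keys its cleanup on that bit in the stored EOP
	 * descriptor.
	 */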
	txr->tx_avail -= nsegs;
	txr->next_avail_desc = i;

	txbuf->m_head = m_head;
	/*
	 * Here we swap the map so the last descriptor,
	 * which gets the completion interrupt, has the
	 * real map, and the first descriptor gets the
	 * unused map from this descriptor.
	 */
	txr->tx_buffers[first].map = txbuf->map;
	txbuf->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/* Set the EOP descriptor that will be marked done */
	txbuf = &txr->tx_buffers[first];
	txbuf->eop = txd;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);

	/* Mark queue as having work */
	if (txr->busy == 0)
		txr->busy = 1;

	return (0);
}

/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
static int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	device_t dev = adapter->dev;
	struct ixgbe_tx_buf *txbuf;
	int error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(
	    bus_get_dma_tag(adapter->dev),	/* parent */
	    1, 0,				/* alignment, bounds */
	    BUS_SPACE_MAXADDR,			/* lowaddr */
	    BUS_SPACE_MAXADDR,			/* highaddr */
	    NULL, NULL,				/* filter, filterarg */
	    IXGBE_TSO_SIZE,			/* maxsize */
	    adapter->num_segs,			/* nsegments */
	    PAGE_SIZE,				/* maxsegsize */
	    0,					/* flags */
	    NULL,				/* lockfunc */
	    NULL,				/* lockfuncarg */
	    &txr->txtag))) {
		device_printf(dev,"Unable to allocate TX DMA tag\n");
		goto fail;
	}

	if (!(txr->tx_buffers =
	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return (0);
fail:
	/* We free all, it handles case where we are in the middle */
	ixgbe_free_transmit_structures(adapter);
	return (error);
}

/*********************************************************************
 *
 *  Initialize a transmit ring.
 *
 **********************************************************************/
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	IXGBE_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */
	bzero((void *)txr->tx_base,
	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(na, txr->txtag,
			    txbuf->map, NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP descriptor pointer */
		txbuf->eop = NULL;
	}

#ifdef IXGBE_FDIR
	/* Set the rate at which we sample packets */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
		txr->atr_sample = atr_sample_rate;
#endif

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
}

/*********************************************************************
 *
 *  Initialize all transmit rings.
 *
 **********************************************************************/
int
ixgbe_setup_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++)
		ixgbe_setup_transmit_ring(txr);

	return (0);
}

/*********************************************************************
 *
 *  Free all transmit rings.
 *
 **********************************************************************/
void
ixgbe_free_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		ixgbe_free_transmit_buffers(txr);
		ixgbe_dma_free(adapter, &txr->txdma);
		IXGBE_TX_UNLOCK(txr);
		IXGBE_TX_LOCK_DESTROY(txr);
	}
	free(adapter->tx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
static void
ixgbe_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_tx_buf *tx_buffer;
	int i;

	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		} else if (tx_buffer->map != NULL) {
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			bus_dmamap_destroy(txr->txtag,
			    tx_buffer->map);
			tx_buffer->map = NULL;
		}
	}
#ifndef IXGBE_LEGACY_TX
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
	return;
}

/*********************************************************************
 *
 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
 *
 **********************************************************************/
static int
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct adapter *adapter = txr->adapter;
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ether_vlan_header *eh;
#ifdef INET
	struct ip *ip;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	int ehdrlen, ip_hlen = 0;
	u16 etype;
	u8 ipproto = 0;
	int offload = TRUE;
	int ctxd = txr->next_avail_desc;
	u16 vtag = 0;
	caddr_t l3d;

	/* First check if TSO is to be used */
	if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO))
		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* Now ready a context descriptor */
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must
	** be placed into the context descriptor. Hence
	** we need to make one even if not doing offloads.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
		return (0);

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	if (offload == FALSE)
		goto no_offloads;

	/*
	 * If the first mbuf only includes the ethernet header, jump to the next one
	 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
	 * XXX: And assumes the entire IP header is contained in one mbuf
	 */
	if (mp->m_len == ehdrlen && mp->m_next)
		l3d = mtod(mp->m_next, caddr_t);
	else
		l3d = mtod(mp, caddr_t) + ehdrlen;

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(l3d);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Insert IPv4 checksum into data descriptors */
		if (mp->m_pkthdr.csum_flags & CSUM_IP) {
			ip->ip_sum = 0;
			*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		}
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(l3d);
		ip_hlen = sizeof(struct ip6_hdr);
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
	default:
		offload = FALSE;
		break;
	}

	vlan_macip_lens |= ip_hlen;

	/* No support for offloads for non-L4 next headers */
	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
		else
			offload = FALSE;
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
		else
			offload = FALSE;
		break;
	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
		else
			offload = FALSE;
		break;
	default:
		offload = FALSE;
		break;
	}

	if (offload) /* Insert L4 checksum into data descriptors */
		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

no_offloads:
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);
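	/*
	 * Field packing above (advanced context descriptor, per the
	 * 82599/X540 datasheets): vlan_macip_lens carries the VLAN tag
	 * in bits 31:16, MACLEN (L2 header bytes) in 15:9 and IPLEN in
	 * 8:0, which is why the code ORs in vtag << 16, ehdrlen << 9
	 * and ip_hlen directly.
	 */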
	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (0);
}

/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct ixgbe_adv_tx_context_desc *TXD;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0, paylen;
	u16 vtag = 0, eh_type;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#ifdef INET
	struct ip *ip;
#endif
	struct tcphdr *th;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		eh_type = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		eh_type = eh->evl_encap_proto;
	}

	switch (ntohs(eh_type)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (ENXIO);
		ip_hlen = sizeof(struct ip6_hdr);
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return (ENXIO);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		break;
#endif
	default:
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(eh_type));
		break;
	}

	ctxd = txr->next_avail_desc;
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	tcp_hlen = th->th_off << 2;

	/* This is used in the transmit desc in encap */
	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);
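	/*
	 * Worked example: for a standard 1500-byte MTU TCP flow with no
	 * options, tso_segsz is typically 1460 and tcp_hlen 20, so the
	 * hardware emits 1460-byte segments and knows the L4 header is
	 * 20 bytes when replicating headers for each segment.
	 */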
	TXD->seqnum_seed = htole32(0);

	if (++ctxd == txr->num_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
	++txr->tso_tx;
	return (0);
}

/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
void
ixgbe_txeof(struct tx_ring *txr)
{
	struct adapter		*adapter = txr->adapter;
	struct ifnet		*ifp = adapter->ifp;
	u32			work, processed = 0;
	u32			limit = adapter->tx_process_limit;
	struct ixgbe_tx_buf	*buf;
	union ixgbe_adv_tx_desc *txd;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

#ifdef DEV_NETMAP
	if (ifp->if_capenable & IFCAP_NETMAP) {
		struct netmap_adapter *na = NA(ifp);
		struct netmap_kring *kring = &na->tx_rings[txr->me];
		txd = txr->tx_base;
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_POSTREAD);
		/*
		 * In netmap mode, all the work is done in the context
		 * of the client thread. Interrupt handlers only wake up
		 * clients, which may be sleeping on individual rings
		 * or on a global resource for all rings.
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		 */
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
			netmap_tx_irq(ifp, txr->me);
		}
		return;
	}
#endif /* DEV_NETMAP */

	if (txr->tx_avail == txr->num_desc) {
		txr->busy = 0;
		return;
	}

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* The distance to ring end */
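	/*
	 * From here on "work" is kept as a negative offset from the end
	 * of the ring: it reaches 0 exactly when the index wraps, which
	 * makes the wrap test below a simple !work, and num_desc is
	 * added back at the end to recover next_to_clean.
	 */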
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */
			break;

		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
			break;	/* I/O not complete */

		if (buf->m_head) {
			txr->bytes +=
			    buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag,
			    buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
		buf->eop = NULL;
		++txr->tx_avail;

		/* We clean the range if multi segment */
		while (txd != eop) {
			++txd;
			++buf;
			++work;
			/* wrap the ring? */
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;
				txd = txr->tx_base;
			}
			if (buf->m_head) {
				txr->bytes +=
				    buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag,
				    buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag,
				    buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			++txr->tx_avail;
			buf->eop = NULL;
		}
		++txr->packets;
		++processed;

		/* Try the next packet */
		++txd;
		++buf;
		++work;
		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
			txd = txr->tx_base;
		}
		prefetch(txd);
	} while (__predict_true(--limit));

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	work += txr->num_desc;
	txr->next_to_clean = work;

	/*
	** Queue Hang detection: we know there's
	** work outstanding or the first return
	** would have been taken, so increment busy
	** if nothing managed to get cleaned; then
	** in local_timer it will be checked and
	** marked as HUNG if it exceeds a MAX attempt.
	*/
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
		++txr->busy;
	/*
	** If anything gets cleaned we reset state to 1,
	** note this will turn off HUNG if it's set.
	*/
	if (processed)
		txr->busy = 1;

	if (txr->tx_avail == txr->num_desc)
		txr->busy = 0;

	return;
}
#ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same cpu. This would be a performance
** hit, but we only do it at IXGBE_FDIR_RATE of
** packets.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter			*adapter = txr->adapter;
	struct ix_queue			*que;
	struct ip			*ip;
	struct tcphdr			*th;
	struct udphdr			*uh;
	struct ether_vlan_header	*eh;
	union ixgbe_atr_hash_dword	input = {.dword = 0};
	union ixgbe_atr_hash_dword	common = {.dword = 0};
	int				ehdrlen, ip_hlen;
	u16				etype;

	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	/* Only handling IPv4 */
	if (etype != htons(ETHERTYPE_IP))
		return;

	ip = (struct ip *)(mp->m_data + ehdrlen);
	ip_hlen = ip->ip_hl << 2;

	/* check if we're UDP or TCP */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= th->th_sport;
		common.port.src ^= th->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
		break;
	case IPPROTO_UDP:
		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= uh->uh_sport;
		common.port.src ^= uh->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
		break;
	default:
		return;
	}

	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
	if (mp->m_pkthdr.ether_vtag)
		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
	else
		common.flex_bytes ^= etype;
	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

	que = &adapter->queues[txr->me];
	/*
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU
	*/
	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
	    input, common, que->msix);
}
#endif /* IXGBE_FDIR */

/*
** Used to detect a descriptor that has
** been merged by Hardware RSC.
*/
static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
{
	return (le32toh(rx->wb.lower.lo_dword.data) &
	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
}

/*********************************************************************
 *
 *  Initialize Hardware RSC (LRO) feature on 82599
 *  for an RX ring; this is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPV4 and
 *        our testing has shown soft LRO to be as effective,
 *        I have decided to disable this by default.
 *
 **********************************************************************/
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		rscctrl, rdrxctl;

	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
	rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	** Limit the total number of descriptors that
	** can be combined, so it does not exceed 64K
	*/
	if (rxr->mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (rxr->mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (rxr->mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else  /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
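	/*
	 * The cap keeps the largest possible coalesced sequence at or
	 * under 64KB, e.g. 16 x 2KB (MCLBYTES) or 8 x 4KB (MJUMPAGESIZE)
	 * = 32KB, 4 x 9KB = 36KB, and a single 16KB cluster.
	 */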
	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
	    IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

	rxr->hw_rsc = TRUE;
}

/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, so it can simply
 *     be called again later to try again.
 *
 **********************************************************************/
static void
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	bus_dma_segment_t	seg[1];
	struct ixgbe_rx_buf	*rxbuf;
	struct mbuf		*mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_to_refresh;
	/* Control the loop with one beyond */
	if (++j == rxr->num_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
		} else
			mp = rxbuf->buf;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;

		/* If we're dealing with an mbuf that was copied rather
		 * than replaced, there's no need to go through busdma.
		 */
		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
			/* Get the memory mapping */
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
			if (error != 0) {
				printf("Refresh mbufs: payload dmamap load"
				    " failure - %d\n", error);
				m_free(mp);
				rxbuf->buf = NULL;
				goto update;
			}
			rxbuf->buf = mp;
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
			    htole64(seg[0].ds_addr);
		} else {
			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
			rxbuf->flags &= ~IXGBE_RX_COPY;
		}

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_to_refresh = i;
		if (++j == rxr->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		IXGBE_WRITE_REG(&adapter->hw,
		    rxr->tail, rxr->next_to_refresh);
	return;
}

/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
 *
 **********************************************************************/
static int
ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	device_t		dev = adapter->dev;
	struct ixgbe_rx_buf	*rxbuf;
	int			bsize, error;

	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
	if (!(rxr->rx_buffers =
	    (struct ixgbe_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    MJUM16BYTES,	/* maxsize */
	    1,			/* nsegments */
	    MJUM16BYTES,	/* maxsegsize */
	    0,			/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA tag\n");
		goto fail;
	}

	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
		rxbuf = &rxr->rx_buffers[i];
		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX dma map\n");
			goto fail;
		}
	}

	return (0);

fail:
	/* Frees all, but can handle partial completion */
	ixgbe_free_receive_structures(adapter);
	return (error);
}
static void
ixgbe_free_receive_ring(struct rx_ring *rxr)
{
	struct ixgbe_rx_buf *rxbuf;

	for (int i = 0; i < rxr->num_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf != NULL) {
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			rxbuf->buf->m_flags |= M_PKTHDR;
			m_freem(rxbuf->buf);
			rxbuf->buf = NULL;
			rxbuf->flags = 0;
		}
	}
}

/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 **********************************************************************/
static int
ixgbe_setup_receive_ring(struct rx_ring *rxr)
{
	struct adapter		*adapter;
	struct ifnet		*ifp;
	device_t		dev;
	struct ixgbe_rx_buf	*rxbuf;
	bus_dma_segment_t	seg[1];
	struct lro_ctrl		*lro = &rxr->lro;
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(rxr->adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	adapter = rxr->adapter;
	ifp = adapter->ifp;
	dev = adapter->dev;

	/* Clear the ring contents */
	IXGBE_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixgbe_setup_transmit_ring() */
	slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif /* DEV_NETMAP */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
	/* Cache the size */
	rxr->mbuf_sz = adapter->rx_mbuf_sz;

	/* Free current RX buffer structs and their mbufs */
	ixgbe_free_receive_ring(rxr);

	/* Now replenish the mbufs */
	for (int j = 0; j != rxr->num_desc; ++j) {
		struct mbuf *mp;

		rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ). No need to allocate
		 * an mbuf, so end the block with a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
			rxbuf->addr = htole64(paddr);
			continue;
		}
#endif /* DEV_NETMAP */
		rxbuf->flags = 0;
		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->buf == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = rxbuf->buf;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    rxbuf->pmap, mp, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
		/* Update the descriptor and the cached value */
		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
		rxbuf->addr = htole64(seg[0].ds_addr);
	}

	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->rx_copies = 0;
	rxr->rx_bytes = 0;
	rxr->vtag_strip = FALSE;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	** Now set up the LRO interface:
	*/
	if (ixgbe_rsc_enable)
		ixgbe_setup_hw_rsc(rxr);
	else if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			device_printf(dev, "LRO Initialization failed!\n");
			goto fail;
		}
		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;
	}

	IXGBE_RX_UNLOCK(rxr);
	return (0);

fail:
	ixgbe_free_receive_ring(rxr);
	IXGBE_RX_UNLOCK(rxr);
	return (error);
}

/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
int
ixgbe_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int j;

	for (j = 0; j < adapter->num_queues; j++, rxr++)
		if (ixgbe_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free RX buffers allocated so far, we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself. 'j' failed, so it's the terminus.
	 */
	for (int i = 0; i < j; ++i) {
		rxr = &adapter->rx_rings[i];
		ixgbe_free_receive_ring(rxr);
	}

	return (ENOBUFS);
}

/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
void
ixgbe_free_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;

	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl *lro = &rxr->lro;
		ixgbe_free_receive_buffers(rxr);
		/* Free LRO memory */
		tcp_lro_free(lro);
		/* Free the ring memory as well */
		ixgbe_dma_free(adapter, &rxr->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
}

/*********************************************************************
 *
 *  Free receive ring data structures
 *
 **********************************************************************/
static void
ixgbe_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	struct ixgbe_rx_buf	*rxbuf;

	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (int i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			if (rxbuf->buf != NULL) {
				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
				rxbuf->buf->m_flags |= M_PKTHDR;
				m_freem(rxbuf->buf);
				rxbuf->buf = NULL;
			}
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
				rxbuf->pmap = NULL;
			}
		}
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_DEVBUF);
			rxr->rx_buffers = NULL;
		}
	}

	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}

	return;
}

static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
{
	/*
	 * ATM LRO is only for IP/TCP packets and the TCP checksum of the
	 * packet should be computed by hardware. Also it should not have
	 * a VLAN tag in the ethernet header. In case of IPv6 we do not
	 * yet support ext. hdrs.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 **  - LRO not enabled, or
		 **  - no LRO resources, or
		 **  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
	IXGBE_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXGBE_RX_LOCK(rxr);
}

static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixgbe_rx_buf *rbuf;

	rbuf = &rxr->rx_buffers[i];

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/

	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
	bus_dmamap_unload(rxr->ptag, rbuf->pmap);

	rbuf->flags = 0;

	return;
}

/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptors and sends data which has been
 *  dma'ed into host memory to the upper layer.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	int			i, nextp, processed = 0;
	u32			staterr = 0;
	u32			count = adapter->rx_process_limit;
	union ixgbe_adv_rx_desc	*cur;
	struct ixgbe_rx_buf	*rbuf, *nbuf;
	u16			pkt_info;

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf	*sendmp, *mp;
		u32		rsc, ptype;
		u16		len;
		u16		vtag = 0;
		bool		eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
			if (IXGBE_IS_VF(adapter))
				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);
			goto next_desc;
		}

		/*
		** On 82599 which supports a hardware
		** LRO (called HW RSC), packets need
		** not be fragmented across sequential
		** descriptors, rather the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before; it
		** required eliminating global chain pointers
		** in favor of what we are doing here. -jfv
		*/
		if (!eop) {
			/*
			** Figure out the next descriptor
			** of this frame.
			*/
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr &
				    IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		mp->m_len = len;
		/*
		** See if there is a stored head
		** that determines what we are
		*/
		sendmp = rbuf->fmp;
		if (sendmp != NULL) { /* secondary frag */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize. This might be a small packet,
			 * maybe just a TCP ACK. Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			 */
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data +=
					    IXGBE_RX_COPY_ALIGN;
					ixgbe_bcopy(mp->m_data,
					    sendmp->m_data, len);
					sendmp->m_len = len;
					rxr->rx_copies++;
					rbuf->flags |= IXGBE_RX_COPY;
				}
			}
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				sendmp = mp;
			}

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;
		}
		++processed;

		/* Pass the head pointer on */
		if (eop == 0) {
			nbuf->fmp = sendmp;
			sendmp = NULL;
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			sendmp->m_pkthdr.rcvif = ifp;
			rxr->rx_packets++;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) &&
			    (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixgbe_rx_checksum(staterr, sendmp, ptype);

			/*
			 * In case of multiqueue, we have RXCSUM.PCSD bit set
			 * and never cleared. This means we have RSS hash
			 * available to be used.
			 */
			if (adapter->num_queues > 1) {
				sendmp->m_pkthdr.flowid =
				    le32toh(cur->wb.lower.hi_dword.rss);
				/*
				 * Full RSS support is not available in
				 * FreeBSD 10, so setting the hash type
				 * to OPAQUE.
				 */
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
			} else {
				sendmp->m_pkthdr.flowid = que->msix;
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
			}
		}
next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	IXGBE_RX_UNLOCK(rxr);

	/*
	** Still have cleaning to do?
	*/
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return (TRUE);

	return (FALSE);
}

/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that the
 *  stack doesn't spend time verifying it.
 *
 *********************************************************************/
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
{
	u16	status = (u16) staterr;
	u8	errors = (u8) (staterr >> 24);
	bool	sctp = false;

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = true;

	/* IPv4 checksum */
	if (status & IXGBE_RXD_STAT_IPCS) {
		mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
		/* IP Checksum Good */
		if (!(errors & IXGBE_RXD_ERR_IPE))
			mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
	}
	/* TCP/UDP/SCTP checksum */
	if (status & IXGBE_RXD_STAT_L4CS) {
		mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
			if (!sctp)
				mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}
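
/*
 * A hedged usage note: with CSUM_L4_VALID set and csum_data forced to
 * 0xffff, the FreeBSD stack (e.g. tcp_input/udp_input) treats the L4
 * checksum as already verified and skips software verification, which
 * is the "doesn't spend time" behavior the comment above refers to.
 */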

/********************************************************************
 * Manage DMA'able memory.
 *******************************************************************/
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
{
	if (error)
		return;
	*(bus_addr_t *) arg = segs->ds_addr;
	return;
}

int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int r;

	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev),	/* parent */
	    DBA_ALIGN, 0,	/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    size,		/* maxsize */
	    1,			/* nsegments */
	    size,		/* maxsegsize */
	    BUS_DMA_ALLOCNOW,	/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &dma->dma_tag);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
		    "error %u\n", r);
		goto fail_0;
	}
	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
		    "error %u\n", r);
		goto fail_1;
	}
	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
	    size,
	    ixgbe_dmamap_cb,
	    &dma->dma_paddr,
	    mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
		    "error %u\n", r);
		goto fail_2;
	}
	dma->dma_size = size;
	return (0);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_tag = NULL;
	return (r);
}

void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}

/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 **********************************************************************/
int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int rsize, tsize, error = IXGBE_SUCCESS;
	int txconf = 0, rxconf = 0;
#ifdef PCI_IOV
	enum ixgbe_iov_mode iov_mode;
#endif

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* First allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Next allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

#ifdef PCI_IOV
	iov_mode = ixgbe_get_iov_mode(adapter);
	adapter->pool = ixgbe_max_vfs(iov_mode);
#else
	adapter->pool = 0;
#endif
	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
#ifdef PCI_IOV
		txr->me = ixgbe_pf_que_index(iov_mode, i);
#else
		txr->me = i;
#endif
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, tsize,
		    &txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#ifndef IXGBE_LEGACY_TX
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
	}

	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
#ifdef PCI_IOV
		rxr->me = ixgbe_pf_que_index(iov_mode, i);
#else
		rxr->me = i;
#endif
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RxDescriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->me = i;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}