1 /******************************************************************************
3 Copyright (c) 2001-2015, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
36 #ifndef IXGBE_STANDALONE_BUILD
38 #include "opt_inet6.h"
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
54 extern int ix_crcstrip;
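/*
** Defined by the netmap support code; it controls whether the NIC
** strips the Ethernet CRC when netmap is in use (consulted again in
** ixgbe_setup_hw_rsc() below).
*/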
** this feature only works with
** IPv4, and only on 82599 and later.
** It also causes IP forwarding to
** fail, which, unlike LRO, cannot be
** controlled by the stack. For all these
** reasons it is best left off without a
** tunable interface; enabling it would need to be compiled
69 static bool ixgbe_rsc_enable = FALSE;
** For Flow Director: this is the
** sampling interval for the filter
** pool; with the default of 20,
** every 20th TX packet is probed.
** This feature can be disabled by
81 static int atr_sample_rate = 20;
84 /*********************************************************************
85 * Local Function prototypes
86 *********************************************************************/
87 static void ixgbe_setup_transmit_ring(struct tx_ring *);
88 static void ixgbe_free_transmit_buffers(struct tx_ring *);
89 static int ixgbe_setup_receive_ring(struct rx_ring *);
90 static void ixgbe_free_receive_buffers(struct rx_ring *);
92 static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
93 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
94 static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
95 static int ixgbe_tx_ctx_setup(struct tx_ring *,
96 struct mbuf *, u32 *, u32 *);
97 static int ixgbe_tso_setup(struct tx_ring *,
98 struct mbuf *, u32 *, u32 *);
100 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
102 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
103 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
106 #ifdef IXGBE_LEGACY_TX
107 /*********************************************************************
108 * Transmit entry point
110 * ixgbe_start is called by the stack to initiate a transmit.
111 * The driver will remain in this routine as long as there are
112 * packets to transmit and transmit resources are available.
* If resources are not available, the stack is notified
* and the packet is requeued.
115 **********************************************************************/
118 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
121 struct adapter *adapter = txr->adapter;
123 IXGBE_TX_LOCK_ASSERT(txr);
125 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
127 if (!adapter->link_active)
130 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
131 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
134 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
138 if (ixgbe_xmit(txr, &m_head)) {
140 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
143 /* Send a copy of the frame to the BPF listener */
144 ETHER_BPF_MTAP(ifp, m_head);
* Legacy TX start - called by the stack. This
* always uses the first tx ring and should
* not be used with multiqueue tx enabled.
155 ixgbe_start(struct ifnet *ifp)
157 struct adapter *adapter = ifp->if_softc;
158 struct tx_ring *txr = adapter->tx_rings;
160 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
162 ixgbe_start_locked(txr, ifp);
163 IXGBE_TX_UNLOCK(txr);
168 #else /* ! IXGBE_LEGACY_TX */
171 ** Multiqueue Transmit Entry Point
172 ** (if_transmit function)
175 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
177 struct adapter *adapter = ifp->if_softc;
178 struct ix_queue *que;
* When doing RSS, map the packet to the same outbound
* queue that the incoming flow was mapped to.
* If everything is set up correctly, it should be the
* same bucket that the CPU we're currently on maps to.
192 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
194 if (rss_hash2bucket(m->m_pkthdr.flowid,
195 M_HASHTYPE_GET(m), &bucket_id) == 0) {
196 i = bucket_id % adapter->num_queues;
198 if (bucket_id > adapter->num_queues)
199 if_printf(ifp, "bucket_id (%d) > num_queues "
200 "(%d)\n", bucket_id, adapter->num_queues);
204 i = m->m_pkthdr.flowid % adapter->num_queues;
206 i = curcpu % adapter->num_queues;
208 /* Check for a hung queue and pick alternative */
209 if (((1 << i) & adapter->active_queues) == 0)
210 i = ffsl(adapter->active_queues);
212 txr = &adapter->tx_rings[i];
213 que = &adapter->queues[i];
215 err = drbr_enqueue(ifp, txr->br, m);
218 if (IXGBE_TX_TRYLOCK(txr)) {
219 ixgbe_mq_start_locked(ifp, txr);
220 IXGBE_TX_UNLOCK(txr);
222 taskqueue_enqueue(que->tq, &txr->txq_task);
228 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
230 struct adapter *adapter = txr->adapter;
232 int enqueued = 0, err = 0;
234 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
235 adapter->link_active == 0)
238 /* Process the queue */
239 #if __FreeBSD_version < 901504
240 next = drbr_dequeue(ifp, txr->br);
241 while (next != NULL) {
242 if ((err = ixgbe_xmit(txr, &next)) != 0) {
244 err = drbr_enqueue(ifp, txr->br, next);
246 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
247 if ((err = ixgbe_xmit(txr, &next)) != 0) {
249 drbr_advance(ifp, txr->br);
251 drbr_putback(ifp, txr->br, next);
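/* The mbuf stays at the head of the ring and is retried on the next call */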
256 #if __FreeBSD_version >= 901504
257 drbr_advance(ifp, txr->br);
260 #if 0 // this is VF-only
261 #if __FreeBSD_version >= 1100036
* Since we're looking at the tx ring, we can check
* whether we're a VF by examining our tail register
267 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
268 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
271 /* Send a copy of the frame to the BPF listener */
272 ETHER_BPF_MTAP(ifp, next);
273 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
275 #if __FreeBSD_version < 901504
276 next = drbr_dequeue(ifp, txr->br);
280 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
287 * Called from a taskqueue to drain queued transmit packets.
290 ixgbe_deferred_mq_start(void *arg, int pending)
292 struct tx_ring *txr = arg;
293 struct adapter *adapter = txr->adapter;
294 struct ifnet *ifp = adapter->ifp;
297 if (!drbr_empty(ifp, txr->br))
298 ixgbe_mq_start_locked(ifp, txr);
299 IXGBE_TX_UNLOCK(txr);
303 * Flush all ring buffers
306 ixgbe_qflush(struct ifnet *ifp)
308 struct adapter *adapter = ifp->if_softc;
309 struct tx_ring *txr = adapter->tx_rings;
312 for (int i = 0; i < adapter->num_queues; i++, txr++) {
314 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
316 IXGBE_TX_UNLOCK(txr);
320 #endif /* IXGBE_LEGACY_TX */
323 /*********************************************************************
325 * This routine maps the mbufs to tx descriptors, allowing the
326 * TX engine to transmit the packets.
327 * - return 0 on success, positive on failure
329 **********************************************************************/
332 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
334 struct adapter *adapter = txr->adapter;
335 u32 olinfo_status = 0, cmd_type_len;
336 int i, j, error, nsegs;
340 bus_dma_segment_t segs[adapter->num_segs];
342 struct ixgbe_tx_buf *txbuf;
343 union ixgbe_adv_tx_desc *txd = NULL;
347 /* Basic descriptor defines */
348 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
349 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
351 if (m_head->m_flags & M_VLANTAG)
352 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
355 * Important to capture the first descriptor
356 * used because it will contain the index of
357 * the one we tell the hardware to report back
359 first = txr->next_avail_desc;
360 txbuf = &txr->tx_buffers[first];
364 * Map the packet for DMA.
367 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
368 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
370 if (__predict_false(error)) {
375 /* Try it again? - one try */
379 * XXX: m_defrag will choke on
380 * non-MCLBYTES-sized clusters
382 m = m_defrag(*m_headp, M_NOWAIT);
384 adapter->mbuf_defrag_failed++;
394 txr->no_tx_dma_setup++;
397 txr->no_tx_dma_setup++;
404 /* Make certain there are enough descriptors */
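/* (the extra 2 leaves room for the offload context descriptor set up
 * below and a spare slot) */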
405 if (nsegs > txr->tx_avail - 2) {
406 txr->no_desc_avail++;
407 bus_dmamap_unload(txr->txtag, map);
* Set up the appropriate offload context;
* this will consume the first descriptor
416 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
417 if (__predict_false(error)) {
418 if (error == ENOBUFS)
424 /* Do the flow director magic */
425 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
427 if (txr->atr_count >= atr_sample_rate) {
428 ixgbe_atr(txr, m_head);
434 olinfo_status |= IXGBE_ADVTXD_CC;
435 i = txr->next_avail_desc;
436 for (j = 0; j < nsegs; j++) {
440 txbuf = &txr->tx_buffers[i];
441 txd = &txr->tx_base[i];
442 seglen = segs[j].ds_len;
443 segaddr = htole64(segs[j].ds_addr);
445 txd->read.buffer_addr = segaddr;
446 txd->read.cmd_type_len = htole32(txr->txd_cmd |
447 cmd_type_len |seglen);
448 txd->read.olinfo_status = htole32(olinfo_status);
450 if (++i == txr->num_desc)
454 txd->read.cmd_type_len |=
455 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456 txr->tx_avail -= nsegs;
457 txr->next_avail_desc = i;
459 txbuf->m_head = m_head;
* Here we swap the map so the last descriptor,
* which gets the completion interrupt, has the
* real map, and the first descriptor gets the
* unused map from this last descriptor.
466 txr->tx_buffers[first].map = txbuf->map;
468 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
470 /* Set the EOP descriptor that will be marked done */
471 txbuf = &txr->tx_buffers[first];
474 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
* Advance the Transmit Descriptor Tail (TDT); this tells the
* hardware that this frame is available to transmit.
480 ++txr->total_packets;
481 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
483 /* Mark queue as having work */
491 /*********************************************************************
493 * Allocate memory for tx_buffer structures. The tx_buffer stores all
494 * the information needed to transmit a packet on the wire. This is
* called only once at attach; setup is done on every reset.
497 **********************************************************************/
499 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
501 struct adapter *adapter = txr->adapter;
502 device_t dev = adapter->dev;
503 struct ixgbe_tx_buf *txbuf;
507 * Setup DMA descriptor areas.
509 if ((error = bus_dma_tag_create(
510 bus_get_dma_tag(adapter->dev), /* parent */
511 1, 0, /* alignment, bounds */
512 BUS_SPACE_MAXADDR, /* lowaddr */
513 BUS_SPACE_MAXADDR, /* highaddr */
514 NULL, NULL, /* filter, filterarg */
515 IXGBE_TSO_SIZE, /* maxsize */
516 adapter->num_segs, /* nsegments */
517 PAGE_SIZE, /* maxsegsize */
520 NULL, /* lockfuncarg */
522 device_printf(dev,"Unable to allocate TX DMA tag\n");
526 if (!(txr->tx_buffers =
527 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529 device_printf(dev, "Unable to allocate tx_buffer memory\n");
534 /* Create the descriptor buffer dma maps */
535 txbuf = txr->tx_buffers;
536 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
539 device_printf(dev, "Unable to create TX DMA map\n");
/* We free everything; this handles the case where we failed in the middle of allocation */
547 ixgbe_free_transmit_structures(adapter);
551 /*********************************************************************
553 * Initialize a transmit ring.
555 **********************************************************************/
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
559 struct adapter *adapter = txr->adapter;
560 struct ixgbe_tx_buf *txbuf;
562 struct netmap_adapter *na = NA(adapter->ifp);
563 struct netmap_slot *slot;
564 #endif /* DEV_NETMAP */
566 /* Clear the old ring contents */
570 * (under lock): if in netmap mode, do some consistency
571 * checks and set slot to entry 0 of the netmap ring.
573 slot = netmap_reset(na, NR_TX, txr->me, 0);
574 #endif /* DEV_NETMAP */
575 bzero((void *)txr->tx_base,
576 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
578 txr->next_avail_desc = 0;
579 txr->next_to_clean = 0;
581 /* Free any existing tx buffers. */
582 txbuf = txr->tx_buffers;
583 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
584 if (txbuf->m_head != NULL) {
585 bus_dmamap_sync(txr->txtag, txbuf->map,
586 BUS_DMASYNC_POSTWRITE);
587 bus_dmamap_unload(txr->txtag, txbuf->map);
588 m_freem(txbuf->m_head);
589 txbuf->m_head = NULL;
593 * In netmap mode, set the map for the packet buffer.
594 * NOTE: Some drivers (not this one) also need to set
595 * the physical buffer address in the NIC ring.
596 * Slots in the netmap ring (indexed by "si") are
* kring->nkr_hwofs positions "ahead" of the
598 * corresponding slot in the NIC ring. In some drivers
599 * (not here) nkr_hwofs can be negative. Function
600 * netmap_idx_n2k() handles wraparounds properly.
603 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
604 netmap_load_map(na, txr->txtag,
605 txbuf->map, NMB(na, slot + si));
607 #endif /* DEV_NETMAP */
608 /* Clear the EOP descriptor pointer */
613 /* Set the rate at which we sample packets */
614 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615 txr->atr_sample = atr_sample_rate;
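/* (82598 has no Flow Director support, so ATR sampling stays disabled there) */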
618 /* Set number of descriptors available */
619 txr->tx_avail = adapter->num_tx_desc;
621 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623 IXGBE_TX_UNLOCK(txr);
626 /*********************************************************************
628 * Initialize all transmit rings.
630 **********************************************************************/
632 ixgbe_setup_transmit_structures(struct adapter *adapter)
634 struct tx_ring *txr = adapter->tx_rings;
636 for (int i = 0; i < adapter->num_queues; i++, txr++)
637 ixgbe_setup_transmit_ring(txr);
642 /*********************************************************************
644 * Free all transmit rings.
646 **********************************************************************/
648 ixgbe_free_transmit_structures(struct adapter *adapter)
650 struct tx_ring *txr = adapter->tx_rings;
652 for (int i = 0; i < adapter->num_queues; i++, txr++) {
654 ixgbe_free_transmit_buffers(txr);
655 ixgbe_dma_free(adapter, &txr->txdma);
656 IXGBE_TX_UNLOCK(txr);
657 IXGBE_TX_LOCK_DESTROY(txr);
659 free(adapter->tx_rings, M_DEVBUF);
662 /*********************************************************************
664 * Free transmit ring related data structures.
666 **********************************************************************/
668 ixgbe_free_transmit_buffers(struct tx_ring *txr)
670 struct adapter *adapter = txr->adapter;
671 struct ixgbe_tx_buf *tx_buffer;
674 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
676 if (txr->tx_buffers == NULL)
679 tx_buffer = txr->tx_buffers;
680 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681 if (tx_buffer->m_head != NULL) {
682 bus_dmamap_sync(txr->txtag, tx_buffer->map,
683 BUS_DMASYNC_POSTWRITE);
684 bus_dmamap_unload(txr->txtag,
686 m_freem(tx_buffer->m_head);
687 tx_buffer->m_head = NULL;
688 if (tx_buffer->map != NULL) {
689 bus_dmamap_destroy(txr->txtag,
691 tx_buffer->map = NULL;
693 } else if (tx_buffer->map != NULL) {
694 bus_dmamap_unload(txr->txtag,
696 bus_dmamap_destroy(txr->txtag,
698 tx_buffer->map = NULL;
701 #ifdef IXGBE_LEGACY_TX
703 buf_ring_free(txr->br, M_DEVBUF);
705 if (txr->tx_buffers != NULL) {
706 free(txr->tx_buffers, M_DEVBUF);
707 txr->tx_buffers = NULL;
709 if (txr->txtag != NULL) {
710 bus_dma_tag_destroy(txr->txtag);
716 /*********************************************************************
718 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
720 **********************************************************************/
723 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724 u32 *cmd_type_len, u32 *olinfo_status)
726 struct adapter *adapter = txr->adapter;
727 struct ixgbe_adv_tx_context_desc *TXD;
728 struct ether_vlan_header *eh;
735 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
736 int ehdrlen, ip_hlen = 0;
740 int ctxd = txr->next_avail_desc;
745 /* First check if TSO is to be used */
746 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO))
747 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
749 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
752 /* Indicate the whole packet as payload when not doing TSO */
753 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
755 /* Now ready a context descriptor */
756 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
759 ** In advanced descriptors the vlan tag must
760 ** be placed into the context descriptor. Hence
761 ** we need to make one even if not doing offloads.
763 if (mp->m_flags & M_VLANTAG) {
764 vtag = htole16(mp->m_pkthdr.ether_vtag);
765 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
766 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
770 * Determine where frame payload starts.
771 * Jump over vlan headers if already present,
772 * helpful for QinQ too.
774 eh = mtod(mp, struct ether_vlan_header *);
775 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
776 etype = ntohs(eh->evl_proto);
777 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
779 etype = ntohs(eh->evl_encap_proto);
780 ehdrlen = ETHER_HDR_LEN;
783 /* Set the ether header length */
784 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
786 if (offload == FALSE)
790 * If the first mbuf only includes the ethernet header, jump to the next one
791 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
792 * XXX: And assumes the entire IP header is contained in one mbuf
794 if (mp->m_len == ehdrlen && mp->m_next)
795 l3d = mtod(mp->m_next, caddr_t);
797 l3d = mtod(mp, caddr_t) + ehdrlen;
801 ip = (struct ip *)(l3d);
802 ip_hlen = ip->ip_hl << 2;
804 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
805 /* Insert IPv4 checksum into data descriptors */
806 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
808 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
812 ip6 = (struct ip6_hdr *)(l3d);
813 ip_hlen = sizeof(struct ip6_hdr);
814 ipproto = ip6->ip6_nxt;
815 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
822 vlan_macip_lens |= ip_hlen;
824 /* No support for offloads for non-L4 next headers */
827 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
828 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
833 if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
834 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
839 if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
840 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
849 if (offload) /* Insert L4 checksum into data descriptors */
850 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
853 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
855 /* Now copy bits into descriptor */
856 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
857 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
858 TXD->seqnum_seed = htole32(0);
859 TXD->mss_l4len_idx = htole32(0);
861 /* We've consumed the first desc, adjust counters */
862 if (++ctxd == txr->num_desc)
864 txr->next_avail_desc = ctxd;
870 /**********************************************************************
872 * Setup work for hardware segmentation offload (TSO) on
873 * adapters using advanced tx descriptors
875 **********************************************************************/
877 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
878 u32 *cmd_type_len, u32 *olinfo_status)
880 struct ixgbe_adv_tx_context_desc *TXD;
881 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
882 u32 mss_l4len_idx = 0, paylen;
883 u16 vtag = 0, eh_type;
884 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
885 struct ether_vlan_header *eh;
895 * Determine where frame payload starts.
896 * Jump over vlan headers if already present
898 eh = mtod(mp, struct ether_vlan_header *);
899 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
900 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
901 eh_type = eh->evl_proto;
903 ehdrlen = ETHER_HDR_LEN;
904 eh_type = eh->evl_encap_proto;
907 switch (ntohs(eh_type)) {
910 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
911 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
912 if (ip6->ip6_nxt != IPPROTO_TCP)
914 ip_hlen = sizeof(struct ip6_hdr);
915 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
916 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
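/* Seed th_sum with the pseudo-header checksum (here and in the IPv4
 * case below); the hardware completes it for each TSO segment. */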
917 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
918 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
923 ip = (struct ip *)(mp->m_data + ehdrlen);
924 if (ip->ip_p != IPPROTO_TCP)
927 ip_hlen = ip->ip_hl << 2;
928 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
929 th->th_sum = in_pseudo(ip->ip_src.s_addr,
930 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
931 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
932 /* Tell transmit desc to also do IPv4 checksum. */
933 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
937 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
938 __func__, ntohs(eh_type));
942 ctxd = txr->next_avail_desc;
943 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
945 tcp_hlen = th->th_off << 2;
947 /* This is used in the transmit desc in encap */
948 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
950 /* VLAN MACLEN IPLEN */
951 if (mp->m_flags & M_VLANTAG) {
952 vtag = htole16(mp->m_pkthdr.ether_vtag);
953 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
956 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
957 vlan_macip_lens |= ip_hlen;
958 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
960 /* ADV DTYPE TUCMD */
961 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
962 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
963 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
966 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
967 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
968 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
970 TXD->seqnum_seed = htole32(0);
972 if (++ctxd == txr->num_desc)
976 txr->next_avail_desc = ctxd;
977 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
978 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
979 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
985 /**********************************************************************
987 * Examine each tx_buffer in the used queue. If the hardware is done
988 * processing the packet then free associated resources. The
989 * tx_buffer is put back on the free queue.
991 **********************************************************************/
993 ixgbe_txeof(struct tx_ring *txr)
995 struct adapter *adapter = txr->adapter;
997 struct ifnet *ifp = adapter->ifp;
999 u32 work, processed = 0;
1000 u32 limit = adapter->tx_process_limit;
1001 struct ixgbe_tx_buf *buf;
1002 union ixgbe_adv_tx_desc *txd;
1004 mtx_assert(&txr->tx_mtx, MA_OWNED);
1007 if (ifp->if_capenable & IFCAP_NETMAP) {
1008 struct netmap_adapter *na = NA(ifp);
1009 struct netmap_kring *kring = &na->tx_rings[txr->me];
1011 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1012 BUS_DMASYNC_POSTREAD);
1014 * In netmap mode, all the work is done in the context
1015 * of the client thread. Interrupt handlers only wake up
1016 * clients, which may be sleeping on individual rings
1017 * or on a global resource for all rings.
1018 * To implement tx interrupt mitigation, we wake up the client
1019 * thread roughly every half ring, even if the NIC interrupts
1020 * more frequently. This is implemented as follows:
1021 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1022 * the slot that should wake up the thread (nkr_num_slots
1023 * means the user thread should not be woken up);
1024 * - the driver ignores tx interrupts unless netmap_mitigate=0
1025 * or the slot has the DD bit set.
1027 if (!netmap_mitigate ||
1028 (kring->nr_kflags < kring->nkr_num_slots &&
1029 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1030 netmap_tx_irq(ifp, txr->me);
1034 #endif /* DEV_NETMAP */
1036 if (txr->tx_avail == txr->num_desc) {
1041 /* Get work starting point */
1042 work = txr->next_to_clean;
1043 buf = &txr->tx_buffers[work];
1044 txd = &txr->tx_base[work];
1045 work -= txr->num_desc; /* The distance to ring end */
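/* 'work' is kept as a negative offset from the end of the ring;
 * when it reaches zero below we have wrapped and it is reset. */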
1046 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1047 BUS_DMASYNC_POSTREAD);
1050 union ixgbe_adv_tx_desc *eop = buf->eop;
1051 if (eop == NULL) /* No work */
1054 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1055 break; /* I/O not complete */
1059 buf->m_head->m_pkthdr.len;
1060 bus_dmamap_sync(txr->txtag,
1062 BUS_DMASYNC_POSTWRITE);
1063 bus_dmamap_unload(txr->txtag,
1065 m_freem(buf->m_head);
1071 /* We clean the range if multi segment */
1072 while (txd != eop) {
1076 /* wrap the ring? */
1077 if (__predict_false(!work)) {
1078 work -= txr->num_desc;
1079 buf = txr->tx_buffers;
1084 buf->m_head->m_pkthdr.len;
1085 bus_dmamap_sync(txr->txtag,
1087 BUS_DMASYNC_POSTWRITE);
1088 bus_dmamap_unload(txr->txtag,
1090 m_freem(buf->m_head);
1100 /* Try the next packet */
1104 /* reset with a wrap */
1105 if (__predict_false(!work)) {
1106 work -= txr->num_desc;
1107 buf = txr->tx_buffers;
1111 } while (__predict_true(--limit));
1113 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1114 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
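/* Convert the negative offset back into a ring index */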
1116 work += txr->num_desc;
1117 txr->next_to_clean = work;
** Queue Hang detection: we know there's
** work outstanding or the first return
** would have been taken, so increment busy
** if nothing managed to get cleaned. Then,
** in the local timer, it will be checked and
** marked as HUNG if it exceeds a MAX attempt count.
1127 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
** If anything gets cleaned we reset the state to 1;
** note this will turn off HUNG if it is set.
1136 if (txr->tx_avail == txr->num_desc)
1145 ** This routine parses packet headers so that Flow
1146 ** Director can make a hashed filter table entry
1147 ** allowing traffic flows to be identified and kept
** on the same CPU. This would be a performance
1149 ** hit, but we only do it at IXGBE_FDIR_RATE of
1153 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1155 struct adapter *adapter = txr->adapter;
1156 struct ix_queue *que;
1160 struct ether_vlan_header *eh;
1161 union ixgbe_atr_hash_dword input = {.dword = 0};
1162 union ixgbe_atr_hash_dword common = {.dword = 0};
1163 int ehdrlen, ip_hlen;
1166 eh = mtod(mp, struct ether_vlan_header *);
1167 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1168 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1169 etype = eh->evl_proto;
1171 ehdrlen = ETHER_HDR_LEN;
1172 etype = eh->evl_encap_proto;
1175 /* Only handling IPv4 */
1176 if (etype != htons(ETHERTYPE_IP))
1179 ip = (struct ip *)(mp->m_data + ehdrlen);
1180 ip_hlen = ip->ip_hl << 2;
1182 /* check if we're UDP or TCP */
1185 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1186 /* src and dst are inverted */
1187 common.port.dst ^= th->th_sport;
1188 common.port.src ^= th->th_dport;
1189 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1192 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1193 /* src and dst are inverted */
1194 common.port.dst ^= uh->uh_sport;
1195 common.port.src ^= uh->uh_dport;
1196 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1202 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1203 if (mp->m_pkthdr.ether_vtag)
1204 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1206 common.flex_bytes ^= etype;
1207 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1209 que = &adapter->queues[txr->me];
1211 ** This assumes the Rx queue and Tx
1212 ** queue are bound to the same CPU
1214 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1215 input, common, que->msix);
1217 #endif /* IXGBE_FDIR */
1220 ** Used to detect a descriptor that has
1221 ** been merged by Hardware RSC.
1224 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1226 return (le32toh(rx->wb.lower.lo_dword.data) &
1227 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1230 /*********************************************************************
* Initialize the Hardware RSC (LRO) feature on 82599
* for an RX ring; it is toggled by the LRO capability
* even though it is transparent to the stack.
* NOTE: since this HW feature only works with IPv4 and
* our testing has shown soft LRO to be as effective,
* I have decided to disable it by default.
1240 **********************************************************************/
1242 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1244 struct adapter *adapter = rxr->adapter;
1245 struct ixgbe_hw *hw = &adapter->hw;
1246 u32 rscctrl, rdrxctl;
1248 /* If turning LRO/RSC off we need to disable it */
1249 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1250 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1251 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1255 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1256 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1257 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1258 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1259 #endif /* DEV_NETMAP */
1260 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1261 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1262 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1264 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1265 rscctrl |= IXGBE_RSCCTL_RSCEN;
1267 ** Limit the total number of descriptors that
1268 ** can be combined, so it does not exceed 64K
1270 if (rxr->mbuf_sz == MCLBYTES)
1271 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1272 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1273 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1274 else if (rxr->mbuf_sz == MJUM9BYTES)
1275 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1276 else /* Using 16K cluster */
1277 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1279 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1281 /* Enable TCP header recognition */
1282 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1283 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1284 IXGBE_PSRTYPE_TCPHDR));
1286 /* Disable RSC for ACK packets */
1287 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1288 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1293 /*********************************************************************
* Refresh mbuf buffers for RX descriptor rings
* - now keeps its own state, so discards due to resource
* exhaustion are unnecessary; if an mbuf cannot be obtained
* it just returns, keeping its placeholder, and can simply
* be recalled to try again.
1301 **********************************************************************/
1303 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1305 struct adapter *adapter = rxr->adapter;
1306 bus_dma_segment_t seg[1];
1307 struct ixgbe_rx_buf *rxbuf;
1309 int i, j, nsegs, error;
1310 bool refreshed = FALSE;
1312 i = j = rxr->next_to_refresh;
/* Control the loop with one beyond: j runs one slot ahead of i */
1314 if (++j == rxr->num_desc)
1317 while (j != limit) {
1318 rxbuf = &rxr->rx_buffers[i];
1319 if (rxbuf->buf == NULL) {
1320 mp = m_getjcl(M_NOWAIT, MT_DATA,
1321 M_PKTHDR, rxr->mbuf_sz);
1324 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1325 m_adj(mp, ETHER_ALIGN);
1329 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1331 /* If we're dealing with an mbuf that was copied rather
1332 * than replaced, there's no need to go through busdma.
1334 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1335 /* Get the memory mapping */
1336 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1337 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1338 rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1340 printf("Refresh mbufs: payload dmamap load"
1341 " failure - %d\n", error);
1347 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1348 BUS_DMASYNC_PREREAD);
1349 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1350 htole64(seg[0].ds_addr);
1352 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1353 rxbuf->flags &= ~IXGBE_RX_COPY;
1357 /* Next is precalculated */
1359 rxr->next_to_refresh = i;
1360 if (++j == rxr->num_desc)
1364 if (refreshed) /* Update hardware tail index */
1365 IXGBE_WRITE_REG(&adapter->hw,
1366 rxr->tail, rxr->next_to_refresh);
1370 /*********************************************************************
1372 * Allocate memory for rx_buffer structures. Since we use one
* rx_buffer per received packet, the maximum number of rx_buffers
1374 * that we'll need is equal to the number of receive descriptors
1375 * that we've allocated.
1377 **********************************************************************/
1379 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1381 struct adapter *adapter = rxr->adapter;
1382 device_t dev = adapter->dev;
1383 struct ixgbe_rx_buf *rxbuf;
1386 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1387 if (!(rxr->rx_buffers =
1388 (struct ixgbe_rx_buf *) malloc(bsize,
1389 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1390 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1395 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1396 1, 0, /* alignment, bounds */
1397 BUS_SPACE_MAXADDR, /* lowaddr */
1398 BUS_SPACE_MAXADDR, /* highaddr */
1399 NULL, NULL, /* filter, filterarg */
1400 MJUM16BYTES, /* maxsize */
1402 MJUM16BYTES, /* maxsegsize */
1404 NULL, /* lockfunc */
1405 NULL, /* lockfuncarg */
1407 device_printf(dev, "Unable to create RX DMA tag\n");
1411 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1412 rxbuf = &rxr->rx_buffers[i];
1413 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1415 device_printf(dev, "Unable to create RX dma map\n");
1423 /* Frees all, but can handle partial completion */
1424 ixgbe_free_receive_structures(adapter);
1429 ixgbe_free_receive_ring(struct rx_ring *rxr)
1431 struct ixgbe_rx_buf *rxbuf;
1433 for (int i = 0; i < rxr->num_desc; i++) {
1434 rxbuf = &rxr->rx_buffers[i];
1435 if (rxbuf->buf != NULL) {
1436 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1437 BUS_DMASYNC_POSTREAD);
1438 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1439 rxbuf->buf->m_flags |= M_PKTHDR;
1440 m_freem(rxbuf->buf);
1447 /*********************************************************************
1449 * Initialize a receive ring and its buffers.
1451 **********************************************************************/
1453 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1455 struct adapter *adapter;
1458 struct ixgbe_rx_buf *rxbuf;
1459 bus_dma_segment_t seg[1];
1460 struct lro_ctrl *lro = &rxr->lro;
1461 int rsize, nsegs, error = 0;
1463 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1464 struct netmap_slot *slot;
1465 #endif /* DEV_NETMAP */
1467 adapter = rxr->adapter;
1471 /* Clear the ring contents */
1474 /* same as in ixgbe_setup_transmit_ring() */
1475 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1476 #endif /* DEV_NETMAP */
1477 rsize = roundup2(adapter->num_rx_desc *
1478 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1479 bzero((void *)rxr->rx_base, rsize);
1480 /* Cache the size */
1481 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1483 /* Free current RX buffer structs and their mbufs */
1484 ixgbe_free_receive_ring(rxr);
1486 /* Now replenish the mbufs */
1487 for (int j = 0; j != rxr->num_desc; ++j) {
1490 rxbuf = &rxr->rx_buffers[j];
1493 * In netmap mode, fill the map and set the buffer
1494 * address in the NIC ring, considering the offset
1495 * between the netmap and NIC rings (see comment in
1496 * ixgbe_setup_transmit_ring() ). No need to allocate
1497 * an mbuf, so end the block with a continue;
1500 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1504 addr = PNMB(na, slot + sj, &paddr);
1505 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1506 /* Update descriptor and the cached value */
1507 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1508 rxbuf->addr = htole64(paddr);
1511 #endif /* DEV_NETMAP */
1513 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1514 M_PKTHDR, adapter->rx_mbuf_sz);
1515 if (rxbuf->buf == NULL) {
1520 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1521 /* Get the memory mapping */
1522 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1523 rxbuf->pmap, mp, seg,
1524 &nsegs, BUS_DMA_NOWAIT);
1527 bus_dmamap_sync(rxr->ptag,
1528 rxbuf->pmap, BUS_DMASYNC_PREREAD);
1529 /* Update the descriptor and the cached value */
1530 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1531 rxbuf->addr = htole64(seg[0].ds_addr);
1535 /* Setup our descriptor indices */
1536 rxr->next_to_check = 0;
1537 rxr->next_to_refresh = 0;
1538 rxr->lro_enabled = FALSE;
1541 rxr->vtag_strip = FALSE;
1543 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1544 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1547 ** Now set up the LRO interface:
1549 if (ixgbe_rsc_enable)
1550 ixgbe_setup_hw_rsc(rxr);
1551 else if (ifp->if_capenable & IFCAP_LRO) {
1552 int err = tcp_lro_init(lro);
1554 device_printf(dev, "LRO Initialization failed!\n");
1557 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1558 rxr->lro_enabled = TRUE;
1559 lro->ifp = adapter->ifp;
1562 IXGBE_RX_UNLOCK(rxr);
1566 ixgbe_free_receive_ring(rxr);
1567 IXGBE_RX_UNLOCK(rxr);
1571 /*********************************************************************
1573 * Initialize all receive rings.
1575 **********************************************************************/
1577 ixgbe_setup_receive_structures(struct adapter *adapter)
1579 struct rx_ring *rxr = adapter->rx_rings;
1582 for (j = 0; j < adapter->num_queues; j++, rxr++)
1583 if (ixgbe_setup_receive_ring(rxr))
* Free the RX buffers allocated so far; we only handle
* the rings that completed, since the failing case will have
* cleaned up after itself. 'j' failed, so it is the terminus.
1593 for (int i = 0; i < j; ++i) {
1594 rxr = &adapter->rx_rings[i];
1595 ixgbe_free_receive_ring(rxr);
1602 /*********************************************************************
1604 * Free all receive rings.
1606 **********************************************************************/
1608 ixgbe_free_receive_structures(struct adapter *adapter)
1610 struct rx_ring *rxr = adapter->rx_rings;
1612 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1614 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1615 struct lro_ctrl *lro = &rxr->lro;
1616 ixgbe_free_receive_buffers(rxr);
1617 /* Free LRO memory */
1619 /* Free the ring memory as well */
1620 ixgbe_dma_free(adapter, &rxr->rxdma);
1623 free(adapter->rx_rings, M_DEVBUF);
1627 /*********************************************************************
1629 * Free receive ring data structures
1631 **********************************************************************/
1633 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1635 struct adapter *adapter = rxr->adapter;
1636 struct ixgbe_rx_buf *rxbuf;
1638 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1640 /* Cleanup any existing buffers */
1641 if (rxr->rx_buffers != NULL) {
1642 for (int i = 0; i < adapter->num_rx_desc; i++) {
1643 rxbuf = &rxr->rx_buffers[i];
1644 if (rxbuf->buf != NULL) {
1645 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1646 BUS_DMASYNC_POSTREAD);
1647 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1648 rxbuf->buf->m_flags |= M_PKTHDR;
1649 m_freem(rxbuf->buf);
1652 if (rxbuf->pmap != NULL) {
1653 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1657 if (rxr->rx_buffers != NULL) {
1658 free(rxr->rx_buffers, M_DEVBUF);
1659 rxr->rx_buffers = NULL;
1663 if (rxr->ptag != NULL) {
1664 bus_dma_tag_destroy(rxr->ptag);
1671 static __inline void
1672 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
* At the moment LRO is only for IP/TCP packets, and the TCP checksum
* of the packet should have been computed by the hardware. The packet
* also must not carry a VLAN tag in its ethernet header. For IPv6 we
* do not yet support extension headers.
1680 if (rxr->lro_enabled &&
1681 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1682 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1683 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1684 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1685 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1686 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1687 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1688 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1690 * Send to the stack if:
1691 ** - LRO not enabled, or
1692 ** - no LRO resources, or
1693 ** - lro enqueue fails
1695 if (rxr->lro.lro_cnt != 0)
1696 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1699 IXGBE_RX_UNLOCK(rxr);
1700 (*ifp->if_input)(ifp, m);
1704 static __inline void
1705 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1707 struct ixgbe_rx_buf *rbuf;
1709 rbuf = &rxr->rx_buffers[i];
** With advanced descriptors the writeback
** clobbers the buffer addresses, so it's easier
** to just free the existing mbufs and take
** the normal refresh path to get new buffers
1720 if (rbuf->fmp != NULL) {/* Partial chain ? */
1721 rbuf->fmp->m_flags |= M_PKTHDR;
1724 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1725 } else if (rbuf->buf) {
1729 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1737 /*********************************************************************
1739 * This routine executes in interrupt context. It replenishes
* the mbufs in the descriptor ring and sends data which has been
* DMA'ed into host memory up to the stack.
1743 * Return TRUE for more work, FALSE for all clean.
1744 *********************************************************************/
1746 ixgbe_rxeof(struct ix_queue *que)
1748 struct adapter *adapter = que->adapter;
1749 struct rx_ring *rxr = que->rxr;
1750 struct ifnet *ifp = adapter->ifp;
1751 struct lro_ctrl *lro = &rxr->lro;
1752 struct lro_entry *queued;
1753 int i, nextp, processed = 0;
1755 u32 count = adapter->rx_process_limit;
1756 union ixgbe_adv_rx_desc *cur;
1757 struct ixgbe_rx_buf *rbuf, *nbuf;
1763 /* Same as the txeof routine: wakeup clients on intr. */
1764 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1765 IXGBE_RX_UNLOCK(rxr);
1768 #endif /* DEV_NETMAP */
1770 for (i = rxr->next_to_check; count != 0;) {
1771 struct mbuf *sendmp, *mp;
1777 /* Sync the ring. */
1778 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1779 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1781 cur = &rxr->rx_base[i];
1782 staterr = le32toh(cur->wb.upper.status_error);
1783 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1785 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1787 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1794 cur->wb.upper.status_error = 0;
1795 rbuf = &rxr->rx_buffers[i];
1798 len = le16toh(cur->wb.upper.length);
1799 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1800 IXGBE_RXDADV_PKTTYPE_MASK;
1801 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1803 /* Make sure bad packets are discarded */
1804 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1805 #if __FreeBSD_version >= 1100036
1806 if (IXGBE_IS_VF(adapter))
1807 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1809 rxr->rx_discarded++;
1810 ixgbe_rx_discard(rxr, i);
** On 82599, which supports a hardware
** LRO (called HW RSC), packets need
** not be fragmented across sequential
** descriptors; rather, the next descriptor
** is indicated in bits of the descriptor.
** This also means that we might process
** more than one packet at a time, something
** that has never been true before; it
** required eliminating the global chain pointers
** in favor of what we are doing here. -jfv
1828 ** Figure out the next descriptor
1831 if (rxr->hw_rsc == TRUE) {
1832 rsc = ixgbe_rsc_count(cur);
1833 rxr->rsc_num += (rsc - 1);
1835 if (rsc) { /* Get hardware index */
1837 IXGBE_RXDADV_NEXTP_MASK) >>
1838 IXGBE_RXDADV_NEXTP_SHIFT);
1839 } else { /* Just sequential */
1841 if (nextp == adapter->num_rx_desc)
1844 nbuf = &rxr->rx_buffers[nextp];
1848 ** Rather than using the fmp/lmp global pointers
1849 ** we now keep the head of a packet chain in the
1850 ** buffer struct and pass this along from one
1851 ** descriptor to the next, until we get EOP.
1855 ** See if there is a stored head
1856 ** that determines what we are
1859 if (sendmp != NULL) { /* secondary frag */
1860 rbuf->buf = rbuf->fmp = NULL;
1861 mp->m_flags &= ~M_PKTHDR;
1862 sendmp->m_pkthdr.len += mp->m_len;
1865 * Optimize. This might be a small packet,
1866 * maybe just a TCP ACK. Do a fast copy that
1867 * is cache aligned into a new mbuf, and
1868 * leave the old mbuf+cluster for re-use.
1870 if (eop && len <= IXGBE_RX_COPY_LEN) {
1871 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1872 if (sendmp != NULL) {
1874 IXGBE_RX_COPY_ALIGN;
1875 ixgbe_bcopy(mp->m_data,
1876 sendmp->m_data, len);
1877 sendmp->m_len = len;
1879 rbuf->flags |= IXGBE_RX_COPY;
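/* The original mbuf and cluster stay in place; ixgbe_refresh_mbufs()
 * will reuse them rather than allocating a replacement. */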
1882 if (sendmp == NULL) {
1883 rbuf->buf = rbuf->fmp = NULL;
1887 /* first desc of a non-ps chain */
1888 sendmp->m_flags |= M_PKTHDR;
1889 sendmp->m_pkthdr.len = mp->m_len;
1893 /* Pass the head pointer on */
1897 mp->m_next = nbuf->buf;
1898 } else { /* Sending this frame */
1899 sendmp->m_pkthdr.rcvif = ifp;
1901 /* capture data for AIM */
1902 rxr->bytes += sendmp->m_pkthdr.len;
1903 rxr->rx_bytes += sendmp->m_pkthdr.len;
1904 /* Process vlan info */
1905 if ((rxr->vtag_strip) &&
1906 (staterr & IXGBE_RXD_STAT_VP))
1907 vtag = le16toh(cur->wb.upper.vlan);
1909 sendmp->m_pkthdr.ether_vtag = vtag;
1910 sendmp->m_flags |= M_VLANTAG;
1912 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1913 ixgbe_rx_checksum(staterr, sendmp, ptype);
1916 * In case of multiqueue, we have RXCSUM.PCSD bit set
1917 * and never cleared. This means we have RSS hash
1918 * available to be used.
1920 if (adapter->num_queues > 1) {
1921 sendmp->m_pkthdr.flowid =
1922 le32toh(cur->wb.lower.hi_dword.rss);
1923 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1924 case IXGBE_RXDADV_RSSTYPE_IPV4:
1925 M_HASHTYPE_SET(sendmp,
1926 M_HASHTYPE_RSS_IPV4);
1928 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1929 M_HASHTYPE_SET(sendmp,
1930 M_HASHTYPE_RSS_TCP_IPV4);
1932 case IXGBE_RXDADV_RSSTYPE_IPV6:
1933 M_HASHTYPE_SET(sendmp,
1934 M_HASHTYPE_RSS_IPV6);
1936 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1937 M_HASHTYPE_SET(sendmp,
1938 M_HASHTYPE_RSS_TCP_IPV6);
1940 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1941 M_HASHTYPE_SET(sendmp,
1942 M_HASHTYPE_RSS_IPV6_EX);
1944 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1945 M_HASHTYPE_SET(sendmp,
1946 M_HASHTYPE_RSS_TCP_IPV6_EX);
1948 #if __FreeBSD_version > 1100000
1949 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1950 M_HASHTYPE_SET(sendmp,
1951 M_HASHTYPE_RSS_UDP_IPV4);
1953 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1954 M_HASHTYPE_SET(sendmp,
1955 M_HASHTYPE_RSS_UDP_IPV6);
1957 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1958 M_HASHTYPE_SET(sendmp,
1959 M_HASHTYPE_RSS_UDP_IPV6_EX);
1963 M_HASHTYPE_SET(sendmp,
1967 sendmp->m_pkthdr.flowid = que->msix;
1968 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1972 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1973 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1975 /* Advance our pointers to the next descriptor. */
1976 if (++i == rxr->num_desc)
1979 /* Now send to the stack or do LRO */
1980 if (sendmp != NULL) {
1981 rxr->next_to_check = i;
1982 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1983 i = rxr->next_to_check;
1986 /* Every 8 descriptors we go to refresh mbufs */
1987 if (processed == 8) {
1988 ixgbe_refresh_mbufs(rxr, i);
1993 /* Refresh any remaining buf structs */
1994 if (ixgbe_rx_unrefreshed(rxr))
1995 ixgbe_refresh_mbufs(rxr, i);
1997 rxr->next_to_check = i;
2000 * Flush any outstanding LRO work
2002 while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
2003 SLIST_REMOVE_HEAD(&lro->lro_active, next);
2004 tcp_lro_flush(lro, queued);
2007 IXGBE_RX_UNLOCK(rxr);
2010 ** Still have cleaning to do?
2012 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2019 /*********************************************************************
2021 * Verify that the hardware indicated that the checksum is valid.
* Inform the stack about the status of the checksum so that the
* stack doesn't spend time verifying it again.
2025 *********************************************************************/
2027 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2029 u16 status = (u16) staterr;
2030 u8 errors = (u8) (staterr >> 24);
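/* The status bits live in the low word of staterr, the error bits in its top byte */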
2033 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2034 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2038 if (status & IXGBE_RXD_STAT_IPCS) {
2039 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
2040 /* IP Checksum Good */
2041 if (!(errors & IXGBE_RXD_ERR_IPE))
2042 mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
2044 /* TCP/UDP/SCTP checksum */
2045 if (status & IXGBE_RXD_STAT_L4CS) {
2046 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
2047 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2048 mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
2050 mp->m_pkthdr.csum_data = htons(0xffff);
2055 /********************************************************************
2056 * Manage DMA'able memory.
2057 *******************************************************************/
2059 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2063 *(bus_addr_t *) arg = segs->ds_addr;
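/*
 * ixgbe_dma_malloc: create a DMA tag, then allocate, map and load a
 * descriptor area, recording its bus address via ixgbe_dmamap_cb().
 */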
2068 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2069 struct ixgbe_dma_alloc *dma, int mapflags)
2071 device_t dev = adapter->dev;
2074 r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2075 DBA_ALIGN, 0, /* alignment, bounds */
2076 BUS_SPACE_MAXADDR, /* lowaddr */
2077 BUS_SPACE_MAXADDR, /* highaddr */
2078 NULL, NULL, /* filter, filterarg */
2081 size, /* maxsegsize */
2082 BUS_DMA_ALLOCNOW, /* flags */
2083 NULL, /* lockfunc */
2084 NULL, /* lockfuncarg */
2087 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2091 r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2092 BUS_DMA_NOWAIT, &dma->dma_map);
2094 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2098 r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2102 mapflags | BUS_DMA_NOWAIT);
2104 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2108 dma->dma_size = size;
2111 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2113 bus_dma_tag_destroy(dma->dma_tag);
2115 dma->dma_tag = NULL;
2120 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2122 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2123 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2124 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2125 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2126 bus_dma_tag_destroy(dma->dma_tag);
2130 /*********************************************************************
2132 * Allocate memory for the transmit and receive rings, and then
* the descriptors associated with each; this is called only once at attach.
2135 **********************************************************************/
2137 ixgbe_allocate_queues(struct adapter *adapter)
2139 device_t dev = adapter->dev;
2140 struct ix_queue *que;
2141 struct tx_ring *txr;
2142 struct rx_ring *rxr;
2143 int rsize, tsize, error = IXGBE_SUCCESS;
2144 int txconf = 0, rxconf = 0;
2146 enum ixgbe_iov_mode iov_mode;
2149 /* First allocate the top level queue structs */
2150 if (!(adapter->queues =
2151 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2152 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2153 device_printf(dev, "Unable to allocate queue memory\n");
2158 /* First allocate the TX ring struct memory */
2159 if (!(adapter->tx_rings =
2160 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2161 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2162 device_printf(dev, "Unable to allocate TX ring memory\n");
2167 /* Next allocate the RX */
2168 if (!(adapter->rx_rings =
2169 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2170 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2171 device_printf(dev, "Unable to allocate RX ring memory\n");
2176 /* For the ring itself */
2177 tsize = roundup2(adapter->num_tx_desc *
2178 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2181 iov_mode = ixgbe_get_iov_mode(adapter);
2182 adapter->pool = ixgbe_max_vfs(iov_mode);
* Now set up the TX queues; txconf is needed to handle the
* possibility that things fail midcourse and we need to
* undo the memory allocations gracefully
2191 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2192 /* Set up some basics */
2193 txr = &adapter->tx_rings[i];
2194 txr->adapter = adapter;
2196 txr->me = ixgbe_pf_que_index(iov_mode, i);
2200 txr->num_desc = adapter->num_tx_desc;
2202 /* Initialize the TX side lock */
2203 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2204 device_get_nameunit(dev), txr->me);
2205 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2207 if (ixgbe_dma_malloc(adapter, tsize,
2208 &txr->txdma, BUS_DMA_NOWAIT)) {
2210 "Unable to allocate TX Descriptor memory\n");
2214 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2215 bzero((void *)txr->tx_base, tsize);
2217 /* Now allocate transmit buffers for the ring */
2218 if (ixgbe_allocate_transmit_buffers(txr)) {
2220 "Critical Failure setting up transmit buffers\n");
2224 #ifndef IXGBE_LEGACY_TX
2225 /* Allocate a buf ring */
2226 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2227 M_WAITOK, &txr->tx_mtx);
2228 if (txr->br == NULL) {
2230 "Critical Failure setting up buf ring\n");
2238 * Next the RX queues...
2240 rsize = roundup2(adapter->num_rx_desc *
2241 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2242 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2243 rxr = &adapter->rx_rings[i];
2244 /* Set up some basics */
2245 rxr->adapter = adapter;
2247 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2251 rxr->num_desc = adapter->num_rx_desc;
2253 /* Initialize the RX side lock */
2254 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2255 device_get_nameunit(dev), rxr->me);
2256 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2258 if (ixgbe_dma_malloc(adapter, rsize,
2259 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2261 "Unable to allocate RxDescriptor memory\n");
2265 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2266 bzero((void *)rxr->rx_base, rsize);
2268 /* Allocate receive buffers for the ring*/
2269 if (ixgbe_allocate_receive_buffers(rxr)) {
2271 "Critical Failure setting up receive buffers\n");
2278 ** Finally set up the queue holding structs
2280 for (int i = 0; i < adapter->num_queues; i++) {
2281 que = &adapter->queues[i];
2282 que->adapter = adapter;
2284 que->txr = &adapter->tx_rings[i];
2285 que->rxr = &adapter->rx_rings[i];
2291 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2292 ixgbe_dma_free(adapter, &rxr->rxdma);
2294 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2295 ixgbe_dma_free(adapter, &txr->txdma);
2296 free(adapter->rx_rings, M_DEVBUF);
2298 free(adapter->tx_rings, M_DEVBUF);
2300 free(adapter->queues, M_DEVBUF);