1 /******************************************************************************
3 Copyright (c) 2001-2015, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
36 #ifndef IXGBE_STANDALONE_BUILD
38 #include "opt_inet6.h"
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
54 extern int ix_crcstrip;
59 ** this feature only works with
60 ** IPv4, and only on 82599 and later.
61 ** Also this will cause IP forwarding to
62 ** fail, and that can't be controlled by
63 ** the stack the way it can with LRO. For all these
64 ** reasons I've deemed it best to leave
65 ** this off and not bother with a tunable
66 ** interface; enabling it requires a recompile.
69 static bool ixgbe_rsc_enable = FALSE;
73 ** For Flow Director: this is the
74 ** number of TX packets we sample
75 ** for the filter pool; this means
76 ** every 20th packet will be probed.
78 ** This feature can be disabled by setting this to 0.
81 static int atr_sample_rate = 20;
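/*
** A hedged sketch (not part of the driver logic): in the transmit path a
** per-ring counter gates how often ixgbe_atr() is called, roughly
**
**	if (txr->atr_sample && !adapter->fdir_reinit &&
**	    ++txr->atr_count >= atr_sample_rate) {
**		ixgbe_atr(txr, m_head);
**		txr->atr_count = 0;
**	}
**
** so with the default of 20, only one TX packet in twenty is used to
** program a Flow Director signature filter; the actual bookkeeping
** lives in ixgbe_xmit() below.
*/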
84 /*********************************************************************
85 * Local Function prototypes
86 *********************************************************************/
87 static void ixgbe_setup_transmit_ring(struct tx_ring *);
88 static void ixgbe_free_transmit_buffers(struct tx_ring *);
89 static int ixgbe_setup_receive_ring(struct rx_ring *);
90 static void ixgbe_free_receive_buffers(struct rx_ring *);
92 static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
93 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
94 static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
95 static int ixgbe_tx_ctx_setup(struct tx_ring *,
96 struct mbuf *, u32 *, u32 *);
97 static int ixgbe_tso_setup(struct tx_ring *,
98 struct mbuf *, u32 *, u32 *);
100 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
102 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
103 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
106 #ifdef IXGBE_LEGACY_TX
107 /*********************************************************************
108 * Transmit entry point
110 * ixgbe_start is called by the stack to initiate a transmit.
111 * The driver will remain in this routine as long as there are
112 * packets to transmit and transmit resources are available.
113 * In case resources are not available, the stack is notified and
114 * the packet is requeued.
115 **********************************************************************/
118 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
121 struct adapter *adapter = txr->adapter;
123 IXGBE_TX_LOCK_ASSERT(txr);
125 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
127 if (!adapter->link_active)
130 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
131 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
134 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
138 if (ixgbe_xmit(txr, &m_head)) {
140 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
143 /* Send a copy of the frame to the BPF listener */
144 ETHER_BPF_MTAP(ifp, m_head);
150 * Legacy TX start - called by the stack, this
151 * always uses the first tx ring, and should
152 * not be used with multiqueue tx enabled.
155 ixgbe_start(struct ifnet *ifp)
157 struct adapter *adapter = ifp->if_softc;
158 struct tx_ring *txr = adapter->tx_rings;
160 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
162 ixgbe_start_locked(txr, ifp);
163 IXGBE_TX_UNLOCK(txr);
168 #else /* ! IXGBE_LEGACY_TX */
171 ** Multiqueue Transmit Entry Point
172 ** (if_transmit function)
175 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
177 struct adapter *adapter = ifp->if_softc;
178 struct ix_queue *que;
186 * When doing RSS, map it to the same outbound queue
187 * as the incoming flow would be mapped to.
189 * If everything is set up correctly, it should be the
190 * same bucket as the one the current CPU is mapped to.
192 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
194 if (rss_hash2bucket(m->m_pkthdr.flowid,
195 M_HASHTYPE_GET(m), &bucket_id) == 0) {
196 i = bucket_id % adapter->num_queues;
198 if (bucket_id > adapter->num_queues)
199 if_printf(ifp, "bucket_id (%d) > num_queues "
200 "(%d)\n", bucket_id, adapter->num_queues);
204 i = m->m_pkthdr.flowid % adapter->num_queues;
206 i = curcpu % adapter->num_queues;
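/*
 * To summarize the queue selection above: when the mbuf carries a
 * valid RSS hash and rss_hash2bucket() can map it to a bucket, the
 * bucket picks the queue; otherwise the raw flowid, and as a last
 * resort the current CPU, is taken modulo num_queues.
 */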
208 /* Check for a hung queue and pick alternative */
209 if (((1 << i) & adapter->active_queues) == 0)
210 i = ffsl(adapter->active_queues);
212 txr = &adapter->tx_rings[i];
213 que = &adapter->queues[i];
215 err = drbr_enqueue(ifp, txr->br, m);
218 if (IXGBE_TX_TRYLOCK(txr)) {
219 ixgbe_mq_start_locked(ifp, txr);
220 IXGBE_TX_UNLOCK(txr);
222 taskqueue_enqueue(que->tq, &txr->txq_task);
228 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
230 struct adapter *adapter = txr->adapter;
232 int enqueued = 0, err = 0;
234 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
235 adapter->link_active == 0)
238 /* Process the queue */
239 #if __FreeBSD_version < 901504
240 next = drbr_dequeue(ifp, txr->br);
241 while (next != NULL) {
242 if ((err = ixgbe_xmit(txr, &next)) != 0) {
244 err = drbr_enqueue(ifp, txr->br, next);
246 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
247 if ((err = ixgbe_xmit(txr, &next)) != 0) {
249 drbr_advance(ifp, txr->br);
251 drbr_putback(ifp, txr->br, next);
256 #if __FreeBSD_version >= 901504
257 drbr_advance(ifp, txr->br);
260 #if 0 // this is VF-only
261 #if __FreeBSD_version >= 1100036
263 * Since we're looking at the tx ring, we can check
264 * to see if we're a VF by examining our tail register
267 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
268 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
271 /* Send a copy of the frame to the BPF listener */
272 ETHER_BPF_MTAP(ifp, next);
273 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
275 #if __FreeBSD_version < 901504
276 next = drbr_dequeue(ifp, txr->br);
280 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
287 * Called from a taskqueue to drain queued transmit packets.
290 ixgbe_deferred_mq_start(void *arg, int pending)
292 struct tx_ring *txr = arg;
293 struct adapter *adapter = txr->adapter;
294 struct ifnet *ifp = adapter->ifp;
297 if (!drbr_empty(ifp, txr->br))
298 ixgbe_mq_start_locked(ifp, txr);
299 IXGBE_TX_UNLOCK(txr);
303 * Flush all ring buffers
306 ixgbe_qflush(struct ifnet *ifp)
308 struct adapter *adapter = ifp->if_softc;
309 struct tx_ring *txr = adapter->tx_rings;
312 for (int i = 0; i < adapter->num_queues; i++, txr++) {
314 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
316 IXGBE_TX_UNLOCK(txr);
320 #endif /* IXGBE_LEGACY_TX */
323 /*********************************************************************
325 * This routine maps the mbufs to tx descriptors, allowing the
326 * TX engine to transmit the packets.
327 * - return 0 on success, positive on failure
329 **********************************************************************/
332 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
334 struct adapter *adapter = txr->adapter;
335 u32 olinfo_status = 0, cmd_type_len;
336 int i, j, error, nsegs;
340 bus_dma_segment_t segs[adapter->num_segs];
342 struct ixgbe_tx_buf *txbuf;
343 union ixgbe_adv_tx_desc *txd = NULL;
347 /* Basic descriptor defines */
348 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
349 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
351 if (m_head->m_flags & M_VLANTAG)
352 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
355 * Important to capture the first descriptor
356 * used because it will contain the index of
357 * the one we tell the hardware to report back
359 first = txr->next_avail_desc;
360 txbuf = &txr->tx_buffers[first];
364 * Map the packet for DMA.
367 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
368 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
370 if (__predict_false(error)) {
375 /* Try it again? - one try */
379 * XXX: m_defrag will choke on
380 * non-MCLBYTES-sized clusters
382 m = m_defrag(*m_headp, M_NOWAIT);
384 adapter->mbuf_defrag_failed++;
394 txr->no_tx_dma_setup++;
397 txr->no_tx_dma_setup++;
404 /* Make certain there are enough descriptors */
405 if (nsegs > txr->tx_avail - 2) {
406 txr->no_desc_avail++;
407 bus_dmamap_unload(txr->txtag, map);
413 * Set up the appropriate offload context;
414 * this will consume the first descriptor
416 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
417 if (__predict_false(error)) {
418 if (error == ENOBUFS)
424 /* Do the flow director magic */
425 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
427 if (txr->atr_count >= atr_sample_rate) {
428 ixgbe_atr(txr, m_head);
434 olinfo_status |= IXGBE_ADVTXD_CC;
435 i = txr->next_avail_desc;
436 for (j = 0; j < nsegs; j++) {
440 txbuf = &txr->tx_buffers[i];
441 txd = &txr->tx_base[i];
442 seglen = segs[j].ds_len;
443 segaddr = htole64(segs[j].ds_addr);
445 txd->read.buffer_addr = segaddr;
446 txd->read.cmd_type_len = htole32(txr->txd_cmd |
447 cmd_type_len |seglen);
448 txd->read.olinfo_status = htole32(olinfo_status);
450 if (++i == txr->num_desc)
454 txd->read.cmd_type_len |=
455 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456 txr->tx_avail -= nsegs;
457 txr->next_avail_desc = i;
459 txbuf->m_head = m_head;
461 * Here we swap the map so the last descriptor,
462 * which gets the completion interrupt, has the
463 * real map, and the first descriptor gets the
464 * unused map from this descriptor.
466 txr->tx_buffers[first].map = txbuf->map;
467 txbuf->map = map;
468 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
470 /* Set the EOP descriptor that will be marked done */
471 txbuf = &txr->tx_buffers[first];
474 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
477 * Advance the Transmit Descriptor Tail (TDT); this tells the
478 * hardware that this frame is available to transmit.
480 ++txr->total_packets;
481 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
483 /* Mark queue as having work */
491 /*********************************************************************
493 * Allocate memory for tx_buffer structures. The tx_buffer stores all
494 * the information needed to transmit a packet on the wire. This is
495 * called only once at attach; setup is done on every reset.
497 **********************************************************************/
499 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
501 struct adapter *adapter = txr->adapter;
502 device_t dev = adapter->dev;
503 struct ixgbe_tx_buf *txbuf;
507 * Setup DMA descriptor areas.
509 if ((error = bus_dma_tag_create(
510 bus_get_dma_tag(adapter->dev), /* parent */
511 1, 0, /* alignment, bounds */
512 BUS_SPACE_MAXADDR, /* lowaddr */
513 BUS_SPACE_MAXADDR, /* highaddr */
514 NULL, NULL, /* filter, filterarg */
515 IXGBE_TSO_SIZE, /* maxsize */
516 adapter->num_segs, /* nsegments */
517 PAGE_SIZE, /* maxsegsize */
520 NULL, /* lockfuncarg */
522 device_printf(dev,"Unable to allocate TX DMA tag\n");
526 if (!(txr->tx_buffers =
527 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529 device_printf(dev, "Unable to allocate tx_buffer memory\n");
534 /* Create the descriptor buffer dma maps */
535 txbuf = txr->tx_buffers;
536 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
539 device_printf(dev, "Unable to create TX DMA map\n");
546 /* We free all; this handles the case where we fail partway through */
547 ixgbe_free_transmit_structures(adapter);
551 /*********************************************************************
553 * Initialize a transmit ring.
555 **********************************************************************/
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
559 struct adapter *adapter = txr->adapter;
560 struct ixgbe_tx_buf *txbuf;
562 struct netmap_adapter *na = NA(adapter->ifp);
563 struct netmap_slot *slot;
564 #endif /* DEV_NETMAP */
566 /* Clear the old ring contents */
570 * (under lock): if in netmap mode, do some consistency
571 * checks and set slot to entry 0 of the netmap ring.
573 slot = netmap_reset(na, NR_TX, txr->me, 0);
574 #endif /* DEV_NETMAP */
575 bzero((void *)txr->tx_base,
576 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
578 txr->next_avail_desc = 0;
579 txr->next_to_clean = 0;
581 /* Free any existing tx buffers. */
582 txbuf = txr->tx_buffers;
583 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
584 if (txbuf->m_head != NULL) {
585 bus_dmamap_sync(txr->txtag, txbuf->map,
586 BUS_DMASYNC_POSTWRITE);
587 bus_dmamap_unload(txr->txtag, txbuf->map);
588 m_freem(txbuf->m_head);
589 txbuf->m_head = NULL;
593 * In netmap mode, set the map for the packet buffer.
594 * NOTE: Some drivers (not this one) also need to set
595 * the physical buffer address in the NIC ring.
596 * Slots in the netmap ring (indexed by "si") are
597 * kring->nkr_hwofs positions "ahead" wrt the
598 * corresponding slot in the NIC ring. In some drivers
599 * (not here) nkr_hwofs can be negative. Function
600 * netmap_idx_n2k() handles wraparounds properly.
603 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
604 netmap_load_map(na, txr->txtag,
605 txbuf->map, NMB(na, slot + si));
607 #endif /* DEV_NETMAP */
608 /* Clear the EOP descriptor pointer */
613 /* Set the rate at which we sample packets */
614 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615 txr->atr_sample = atr_sample_rate;
618 /* Set number of descriptors available */
619 txr->tx_avail = adapter->num_tx_desc;
621 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623 IXGBE_TX_UNLOCK(txr);
626 /*********************************************************************
628 * Initialize all transmit rings.
630 **********************************************************************/
632 ixgbe_setup_transmit_structures(struct adapter *adapter)
634 struct tx_ring *txr = adapter->tx_rings;
636 for (int i = 0; i < adapter->num_queues; i++, txr++)
637 ixgbe_setup_transmit_ring(txr);
642 /*********************************************************************
644 * Free all transmit rings.
646 **********************************************************************/
648 ixgbe_free_transmit_structures(struct adapter *adapter)
650 struct tx_ring *txr = adapter->tx_rings;
652 for (int i = 0; i < adapter->num_queues; i++, txr++) {
654 ixgbe_free_transmit_buffers(txr);
655 ixgbe_dma_free(adapter, &txr->txdma);
656 IXGBE_TX_UNLOCK(txr);
657 IXGBE_TX_LOCK_DESTROY(txr);
659 free(adapter->tx_rings, M_DEVBUF);
662 /*********************************************************************
664 * Free transmit ring related data structures.
666 **********************************************************************/
668 ixgbe_free_transmit_buffers(struct tx_ring *txr)
670 struct adapter *adapter = txr->adapter;
671 struct ixgbe_tx_buf *tx_buffer;
674 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
676 if (txr->tx_buffers == NULL)
679 tx_buffer = txr->tx_buffers;
680 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
681 if (tx_buffer->m_head != NULL) {
682 bus_dmamap_sync(txr->txtag, tx_buffer->map,
683 BUS_DMASYNC_POSTWRITE);
684 bus_dmamap_unload(txr->txtag,
686 m_freem(tx_buffer->m_head);
687 tx_buffer->m_head = NULL;
688 if (tx_buffer->map != NULL) {
689 bus_dmamap_destroy(txr->txtag,
691 tx_buffer->map = NULL;
693 } else if (tx_buffer->map != NULL) {
694 bus_dmamap_unload(txr->txtag,
696 bus_dmamap_destroy(txr->txtag,
698 tx_buffer->map = NULL;
701 #ifdef IXGBE_LEGACY_TX
703 buf_ring_free(txr->br, M_DEVBUF);
705 if (txr->tx_buffers != NULL) {
706 free(txr->tx_buffers, M_DEVBUF);
707 txr->tx_buffers = NULL;
709 if (txr->txtag != NULL) {
710 bus_dma_tag_destroy(txr->txtag);
716 /*********************************************************************
718 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
720 **********************************************************************/
723 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724 u32 *cmd_type_len, u32 *olinfo_status)
726 struct adapter *adapter = txr->adapter;
727 struct ixgbe_adv_tx_context_desc *TXD;
728 struct ether_vlan_header *eh;
735 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
736 int ehdrlen, ip_hlen = 0;
740 int ctxd = txr->next_avail_desc;
745 /* First check if TSO is to be used */
746 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO))
747 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
749 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
752 /* Indicate the whole packet as payload when not doing TSO */
753 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
755 /* Now ready a context descriptor */
756 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
759 ** In advanced descriptors the vlan tag must
760 ** be placed into the context descriptor. Hence
761 ** we need to make one even if not doing offloads.
763 if (mp->m_flags & M_VLANTAG) {
764 vtag = htole16(mp->m_pkthdr.ether_vtag);
765 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
766 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
770 * Determine where frame payload starts.
771 * Jump over vlan headers if already present,
772 * helpful for QinQ too.
774 eh = mtod(mp, struct ether_vlan_header *);
775 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
776 etype = ntohs(eh->evl_proto);
777 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
779 etype = ntohs(eh->evl_encap_proto);
780 ehdrlen = ETHER_HDR_LEN;
783 /* Set the ether header length */
784 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
786 if (offload == FALSE)
790 * If the first mbuf only includes the ethernet header, jump to the next one
791 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
792 * XXX: And assumes the entire IP header is contained in one mbuf
794 if (mp->m_len == ehdrlen && mp->m_next)
795 l3d = mtod(mp->m_next, caddr_t);
797 l3d = mtod(mp, caddr_t) + ehdrlen;
802 ip = (struct ip *)(l3d);
803 ip_hlen = ip->ip_hl << 2;
805 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
806 /* Insert IPv4 checksum into data descriptors */
807 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
809 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
815 ip6 = (struct ip6_hdr *)(l3d);
816 ip_hlen = sizeof(struct ip6_hdr);
817 ipproto = ip6->ip6_nxt;
818 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
826 vlan_macip_lens |= ip_hlen;
828 /* No support for offloads for non-L4 next headers */
831 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
832 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
837 if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
838 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
843 if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
844 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
853 if (offload) /* Insert L4 checksum into data descriptors */
854 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
857 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
859 /* Now copy bits into descriptor */
860 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
861 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
862 TXD->seqnum_seed = htole32(0);
863 TXD->mss_l4len_idx = htole32(0);
865 /* We've consumed the first desc, adjust counters */
866 if (++ctxd == txr->num_desc)
868 txr->next_avail_desc = ctxd;
874 /**********************************************************************
876 * Setup work for hardware segmentation offload (TSO) on
877 * adapters using advanced tx descriptors
879 **********************************************************************/
881 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
882 u32 *cmd_type_len, u32 *olinfo_status)
884 struct ixgbe_adv_tx_context_desc *TXD;
885 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
886 u32 mss_l4len_idx = 0, paylen;
887 u16 vtag = 0, eh_type;
888 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
889 struct ether_vlan_header *eh;
899 * Determine where frame payload starts.
900 * Jump over vlan headers if already present
902 eh = mtod(mp, struct ether_vlan_header *);
903 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
904 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
905 eh_type = eh->evl_proto;
907 ehdrlen = ETHER_HDR_LEN;
908 eh_type = eh->evl_encap_proto;
911 switch (ntohs(eh_type)) {
914 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
915 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
916 if (ip6->ip6_nxt != IPPROTO_TCP)
918 ip_hlen = sizeof(struct ip6_hdr);
919 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
920 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
921 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
922 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
927 ip = (struct ip *)(mp->m_data + ehdrlen);
928 if (ip->ip_p != IPPROTO_TCP)
931 ip_hlen = ip->ip_hl << 2;
932 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
933 th->th_sum = in_pseudo(ip->ip_src.s_addr,
934 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
935 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
936 /* Tell transmit desc to also do IPv4 checksum. */
937 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
941 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
942 __func__, ntohs(eh_type));
946 ctxd = txr->next_avail_desc;
947 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
949 tcp_hlen = th->th_off << 2;
951 /* This is used in the transmit desc in encap */
952 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
954 /* VLAN MACLEN IPLEN */
955 if (mp->m_flags & M_VLANTAG) {
956 vtag = htole16(mp->m_pkthdr.ether_vtag);
957 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
960 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
961 vlan_macip_lens |= ip_hlen;
962 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
964 /* ADV DTYPE TUCMD */
965 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
966 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
967 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
970 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
971 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
972 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
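/*
 * A worked example, assuming the usual ixgbe_type.h shift values
 * (MSS shift 16, L4LEN shift 8): with tso_segsz = 1460 and a 20-byte
 * TCP header, mss_l4len_idx = (1460 << 16) | (20 << 8) = 0x05B41400,
 * i.e. the MSS lands in bits 31:16 and the L4 header length in
 * bits 15:8 of the context descriptor.
 */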
974 TXD->seqnum_seed = htole32(0);
976 if (++ctxd == txr->num_desc)
980 txr->next_avail_desc = ctxd;
981 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
982 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
983 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
989 /**********************************************************************
991 * Examine each tx_buffer in the used queue. If the hardware is done
992 * processing the packet then free associated resources. The
993 * tx_buffer is put back on the free queue.
995 **********************************************************************/
997 ixgbe_txeof(struct tx_ring *txr)
999 struct adapter *adapter = txr->adapter;
1001 struct ifnet *ifp = adapter->ifp;
1003 u32 work, processed = 0;
1004 u32 limit = adapter->tx_process_limit;
1005 struct ixgbe_tx_buf *buf;
1006 union ixgbe_adv_tx_desc *txd;
1008 mtx_assert(&txr->tx_mtx, MA_OWNED);
1011 if (ifp->if_capenable & IFCAP_NETMAP) {
1012 struct netmap_adapter *na = NA(ifp);
1013 struct netmap_kring *kring = &na->tx_rings[txr->me];
1015 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1016 BUS_DMASYNC_POSTREAD);
1018 * In netmap mode, all the work is done in the context
1019 * of the client thread. Interrupt handlers only wake up
1020 * clients, which may be sleeping on individual rings
1021 * or on a global resource for all rings.
1022 * To implement tx interrupt mitigation, we wake up the client
1023 * thread roughly every half ring, even if the NIC interrupts
1024 * more frequently. This is implemented as follows:
1025 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1026 * the slot that should wake up the thread (nkr_num_slots
1027 * means the user thread should not be woken up);
1028 * - the driver ignores tx interrupts unless netmap_mitigate=0
1029 * or the slot has the DD bit set.
1031 if (!netmap_mitigate ||
1032 (kring->nr_kflags < kring->nkr_num_slots &&
1033 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1034 netmap_tx_irq(ifp, txr->me);
1038 #endif /* DEV_NETMAP */
1040 if (txr->tx_avail == txr->num_desc) {
1045 /* Get work starting point */
1046 work = txr->next_to_clean;
1047 buf = &txr->tx_buffers[work];
1048 txd = &txr->tx_base[work];
1049 work -= txr->num_desc; /* The distance to ring end */
1050 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1051 BUS_DMASYNC_POSTREAD);
1054 union ixgbe_adv_tx_desc *eop = buf->eop;
1055 if (eop == NULL) /* No work */
1058 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1059 break; /* I/O not complete */
1063 buf->m_head->m_pkthdr.len;
1064 bus_dmamap_sync(txr->txtag,
1066 BUS_DMASYNC_POSTWRITE);
1067 bus_dmamap_unload(txr->txtag,
1069 m_freem(buf->m_head);
1075 /* We clean the range if multi segment */
1076 while (txd != eop) {
1080 /* wrap the ring? */
1081 if (__predict_false(!work)) {
1082 work -= txr->num_desc;
1083 buf = txr->tx_buffers;
1088 buf->m_head->m_pkthdr.len;
1089 bus_dmamap_sync(txr->txtag,
1091 BUS_DMASYNC_POSTWRITE);
1092 bus_dmamap_unload(txr->txtag,
1094 m_freem(buf->m_head);
1104 /* Try the next packet */
1108 /* reset with a wrap */
1109 if (__predict_false(!work)) {
1110 work -= txr->num_desc;
1111 buf = txr->tx_buffers;
1115 } while (__predict_true(--limit));
1117 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1118 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1120 work += txr->num_desc;
1121 txr->next_to_clean = work;
1124 ** Queue hang detection: we know there's
1125 ** work outstanding or the first return
1126 ** would have been taken, so increment busy
1127 ** if nothing managed to get cleaned; then
1128 ** in local_timer it will be checked and
1129 ** marked as HUNG if it exceeds a MAX attempt.
1131 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1134 ** If anything gets cleaned we reset the state to 1;
1135 ** note this will turn off HUNG if it's set.
1140 if (txr->tx_avail == txr->num_desc)
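/*
** A hedged sketch of the busy accounting described above; the exact
** statements in the driver may differ slightly:
**
**	if (processed == 0 && txr->busy != IXGBE_QUEUE_HUNG)
**		++txr->busy;            step toward the HUNG threshold
**	if (processed)
**		txr->busy = 1;          progress made, restart the count
**	if (txr->tx_avail == txr->num_desc)
**		txr->busy = 0;          ring fully clean, nothing pending
*/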
1149 ** This routine parses packet headers so that Flow
1150 ** Director can make a hashed filter table entry
1151 ** allowing traffic flows to be identified and kept
1152 ** on the same CPU. This would be a performance
1153 ** hit, but we only do it at IXGBE_FDIR_RATE of packets.
1157 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1159 struct adapter *adapter = txr->adapter;
1160 struct ix_queue *que;
1164 struct ether_vlan_header *eh;
1165 union ixgbe_atr_hash_dword input = {.dword = 0};
1166 union ixgbe_atr_hash_dword common = {.dword = 0};
1167 int ehdrlen, ip_hlen;
1170 eh = mtod(mp, struct ether_vlan_header *);
1171 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1172 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1173 etype = eh->evl_proto;
1175 ehdrlen = ETHER_HDR_LEN;
1176 etype = eh->evl_encap_proto;
1179 /* Only handling IPv4 */
1180 if (etype != htons(ETHERTYPE_IP))
1183 ip = (struct ip *)(mp->m_data + ehdrlen);
1184 ip_hlen = ip->ip_hl << 2;
1186 /* check if we're UDP or TCP */
1189 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1190 /* src and dst are inverted */
1191 common.port.dst ^= th->th_sport;
1192 common.port.src ^= th->th_dport;
1193 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1196 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1197 /* src and dst are inverted */
1198 common.port.dst ^= uh->uh_sport;
1199 common.port.src ^= uh->uh_dport;
1200 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1206 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1207 if (mp->m_pkthdr.ether_vtag)
1208 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1210 common.flex_bytes ^= etype;
1211 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1213 que = &adapter->queues[txr->me];
1215 ** This assumes the Rx queue and Tx
1216 ** queue are bound to the same CPU
1218 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1219 input, common, que->msix);
1221 #endif /* IXGBE_FDIR */
1224 ** Used to detect a descriptor that has
1225 ** been merged by Hardware RSC.
1228 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1230 return (le32toh(rx->wb.lower.lo_dword.data) &
1231 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1234 /*********************************************************************
1236 * Initialize Hardware RSC (LRO) feature on 82599
1237 * for an RX ring; this is toggled by the LRO capability
1238 * even though it is transparent to the stack.
1240 * NOTE: since this HW feature only works with IPv4, and
1241 * our testing has shown soft LRO to be as effective,
1242 * I have decided to disable this by default.
1244 **********************************************************************/
1246 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1248 struct adapter *adapter = rxr->adapter;
1249 struct ixgbe_hw *hw = &adapter->hw;
1250 u32 rscctrl, rdrxctl;
1252 /* If turning LRO/RSC off we need to disable it */
1253 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1254 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1255 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1259 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1260 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1261 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
1262 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1263 #endif /* DEV_NETMAP */
1264 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1265 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1266 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1268 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1269 rscctrl |= IXGBE_RSCCTL_RSCEN;
1271 ** Limit the total number of descriptors that
1272 ** can be combined, so it does not exceed 64K
1274 if (rxr->mbuf_sz == MCLBYTES)
1275 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1276 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1277 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1278 else if (rxr->mbuf_sz == MJUM9BYTES)
1279 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1280 else /* Using 16K cluster */
1281 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
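/*
** Rough arithmetic behind the caps above, assuming the common amd64
** cluster sizes: 16 x 2KB = 32KB, 8 x 4KB = 32KB, 4 x 9KB = 36KB and
** 1 x 16KB = 16KB, so an RSC aggregation stays under the 64KB limit
** in every case.
*/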
1283 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1285 /* Enable TCP header recognition */
1286 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1287 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1288 IXGBE_PSRTYPE_TCPHDR));
1290 /* Disable RSC for ACK packets */
1291 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1292 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1297 /*********************************************************************
1299 * Refresh mbuf buffers for RX descriptor rings
1300 * - now keeps its own state so discards due to resource
1301 * exhaustion are unnecessary; if an mbuf cannot be obtained
1302 * it just returns, keeping its placeholder, so it can simply
1303 * be called again to retry.
1305 **********************************************************************/
1307 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1309 struct adapter *adapter = rxr->adapter;
1310 bus_dma_segment_t seg[1];
1311 struct ixgbe_rx_buf *rxbuf;
1313 int i, j, nsegs, error;
1314 bool refreshed = FALSE;
1316 i = j = rxr->next_to_refresh;
1317 /* Control the loop with one beyond */
1318 if (++j == rxr->num_desc)
1321 while (j != limit) {
1322 rxbuf = &rxr->rx_buffers[i];
1323 if (rxbuf->buf == NULL) {
1324 mp = m_getjcl(M_NOWAIT, MT_DATA,
1325 M_PKTHDR, rxr->mbuf_sz);
1328 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1329 m_adj(mp, ETHER_ALIGN);
1333 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1335 /* If we're dealing with an mbuf that was copied rather
1336 * than replaced, there's no need to go through busdma.
1338 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1339 /* Get the memory mapping */
1340 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1341 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1342 rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1344 printf("Refresh mbufs: payload dmamap load"
1345 " failure - %d\n", error);
1351 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1352 BUS_DMASYNC_PREREAD);
1353 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1354 htole64(seg[0].ds_addr);
1356 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1357 rxbuf->flags &= ~IXGBE_RX_COPY;
1361 /* Next is precalculated */
1363 rxr->next_to_refresh = i;
1364 if (++j == rxr->num_desc)
1368 if (refreshed) /* Update hardware tail index */
1369 IXGBE_WRITE_REG(&adapter->hw,
1370 rxr->tail, rxr->next_to_refresh);
1374 /*********************************************************************
1376 * Allocate memory for rx_buffer structures. Since we use one
1377 * rx_buffer per received packet, the maximum number of rx_buffers
1378 * that we'll need is equal to the number of receive descriptors
1379 * that we've allocated.
1381 **********************************************************************/
1383 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1385 struct adapter *adapter = rxr->adapter;
1386 device_t dev = adapter->dev;
1387 struct ixgbe_rx_buf *rxbuf;
1390 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1391 if (!(rxr->rx_buffers =
1392 (struct ixgbe_rx_buf *) malloc(bsize,
1393 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1394 device_printf(dev, "Unable to allocate rx_buffer memory\n");
1399 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1400 1, 0, /* alignment, bounds */
1401 BUS_SPACE_MAXADDR, /* lowaddr */
1402 BUS_SPACE_MAXADDR, /* highaddr */
1403 NULL, NULL, /* filter, filterarg */
1404 MJUM16BYTES, /* maxsize */
1406 MJUM16BYTES, /* maxsegsize */
1408 NULL, /* lockfunc */
1409 NULL, /* lockfuncarg */
1411 device_printf(dev, "Unable to create RX DMA tag\n");
1415 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1416 rxbuf = &rxr->rx_buffers[i];
1417 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1419 device_printf(dev, "Unable to create RX dma map\n");
1427 /* Frees all, but can handle partial completion */
1428 ixgbe_free_receive_structures(adapter);
1433 ixgbe_free_receive_ring(struct rx_ring *rxr)
1435 struct ixgbe_rx_buf *rxbuf;
1437 for (int i = 0; i < rxr->num_desc; i++) {
1438 rxbuf = &rxr->rx_buffers[i];
1439 if (rxbuf->buf != NULL) {
1440 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1441 BUS_DMASYNC_POSTREAD);
1442 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1443 rxbuf->buf->m_flags |= M_PKTHDR;
1444 m_freem(rxbuf->buf);
1451 /*********************************************************************
1453 * Initialize a receive ring and its buffers.
1455 **********************************************************************/
1457 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1459 struct adapter *adapter;
1462 struct ixgbe_rx_buf *rxbuf;
1463 bus_dma_segment_t seg[1];
1464 struct lro_ctrl *lro = &rxr->lro;
1465 int rsize, nsegs, error = 0;
1467 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1468 struct netmap_slot *slot;
1469 #endif /* DEV_NETMAP */
1471 adapter = rxr->adapter;
1475 /* Clear the ring contents */
1478 /* same as in ixgbe_setup_transmit_ring() */
1479 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1480 #endif /* DEV_NETMAP */
1481 rsize = roundup2(adapter->num_rx_desc *
1482 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1483 bzero((void *)rxr->rx_base, rsize);
1484 /* Cache the size */
1485 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1487 /* Free current RX buffer structs and their mbufs */
1488 ixgbe_free_receive_ring(rxr);
1490 /* Now replenish the mbufs */
1491 for (int j = 0; j != rxr->num_desc; ++j) {
1494 rxbuf = &rxr->rx_buffers[j];
1497 * In netmap mode, fill the map and set the buffer
1498 * address in the NIC ring, considering the offset
1499 * between the netmap and NIC rings (see comment in
1500 * ixgbe_setup_transmit_ring() ). No need to allocate
1501 * an mbuf, so end the block with a continue;
1504 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1508 addr = PNMB(na, slot + sj, &paddr);
1509 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1510 /* Update descriptor and the cached value */
1511 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1512 rxbuf->addr = htole64(paddr);
1515 #endif /* DEV_NETMAP */
1517 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1518 M_PKTHDR, adapter->rx_mbuf_sz);
1519 if (rxbuf->buf == NULL) {
1524 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1525 /* Get the memory mapping */
1526 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1527 rxbuf->pmap, mp, seg,
1528 &nsegs, BUS_DMA_NOWAIT);
1531 bus_dmamap_sync(rxr->ptag,
1532 rxbuf->pmap, BUS_DMASYNC_PREREAD);
1533 /* Update the descriptor and the cached value */
1534 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1535 rxbuf->addr = htole64(seg[0].ds_addr);
1539 /* Setup our descriptor indices */
1540 rxr->next_to_check = 0;
1541 rxr->next_to_refresh = 0;
1542 rxr->lro_enabled = FALSE;
1545 rxr->vtag_strip = FALSE;
1547 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1548 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1551 ** Now set up the LRO interface:
1553 if (ixgbe_rsc_enable)
1554 ixgbe_setup_hw_rsc(rxr);
1555 else if (ifp->if_capenable & IFCAP_LRO) {
1556 int err = tcp_lro_init(lro);
1558 device_printf(dev, "LRO Initialization failed!\n");
1561 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1562 rxr->lro_enabled = TRUE;
1563 lro->ifp = adapter->ifp;
1566 IXGBE_RX_UNLOCK(rxr);
1570 ixgbe_free_receive_ring(rxr);
1571 IXGBE_RX_UNLOCK(rxr);
1575 /*********************************************************************
1577 * Initialize all receive rings.
1579 **********************************************************************/
1581 ixgbe_setup_receive_structures(struct adapter *adapter)
1583 struct rx_ring *rxr = adapter->rx_rings;
1586 for (j = 0; j < adapter->num_queues; j++, rxr++)
1587 if (ixgbe_setup_receive_ring(rxr))
1593 * Free RX buffers allocated so far; we will only handle
1594 * the rings that completed, as the failing case will have
1595 * cleaned up for itself. 'j' failed, so it's the terminus.
1597 for (int i = 0; i < j; ++i) {
1598 rxr = &adapter->rx_rings[i];
1599 ixgbe_free_receive_ring(rxr);
1606 /*********************************************************************
1608 * Free all receive rings.
1610 **********************************************************************/
1612 ixgbe_free_receive_structures(struct adapter *adapter)
1614 struct rx_ring *rxr = adapter->rx_rings;
1616 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1618 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1619 struct lro_ctrl *lro = &rxr->lro;
1620 ixgbe_free_receive_buffers(rxr);
1621 /* Free LRO memory */
1623 /* Free the ring memory as well */
1624 ixgbe_dma_free(adapter, &rxr->rxdma);
1627 free(adapter->rx_rings, M_DEVBUF);
1631 /*********************************************************************
1633 * Free receive ring data structures
1635 **********************************************************************/
1637 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1639 struct adapter *adapter = rxr->adapter;
1640 struct ixgbe_rx_buf *rxbuf;
1642 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1644 /* Cleanup any existing buffers */
1645 if (rxr->rx_buffers != NULL) {
1646 for (int i = 0; i < adapter->num_rx_desc; i++) {
1647 rxbuf = &rxr->rx_buffers[i];
1648 if (rxbuf->buf != NULL) {
1649 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1650 BUS_DMASYNC_POSTREAD);
1651 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1652 rxbuf->buf->m_flags |= M_PKTHDR;
1653 m_freem(rxbuf->buf);
1656 if (rxbuf->pmap != NULL) {
1657 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1661 if (rxr->rx_buffers != NULL) {
1662 free(rxr->rx_buffers, M_DEVBUF);
1663 rxr->rx_buffers = NULL;
1667 if (rxr->ptag != NULL) {
1668 bus_dma_tag_destroy(rxr->ptag);
1675 static __inline void
1676 ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1680 * ATM LRO is only for IP/TCP packets and the TCP checksum of the packet
1681 * should be computed by hardware. Also it should not have a VLAN tag in
1682 * its ethernet header. In case of IPv6 we do not yet support ext. hdrs.
1684 if (rxr->lro_enabled &&
1685 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1686 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1687 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1688 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1689 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1690 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1691 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1692 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1694 * Send to the stack if:
1695 ** - LRO not enabled, or
1696 ** - no LRO resources, or
1697 ** - lro enqueue fails
1699 if (rxr->lro.lro_cnt != 0)
1700 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1703 IXGBE_RX_UNLOCK(rxr);
1704 (*ifp->if_input)(ifp, m);
1708 static __inline void
1709 ixgbe_rx_discard(struct rx_ring *rxr, int i)
1711 struct ixgbe_rx_buf *rbuf;
1713 rbuf = &rxr->rx_buffers[i];
1717 ** With advanced descriptors the writeback
1718 ** clobbers the buffer addrs, so it's easier
1719 ** to just free the existing mbufs and take
1720 ** the normal refresh path to get new buffers
1724 if (rbuf->fmp != NULL) {/* Partial chain ? */
1725 rbuf->fmp->m_flags |= M_PKTHDR;
1728 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1729 } else if (rbuf->buf) {
1733 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1741 /*********************************************************************
1743 * This routine executes in interrupt context. It replenishes
1744 * the mbufs in the descriptor ring and sends data which has been
1745 * DMA'ed into host memory to the upper layer.
1747 * Return TRUE for more work, FALSE for all clean.
1748 *********************************************************************/
1750 ixgbe_rxeof(struct ix_queue *que)
1752 struct adapter *adapter = que->adapter;
1753 struct rx_ring *rxr = que->rxr;
1754 struct ifnet *ifp = adapter->ifp;
1755 struct lro_ctrl *lro = &rxr->lro;
1756 int i, nextp, processed = 0;
1758 u32 count = adapter->rx_process_limit;
1759 union ixgbe_adv_rx_desc *cur;
1760 struct ixgbe_rx_buf *rbuf, *nbuf;
1766 /* Same as the txeof routine: wakeup clients on intr. */
1767 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1768 IXGBE_RX_UNLOCK(rxr);
1771 #endif /* DEV_NETMAP */
1773 for (i = rxr->next_to_check; count != 0;) {
1774 struct mbuf *sendmp, *mp;
1780 /* Sync the ring. */
1781 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1782 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1784 cur = &rxr->rx_base[i];
1785 staterr = le32toh(cur->wb.upper.status_error);
1786 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
1788 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1790 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1797 cur->wb.upper.status_error = 0;
1798 rbuf = &rxr->rx_buffers[i];
1801 len = le16toh(cur->wb.upper.length);
1802 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1803 IXGBE_RXDADV_PKTTYPE_MASK;
1804 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1806 /* Make sure bad packets are discarded */
1807 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1808 #if __FreeBSD_version >= 1100036
1809 if (IXGBE_IS_VF(adapter))
1810 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1812 rxr->rx_discarded++;
1813 ixgbe_rx_discard(rxr, i);
1818 ** On the 82599, which supports hardware
1819 ** LRO (called HW RSC), packets need
1820 ** not be fragmented across sequential
1821 ** descriptors; rather, the next descriptor
1822 ** is indicated in bits of the descriptor.
1823 ** This also means that we might process
1824 ** more than one packet at a time, something
1825 ** that has never been true before; it
1826 ** required eliminating global chain pointers
1827 ** in favor of what we are doing here. -jfv
1831 ** Figure out the next descriptor
1834 if (rxr->hw_rsc == TRUE) {
1835 rsc = ixgbe_rsc_count(cur);
1836 rxr->rsc_num += (rsc - 1);
1838 if (rsc) { /* Get hardware index */
1840 IXGBE_RXDADV_NEXTP_MASK) >>
1841 IXGBE_RXDADV_NEXTP_SHIFT);
1842 } else { /* Just sequential */
1844 if (nextp == adapter->num_rx_desc)
1847 nbuf = &rxr->rx_buffers[nextp];
1851 ** Rather than using the fmp/lmp global pointers
1852 ** we now keep the head of a packet chain in the
1853 ** buffer struct and pass this along from one
1854 ** descriptor to the next, until we get EOP.
1858 ** See if there is a stored head
1859 ** that determines what we are
1862 if (sendmp != NULL) { /* secondary frag */
1863 rbuf->buf = rbuf->fmp = NULL;
1864 mp->m_flags &= ~M_PKTHDR;
1865 sendmp->m_pkthdr.len += mp->m_len;
1868 * Optimize. This might be a small packet,
1869 * maybe just a TCP ACK. Do a fast copy that
1870 * is cache aligned into a new mbuf, and
1871 * leave the old mbuf+cluster for re-use.
1873 if (eop && len <= IXGBE_RX_COPY_LEN) {
1874 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1875 if (sendmp != NULL) {
1877 IXGBE_RX_COPY_ALIGN;
1878 ixgbe_bcopy(mp->m_data,
1879 sendmp->m_data, len);
1880 sendmp->m_len = len;
1882 rbuf->flags |= IXGBE_RX_COPY;
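/*
 * Why the copy above helps (a hedged note): for frames no larger than
 * IXGBE_RX_COPY_LEN the data is duplicated into a fresh small mbuf and
 * the original mbuf+cluster stays loaded in the ring slot, so
 * ixgbe_refresh_mbufs() can skip the allocation and busdma reload for
 * that slot when it sees the IXGBE_RX_COPY flag.
 */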
1885 if (sendmp == NULL) {
1886 rbuf->buf = rbuf->fmp = NULL;
1890 /* first desc of a non-ps chain */
1891 sendmp->m_flags |= M_PKTHDR;
1892 sendmp->m_pkthdr.len = mp->m_len;
1896 /* Pass the head pointer on */
1900 mp->m_next = nbuf->buf;
1901 } else { /* Sending this frame */
1902 sendmp->m_pkthdr.rcvif = ifp;
1904 /* capture data for AIM */
1905 rxr->bytes += sendmp->m_pkthdr.len;
1906 rxr->rx_bytes += sendmp->m_pkthdr.len;
1907 /* Process vlan info */
1908 if ((rxr->vtag_strip) &&
1909 (staterr & IXGBE_RXD_STAT_VP))
1910 vtag = le16toh(cur->wb.upper.vlan);
1912 sendmp->m_pkthdr.ether_vtag = vtag;
1913 sendmp->m_flags |= M_VLANTAG;
1915 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1916 ixgbe_rx_checksum(staterr, sendmp, ptype);
1919 * In case of multiqueue, we have the RXCSUM.PCSD bit set
1920 * and never cleared. This means we have the RSS hash
1921 * available to be used.
1923 if (adapter->num_queues > 1) {
1924 sendmp->m_pkthdr.flowid =
1925 le32toh(cur->wb.lower.hi_dword.rss);
1926 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1927 case IXGBE_RXDADV_RSSTYPE_IPV4:
1928 M_HASHTYPE_SET(sendmp,
1929 M_HASHTYPE_RSS_IPV4);
1931 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1932 M_HASHTYPE_SET(sendmp,
1933 M_HASHTYPE_RSS_TCP_IPV4);
1935 case IXGBE_RXDADV_RSSTYPE_IPV6:
1936 M_HASHTYPE_SET(sendmp,
1937 M_HASHTYPE_RSS_IPV6);
1939 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1940 M_HASHTYPE_SET(sendmp,
1941 M_HASHTYPE_RSS_TCP_IPV6);
1943 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1944 M_HASHTYPE_SET(sendmp,
1945 M_HASHTYPE_RSS_IPV6_EX);
1947 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1948 M_HASHTYPE_SET(sendmp,
1949 M_HASHTYPE_RSS_TCP_IPV6_EX);
1951 #if __FreeBSD_version > 1100000
1952 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1953 M_HASHTYPE_SET(sendmp,
1954 M_HASHTYPE_RSS_UDP_IPV4);
1956 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1957 M_HASHTYPE_SET(sendmp,
1958 M_HASHTYPE_RSS_UDP_IPV6);
1960 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1961 M_HASHTYPE_SET(sendmp,
1962 M_HASHTYPE_RSS_UDP_IPV6_EX);
1966 M_HASHTYPE_SET(sendmp,
1970 sendmp->m_pkthdr.flowid = que->msix;
1971 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1975 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1976 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1978 /* Advance our pointers to the next descriptor. */
1979 if (++i == rxr->num_desc)
1982 /* Now send to the stack or do LRO */
1983 if (sendmp != NULL) {
1984 rxr->next_to_check = i;
1985 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
1986 i = rxr->next_to_check;
1989 /* Every 8 descriptors we go to refresh mbufs */
1990 if (processed == 8) {
1991 ixgbe_refresh_mbufs(rxr, i);
1996 /* Refresh any remaining buf structs */
1997 if (ixgbe_rx_unrefreshed(rxr))
1998 ixgbe_refresh_mbufs(rxr, i);
2000 rxr->next_to_check = i;
2003 * Flush any outstanding LRO work
2005 tcp_lro_flush_all(lro);
2007 IXGBE_RX_UNLOCK(rxr);
2010 ** Still have cleaning to do?
2012 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2019 /*********************************************************************
2021 * Verify that the hardware indicated that the checksum is valid.
2022 * Inform the stack about the status of the checksum so that the stack
2023 * doesn't spend time verifying it.
2025 *********************************************************************/
2027 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2029 u16 status = (u16) staterr;
2030 u8 errors = (u8) (staterr >> 24);
2033 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2034 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2038 if (status & IXGBE_RXD_STAT_IPCS) {
2039 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
2040 /* IP Checksum Good */
2041 if (!(errors & IXGBE_RXD_ERR_IPE))
2042 mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
2044 /* TCP/UDP/SCTP checksum */
2045 if (status & IXGBE_RXD_STAT_L4CS) {
2046 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
2047 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2048 mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
2050 mp->m_pkthdr.csum_data = htons(0xffff);
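/*
 * A hedged note: csum_data of 0xffff here (the value is the same in
 * either byte order) is the conventional way to tell the stack that
 * the full L4 checksum has already been verified, so it will not
 * recompute it for this packet.
 */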
2055 /********************************************************************
2056 * Manage DMA'able memory.
2057 *******************************************************************/
2059 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2063 *(bus_addr_t *) arg = segs->ds_addr;
2068 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2069 struct ixgbe_dma_alloc *dma, int mapflags)
2071 device_t dev = adapter->dev;
2074 r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2075 DBA_ALIGN, 0, /* alignment, bounds */
2076 BUS_SPACE_MAXADDR, /* lowaddr */
2077 BUS_SPACE_MAXADDR, /* highaddr */
2078 NULL, NULL, /* filter, filterarg */
2081 size, /* maxsegsize */
2082 BUS_DMA_ALLOCNOW, /* flags */
2083 NULL, /* lockfunc */
2084 NULL, /* lockfuncarg */
2087 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2091 r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2092 BUS_DMA_NOWAIT, &dma->dma_map);
2094 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2098 r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2102 mapflags | BUS_DMA_NOWAIT);
2104 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2108 dma->dma_size = size;
2111 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2113 bus_dma_tag_destroy(dma->dma_tag);
2115 dma->dma_tag = NULL;
2120 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2122 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2123 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2124 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2125 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2126 bus_dma_tag_destroy(dma->dma_tag);
2130 /*********************************************************************
2132 * Allocate memory for the transmit and receive rings, and then
2133 * the descriptors associated with each; this is called only once at attach.
2135 **********************************************************************/
2137 ixgbe_allocate_queues(struct adapter *adapter)
2139 device_t dev = adapter->dev;
2140 struct ix_queue *que;
2141 struct tx_ring *txr;
2142 struct rx_ring *rxr;
2143 int rsize, tsize, error = IXGBE_SUCCESS;
2144 int txconf = 0, rxconf = 0;
2146 enum ixgbe_iov_mode iov_mode;
2149 /* First allocate the top level queue structs */
2150 if (!(adapter->queues =
2151 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2152 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2153 device_printf(dev, "Unable to allocate queue memory\n");
2158 /* First allocate the TX ring struct memory */
2159 if (!(adapter->tx_rings =
2160 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2161 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2162 device_printf(dev, "Unable to allocate TX ring memory\n");
2167 /* Next allocate the RX */
2168 if (!(adapter->rx_rings =
2169 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2170 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2171 device_printf(dev, "Unable to allocate RX ring memory\n");
2176 /* For the ring itself */
2177 tsize = roundup2(adapter->num_tx_desc *
2178 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2181 iov_mode = ixgbe_get_iov_mode(adapter);
2182 adapter->pool = ixgbe_max_vfs(iov_mode);
2187 * Now set up the TX queues; txconf is needed to handle the
2188 * possibility that things fail midcourse and we need to
2189 * unwind the memory allocations gracefully.
2191 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2192 /* Set up some basics */
2193 txr = &adapter->tx_rings[i];
2194 txr->adapter = adapter;
2196 txr->me = ixgbe_pf_que_index(iov_mode, i);
2200 txr->num_desc = adapter->num_tx_desc;
2202 /* Initialize the TX side lock */
2203 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2204 device_get_nameunit(dev), txr->me);
2205 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2207 if (ixgbe_dma_malloc(adapter, tsize,
2208 &txr->txdma, BUS_DMA_NOWAIT)) {
2210 "Unable to allocate TX Descriptor memory\n");
2214 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2215 bzero((void *)txr->tx_base, tsize);
2217 /* Now allocate transmit buffers for the ring */
2218 if (ixgbe_allocate_transmit_buffers(txr)) {
2220 "Critical Failure setting up transmit buffers\n");
2224 #ifndef IXGBE_LEGACY_TX
2225 /* Allocate a buf ring */
2226 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2227 M_WAITOK, &txr->tx_mtx);
2228 if (txr->br == NULL) {
2230 "Critical Failure setting up buf ring\n");
2238 * Next the RX queues...
2240 rsize = roundup2(adapter->num_rx_desc *
2241 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2242 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2243 rxr = &adapter->rx_rings[i];
2244 /* Set up some basics */
2245 rxr->adapter = adapter;
2247 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2251 rxr->num_desc = adapter->num_rx_desc;
2253 /* Initialize the RX side lock */
2254 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2255 device_get_nameunit(dev), rxr->me);
2256 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2258 if (ixgbe_dma_malloc(adapter, rsize,
2259 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2261 "Unable to allocate RxDescriptor memory\n");
2265 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2266 bzero((void *)rxr->rx_base, rsize);
2268 /* Allocate receive buffers for the ring*/
2269 if (ixgbe_allocate_receive_buffers(rxr)) {
2271 "Critical Failure setting up receive buffers\n");
2278 ** Finally set up the queue holding structs
2280 for (int i = 0; i < adapter->num_queues; i++) {
2281 que = &adapter->queues[i];
2282 que->adapter = adapter;
2284 que->txr = &adapter->tx_rings[i];
2285 que->rxr = &adapter->rx_rings[i];
2291 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2292 ixgbe_dma_free(adapter, &rxr->rxdma);
2294 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2295 ixgbe_dma_free(adapter, &txr->txdma);
2296 free(adapter->rx_rings, M_DEVBUF);
2298 free(adapter->tx_rings, M_DEVBUF);
2300 free(adapter->queues, M_DEVBUF);