1 /******************************************************************************
3 Copyright (c) 2001-2015, Intel Corporation
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
9 1. Redistributions of source code must retain the above copyright notice,
10 this list of conditions and the following disclaimer.
12 2. Redistributions in binary form must reproduce the above copyright
13 notice, this list of conditions and the following disclaimer in the
14 documentation and/or other materials provided with the distribution.
16 3. Neither the name of the Intel Corporation nor the names of its
17 contributors may be used to endorse or promote products derived from
18 this software without specific prior written permission.
20 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
24 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 POSSIBILITY OF SUCH DAMAGE.
32 ******************************************************************************/
36 #ifndef IXGBE_STANDALONE_BUILD
38 #include "opt_inet6.h"
45 #include <net/rss_config.h>
46 #include <netinet/in_rss.h>
50 #include <net/netmap.h>
51 #include <sys/selinfo.h>
52 #include <dev/netmap/netmap_kern.h>
54 extern int ix_crcstrip;
59 ** this feature only works with
60 ** IPv4, and only on 82599 and later.
61 ** Also this will cause IP forwarding to
62 ** fail and that can't be controlled by
63 ** the stack as LRO can. For all these
64 ** reasons I've deemed it best to leave
65 ** this off and not bother with a tuneable
66 ** interface, this would need to be compiled
/* Hardware RSC (receive-side coalescing, i.e. LRO in silicon): master
 * switch, disabled by default for the reasons given above. */
69 static bool ixgbe_rsc_enable = FALSE;
73 ** For Flow Director: this is the
74 ** number of TX packets we sample
75 ** for the filter pool, this means
76 ** every 20th packet will be probed.
78 ** This feature can be disabled by
/* ATR (Application Targeted Routing) sampling interval: one TX packet
 * per atr_sample_rate transmits is fed to the Flow Director filter. */
81 static int atr_sample_rate = 20;
84 /*********************************************************************
85 * Local Function prototypes
86 *********************************************************************/
87 static void ixgbe_setup_transmit_ring(struct tx_ring *);
88 static void ixgbe_free_transmit_buffers(struct tx_ring *);
89 static int ixgbe_setup_receive_ring(struct rx_ring *);
90 static void ixgbe_free_receive_buffers(struct rx_ring *);
92 static void ixgbe_rx_checksum(u32, struct mbuf *, u32);
93 static void ixgbe_refresh_mbufs(struct rx_ring *, int);
94 static int ixgbe_xmit(struct tx_ring *, struct mbuf **);
95 static int ixgbe_tx_ctx_setup(struct tx_ring *,
96 struct mbuf *, u32 *, u32 *);
97 static int ixgbe_tso_setup(struct tx_ring *,
98 struct mbuf *, u32 *, u32 *);
100 static void ixgbe_atr(struct tx_ring *, struct mbuf *);
102 static __inline void ixgbe_rx_discard(struct rx_ring *, int);
103 static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
106 #ifdef IXGBE_LEGACY_TX
107 /*********************************************************************
108 * Transmit entry point
110 * ixgbe_start is called by the stack to initiate a transmit.
111 * The driver will remain in this routine as long as there are
112 * packets to transmit and transmit resources are available.
113 * In case resources are not available stack is notified and
114 * the packet is requeued.
115 **********************************************************************/
/*
 * Legacy transmit path, called with the TX lock held (asserted below).
 * Dequeues packets from the interface send queue and hands each to
 * ixgbe_xmit(); on failure the packet is prepended back for a retry.
 * NOTE(review): this extract is missing lines (early returns / break /
 * closing braces were elided by the sampling) — do not edit logic here
 * without consulting the full upstream source.
 */
118 ixgbe_start_locked(struct tx_ring *txr, struct ifnet * ifp)
121 struct adapter *adapter = txr->adapter;
/* Caller must hold txr->tx_mtx. */
123 IXGBE_TX_LOCK_ASSERT(txr);
/* Bail when the interface is down or the link is not up. */
125 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
127 if (!adapter->link_active)
130 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
/* Stop draining when the ring is nearly full of in-flight work. */
131 if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
134 IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
138 if (ixgbe_xmit(txr, &m_head)) {
/* Encap failed: requeue at the head so ordering is preserved. */
140 IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
143 /* Send a copy of the frame to the BPF listener */
144 ETHER_BPF_MTAP(ifp, m_head);
150 * Legacy TX start - called by the stack, this
151 * always uses the first tx ring, and should
152 * not be used with multiqueue tx enabled.
/*
 * Legacy if_start entry point: always uses TX ring 0.
 * Takes the TX lock around ixgbe_start_locked() (the matching
 * IXGBE_TX_LOCK() call is on a line elided from this extract).
 */
155 ixgbe_start(struct ifnet *ifp)
157 struct adapter *adapter = ifp->if_softc;
/* First (and, in legacy mode, only) TX ring. */
158 struct tx_ring *txr = adapter->tx_rings;
160 if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
162 ixgbe_start_locked(txr, ifp);
163 IXGBE_TX_UNLOCK(txr);
168 #else /* ! IXGBE_LEGACY_TX */
171 ** Multiqueue Transmit Entry Point
172 ** (if_transmit function)
/*
 * Multiqueue if_transmit entry point: selects a TX ring from the
 * mbuf's RSS/flowid hash (falling back to curcpu), enqueues the mbuf
 * on that ring's buf_ring, and either drains it inline (if the TX
 * lock is free) or defers to the queue's taskqueue.
 */
175 ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
177 struct adapter *adapter = ifp->if_softc;
178 struct ix_queue *que;
186 * When doing RSS, map it to the same outbound queue
187 * as the incoming flow would be mapped to.
189 * If everything is setup correctly, it should be the
190 * same bucket that the current CPU we're on is.
192 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
194 if (rss_hash2bucket(m->m_pkthdr.flowid,
195 M_HASHTYPE_GET(m), &bucket_id) == 0) {
196 i = bucket_id % adapter->num_queues;
/* NOTE(review): `>` here looks like it should be `>=` — a bucket_id
 * equal to num_queues is also wrapped by the modulo above without a
 * warning being printed. Confirm against upstream before changing. */
198 if (bucket_id > adapter->num_queues)
199 if_printf(ifp, "bucket_id (%d) > num_queues "
200 "(%d)\n", bucket_id, adapter->num_queues);
/* No RSS bucket: hash the flowid (or the current CPU) over queues. */
204 i = m->m_pkthdr.flowid % adapter->num_queues;
206 i = curcpu % adapter->num_queues;
208 /* Check for a hung queue and pick alternative */
/* NOTE(review): ffsl() is 1-based (returns bit index + 1, 0 if no bit
 * set) — verify the intended queue index arithmetic upstream. */
209 if (((1 << i) & adapter->active_queues) == 0)
210 i = ffsl(adapter->active_queues);
212 txr = &adapter->tx_rings[i];
213 que = &adapter->queues[i];
/* Stage the mbuf on the software ring; drain inline if we can get
 * the lock cheaply, otherwise let the taskqueue do it. */
215 err = drbr_enqueue(ifp, txr->br, m);
218 if (IXGBE_TX_TRYLOCK(txr)) {
219 ixgbe_mq_start_locked(ifp, txr);
220 IXGBE_TX_UNLOCK(txr);
222 taskqueue_enqueue(que->tq, &txr->txq_task);
/*
 * Drain a ring's buf_ring under the TX lock. Two drain loops exist,
 * selected at compile time on __FreeBSD_version: the older
 * dequeue/re-enqueue style and the newer peek/advance/putback style
 * (which avoids reordering on transient encap failure).
 */
228 ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
230 struct adapter *adapter = txr->adapter;
232 int enqueued = 0, err = 0;
/* Nothing to do if the interface is down or link is not up. */
234 if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
235 adapter->link_active == 0)
238 /* Process the queue */
239 #if __FreeBSD_version < 901504
/* Old-style drain: dequeue first, re-enqueue on failure. */
240 next = drbr_dequeue(ifp, txr->br);
241 while (next != NULL) {
242 if ((err = ixgbe_xmit(txr, &next)) != 0) {
244 err = drbr_enqueue(ifp, txr->br, next);
/* New-style drain: peek, then advance on success / putback on failure,
 * so packet order is preserved across transient descriptor shortage. */
246 while ((next = drbr_peek(ifp, txr->br)) != NULL) {
247 if ((err = ixgbe_xmit(txr, &next)) != 0) {
249 drbr_advance(ifp, txr->br);
251 drbr_putback(ifp, txr->br, next);
256 #if __FreeBSD_version >= 901504
257 drbr_advance(ifp, txr->br);
260 #if 0 // this is VF-only
261 #if __FreeBSD_version >= 1100036
263 * Since we're looking at the tx ring, we can check
264 * to see if we're a VF by examing our tail register
/* Dead (#if 0) VF-only multicast accounting — kept for reference. */
267 if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
268 if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
271 /* Send a copy of the frame to the BPF listener */
272 ETHER_BPF_MTAP(ifp, next);
273 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
275 #if __FreeBSD_version < 901504
276 next = drbr_dequeue(ifp, txr->br);
/* Opportunistically reclaim completed descriptors when running low
 * (the ixgbe_txeof() call is on a line elided from this extract). */
280 if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
287 * Called from a taskqueue to drain queued transmit packets.
/*
 * Taskqueue handler: drain packets that ixgbe_mq_start() staged while
 * the TX lock was contended. `arg` is the tx_ring; `pending` is the
 * standard taskqueue argument (unused here).
 * The matching IXGBE_TX_LOCK() is on a line elided from this extract.
 */
290 ixgbe_deferred_mq_start(void *arg, int pending)
292 struct tx_ring *txr = arg;
293 struct adapter *adapter = txr->adapter;
294 struct ifnet *ifp = adapter->ifp;
297 if (!drbr_empty(ifp, txr->br))
298 ixgbe_mq_start_locked(ifp, txr);
299 IXGBE_TX_UNLOCK(txr);
303 * Flush all ring buffers
/*
 * if_qflush: discard every packet staged on every ring's buf_ring.
 * Each freed mbuf (m_freem call elided from this extract) is dropped,
 * not transmitted. Per-ring TX lock is taken around the drain.
 */
306 ixgbe_qflush(struct ifnet *ifp)
308 struct adapter *adapter = ifp->if_softc;
309 struct tx_ring *txr = adapter->tx_rings;
312 for (int i = 0; i < adapter->num_queues; i++, txr++) {
314 while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
316 IXGBE_TX_UNLOCK(txr);
320 #endif /* IXGBE_LEGACY_TX */
323 /*********************************************************************
325 * This routine maps the mbufs to tx descriptors, allowing the
326 * TX engine to transmit the packets.
327 * - return 0 on success, positive on failure
329 **********************************************************************/
/*
 * Encapsulate one mbuf chain onto the TX ring:
 *  1. DMA-map the chain (one m_defrag() retry on EFBIG),
 *  2. verify descriptor space (nsegs + 2: data segs + context desc),
 *  3. emit the offload context descriptor (ixgbe_tx_ctx_setup),
 *  4. optionally sample the packet for Flow Director (ATR),
 *  5. fill one advanced data descriptor per DMA segment,
 *  6. mark EOP|RS on the last descriptor and bump the tail register.
 * Returns 0 on success, errno on failure; may replace *m_headp when
 * m_defrag() is used. Caller holds the TX lock.
 */
332 ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
334 struct adapter *adapter = txr->adapter;
335 u32 olinfo_status = 0, cmd_type_len;
336 int i, j, error, nsegs;
/* VLA sized by the DMA tag's segment limit (see allocate_transmit_buffers). */
340 bus_dma_segment_t segs[adapter->num_segs];
342 struct ixgbe_tx_buf *txbuf;
343 union ixgbe_adv_tx_desc *txd = NULL;
347 /* Basic descriptor defines */
348 cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
349 IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);
351 if (m_head->m_flags & M_VLANTAG)
352 cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;
355 * Important to capture the first descriptor
356 * used because it will contain the index of
357 * the one we tell the hardware to report back
359 first = txr->next_avail_desc;
360 txbuf = &txr->tx_buffers[first];
364 * Map the packet for DMA.
367 error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
368 *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);
/* Mapping failed: for EFBIG try one m_defrag(); otherwise count and bail. */
370 if (__predict_false(error)) {
375 /* Try it again? - one try */
379 * XXX: m_defrag will choke on
380 * non-MCLBYTES-sized clusters
382 m = m_defrag(*m_headp, M_NOWAIT);
384 adapter->mbuf_defrag_failed++;
394 txr->no_tx_dma_setup++;
397 txr->no_tx_dma_setup++;
404 /* Make certain there are enough descriptors */
405 if (txr->tx_avail < (nsegs + 2)) {
406 txr->no_desc_avail++;
/* Undo the DMA mapping before returning the no-space error. */
407 bus_dmamap_unload(txr->txtag, map);
413 * Set up the appropriate offload context
414 * this will consume the first descriptor
416 error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
417 if (__predict_false(error)) {
418 if (error == ENOBUFS)
424 /* Do the flow director magic */
/* Sample every atr_sample_rate-th packet for the FDIR filter table. */
425 if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
427 if (txr->atr_count >= atr_sample_rate) {
428 ixgbe_atr(txr, m_head);
434 olinfo_status |= IXGBE_ADVTXD_CC;
435 i = txr->next_avail_desc;
/* One advanced data descriptor per DMA segment, wrapping at ring end. */
436 for (j = 0; j < nsegs; j++) {
440 txbuf = &txr->tx_buffers[i];
441 txd = &txr->tx_base[i];
442 seglen = segs[j].ds_len;
443 segaddr = htole64(segs[j].ds_addr);
445 txd->read.buffer_addr = segaddr;
446 txd->read.cmd_type_len = htole32(txr->txd_cmd |
447 cmd_type_len |seglen);
448 txd->read.olinfo_status = htole32(olinfo_status);
450 if (++i == txr->num_desc)
/* Last segment: end-of-packet + report-status so txeof sees DD. */
454 txd->read.cmd_type_len |=
455 htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
456 txr->tx_avail -= nsegs;
457 txr->next_avail_desc = i;
459 txbuf->m_head = m_head;
461 * Here we swap the map so the last descriptor,
462 * which gets the completion interrupt has the
463 * real map, and the first descriptor gets the
464 * unused map from this descriptor.
466 txr->tx_buffers[first].map = txbuf->map;
468 bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);
470 /* Set the EOP descriptor that will be marked done */
471 txbuf = &txr->tx_buffers[first];
474 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
475 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
477 * Advance the Transmit Descriptor Tail (Tdt), this tells the
478 * hardware that this frame is available to transmit.
480 ++txr->total_packets;
481 IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);
483 /* Mark queue as having work */
491 /*********************************************************************
493 * Allocate memory for tx_buffer structures. The tx_buffer stores all
494 * the information needed to transmit a packet on the wire. This is
495 * called only once at attach, setup is done every reset.
497 **********************************************************************/
/*
 * One-time (at attach) allocation of per-ring TX software state:
 * creates the TX DMA tag, allocates the tx_buffers array, and creates
 * one DMA map per descriptor. On any failure falls through to the
 * `fail:` path which frees everything via
 * ixgbe_free_transmit_structures() (handles partial allocation).
 */
499 ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
501 struct adapter *adapter = txr->adapter;
502 device_t dev = adapter->dev;
503 struct ixgbe_tx_buf *txbuf;
507 * Setup DMA descriptor areas.
/* Tag bounds: up to IXGBE_TSO_SIZE total per mapping, split over at
 * most adapter->num_segs segments of at most PAGE_SIZE each. */
509 if ((error = bus_dma_tag_create(
510 bus_get_dma_tag(adapter->dev), /* parent */
511 1, 0, /* alignment, bounds */
512 BUS_SPACE_MAXADDR, /* lowaddr */
513 BUS_SPACE_MAXADDR, /* highaddr */
514 NULL, NULL, /* filter, filterarg */
515 IXGBE_TSO_SIZE, /* maxsize */
516 adapter->num_segs, /* nsegments */
517 PAGE_SIZE, /* maxsegsize */
520 NULL, /* lockfuncarg */
522 device_printf(dev,"Unable to allocate TX DMA tag\n");
/* Zeroed array: one ixgbe_tx_buf per TX descriptor. */
526 if (!(txr->tx_buffers =
527 (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
528 adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
529 device_printf(dev, "Unable to allocate tx_buffer memory\n");
534 /* Create the descriptor buffer dma maps */
535 txbuf = txr->tx_buffers;
536 for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
537 error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
539 device_printf(dev, "Unable to create TX DMA map\n");
546 /* We free all, it handles case where we are in the middle */
547 ixgbe_free_transmit_structures(adapter);
551 /*********************************************************************
553 * Initialize a transmit ring.
555 **********************************************************************/
/*
 * (Re)initialize one TX ring: zero the descriptor area, reset the
 * producer/consumer indices, free any leftover mbufs from a previous
 * run, wire up netmap slots when DEV_NETMAP is compiled in, and set
 * the ATR sample rate (82599+ only). Runs at init/reset time under
 * the TX lock (taken on a line elided from this extract).
 */
557 ixgbe_setup_transmit_ring(struct tx_ring *txr)
559 struct adapter *adapter = txr->adapter;
560 struct ixgbe_tx_buf *txbuf;
562 struct netmap_adapter *na = NA(adapter->ifp);
563 struct netmap_slot *slot;
564 #endif /* DEV_NETMAP */
566 /* Clear the old ring contents */
570 * (under lock): if in netmap mode, do some consistency
571 * checks and set slot to entry 0 of the netmap ring.
573 slot = netmap_reset(na, NR_TX, txr->me, 0);
574 #endif /* DEV_NETMAP */
575 bzero((void *)txr->tx_base,
576 (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
578 txr->next_avail_desc = 0;
579 txr->next_to_clean = 0;
581 /* Free any existing tx buffers. */
582 txbuf = txr->tx_buffers;
583 for (int i = 0; i < txr->num_desc; i++, txbuf++) {
584 if (txbuf->m_head != NULL) {
585 bus_dmamap_sync(txr->txtag, txbuf->map,
586 BUS_DMASYNC_POSTWRITE);
587 bus_dmamap_unload(txr->txtag, txbuf->map);
588 m_freem(txbuf->m_head);
589 txbuf->m_head = NULL;
593 * In netmap mode, set the map for the packet buffer.
594 * NOTE: Some drivers (not this one) also need to set
595 * the physical buffer address in the NIC ring.
596 * Slots in the netmap ring (indexed by "si") are
597 * kring->nkr_hwofs positions "ahead" wrt the
598 * corresponding slot in the NIC ring. In some drivers
599 * (not here) nkr_hwofs can be negative. Function
600 * netmap_idx_n2k() handles wraparounds properly.
603 int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
604 netmap_load_map(na, txr->txtag,
605 txbuf->map, NMB(na, slot + si));
607 #endif /* DEV_NETMAP */
608 /* Clear the EOP descriptor pointer */
613 /* Set the rate at which we sample packets */
/* ATR/Flow Director exists only on 82599 and later, not 82598. */
614 if (adapter->hw.mac.type != ixgbe_mac_82598EB)
615 txr->atr_sample = atr_sample_rate;
618 /* Set number of descriptors available */
619 txr->tx_avail = adapter->num_tx_desc;
/* Push the zeroed descriptor ring to the device before use. */
621 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
622 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
623 IXGBE_TX_UNLOCK(txr);
626 /*********************************************************************
628 * Initialize all transmit rings.
630 **********************************************************************/
/*
 * Initialize every TX ring on the adapter by delegating to
 * ixgbe_setup_transmit_ring() for each of num_queues rings.
 */
632 ixgbe_setup_transmit_structures(struct adapter *adapter)
634 struct tx_ring *txr = adapter->tx_rings;
636 for (int i = 0; i < adapter->num_queues; i++, txr++)
637 ixgbe_setup_transmit_ring(txr);
642 /*********************************************************************
644 * Free all transmit rings.
646 **********************************************************************/
/*
 * Tear down all TX rings: per ring, free the software buffers, free
 * the descriptor DMA memory, then drop and destroy the TX lock;
 * finally free the tx_rings array itself.
 * (The per-ring IXGBE_TX_LOCK() is on a line elided from this extract.)
 */
648 ixgbe_free_transmit_structures(struct adapter *adapter)
650 struct tx_ring *txr = adapter->tx_rings;
652 for (int i = 0; i < adapter->num_queues; i++, txr++) {
654 ixgbe_free_transmit_buffers(txr);
655 ixgbe_dma_free(adapter, &txr->txdma);
656 IXGBE_TX_UNLOCK(txr);
657 IXGBE_TX_LOCK_DESTROY(txr);
659 free(adapter->tx_rings, M_DEVBUF);
662 /*********************************************************************
664 * Free transmit ring related data structures.
666 **********************************************************************/
/*
 * Release one ring's per-descriptor software state: unload/destroy
 * every DMA map, free any mbuf still attached, free the buf_ring and
 * the tx_buffers array, and destroy the TX DMA tag. Safe to call on a
 * partially-initialized ring (NULL checks throughout).
 */
668 ixgbe_free_transmit_buffers(struct tx_ring *txr)
670 struct adapter *adapter = txr->adapter;
671 struct ixgbe_tx_buf *tx_buffer;
674 INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");
676 if (txr->tx_buffers == NULL)
679 tx_buffer = txr->tx_buffers;
680 for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
/* Buffer still holds an mbuf: sync, unload, free, then kill the map. */
681 if (tx_buffer->m_head != NULL) {
682 bus_dmamap_sync(txr->txtag, tx_buffer->map,
683 BUS_DMASYNC_POSTWRITE);
684 bus_dmamap_unload(txr->txtag,
686 m_freem(tx_buffer->m_head);
687 tx_buffer->m_head = NULL;
688 if (tx_buffer->map != NULL) {
689 bus_dmamap_destroy(txr->txtag,
691 tx_buffer->map = NULL;
/* No mbuf, but a map may still exist — release it too. */
693 } else if (tx_buffer->map != NULL) {
694 bus_dmamap_unload(txr->txtag,
696 bus_dmamap_destroy(txr->txtag,
698 tx_buffer->map = NULL;
/* NOTE(review): txr->br (buf_ring) is the multiqueue-TX path; confirm
 * upstream whether this guard should be #ifndef IXGBE_LEGACY_TX (the
 * sampling may also have dropped an intervening #endif/#ifndef pair). */
701 #ifdef IXGBE_LEGACY_TX
703 buf_ring_free(txr->br, M_DEVBUF);
705 if (txr->tx_buffers != NULL) {
706 free(txr->tx_buffers, M_DEVBUF);
707 txr->tx_buffers = NULL;
709 if (txr->txtag != NULL) {
710 bus_dma_tag_destroy(txr->txtag);
716 /*********************************************************************
718 * Advanced Context Descriptor setup for VLAN, CSUM or TSO
720 **********************************************************************/
/*
 * Build the advanced-TX context descriptor for VLAN tagging and/or
 * L3/L4 checksum offload; TSO frames are diverted to ixgbe_tso_setup()
 * immediately. Consumes one descriptor (the "first" one accounted by
 * ixgbe_xmit) and updates *cmd_type_len / *olinfo_status for the data
 * descriptors that follow. Returns 0 on success (error returns are on
 * lines elided from this extract).
 */
723 ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
724 u32 *cmd_type_len, u32 *olinfo_status)
726 struct adapter *adapter = txr->adapter;
727 struct ixgbe_adv_tx_context_desc *TXD;
728 struct ether_vlan_header *eh;
735 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
736 int ehdrlen, ip_hlen = 0;
740 int ctxd = txr->next_avail_desc;
745 /* First check if TSO is to be used */
746 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TSO|CSUM_IP6_TSO))
747 return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));
749 if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
752 /* Indicate the whole packet as payload when not doing TSO */
753 *olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;
755 /* Now ready a context descriptor */
756 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
759 ** In advanced descriptors the vlan tag must
760 ** be placed into the context descriptor. Hence
761 ** we need to make one even if not doing offloads.
763 if (mp->m_flags & M_VLANTAG) {
764 vtag = htole16(mp->m_pkthdr.ether_vtag);
765 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
/* No VLAN and no offload: X550VF excepted, nothing to do here. */
766 } else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
770 * Determine where frame payload starts.
771 * Jump over vlan headers if already present,
772 * helpful for QinQ too.
774 eh = mtod(mp, struct ether_vlan_header *);
775 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
776 etype = ntohs(eh->evl_proto);
777 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
779 etype = ntohs(eh->evl_encap_proto);
780 ehdrlen = ETHER_HDR_LEN;
783 /* Set the ether header length */
784 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
786 if (offload == FALSE)
790 * If the first mbuf only includes the ethernet header, jump to the next one
791 * XXX: This assumes the stack splits mbufs containing headers on header boundaries
792 * XXX: And assumes the entire IP header is contained in one mbuf
794 if (mp->m_len == ehdrlen && mp->m_next)
795 l3d = mtod(mp->m_next, caddr_t);
797 l3d = mtod(mp, caddr_t) + ehdrlen;
/* IPv4 branch: variable header length, optional IP csum insertion. */
802 ip = (struct ip *)(l3d);
803 ip_hlen = ip->ip_hl << 2;
805 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
806 /* Insert IPv4 checksum into data descriptors */
807 if (mp->m_pkthdr.csum_flags & CSUM_IP) {
809 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
/* IPv6 branch: fixed 40-byte header, no extension-header support. */
815 ip6 = (struct ip6_hdr *)(l3d);
816 ip_hlen = sizeof(struct ip6_hdr);
817 ipproto = ip6->ip6_nxt;
818 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
826 vlan_macip_lens |= ip_hlen;
828 /* No support for offloads for non-L4 next headers */
/* Map the stack's csum flags onto the hardware L4 type field. */
831 if (mp->m_pkthdr.csum_flags & (CSUM_IP_TCP | CSUM_IP6_TCP))
832 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
837 if (mp->m_pkthdr.csum_flags & (CSUM_IP_UDP | CSUM_IP6_UDP))
838 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
843 if (mp->m_pkthdr.csum_flags & (CSUM_IP_SCTP | CSUM_IP6_SCTP))
844 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
853 if (offload) /* Insert L4 checksum into data descriptors */
854 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
857 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
859 /* Now copy bits into descriptor */
860 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
861 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
862 TXD->seqnum_seed = htole32(0);
863 TXD->mss_l4len_idx = htole32(0);
865 /* We've consumed the first desc, adjust counters */
866 if (++ctxd == txr->num_desc)
868 txr->next_avail_desc = ctxd;
874 /**********************************************************************
876 * Setup work for hardware segmentation offload (TSO) on
877 * adapters using advanced tx descriptors
879 **********************************************************************/
/*
 * Build the context descriptor for TCP segmentation offload (TSO).
 * Parses the Ethernet/IP/TCP headers, pre-seeds the TCP pseudo-header
 * checksum in the mbuf (hardware requires it), fills the context
 * descriptor with header lengths and MSS, and ORs the TSE command and
 * payload length into the caller's cmd_type_len/olinfo_status.
 * Supports IPv4 and IPv6 (no IPv6 extension headers).
 */
881 ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
882 u32 *cmd_type_len, u32 *olinfo_status)
884 struct ixgbe_adv_tx_context_desc *TXD;
885 u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
886 u32 mss_l4len_idx = 0, paylen;
887 u16 vtag = 0, eh_type;
888 int ctxd, ehdrlen, ip_hlen, tcp_hlen;
889 struct ether_vlan_header *eh;
899 * Determine where frame payload starts.
900 * Jump over vlan headers if already present
902 eh = mtod(mp, struct ether_vlan_header *);
903 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
904 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
905 eh_type = eh->evl_proto;
907 ehdrlen = ETHER_HDR_LEN;
908 eh_type = eh->evl_encap_proto;
911 switch (ntohs(eh_type)) {
/* IPv6 TSO: fixed header, TCP only. */
914 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
915 /* XXX-BZ For now we do not pretend to support ext. hdrs. */
916 if (ip6->ip6_nxt != IPPROTO_TCP)
918 ip_hlen = sizeof(struct ip6_hdr);
919 ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
920 th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
/* Seed the pseudo-header checksum the hardware builds upon. */
921 th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
922 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
/* IPv4 TSO: variable header length, TCP only. */
927 ip = (struct ip *)(mp->m_data + ehdrlen);
928 if (ip->ip_p != IPPROTO_TCP)
931 ip_hlen = ip->ip_hl << 2;
932 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
933 th->th_sum = in_pseudo(ip->ip_src.s_addr,
934 ip->ip_dst.s_addr, htons(IPPROTO_TCP));
935 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
936 /* Tell transmit desc to also do IPv4 checksum. */
937 *olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
/* Unreachable for well-formed CSUM_TSO requests from the stack. */
941 panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
942 __func__, ntohs(eh_type));
946 ctxd = txr->next_avail_desc;
947 TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];
949 tcp_hlen = th->th_off << 2;
951 /* This is used in the transmit desc in encap */
952 paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;
954 /* VLAN MACLEN IPLEN */
955 if (mp->m_flags & M_VLANTAG) {
956 vtag = htole16(mp->m_pkthdr.ether_vtag);
957 vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
960 vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
961 vlan_macip_lens |= ip_hlen;
962 TXD->vlan_macip_lens = htole32(vlan_macip_lens);
964 /* ADV DTYPE TUCMD */
965 type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
966 type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
967 TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
/* MSS and L4 header length for the segmentation engine. */
970 mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
971 mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
972 TXD->mss_l4len_idx = htole32(mss_l4len_idx);
974 TXD->seqnum_seed = htole32(0);
976 if (++ctxd == txr->num_desc)
980 txr->next_avail_desc = ctxd;
981 *cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
982 *olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
983 *olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;
989 /**********************************************************************
991 * Examine each tx_buffer in the used queue. If the hardware is done
992 * processing the packet then free associated resources. The
993 * tx_buffer is put back on the free queue.
995 **********************************************************************/
/*
 * TX completion processing: walk the ring from next_to_clean,
 * reclaiming descriptors whose EOP descriptor has the DD (descriptor
 * done) bit set — freeing mbufs, unloading DMA maps and restoring
 * tx_avail — up to adapter->tx_process_limit packets per call.
 * Also drives hang detection via txr->busy. Netmap mode short-circuits
 * into a client wakeup instead. Caller holds the TX lock (asserted).
 */
997 ixgbe_txeof(struct tx_ring *txr)
999 struct adapter *adapter = txr->adapter;
1001 struct ifnet *ifp = adapter->ifp;
1003 u32 work, processed = 0;
1004 u32 limit = adapter->tx_process_limit;
1005 struct ixgbe_tx_buf *buf;
1006 union ixgbe_adv_tx_desc *txd;
1008 mtx_assert(&txr->tx_mtx, MA_OWNED);
1011 if (ifp->if_capenable & IFCAP_NETMAP) {
1012 struct netmap_adapter *na = NA(ifp);
1013 struct netmap_kring *kring = &na->tx_rings[txr->me];
1015 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1016 BUS_DMASYNC_POSTREAD);
1018 * In netmap mode, all the work is done in the context
1019 * of the client thread. Interrupt handlers only wake up
1020 * clients, which may be sleeping on individual rings
1021 * or on a global resource for all rings.
1022 * To implement tx interrupt mitigation, we wake up the client
1023 * thread roughly every half ring, even if the NIC interrupts
1024 * more frequently. This is implemented as follows:
1025 * - ixgbe_txsync() sets kring->nr_kflags with the index of
1026 * the slot that should wake up the thread (nkr_num_slots
1027 * means the user thread should not be woken up);
1028 * - the driver ignores tx interrupts unless netmap_mitigate=0
1029 * or the slot has the DD bit set.
/* NOTE(review): `txd` appears to be read here before the assignment
 * visible below at "txd = &txr->tx_base[work]" — the initialization
 * for this branch is presumably on a line elided from this extract;
 * confirm against the full upstream source. */
1031 if (!netmap_mitigate ||
1032 (kring->nr_kflags < kring->nkr_num_slots &&
1033 txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
1034 netmap_tx_irq(ifp, txr->me);
1038 #endif /* DEV_NETMAP */
/* Ring completely clean: nothing to reclaim. */
1040 if (txr->tx_avail == txr->num_desc) {
1045 /* Get work starting point */
1046 work = txr->next_to_clean;
1047 buf = &txr->tx_buffers[work];
1048 txd = &txr->tx_base[work];
/* `work` becomes a negative offset from the ring end; hitting zero
 * below means we wrapped past the last descriptor. */
1049 work -= txr->num_desc; /* The distance to ring end */
1050 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1051 BUS_DMASYNC_POSTREAD);
1054 union ixgbe_adv_tx_desc *eop = buf->eop;
1055 if (eop == NULL) /* No work */
/* The hardware sets DD on the EOP descriptor when the frame is sent. */
1058 if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
1059 break; /* I/O not complete */
1063 buf->m_head->m_pkthdr.len;
1064 bus_dmamap_sync(txr->txtag,
1066 BUS_DMASYNC_POSTWRITE);
1067 bus_dmamap_unload(txr->txtag,
1069 m_freem(buf->m_head);
1075 /* We clean the range if multi segment */
1076 while (txd != eop) {
1080 /* wrap the ring? */
1081 if (__predict_false(!work)) {
1082 work -= txr->num_desc;
1083 buf = txr->tx_buffers;
1088 buf->m_head->m_pkthdr.len;
1089 bus_dmamap_sync(txr->txtag,
1091 BUS_DMASYNC_POSTWRITE);
1092 bus_dmamap_unload(txr->txtag,
1094 m_freem(buf->m_head);
1104 /* Try the next packet */
1108 /* reset with a wrap */
1109 if (__predict_false(!work)) {
1110 work -= txr->num_desc;
1111 buf = txr->tx_buffers;
1115 } while (__predict_true(--limit));
1117 bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
1118 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
/* Convert the negative offset back into a ring index. */
1120 work += txr->num_desc;
1121 txr->next_to_clean = work;
1124 ** Queue Hang detection, we know there's
1125 ** work outstanding or the first return
1126 ** would have been taken, so increment busy
1127 ** if nothing managed to get cleaned, then
1128 ** in local_timer it will be checked and
1129 ** marked as HUNG if it exceeds a MAX attempt.
1131 if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
1134 ** If anything gets cleaned we reset state to 1,
1135 ** note this will turn off HUNG if its set.
1140 if (txr->tx_avail == txr->num_desc)
1149 ** This routine parses packet headers so that Flow
1150 ** Director can make a hashed filter table entry
1151 ** allowing traffic flows to be identified and kept
1152 ** on the same cpu. This would be a performance
1153 ** hit, but we only do it at IXGBE_FDIR_RATE of
/*
 * Flow Director ATR: parse an outgoing IPv4 TCP/UDP packet's headers
 * and program a signature filter so the matching RX flow is steered
 * to this queue's MSI-X vector (assumes RX and TX queues share a CPU).
 * Non-IPv4 and non-TCP/UDP packets are ignored (returns elided from
 * this extract).
 */
1157 ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
1159 struct adapter *adapter = txr->adapter;
1160 struct ix_queue *que;
1164 struct ether_vlan_header *eh;
1165 union ixgbe_atr_hash_dword input = {.dword = 0};
1166 union ixgbe_atr_hash_dword common = {.dword = 0};
1167 int ehdrlen, ip_hlen;
/* Skip over an 802.1Q tag if present to find the L3 header. */
1170 eh = mtod(mp, struct ether_vlan_header *);
1171 if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
1172 ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
1173 etype = eh->evl_proto;
1175 ehdrlen = ETHER_HDR_LEN;
1176 etype = eh->evl_encap_proto;
1179 /* Only handling IPv4 */
1180 if (etype != htons(ETHERTYPE_IP))
1183 ip = (struct ip *)(mp->m_data + ehdrlen);
1184 ip_hlen = ip->ip_hl << 2;
1186 /* check if we're UDP or TCP */
1189 th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
1190 /* src and dst are inverted */
/* The filter matches the *returning* flow, hence src/dst swap. */
1191 common.port.dst ^= th->th_sport;
1192 common.port.src ^= th->th_dport;
1193 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
1196 uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
1197 /* src and dst are inverted */
1198 common.port.dst ^= uh->uh_sport;
1199 common.port.src ^= uh->uh_dport;
1200 input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
1206 input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
1207 if (mp->m_pkthdr.ether_vtag)
1208 common.flex_bytes ^= htons(ETHERTYPE_VLAN);
1210 common.flex_bytes ^= etype;
1211 common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;
1213 que = &adapter->queues[txr->me];
1215 ** This assumes the Rx queue and Tx
1216 ** queue are bound to the same CPU
1218 ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
1219 input, common, que->msix);
1221 #endif /* IXGBE_FDIR */
1224 ** Used to detect a descriptor that has
1225 ** been merged by Hardware RSC.
/*
 * Return the RSC coalesce count from an RX descriptor's writeback
 * word: non-zero means hardware RSC merged multiple frames into
 * this descriptor.
 */
1228 ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
1230 return (le32toh(rx->wb.lower.lo_dword.data) &
1231 IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
1234 /*********************************************************************
1236 * Initialize Hardware RSC (LRO) feature on 82599
1237 * for an RX ring, this is toggled by the LRO capability
1238 * even though it is transparent to the stack.
1240 * NOTE: since this HW feature only works with IPV4 and
1241 * our testing has shown soft LRO to be as effective
1242 * I have decided to disable this by default.
1244 **********************************************************************/
/*
 * Configure hardware RSC (LRO) for one RX ring on 82599-class parts:
 * disable RSC entirely when IFCAP_LRO is off; otherwise enable it in
 * RDRXCTL/RSCCTL, cap the per-aggregation descriptor count so a merge
 * cannot exceed 64KB for the ring's mbuf size, enable TCP header
 * recognition, and disable RSC for pure ACKs.
 */
1246 ixgbe_setup_hw_rsc(struct rx_ring *rxr)
1248 struct adapter *adapter = rxr->adapter;
1249 struct ixgbe_hw *hw = &adapter->hw;
1250 u32 rscctrl, rdrxctl;
1252 /* If turning LRO/RSC off we need to disable it */
1253 if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
1254 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1255 rscctrl &= ~IXGBE_RSCCTL_RSCEN;
1259 rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
1260 rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
1261 #ifdef DEV_NETMAP /* crcstrip is optional in netmap */
/* With netmap + ix_crcstrip=0 the CRCSTRIP set below is skipped. */
1262 if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
1263 #endif /* DEV_NETMAP */
1264 rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
1265 rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
1266 IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);
1268 rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
1269 rscctrl |= IXGBE_RSCCTL_RSCEN;
1271 ** Limit the total number of descriptors that
1272 ** can be combined, so it does not exceed 64K
1274 if (rxr->mbuf_sz == MCLBYTES)
1275 rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
1276 else if (rxr->mbuf_sz == MJUMPAGESIZE)
1277 rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
1278 else if (rxr->mbuf_sz == MJUM9BYTES)
1279 rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
1280 else /* Using 16K cluster */
1281 rscctrl |= IXGBE_RSCCTL_MAXDESC_1;
1283 IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
1285 /* Enable TCP header recognition */
1286 IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
1287 (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
1288 IXGBE_PSRTYPE_TCPHDR));
1290 /* Disable RSC for ACK packets */
1291 IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
1292 (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));
1297 /*********************************************************************
1299 * Refresh mbuf buffers for RX descriptor rings
1300 * - now keeps its own state so discards due to resource
1301 * exhaustion are unnecessary, if an mbuf cannot be obtained
1302 * it just returns, keeping its placeholder, thus it can simply
1303 * be recalled to try again.
1305 **********************************************************************/
/*
** Refill RX descriptor slots from next_to_refresh up to 'limit'
** with fresh mbuf clusters.  'j' runs one slot ahead of 'i' and
** serves only as the loop terminator.  On allocation failure a
** slot keeps its placeholder, so the routine can simply be
** called again later (see banner comment above).
*/
1307 ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
1309 struct adapter *adapter = rxr->adapter;
1310 bus_dma_segment_t seg[1];
1311 struct ixgbe_rx_buf *rxbuf;
1313 int i, j, nsegs, error;
1314 bool refreshed = FALSE;
1316 i = j = rxr->next_to_refresh;
1317 /* Control the loop with one beyond */
1318 if (++j == rxr->num_desc)
1321 while (j != limit) {
1322 rxbuf = &rxr->rx_buffers[i];
/* Slot has no cluster: allocate a new one sized for this ring */
1323 if (rxbuf->buf == NULL) {
1324 mp = m_getjcl(M_NOWAIT, MT_DATA,
1325 M_PKTHDR, rxr->mbuf_sz);
/* Align the IP header when the frame fits a standard cluster */
1328 if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
1329 m_adj(mp, ETHER_ALIGN);
1333 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1335 /* If we're dealing with an mbuf that was copied rather
1336 * than replaced, there's no need to go through busdma.
1338 if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
1339 /* Get the memory mapping */
1340 bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
1341 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1342 rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
1344 printf("Refresh mbufs: payload dmamap load"
1345 " failure - %d\n", error);
1351 bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
1352 BUS_DMASYNC_PREREAD);
/* Cache the bus address alongside the descriptor write */
1353 rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
1354 htole64(seg[0].ds_addr);
/* Copy path: old mapping is still valid, restore cached addr */
1356 rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
1357 rxbuf->flags &= ~IXGBE_RX_COPY;
1361 /* Next is precalculated */
1363 rxr->next_to_refresh = i;
1364 if (++j == rxr->num_desc)
1368 if (refreshed) /* Update hardware tail index */
1369 IXGBE_WRITE_REG(&adapter->hw,
1370 rxr->tail, rxr->next_to_refresh);
1374 /*********************************************************************
1376 * Allocate memory for rx_buffer structures. Since we use one
1377 * rx_buffer per received packet, the maximum number of rx_buffer's
1378 * that we'll need is equal to the number of receive descriptors
1379 * that we've allocated.
1381 **********************************************************************/
/*
** Allocate the rx_buffer bookkeeping array, the payload DMA tag
** (ptag) and one DMA map per descriptor slot.  On any failure it
** falls through to ixgbe_free_receive_structures(), which copes
** with partially-completed allocation.
*/
1383 ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
1385 struct adapter *adapter = rxr->adapter;
1386 device_t dev = adapter->dev;
1387 struct ixgbe_rx_buf *rxbuf;
1390 bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
1391 if (!(rxr->rx_buffers =
1392 (struct ixgbe_rx_buf *) malloc(bsize,
1393 M_DEVBUF, M_NOWAIT | M_ZERO))) {
1394 device_printf(dev, "Unable to allocate rx_buffer memory\n");
/* One tag sized for the largest supported cluster (16K) */
1399 if ((error = bus_dma_tag_create(bus_get_dma_tag(dev), /* parent */
1400 1, 0, /* alignment, bounds */
1401 BUS_SPACE_MAXADDR, /* lowaddr */
1402 BUS_SPACE_MAXADDR, /* highaddr */
1403 NULL, NULL, /* filter, filterarg */
1404 MJUM16BYTES, /* maxsize */
1406 MJUM16BYTES, /* maxsegsize */
1408 NULL, /* lockfunc */
1409 NULL, /* lockfuncarg */
1411 device_printf(dev, "Unable to create RX DMA tag\n");
/* NOTE(review): the 'rxbuf++' in the loop header is redundant --
** rxbuf is reassigned from the index on every iteration.  Harmless
** but confusing; worth cleaning up in a code change. */
1415 for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
1416 rxbuf = &rxr->rx_buffers[i];
1417 error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
1419 device_printf(dev, "Unable to create RX dma map\n");
1427 /* Frees all, but can handle partial completion */
1428 ixgbe_free_receive_structures(adapter);
/*
** Release every mbuf still held by the ring's buffer slots;
** per-slot cleanup is delegated to ixgbe_rx_discard().
*/
1433 ixgbe_free_receive_ring(struct rx_ring *rxr)
1436 for (int i = 0; i < rxr->num_desc; i++) {
1437 ixgbe_rx_discard(rxr, i);
1441 /*********************************************************************
1443 * Initialize a receive ring and its buffers.
1445 **********************************************************************/
/*
** (Re)initialize one RX ring: zero the descriptor area, populate
** every slot with either a netmap buffer or a freshly mapped mbuf
** cluster, reset the soft indices, and set up software LRO or
** hardware RSC according to capabilities.
** NOTE(review): extract is missing interleaved lines (locks,
** error labels); only comments were added here.
*/
1447 ixgbe_setup_receive_ring(struct rx_ring *rxr)
1449 struct adapter *adapter;
1452 struct ixgbe_rx_buf *rxbuf;
1453 bus_dma_segment_t seg[1];
1454 struct lro_ctrl *lro = &rxr->lro;
1455 int rsize, nsegs, error = 0;
1457 struct netmap_adapter *na = NA(rxr->adapter->ifp);
1458 struct netmap_slot *slot;
1459 #endif /* DEV_NETMAP */
1461 adapter = rxr->adapter;
1465 /* Clear the ring contents */
1468 /* same as in ixgbe_setup_transmit_ring() */
1469 slot = netmap_reset(na, NR_RX, rxr->me, 0);
1470 #endif /* DEV_NETMAP */
1471 rsize = roundup2(adapter->num_rx_desc *
1472 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
1473 bzero((void *)rxr->rx_base, rsize);
1474 /* Cache the size */
1475 rxr->mbuf_sz = adapter->rx_mbuf_sz;
1477 /* Free current RX buffer structs and their mbufs */
1478 ixgbe_free_receive_ring(rxr);
1480 /* Now replenish the mbufs */
1481 for (int j = 0; j != rxr->num_desc; ++j) {
1484 rxbuf = &rxr->rx_buffers[j];
1487 * In netmap mode, fill the map and set the buffer
1488 * address in the NIC ring, considering the offset
1489 * between the netmap and NIC rings (see comment in
1490 * ixgbe_setup_transmit_ring() ). No need to allocate
1491 * an mbuf, so end the block with a continue;
1494 int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
1498 addr = PNMB(na, slot + sj, &paddr);
1499 netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
1500 /* Update descriptor and the cached value */
1501 rxr->rx_base[j].read.pkt_addr = htole64(paddr);
1502 rxbuf->addr = htole64(paddr);
1505 #endif /* DEV_NETMAP */
/* Normal (non-netmap) path: allocate and DMA-map a cluster */
1507 rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
1508 M_PKTHDR, adapter->rx_mbuf_sz);
1509 if (rxbuf->buf == NULL) {
1514 mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
1515 /* Get the memory mapping */
1516 error = bus_dmamap_load_mbuf_sg(rxr->ptag,
1517 rxbuf->pmap, mp, seg,
1518 &nsegs, BUS_DMA_NOWAIT);
1521 bus_dmamap_sync(rxr->ptag,
1522 rxbuf->pmap, BUS_DMASYNC_PREREAD);
1523 /* Update the descriptor and the cached value */
1524 rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
1525 rxbuf->addr = htole64(seg[0].ds_addr);
1529 /* Setup our descriptor indices */
1530 rxr->next_to_check = 0;
1531 rxr->next_to_refresh = 0;
1532 rxr->lro_enabled = FALSE;
1535 rxr->vtag_strip = FALSE;
1537 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1538 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1541 ** Now set up the LRO interface:
/* Hardware RSC takes precedence over software tcp_lro when enabled */
1543 if (ixgbe_rsc_enable)
1544 ixgbe_setup_hw_rsc(rxr);
1545 else if (ifp->if_capenable & IFCAP_LRO) {
1546 int err = tcp_lro_init(lro);
1548 device_printf(dev, "LRO Initialization failed!\n");
1551 INIT_DEBUGOUT("RX Soft LRO Initialized\n");
1552 rxr->lro_enabled = TRUE;
1553 lro->ifp = adapter->ifp;
1556 IXGBE_RX_UNLOCK(rxr);
/* Error path: tear down whatever was populated, then unlock */
1560 ixgbe_free_receive_ring(rxr);
1561 IXGBE_RX_UNLOCK(rxr);
1565 /*********************************************************************
1567 * Initialize all receive rings.
1569 **********************************************************************/
/*
** Initialize every RX ring; on failure, unwind only the rings
** that completed (the failing ring cleans up after itself).
*/
1571 ixgbe_setup_receive_structures(struct adapter *adapter)
1573 struct rx_ring *rxr = adapter->rx_rings;
1576 for (j = 0; j < adapter->num_queues; j++, rxr++)
1577 if (ixgbe_setup_receive_ring(rxr))
1583 * Free RX buffers allocated so far, we will only handle
1584 * the rings that completed, the failing case will have
1585 * cleaned up for itself. 'j' failed, so its the terminus.
1587 for (int i = 0; i < j; ++i) {
1588 rxr = &adapter->rx_rings[i];
1590 ixgbe_free_receive_ring(rxr);
1591 IXGBE_RX_UNLOCK(rxr);
1598 /*********************************************************************
1600 * Free all receive rings.
1602 **********************************************************************/
/*
** Tear down all RX rings: per-ring buffers, LRO state, and the
** descriptor DMA memory, then free the ring array itself.
*/
1604 ixgbe_free_receive_structures(struct adapter *adapter)
1606 struct rx_ring *rxr = adapter->rx_rings;
1608 INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");
1610 for (int i = 0; i < adapter->num_queues; i++, rxr++) {
1611 struct lro_ctrl *lro = &rxr->lro;
1612 ixgbe_free_receive_buffers(rxr);
1613 /* Free LRO memory */
1615 /* Free the ring memory as well */
1616 ixgbe_dma_free(adapter, &rxr->rxdma);
1619 free(adapter->rx_rings, M_DEVBUF);
1623 /*********************************************************************
1625 * Free receive ring data structures
1627 **********************************************************************/
/*
** Free one ring's buffer bookkeeping: discard any mbufs still in
** the slots, destroy each slot's DMA map, then release the
** rx_buffers array and the payload DMA tag.  Safe to call on a
** partially-constructed ring (all steps are NULL-checked).
*/
1629 ixgbe_free_receive_buffers(struct rx_ring *rxr)
1631 struct adapter *adapter = rxr->adapter;
1632 struct ixgbe_rx_buf *rxbuf;
1634 INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");
1636 /* Cleanup any existing buffers */
1637 if (rxr->rx_buffers != NULL) {
1638 for (int i = 0; i < adapter->num_rx_desc; i++) {
1639 rxbuf = &rxr->rx_buffers[i];
1640 ixgbe_rx_discard(rxr, i);
1641 if (rxbuf->pmap != NULL) {
1642 bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
1646 if (rxr->rx_buffers != NULL) {
1647 free(rxr->rx_buffers, M_DEVBUF);
1648 rxr->rx_buffers = NULL;
1652 if (rxr->ptag != NULL) {
1653 bus_dma_tag_destroy(rxr->ptag);
/*
** Hand a completed packet to the stack, first attempting software
** LRO aggregation when the packet qualifies (IPv4/IPv6 TCP, hw
** checksum verified, no ETQF match, HWTAGGING enabled).  The RX
** lock is dropped around if_input to avoid holding it into the
** stack.
*/
static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
1665 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
1666 * should be computed by hardware. Also it should not have VLAN tag in
1667 * ethernet header. In case of IPv6 we do not yet support ext. hdrs.
1669 if (rxr->lro_enabled &&
1670 (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
1671 (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
1672 ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1673 (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
1674 (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
1675 (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
1676 (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
1677 (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
1679 * Send to the stack if:
1680 ** - LRO not enabled, or
1681 ** - no LRO resources, or
1682 ** - lro enqueue fails
1684 if (rxr->lro.lro_cnt != 0)
1685 if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
1688 IXGBE_RX_UNLOCK(rxr);
1689 (*ifp->if_input)(ifp, m);
/*
** Drop the buffer state at descriptor slot 'i': free a partial
** packet chain if one is being assembled, or the lone cluster
** otherwise, and unload the DMA map so the slot can be refreshed.
*/
static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
1696 struct ixgbe_rx_buf *rbuf;
1698 rbuf = &rxr->rx_buffers[i];
1702 ** With advanced descriptors the writeback
1703 ** clobbers the buffer addrs, so its easier
1704 ** to just free the existing mbufs and take
1705 ** the normal refresh path to get new buffers
1709 if (rbuf->fmp != NULL) {/* Partial chain ? */
1710 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1713 rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
1714 } else if (rbuf->buf) {
1715 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1719 bus_dmamap_unload(rxr->ptag, rbuf->pmap);
1727 /*********************************************************************
1729 * This routine executes in interrupt context. It replenishes
1730 * the mbufs in the descriptor and sends data which has been
1731 * dma'ed into host memory to upper layer.
1733 * Return TRUE for more work, FALSE for all clean.
1734 *********************************************************************/
/*
** Main RX completion loop for one queue, bounded by
** rx_process_limit.  Walks descriptors from next_to_check,
** assembles multi-descriptor frames (including hardware RSC
** chains), applies the small-packet copy optimization, fills in
** checksum/VLAN/RSS metadata, and hands frames to the stack or
** LRO.  Refreshes mbufs every 8 processed descriptors.
** NOTE(review): extract is missing interleaved lines (locks,
** break/continue, returns); only comments were added here.
*/
1736 ixgbe_rxeof(struct ix_queue *que)
1738 struct adapter *adapter = que->adapter;
1739 struct rx_ring *rxr = que->rxr;
1740 struct ifnet *ifp = adapter->ifp;
1741 struct lro_ctrl *lro = &rxr->lro;
1742 int i, nextp, processed = 0;
1744 u32 count = adapter->rx_process_limit;
1745 union ixgbe_adv_rx_desc *cur;
1746 struct ixgbe_rx_buf *rbuf, *nbuf;
1752 /* Same as the txeof routine: wakeup clients on intr. */
1753 if (netmap_rx_irq(ifp, rxr->me, &processed)) {
1754 IXGBE_RX_UNLOCK(rxr);
1757 #endif /* DEV_NETMAP */
1759 for (i = rxr->next_to_check; count != 0;) {
1760 struct mbuf *sendmp, *mp;
1766 /* Sync the ring. */
1767 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1768 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
1770 cur = &rxr->rx_base[i];
1771 staterr = le32toh(cur->wb.upper.status_error);
1772 pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);
/* DD clear means the hardware has not written this slot back yet */
1774 if ((staterr & IXGBE_RXD_STAT_DD) == 0)
1776 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
1783 cur->wb.upper.status_error = 0;
1784 rbuf = &rxr->rx_buffers[i];
1787 len = le16toh(cur->wb.upper.length);
1788 ptype = le32toh(cur->wb.lower.lo_dword.data) &
1789 IXGBE_RXDADV_PKTTYPE_MASK;
1790 eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);
1792 /* Make sure bad packets are discarded */
1793 if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
1794 #if __FreeBSD_version >= 1100036
1795 if (IXGBE_IS_VF(adapter))
1796 if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
1798 rxr->rx_discarded++;
1799 ixgbe_rx_discard(rxr, i);
1803 bus_dmamap_sync(rxr->ptag, rbuf->pmap, BUS_DMASYNC_POSTREAD);
1806 ** On 82599 which supports a hardware
1807 ** LRO (called HW RSC), packets need
1808 ** not be fragmented across sequential
1809 ** descriptors, rather the next descriptor
1810 ** is indicated in bits of the descriptor.
1811 ** This also means that we might proceses
1812 ** more than one packet at a time, something
1813 ** that has never been true before, it
1814 ** required eliminating global chain pointers
1815 ** in favor of what we are doing here. -jfv
1819 ** Figure out the next descriptor
1822 if (rxr->hw_rsc == TRUE) {
1823 rsc = ixgbe_rsc_count(cur);
1824 rxr->rsc_num += (rsc - 1);
1826 if (rsc) { /* Get hardware index */
1828 IXGBE_RXDADV_NEXTP_MASK) >>
1829 IXGBE_RXDADV_NEXTP_SHIFT);
1830 } else { /* Just sequential */
1832 if (nextp == adapter->num_rx_desc)
1835 nbuf = &rxr->rx_buffers[nextp];
1839 ** Rather than using the fmp/lmp global pointers
1840 ** we now keep the head of a packet chain in the
1841 ** buffer struct and pass this along from one
1842 ** descriptor to the next, until we get EOP.
1846 ** See if there is a stored head
1847 ** that determines what we are
1850 if (sendmp != NULL) { /* secondary frag */
1851 rbuf->buf = rbuf->fmp = NULL;
1852 mp->m_flags &= ~M_PKTHDR;
1853 sendmp->m_pkthdr.len += mp->m_len;
1856 * Optimize. This might be a small packet,
1857 * maybe just a TCP ACK. Do a fast copy that
1858 * is cache aligned into a new mbuf, and
1859 * leave the old mbuf+cluster for re-use.
1861 if (eop && len <= IXGBE_RX_COPY_LEN) {
1862 sendmp = m_gethdr(M_NOWAIT, MT_DATA);
1863 if (sendmp != NULL) {
1865 IXGBE_RX_COPY_ALIGN;
1866 ixgbe_bcopy(mp->m_data,
1867 sendmp->m_data, len);
1868 sendmp->m_len = len;
/* Flag so refresh can reuse the slot's mapping */
1870 rbuf->flags |= IXGBE_RX_COPY;
1873 if (sendmp == NULL) {
1874 rbuf->buf = rbuf->fmp = NULL;
1878 /* first desc of a non-ps chain */
1879 sendmp->m_flags |= M_PKTHDR;
1880 sendmp->m_pkthdr.len = mp->m_len;
1884 /* Pass the head pointer on */
1888 mp->m_next = nbuf->buf;
1889 } else { /* Sending this frame */
1890 sendmp->m_pkthdr.rcvif = ifp;
1892 /* capture data for AIM */
1893 rxr->bytes += sendmp->m_pkthdr.len;
1894 rxr->rx_bytes += sendmp->m_pkthdr.len;
1895 /* Process vlan info */
1896 if ((rxr->vtag_strip) &&
1897 (staterr & IXGBE_RXD_STAT_VP))
1898 vtag = le16toh(cur->wb.upper.vlan);
1900 sendmp->m_pkthdr.ether_vtag = vtag;
1901 sendmp->m_flags |= M_VLANTAG;
1903 if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
1904 ixgbe_rx_checksum(staterr, sendmp, ptype);
1907 * In case of multiqueue, we have RXCSUM.PCSD bit set
1908 * and never cleared. This means we have RSS hash
1909 * available to be used.
1911 if (adapter->num_queues > 1) {
1912 sendmp->m_pkthdr.flowid =
1913 le32toh(cur->wb.lower.hi_dword.rss);
/* Map the descriptor RSS type onto the stack's hash types */
1914 switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
1915 case IXGBE_RXDADV_RSSTYPE_IPV4:
1916 M_HASHTYPE_SET(sendmp,
1917 M_HASHTYPE_RSS_IPV4);
1919 case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
1920 M_HASHTYPE_SET(sendmp,
1921 M_HASHTYPE_RSS_TCP_IPV4);
1923 case IXGBE_RXDADV_RSSTYPE_IPV6:
1924 M_HASHTYPE_SET(sendmp,
1925 M_HASHTYPE_RSS_IPV6);
1927 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
1928 M_HASHTYPE_SET(sendmp,
1929 M_HASHTYPE_RSS_TCP_IPV6);
1931 case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
1932 M_HASHTYPE_SET(sendmp,
1933 M_HASHTYPE_RSS_IPV6_EX);
1935 case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
1936 M_HASHTYPE_SET(sendmp,
1937 M_HASHTYPE_RSS_TCP_IPV6_EX);
1939 #if __FreeBSD_version > 1100000
1940 case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
1941 M_HASHTYPE_SET(sendmp,
1942 M_HASHTYPE_RSS_UDP_IPV4);
1944 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
1945 M_HASHTYPE_SET(sendmp,
1946 M_HASHTYPE_RSS_UDP_IPV6);
1948 case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
1949 M_HASHTYPE_SET(sendmp,
1950 M_HASHTYPE_RSS_UDP_IPV6_EX);
1954 M_HASHTYPE_SET(sendmp,
1955 M_HASHTYPE_OPAQUE_HASH);
/* Single queue: synthesize a flowid from the MSI-X vector */
1958 sendmp->m_pkthdr.flowid = que->msix;
1959 M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
1963 bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
1964 BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
1966 /* Advance our pointers to the next descriptor. */
1967 if (++i == rxr->num_desc)
1970 /* Now send to the stack or do LRO */
1971 if (sendmp != NULL) {
1972 rxr->next_to_check = i;
1973 ixgbe_rx_input(rxr, ifp, sendmp, ptype);
/* ixgbe_rx_input drops the RX lock; re-read the index */
1974 i = rxr->next_to_check;
1977 /* Every 8 descriptors we go to refresh mbufs */
1978 if (processed == 8) {
1979 ixgbe_refresh_mbufs(rxr, i);
1984 /* Refresh any remaining buf structs */
1985 if (ixgbe_rx_unrefreshed(rxr))
1986 ixgbe_refresh_mbufs(rxr, i);
1988 rxr->next_to_check = i;
1991 * Flush any outstanding LRO work
1993 tcp_lro_flush_all(lro);
1995 IXGBE_RX_UNLOCK(rxr);
1998 ** Still have cleaning to do?
2000 if ((staterr & IXGBE_RXD_STAT_DD) != 0)
2007 /*********************************************************************
2009 * Verify that the hardware indicated that the checksum is valid.
2010 * Inform the stack about the status of checksum so that stack
2011 * doesn't spend time verifying the checksum.
2013 *********************************************************************/
/*
** Translate the descriptor's checksum status/error bits into
** mbuf csum_flags so the stack can skip software verification.
** SCTP gets special handling (ETQF check), per the test below.
*/
2015 ixgbe_rx_checksum(u32 staterr, struct mbuf * mp, u32 ptype)
2017 u16 status = (u16) staterr;
2018 u8 errors = (u8) (staterr >> 24);
2021 if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
2022 (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
2026 if (status & IXGBE_RXD_STAT_IPCS) {
2027 mp->m_pkthdr.csum_flags |= CSUM_L3_CALC;
2028 /* IP Checksum Good */
2029 if (!(errors & IXGBE_RXD_ERR_IPE))
2030 mp->m_pkthdr.csum_flags |= CSUM_L3_VALID;
2032 /* TCP/UDP/SCTP checksum */
2033 if (status & IXGBE_RXD_STAT_L4CS) {
2034 mp->m_pkthdr.csum_flags |= CSUM_L4_CALC;
2035 if (!(errors & IXGBE_RXD_ERR_TCPE)) {
2036 mp->m_pkthdr.csum_flags |= CSUM_L4_VALID;
2038 mp->m_pkthdr.csum_data = htons(0xffff);
2043 /********************************************************************
2044 * Manage DMA'able memory.
2045 *******************************************************************/
/*
** busdma load callback: store the first segment's bus address
** into the caller-supplied bus_addr_t (arg).
*/
2047 ixgbe_dmamap_cb(void *arg, bus_dma_segment_t * segs, int nseg, int error)
2051 *(bus_addr_t *) arg = segs->ds_addr;
/*
** Allocate a single contiguous DMA-able region of 'size' bytes
** for descriptor rings: create tag, allocate memory, and load the
** map (the bus address lands in the dma struct via
** ixgbe_dmamap_cb).  Unwinds partial state on failure.
*/
2056 ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
2057 struct ixgbe_dma_alloc *dma, int mapflags)
2059 device_t dev = adapter->dev;
2062 r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
2063 DBA_ALIGN, 0, /* alignment, bounds */
2064 BUS_SPACE_MAXADDR, /* lowaddr */
2065 BUS_SPACE_MAXADDR, /* highaddr */
2066 NULL, NULL, /* filter, filterarg */
2069 size, /* maxsegsize */
2070 BUS_DMA_ALLOCNOW, /* flags */
2071 NULL, /* lockfunc */
2072 NULL, /* lockfuncarg */
2075 device_printf(dev,"ixgbe_dma_malloc: bus_dma_tag_create failed; "
2079 r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
2080 BUS_DMA_NOWAIT, &dma->dma_map);
2082 device_printf(dev,"ixgbe_dma_malloc: bus_dmamem_alloc failed; "
2086 r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr,
2090 mapflags | BUS_DMA_NOWAIT);
2092 device_printf(dev,"ixgbe_dma_malloc: bus_dmamap_load failed; "
2096 dma->dma_size = size;
/* Failure unwind: release memory and tag acquired above */
2099 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2101 bus_dma_tag_destroy(dma->dma_tag);
2103 dma->dma_tag = NULL;
/*
** Release a region obtained from ixgbe_dma_malloc(): sync, unload
** the map, free the memory, and destroy the tag -- in the reverse
** order of allocation.
*/
2108 ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
2110 bus_dmamap_sync(dma->dma_tag, dma->dma_map,
2111 BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2112 bus_dmamap_unload(dma->dma_tag, dma->dma_map);
2113 bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
2114 bus_dma_tag_destroy(dma->dma_tag);
2118 /*********************************************************************
2120 * Allocate memory for the transmit and receive rings, and then
2121 * the descriptors associated with each, called only once at attach.
2123 **********************************************************************/
2125 ixgbe_allocate_queues(struct adapter *adapter)
2127 device_t dev = adapter->dev;
2128 struct ix_queue *que;
2129 struct tx_ring *txr;
2130 struct rx_ring *rxr;
2131 int rsize, tsize, error = IXGBE_SUCCESS;
2132 int txconf = 0, rxconf = 0;
2134 enum ixgbe_iov_mode iov_mode;
2137 /* First allocate the top level queue structs */
2138 if (!(adapter->queues =
2139 (struct ix_queue *) malloc(sizeof(struct ix_queue) *
2140 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2141 device_printf(dev, "Unable to allocate queue memory\n");
2146 /* First allocate the TX ring struct memory */
2147 if (!(adapter->tx_rings =
2148 (struct tx_ring *) malloc(sizeof(struct tx_ring) *
2149 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2150 device_printf(dev, "Unable to allocate TX ring memory\n");
2155 /* Next allocate the RX */
2156 if (!(adapter->rx_rings =
2157 (struct rx_ring *) malloc(sizeof(struct rx_ring) *
2158 adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
2159 device_printf(dev, "Unable to allocate RX ring memory\n");
2164 /* For the ring itself */
2165 tsize = roundup2(adapter->num_tx_desc *
2166 sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);
2169 iov_mode = ixgbe_get_iov_mode(adapter);
2170 adapter->pool = ixgbe_max_vfs(iov_mode);
2175 * Now set up the TX queues, txconf is needed to handle the
2176 * possibility that things fail midcourse and we need to
2177 * undo memory gracefully
2179 for (int i = 0; i < adapter->num_queues; i++, txconf++) {
2180 /* Set up some basics */
2181 txr = &adapter->tx_rings[i];
2182 txr->adapter = adapter;
2184 txr->me = ixgbe_pf_que_index(iov_mode, i);
2188 txr->num_desc = adapter->num_tx_desc;
2190 /* Initialize the TX side lock */
2191 snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
2192 device_get_nameunit(dev), txr->me);
2193 mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);
2195 if (ixgbe_dma_malloc(adapter, tsize,
2196 &txr->txdma, BUS_DMA_NOWAIT)) {
2198 "Unable to allocate TX Descriptor memory\n");
2202 txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
2203 bzero((void *)txr->tx_base, tsize);
2205 /* Now allocate transmit buffers for the ring */
2206 if (ixgbe_allocate_transmit_buffers(txr)) {
2208 "Critical Failure setting up transmit buffers\n");
2212 #ifndef IXGBE_LEGACY_TX
2213 /* Allocate a buf ring */
2214 txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
2215 M_WAITOK, &txr->tx_mtx);
2216 if (txr->br == NULL) {
2218 "Critical Failure setting up buf ring\n");
2226 * Next the RX queues...
2228 rsize = roundup2(adapter->num_rx_desc *
2229 sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
2230 for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
2231 rxr = &adapter->rx_rings[i];
2232 /* Set up some basics */
2233 rxr->adapter = adapter;
2235 rxr->me = ixgbe_pf_que_index(iov_mode, i);
2239 rxr->num_desc = adapter->num_rx_desc;
2241 /* Initialize the RX side lock */
2242 snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
2243 device_get_nameunit(dev), rxr->me);
2244 mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);
2246 if (ixgbe_dma_malloc(adapter, rsize,
2247 &rxr->rxdma, BUS_DMA_NOWAIT)) {
2249 "Unable to allocate RxDescriptor memory\n");
2253 rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
2254 bzero((void *)rxr->rx_base, rsize);
2256 /* Allocate receive buffers for the ring*/
2257 if (ixgbe_allocate_receive_buffers(rxr)) {
2259 "Critical Failure setting up receive buffers\n");
2266 ** Finally set up the queue holding structs
2268 for (int i = 0; i < adapter->num_queues; i++) {
2269 que = &adapter->queues[i];
2270 que->adapter = adapter;
2272 que->txr = &adapter->tx_rings[i];
2273 que->rxr = &adapter->rx_rings[i];
2279 for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
2280 ixgbe_dma_free(adapter, &rxr->rxdma);
2282 for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
2283 ixgbe_dma_free(adapter, &txr->txdma);
2284 free(adapter->rx_rings, M_DEVBUF);
2286 free(adapter->tx_rings, M_DEVBUF);
2288 free(adapter->queues, M_DEVBUF);