/******************************************************************************

  Copyright (c) 2001-2015, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions are met:

   1. Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

   2. Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

   3. Neither the name of the Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  POSSIBILITY OF SUCH DAMAGE.

******************************************************************************/
#ifndef IXGBE_STANDALONE_BUILD
#include "opt_inet.h"
#include "opt_inet6.h"
#endif

#include "ixgbe.h"

#ifdef RSS
#include <net/rss_config.h>
#include <netinet/in_rss.h>
#endif

#ifdef DEV_NETMAP
#include <net/netmap.h>
#include <sys/selinfo.h>
#include <dev/netmap/netmap_kern.h>

extern int ix_crcstrip;
#endif
/*
** HW RSC control:
**  this feature only works with
**  IPv4, and only on 82599 and later.
**  Also this will cause IP forwarding to
**  fail and that can't be controlled by
**  the stack as LRO can. For all these
**  reasons I've deemed it best to leave
**  this off and not bother with a tunable
**  interface, this would need to be compiled
**  in to enable.
*/
static bool ixgbe_rsc_enable = FALSE;
/*
** For Flow Director: this is the
** number of TX packets we sample
** for the filter pool, this means
** every 20th packet will be probed.
**
** This feature can be disabled by
** setting this to 0.
*/
static int atr_sample_rate = 20;
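/*
** Illustrative sketch (not part of the driver): how the sample rate
** gates ATR.  Each transmitted packet bumps a per-ring counter; only
** when the counter reaches atr_sample_rate is the packet parsed and a
** filter programmed, then the counter resets.  This mirrors the logic
** in ixgbe_xmit() later in this file:
**
**	txr->atr_count++;
**	if (txr->atr_count >= atr_sample_rate) {
**		ixgbe_atr(txr, m_head);
**		txr->atr_count = 0;
**	}
*/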
/* Shared PCI config read/write */
inline u16
ixgbe_read_pci_cfg(struct ixgbe_hw *hw, u32 reg)
{
	u16 value;

	value = pci_read_config(((struct ixgbe_osdep *)hw->back)->dev,
	    reg, 2);

	return (value);
}

inline void
ixgbe_write_pci_cfg(struct ixgbe_hw *hw, u32 reg, u16 value)
{
	pci_write_config(((struct ixgbe_osdep *)hw->back)->dev,
	    reg, value, 2);
}

/*********************************************************************
 *  Local Function prototypes
 *********************************************************************/
static void	ixgbe_setup_transmit_ring(struct tx_ring *);
static void	ixgbe_free_transmit_buffers(struct tx_ring *);
static int	ixgbe_setup_receive_ring(struct rx_ring *);
static void	ixgbe_free_receive_buffers(struct rx_ring *);

static void	ixgbe_rx_checksum(u32, struct mbuf *, u32);
static void	ixgbe_refresh_mbufs(struct rx_ring *, int);
static int	ixgbe_xmit(struct tx_ring *, struct mbuf **);
static int	ixgbe_tx_ctx_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
static int	ixgbe_tso_setup(struct tx_ring *,
		    struct mbuf *, u32 *, u32 *);
#ifdef IXGBE_FDIR
static void	ixgbe_atr(struct tx_ring *, struct mbuf *);
#endif
static __inline void ixgbe_rx_discard(struct rx_ring *, int);
static __inline void ixgbe_rx_input(struct rx_ring *, struct ifnet *,
		    struct mbuf *, u32);
#ifdef IXGBE_LEGACY_TX
/*********************************************************************
 *  Transmit entry point
 *
 *  ixgbe_start is called by the stack to initiate a transmit.
 *  The driver will remain in this routine as long as there are
 *  packets to transmit and transmit resources are available.
 *  In case resources are not available, the stack is notified
 *  and the packet is requeued.
 **********************************************************************/
void
ixgbe_start_locked(struct tx_ring *txr, struct ifnet *ifp)
{
	struct mbuf	*m_head;
	struct adapter	*adapter = txr->adapter;

	IXGBE_TX_LOCK_ASSERT(txr);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
		return;
	if (!adapter->link_active)
		return;

	while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) {
		if (txr->tx_avail <= IXGBE_QUEUE_MIN_FREE)
			break;
		IFQ_DRV_DEQUEUE(&ifp->if_snd, m_head);
		if (m_head == NULL)
			break;
		if (ixgbe_xmit(txr, &m_head)) {
			if (m_head != NULL)
				IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
			break;
		}
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, m_head);
	}
}
/*
 * Legacy TX start - called by the stack, this
 * always uses the first tx ring, and should
 * not be used with multiqueue tx enabled.
 */
void
ixgbe_start(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		IXGBE_TX_LOCK(txr);
		ixgbe_start_locked(txr, ifp);
		IXGBE_TX_UNLOCK(txr);
	}
}
#else /* ! IXGBE_LEGACY_TX */

/*
** Multiqueue Transmit driver
*/
int
ixgbe_mq_start(struct ifnet *ifp, struct mbuf *m)
{
	struct adapter	*adapter = ifp->if_softc;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	int		i, err = 0;
#ifdef RSS
	uint32_t	bucket_id;
#endif

	/*
	 * When doing RSS, map it to the same outbound queue
	 * as the incoming flow would be mapped to.
	 *
	 * If everything is set up correctly, it should be the
	 * same bucket as the one this CPU is assigned to.
	 */
#if __FreeBSD_version < 1100054
	if (m->m_flags & M_FLOWID) {
#else
	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
#endif
#ifdef RSS
		if (rss_hash2bucket(m->m_pkthdr.flowid,
		    M_HASHTYPE_GET(m), &bucket_id) == 0)
			/* TODO: spit out something if bucket_id > num_queues? */
			i = bucket_id % adapter->num_queues;
		else
#endif
			i = m->m_pkthdr.flowid % adapter->num_queues;
	} else
		i = curcpu % adapter->num_queues;

	/* Check for a hung queue and pick alternative */
	if (((1 << i) & adapter->active_queues) == 0)
		i = ffsl(adapter->active_queues);

	txr = &adapter->tx_rings[i];
	que = &adapter->queues[i];

	err = drbr_enqueue(ifp, txr->br, m);
	if (err)
		return (err);
	if (IXGBE_TX_TRYLOCK(txr)) {
		ixgbe_mq_start_locked(ifp, txr);
		IXGBE_TX_UNLOCK(txr);
	} else
		taskqueue_enqueue(que->tq, &txr->txq_task);

	return (0);
}
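/*
** Illustrative note (not part of the driver): queue selection above is
** simple modular arithmetic.  With num_queues = 8 and an RSS bucket_id
** of 11, the packet goes to ring 11 % 8 = 3, so every packet of that
** flow lands on the same TX ring as its RX side.  Note that ffsl()
** returns a 1-based bit index, so the hung-queue fallback picks some
** queue whose bit is set in active_queues rather than queue 0.
*/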
int
ixgbe_mq_start_locked(struct ifnet *ifp, struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct mbuf	*next;
	int		enqueued = 0, err = 0;

	if (((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) ||
	    adapter->link_active == 0)
		return (ENETDOWN);

	/* Process the queue */
#if __FreeBSD_version < 901504
	next = drbr_dequeue(ifp, txr->br);
	while (next != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next != NULL)
				err = drbr_enqueue(ifp, txr->br, next);
#else
	while ((next = drbr_peek(ifp, txr->br)) != NULL) {
		if ((err = ixgbe_xmit(txr, &next)) != 0) {
			if (next == NULL)
				drbr_advance(ifp, txr->br);
			else
				drbr_putback(ifp, txr->br, next);
#endif
			break;
		}
#if __FreeBSD_version >= 901504
		drbr_advance(ifp, txr->br);
#endif
		enqueued++;
#if 0 // this is VF-only
#if __FreeBSD_version >= 1100036
		/*
		 * Since we're looking at the tx ring, we can check
		 * to see if we're a VF by examining our tail register
		 * address.
		 */
		if (txr->tail < IXGBE_TDT(0) && next->m_flags & M_MCAST)
			if_inc_counter(ifp, IFCOUNTER_OMCASTS, 1);
#endif
#endif
		/* Send a copy of the frame to the BPF listener */
		ETHER_BPF_MTAP(ifp, next);
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
#if __FreeBSD_version < 901504
		next = drbr_dequeue(ifp, txr->br);
#endif
	}

	if (txr->tx_avail < IXGBE_TX_CLEANUP_THRESHOLD)
		ixgbe_txeof(txr);

	return (err);
}

/*
 * Called from a taskqueue to drain queued transmit packets.
 */
void
ixgbe_deferred_mq_start(void *arg, int pending)
{
	struct tx_ring	*txr = arg;
	struct adapter	*adapter = txr->adapter;
	struct ifnet	*ifp = adapter->ifp;

	IXGBE_TX_LOCK(txr);
	if (!drbr_empty(ifp, txr->br))
		ixgbe_mq_start_locked(ifp, txr);
	IXGBE_TX_UNLOCK(txr);
}

/*
 * Flush all ring buffers
 */
void
ixgbe_qflush(struct ifnet *ifp)
{
	struct adapter	*adapter = ifp->if_softc;
	struct tx_ring	*txr = adapter->tx_rings;
	struct mbuf	*m;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		while ((m = buf_ring_dequeue_sc(txr->br)) != NULL)
			m_freem(m);
		IXGBE_TX_UNLOCK(txr);
	}
	if_qflush(ifp);
}
#endif /* IXGBE_LEGACY_TX */
/*********************************************************************
 *
 *  This routine maps the mbufs to tx descriptors, allowing the
 *  TX engine to transmit the packets.
 *	- return 0 on success, positive on failure
 *
 **********************************************************************/
static int
ixgbe_xmit(struct tx_ring *txr, struct mbuf **m_headp)
{
	struct adapter	*adapter = txr->adapter;
	u32		olinfo_status = 0, cmd_type_len;
	int		i, j, error, nsegs;
	int		first;
	bool		remap = TRUE;
	struct mbuf	*m_head;
	bus_dma_segment_t segs[adapter->num_segs];
	bus_dmamap_t	map;
	struct ixgbe_tx_buf *txbuf;
	union ixgbe_adv_tx_desc *txd = NULL;

	m_head = *m_headp;

	/* Basic descriptor defines */
	cmd_type_len = (IXGBE_ADVTXD_DTYP_DATA |
	    IXGBE_ADVTXD_DCMD_IFCS | IXGBE_ADVTXD_DCMD_DEXT);

	if (m_head->m_flags & M_VLANTAG)
		cmd_type_len |= IXGBE_ADVTXD_DCMD_VLE;

	/*
	 * Important to capture the first descriptor
	 * used because it will contain the index of
	 * the one we tell the hardware to report back
	 */
	first = txr->next_avail_desc;
	txbuf = &txr->tx_buffers[first];
	map = txbuf->map;

	/*
	 * Map the packet for DMA.
	 */
retry:
	error = bus_dmamap_load_mbuf_sg(txr->txtag, map,
	    *m_headp, segs, &nsegs, BUS_DMA_NOWAIT);

	if (__predict_false(error)) {
		struct mbuf *m;

		switch (error) {
		case EFBIG:
			/* Try it again? - one try */
			if (remap == TRUE) {
				remap = FALSE;
				/*
				 * XXX: m_defrag will choke on
				 * non-MCLBYTES-sized clusters
				 */
				m = m_defrag(*m_headp, M_NOWAIT);
				if (m == NULL) {
					adapter->mbuf_defrag_failed++;
					m_freem(*m_headp);
					*m_headp = NULL;
					return (ENOBUFS);
				}
				*m_headp = m;
				goto retry;
			} else
				return (error);
		case ENOMEM:
			txr->no_tx_dma_setup++;
			return (error);
		default:
			txr->no_tx_dma_setup++;
			m_freem(*m_headp);
			*m_headp = NULL;
			return (error);
		}
	}

	/* Make certain there are enough descriptors */
	if (nsegs > txr->tx_avail - 2) {
		txr->no_desc_avail++;
		bus_dmamap_unload(txr->txtag, map);
		return (ENOBUFS);
	}
	m_head = *m_headp;

	/*
	 * Set up the appropriate offload context
	 * this will consume the first descriptor
	 */
	error = ixgbe_tx_ctx_setup(txr, m_head, &cmd_type_len, &olinfo_status);
	if (__predict_false(error)) {
		if (error == ENOBUFS)
			*m_headp = NULL;
		return (error);
	}

#ifdef IXGBE_FDIR
	/* Do the flow director magic */
	if ((txr->atr_sample) && (!adapter->fdir_reinit)) {
		++txr->atr_count;
		if (txr->atr_count >= atr_sample_rate) {
			ixgbe_atr(txr, m_head);
			txr->atr_count = 0;
		}
	}
#endif

	i = txr->next_avail_desc;
	for (j = 0; j < nsegs; j++) {
		bus_size_t seglen;
		bus_addr_t segaddr;

		txbuf = &txr->tx_buffers[i];
		txd = &txr->tx_base[i];
		seglen = segs[j].ds_len;
		segaddr = htole64(segs[j].ds_addr);

		txd->read.buffer_addr = segaddr;
		txd->read.cmd_type_len = htole32(txr->txd_cmd |
		    cmd_type_len | seglen);
		txd->read.olinfo_status = htole32(olinfo_status);

		if (++i == txr->num_desc)
			i = 0;
	}

	txd->read.cmd_type_len |=
	    htole32(IXGBE_TXD_CMD_EOP | IXGBE_TXD_CMD_RS);
	txr->tx_avail -= nsegs;
	txr->next_avail_desc = i;

	txbuf->m_head = m_head;
	/*
	 * Here we swap the map so the last descriptor,
	 * which gets the completion interrupt, has the
	 * real map, and the first descriptor gets the
	 * unused map from this descriptor.
	 */
	txr->tx_buffers[first].map = txbuf->map;
	txbuf->map = map;
	bus_dmamap_sync(txr->txtag, map, BUS_DMASYNC_PREWRITE);

	/* Set the EOP descriptor that will be marked done */
	txbuf = &txr->tx_buffers[first];
	txbuf->eop = txd;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	/*
	 * Advance the Transmit Descriptor Tail (TDT), this tells the
	 * hardware that this frame is available to transmit.
	 */
	++txr->total_packets;
	IXGBE_WRITE_REG(&adapter->hw, txr->tail, i);

	/* Mark queue as having work */
	if (txr->busy == 0)
		txr->busy = 1;

	return (0);
}
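/*
** Illustrative note (not part of the driver): the descriptor ring is
** circular, so the segment loop above wraps with a compare rather than
** a modulo.  With num_desc = 2048, next_avail_desc = 2046 and a
** 4-segment packet, descriptors 2046, 2047, 0 and 1 are filled and the
** tail register is written with 2, telling hardware all four are ready.
*/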
/*********************************************************************
 *
 *  Allocate memory for tx_buffer structures. The tx_buffer stores all
 *  the information needed to transmit a packet on the wire. This is
 *  called only once at attach, setup is done every reset.
 *
 **********************************************************************/
int
ixgbe_allocate_transmit_buffers(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	device_t	dev = adapter->dev;
	struct ixgbe_tx_buf *txbuf;
	int		error, i;

	/*
	 * Setup DMA descriptor areas.
	 */
	if ((error = bus_dma_tag_create(
	    bus_get_dma_tag(adapter->dev),	/* parent */
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    IXGBE_TSO_SIZE,	/* maxsize */
	    adapter->num_segs,	/* nsegments */
	    PAGE_SIZE,		/* maxsegsize */
	    0,			/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &txr->txtag))) {
		device_printf(dev, "Unable to allocate TX DMA tag\n");
		goto fail;
	}

	if (!(txr->tx_buffers =
	    (struct ixgbe_tx_buf *) malloc(sizeof(struct ixgbe_tx_buf) *
	    adapter->num_tx_desc, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate tx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* Create the descriptor buffer dma maps */
	txbuf = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, txbuf++) {
		error = bus_dmamap_create(txr->txtag, 0, &txbuf->map);
		if (error != 0) {
			device_printf(dev, "Unable to create TX DMA map\n");
			goto fail;
		}
	}

	return (0);
fail:
	/* We free all, it handles case where we are in the middle */
	ixgbe_free_transmit_structures(adapter);
	return (error);
}
/*********************************************************************
 *
 *  Initialize a transmit ring.
 *
 **********************************************************************/
static void
ixgbe_setup_transmit_ring(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct ixgbe_tx_buf *txbuf;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	/* Clear the old ring contents */
	IXGBE_TX_LOCK(txr);
#ifdef DEV_NETMAP
	/*
	 * (under lock): if in netmap mode, do some consistency
	 * checks and set slot to entry 0 of the netmap ring.
	 */
	slot = netmap_reset(na, NR_TX, txr->me, 0);
#endif /* DEV_NETMAP */
	bzero((void *)txr->tx_base,
	    (sizeof(union ixgbe_adv_tx_desc)) * adapter->num_tx_desc);
	/* Reset indices */
	txr->next_avail_desc = 0;
	txr->next_to_clean = 0;

	/* Free any existing tx buffers. */
	txbuf = txr->tx_buffers;
	for (int i = 0; i < txr->num_desc; i++, txbuf++) {
		if (txbuf->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, txbuf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, txbuf->map);
			m_freem(txbuf->m_head);
			txbuf->m_head = NULL;
		}
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, set the map for the packet buffer.
		 * NOTE: Some drivers (not this one) also need to set
		 * the physical buffer address in the NIC ring.
		 * Slots in the netmap ring (indexed by "si") are
		 * kring->nkr_hwofs positions "ahead" wrt the
		 * corresponding slot in the NIC ring. In some drivers
		 * (not here) nkr_hwofs can be negative. Function
		 * netmap_idx_n2k() handles wraparounds properly.
		 */
		if (slot) {
			int si = netmap_idx_n2k(&na->tx_rings[txr->me], i);
			netmap_load_map(na, txr->txtag,
			    txbuf->map, NMB(na, slot + si));
		}
#endif /* DEV_NETMAP */
		/* Clear the EOP descriptor pointer */
		txbuf->eop = NULL;
	}

#ifdef IXGBE_FDIR
	/* Set the rate at which we sample packets */
	if (adapter->hw.mac.type != ixgbe_mac_82598EB)
		txr->atr_sample = atr_sample_rate;
#endif

	/* Set number of descriptors available */
	txr->tx_avail = adapter->num_tx_desc;

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	IXGBE_TX_UNLOCK(txr);
}

/*********************************************************************
 *
 *  Initialize all transmit rings.
 *
 **********************************************************************/
int
ixgbe_setup_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++)
		ixgbe_setup_transmit_ring(txr);

	return (0);
}
/*********************************************************************
 *
 *  Free all transmit rings.
 *
 **********************************************************************/
void
ixgbe_free_transmit_structures(struct adapter *adapter)
{
	struct tx_ring *txr = adapter->tx_rings;

	for (int i = 0; i < adapter->num_queues; i++, txr++) {
		IXGBE_TX_LOCK(txr);
		ixgbe_free_transmit_buffers(txr);
		ixgbe_dma_free(adapter, &txr->txdma);
		IXGBE_TX_UNLOCK(txr);
		IXGBE_TX_LOCK_DESTROY(txr);
	}
	free(adapter->tx_rings, M_DEVBUF);
}
/*********************************************************************
 *
 *  Free transmit ring related data structures.
 *
 **********************************************************************/
static void
ixgbe_free_transmit_buffers(struct tx_ring *txr)
{
	struct adapter	*adapter = txr->adapter;
	struct ixgbe_tx_buf *tx_buffer;
	int		i;

	INIT_DEBUGOUT("ixgbe_free_transmit_ring: begin");

	if (txr->tx_buffers == NULL)
		return;

	tx_buffer = txr->tx_buffers;
	for (i = 0; i < adapter->num_tx_desc; i++, tx_buffer++) {
		if (tx_buffer->m_head != NULL) {
			bus_dmamap_sync(txr->txtag, tx_buffer->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			m_freem(tx_buffer->m_head);
			tx_buffer->m_head = NULL;
			if (tx_buffer->map != NULL) {
				bus_dmamap_destroy(txr->txtag,
				    tx_buffer->map);
				tx_buffer->map = NULL;
			}
		} else if (tx_buffer->map != NULL) {
			bus_dmamap_unload(txr->txtag,
			    tx_buffer->map);
			bus_dmamap_destroy(txr->txtag,
			    tx_buffer->map);
			tx_buffer->map = NULL;
		}
	}
#ifndef IXGBE_LEGACY_TX
	if (txr->br != NULL)
		buf_ring_free(txr->br, M_DEVBUF);
#endif
	if (txr->tx_buffers != NULL) {
		free(txr->tx_buffers, M_DEVBUF);
		txr->tx_buffers = NULL;
	}
	if (txr->txtag != NULL) {
		bus_dma_tag_destroy(txr->txtag);
		txr->txtag = NULL;
	}
}
/*********************************************************************
 *
 *  Advanced Context Descriptor setup for VLAN, CSUM or TSO
 *
 **********************************************************************/
static int
ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct adapter	*adapter = txr->adapter;
	struct ixgbe_adv_tx_context_desc *TXD;
	struct ether_vlan_header *eh;
	struct ip	*ip;
	struct ip6_hdr	*ip6;
	u32		vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	int		ehdrlen, ip_hlen = 0;
	u16		etype;
	u8		ipproto = 0;
	int		offload = TRUE;
	int		ctxd = txr->next_avail_desc;
	u16		vtag = 0;

	/* First check if TSO is to be used */
	if (mp->m_pkthdr.csum_flags & CSUM_TSO)
		return (ixgbe_tso_setup(txr, mp, cmd_type_len, olinfo_status));

	if ((mp->m_pkthdr.csum_flags & CSUM_OFFLOAD) == 0)
		offload = FALSE;

	/* Indicate the whole packet as payload when not doing TSO */
	*olinfo_status |= mp->m_pkthdr.len << IXGBE_ADVTXD_PAYLEN_SHIFT;

	/* Now ready a context descriptor */
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	/*
	** In advanced descriptors the vlan tag must
	** be placed into the context descriptor. Hence
	** we need to make one even if not doing offloads.
	*/
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	} else if (!IXGBE_IS_X550VF(adapter) && (offload == FALSE))
		return (0);

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present,
	 * helpful for QinQ too.
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	/* Set the ether header length */
	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;

	if (offload == FALSE)
		goto no_offloads;

	switch (etype) {
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		ip_hlen = ip->ip_hl << 2;
		ipproto = ip->ip_p;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		break;
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		ip_hlen = sizeof(struct ip6_hdr);
		/* XXX-BZ this will go badly in case of ext hdrs. */
		ipproto = ip6->ip6_nxt;
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
	default:
		offload = FALSE;
		break;
	}

	vlan_macip_lens |= ip_hlen;

	switch (ipproto) {
	case IPPROTO_TCP:
		if (mp->m_pkthdr.csum_flags & CSUM_TCP)
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
		break;
	case IPPROTO_UDP:
		if (mp->m_pkthdr.csum_flags & CSUM_UDP)
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_UDP;
		break;
#if __FreeBSD_version >= 800000
	case IPPROTO_SCTP:
		if (mp->m_pkthdr.csum_flags & CSUM_SCTP)
			type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_SCTP;
		break;
#endif
	default:
		offload = FALSE;
		break;
	}

	if (offload) /* For the TX descriptor setup */
		*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;

no_offloads:
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;

	/* Now copy bits into descriptor */
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);
	TXD->seqnum_seed = htole32(0);
	TXD->mss_l4len_idx = htole32(0);

	/* We've consumed the first desc, adjust counters */
	if (++ctxd == txr->num_desc)
		ctxd = 0;
	txr->next_avail_desc = ctxd;
	--txr->tx_avail;

	return (0);
}
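/*
** Illustrative note (not part of the driver): vlan_macip_lens packs
** three fields into one 32-bit word, the VLAN tag in the high bits,
** then the MAC header length, then the IP header length in the low
** bits.  For an untagged frame with a 14-byte Ethernet header and a
** 20-byte IPv4 header, the value built above is:
**
**	vlan_macip_lens = (0  << IXGBE_ADVTXD_VLAN_SHIFT)
**			| (14 << IXGBE_ADVTXD_MACLEN_SHIFT)
**			| 20;
*/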
/**********************************************************************
 *
 *  Setup work for hardware segmentation offload (TSO) on
 *  adapters using advanced tx descriptors
 *
 **********************************************************************/
static int
ixgbe_tso_setup(struct tx_ring *txr, struct mbuf *mp,
    u32 *cmd_type_len, u32 *olinfo_status)
{
	struct ixgbe_adv_tx_context_desc *TXD;
	u32 vlan_macip_lens = 0, type_tucmd_mlhl = 0;
	u32 mss_l4len_idx = 0, paylen;
	u16 vtag = 0, eh_type;
	int ctxd, ehdrlen, ip_hlen, tcp_hlen;
	struct ether_vlan_header *eh;
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
#ifdef INET
	struct ip *ip;
#endif
	struct tcphdr *th;

	/*
	 * Determine where frame payload starts.
	 * Jump over vlan headers if already present
	 */
	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		eh_type = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		eh_type = eh->evl_encap_proto;
	}

	switch (ntohs(eh_type)) {
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		/* XXX-BZ For now we do not pretend to support ext. hdrs. */
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return (ENXIO);
		ip_hlen = sizeof(struct ip6_hdr);
		ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen);
		th = (struct tcphdr *)((caddr_t)ip6 + ip_hlen);
		th->th_sum = in6_cksum_pseudo(ip6, 0, IPPROTO_TCP, 0);
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
		break;
#endif
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(mp->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return (ENXIO);
		ip->ip_sum = 0;
		ip_hlen = ip->ip_hl << 2;
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		th->th_sum = in_pseudo(ip->ip_src.s_addr,
		    ip->ip_dst.s_addr, htons(IPPROTO_TCP));
		type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
		/* Tell transmit desc to also do IPv4 checksum. */
		*olinfo_status |= IXGBE_TXD_POPTS_IXSM << 8;
		break;
#endif
	default:
		panic("%s: CSUM_TSO but no supported IP version (0x%04x)",
		    __func__, ntohs(eh_type));
		break;
	}

	ctxd = txr->next_avail_desc;
	TXD = (struct ixgbe_adv_tx_context_desc *) &txr->tx_base[ctxd];

	tcp_hlen = th->th_off << 2;

	/* This is used in the transmit desc in encap */
	paylen = mp->m_pkthdr.len - ehdrlen - ip_hlen - tcp_hlen;

	/* VLAN MACLEN IPLEN */
	if (mp->m_flags & M_VLANTAG) {
		vtag = htole16(mp->m_pkthdr.ether_vtag);
		vlan_macip_lens |= (vtag << IXGBE_ADVTXD_VLAN_SHIFT);
	}

	vlan_macip_lens |= ehdrlen << IXGBE_ADVTXD_MACLEN_SHIFT;
	vlan_macip_lens |= ip_hlen;
	TXD->vlan_macip_lens = htole32(vlan_macip_lens);

	/* ADV DTYPE TUCMD */
	type_tucmd_mlhl |= IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
	type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
	TXD->type_tucmd_mlhl = htole32(type_tucmd_mlhl);

	/* MSS L4LEN IDX */
	mss_l4len_idx |= (mp->m_pkthdr.tso_segsz << IXGBE_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (tcp_hlen << IXGBE_ADVTXD_L4LEN_SHIFT);
	TXD->mss_l4len_idx = htole32(mss_l4len_idx);

	TXD->seqnum_seed = htole32(0);

	if (++ctxd == txr->num_desc)
		ctxd = 0;

	txr->tx_avail--;
	txr->next_avail_desc = ctxd;
	*cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
	*olinfo_status |= IXGBE_TXD_POPTS_TXSM << 8;
	*olinfo_status |= paylen << IXGBE_ADVTXD_PAYLEN_SHIFT;

	return (0);
}
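/*
** Illustrative note (not part of the driver): for a TSO send with
** tso_segsz = 1448 and a 20-byte TCP header, the context descriptor
** above carries mss_l4len_idx = (1448 << IXGBE_ADVTXD_MSS_SHIFT) |
** (20 << IXGBE_ADVTXD_L4LEN_SHIFT).  Hardware then carves the paylen
** bytes reported in olinfo_status into 1448-byte segments, replicating
** and fixing up the headers for each one.
*/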
/**********************************************************************
 *
 *  Examine each tx_buffer in the used queue. If the hardware is done
 *  processing the packet then free associated resources. The
 *  tx_buffer is put back on the free queue.
 *
 **********************************************************************/
void
ixgbe_txeof(struct tx_ring *txr)
{
	struct adapter		*adapter = txr->adapter;
	struct ifnet		*ifp = adapter->ifp;
	u32			work, processed = 0;
	u16			limit = txr->process_limit;
	struct ixgbe_tx_buf	*buf;
	union ixgbe_adv_tx_desc *txd;

	mtx_assert(&txr->tx_mtx, MA_OWNED);

#ifdef DEV_NETMAP
	if (ifp->if_capenable & IFCAP_NETMAP) {
		struct netmap_adapter *na = NA(ifp);
		struct netmap_kring *kring = &na->tx_rings[txr->me];
		txd = txr->tx_base;
		bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
		    BUS_DMASYNC_POSTREAD);
		/*
		 * In netmap mode, all the work is done in the context
		 * of the client thread. Interrupt handlers only wake up
		 * clients, which may be sleeping on individual rings
		 * or on a global resource for all rings.
		 * To implement tx interrupt mitigation, we wake up the client
		 * thread roughly every half ring, even if the NIC interrupts
		 * more frequently. This is implemented as follows:
		 * - ixgbe_txsync() sets kring->nr_kflags with the index of
		 *   the slot that should wake up the thread (nkr_num_slots
		 *   means the user thread should not be woken up);
		 * - the driver ignores tx interrupts unless netmap_mitigate=0
		 *   or the slot has the DD bit set.
		 */
		if (!netmap_mitigate ||
		    (kring->nr_kflags < kring->nkr_num_slots &&
		    txd[kring->nr_kflags].wb.status & IXGBE_TXD_STAT_DD)) {
			netmap_tx_irq(ifp, txr->me);
		}
		return;
	}
#endif /* DEV_NETMAP */
	if (txr->tx_avail == txr->num_desc) {
		txr->busy = 0;
		return;
	}

	/* Get work starting point */
	work = txr->next_to_clean;
	buf = &txr->tx_buffers[work];
	txd = &txr->tx_base[work];
	work -= txr->num_desc; /* The distance to ring end */
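	/*
	** Illustrative note (not part of the driver): "work" is kept as
	** a negative offset from the ring end so the wrap test below is
	** a simple zero check.  With num_desc = 1024 and next_to_clean =
	** 1000, work starts at -24; after 24 descriptors it hits 0, the
	** wrap code subtracts num_desc again and resets buf/txd to the
	** start of the ring, and adding num_desc back at the end
	** recovers the real index for next_to_clean.
	*/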
	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_POSTREAD);

	do {
		union ixgbe_adv_tx_desc *eop = buf->eop;
		if (eop == NULL) /* No work */
			break;

		if ((eop->wb.status & IXGBE_TXD_STAT_DD) == 0)
			break;	/* I/O not complete */

		if (buf->m_head) {
			txr->bytes += buf->m_head->m_pkthdr.len;
			bus_dmamap_sync(txr->txtag, buf->map,
			    BUS_DMASYNC_POSTWRITE);
			bus_dmamap_unload(txr->txtag, buf->map);
			m_freem(buf->m_head);
			buf->m_head = NULL;
		}
		buf->eop = NULL;
		++txr->tx_avail;

		/* We clean the range if multi segment */
		while (txd != eop) {
			++txd;
			++buf;
			++work;
			/* wrap the ring? */
			if (__predict_false(!work)) {
				work -= txr->num_desc;
				buf = txr->tx_buffers;
				txd = txr->tx_base;
			}
			if (buf->m_head) {
				txr->bytes += buf->m_head->m_pkthdr.len;
				bus_dmamap_sync(txr->txtag, buf->map,
				    BUS_DMASYNC_POSTWRITE);
				bus_dmamap_unload(txr->txtag, buf->map);
				m_freem(buf->m_head);
				buf->m_head = NULL;
			}
			++txr->tx_avail;
			buf->eop = NULL;
		}
		++processed;

		/* Try the next packet */
		++txd;
		++buf;
		++work;
		/* reset with a wrap */
		if (__predict_false(!work)) {
			work -= txr->num_desc;
			buf = txr->tx_buffers;
			txd = txr->tx_base;
		}
		prefetch(txd);
	} while (__predict_true(--limit));

	bus_dmamap_sync(txr->txdma.dma_tag, txr->txdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	work += txr->num_desc;
	txr->next_to_clean = work;

	/*
	** Queue hang detection: we know there's work outstanding
	** or the first return above would have been taken, so
	** increment busy if nothing managed to get cleaned; the
	** local timer checks this and marks the queue HUNG if it
	** exceeds the maximum number of attempts.
	*/
	if ((processed == 0) && (txr->busy != IXGBE_QUEUE_HUNG))
		++txr->busy;
	/*
	** If anything gets cleaned we reset state to 1;
	** note this will turn off HUNG if it's set.
	*/
	if (processed)
		txr->busy = 1;

	if (txr->tx_avail == txr->num_desc)
		txr->busy = 0;
}
#ifdef IXGBE_FDIR
/*
** This routine parses packet headers so that Flow
** Director can make a hashed filter table entry
** allowing traffic flows to be identified and kept
** on the same cpu. This would be a performance
** hit, but we only do it at IXGBE_FDIR_RATE of
** packets.
*/
static void
ixgbe_atr(struct tx_ring *txr, struct mbuf *mp)
{
	struct adapter			*adapter = txr->adapter;
	struct ix_queue			*que;
	struct ip			*ip;
	struct tcphdr			*th;
	struct udphdr			*uh;
	struct ether_vlan_header	*eh;
	union ixgbe_atr_hash_dword	input = {.dword = 0};
	union ixgbe_atr_hash_dword	common = {.dword = 0};
	int				ehdrlen, ip_hlen;
	u16				etype;

	eh = mtod(mp, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
		etype = eh->evl_proto;
	} else {
		ehdrlen = ETHER_HDR_LEN;
		etype = eh->evl_encap_proto;
	}

	/* Only handling IPv4 */
	if (etype != htons(ETHERTYPE_IP))
		return;

	ip = (struct ip *)(mp->m_data + ehdrlen);
	ip_hlen = ip->ip_hl << 2;

	/* check if we're UDP or TCP */
	switch (ip->ip_p) {
	case IPPROTO_TCP:
		th = (struct tcphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= th->th_sport;
		common.port.src ^= th->th_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_TCPV4;
		break;
	case IPPROTO_UDP:
		uh = (struct udphdr *)((caddr_t)ip + ip_hlen);
		/* src and dst are inverted */
		common.port.dst ^= uh->uh_sport;
		common.port.src ^= uh->uh_dport;
		input.formatted.flow_type ^= IXGBE_ATR_FLOW_TYPE_UDPV4;
		break;
	default:
		return;
	}

	input.formatted.vlan_id = htobe16(mp->m_pkthdr.ether_vtag);
	if (mp->m_pkthdr.ether_vtag)
		common.flex_bytes ^= htons(ETHERTYPE_VLAN);
	else
		common.flex_bytes ^= etype;
	common.ip ^= ip->ip_src.s_addr ^ ip->ip_dst.s_addr;

	que = &adapter->queues[txr->me];
	/*
	** This assumes the Rx queue and Tx
	** queue are bound to the same CPU
	*/
	ixgbe_fdir_add_signature_filter_82599(&adapter->hw,
	    input, common, que->msix);
}
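/*
** Illustrative note (not part of the driver): the source/destination
** swap above is deliberate.  The filter is programmed from a TX packet
** but must match the return traffic arriving on RX, where the port
** roles are reversed: a TX packet from local port 49152 to remote
** port 80 installs a filter matching RX packets from port 80 to port
** 49152, steering them to this queue's MSI-X vector.
*/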
#endif /* IXGBE_FDIR */

/*
** Used to detect a descriptor that has
** been merged by Hardware RSC.
*/
static inline u32
ixgbe_rsc_count(union ixgbe_adv_rx_desc *rx)
{
	return (le32toh(rx->wb.lower.lo_dword.data) &
	    IXGBE_RXDADV_RSCCNT_MASK) >> IXGBE_RXDADV_RSCCNT_SHIFT;
}
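/*
** Illustrative note (not part of the driver): RSCCNT is a small bit
** field inside the descriptor's lower dword; the mask isolates it and
** the shift right-justifies it.  A nonzero count means hardware
** coalesced this descriptor with others, in which case ixgbe_rxeof()
** follows the NEXTP field rather than advancing sequentially to find
** the next buffer of the packet.
*/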
/*********************************************************************
 *
 *  Initialize Hardware RSC (LRO) feature on 82599
 *  for an RX ring, this is toggled by the LRO capability
 *  even though it is transparent to the stack.
 *
 *  NOTE: since this HW feature only works with IPV4 and
 *        our testing has shown soft LRO to be as effective,
 *        I have decided to disable this by default.
 *
 **********************************************************************/
static void
ixgbe_setup_hw_rsc(struct rx_ring *rxr)
{
	struct adapter	*adapter = rxr->adapter;
	struct ixgbe_hw	*hw = &adapter->hw;
	u32		rscctrl, rdrxctl;

	/* If turning LRO/RSC off we need to disable it */
	if ((adapter->ifp->if_capenable & IFCAP_LRO) == 0) {
		rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
		rscctrl &= ~IXGBE_RSCCTL_RSCEN;
		IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);
		return;
	}

	rdrxctl = IXGBE_READ_REG(hw, IXGBE_RDRXCTL);
	rdrxctl &= ~IXGBE_RDRXCTL_RSCFRSTSIZE;
#ifdef DEV_NETMAP /* crcstrip is optional in netmap */
	if (adapter->ifp->if_capenable & IFCAP_NETMAP && !ix_crcstrip)
#endif /* DEV_NETMAP */
		rdrxctl |= IXGBE_RDRXCTL_CRCSTRIP;
	rdrxctl |= IXGBE_RDRXCTL_RSCACKC;
	IXGBE_WRITE_REG(hw, IXGBE_RDRXCTL, rdrxctl);

	rscctrl = IXGBE_READ_REG(hw, IXGBE_RSCCTL(rxr->me));
	rscctrl |= IXGBE_RSCCTL_RSCEN;
	/*
	** Limit the total number of descriptors that
	** can be combined, so it does not exceed 64K
	*/
	if (rxr->mbuf_sz == MCLBYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_16;
	else if (rxr->mbuf_sz == MJUMPAGESIZE)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_8;
	else if (rxr->mbuf_sz == MJUM9BYTES)
		rscctrl |= IXGBE_RSCCTL_MAXDESC_4;
	else  /* Using 16K cluster */
		rscctrl |= IXGBE_RSCCTL_MAXDESC_1;

	IXGBE_WRITE_REG(hw, IXGBE_RSCCTL(rxr->me), rscctrl);

	/* Enable TCP header recognition */
	IXGBE_WRITE_REG(hw, IXGBE_PSRTYPE(0),
	    (IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)) |
	    IXGBE_PSRTYPE_TCPHDR));

	/* Disable RSC for ACK packets */
	IXGBE_WRITE_REG(hw, IXGBE_RSCDBU,
	    (IXGBE_RSCDBU_RSCACKDIS | IXGBE_READ_REG(hw, IXGBE_RSCDBU)));

	rxr->hw_rsc = TRUE;
}
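/*
** Illustrative note (not part of the driver): the MAXDESC choice is
** just max-descriptors * cluster-size kept at or below 64KB.  With
** 2KB (MCLBYTES) clusters, 16 descriptors combine to at most 32KB;
** 4KB page-sized clusters give 8 * 4KB = 32KB; 9KB clusters give
** 4 * 9KB = 36KB; and a 16KB cluster is large enough that a single
** descriptor suffices.
*/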
/*********************************************************************
 *
 *  Refresh mbuf buffers for RX descriptor rings
 *   - now keeps its own state so discards due to resource
 *     exhaustion are unnecessary; if an mbuf cannot be obtained
 *     it just returns, keeping its placeholder, thus it can simply
 *     be recalled to try again.
 *
 **********************************************************************/
static void
ixgbe_refresh_mbufs(struct rx_ring *rxr, int limit)
{
	struct adapter		*adapter = rxr->adapter;
	bus_dma_segment_t	seg[1];
	struct ixgbe_rx_buf	*rxbuf;
	struct mbuf		*mp;
	int			i, j, nsegs, error;
	bool			refreshed = FALSE;

	i = j = rxr->next_to_refresh;
	/* Control the loop with one beyond */
	if (++j == rxr->num_desc)
		j = 0;

	while (j != limit) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf == NULL) {
			mp = m_getjcl(M_NOWAIT, MT_DATA,
			    M_PKTHDR, rxr->mbuf_sz);
			if (mp == NULL)
				goto update;
			if (adapter->max_frame_size <= (MCLBYTES - ETHER_ALIGN))
				m_adj(mp, ETHER_ALIGN);
		} else
			mp = rxbuf->buf;

		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;

		/* If we're dealing with an mbuf that was copied rather
		 * than replaced, there's no need to go through busdma.
		 */
		if ((rxbuf->flags & IXGBE_RX_COPY) == 0) {
			/* Get the memory mapping */
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			error = bus_dmamap_load_mbuf_sg(rxr->ptag,
			    rxbuf->pmap, mp, seg, &nsegs, BUS_DMA_NOWAIT);
			if (error != 0) {
				printf("Refresh mbufs: payload dmamap load"
				    " failure - %d\n", error);
				m_free(mp);
				rxbuf->buf = NULL;
				goto update;
			}
			rxbuf->buf = mp;
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_PREREAD);
			rxbuf->addr = rxr->rx_base[i].read.pkt_addr =
			    htole64(seg[0].ds_addr);
		} else {
			rxr->rx_base[i].read.pkt_addr = rxbuf->addr;
			rxbuf->flags &= ~IXGBE_RX_COPY;
		}

		refreshed = TRUE;
		/* Next is precalculated */
		i = j;
		rxr->next_to_refresh = i;
		if (++j == rxr->num_desc)
			j = 0;
	}
update:
	if (refreshed) /* Update hardware tail index */
		IXGBE_WRITE_REG(&adapter->hw,
		    rxr->tail, rxr->next_to_refresh);
}
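/*
** Illustrative note (not part of the driver): "j" always runs one slot
** ahead of "i", so the loop stops one descriptor short of "limit" and
** next_to_refresh can never catch up to next_to_check.  With
** num_desc = 512, next_to_refresh = 510 and limit = 2, the loop
** refreshes slots 510, 511 and 0, then stops with slot 1 left as the
** placeholder.
*/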
/*********************************************************************
 *
 *  Allocate memory for rx_buffer structures. Since we use one
 *  rx_buffer per received packet, the maximum number of rx_buffer's
 *  that we'll need is equal to the number of receive descriptors
 *  that we've allocated.
 *
 **********************************************************************/
int
ixgbe_allocate_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	device_t		dev = adapter->dev;
	struct ixgbe_rx_buf	*rxbuf;
	int			bsize, error;

	bsize = sizeof(struct ixgbe_rx_buf) * rxr->num_desc;
	if (!(rxr->rx_buffers =
	    (struct ixgbe_rx_buf *) malloc(bsize,
	    M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate rx_buffer memory\n");
		error = ENOMEM;
		goto fail;
	}

	if ((error = bus_dma_tag_create(bus_get_dma_tag(dev),	/* parent */
	    1, 0,		/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    MJUM16BYTES,	/* maxsize */
	    1,			/* nsegments */
	    MJUM16BYTES,	/* maxsegsize */
	    0,			/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &rxr->ptag))) {
		device_printf(dev, "Unable to create RX DMA tag\n");
		goto fail;
	}

	for (int i = 0; i < rxr->num_desc; i++, rxbuf++) {
		rxbuf = &rxr->rx_buffers[i];
		error = bus_dmamap_create(rxr->ptag, 0, &rxbuf->pmap);
		if (error) {
			device_printf(dev, "Unable to create RX dma map\n");
			goto fail;
		}
	}

	return (0);

fail:
	/* Frees all, but can handle partial completion */
	ixgbe_free_receive_structures(adapter);
	return (error);
}

static void
ixgbe_free_receive_ring(struct rx_ring *rxr)
{
	struct ixgbe_rx_buf *rxbuf;

	for (int i = 0; i < rxr->num_desc; i++) {
		rxbuf = &rxr->rx_buffers[i];
		if (rxbuf->buf != NULL) {
			bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
			    BUS_DMASYNC_POSTREAD);
			bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
			rxbuf->buf->m_flags |= M_PKTHDR;
			m_freem(rxbuf->buf);
			rxbuf->buf = NULL;
			rxbuf->flags = 0;
		}
	}
}
/*********************************************************************
 *
 *  Initialize a receive ring and its buffers.
 *
 **********************************************************************/
static int
ixgbe_setup_receive_ring(struct rx_ring *rxr)
{
	struct adapter		*adapter;
	struct ifnet		*ifp;
	device_t		dev;
	struct ixgbe_rx_buf	*rxbuf;
	bus_dma_segment_t	seg[1];
	struct lro_ctrl		*lro = &rxr->lro;
	int			rsize, nsegs, error = 0;
#ifdef DEV_NETMAP
	struct netmap_adapter *na = NA(rxr->adapter->ifp);
	struct netmap_slot *slot;
#endif /* DEV_NETMAP */

	adapter = rxr->adapter;
	ifp = adapter->ifp;
	dev = adapter->dev;

	/* Clear the ring contents */
	IXGBE_RX_LOCK(rxr);
#ifdef DEV_NETMAP
	/* same as in ixgbe_setup_transmit_ring() */
	slot = netmap_reset(na, NR_RX, rxr->me, 0);
#endif /* DEV_NETMAP */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	bzero((void *)rxr->rx_base, rsize);
	/* Cache the size */
	rxr->mbuf_sz = adapter->rx_mbuf_sz;

	/* Free current RX buffer structs and their mbufs */
	ixgbe_free_receive_ring(rxr);

	/* Now replenish the mbufs */
	for (int j = 0; j != rxr->num_desc; ++j) {
		struct mbuf *mp;

		rxbuf = &rxr->rx_buffers[j];
#ifdef DEV_NETMAP
		/*
		 * In netmap mode, fill the map and set the buffer
		 * address in the NIC ring, considering the offset
		 * between the netmap and NIC rings (see comment in
		 * ixgbe_setup_transmit_ring() ). No need to allocate
		 * an mbuf, so end the block with a continue;
		 */
		if (slot) {
			int sj = netmap_idx_n2k(&na->rx_rings[rxr->me], j);
			uint64_t paddr;
			void *addr;

			addr = PNMB(na, slot + sj, &paddr);
			netmap_load_map(na, rxr->ptag, rxbuf->pmap, addr);
			/* Update descriptor and the cached value */
			rxr->rx_base[j].read.pkt_addr = htole64(paddr);
			rxbuf->addr = htole64(paddr);
			continue;
		}
#endif /* DEV_NETMAP */
		rxbuf->flags = 0;
		rxbuf->buf = m_getjcl(M_NOWAIT, MT_DATA,
		    M_PKTHDR, adapter->rx_mbuf_sz);
		if (rxbuf->buf == NULL) {
			error = ENOBUFS;
			goto fail;
		}
		mp = rxbuf->buf;
		mp->m_pkthdr.len = mp->m_len = rxr->mbuf_sz;
		/* Get the memory mapping */
		error = bus_dmamap_load_mbuf_sg(rxr->ptag,
		    rxbuf->pmap, mp, seg,
		    &nsegs, BUS_DMA_NOWAIT);
		if (error != 0)
			goto fail;
		bus_dmamap_sync(rxr->ptag,
		    rxbuf->pmap, BUS_DMASYNC_PREREAD);
		/* Update the descriptor and the cached value */
		rxr->rx_base[j].read.pkt_addr = htole64(seg[0].ds_addr);
		rxbuf->addr = htole64(seg[0].ds_addr);
	}

	/* Setup our descriptor indices */
	rxr->next_to_check = 0;
	rxr->next_to_refresh = 0;
	rxr->lro_enabled = FALSE;
	rxr->rx_copies = 0;
	rxr->rx_bytes = 0;
	rxr->vtag_strip = FALSE;

	bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
	    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

	/*
	** Now set up the LRO interface:
	*/
	if (ixgbe_rsc_enable)
		ixgbe_setup_hw_rsc(rxr);
	else if (ifp->if_capenable & IFCAP_LRO) {
		int err = tcp_lro_init(lro);
		if (err) {
			device_printf(dev, "LRO Initialization failed!\n");
			goto fail;
		}
		INIT_DEBUGOUT("RX Soft LRO Initialized\n");
		rxr->lro_enabled = TRUE;
		lro->ifp = adapter->ifp;
	}

	IXGBE_RX_UNLOCK(rxr);
	return (0);

fail:
	ixgbe_free_receive_ring(rxr);
	IXGBE_RX_UNLOCK(rxr);
	return (error);
}
/*********************************************************************
 *
 *  Initialize all receive rings.
 *
 **********************************************************************/
int
ixgbe_setup_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;
	int j;

	for (j = 0; j < adapter->num_queues; j++, rxr++)
		if (ixgbe_setup_receive_ring(rxr))
			goto fail;

	return (0);
fail:
	/*
	 * Free RX buffers allocated so far, we will only handle
	 * the rings that completed, the failing case will have
	 * cleaned up for itself.  'j' failed, so it's the terminus.
	 */
	for (int i = 0; i < j; ++i) {
		rxr = &adapter->rx_rings[i];
		ixgbe_free_receive_ring(rxr);
	}

	return (ENOBUFS);
}
/*********************************************************************
 *
 *  Free all receive rings.
 *
 **********************************************************************/
void
ixgbe_free_receive_structures(struct adapter *adapter)
{
	struct rx_ring *rxr = adapter->rx_rings;

	INIT_DEBUGOUT("ixgbe_free_receive_structures: begin");

	for (int i = 0; i < adapter->num_queues; i++, rxr++) {
		struct lro_ctrl *lro = &rxr->lro;
		ixgbe_free_receive_buffers(rxr);
		/* Free LRO memory */
		tcp_lro_free(lro);
		/* Free the ring memory as well */
		ixgbe_dma_free(adapter, &rxr->rxdma);
	}

	free(adapter->rx_rings, M_DEVBUF);
}
/*********************************************************************
 *
 *  Free receive ring data structures
 *
 **********************************************************************/
static void
ixgbe_free_receive_buffers(struct rx_ring *rxr)
{
	struct adapter		*adapter = rxr->adapter;
	struct ixgbe_rx_buf	*rxbuf;

	INIT_DEBUGOUT("ixgbe_free_receive_buffers: begin");

	/* Cleanup any existing buffers */
	if (rxr->rx_buffers != NULL) {
		for (int i = 0; i < adapter->num_rx_desc; i++) {
			rxbuf = &rxr->rx_buffers[i];
			if (rxbuf->buf != NULL) {
				bus_dmamap_sync(rxr->ptag, rxbuf->pmap,
				    BUS_DMASYNC_POSTREAD);
				bus_dmamap_unload(rxr->ptag, rxbuf->pmap);
				rxbuf->buf->m_flags |= M_PKTHDR;
				m_freem(rxbuf->buf);
			}
			rxbuf->buf = NULL;
			if (rxbuf->pmap != NULL) {
				bus_dmamap_destroy(rxr->ptag, rxbuf->pmap);
				rxbuf->pmap = NULL;
			}
		}
		if (rxr->rx_buffers != NULL) {
			free(rxr->rx_buffers, M_DEVBUF);
			rxr->rx_buffers = NULL;
		}
	}

	if (rxr->ptag != NULL) {
		bus_dma_tag_destroy(rxr->ptag);
		rxr->ptag = NULL;
	}
}
static __inline void
ixgbe_rx_input(struct rx_ring *rxr, struct ifnet *ifp, struct mbuf *m, u32 ptype)
{
	/*
	 * ATM LRO is only for IP/TCP packets and TCP checksum of the packet
	 * should be computed by hardware. Also it should not have VLAN tag in
	 * ethernet header.  In case of IPv6 we do not yet support ext. hdrs.
	 */
	if (rxr->lro_enabled &&
	    (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    ((ptype & (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV4 | IXGBE_RXDADV_PKTTYPE_TCP) ||
	    (ptype & (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) ==
	    (IXGBE_RXDADV_PKTTYPE_IPV6 | IXGBE_RXDADV_PKTTYPE_TCP)) &&
	    (m->m_pkthdr.csum_flags & (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) ==
	    (CSUM_DATA_VALID | CSUM_PSEUDO_HDR)) {
		/*
		 * Send to the stack if:
		 *  - LRO not enabled, or
		 *  - no LRO resources, or
		 *  - lro enqueue fails
		 */
		if (rxr->lro.lro_cnt != 0)
			if (tcp_lro_rx(&rxr->lro, m, 0) == 0)
				return;
	}
	IXGBE_RX_UNLOCK(rxr);
	(*ifp->if_input)(ifp, m);
	IXGBE_RX_LOCK(rxr);
}
static __inline void
ixgbe_rx_discard(struct rx_ring *rxr, int i)
{
	struct ixgbe_rx_buf *rbuf;

	rbuf = &rxr->rx_buffers[i];

	/*
	** With advanced descriptors the writeback
	** clobbers the buffer addrs, so it's easier
	** to just free the existing mbufs and take
	** the normal refresh path to get new buffers
	** and mapping.
	*/
	if (rbuf->fmp != NULL) {/* Partial chain ? */
		rbuf->fmp->m_flags |= M_PKTHDR;
		m_freem(rbuf->fmp);
		rbuf->fmp = NULL;
		rbuf->buf = NULL; /* rbuf->buf is part of fmp's chain */
	} else if (rbuf->buf) {
		m_free(rbuf->buf);
		rbuf->buf = NULL;
	}
	bus_dmamap_unload(rxr->ptag, rbuf->pmap);

	rbuf->flags = 0;
}
/*********************************************************************
 *
 *  This routine executes in interrupt context. It replenishes
 *  the mbufs in the descriptor and sends data which has been
 *  dma'ed into host memory to upper layer.
 *
 *  Return TRUE for more work, FALSE for all clean.
 *********************************************************************/
bool
ixgbe_rxeof(struct ix_queue *que)
{
	struct adapter		*adapter = que->adapter;
	struct rx_ring		*rxr = que->rxr;
	struct ifnet		*ifp = adapter->ifp;
	struct lro_ctrl		*lro = &rxr->lro;
	struct lro_entry	*queued;
	int			i, nextp, processed = 0;
	u32			staterr = 0;
	u16			count = rxr->process_limit;
	union ixgbe_adv_rx_desc	*cur;
	struct ixgbe_rx_buf	*rbuf, *nbuf;
	u16			pkt_info;

	IXGBE_RX_LOCK(rxr);

#ifdef DEV_NETMAP
	/* Same as the txeof routine: wakeup clients on intr. */
	if (netmap_rx_irq(ifp, rxr->me, &processed)) {
		IXGBE_RX_UNLOCK(rxr);
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	for (i = rxr->next_to_check; count != 0;) {
		struct mbuf	*sendmp, *mp;
		u32		rsc, ptype;
		u16		len;
		u16		vtag = 0;
		bool		eop;

		/* Sync the ring. */
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);

		cur = &rxr->rx_base[i];
		staterr = le32toh(cur->wb.upper.status_error);
		pkt_info = le16toh(cur->wb.lower.lo_dword.hs_rss.pkt_info);

		if ((staterr & IXGBE_RXD_STAT_DD) == 0)
			break;
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;

		count--;
		sendmp = NULL;
		nbuf = NULL;
		rsc = 0;
		cur->wb.upper.status_error = 0;
		rbuf = &rxr->rx_buffers[i];
		mp = rbuf->buf;

		len = le16toh(cur->wb.upper.length);
		ptype = le32toh(cur->wb.lower.lo_dword.data) &
		    IXGBE_RXDADV_PKTTYPE_MASK;
		eop = ((staterr & IXGBE_RXD_STAT_EOP) != 0);

		/* Make sure bad packets are discarded */
		if (eop && (staterr & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) != 0) {
#if __FreeBSD_version >= 1100036
			if (IXGBE_IS_VF(adapter))
				if_inc_counter(ifp, IFCOUNTER_IERRORS, 1);
#endif
			rxr->rx_discarded++;
			ixgbe_rx_discard(rxr, i);
			goto next_desc;
		}

		/*
		** On 82599 which supports a hardware
		** LRO (called HW RSC), packets need
		** not be fragmented across sequential
		** descriptors, rather the next descriptor
		** is indicated in bits of the descriptor.
		** This also means that we might process
		** more than one packet at a time, something
		** that has never been true before; it
		** required eliminating global chain pointers
		** in favor of what we are doing here. -jfv
		*/
		if (!eop) {
			/*
			** Figure out the next descriptor
			** of this frame.
			*/
			if (rxr->hw_rsc == TRUE) {
				rsc = ixgbe_rsc_count(cur);
				rxr->rsc_num += (rsc - 1);
			}
			if (rsc) { /* Get hardware index */
				nextp = ((staterr &
				    IXGBE_RXDADV_NEXTP_MASK) >>
				    IXGBE_RXDADV_NEXTP_SHIFT);
			} else { /* Just sequential */
				nextp = i + 1;
				if (nextp == adapter->num_rx_desc)
					nextp = 0;
			}
			nbuf = &rxr->rx_buffers[nextp];
			prefetch(nbuf);
		}
		/*
		** Rather than using the fmp/lmp global pointers
		** we now keep the head of a packet chain in the
		** buffer struct and pass this along from one
		** descriptor to the next, until we get EOP.
		*/
		mp->m_len = len;
		/*
		** See if there is a stored head
		** that determines what we are
		*/
		sendmp = rbuf->fmp;
		if (sendmp != NULL) {  /* secondary frag */
			rbuf->buf = rbuf->fmp = NULL;
			mp->m_flags &= ~M_PKTHDR;
			sendmp->m_pkthdr.len += mp->m_len;
		} else {
			/*
			 * Optimize.  This might be a small packet,
			 * maybe just a TCP ACK. Do a fast copy that
			 * is cache aligned into a new mbuf, and
			 * leave the old mbuf+cluster for re-use.
			 */
			if (eop && len <= IXGBE_RX_COPY_LEN) {
				sendmp = m_gethdr(M_NOWAIT, MT_DATA);
				if (sendmp != NULL) {
					sendmp->m_data +=
					    IXGBE_RX_COPY_ALIGN;
					ixgbe_bcopy(mp->m_data,
					    sendmp->m_data, len);
					sendmp->m_len = len;
					rxr->rx_copies++;
					rbuf->flags |= IXGBE_RX_COPY;
				}
			}
			if (sendmp == NULL) {
				rbuf->buf = rbuf->fmp = NULL;
				sendmp = mp;
			}

			/* first desc of a non-ps chain */
			sendmp->m_flags |= M_PKTHDR;
			sendmp->m_pkthdr.len = mp->m_len;
		}
		++processed;

		/* Pass the head pointer on */
		if (eop == 0) {
			nbuf->fmp = sendmp;
			sendmp = NULL;
			mp->m_next = nbuf->buf;
		} else { /* Sending this frame */
			sendmp->m_pkthdr.rcvif = ifp;
			/* capture data for AIM */
			rxr->bytes += sendmp->m_pkthdr.len;
			rxr->rx_bytes += sendmp->m_pkthdr.len;
			/* Process vlan info */
			if ((rxr->vtag_strip) &&
			    (staterr & IXGBE_RXD_STAT_VP))
				vtag = le16toh(cur->wb.upper.vlan);
			if (vtag) {
				sendmp->m_pkthdr.ether_vtag = vtag;
				sendmp->m_flags |= M_VLANTAG;
			}
			if ((ifp->if_capenable & IFCAP_RXCSUM) != 0)
				ixgbe_rx_checksum(staterr, sendmp, ptype);
#if __FreeBSD_version >= 800000
#ifdef RSS
			sendmp->m_pkthdr.flowid =
			    le32toh(cur->wb.lower.hi_dword.rss);
#if __FreeBSD_version < 1100054
			sendmp->m_flags |= M_FLOWID;
#endif
			switch (pkt_info & IXGBE_RXDADV_RSSTYPE_MASK) {
			case IXGBE_RXDADV_RSSTYPE_IPV4_TCP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV4:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6_EX);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_TCP_IPV6_EX);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV4_UDP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV4);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6);
				break;
			case IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_RSS_UDP_IPV6_EX);
				break;
			default:
				M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
				break;
			}
#else /* RSS */
			sendmp->m_pkthdr.flowid = que->msix;
#if __FreeBSD_version >= 1100054
			M_HASHTYPE_SET(sendmp, M_HASHTYPE_OPAQUE);
#else
			sendmp->m_flags |= M_FLOWID;
#endif
#endif /* RSS */
#endif /* FreeBSD_version */
		}
next_desc:
		bus_dmamap_sync(rxr->rxdma.dma_tag, rxr->rxdma.dma_map,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);

		/* Advance our pointers to the next descriptor. */
		if (++i == rxr->num_desc)
			i = 0;

		/* Now send to the stack or do LRO */
		if (sendmp != NULL) {
			rxr->next_to_check = i;
			ixgbe_rx_input(rxr, ifp, sendmp, ptype);
			i = rxr->next_to_check;
		}

		/* Every 8 descriptors we go to refresh mbufs */
		if (processed == 8) {
			ixgbe_refresh_mbufs(rxr, i);
			processed = 0;
		}
	}

	/* Refresh any remaining buf structs */
	if (ixgbe_rx_unrefreshed(rxr))
		ixgbe_refresh_mbufs(rxr, i);

	rxr->next_to_check = i;

	/*
	 * Flush any outstanding LRO work
	 */
	while ((queued = SLIST_FIRST(&lro->lro_active)) != NULL) {
		SLIST_REMOVE_HEAD(&lro->lro_active, next);
		tcp_lro_flush(lro, queued);
	}

	IXGBE_RX_UNLOCK(rxr);

	/*
	** Still have cleaning to do?
	*/
	if ((staterr & IXGBE_RXD_STAT_DD) != 0)
		return (TRUE);
	else
		return (FALSE);
}
/*********************************************************************
 *
 *  Verify that the hardware indicated that the checksum is valid.
 *  Inform the stack about the status of the checksum so that it
 *  doesn't spend time verifying it.
 *
 *********************************************************************/
static void
ixgbe_rx_checksum(u32 staterr, struct mbuf *mp, u32 ptype)
{
	u16	status = (u16) staterr;
	u8	errors = (u8) (staterr >> 24);
	bool	sctp = FALSE;

	if ((ptype & IXGBE_RXDADV_PKTTYPE_ETQF) == 0 &&
	    (ptype & IXGBE_RXDADV_PKTTYPE_SCTP) != 0)
		sctp = TRUE;

	if (status & IXGBE_RXD_STAT_IPCS) {
		if (!(errors & IXGBE_RXD_ERR_IPE)) {
			/* IP Checksum Good */
			mp->m_pkthdr.csum_flags = CSUM_IP_CHECKED;
			mp->m_pkthdr.csum_flags |= CSUM_IP_VALID;
		} else
			mp->m_pkthdr.csum_flags = 0;
	}
	if (status & IXGBE_RXD_STAT_L4CS) {
		u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
#if __FreeBSD_version >= 800000
		if (sctp)
			type = CSUM_SCTP_VALID;
#endif
		if (!(errors & IXGBE_RXD_ERR_TCPE)) {
			mp->m_pkthdr.csum_flags |= type;
			if (!sctp)
				mp->m_pkthdr.csum_data = htons(0xffff);
		}
	}
}
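/*
** Illustrative note (not part of the driver): CSUM_DATA_VALID plus
** CSUM_PSEUDO_HDR with csum_data = 0xffff is the FreeBSD idiom for
** "hardware verified the full L4 checksum"; the TCP/UDP input paths
** see a final checksum of 0xffff and skip software verification.
** SCTP uses CSUM_SCTP_VALID instead, since its CRC32c covers no
** pseudo-header.
*/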
/********************************************************************
 * Manage DMA'able memory.
 *******************************************************************/
static void
ixgbe_dmamap_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	if (error)
		return;
	*(bus_addr_t *)arg = segs->ds_addr;
}

int
ixgbe_dma_malloc(struct adapter *adapter, bus_size_t size,
    struct ixgbe_dma_alloc *dma, int mapflags)
{
	device_t dev = adapter->dev;
	int	 r;

	r = bus_dma_tag_create(bus_get_dma_tag(adapter->dev), /* parent */
	    DBA_ALIGN, 0,	/* alignment, bounds */
	    BUS_SPACE_MAXADDR,	/* lowaddr */
	    BUS_SPACE_MAXADDR,	/* highaddr */
	    NULL, NULL,		/* filter, filterarg */
	    size,		/* maxsize */
	    1,			/* nsegments */
	    size,		/* maxsegsize */
	    BUS_DMA_ALLOCNOW,	/* flags */
	    NULL,		/* lockfunc */
	    NULL,		/* lockfuncarg */
	    &dma->dma_tag);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dma_tag_create failed; error %u\n", r);
		goto fail_0;
	}
	r = bus_dmamem_alloc(dma->dma_tag, (void **)&dma->dma_vaddr,
	    BUS_DMA_NOWAIT, &dma->dma_map);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamem_alloc failed; error %u\n", r);
		goto fail_1;
	}
	r = bus_dmamap_load(dma->dma_tag, dma->dma_map, dma->dma_vaddr, size,
	    ixgbe_dmamap_cb, &dma->dma_paddr, mapflags | BUS_DMA_NOWAIT);
	if (r != 0) {
		device_printf(dev, "ixgbe_dma_malloc: bus_dmamap_load failed; error %u\n", r);
		goto fail_2;
	}
	dma->dma_size = size;
	return (0);
fail_2:
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
fail_1:
	bus_dma_tag_destroy(dma->dma_tag);
fail_0:
	dma->dma_tag = NULL;
	return (r);
}

void
ixgbe_dma_free(struct adapter *adapter, struct ixgbe_dma_alloc *dma)
{
	bus_dmamap_sync(dma->dma_tag, dma->dma_map,
	    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(dma->dma_tag, dma->dma_map);
	bus_dmamem_free(dma->dma_tag, dma->dma_vaddr, dma->dma_map);
	bus_dma_tag_destroy(dma->dma_tag);
}
/*********************************************************************
 *
 *  Allocate memory for the transmit and receive rings, and then
 *  the descriptors associated with each, called only once at attach.
 *
 **********************************************************************/
int
ixgbe_allocate_queues(struct adapter *adapter)
{
	device_t	dev = adapter->dev;
	struct ix_queue	*que;
	struct tx_ring	*txr;
	struct rx_ring	*rxr;
	int		rsize, tsize, error = IXGBE_SUCCESS;
	int		txconf = 0, rxconf = 0;
#ifdef PCI_IOV
	enum ixgbe_iov_mode iov_mode;
#endif

	/* First allocate the top level queue structs */
	if (!(adapter->queues =
	    (struct ix_queue *) malloc(sizeof(struct ix_queue) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate queue memory\n");
		error = ENOMEM;
		goto fail;
	}

	/* First allocate the TX ring struct memory */
	if (!(adapter->tx_rings =
	    (struct tx_ring *) malloc(sizeof(struct tx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate TX ring memory\n");
		error = ENOMEM;
		goto tx_fail;
	}

	/* Next allocate the RX */
	if (!(adapter->rx_rings =
	    (struct rx_ring *) malloc(sizeof(struct rx_ring) *
	    adapter->num_queues, M_DEVBUF, M_NOWAIT | M_ZERO))) {
		device_printf(dev, "Unable to allocate RX ring memory\n");
		error = ENOMEM;
		goto rx_fail;
	}

	/* For the ring itself */
	tsize = roundup2(adapter->num_tx_desc *
	    sizeof(union ixgbe_adv_tx_desc), DBA_ALIGN);

#ifdef PCI_IOV
	iov_mode = ixgbe_get_iov_mode(adapter);
	adapter->pool = ixgbe_max_vfs(iov_mode);
#else
	adapter->pool = 0;
#endif
	/*
	 * Now set up the TX queues, txconf is needed to handle the
	 * possibility that things fail midcourse and we need to
	 * undo memory gracefully
	 */
	for (int i = 0; i < adapter->num_queues; i++, txconf++) {
		/* Set up some basics */
		txr = &adapter->tx_rings[i];
		txr->adapter = adapter;
#ifdef PCI_IOV
		txr->me = ixgbe_pf_que_index(iov_mode, i);
#else
		txr->me = i;
#endif
		txr->num_desc = adapter->num_tx_desc;

		/* Initialize the TX side lock */
		snprintf(txr->mtx_name, sizeof(txr->mtx_name), "%s:tx(%d)",
		    device_get_nameunit(dev), txr->me);
		mtx_init(&txr->tx_mtx, txr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, tsize,
		    &txr->txdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate TX Descriptor memory\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
		txr->tx_base = (union ixgbe_adv_tx_desc *)txr->txdma.dma_vaddr;
		bzero((void *)txr->tx_base, tsize);

		/* Now allocate transmit buffers for the ring */
		if (ixgbe_allocate_transmit_buffers(txr)) {
			device_printf(dev,
			    "Critical Failure setting up transmit buffers\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#ifndef IXGBE_LEGACY_TX
		/* Allocate a buf ring */
		txr->br = buf_ring_alloc(IXGBE_BR_SIZE, M_DEVBUF,
		    M_WAITOK, &txr->tx_mtx);
		if (txr->br == NULL) {
			device_printf(dev,
			    "Critical Failure setting up buf ring\n");
			error = ENOMEM;
			goto err_tx_desc;
		}
#endif
	}

	/*
	 * Next the RX queues...
	 */
	rsize = roundup2(adapter->num_rx_desc *
	    sizeof(union ixgbe_adv_rx_desc), DBA_ALIGN);
	for (int i = 0; i < adapter->num_queues; i++, rxconf++) {
		rxr = &adapter->rx_rings[i];
		/* Set up some basics */
		rxr->adapter = adapter;
#ifdef PCI_IOV
		rxr->me = ixgbe_pf_que_index(iov_mode, i);
#else
		rxr->me = i;
#endif
		rxr->num_desc = adapter->num_rx_desc;

		/* Initialize the RX side lock */
		snprintf(rxr->mtx_name, sizeof(rxr->mtx_name), "%s:rx(%d)",
		    device_get_nameunit(dev), rxr->me);
		mtx_init(&rxr->rx_mtx, rxr->mtx_name, NULL, MTX_DEF);

		if (ixgbe_dma_malloc(adapter, rsize,
		    &rxr->rxdma, BUS_DMA_NOWAIT)) {
			device_printf(dev,
			    "Unable to allocate RxDescriptor memory\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
		rxr->rx_base = (union ixgbe_adv_rx_desc *)rxr->rxdma.dma_vaddr;
		bzero((void *)rxr->rx_base, rsize);

		/* Allocate receive buffers for the ring */
		if (ixgbe_allocate_receive_buffers(rxr)) {
			device_printf(dev,
			    "Critical Failure setting up receive buffers\n");
			error = ENOMEM;
			goto err_rx_desc;
		}
	}

	/*
	** Finally set up the queue holding structs
	*/
	for (int i = 0; i < adapter->num_queues; i++) {
		que = &adapter->queues[i];
		que->adapter = adapter;
		que->me = i;
		que->txr = &adapter->tx_rings[i];
		que->rxr = &adapter->rx_rings[i];
	}

	return (0);

err_rx_desc:
	for (rxr = adapter->rx_rings; rxconf > 0; rxr++, rxconf--)
		ixgbe_dma_free(adapter, &rxr->rxdma);
err_tx_desc:
	for (txr = adapter->tx_rings; txconf > 0; txr++, txconf--)
		ixgbe_dma_free(adapter, &txr->txdma);
	free(adapter->rx_rings, M_DEVBUF);
rx_fail:
	free(adapter->tx_rings, M_DEVBUF);
tx_fail:
	free(adapter->queues, M_DEVBUF);
fail:
	return (error);
}